LCOV - code coverage report
Current view: top level - lib/CodeGen - MachineSink.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 322 325 99.1 %
Date: 2018-06-17 00:07:59 Functions: 37 37 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- MachineSink.cpp - Sinking for machine instructions -----------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This pass moves instructions into successor blocks when possible, so that
      11             : // they aren't executed on paths where their results aren't needed.
      12             : //
      13             : // This pass is not intended to be a replacement or a complete alternative
      14             : // for an LLVM-IR-level sinking pass. It is only designed to sink simple
      15             : // constructs that are not exposed before lowering and instruction selection.
      16             : //
      17             : //===----------------------------------------------------------------------===//
      18             : 
      19             : #include "llvm/ADT/SetVector.h"
      20             : #include "llvm/ADT/SmallSet.h"
      21             : #include "llvm/ADT/SmallVector.h"
      22             : #include "llvm/ADT/SparseBitVector.h"
      23             : #include "llvm/ADT/Statistic.h"
      24             : #include "llvm/Analysis/AliasAnalysis.h"
      25             : #include "llvm/CodeGen/MachineBasicBlock.h"
      26             : #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
      27             : #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
      28             : #include "llvm/CodeGen/MachineDominators.h"
      29             : #include "llvm/CodeGen/MachineFunction.h"
      30             : #include "llvm/CodeGen/MachineFunctionPass.h"
      31             : #include "llvm/CodeGen/MachineInstr.h"
      32             : #include "llvm/CodeGen/MachineLoopInfo.h"
      33             : #include "llvm/CodeGen/MachineOperand.h"
      34             : #include "llvm/CodeGen/MachinePostDominators.h"
      35             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      36             : #include "llvm/CodeGen/TargetInstrInfo.h"
      37             : #include "llvm/CodeGen/TargetRegisterInfo.h"
      38             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      39             : #include "llvm/IR/BasicBlock.h"
      40             : #include "llvm/IR/LLVMContext.h"
      41             : #include "llvm/IR/DebugInfoMetadata.h"
      42             : #include "llvm/Pass.h"
      43             : #include "llvm/Support/BranchProbability.h"
      44             : #include "llvm/Support/CommandLine.h"
      45             : #include "llvm/Support/Debug.h"
      46             : #include "llvm/Support/raw_ostream.h"
      47             : #include <algorithm>
      48             : #include <cassert>
      49             : #include <cstdint>
      50             : #include <map>
      51             : #include <utility>
      52             : #include <vector>
      53             : 
      54             : using namespace llvm;
      55             : 
      56             : #define DEBUG_TYPE "machine-sink"
      57             : 
      58             : static cl::opt<bool>
      59      101169 : SplitEdges("machine-sink-split",
      60      101169 :            cl::desc("Split critical edges during machine sinking"),
      61      303507 :            cl::init(true), cl::Hidden);
      62             : 
      63             : static cl::opt<bool>
      64      101169 : UseBlockFreqInfo("machine-sink-bfi",
      65      101169 :            cl::desc("Use block frequency info to find successors to sink"),
      66      303507 :            cl::init(true), cl::Hidden);
      67             : 
      68      101169 : static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
      69             :     "machine-sink-split-probability-threshold",
      70      101169 :     cl::desc(
      71             :         "Percentage threshold for splitting single-instruction critical edge. "
      72             :         "If the branch threshold is higher than this threshold, we allow "
      73             :         "speculative execution of up to 1 instruction to avoid branching to "
      74             :         "splitted critical edge"),
      75      303507 :     cl::init(40), cl::Hidden);
      76             : 
      77             : STATISTIC(NumSunk,      "Number of machine instructions sunk");
      78             : STATISTIC(NumSplit,     "Number of critical edges split");
      79             : STATISTIC(NumCoalesces, "Number of copies coalesced");
      80             : STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
      81             : 
      82             : namespace {
      83             : 
      84       57060 :   class MachineSinking : public MachineFunctionPass {
      85             :     const TargetInstrInfo *TII;
      86             :     const TargetRegisterInfo *TRI;
      87             :     MachineRegisterInfo  *MRI;     // Machine register information
      88             :     MachineDominatorTree *DT;      // Machine dominator tree
      89             :     MachinePostDominatorTree *PDT; // Machine post dominator tree
      90             :     MachineLoopInfo *LI;
      91             :     const MachineBlockFrequencyInfo *MBFI;
      92             :     const MachineBranchProbabilityInfo *MBPI;
      93             :     AliasAnalysis *AA;
      94             : 
      95             :     // Remember which edges have been considered for breaking.
      96             :     SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
      97             :     CEBCandidates;
      98             :     // Remember which edges we are about to split.
      99             :     // This is different from CEBCandidates since those edges
     100             :     // will be split.
     101             :     SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
     102             : 
     103             :     SparseBitVector<> RegsToClearKillFlags;
     104             : 
     105             :     using AllSuccsCache =
     106             :         std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
     107             : 
     108             :   public:
     109             :     static char ID; // Pass identification
     110             : 
     111       19123 :     MachineSinking() : MachineFunctionPass(ID) {
     112       19123 :       initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
     113       19123 :     }
     114             : 
     115             :     bool runOnMachineFunction(MachineFunction &MF) override;
     116             : 
     117       18980 :     void getAnalysisUsage(AnalysisUsage &AU) const override {
     118       18980 :       AU.setPreservesCFG();
     119       18980 :       MachineFunctionPass::getAnalysisUsage(AU);
     120             :       AU.addRequired<AAResultsWrapperPass>();
     121             :       AU.addRequired<MachineDominatorTree>();
     122             :       AU.addRequired<MachinePostDominatorTree>();
     123             :       AU.addRequired<MachineLoopInfo>();
     124             :       AU.addRequired<MachineBranchProbabilityInfo>();
     125             :       AU.addPreserved<MachineDominatorTree>();
     126             :       AU.addPreserved<MachinePostDominatorTree>();
     127             :       AU.addPreserved<MachineLoopInfo>();
     128       18980 :       if (UseBlockFreqInfo)
     129             :         AU.addRequired<MachineBlockFrequencyInfo>();
     130       18980 :     }
     131             : 
     132      181365 :     void releaseMemory() override {
     133             :       CEBCandidates.clear();
     134      181365 :     }
     135             : 
     136             :   private:
     137             :     bool ProcessBlock(MachineBasicBlock &MBB);
     138             :     bool isWorthBreakingCriticalEdge(MachineInstr &MI,
     139             :                                      MachineBasicBlock *From,
     140             :                                      MachineBasicBlock *To);
     141             : 
     142             :     /// Postpone the splitting of the given critical
     143             :     /// edge (\p From, \p To).
     144             :     ///
     145             :     /// We do not split the edges on the fly. Indeed, this invalidates
     146             :     /// the dominance information and thus triggers a lot of updates
     147             :     /// of that information underneath.
     148             :     /// Instead, we postpone all the splits after each iteration of
     149             :     /// the main loop. That way, the information is at least valid
     150             :     /// for the lifetime of an iteration.
     151             :     ///
     152             :     /// \return True if the edge is marked as toSplit, false otherwise.
     153             :     /// False can be returned if, for instance, this is not profitable.
     154             :     bool PostponeSplitCriticalEdge(MachineInstr &MI,
     155             :                                    MachineBasicBlock *From,
     156             :                                    MachineBasicBlock *To,
     157             :                                    bool BreakPHIEdge);
     158             :     bool SinkInstruction(MachineInstr &MI, bool &SawStore,
     159             : 
     160             :                          AllSuccsCache &AllSuccessors);
     161             :     bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
     162             :                                  MachineBasicBlock *DefMBB,
     163             :                                  bool &BreakPHIEdge, bool &LocalUse) const;
     164             :     MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
     165             :                bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
     166             :     bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
     167             :                               MachineBasicBlock *MBB,
     168             :                               MachineBasicBlock *SuccToSinkTo,
     169             :                               AllSuccsCache &AllSuccessors);
     170             : 
     171             :     bool PerformTrivialForwardCoalescing(MachineInstr &MI,
     172             :                                          MachineBasicBlock *MBB);
     173             : 
     174             :     SmallVector<MachineBasicBlock *, 4> &
     175             :     GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
     176             :                            AllSuccsCache &AllSuccessors) const;
     177             :   };
     178             : 
     179             : } // end anonymous namespace
     180             : 
     181             : char MachineSinking::ID = 0;
     182             : 
     183             : char &llvm::MachineSinkingID = MachineSinking::ID;
     184             : 
     185       26770 : INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
     186             :                       "Machine code sinking", false, false)
     187       26770 : INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
     188       26770 : INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     189       26770 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     190       26770 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     191      186358 : INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
     192             :                     "Machine code sinking", false, false)
     193             : 
     194     2896313 : bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
     195             :                                                      MachineBasicBlock *MBB) {
     196     2896313 :   if (!MI.isCopy())
     197             :     return false;
     198             : 
     199      439769 :   unsigned SrcReg = MI.getOperand(1).getReg();
     200      439769 :   unsigned DstReg = MI.getOperand(0).getReg();
     201      341684 :   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
     202      474929 :       !TargetRegisterInfo::isVirtualRegister(DstReg) ||
     203       35160 :       !MRI->hasOneNonDBGUse(SrcReg))
     204             :     return false;
     205             : 
     206       21869 :   const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
     207             :   const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
     208       21869 :   if (SRC != DRC)
     209             :     return false;
     210             : 
     211       13307 :   MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
     212             :   if (DefMI->isCopyLike())
     213             :     return false;
     214             :   LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
     215             :   LLVM_DEBUG(dbgs() << "*** to: " << MI);
     216         498 :   MRI->replaceRegWith(DstReg, SrcReg);
     217         498 :   MI.eraseFromParent();
     218             : 
     219             :   // Conservatively, clear any kill flags, since it's possible that they are no
     220             :   // longer correct.
     221         498 :   MRI->clearKillFlags(SrcReg);
     222             : 
     223             :   ++NumCoalesces;
     224             :   return true;
     225             : }
     226             : 
     227             : /// AllUsesDominatedByBlock - Return true if all uses of the specified register
     228             : /// occur in blocks dominated by the specified block. If any use is in the
     229             : /// definition block, then return false since it is never legal to move def
     230             : /// after uses.
     231             : bool
     232      874908 : MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
     233             :                                         MachineBasicBlock *MBB,
     234             :                                         MachineBasicBlock *DefMBB,
     235             :                                         bool &BreakPHIEdge,
     236             :                                         bool &LocalUse) const {
     237             :   assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
     238             :          "Only makes sense for vregs");
     239             : 
     240             :   // Ignore debug uses because debug info doesn't affect the code.
     241     1749816 :   if (MRI->use_nodbg_empty(Reg))
     242             :     return true;
     243             : 
     244             :   // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
     245             :   // into and they are all PHI nodes. In this case, machine-sink must break
     246             :   // the critical edge first. e.g.
     247             :   //
     248             :   // %bb.1: derived from LLVM BB %bb4.preheader
     249             :   //   Predecessors according to CFG: %bb.0
     250             :   //     ...
     251             :   //     %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags
     252             :   //     ...
     253             :   //     JE_4 <%bb.37>, implicit %eflags
     254             :   //   Successors according to CFG: %bb.37 %bb.2
     255             :   //
     256             :   // %bb.2: derived from LLVM BB %bb.nph
     257             :   //   Predecessors according to CFG: %bb.0 %bb.1
     258             :   //     %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1
     259      832513 :   BreakPHIEdge = true;
     260     1682001 :   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
     261      836515 :     MachineInstr *UseInst = MO.getParent();
     262      836515 :     unsigned OpNo = &MO - &UseInst->getOperand(0);
     263      836515 :     MachineBasicBlock *UseBlock = UseInst->getParent();
     264      854735 :     if (!(UseBlock == MBB && UseInst->isPHI() &&
     265       36440 :           UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) {
     266      819540 :       BreakPHIEdge = false;
     267             :       break;
     268             :     }
     269             :   }
     270      832513 :   if (BreakPHIEdge)
     271             :     return true;
     272             : 
     273     1752670 :   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
     274             :     // Determine the block of the use.
     275      875024 :     MachineInstr *UseInst = MO.getParent();
     276      875024 :     unsigned OpNo = &MO - &UseInst->getOperand(0);
     277      875024 :     MachineBasicBlock *UseBlock = UseInst->getParent();
     278             :     if (UseInst->isPHI()) {
     279             :       // PHI nodes use the operand in the predecessor block, not the block with
     280             :       // the PHI.
     281       74218 :       UseBlock = UseInst->getOperand(OpNo+1).getMBB();
     282      837915 :     } else if (UseBlock == DefMBB) {
     283      669875 :       LocalUse = true;
     284             :       return false;
     285             :     }
     286             : 
     287             :     // Check that it dominates.
     288      410298 :     if (!DT->dominates(MBB, UseBlock))
     289             :       return false;
     290             :   }
     291             : 
     292             :   return true;
     293             : }
     294             : 
     295      181340 : bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
     296      181340 :   if (skipFunction(MF.getFunction()))
     297             :     return false;
     298             : 
     299             :   LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
     300             : 
     301      181184 :   TII = MF.getSubtarget().getInstrInfo();
     302      181184 :   TRI = MF.getSubtarget().getRegisterInfo();
     303      181184 :   MRI = &MF.getRegInfo();
     304      181184 :   DT = &getAnalysis<MachineDominatorTree>();
     305      181184 :   PDT = &getAnalysis<MachinePostDominatorTree>();
     306      181184 :   LI = &getAnalysis<MachineLoopInfo>();
     307      181184 :   MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
     308      181184 :   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
     309      362368 :   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
     310             : 
     311             :   bool EverMadeChange = false;
     312             : 
     313             :   while (true) {
     314             :     bool MadeChange = false;
     315             : 
     316             :     // Process all basic blocks.
     317             :     CEBCandidates.clear();
     318             :     ToSplit.clear();
     319      736388 :     for (auto &MBB: MF)
     320      548861 :       MadeChange |= ProcessBlock(MBB);
     321             : 
     322             :     // If we have anything we marked as toSplit, split it now.
     323      191133 :     for (auto &Pair : ToSplit) {
     324        3606 :       auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
     325        3606 :       if (NewSucc != nullptr) {
     326             :         LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
     327             :                           << printMBBReference(*Pair.first) << " -- "
     328             :                           << printMBBReference(*NewSucc) << " -- "
     329             :                           << printMBBReference(*Pair.second) << '\n');
     330             :         MadeChange = true;
     331             :         ++NumSplit;
     332             :       } else
     333             :         LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
     334             :     }
     335             :     // If this iteration over the code changed anything, keep iterating.
     336      187527 :     if (!MadeChange) break;
     337             :     EverMadeChange = true;
     338             :   }
     339             : 
     340             :   // Now clear any kill flags for recorded registers.
     341      195457 :   for (auto I : RegsToClearKillFlags)
     342       14273 :     MRI->clearKillFlags(I);
     343             :   RegsToClearKillFlags.clear();
     344             : 
     345      181184 :   return EverMadeChange;
     346             : }
     347             : 
     348      548861 : bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     349             :   // Can't sink anything out of a block that has less than two successors.
     350      721940 :   if (MBB.succ_size() <= 1 || MBB.empty()) return false;
     351             : 
     352             :   // Don't bother sinking code out of unreachable blocks. In addition to being
     353             :   // unprofitable, it can also lead to infinite looping, because in an
     354             :   // unreachable loop there may be nowhere to stop.
     355      346158 :   if (!DT->isReachableFromEntry(&MBB)) return false;
     356             : 
     357             :   bool MadeChange = false;
     358             : 
     359             :   // Cache all successors, sorted by frequency info and loop depth.
     360             :   AllSuccsCache AllSuccessors;
     361             : 
     362             :   // Walk the basic block bottom-up.  Remember if we saw a store.
     363      173078 :   MachineBasicBlock::iterator I = MBB.end();
     364             :   --I;
     365      173078 :   bool ProcessedBegin, SawStore = false;
     366             :   do {
     367             :     MachineInstr &MI = *I;  // The instruction to sink.
     368             : 
     369             :     // Predecrement I (if it's not begin) so that it isn't invalidated by
     370             :     // sinking.
     371             :     ProcessedBegin = I == MBB.begin();
     372     2977125 :     if (!ProcessedBegin)
     373             :       --I;
     374             : 
     375       80812 :     if (MI.isDebugInstr())
     376       80812 :       continue;
     377             : 
     378     2896313 :     bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
     379     2896811 :     if (Joined) {
     380             :       MadeChange = true;
     381         498 :       continue;
     382             :     }
     383             : 
     384     2895815 :     if (SinkInstruction(MI, SawStore, AllSuccessors)) {
     385             :       ++NumSunk;
     386             :       MadeChange = true;
     387             :     }
     388             : 
     389             :     // If we just processed the first instruction in the block, we're done.
     390     2977125 :   } while (!ProcessedBegin);
     391             : 
     392             :   return MadeChange;
     393             : }
     394             : 
     395        9643 : bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
     396             :                                                  MachineBasicBlock *From,
     397             :                                                  MachineBasicBlock *To) {
     398             :   // FIXME: Need much better heuristics.
     399             : 
     400             :   // If the pass has already considered breaking this edge (during this pass
     401             :   // through the function), then let's go ahead and break it. This means
     402             :   // sinking multiple "cheap" instructions into the same block.
     403        9643 :   if (!CEBCandidates.insert(std::make_pair(From, To)).second)
     404             :     return true;
     405             : 
     406        7251 :   if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
     407             :     return true;
     408             : 
     409        6978 :   if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
     410             :       BranchProbability(SplitEdgeProbabilityThreshold, 100))
     411             :     return true;
     412             : 
     413             :   // MI is cheap, we probably don't want to break the critical edge for it.
     414             :   // However, if this would allow some definitions of its source operands
     415             :   // to be sunk then it's probably worth it.
     416       14854 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     417        5252 :     const MachineOperand &MO = MI.getOperand(i);
     418       13783 :     if (!MO.isReg() || !MO.isUse())
     419        4021 :       continue;
     420        1231 :     unsigned Reg = MO.getReg();
     421        1260 :     if (Reg == 0)
     422          29 :       continue;
     423             : 
     424             :     // We don't move live definitions of physical registers,
     425             :     // so sinking their uses won't enable any opportunities.
     426        1202 :     if (TargetRegisterInfo::isPhysicalRegister(Reg))
     427          33 :       continue;
     428             : 
     429             :     // If this instruction is the only user of a virtual register,
     430             :     // check if breaking the edge will enable sinking
     431             :     // both this instruction and the defining instruction.
     432        1169 :     if (MRI->hasOneNonDBGUse(Reg)) {
     433             :       // If the definition resides in same MBB,
     434             :       // claim it's likely we can sink these together.
     435             :       // If definition resides elsewhere, we aren't
     436             :       // blocking it from being sunk so don't break the edge.
     437         457 :       MachineInstr *DefMI = MRI->getVRegDef(Reg);
     438         457 :       if (DefMI->getParent() == MI.getParent())
     439             :         return true;
     440             :     }
     441             :   }
     442             : 
     443             :   return false;
     444             : }
     445             : 
     446        9643 : bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
     447             :                                                MachineBasicBlock *FromBB,
     448             :                                                MachineBasicBlock *ToBB,
     449             :                                                bool BreakPHIEdge) {
     450        9643 :   if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
     451             :     return false;
     452             : 
     453             :   // Avoid breaking back edge. From == To means backedge for single BB loop.
     454        7468 :   if (!SplitEdges || FromBB == ToBB)
     455             :     return false;
     456             : 
     457             :   // Check for backedges of more "complex" loops.
     458       29715 :   if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
     459        7311 :       LI->isLoopHeader(ToBB))
     460             :     return false;
     461             : 
     462             :   // It's not always legal to break critical edges and sink the computation
     463             :   // to the edge.
     464             :   //
     465             :   // %bb.1:
     466             :   // v1024
     467             :   // Beq %bb.3
     468             :   // <fallthrough>
     469             :   // %bb.2:
     470             :   // ... no uses of v1024
     471             :   // <fallthrough>
     472             :   // %bb.3:
     473             :   // ...
     474             :   //       = v1024
     475             :   //
     476             :   // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
     477             :   //
     478             :   // %bb.1:
     479             :   // ...
     480             :   // Bne %bb.2
     481             :   // %bb.4:
     482             :   // v1024 =
     483             :   // B %bb.3
     484             :   // %bb.2:
     485             :   // ... no uses of v1024
     486             :   // <fallthrough>
     487             :   // %bb.3:
     488             :   // ...
     489             :   //       = v1024
     490             :   //
     491             :   // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
     492             :   // flow. We need to ensure the new basic block where the computation is
     493             :   // sunk to dominates all the uses.
     494             :   // It's only legal to break critical edge and sink the computation to the
     495             :   // new block if all the predecessors of "To", except for "From", are
     496             :   // not dominated by "From". Given SSA property, this means these
     497             :   // predecessors are dominated by "To".
     498             :   //
     499             :   // There is no need to do this check if all the uses are PHI nodes. PHI
     500             :   // sources are only defined on the specific predecessor edges.
     501        5164 :   if (!BreakPHIEdge) {
     502             :     for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
     503         321 :            E = ToBB->pred_end(); PI != E; ++PI) {
     504         318 :       if (*PI == FromBB)
     505          98 :         continue;
     506         440 :       if (!DT->dominates(ToBB, *PI))
     507             :         return false;
     508             :     }
     509             :   }
     510             : 
     511        4971 :   ToSplit.insert(std::make_pair(FromBB, ToBB));
     512             :   
     513        4971 :   return true;
     514             : }
     515             : 
     516             : /// collectDebgValues - Scan instructions following MI and collect any
     517             : /// matching DBG_VALUEs.
     518       23557 : static void collectDebugValues(MachineInstr &MI,
     519             :                                SmallVectorImpl<MachineInstr *> &DbgValues) {
     520             :   DbgValues.clear();
     521       47114 :   if (!MI.getOperand(0).isReg())
     522       23532 :     return;
     523             : 
     524             :   MachineBasicBlock::iterator DI = MI; ++DI;
     525       23555 :   for (MachineBasicBlock::iterator DE = MI.getParent()->end();
     526       26730 :        DI != DE; ++DI) {
     527       26705 :     if (!DI->isDebugValue())
     528             :       return;
     529        9427 :     if (DI->getOperand(0).isReg() &&
     530        3077 :         DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
     531        2517 :       DbgValues.push_back(&*DI);
     532             :   }
     533             : }
     534             : 
     535             : /// isProfitableToSinkTo - Return true if it is profitable to sink MI.
     536      113470 : bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
     537             :                                           MachineBasicBlock *MBB,
     538             :                                           MachineBasicBlock *SuccToSinkTo,
     539             :                                           AllSuccsCache &AllSuccessors) {
     540             :   assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
     541             : 
     542      113470 :   if (MBB == SuccToSinkTo)
     543             :     return false;
     544             : 
     545             :   // It is profitable if SuccToSinkTo does not post dominate current block.
     546      218546 :   if (!PDT->dominates(SuccToSinkTo, MBB))
     547             :     return true;
     548             : 
     549             :   // It is profitable to sink an instruction from a deeper loop to a shallower
     550             :   // loop, even if the latter post-dominates the former (PR21115).
     551       52293 :   if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
     552             :     return true;
     553             : 
     554             :   // Check if only use in post dominated block is PHI instruction.
     555             :   bool NonPHIUse = false;
     556       74173 :   for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
     557       17966 :     MachineBasicBlock *UseBlock = UseInst.getParent();
     558       17966 :     if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
     559             :       NonPHIUse = true;
     560             :   }
     561       12747 :   if (!NonPHIUse)
     562             :     return true;
     563             : 
     564             :   // If SuccToSinkTo post dominates then also it may be profitable if MI
     565             :   // can further profitably sinked into another block in next round.
     566        4848 :   bool BreakPHIEdge = false;
     567             :   // FIXME - If finding successor is compile time expensive then cache results.
     568        4848 :   if (MachineBasicBlock *MBB2 =
     569        4848 :           FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
     570           0 :     return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
     571             : 
     572             :   // If SuccToSinkTo is final destination and it is a post dominator of current
     573             :   // block then it is not profitable to sink MI into SuccToSinkTo block.
     574             :   return false;
     575             : }
     576             : 
     577             : /// Get the sorted sequence of successors for this MachineBasicBlock, possibly
     578             : /// computing it if it was not already cached.
     579             : SmallVector<MachineBasicBlock *, 4> &
     580      793133 : MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
     581             :                                        AllSuccsCache &AllSuccessors) const {
     582             :   // Do we have the sorted successors in cache ?
     583             :   auto Succs = AllSuccessors.find(MBB);
     584      793133 :   if (Succs != AllSuccessors.end())
     585      627092 :     return Succs->second;
     586             : 
     587             :   SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(),
     588             :                                                MBB->succ_end());
     589             : 
     590             :   // Handle cases where sinking can happen but where the sink point isn't a
     591             :   // successor. For example:
     592             :   //
     593             :   //   x = computation
     594             :   //   if () {} else {}
     595             :   //   use x
     596             :   //
     597             :   const std::vector<MachineDomTreeNode *> &Children =
     598      166041 :     DT->getNode(MBB)->getChildren();
     599      166041 :   for (const auto &DTChild : Children)
     600             :     // DomTree children of MBB that have MBB as immediate dominator are added.
     601      663295 :     if (DTChild->getIDom()->getBlock() == MI.getParent() &&
     602             :         // Skip MBBs already added to the AllSuccs vector above.
     603      330236 :         !MBB->isSuccessor(DTChild->getBlock()))
     604       48106 :       AllSuccs.push_back(DTChild->getBlock());
     605             : 
     606             :   // Sort Successors according to their loop depth or block frequency info.
     607             :   std::stable_sort(
     608             :       AllSuccs.begin(), AllSuccs.end(),
     609      576802 :       [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
     610      542120 :         uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
     611      542120 :         uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
     612      271060 :         bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
     613      288401 :         return HasBlockFreq ? LHSFreq < RHSFreq
     614      305742 :                             : LI->getLoopDepth(L) < LI->getLoopDepth(R);
     615             :       });
     616             : 
     617      166041 :   auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
     618             : 
     619      166041 :   return it.first->second;
     620             : }
     621             : 
     622             : /// FindSuccToSinkTo - Find a successor to sink this instruction to.
     623             : MachineBasicBlock *
     624     1494412 : MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
     625             :                                  bool &BreakPHIEdge,
     626             :                                  AllSuccsCache &AllSuccessors) {
     627             :   assert (MBB && "Invalid MachineBasicBlock!");
     628             : 
     629             :   // Loop over all the operands of the specified instruction.  If there is
     630             :   // anything we can't handle, bail out.
     631             : 
     632             :   // SuccToSinkTo - This is the successor to sink this instruction to, once we
     633             :   // decide.
     634             :   MachineBasicBlock *SuccToSinkTo = nullptr;
     635     3592425 :   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     636     3558905 :     const MachineOperand &MO = MI.getOperand(i);
     637     3558905 :     if (!MO.isReg()) continue;  // Ignore non-register operands.
     638             : 
     639     2760025 :     unsigned Reg = MO.getReg();
     640     2760025 :     if (Reg == 0) continue;
     641             : 
     642     2738007 :     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
     643     1657633 :       if (MO.isUse()) {
     644             :         // If the physreg has no defs anywhere, it's just an ambient register
     645             :         // and we can freely move its uses. Alternatively, if it's allocatable,
     646             :         // it could get allocated to something with a def during allocation.
     647      323130 :         if (!MRI->isConstantPhysReg(Reg))
     648             :           return nullptr;
     649     1334503 :       } else if (!MO.isDead()) {
     650             :         // A def that isn't dead. We can't move it.
     651             :         return nullptr;
     652             :       }
     653             :     } else {
     654             :       // Virtual register uses are always safe to sink.
     655     1080374 :       if (MO.isUse()) continue;
     656             : 
     657             :       // If it's not safe to move defs of the register class, then abort.
     658     1587594 :       if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
     659             :         return nullptr;
     660             : 
     661             :       // Virtual register defs can only be sunk if all their uses are in blocks
     662             :       // dominated by one of the successors.
     663      793149 :       if (SuccToSinkTo) {
     664             :         // If a previous operand picked a block to sink to, then this operand
     665             :         // must be sinkable to the same block.
     666          16 :         bool LocalUse = false;
     667          16 :         if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
     668             :                                      BreakPHIEdge, LocalUse))
     669          12 :           return nullptr;
     670             : 
     671           4 :         continue;
     672             :       }
     673             : 
     674             :       // Otherwise, we should look at all the successors and decide which one
     675             :       // we should sink to. If we have reliable block frequency information
     676             :       // (frequency != 0) available, give successors with smaller frequencies
     677             :       // higher priority, otherwise prioritize smaller loop depths.
     678      966449 :       for (MachineBasicBlock *SuccBlock :
     679     1677823 :            GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
     680      874892 :         bool LocalUse = false;
     681      874892 :         if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
     682             :                                     BreakPHIEdge, LocalUse)) {
     683             :           SuccToSinkTo = SuccBlock;
     684      113470 :           break;
     685             :         }
     686      761422 :         if (LocalUse)
     687             :           // Def is used locally, it's never safe to move this def.
     688      669865 :           return nullptr;
     689             :       }
     690             : 
     691             :       // If we couldn't find a block to sink to, ignore this instruction.
     692      123268 :       if (!SuccToSinkTo)
     693             :         return nullptr;
     694      113470 :       if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
     695             :         return nullptr;
     696             :     }
     697             :   }
     698             : 
     699             :   // It is not possible to sink an instruction into its own block.  This can
     700             :   // happen with loops.
     701       33520 :   if (MBB == SuccToSinkTo)
     702             :     return nullptr;
     703             : 
     704             :   // It's not safe to sink instructions to EH landing pad. Control flow into
     705             :   // landing pad is implicitly defined.
     706       33520 :   if (SuccToSinkTo && SuccToSinkTo->isEHPad())
     707             :     return nullptr;
     708             : 
     709             :   return SuccToSinkTo;
     710             : }
     711             : 
     712             : /// Return true if MI is likely to be usable as a memory operation by the
     713             : /// implicit null check optimization.
     714             : ///
     715             : /// This is a "best effort" heuristic, and should not be relied upon for
     716             : /// correctness.  This returning true does not guarantee that the implicit null
     717             : /// check optimization is legal over MI, and this returning false does not
     718             : /// guarantee MI cannot possibly be used to do a null check.
     719     1489568 : static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
     720             :                                              const TargetInstrInfo *TII,
     721             :                                              const TargetRegisterInfo *TRI) {
     722             :   using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
     723             : 
     724     1489568 :   auto *MBB = MI.getParent();
     725     1489568 :   if (MBB->pred_size() != 1)
     726             :     return false;
     727             : 
     728      896085 :   auto *PredMBB = *MBB->pred_begin();
     729      896085 :   auto *PredBB = PredMBB->getBasicBlock();
     730             : 
     731             :   // Frontends that don't use implicit null checks have no reason to emit
     732             :   // branches with make.implicit metadata, and this function should always
     733             :   // return false for them.
     734     1764757 :   if (!PredBB ||
     735      895944 :       !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
     736             :     return false;
     737             : 
     738             :   unsigned BaseReg;
     739             :   int64_t Offset;
     740          11 :   if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
     741             :     return false;
     742             : 
     743           8 :   if (!(MI.mayLoad() && !MI.isPredicable()))
     744             :     return false;
     745             : 
     746             :   MachineBranchPredicate MBP;
     747           4 :   if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
     748             :     return false;
     749             : 
     750           8 :   return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
     751           4 :          (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
     752           8 :           MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
     753           4 :          MBP.LHS.getReg() == BaseReg;
     754             : }
     755             : 
     756             : /// SinkInstruction - Determine whether it is safe to sink the specified machine
     757             : /// instruction out of its current block into a successor.
     758     2895815 : bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
     759             :                                      AllSuccsCache &AllSuccessors) {
     760             :   // Don't sink instructions that the target prefers not to sink.
     761     2895815 :   if (!TII->shouldSink(MI))
     762             :     return false;
     763             : 
     764             :   // Check if it's safe to move the instruction.
     765     2895805 :   if (!MI.isSafeToMove(AA, SawStore))
     766             :     return false;
     767             : 
     768             :   // Convergent operations may not be made control-dependent on additional
     769             :   // values.
     770     1489602 :   if (MI.isConvergent())
     771             :     return false;
     772             : 
     773             :   // Don't break implicit null checks.  This is a performance heuristic, and not
     774             :   // required for correctness.
     775     1489568 :   if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
     776             :     return false;
     777             : 
     778             :   // FIXME: This should include support for sinking instructions within the
     779             :   // block they are currently in to shorten the live ranges.  We often get
     780             :   // instructions sunk into the top of a large block, but it would be better to
     781             :   // also sink them down before their first use in the block.  This xform has to
     782             :   // be careful not to *increase* register pressure though, e.g. sinking
     783             :   // "x = y + z" down if it kills y and z would increase the live ranges of y
     784             :   // and z and only shrink the live range of x.
     785             : 
     786     1489564 :   bool BreakPHIEdge = false;
     787     1489564 :   MachineBasicBlock *ParentBlock = MI.getParent();
     788             :   MachineBasicBlock *SuccToSinkTo =
     789     1489564 :       FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);
     790             : 
     791             :   // If there are no outputs, it must have side-effects.
     792     1489564 :   if (!SuccToSinkTo)
     793             :     return false;
     794             : 
     795             :   // If the instruction to move defines a dead physical register which is live
     796             :   // when leaving the basic block, don't move it because it could turn into a
     797             :   // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
     798      148636 :   for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
     799      115436 :     const MachineOperand &MO = MI.getOperand(I);
     800      115436 :     if (!MO.isReg()) continue;
     801       83130 :     unsigned Reg = MO.getReg();
     802      150758 :     if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
     803        8102 :     if (SuccToSinkTo->isLiveIn(Reg))
     804             :       return false;
     805             :   }
     806             : 
     807             :   LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
     808             : 
     809             :   // If the block has multiple predecessors, this is a critical edge.
     810             :   // Decide if we can sink along it or need to break the edge.
     811       33200 :   if (SuccToSinkTo->pred_size() > 1) {
     812             :     // We cannot sink a load across a critical edge - there may be stores in
     813             :     // other code paths.
     814             :     bool TryBreak = false;
     815       12406 :     bool store = true;
     816       12406 :     if (!MI.isSafeToMove(AA, store)) {
     817             :       LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
     818             :       TryBreak = true;
     819             :     }
     820             : 
     821             :     // We don't want to sink across a critical edge if we don't dominate the
     822             :     // successor. We could be introducing calculations to new code paths.
     823       24268 :     if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
     824             :       LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
     825             :       TryBreak = true;
     826             :     }
     827             : 
     828             :     // Don't sink instructions into a loop.
     829       16030 :     if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
     830             :       LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");
     831             :       TryBreak = true;
     832             :     }
     833             : 
     834             :     // Otherwise we are OK with sinking along a critical edge.
     835       12401 :     if (!TryBreak)
     836             :       LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
     837             :     else {
     838             :       // Mark this edge as to be split.
     839             :       // If the edge can actually be split, the next iteration of the main loop
     840             :       // will sink MI in the newly created block.
     841             :       bool Status =
     842        4532 :         PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
     843             :       if (!Status)
     844             :         LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
     845             :                              "break critical edge\n");
     846             :       // The instruction will not be sunk this time.
     847        4532 :       return false;
     848             :     }
     849             :   }
     850             : 
     851       28668 :   if (BreakPHIEdge) {
     852             :     // BreakPHIEdge is true if all the uses are in the successor MBB being
     853             :     // sunken into and they are all PHI nodes. In this case, machine-sink must
     854             :     // break the critical edge first.
     855             :     bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
     856        5111 :                                             SuccToSinkTo, BreakPHIEdge);
     857             :     if (!Status)
     858             :       LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
     859             :                            "break critical edge\n");
     860             :     // The instruction will not be sunk this time.
     861        5111 :     return false;
     862             :   }
     863             : 
     864             :   // Determine where to insert into. Skip phi nodes.
     865       23557 :   MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
     866       25471 :   while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
     867             :     ++InsertPos;
     868             : 
     869             :   // collect matching debug values.
     870             :   SmallVector<MachineInstr *, 2> DbgValuesToSink;
     871       23557 :   collectDebugValues(MI, DbgValuesToSink);
     872             : 
     873             :   // Merge or erase debug location to ensure consistent stepping in profilers
     874             :   // and debuggers.
     875       46372 :   if (!SuccToSinkTo->empty() && InsertPos != SuccToSinkTo->end())
     876       45624 :     MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
     877             :                                                  InsertPos->getDebugLoc()));
     878             :   else
     879        1490 :     MI.setDebugLoc(DebugLoc());
     880             : 
     881             : 
     882             :   // Move the instruction.
     883             :   SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
     884       23557 :                        ++MachineBasicBlock::iterator(MI));
     885             : 
     886             :   // Move previously adjacent debug value instructions to the insert position.
     887        2517 :   for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
     888       26074 :          DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
     889        2517 :     MachineInstr *DbgMI = *DBI;
     890             :     SuccToSinkTo->splice(InsertPos, ParentBlock,  DbgMI,
     891        2517 :                          ++MachineBasicBlock::iterator(DbgMI));
     892             :   }
     893             : 
     894             :   // Conservatively, clear any kill flags, since it's possible that they are no
     895             :   // longer correct.
     896             :   // Note that we have to clear the kill flags for any register this instruction
     897             :   // uses as we may sink over another instruction which currently kills the
     898             :   // used registers.
     899      187035 :   for (MachineOperand &MO : MI.operands()) {
     900      140262 :     if (MO.isReg() && MO.isUse())
     901       31632 :       RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
     902             :   }
     903             : 
     904             :   return true;
     905             : }
     906             : 
     907             : //===----------------------------------------------------------------------===//
     908             : // This pass is not intended to be a replacement or a complete alternative
     909             : // for the pre-ra machine sink pass. It is only designed to sink COPY
     910             : // instructions which should be handled after RA.
     911             : //
     912             : // This pass sinks COPY instructions into a successor block, if the COPY is not
     913             : // used in the current block and the COPY is live-in to a single successor
     914             : // (i.e., doesn't require the COPY to be duplicated).  This avoids executing the
     915             : // copy on paths where their results aren't needed.  This also exposes
     916             : // additional opportunites for dead copy elimination and shrink wrapping.
     917             : //
     918             : // These copies were either not handled by or are inserted after the MachineSink
     919             : // pass. As an example of the former case, the MachineSink pass cannot sink
     920             : // COPY instructions with allocatable source registers; for AArch64 these type
     921             : // of copy instructions are frequently used to move function parameters (PhyReg)
     922             : // into virtual registers in the entry block.
     923             : //
     924             : // For the machine IR below, this pass will sink %w19 in the entry into its
     925             : // successor (%bb.1) because %w19 is only live-in in %bb.1.
     926             : // %bb.0:
     927             : //   %wzr = SUBSWri %w1, 1
     928             : //   %w19 = COPY %w0
     929             : //   Bcc 11, %bb.2
     930             : // %bb.1:
     931             : //   Live Ins: %w19
     932             : //   BL @fun
     933             : //   %w0 = ADDWrr %w0, %w19
     934             : //   RET %w0
     935             : // %bb.2:
     936             : //   %w0 = COPY %wzr
     937             : //   RET %w0
     938             : // As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
     939             : // able to see %bb.0 as a candidate.
     940             : //===----------------------------------------------------------------------===//
     941             : namespace {
     942             : 
     943       55707 : class PostRAMachineSinking : public MachineFunctionPass {
     944             : public:
     945             :   bool runOnMachineFunction(MachineFunction &MF) override;
     946             : 
     947             :   static char ID;
     948       18663 :   PostRAMachineSinking() : MachineFunctionPass(ID) {}
     949       18533 :   StringRef getPassName() const override { return "PostRA Machine Sink"; }
     950             : 
     951       18517 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
     952       18517 :     AU.setPreservesCFG();
     953       18517 :     MachineFunctionPass::getAnalysisUsage(AU);
     954       18517 :   }
     955             : 
     956       18521 :   MachineFunctionProperties getRequiredProperties() const override {
     957       37042 :     return MachineFunctionProperties().set(
     958       18521 :         MachineFunctionProperties::Property::NoVRegs);
     959             :   }
     960             : 
     961             : private:
     962             :   /// Track which register units have been modified and used.
     963             :   LiveRegUnits ModifiedRegUnits, UsedRegUnits;
     964             : 
     965             :   /// Sink Copy instructions unused in the same block close to their uses in
     966             :   /// successors.
     967             :   bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
     968             :                      const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
     969             : };
     970             : } // namespace
     971             : 
     972             : char PostRAMachineSinking::ID = 0;
     973             : char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
     974             : 
     975      148112 : INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
     976             :                 "PostRA Machine Sink", false, false)
     977             : 
     978       10973 : static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
     979             :                                   const TargetRegisterInfo *TRI) {
     980             :   LiveRegUnits LiveInRegUnits(*TRI);
     981       10973 :   LiveInRegUnits.addLiveIns(MBB);
     982       21946 :   return !LiveInRegUnits.available(Reg);
     983             : }
     984             : 
     985             : static MachineBasicBlock *
     986        5883 : getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
     987             :                       const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
     988             :                       unsigned Reg, const TargetRegisterInfo *TRI) {
     989             :   // Try to find a single sinkable successor in which Reg is live-in.
     990             :   MachineBasicBlock *BB = nullptr;
     991        5883 :   for (auto *SI : SinkableBBs) {
     992        7282 :     if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
     993             :       // If BB is set here, Reg is live-in to at least two sinkable successors,
     994             :       // so quit.
     995        5587 :       if (BB)
     996         463 :         return nullptr;
     997             :       BB = SI;
     998             :     }
     999             :   }
    1000             :   // Reg is not live-in to any sinkable successors.
    1001        5420 :   if (!BB)
    1002             :     return nullptr;
    1003             : 
    1004             :   // Check if any register aliased with Reg is live-in in other successors.
    1005       13257 :   for (auto *SI : CurBB.successors()) {
    1006        9059 :     if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
    1007             :       return nullptr;
    1008             :   }
    1009             :   return BB;
    1010             : }
    1011             : 
    1012             : static MachineBasicBlock *
    1013        5876 : getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
    1014             :                       const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
    1015             :                       ArrayRef<unsigned> DefedRegsInCopy,
    1016             :                       const TargetRegisterInfo *TRI) {
    1017             :   MachineBasicBlock *SingleBB = nullptr;
    1018       14272 :   for (auto DefReg : DefedRegsInCopy) {
    1019             :     MachineBasicBlock *BB =
    1020        5883 :         getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
    1021        5883 :     if (!BB || (SingleBB && SingleBB != BB))
    1022             :       return nullptr;
    1023             :     SingleBB = BB;
    1024             :   }
    1025             :   return SingleBB;
    1026             : }
    1027             : 
    1028        4191 : static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
    1029             :                            SmallVectorImpl<unsigned> &UsedOpsInCopy,
    1030             :                            LiveRegUnits &UsedRegUnits,
    1031             :                            const TargetRegisterInfo *TRI) {
    1032       12575 :   for (auto U : UsedOpsInCopy) {
    1033        4192 :     MachineOperand &MO = MI->getOperand(U);
    1034        4192 :     unsigned SrcReg = MO.getReg();
    1035        4192 :     if (!UsedRegUnits.available(SrcReg)) {
    1036         455 :       MachineBasicBlock::iterator NI = std::next(MI->getIterator());
    1037        6292 :       for (MachineInstr &UI : make_range(NI, CurBB.end())) {
    1038        5421 :         if (UI.killsRegister(SrcReg, TRI)) {
    1039          39 :           UI.clearRegisterKills(SrcReg, TRI);
    1040             :           MO.setIsKill(true);
    1041             :           break;
    1042             :         }
    1043             :       }
    1044             :     }
    1045             :   }
    1046        4191 : }
    1047             : 
    1048        4191 : static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
    1049             :                          SmallVectorImpl<unsigned> &UsedOpsInCopy,
    1050             :                          SmallVectorImpl<unsigned> &DefedRegsInCopy) {
    1051       12587 :   for (auto DefReg : DefedRegsInCopy)
    1052        4198 :     SuccBB->removeLiveIn(DefReg);
    1053       12575 :   for (auto U : UsedOpsInCopy) {
    1054        8384 :     unsigned Reg = MI->getOperand(U).getReg();
    1055        4192 :     if (!SuccBB->isLiveIn(Reg))
    1056             :       SuccBB->addLiveIn(Reg);
    1057             :   }
    1058        4191 : }
    1059             : 
    1060       16232 : static bool hasRegisterDependency(MachineInstr *MI,
    1061             :                                   SmallVectorImpl<unsigned> &UsedOpsInCopy,
    1062             :                                   SmallVectorImpl<unsigned> &DefedRegsInCopy,
    1063             :                                   LiveRegUnits &ModifiedRegUnits,
    1064             :                                   LiveRegUnits &UsedRegUnits) {
    1065             :   bool HasRegDependency = false;
    1066       29779 :   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    1067       23903 :     MachineOperand &MO = MI->getOperand(i);
    1068       23903 :     if (!MO.isReg())
    1069           0 :       continue;
    1070       23903 :     unsigned Reg = MO.getReg();
    1071       23903 :     if (!Reg)
    1072           0 :       continue;
    1073       23903 :     if (MO.isDef()) {
    1074       16259 :       if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
    1075             :         HasRegDependency = true;
    1076       10356 :         break;
    1077             :       }
    1078        7629 :       DefedRegsInCopy.push_back(Reg);
    1079             : 
    1080             :       // FIXME: instead of isUse(), readsReg() would be a better fix here,
    1081             :       // For example, we can ignore modifications in reg with undef. However,
    1082             :       // it's not perfectly clear if skipping the internal read is safe in all
    1083             :       // other targets.
    1084             :     } else if (MO.isUse()) {
    1085        7644 :       if (!ModifiedRegUnits.available(Reg)) {
    1086             :         HasRegDependency = true;
    1087             :         break;
    1088             :       }
    1089        5918 :       UsedOpsInCopy.push_back(i);
    1090             :     }
    1091             :   }
    1092       16232 :   return HasRegDependency;
    1093             : }
    1094             : 
    1095      340079 : bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
    1096             :                                          MachineFunction &MF,
    1097             :                                          const TargetRegisterInfo *TRI,
    1098             :                                          const TargetInstrInfo *TII) {
    1099             :   SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
    1100             :   // FIXME: For now, we sink only to a successor which has a single predecessor
    1101             :   // so that we can directly sink COPY instructions to the successor without
    1102             :   // adding any new block or branch instruction.
    1103      562988 :   for (MachineBasicBlock *SI : CurBB.successors())
    1104      423611 :     if (!SI->livein_empty() && SI->pred_size() == 1)
    1105       97880 :       SinkableBBs.insert(SI);
    1106             : 
    1107      340079 :   if (SinkableBBs.empty())
    1108             :     return false;
    1109             : 
    1110             :   bool Changed = false;
    1111             : 
    1112             :   // Track which registers have been modified and used between the end of the
    1113             :   // block and the current instruction.
    1114             :   ModifiedRegUnits.clear();
    1115             :   UsedRegUnits.clear();
    1116             : 
    1117      658109 :   for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
    1118             :     MachineInstr *MI = &*I;
    1119             :     ++I;
    1120             : 
    1121             :     // Do not move any instruction across function call.
    1122      559656 :     if (MI->isCall())
    1123       34879 :       return false;
    1124             : 
    1125     1033322 :     if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
    1126      508545 :       LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
    1127             :                                         TRI);
    1128      520586 :       continue;
    1129             :     }
    1130             : 
    1131             :     // Track the operand index for use in Copy.
    1132             :     SmallVector<unsigned, 2> UsedOpsInCopy;
    1133             :     // Track the register number defed in Copy.
    1134             :     SmallVector<unsigned, 2> DefedRegsInCopy;
    1135             : 
    1136             :     // Don't sink the COPY if it would violate a register dependency.
    1137       26588 :     if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
    1138             :                               ModifiedRegUnits, UsedRegUnits)) {
    1139       10356 :       LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
    1140             :                                         TRI);
    1141             :       continue;
    1142             :     }
    1143             :     assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
    1144             :            "Unexpect SrcReg or DefReg");
    1145             :     MachineBasicBlock *SuccBB =
    1146        5876 :         getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    1147             :     // Don't sink if we cannot find a single sinkable successor in which Reg
    1148             :     // is live-in.
    1149        7561 :     if (!SuccBB) {
    1150        1685 :       LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
    1151             :                                         TRI);
    1152             :       continue;
    1153             :     }
    1154             :     assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
    1155             :            "Unexpected predecessor");
    1156             : 
    1157             :     // Clear the kill flag if SrcReg is killed between MI and the end of the
    1158             :     // block.
    1159        4191 :     clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    1160        4191 :     MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
    1161        4191 :     SuccBB->splice(InsertPos, &CurBB, MI);
    1162        4191 :     updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
    1163             : 
    1164             :     Changed = true;
    1165             :     ++NumPostRACopySink;
    1166             :   }
    1167             :   return Changed;
    1168             : }
    1169             : 
    1170      179214 : bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
    1171             :   bool Changed = false;
    1172      179214 :   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    1173      179214 :   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    1174             : 
    1175      179214 :   ModifiedRegUnits.init(*TRI);
    1176      179214 :   UsedRegUnits.init(*TRI);
    1177      519293 :   for (auto &BB : MF)
    1178      340079 :     Changed |= tryToSinkCopy(BB, MF, TRI, TII);
    1179             : 
    1180      179214 :   return Changed;
    1181      303507 : }

Generated by: LCOV version 1.13