LLVM API Documentation

TailDuplication.cpp
Go to the documentation of this file.
00001 //===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This pass duplicates basic blocks ending in unconditional branches into
00011 // the tails of their predecessors.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #define DEBUG_TYPE "tailduplication"
00016 #include "llvm/CodeGen/Passes.h"
00017 #include "llvm/ADT/DenseSet.h"
00018 #include "llvm/ADT/OwningPtr.h"
00019 #include "llvm/ADT/SetVector.h"
00020 #include "llvm/ADT/SmallSet.h"
00021 #include "llvm/ADT/Statistic.h"
00022 #include "llvm/CodeGen/MachineFunctionPass.h"
00023 #include "llvm/CodeGen/MachineInstrBuilder.h"
00024 #include "llvm/CodeGen/MachineModuleInfo.h"
00025 #include "llvm/CodeGen/MachineRegisterInfo.h"
00026 #include "llvm/CodeGen/MachineSSAUpdater.h"
00027 #include "llvm/CodeGen/RegisterScavenging.h"
00028 #include "llvm/IR/Function.h"
00029 #include "llvm/Support/CommandLine.h"
00030 #include "llvm/Support/Debug.h"
00031 #include "llvm/Support/ErrorHandling.h"
00032 #include "llvm/Support/raw_ostream.h"
00033 #include "llvm/Target/TargetInstrInfo.h"
00034 #include "llvm/Target/TargetRegisterInfo.h"
00035 using namespace llvm;
00036 
00037 STATISTIC(NumTails     , "Number of tails duplicated");
00038 STATISTIC(NumTailDups  , "Number of tail duplicated blocks");
00039 STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
00040 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
00041 STATISTIC(NumAddedPHIs , "Number of phis added");
00042 
00043 // Heuristic for tail duplication.
00044 static cl::opt<unsigned>
00045 TailDuplicateSize("tail-dup-size",
00046                   cl::desc("Maximum instructions to consider tail duplicating"),
00047                   cl::init(2), cl::Hidden);
00048 
00049 static cl::opt<bool>
00050 TailDupVerify("tail-dup-verify",
00051               cl::desc("Verify sanity of PHI instructions during taildup"),
00052               cl::init(false), cl::Hidden);
00053 
00054 static cl::opt<unsigned>
00055 TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
00056 
00057 typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
00058 
00059 namespace {
00060   /// TailDuplicatePass - Perform tail duplication.
00061   class TailDuplicatePass : public MachineFunctionPass {
00062     const TargetInstrInfo *TII;
00063     const TargetRegisterInfo *TRI;
00064     MachineModuleInfo *MMI;
00065     MachineRegisterInfo *MRI;
00066     OwningPtr<RegScavenger> RS;
00067     bool PreRegAlloc;
00068 
00069     // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
00070     SmallVector<unsigned, 16> SSAUpdateVRs;
00071 
00072     // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
00073     // source virtual registers.
00074     DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
00075 
00076   public:
00077     static char ID;
00078     explicit TailDuplicatePass() :
00079       MachineFunctionPass(ID), PreRegAlloc(false) {}
00080 
00081     virtual bool runOnMachineFunction(MachineFunction &MF);
00082 
00083   private:
00084     void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
00085                            MachineBasicBlock *BB);
00086     void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
00087                     MachineBasicBlock *PredBB,
00088                     DenseMap<unsigned, unsigned> &LocalVRMap,
00089                     SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
00090                     const DenseSet<unsigned> &UsedByPhi,
00091                     bool Remove);
00092     void DuplicateInstruction(MachineInstr *MI,
00093                               MachineBasicBlock *TailBB,
00094                               MachineBasicBlock *PredBB,
00095                               MachineFunction &MF,
00096                               DenseMap<unsigned, unsigned> &LocalVRMap,
00097                               const DenseSet<unsigned> &UsedByPhi);
00098     void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
00099                               SmallVector<MachineBasicBlock*, 8> &TDBBs,
00100                               SmallSetVector<MachineBasicBlock*, 8> &Succs);
00101     bool TailDuplicateBlocks(MachineFunction &MF);
00102     bool shouldTailDuplicate(const MachineFunction &MF,
00103                              bool IsSimple, MachineBasicBlock &TailBB);
00104     bool isSimpleBB(MachineBasicBlock *TailBB);
00105     bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
00106     bool duplicateSimpleBB(MachineBasicBlock *TailBB,
00107                            SmallVector<MachineBasicBlock*, 8> &TDBBs,
00108                            const DenseSet<unsigned> &RegsUsedByPhi,
00109                            SmallVector<MachineInstr*, 16> &Copies);
00110     bool TailDuplicate(MachineBasicBlock *TailBB,
00111                        bool IsSimple,
00112                        MachineFunction &MF,
00113                        SmallVector<MachineBasicBlock*, 8> &TDBBs,
00114                        SmallVector<MachineInstr*, 16> &Copies);
00115     bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
00116                                 bool IsSimple,
00117                                 MachineFunction &MF);
00118 
00119     void RemoveDeadBlock(MachineBasicBlock *MBB);
00120   };
00121 
00122   char TailDuplicatePass::ID = 0;
00123 }
00124 
00125 char &llvm::TailDuplicateID = TailDuplicatePass::ID;
00126 
00127 INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
00128                 false, false)
00129 
00130 bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
00131   TII = MF.getTarget().getInstrInfo();
00132   TRI = MF.getTarget().getRegisterInfo();
00133   MRI = &MF.getRegInfo();
00134   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
00135   PreRegAlloc = MRI->isSSA();
00136   RS.reset();
00137   if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
00138     RS.reset(new RegScavenger());
00139 
00140   bool MadeChange = false;
00141   while (TailDuplicateBlocks(MF))
00142     MadeChange = true;
00143 
00144   return MadeChange;
00145 }
00146 
00147 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
00148   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
00149     MachineBasicBlock *MBB = I;
00150     SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
00151                                                 MBB->pred_end());
00152     MachineBasicBlock::iterator MI = MBB->begin();
00153     while (MI != MBB->end()) {
00154       if (!MI->isPHI())
00155         break;
00156       for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
00157              PE = Preds.end(); PI != PE; ++PI) {
00158         MachineBasicBlock *PredBB = *PI;
00159         bool Found = false;
00160         for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
00161           MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
00162           if (PHIBB == PredBB) {
00163             Found = true;
00164             break;
00165           }
00166         }
00167         if (!Found) {
00168           dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
00169           dbgs() << "  missing input from predecessor BB#"
00170                  << PredBB->getNumber() << '\n';
00171           llvm_unreachable(0);
00172         }
00173       }
00174 
00175       for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
00176         MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
00177         if (CheckExtra && !Preds.count(PHIBB)) {
00178           dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
00179                  << ": " << *MI;
00180           dbgs() << "  extra input from predecessor BB#"
00181                  << PHIBB->getNumber() << '\n';
00182           llvm_unreachable(0);
00183         }
00184         if (PHIBB->getNumber() < 0) {
00185           dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
00186           dbgs() << "  non-existing BB#" << PHIBB->getNumber() << '\n';
00187           llvm_unreachable(0);
00188         }
00189       }
00190       ++MI;
00191     }
00192   }
00193 }
00194 
00195 /// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
00196 bool
00197 TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
00198                                           bool IsSimple,
00199                                           MachineFunction &MF) {
00200   // Save the successors list.
00201   SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
00202                                               MBB->succ_end());
00203 
00204   SmallVector<MachineBasicBlock*, 8> TDBBs;
00205   SmallVector<MachineInstr*, 16> Copies;
00206   if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
00207     return false;
00208 
00209   ++NumTails;
00210 
00211   SmallVector<MachineInstr*, 8> NewPHIs;
00212   MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
00213 
00214   // TailBB's immediate successors are now successors of those predecessors
00215   // which duplicated TailBB. Add the predecessors as sources to the PHI
00216   // instructions.
00217   bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
00218   if (PreRegAlloc)
00219     UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
00220 
00221   // If it is dead, remove it.
00222   if (isDead) {
00223     NumInstrDups -= MBB->size();
00224     RemoveDeadBlock(MBB);
00225     ++NumDeadBlocks;
00226   }
00227 
00228   // Update SSA form.
00229   if (!SSAUpdateVRs.empty()) {
00230     for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
00231       unsigned VReg = SSAUpdateVRs[i];
00232       SSAUpdate.Initialize(VReg);
00233 
00234       // If the original definition is still around, add it as an available
00235       // value.
00236       MachineInstr *DefMI = MRI->getVRegDef(VReg);
00237       MachineBasicBlock *DefBB = 0;
00238       if (DefMI) {
00239         DefBB = DefMI->getParent();
00240         SSAUpdate.AddAvailableValue(DefBB, VReg);
00241       }
00242 
00243       // Add the new vregs as available values.
00244       DenseMap<unsigned, AvailableValsTy>::iterator LI =
00245         SSAUpdateVals.find(VReg);
00246       for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
00247         MachineBasicBlock *SrcBB = LI->second[j].first;
00248         unsigned SrcReg = LI->second[j].second;
00249         SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
00250       }
00251 
00252       // Rewrite uses that are outside of the original def's block.
00253       MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
00254       while (UI != MRI->use_end()) {
00255         MachineOperand &UseMO = UI.getOperand();
00256         MachineInstr *UseMI = &*UI;
00257         ++UI;
00258         if (UseMI->isDebugValue()) {
00259           // SSAUpdate can replace the use with an undef. That creates
00260           // a debug instruction that is a kill.
00261           // FIXME: Should it SSAUpdate job to delete debug instructions
00262           // instead of replacing the use with undef?
00263           UseMI->eraseFromParent();
00264           continue;
00265         }
00266         if (UseMI->getParent() == DefBB && !UseMI->isPHI())
00267           continue;
00268         SSAUpdate.RewriteUse(UseMO);
00269       }
00270     }
00271 
00272     SSAUpdateVRs.clear();
00273     SSAUpdateVals.clear();
00274   }
00275 
00276   // Eliminate some of the copies inserted by tail duplication to maintain
00277   // SSA form.
00278   for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
00279     MachineInstr *Copy = Copies[i];
00280     if (!Copy->isCopy())
00281       continue;
00282     unsigned Dst = Copy->getOperand(0).getReg();
00283     unsigned Src = Copy->getOperand(1).getReg();
00284     if (MRI->hasOneNonDBGUse(Src) &&
00285         MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
00286       // Copy is the only use. Do trivial copy propagation here.
00287       MRI->replaceRegWith(Dst, Src);
00288       Copy->eraseFromParent();
00289     }
00290   }
00291 
00292   if (NewPHIs.size())
00293     NumAddedPHIs += NewPHIs.size();
00294 
00295   return true;
00296 }
00297 
00298 /// TailDuplicateBlocks - Look for small blocks that are unconditionally
00299 /// branched to and do not fall through. Tail-duplicate their instructions
00300 /// into their predecessors to eliminate (dynamic) branches.
00301 bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
00302   bool MadeChange = false;
00303 
00304   if (PreRegAlloc && TailDupVerify) {
00305     DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
00306     VerifyPHIs(MF, true);
00307   }
00308 
00309   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
00310     MachineBasicBlock *MBB = I++;
00311 
00312     if (NumTails == TailDupLimit)
00313       break;
00314 
00315     bool IsSimple = isSimpleBB(MBB);
00316 
00317     if (!shouldTailDuplicate(MF, IsSimple, *MBB))
00318       continue;
00319 
00320     MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
00321   }
00322 
00323   if (PreRegAlloc && TailDupVerify)
00324     VerifyPHIs(MF, false);
00325 
00326   return MadeChange;
00327 }
00328 
00329 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
00330                          const MachineRegisterInfo *MRI) {
00331   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
00332          UE = MRI->use_end(); UI != UE; ++UI) {
00333     MachineInstr *UseMI = &*UI;
00334     if (UseMI->isDebugValue())
00335       continue;
00336     if (UseMI->getParent() != BB)
00337       return true;
00338   }
00339   return false;
00340 }
00341 
00342 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
00343   for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
00344     if (MI->getOperand(i+1).getMBB() == SrcBB)
00345       return i;
00346   return 0;
00347 }
00348 
00349 
00350 // Remember which registers are used by phis in this block. This is
00351 // used to determine which registers are liveout while modifying the
00352 // block (which is why we need to copy the information).
00353 static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
00354                               DenseSet<unsigned> *UsedByPhi) {
00355   for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
00356       I != E; ++I) {
00357     const MachineInstr &MI = *I;
00358     if (!MI.isPHI())
00359       break;
00360     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
00361       unsigned SrcReg = MI.getOperand(i).getReg();
00362       UsedByPhi->insert(SrcReg);
00363     }
00364   }
00365 }
00366 
00367 /// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
00368 /// SSA update.
00369 void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
00370                                           MachineBasicBlock *BB) {
00371   DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
00372   if (LI != SSAUpdateVals.end())
00373     LI->second.push_back(std::make_pair(BB, NewReg));
00374   else {
00375     AvailableValsTy Vals;
00376     Vals.push_back(std::make_pair(BB, NewReg));
00377     SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
00378     SSAUpdateVRs.push_back(OrigReg);
00379   }
00380 }
00381 
00382 /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
00383 /// Remember the source register that's contributed by PredBB and update SSA
00384 /// update map.
00385 void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
00386                                    MachineBasicBlock *TailBB,
00387                                    MachineBasicBlock *PredBB,
00388                                    DenseMap<unsigned, unsigned> &LocalVRMap,
00389                            SmallVector<std::pair<unsigned,unsigned>, 4> &Copies,
00390                                    const DenseSet<unsigned> &RegsUsedByPhi,
00391                                    bool Remove) {
00392   unsigned DefReg = MI->getOperand(0).getReg();
00393   unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
00394   assert(SrcOpIdx && "Unable to find matching PHI source?");
00395   unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
00396   const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
00397   LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
00398 
00399   // Insert a copy from source to the end of the block. The def register is the
00400   // available value liveout of the block.
00401   unsigned NewDef = MRI->createVirtualRegister(RC);
00402   Copies.push_back(std::make_pair(NewDef, SrcReg));
00403   if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
00404     AddSSAUpdateEntry(DefReg, NewDef, PredBB);
00405 
00406   if (!Remove)
00407     return;
00408 
00409   // Remove PredBB from the PHI node.
00410   MI->RemoveOperand(SrcOpIdx+1);
00411   MI->RemoveOperand(SrcOpIdx);
00412   if (MI->getNumOperands() == 1)
00413     MI->eraseFromParent();
00414 }
00415 
00416 /// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
00417 /// the source operands due to earlier PHI translation.
00418 void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
00419                                      MachineBasicBlock *TailBB,
00420                                      MachineBasicBlock *PredBB,
00421                                      MachineFunction &MF,
00422                                      DenseMap<unsigned, unsigned> &LocalVRMap,
00423                                      const DenseSet<unsigned> &UsedByPhi) {
00424   MachineInstr *NewMI = TII->duplicate(MI, MF);
00425   for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
00426     MachineOperand &MO = NewMI->getOperand(i);
00427     if (!MO.isReg())
00428       continue;
00429     unsigned Reg = MO.getReg();
00430     if (!TargetRegisterInfo::isVirtualRegister(Reg))
00431       continue;
00432     if (MO.isDef()) {
00433       const TargetRegisterClass *RC = MRI->getRegClass(Reg);
00434       unsigned NewReg = MRI->createVirtualRegister(RC);
00435       MO.setReg(NewReg);
00436       LocalVRMap.insert(std::make_pair(Reg, NewReg));
00437       if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
00438         AddSSAUpdateEntry(Reg, NewReg, PredBB);
00439     } else {
00440       DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
00441       if (VI != LocalVRMap.end()) {
00442         MO.setReg(VI->second);
00443         MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
00444       }
00445     }
00446   }
00447   PredBB->insert(PredBB->instr_end(), NewMI);
00448 }
00449 
00450 /// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
00451 /// blocks, the successors have gained new predecessors. Update the PHI
00452 /// instructions in them accordingly.
00453 void
00454 TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
00455                                   SmallVector<MachineBasicBlock*, 8> &TDBBs,
00456                                   SmallSetVector<MachineBasicBlock*,8> &Succs) {
00457   for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
00458          SE = Succs.end(); SI != SE; ++SI) {
00459     MachineBasicBlock *SuccBB = *SI;
00460     for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
00461          II != EE; ++II) {
00462       if (!II->isPHI())
00463         break;
00464       MachineInstrBuilder MIB(*FromBB->getParent(), II);
00465       unsigned Idx = 0;
00466       for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
00467         MachineOperand &MO = II->getOperand(i+1);
00468         if (MO.getMBB() == FromBB) {
00469           Idx = i;
00470           break;
00471         }
00472       }
00473 
00474       assert(Idx != 0);
00475       MachineOperand &MO0 = II->getOperand(Idx);
00476       unsigned Reg = MO0.getReg();
00477       if (isDead) {
00478         // Folded into the previous BB.
00479         // There could be duplicate phi source entries. FIXME: Should sdisel
00480         // or earlier pass fixed this?
00481         for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
00482           MachineOperand &MO = II->getOperand(i+1);
00483           if (MO.getMBB() == FromBB) {
00484             II->RemoveOperand(i+1);
00485             II->RemoveOperand(i);
00486           }
00487         }
00488       } else
00489         Idx = 0;
00490 
00491       // If Idx is set, the operands at Idx and Idx+1 must be removed.
00492       // We reuse the location to avoid expensive RemoveOperand calls.
00493 
00494       DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
00495       if (LI != SSAUpdateVals.end()) {
00496         // This register is defined in the tail block.
00497         for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
00498           MachineBasicBlock *SrcBB = LI->second[j].first;
00499           // If we didn't duplicate a bb into a particular predecessor, we
00500           // might still have added an entry to SSAUpdateVals to correcly
00501           // recompute SSA. If that case, avoid adding a dummy extra argument
00502           // this PHI.
00503           if (!SrcBB->isSuccessor(SuccBB))
00504             continue;
00505 
00506           unsigned SrcReg = LI->second[j].second;
00507           if (Idx != 0) {
00508             II->getOperand(Idx).setReg(SrcReg);
00509             II->getOperand(Idx+1).setMBB(SrcBB);
00510             Idx = 0;
00511           } else {
00512             MIB.addReg(SrcReg).addMBB(SrcBB);
00513           }
00514         }
00515       } else {
00516         // Live in tail block, must also be live in predecessors.
00517         for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
00518           MachineBasicBlock *SrcBB = TDBBs[j];
00519           if (Idx != 0) {
00520             II->getOperand(Idx).setReg(Reg);
00521             II->getOperand(Idx+1).setMBB(SrcBB);
00522             Idx = 0;
00523           } else {
00524             MIB.addReg(Reg).addMBB(SrcBB);
00525           }
00526         }
00527       }
00528       if (Idx != 0) {
00529         II->RemoveOperand(Idx+1);
00530         II->RemoveOperand(Idx);
00531       }
00532     }
00533   }
00534 }
00535 
00536 /// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
00537 bool
00538 TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
00539                                        bool IsSimple,
00540                                        MachineBasicBlock &TailBB) {
00541   // Only duplicate blocks that end with unconditional branches.
00542   if (TailBB.canFallThrough())
00543     return false;
00544 
00545   // Don't try to tail-duplicate single-block loops.
00546   if (TailBB.isSuccessor(&TailBB))
00547     return false;
00548 
00549   // Set the limit on the cost to duplicate. When optimizing for size,
00550   // duplicate only one, because one branch instruction can be eliminated to
00551   // compensate for the duplication.
00552   unsigned MaxDuplicateCount;
00553   if (TailDuplicateSize.getNumOccurrences() == 0 &&
00554       MF.getFunction()->getAttributes().
00555         hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize))
00556     MaxDuplicateCount = 1;
00557   else
00558     MaxDuplicateCount = TailDuplicateSize;
00559 
00560   // If the target has hardware branch prediction that can handle indirect
00561   // branches, duplicating them can often make them predictable when there
00562   // are common paths through the code.  The limit needs to be high enough
00563   // to allow undoing the effects of tail merging and other optimizations
00564   // that rearrange the predecessors of the indirect branch.
00565 
00566   bool HasIndirectbr = false;
00567   if (!TailBB.empty())
00568     HasIndirectbr = TailBB.back().isIndirectBranch();
00569 
00570   if (HasIndirectbr && PreRegAlloc)
00571     MaxDuplicateCount = 20;
00572 
00573   // Check the instructions in the block to determine whether tail-duplication
00574   // is invalid or unlikely to be profitable.
00575   unsigned InstrCount = 0;
00576   for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
00577     // Non-duplicable things shouldn't be tail-duplicated.
00578     if (I->isNotDuplicable())
00579       return false;
00580 
00581     // Do not duplicate 'return' instructions if this is a pre-regalloc run.
00582     // A return may expand into a lot more instructions (e.g. reload of callee
00583     // saved registers) after PEI.
00584     if (PreRegAlloc && I->isReturn())
00585       return false;
00586 
00587     // Avoid duplicating calls before register allocation. Calls presents a
00588     // barrier to register allocation so duplicating them may end up increasing
00589     // spills.
00590     if (PreRegAlloc && I->isCall())
00591       return false;
00592 
00593     if (!I->isPHI() && !I->isDebugValue())
00594       InstrCount += 1;
00595 
00596     if (InstrCount > MaxDuplicateCount)
00597       return false;
00598   }
00599 
00600   if (HasIndirectbr && PreRegAlloc)
00601     return true;
00602 
00603   if (IsSimple)
00604     return true;
00605 
00606   if (!PreRegAlloc)
00607     return true;
00608 
00609   return canCompletelyDuplicateBB(TailBB);
00610 }
00611 
00612 /// isSimpleBB - True if this BB has only one unconditional jump.
00613 bool
00614 TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
00615   if (TailBB->succ_size() != 1)
00616     return false;
00617   if (TailBB->pred_empty())
00618     return false;
00619   MachineBasicBlock::iterator I = TailBB->begin();
00620   MachineBasicBlock::iterator E = TailBB->end();
00621   while (I != E && I->isDebugValue())
00622     ++I;
00623   if (I == E)
00624     return true;
00625   return I->isUnconditionalBranch();
00626 }
00627 
00628 static bool
00629 bothUsedInPHI(const MachineBasicBlock &A,
00630               SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
00631   for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
00632          SE = A.succ_end(); SI != SE; ++SI) {
00633     MachineBasicBlock *BB = *SI;
00634     if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
00635       return true;
00636   }
00637 
00638   return false;
00639 }
00640 
00641 bool
00642 TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
00643   SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
00644 
00645   for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
00646        PE = BB.pred_end(); PI != PE; ++PI) {
00647     MachineBasicBlock *PredBB = *PI;
00648 
00649     if (PredBB->succ_size() > 1)
00650       return false;
00651 
00652     MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
00653     SmallVector<MachineOperand, 4> PredCond;
00654     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
00655       return false;
00656 
00657     if (!PredCond.empty())
00658       return false;
00659   }
00660   return true;
00661 }
00662 
00663 bool
00664 TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
00665                                      SmallVector<MachineBasicBlock*, 8> &TDBBs,
00666                                      const DenseSet<unsigned> &UsedByPhi,
00667                                      SmallVector<MachineInstr*, 16> &Copies) {
00668   SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
00669                                            TailBB->succ_end());
00670   SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
00671                                            TailBB->pred_end());
00672   bool Changed = false;
00673   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
00674        PE = Preds.end(); PI != PE; ++PI) {
00675     MachineBasicBlock *PredBB = *PI;
00676 
00677     if (PredBB->getLandingPadSuccessor())
00678       continue;
00679 
00680     if (bothUsedInPHI(*PredBB, Succs))
00681       continue;
00682 
00683     MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
00684     SmallVector<MachineOperand, 4> PredCond;
00685     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
00686       continue;
00687 
00688     Changed = true;
00689     DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
00690                  << "From simple Succ: " << *TailBB);
00691 
00692     MachineBasicBlock *NewTarget = *TailBB->succ_begin();
00693     MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
00694 
00695     // Make PredFBB explicit.
00696     if (PredCond.empty())
00697       PredFBB = PredTBB;
00698 
00699     // Make fall through explicit.
00700     if (!PredTBB)
00701       PredTBB = NextBB;
00702     if (!PredFBB)
00703       PredFBB = NextBB;
00704 
00705     // Redirect
00706     if (PredFBB == TailBB)
00707       PredFBB = NewTarget;
00708     if (PredTBB == TailBB)
00709       PredTBB = NewTarget;
00710 
00711     // Make the branch unconditional if possible
00712     if (PredTBB == PredFBB) {
00713       PredCond.clear();
00714       PredFBB = NULL;
00715     }
00716 
00717     // Avoid adding fall through branches.
00718     if (PredFBB == NextBB)
00719       PredFBB = NULL;
00720     if (PredTBB == NextBB && PredFBB == NULL)
00721       PredTBB = NULL;
00722 
00723     TII->RemoveBranch(*PredBB);
00724 
00725     if (PredTBB)
00726       TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
00727 
00728     PredBB->removeSuccessor(TailBB);
00729     unsigned NumSuccessors = PredBB->succ_size();
00730     assert(NumSuccessors <= 1);
00731     if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
00732       PredBB->addSuccessor(NewTarget);
00733 
00734     TDBBs.push_back(PredBB);
00735   }
00736   return Changed;
00737 }
00738 
00739 /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
00740 /// of its predecessors.
00741 bool
00742 TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
00743                                  bool IsSimple,
00744                                  MachineFunction &MF,
00745                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
00746                                  SmallVector<MachineInstr*, 16> &Copies) {
00747   DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
00748 
00749   DenseSet<unsigned> UsedByPhi;
00750   getRegsUsedByPHIs(*TailBB, &UsedByPhi);
00751 
00752   if (IsSimple)
00753     return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
00754 
00755   // Iterate through all the unique predecessors and tail-duplicate this
00756   // block into them, if possible. Copying the list ahead of time also
00757   // avoids trouble with the predecessor list reallocating.
00758   bool Changed = false;
00759   SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
00760                                               TailBB->pred_end());
00761   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
00762        PE = Preds.end(); PI != PE; ++PI) {
00763     MachineBasicBlock *PredBB = *PI;
00764 
00765     assert(TailBB != PredBB &&
00766            "Single-block loop should have been rejected earlier!");
00767     // EH edges are ignored by AnalyzeBranch.
00768     if (PredBB->succ_size() > 1)
00769       continue;
00770 
00771     MachineBasicBlock *PredTBB, *PredFBB;
00772     SmallVector<MachineOperand, 4> PredCond;
00773     if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
00774       continue;
00775     if (!PredCond.empty())
00776       continue;
00777     // Don't duplicate into a fall-through predecessor (at least for now).
00778     if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
00779       continue;
00780 
00781     DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
00782                  << "From Succ: " << *TailBB);
00783 
00784     TDBBs.push_back(PredBB);
00785 
00786     // Remove PredBB's unconditional branch.
00787     TII->RemoveBranch(*PredBB);
00788 
00789     if (RS && !TailBB->livein_empty()) {
00790       // Update PredBB livein.
00791       RS->enterBasicBlock(PredBB);
00792       if (!PredBB->empty())
00793         RS->forward(prior(PredBB->end()));
00794       BitVector RegsLiveAtExit(TRI->getNumRegs());
00795       RS->getRegsUsed(RegsLiveAtExit, false);
00796       for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
00797              E = TailBB->livein_end(); I != E; ++I) {
00798         if (!RegsLiveAtExit[*I])
00799           // If a register is previously livein to the tail but it's not live
00800           // at the end of predecessor BB, then it should be added to its
00801           // livein list.
00802           PredBB->addLiveIn(*I);
00803       }
00804     }
00805 
00806     // Clone the contents of TailBB into PredBB.
00807     DenseMap<unsigned, unsigned> LocalVRMap;
00808     SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
00809     // Use instr_iterator here to properly handle bundles, e.g.
00810     // ARM Thumb2 IT block.
00811     MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
00812     while (I != TailBB->instr_end()) {
00813       MachineInstr *MI = &*I;
00814       ++I;
00815       if (MI->isPHI()) {
00816         // Replace the uses of the def of the PHI with the register coming
00817         // from PredBB.
00818         ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
00819       } else {
00820         // Replace def of virtual registers with new registers, and update
00821         // uses with PHI source register or the new registers.
00822         DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
00823       }
00824     }
00825     MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
00826     for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
00827       Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
00828                                TII->get(TargetOpcode::COPY),
00829                                CopyInfos[i].first).addReg(CopyInfos[i].second));
00830     }
00831 
00832     // Simplify
00833     TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
00834 
00835     NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
00836 
00837     // Update the CFG.
00838     PredBB->removeSuccessor(PredBB->succ_begin());
00839     assert(PredBB->succ_empty() &&
00840            "TailDuplicate called on block with multiple successors!");
00841     for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
00842            E = TailBB->succ_end(); I != E; ++I)
00843       PredBB->addSuccessor(*I);
00844 
00845     Changed = true;
00846     ++NumTailDups;
00847   }
00848 
00849   // If TailBB was duplicated into all its predecessors except for the prior
00850   // block, which falls through unconditionally, move the contents of this
00851   // block into the prior block.
00852   MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
00853   MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
00854   SmallVector<MachineOperand, 4> PriorCond;
00855   // This has to check PrevBB->succ_size() because EH edges are ignored by
00856   // AnalyzeBranch.
00857   if (PrevBB->succ_size() == 1 &&
00858       !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
00859       PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
00860       !TailBB->hasAddressTaken()) {
00861     DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
00862           << "From MBB: " << *TailBB);
00863     if (PreRegAlloc) {
00864       DenseMap<unsigned, unsigned> LocalVRMap;
00865       SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
00866       MachineBasicBlock::iterator I = TailBB->begin();
00867       // Process PHI instructions first.
00868       while (I != TailBB->end() && I->isPHI()) {
00869         // Replace the uses of the def of the PHI with the register coming
00870         // from PredBB.
00871         MachineInstr *MI = &*I++;
00872         ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
00873         if (MI->getParent())
00874           MI->eraseFromParent();
00875       }
00876 
00877       // Now copy the non-PHI instructions.
00878       while (I != TailBB->end()) {
00879         // Replace def of virtual registers with new registers, and update
00880         // uses with PHI source register or the new registers.
00881         MachineInstr *MI = &*I++;
00882         assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
00883         DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
00884         MI->eraseFromParent();
00885       }
00886       MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
00887       for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
00888         Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
00889                                  TII->get(TargetOpcode::COPY),
00890                                  CopyInfos[i].first)
00891                            .addReg(CopyInfos[i].second));
00892       }
00893     } else {
00894       // No PHIs to worry about, just splice the instructions over.
00895       PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
00896     }
00897     PrevBB->removeSuccessor(PrevBB->succ_begin());
00898     assert(PrevBB->succ_empty());
00899     PrevBB->transferSuccessors(TailBB);
00900     TDBBs.push_back(PrevBB);
00901     Changed = true;
00902   }
00903 
00904   // If this is after register allocation, there are no phis to fix.
00905   if (!PreRegAlloc)
00906     return Changed;
00907 
00908   // If we made no changes so far, we are safe.
00909   if (!Changed)
00910     return Changed;
00911 
00912 
00913   // Handle the nasty case in that we duplicated a block that is part of a loop
00914   // into some but not all of its predecessors. For example:
00915   //    1 -> 2 <-> 3                 |
00916   //          \                      |
00917   //           \---> rest            |
00918   // if we duplicate 2 into 1 but not into 3, we end up with
00919   // 12 -> 3 <-> 2 -> rest           |
00920   //   \             /               |
00921   //    \----->-----/                |
00922   // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
00923   // with a phi in 3 (which now dominates 2).
00924   // What we do here is introduce a copy in 3 of the register defined by the
00925   // phi, just like when we are duplicating 2 into 3, but we don't copy any
00926   // real instructions or remove the 3 -> 2 edge from the phi in 2.
00927   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
00928        PE = Preds.end(); PI != PE; ++PI) {
00929     MachineBasicBlock *PredBB = *PI;
00930     if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
00931       continue;
00932 
00933     // EH edges
00934     if (PredBB->succ_size() != 1)
00935       continue;
00936 
00937     DenseMap<unsigned, unsigned> LocalVRMap;
00938     SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
00939     MachineBasicBlock::iterator I = TailBB->begin();
00940     // Process PHI instructions first.
00941     while (I != TailBB->end() && I->isPHI()) {
00942       // Replace the uses of the def of the PHI with the register coming
00943       // from PredBB.
00944       MachineInstr *MI = &*I++;
00945       ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
00946     }
00947     MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
00948     for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
00949       Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
00950                                TII->get(TargetOpcode::COPY),
00951                                CopyInfos[i].first).addReg(CopyInfos[i].second));
00952     }
00953   }
00954 
00955   return Changed;
00956 }
00957 
00958 /// RemoveDeadBlock - Remove the specified dead machine basic block from the
00959 /// function, updating the CFG.
00960 void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
00961   assert(MBB->pred_empty() && "MBB must be dead!");
00962   DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
00963 
00964   // Remove all successors.
00965   while (!MBB->succ_empty())
00966     MBB->removeSuccessor(MBB->succ_end()-1);
00967 
00968   // Remove the block.
00969   MBB->eraseFromParent();
00970 }