LLVM API Documentation
00001 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This pass performs loop invariant code motion on machine instructions. We 00011 // attempt to remove as much code from the body of a loop as possible. 00012 // 00013 // This pass does not attempt to throttle itself to limit register pressure. 00014 // The register allocation phases are expected to perform rematerialization 00015 // to recover when register pressure is high. 00016 // 00017 // This pass is not intended to be a replacement or a complete alternative 00018 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple 00019 // constructs that are not exposed before lowering and instruction selection. 00020 // 00021 //===----------------------------------------------------------------------===// 00022 00023 #define DEBUG_TYPE "machine-licm" 00024 #include "llvm/CodeGen/Passes.h" 00025 #include "llvm/ADT/DenseMap.h" 00026 #include "llvm/ADT/SmallSet.h" 00027 #include "llvm/ADT/Statistic.h" 00028 #include "llvm/Analysis/AliasAnalysis.h" 00029 #include "llvm/CodeGen/MachineDominators.h" 00030 #include "llvm/CodeGen/MachineFrameInfo.h" 00031 #include "llvm/CodeGen/MachineLoopInfo.h" 00032 #include "llvm/CodeGen/MachineMemOperand.h" 00033 #include "llvm/CodeGen/MachineRegisterInfo.h" 00034 #include "llvm/CodeGen/PseudoSourceValue.h" 00035 #include "llvm/MC/MCInstrItineraries.h" 00036 #include "llvm/Support/CommandLine.h" 00037 #include "llvm/Support/Debug.h" 00038 #include "llvm/Support/raw_ostream.h" 00039 #include "llvm/Target/TargetInstrInfo.h" 00040 #include "llvm/Target/TargetLowering.h" 00041 #include "llvm/Target/TargetMachine.h" 00042 #include "llvm/Target/TargetRegisterInfo.h" 00043 using namespace llvm; 00044 00045 static cl::opt<bool> 00046 AvoidSpeculation("avoid-speculation", 00047 cl::desc("MachineLICM should avoid speculation"), 00048 cl::init(true), cl::Hidden); 00049 00050 STATISTIC(NumHoisted, 00051 "Number of machine instructions hoisted out of loops"); 00052 STATISTIC(NumLowRP, 00053 "Number of instructions hoisted in low reg pressure situation"); 00054 STATISTIC(NumHighLatency, 00055 "Number of high latency instructions hoisted"); 00056 STATISTIC(NumCSEed, 00057 "Number of hoisted machine instructions CSEed"); 00058 STATISTIC(NumPostRAHoisted, 00059 "Number of machine instructions hoisted out of loops post regalloc"); 00060 00061 namespace { 00062 class MachineLICM : public MachineFunctionPass { 00063 const TargetMachine *TM; 00064 const TargetInstrInfo *TII; 00065 const TargetLoweringBase *TLI; 00066 const TargetRegisterInfo *TRI; 00067 const MachineFrameInfo *MFI; 00068 MachineRegisterInfo *MRI; 00069 const InstrItineraryData *InstrItins; 00070 bool PreRegAlloc; 00071 00072 // Various analyses that we use... 00073 AliasAnalysis *AA; // Alias analysis info. 00074 MachineLoopInfo *MLI; // Current MachineLoopInfo 00075 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 00076 00077 // State that is updated as we process loops 00078 bool Changed; // True if a loop is changed. 00079 bool FirstInLoop; // True if it's the first LICM in the loop. 00080 MachineLoop *CurLoop; // The current loop we are working on. 00081 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 00082 00083 // Exit blocks for CurLoop. 00084 SmallVector<MachineBasicBlock*, 8> ExitBlocks; 00085 00086 bool isExitBlock(const MachineBasicBlock *MBB) const { 00087 return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != 00088 ExitBlocks.end(); 00089 } 00090 00091 // Track 'estimated' register pressure. 00092 SmallSet<unsigned, 32> RegSeen; 00093 SmallVector<unsigned, 8> RegPressure; 00094 00095 // Register pressure "limit" per register class. If the pressure 00096 // is higher than the limit, then it's considered high. 00097 SmallVector<unsigned, 8> RegLimit; 00098 00099 // Register pressure on path leading from loop preheader to current BB. 00100 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 00101 00102 // For each opcode, keep a list of potential CSE instructions. 00103 DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; 00104 00105 enum { 00106 SpeculateFalse = 0, 00107 SpeculateTrue = 1, 00108 SpeculateUnknown = 2 00109 }; 00110 00111 // If a MBB does not dominate loop exiting blocks then it may not safe 00112 // to hoist loads from this block. 00113 // Tri-state: 0 - false, 1 - true, 2 - unknown 00114 unsigned SpeculationState; 00115 00116 public: 00117 static char ID; // Pass identification, replacement for typeid 00118 MachineLICM() : 00119 MachineFunctionPass(ID), PreRegAlloc(true) { 00120 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 00121 } 00122 00123 explicit MachineLICM(bool PreRA) : 00124 MachineFunctionPass(ID), PreRegAlloc(PreRA) { 00125 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 00126 } 00127 00128 virtual bool runOnMachineFunction(MachineFunction &MF); 00129 00130 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 00131 AU.addRequired<MachineLoopInfo>(); 00132 AU.addRequired<MachineDominatorTree>(); 00133 AU.addRequired<AliasAnalysis>(); 00134 AU.addPreserved<MachineLoopInfo>(); 00135 AU.addPreserved<MachineDominatorTree>(); 00136 MachineFunctionPass::getAnalysisUsage(AU); 00137 } 00138 00139 virtual void releaseMemory() { 00140 RegSeen.clear(); 00141 RegPressure.clear(); 00142 RegLimit.clear(); 00143 BackTrace.clear(); 00144 for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator 00145 CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) 00146 CI->second.clear(); 00147 CSEMap.clear(); 00148 } 00149 00150 private: 00151 /// CandidateInfo - Keep track of information about hoisting candidates. 00152 struct CandidateInfo { 00153 MachineInstr *MI; 00154 unsigned Def; 00155 int FI; 00156 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 00157 : MI(mi), Def(def), FI(fi) {} 00158 }; 00159 00160 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 00161 /// invariants out to the preheader. 00162 void HoistRegionPostRA(); 00163 00164 /// HoistPostRA - When an instruction is found to only use loop invariant 00165 /// operands that is safe to hoist, this instruction is called to do the 00166 /// dirty work. 00167 void HoistPostRA(MachineInstr *MI, unsigned Def); 00168 00169 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 00170 /// gather register def and frame object update information. 00171 void ProcessMI(MachineInstr *MI, 00172 BitVector &PhysRegDefs, 00173 BitVector &PhysRegClobbers, 00174 SmallSet<int, 32> &StoredFIs, 00175 SmallVector<CandidateInfo, 32> &Candidates); 00176 00177 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the 00178 /// current loop. 00179 void AddToLiveIns(unsigned Reg); 00180 00181 /// IsLICMCandidate - Returns true if the instruction may be a suitable 00182 /// candidate for LICM. e.g. If the instruction is a call, then it's 00183 /// obviously not safe to hoist it. 00184 bool IsLICMCandidate(MachineInstr &I); 00185 00186 /// IsLoopInvariantInst - Returns true if the instruction is loop 00187 /// invariant. I.e., all virtual register operands are defined outside of 00188 /// the loop, physical registers aren't accessed (explicitly or implicitly), 00189 /// and the instruction is hoistable. 00190 /// 00191 bool IsLoopInvariantInst(MachineInstr &I); 00192 00193 /// HasLoopPHIUse - Return true if the specified instruction is used by any 00194 /// phi node in the current loop. 00195 bool HasLoopPHIUse(const MachineInstr *MI) const; 00196 00197 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 00198 /// and an use in the current loop, return true if the target considered 00199 /// it 'high'. 00200 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 00201 unsigned Reg) const; 00202 00203 bool IsCheapInstruction(MachineInstr &MI) const; 00204 00205 /// CanCauseHighRegPressure - Visit BBs from header to current BB, 00206 /// check if hoisting an instruction of the given cost matrix can cause high 00207 /// register pressure. 00208 bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); 00209 00210 /// UpdateBackTraceRegPressure - Traverse the back trace from header to 00211 /// the current block and update their register pressures to reflect the 00212 /// effect of hoisting MI from the current block to the preheader. 00213 void UpdateBackTraceRegPressure(const MachineInstr *MI); 00214 00215 /// IsProfitableToHoist - Return true if it is potentially profitable to 00216 /// hoist the given loop invariant. 00217 bool IsProfitableToHoist(MachineInstr &MI); 00218 00219 /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 00220 /// If not then a load from this mbb may not be safe to hoist. 00221 bool IsGuaranteedToExecute(MachineBasicBlock *BB); 00222 00223 void EnterScope(MachineBasicBlock *MBB); 00224 00225 void ExitScope(MachineBasicBlock *MBB); 00226 00227 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given 00228 /// dominator tree node if its a leaf or all of its children are done. Walk 00229 /// up the dominator tree to destroy ancestors which are now done. 00230 void ExitScopeIfDone(MachineDomTreeNode *Node, 00231 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 00232 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 00233 00234 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 00235 /// blocks dominated by the specified header block, and that are in the 00236 /// current loop) in depth first order w.r.t the DominatorTree. This allows 00237 /// us to visit definitions before uses, allowing us to hoist a loop body in 00238 /// one pass without iteration. 00239 /// 00240 void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); 00241 void HoistRegion(MachineDomTreeNode *N, bool IsHeader); 00242 00243 /// getRegisterClassIDAndCost - For a given MI, register, and the operand 00244 /// index, return the ID and cost of its representative register class by 00245 /// reference. 00246 void getRegisterClassIDAndCost(const MachineInstr *MI, 00247 unsigned Reg, unsigned OpIdx, 00248 unsigned &RCId, unsigned &RCCost) const; 00249 00250 /// InitRegPressure - Find all virtual register references that are liveout 00251 /// of the preheader to initialize the starting "register pressure". Note 00252 /// this does not count live through (livein but not used) registers. 00253 void InitRegPressure(MachineBasicBlock *BB); 00254 00255 /// UpdateRegPressure - Update estimate of register pressure after the 00256 /// specified instruction. 00257 void UpdateRegPressure(const MachineInstr *MI); 00258 00259 /// ExtractHoistableLoad - Unfold a load from the given machineinstr if 00260 /// the load itself could be hoisted. Return the unfolded and hoistable 00261 /// load, or null if the load couldn't be unfolded or if it wouldn't 00262 /// be hoistable. 00263 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 00264 00265 /// LookForDuplicate - Find an instruction amount PrevMIs that is a 00266 /// duplicate of MI. Return this instruction if it's found. 00267 const MachineInstr *LookForDuplicate(const MachineInstr *MI, 00268 std::vector<const MachineInstr*> &PrevMIs); 00269 00270 /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on 00271 /// the preheader that compute the same value. If it's found, do a RAU on 00272 /// with the definition of the existing instruction rather than hoisting 00273 /// the instruction to the preheader. 00274 bool EliminateCSE(MachineInstr *MI, 00275 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI); 00276 00277 /// MayCSE - Return true if the given instruction will be CSE'd if it's 00278 /// hoisted out of the loop. 00279 bool MayCSE(MachineInstr *MI); 00280 00281 /// Hoist - When an instruction is found to only use loop invariant operands 00282 /// that is safe to hoist, this instruction is called to do the dirty work. 00283 /// It returns true if the instruction is hoisted. 00284 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 00285 00286 /// InitCSEMap - Initialize the CSE map with instructions that are in the 00287 /// current loop preheader that may become duplicates of instructions that 00288 /// are hoisted out of the loop. 00289 void InitCSEMap(MachineBasicBlock *BB); 00290 00291 /// getCurPreheader - Get the preheader for the current loop, splitting 00292 /// a critical edge if needed. 00293 MachineBasicBlock *getCurPreheader(); 00294 }; 00295 } // end anonymous namespace 00296 00297 char MachineLICM::ID = 0; 00298 char &llvm::MachineLICMID = MachineLICM::ID; 00299 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", 00300 "Machine Loop Invariant Code Motion", false, false) 00301 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 00302 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 00303 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 00304 INITIALIZE_PASS_END(MachineLICM, "machinelicm", 00305 "Machine Loop Invariant Code Motion", false, false) 00306 00307 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most 00308 /// loop that has a unique predecessor. 00309 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 00310 // Check whether this loop even has a unique predecessor. 00311 if (!CurLoop->getLoopPredecessor()) 00312 return false; 00313 // Ok, now check to see if any of its outer loops do. 00314 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 00315 if (L->getLoopPredecessor()) 00316 return false; 00317 // None of them did, so this is the outermost with a unique predecessor. 00318 return true; 00319 } 00320 00321 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { 00322 Changed = FirstInLoop = false; 00323 TM = &MF.getTarget(); 00324 TII = TM->getInstrInfo(); 00325 TLI = TM->getTargetLowering(); 00326 TRI = TM->getRegisterInfo(); 00327 MFI = MF.getFrameInfo(); 00328 MRI = &MF.getRegInfo(); 00329 InstrItins = TM->getInstrItineraryData(); 00330 00331 PreRegAlloc = MRI->isSSA(); 00332 00333 if (PreRegAlloc) 00334 DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 00335 else 00336 DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 00337 DEBUG(dbgs() << MF.getName() << " ********\n"); 00338 00339 if (PreRegAlloc) { 00340 // Estimate register pressure during pre-regalloc pass. 00341 unsigned NumRC = TRI->getNumRegClasses(); 00342 RegPressure.resize(NumRC); 00343 std::fill(RegPressure.begin(), RegPressure.end(), 0); 00344 RegLimit.resize(NumRC); 00345 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 00346 E = TRI->regclass_end(); I != E; ++I) 00347 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); 00348 } 00349 00350 // Get our Loop information... 00351 MLI = &getAnalysis<MachineLoopInfo>(); 00352 DT = &getAnalysis<MachineDominatorTree>(); 00353 AA = &getAnalysis<AliasAnalysis>(); 00354 00355 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 00356 while (!Worklist.empty()) { 00357 CurLoop = Worklist.pop_back_val(); 00358 CurPreheader = 0; 00359 ExitBlocks.clear(); 00360 00361 // If this is done before regalloc, only visit outer-most preheader-sporting 00362 // loops. 00363 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 00364 Worklist.append(CurLoop->begin(), CurLoop->end()); 00365 continue; 00366 } 00367 00368 CurLoop->getExitBlocks(ExitBlocks); 00369 00370 if (!PreRegAlloc) 00371 HoistRegionPostRA(); 00372 else { 00373 // CSEMap is initialized for loop header when the first instruction is 00374 // being hoisted. 00375 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 00376 FirstInLoop = true; 00377 HoistOutOfLoop(N); 00378 CSEMap.clear(); 00379 } 00380 } 00381 00382 return Changed; 00383 } 00384 00385 /// InstructionStoresToFI - Return true if instruction stores to the 00386 /// specified frame. 00387 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 00388 for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), 00389 oe = MI->memoperands_end(); o != oe; ++o) { 00390 if (!(*o)->isStore() || !(*o)->getValue()) 00391 continue; 00392 if (const FixedStackPseudoSourceValue *Value = 00393 dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { 00394 if (Value->getFrameIndex() == FI) 00395 return true; 00396 } 00397 } 00398 return false; 00399 } 00400 00401 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also 00402 /// gather register def and frame object update information. 00403 void MachineLICM::ProcessMI(MachineInstr *MI, 00404 BitVector &PhysRegDefs, 00405 BitVector &PhysRegClobbers, 00406 SmallSet<int, 32> &StoredFIs, 00407 SmallVector<CandidateInfo, 32> &Candidates) { 00408 bool RuledOut = false; 00409 bool HasNonInvariantUse = false; 00410 unsigned Def = 0; 00411 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00412 const MachineOperand &MO = MI->getOperand(i); 00413 if (MO.isFI()) { 00414 // Remember if the instruction stores to the frame index. 00415 int FI = MO.getIndex(); 00416 if (!StoredFIs.count(FI) && 00417 MFI->isSpillSlotObjectIndex(FI) && 00418 InstructionStoresToFI(MI, FI)) 00419 StoredFIs.insert(FI); 00420 HasNonInvariantUse = true; 00421 continue; 00422 } 00423 00424 // We can't hoist an instruction defining a physreg that is clobbered in 00425 // the loop. 00426 if (MO.isRegMask()) { 00427 PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); 00428 continue; 00429 } 00430 00431 if (!MO.isReg()) 00432 continue; 00433 unsigned Reg = MO.getReg(); 00434 if (!Reg) 00435 continue; 00436 assert(TargetRegisterInfo::isPhysicalRegister(Reg) && 00437 "Not expecting virtual register!"); 00438 00439 if (!MO.isDef()) { 00440 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) 00441 // If it's using a non-loop-invariant register, then it's obviously not 00442 // safe to hoist. 00443 HasNonInvariantUse = true; 00444 continue; 00445 } 00446 00447 if (MO.isImplicit()) { 00448 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 00449 PhysRegClobbers.set(*AI); 00450 if (!MO.isDead()) 00451 // Non-dead implicit def? This cannot be hoisted. 00452 RuledOut = true; 00453 // No need to check if a dead implicit def is also defined by 00454 // another instruction. 00455 continue; 00456 } 00457 00458 // FIXME: For now, avoid instructions with multiple defs, unless 00459 // it's a dead implicit def. 00460 if (Def) 00461 RuledOut = true; 00462 else 00463 Def = Reg; 00464 00465 // If we have already seen another instruction that defines the same 00466 // register, then this is not safe. Two defs is indicated by setting a 00467 // PhysRegClobbers bit. 00468 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { 00469 if (PhysRegDefs.test(*AS)) 00470 PhysRegClobbers.set(*AS); 00471 if (PhysRegClobbers.test(*AS)) 00472 // MI defined register is seen defined by another instruction in 00473 // the loop, it cannot be a LICM candidate. 00474 RuledOut = true; 00475 PhysRegDefs.set(*AS); 00476 } 00477 } 00478 00479 // Only consider reloads for now and remats which do not have register 00480 // operands. FIXME: Consider unfold load folding instructions. 00481 if (Def && !RuledOut) { 00482 int FI = INT_MIN; 00483 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 00484 (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 00485 Candidates.push_back(CandidateInfo(MI, Def, FI)); 00486 } 00487 } 00488 00489 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop 00490 /// invariants out to the preheader. 00491 void MachineLICM::HoistRegionPostRA() { 00492 MachineBasicBlock *Preheader = getCurPreheader(); 00493 if (!Preheader) 00494 return; 00495 00496 unsigned NumRegs = TRI->getNumRegs(); 00497 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. 00498 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 00499 00500 SmallVector<CandidateInfo, 32> Candidates; 00501 SmallSet<int, 32> StoredFIs; 00502 00503 // Walk the entire region, count number of defs for each register, and 00504 // collect potential LICM candidates. 00505 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 00506 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 00507 MachineBasicBlock *BB = Blocks[i]; 00508 00509 // If the header of the loop containing this basic block is a landing pad, 00510 // then don't try to hoist instructions out of this loop. 00511 const MachineLoop *ML = MLI->getLoopFor(BB); 00512 if (ML && ML->getHeader()->isLandingPad()) continue; 00513 00514 // Conservatively treat live-in's as an external def. 00515 // FIXME: That means a reload that're reused in successor block(s) will not 00516 // be LICM'ed. 00517 for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), 00518 E = BB->livein_end(); I != E; ++I) { 00519 unsigned Reg = *I; 00520 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 00521 PhysRegDefs.set(*AI); 00522 } 00523 00524 SpeculationState = SpeculateUnknown; 00525 for (MachineBasicBlock::iterator 00526 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 00527 MachineInstr *MI = &*MII; 00528 ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); 00529 } 00530 } 00531 00532 // Gather the registers read / clobbered by the terminator. 00533 BitVector TermRegs(NumRegs); 00534 MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); 00535 if (TI != Preheader->end()) { 00536 for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { 00537 const MachineOperand &MO = TI->getOperand(i); 00538 if (!MO.isReg()) 00539 continue; 00540 unsigned Reg = MO.getReg(); 00541 if (!Reg) 00542 continue; 00543 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 00544 TermRegs.set(*AI); 00545 } 00546 } 00547 00548 // Now evaluate whether the potential candidates qualify. 00549 // 1. Check if the candidate defined register is defined by another 00550 // instruction in the loop. 00551 // 2. If the candidate is a load from stack slot (always true for now), 00552 // check if the slot is stored anywhere in the loop. 00553 // 3. Make sure candidate def should not clobber 00554 // registers read by the terminator. Similarly its def should not be 00555 // clobbered by the terminator. 00556 for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { 00557 if (Candidates[i].FI != INT_MIN && 00558 StoredFIs.count(Candidates[i].FI)) 00559 continue; 00560 00561 unsigned Def = Candidates[i].Def; 00562 if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { 00563 bool Safe = true; 00564 MachineInstr *MI = Candidates[i].MI; 00565 for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { 00566 const MachineOperand &MO = MI->getOperand(j); 00567 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 00568 continue; 00569 unsigned Reg = MO.getReg(); 00570 if (PhysRegDefs.test(Reg) || 00571 PhysRegClobbers.test(Reg)) { 00572 // If it's using a non-loop-invariant register, then it's obviously 00573 // not safe to hoist. 00574 Safe = false; 00575 break; 00576 } 00577 } 00578 if (Safe) 00579 HoistPostRA(MI, Candidates[i].Def); 00580 } 00581 } 00582 } 00583 00584 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current 00585 /// loop, and make sure it is not killed by any instructions in the loop. 00586 void MachineLICM::AddToLiveIns(unsigned Reg) { 00587 const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); 00588 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 00589 MachineBasicBlock *BB = Blocks[i]; 00590 if (!BB->isLiveIn(Reg)) 00591 BB->addLiveIn(Reg); 00592 for (MachineBasicBlock::iterator 00593 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 00594 MachineInstr *MI = &*MII; 00595 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 00596 MachineOperand &MO = MI->getOperand(i); 00597 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 00598 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 00599 MO.setIsKill(false); 00600 } 00601 } 00602 } 00603 } 00604 00605 /// HoistPostRA - When an instruction is found to only use loop invariant 00606 /// operands that is safe to hoist, this instruction is called to do the 00607 /// dirty work. 00608 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 00609 MachineBasicBlock *Preheader = getCurPreheader(); 00610 00611 // Now move the instructions to the predecessor, inserting it before any 00612 // terminator instructions. 00613 DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" 00614 << MI->getParent()->getNumber() << ": " << *MI); 00615 00616 // Splice the instruction to the preheader. 00617 MachineBasicBlock *MBB = MI->getParent(); 00618 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 00619 00620 // Add register to livein list to all the BBs in the current loop since a 00621 // loop invariant must be kept live throughout the whole loop. This is 00622 // important to ensure later passes do not scavenge the def register. 00623 AddToLiveIns(Def); 00624 00625 ++NumPostRAHoisted; 00626 Changed = true; 00627 } 00628 00629 // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. 00630 // If not then a load from this mbb may not be safe to hoist. 00631 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { 00632 if (SpeculationState != SpeculateUnknown) 00633 return SpeculationState == SpeculateFalse; 00634 00635 if (BB != CurLoop->getHeader()) { 00636 // Check loop exiting blocks. 00637 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 00638 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 00639 for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) 00640 if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { 00641 SpeculationState = SpeculateTrue; 00642 return false; 00643 } 00644 } 00645 00646 SpeculationState = SpeculateFalse; 00647 return true; 00648 } 00649 00650 void MachineLICM::EnterScope(MachineBasicBlock *MBB) { 00651 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 00652 00653 // Remember livein register pressure. 00654 BackTrace.push_back(RegPressure); 00655 } 00656 00657 void MachineLICM::ExitScope(MachineBasicBlock *MBB) { 00658 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 00659 BackTrace.pop_back(); 00660 } 00661 00662 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 00663 /// dominator tree node if its a leaf or all of its children are done. Walk 00664 /// up the dominator tree to destroy ancestors which are now done. 00665 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, 00666 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 00667 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 00668 if (OpenChildren[Node]) 00669 return; 00670 00671 // Pop scope. 00672 ExitScope(Node->getBlock()); 00673 00674 // Now traverse upwards to pop ancestors whose offsprings are all done. 00675 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 00676 unsigned Left = --OpenChildren[Parent]; 00677 if (Left != 0) 00678 break; 00679 ExitScope(Parent->getBlock()); 00680 Node = Parent; 00681 } 00682 } 00683 00684 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all 00685 /// blocks dominated by the specified header block, and that are in the 00686 /// current loop) in depth first order w.r.t the DominatorTree. This allows 00687 /// us to visit definitions before uses, allowing us to hoist a loop body in 00688 /// one pass without iteration. 00689 /// 00690 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { 00691 SmallVector<MachineDomTreeNode*, 32> Scopes; 00692 SmallVector<MachineDomTreeNode*, 8> WorkList; 00693 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 00694 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 00695 00696 // Perform a DFS walk to determine the order of visit. 00697 WorkList.push_back(HeaderN); 00698 do { 00699 MachineDomTreeNode *Node = WorkList.pop_back_val(); 00700 assert(Node != 0 && "Null dominator tree node?"); 00701 MachineBasicBlock *BB = Node->getBlock(); 00702 00703 // If the header of the loop containing this basic block is a landing pad, 00704 // then don't try to hoist instructions out of this loop. 00705 const MachineLoop *ML = MLI->getLoopFor(BB); 00706 if (ML && ML->getHeader()->isLandingPad()) 00707 continue; 00708 00709 // If this subregion is not in the top level loop at all, exit. 00710 if (!CurLoop->contains(BB)) 00711 continue; 00712 00713 Scopes.push_back(Node); 00714 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 00715 unsigned NumChildren = Children.size(); 00716 00717 // Don't hoist things out of a large switch statement. This often causes 00718 // code to be hoisted that wasn't going to be executed, and increases 00719 // register pressure in a situation where it's likely to matter. 00720 if (BB->succ_size() >= 25) 00721 NumChildren = 0; 00722 00723 OpenChildren[Node] = NumChildren; 00724 // Add children in reverse order as then the next popped worklist node is 00725 // the first child of this node. This means we ultimately traverse the 00726 // DOM tree in exactly the same order as if we'd recursed. 00727 for (int i = (int)NumChildren-1; i >= 0; --i) { 00728 MachineDomTreeNode *Child = Children[i]; 00729 ParentMap[Child] = Node; 00730 WorkList.push_back(Child); 00731 } 00732 } while (!WorkList.empty()); 00733 00734 if (Scopes.size() != 0) { 00735 MachineBasicBlock *Preheader = getCurPreheader(); 00736 if (!Preheader) 00737 return; 00738 00739 // Compute registers which are livein into the loop headers. 00740 RegSeen.clear(); 00741 BackTrace.clear(); 00742 InitRegPressure(Preheader); 00743 } 00744 00745 // Now perform LICM. 00746 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 00747 MachineDomTreeNode *Node = Scopes[i]; 00748 MachineBasicBlock *MBB = Node->getBlock(); 00749 00750 MachineBasicBlock *Preheader = getCurPreheader(); 00751 if (!Preheader) 00752 continue; 00753 00754 EnterScope(MBB); 00755 00756 // Process the block 00757 SpeculationState = SpeculateUnknown; 00758 for (MachineBasicBlock::iterator 00759 MII = MBB->begin(), E = MBB->end(); MII != E; ) { 00760 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 00761 MachineInstr *MI = &*MII; 00762 if (!Hoist(MI, Preheader)) 00763 UpdateRegPressure(MI); 00764 MII = NextMII; 00765 } 00766 00767 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 00768 ExitScopeIfDone(Node, OpenChildren, ParentMap); 00769 } 00770 } 00771 00772 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { 00773 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); 00774 } 00775 00776 /// getRegisterClassIDAndCost - For a given MI, register, and the operand 00777 /// index, return the ID and cost of its representative register class. 00778 void 00779 MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, 00780 unsigned Reg, unsigned OpIdx, 00781 unsigned &RCId, unsigned &RCCost) const { 00782 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 00783 MVT VT = *RC->vt_begin(); 00784 if (VT == MVT::Untyped) { 00785 RCId = RC->getID(); 00786 RCCost = 1; 00787 } else { 00788 RCId = TLI->getRepRegClassFor(VT)->getID(); 00789 RCCost = TLI->getRepRegClassCostFor(VT); 00790 } 00791 } 00792 00793 /// InitRegPressure - Find all virtual register references that are liveout of 00794 /// the preheader to initialize the starting "register pressure". Note this 00795 /// does not count live through (livein but not used) registers. 00796 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { 00797 std::fill(RegPressure.begin(), RegPressure.end(), 0); 00798 00799 // If the preheader has only a single predecessor and it ends with a 00800 // fallthrough or an unconditional branch, then scan its predecessor for live 00801 // defs as well. This happens whenever the preheader is created by splitting 00802 // the critical edge from the loop predecessor to the loop header. 00803 if (BB->pred_size() == 1) { 00804 MachineBasicBlock *TBB = 0, *FBB = 0; 00805 SmallVector<MachineOperand, 4> Cond; 00806 if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 00807 InitRegPressure(*BB->pred_begin()); 00808 } 00809 00810 for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); 00811 MII != E; ++MII) { 00812 MachineInstr *MI = &*MII; 00813 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 00814 const MachineOperand &MO = MI->getOperand(i); 00815 if (!MO.isReg() || MO.isImplicit()) 00816 continue; 00817 unsigned Reg = MO.getReg(); 00818 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 00819 continue; 00820 00821 bool isNew = RegSeen.insert(Reg); 00822 unsigned RCId, RCCost; 00823 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 00824 if (MO.isDef()) 00825 RegPressure[RCId] += RCCost; 00826 else { 00827 bool isKill = isOperandKill(MO, MRI); 00828 if (isNew && !isKill) 00829 // Haven't seen this, it must be a livein. 00830 RegPressure[RCId] += RCCost; 00831 else if (!isNew && isKill) 00832 RegPressure[RCId] -= RCCost; 00833 } 00834 } 00835 } 00836 } 00837 00838 /// UpdateRegPressure - Update estimate of register pressure after the 00839 /// specified instruction. 00840 void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { 00841 if (MI->isImplicitDef()) 00842 return; 00843 00844 SmallVector<unsigned, 4> Defs; 00845 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 00846 const MachineOperand &MO = MI->getOperand(i); 00847 if (!MO.isReg() || MO.isImplicit()) 00848 continue; 00849 unsigned Reg = MO.getReg(); 00850 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 00851 continue; 00852 00853 bool isNew = RegSeen.insert(Reg); 00854 if (MO.isDef()) 00855 Defs.push_back(Reg); 00856 else if (!isNew && isOperandKill(MO, MRI)) { 00857 unsigned RCId, RCCost; 00858 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 00859 if (RCCost > RegPressure[RCId]) 00860 RegPressure[RCId] = 0; 00861 else 00862 RegPressure[RCId] -= RCCost; 00863 } 00864 } 00865 00866 unsigned Idx = 0; 00867 while (!Defs.empty()) { 00868 unsigned Reg = Defs.pop_back_val(); 00869 unsigned RCId, RCCost; 00870 getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); 00871 RegPressure[RCId] += RCCost; 00872 ++Idx; 00873 } 00874 } 00875 00876 /// isLoadFromGOTOrConstantPool - Return true if this machine instruction 00877 /// loads from global offset table or constant pool. 00878 static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { 00879 assert (MI.mayLoad() && "Expected MI that loads!"); 00880 for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), 00881 E = MI.memoperands_end(); I != E; ++I) { 00882 if (const Value *V = (*I)->getValue()) { 00883 if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) 00884 if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) 00885 return true; 00886 } 00887 } 00888 return false; 00889 } 00890 00891 /// IsLICMCandidate - Returns true if the instruction may be a suitable 00892 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously 00893 /// not safe to hoist it. 00894 bool MachineLICM::IsLICMCandidate(MachineInstr &I) { 00895 // Check if it's safe to move the instruction. 00896 bool DontMoveAcrossStore = true; 00897 if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) 00898 return false; 00899 00900 // If it is load then check if it is guaranteed to execute by making sure that 00901 // it dominates all exiting blocks. If it doesn't, then there is a path out of 00902 // the loop which does not execute this load, so we can't hoist it. Loads 00903 // from constant memory are not safe to speculate all the time, for example 00904 // indexed load from a jump table. 00905 // Stores and side effects are already checked by isSafeToMove. 00906 if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && 00907 !IsGuaranteedToExecute(I.getParent())) 00908 return false; 00909 00910 return true; 00911 } 00912 00913 /// IsLoopInvariantInst - Returns true if the instruction is loop 00914 /// invariant. I.e., all virtual register operands are defined outside of the 00915 /// loop, physical registers aren't accessed explicitly, and there are no side 00916 /// effects that aren't captured by the operands or other flags. 00917 /// 00918 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { 00919 if (!IsLICMCandidate(I)) 00920 return false; 00921 00922 // The instruction is loop invariant if all of its operands are. 00923 for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { 00924 const MachineOperand &MO = I.getOperand(i); 00925 00926 if (!MO.isReg()) 00927 continue; 00928 00929 unsigned Reg = MO.getReg(); 00930 if (Reg == 0) continue; 00931 00932 // Don't hoist an instruction that uses or defines a physical register. 00933 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 00934 if (MO.isUse()) { 00935 // If the physreg has no defs anywhere, it's just an ambient register 00936 // and we can freely move its uses. Alternatively, if it's allocatable, 00937 // it could get allocated to something with a def during allocation. 00938 if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) 00939 return false; 00940 // Otherwise it's safe to move. 00941 continue; 00942 } else if (!MO.isDead()) { 00943 // A def that isn't dead. We can't move it. 00944 return false; 00945 } else if (CurLoop->getHeader()->isLiveIn(Reg)) { 00946 // If the reg is live into the loop, we can't hoist an instruction 00947 // which would clobber it. 00948 return false; 00949 } 00950 } 00951 00952 if (!MO.isUse()) 00953 continue; 00954 00955 assert(MRI->getVRegDef(Reg) && 00956 "Machine instr not mapped for this vreg?!"); 00957 00958 // If the loop contains the definition of an operand, then the instruction 00959 // isn't loop invariant. 00960 if (CurLoop->contains(MRI->getVRegDef(Reg))) 00961 return false; 00962 } 00963 00964 // If we got this far, the instruction is loop invariant! 00965 return true; 00966 } 00967 00968 00969 /// HasLoopPHIUse - Return true if the specified instruction is used by a 00970 /// phi node and hoisting it could cause a copy to be inserted. 00971 bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { 00972 SmallVector<const MachineInstr*, 8> Work(1, MI); 00973 do { 00974 MI = Work.pop_back_val(); 00975 for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { 00976 if (!MO->isReg() || !MO->isDef()) 00977 continue; 00978 unsigned Reg = MO->getReg(); 00979 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 00980 continue; 00981 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 00982 UE = MRI->use_end(); UI != UE; ++UI) { 00983 MachineInstr *UseMI = &*UI; 00984 // A PHI may cause a copy to be inserted. 00985 if (UseMI->isPHI()) { 00986 // A PHI inside the loop causes a copy because the live range of Reg is 00987 // extended across the PHI. 00988 if (CurLoop->contains(UseMI)) 00989 return true; 00990 // A PHI in an exit block can cause a copy to be inserted if the PHI 00991 // has multiple predecessors in the loop with different values. 00992 // For now, approximate by rejecting all exit blocks. 00993 if (isExitBlock(UseMI->getParent())) 00994 return true; 00995 continue; 00996 } 00997 // Look past copies as well. 00998 if (UseMI->isCopy() && CurLoop->contains(UseMI)) 00999 Work.push_back(UseMI); 01000 } 01001 } 01002 } while (!Work.empty()); 01003 return false; 01004 } 01005 01006 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' 01007 /// and an use in the current loop, return true if the target considered 01008 /// it 'high'. 01009 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 01010 unsigned DefIdx, unsigned Reg) const { 01011 if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) 01012 return false; 01013 01014 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 01015 E = MRI->use_nodbg_end(); I != E; ++I) { 01016 MachineInstr *UseMI = &*I; 01017 if (UseMI->isCopyLike()) 01018 continue; 01019 if (!CurLoop->contains(UseMI->getParent())) 01020 continue; 01021 for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { 01022 const MachineOperand &MO = UseMI->getOperand(i); 01023 if (!MO.isReg() || !MO.isUse()) 01024 continue; 01025 unsigned MOReg = MO.getReg(); 01026 if (MOReg != Reg) 01027 continue; 01028 01029 if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) 01030 return true; 01031 } 01032 01033 // Only look at the first in loop use. 01034 break; 01035 } 01036 01037 return false; 01038 } 01039 01040 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or 01041 /// the operand latency between its def and a use is one or less. 01042 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 01043 if (MI.isAsCheapAsAMove() || MI.isCopyLike()) 01044 return true; 01045 if (!InstrItins || InstrItins->isEmpty()) 01046 return false; 01047 01048 bool isCheap = false; 01049 unsigned NumDefs = MI.getDesc().getNumDefs(); 01050 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 01051 MachineOperand &DefMO = MI.getOperand(i); 01052 if (!DefMO.isReg() || !DefMO.isDef()) 01053 continue; 01054 --NumDefs; 01055 unsigned Reg = DefMO.getReg(); 01056 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 01057 continue; 01058 01059 if (!TII->hasLowDefLatency(InstrItins, &MI, i)) 01060 return false; 01061 isCheap = true; 01062 } 01063 01064 return isCheap; 01065 } 01066 01067 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check 01068 /// if hoisting an instruction of the given cost matrix can cause high 01069 /// register pressure. 01070 bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, 01071 bool CheapInstr) { 01072 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 01073 CI != CE; ++CI) { 01074 if (CI->second <= 0) 01075 continue; 01076 01077 unsigned RCId = CI->first; 01078 unsigned Limit = RegLimit[RCId]; 01079 int Cost = CI->second; 01080 01081 // Don't hoist cheap instructions if they would increase register pressure, 01082 // even if we're under the limit. 01083 if (CheapInstr) 01084 return true; 01085 01086 for (unsigned i = BackTrace.size(); i != 0; --i) { 01087 SmallVector<unsigned, 8> &RP = BackTrace[i-1]; 01088 if (RP[RCId] + Cost >= Limit) 01089 return true; 01090 } 01091 } 01092 01093 return false; 01094 } 01095 01096 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the 01097 /// current block and update their register pressures to reflect the effect 01098 /// of hoisting MI from the current block to the preheader. 01099 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { 01100 if (MI->isImplicitDef()) 01101 return; 01102 01103 // First compute the 'cost' of the instruction, i.e. its contribution 01104 // to register pressure. 01105 DenseMap<unsigned, int> Cost; 01106 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 01107 const MachineOperand &MO = MI->getOperand(i); 01108 if (!MO.isReg() || MO.isImplicit()) 01109 continue; 01110 unsigned Reg = MO.getReg(); 01111 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 01112 continue; 01113 01114 unsigned RCId, RCCost; 01115 getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); 01116 if (MO.isDef()) { 01117 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 01118 if (CI != Cost.end()) 01119 CI->second += RCCost; 01120 else 01121 Cost.insert(std::make_pair(RCId, RCCost)); 01122 } else if (isOperandKill(MO, MRI)) { 01123 DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); 01124 if (CI != Cost.end()) 01125 CI->second -= RCCost; 01126 else 01127 Cost.insert(std::make_pair(RCId, -RCCost)); 01128 } 01129 } 01130 01131 // Update register pressure of blocks from loop header to current block. 01132 for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { 01133 SmallVector<unsigned, 8> &RP = BackTrace[i]; 01134 for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); 01135 CI != CE; ++CI) { 01136 unsigned RCId = CI->first; 01137 RP[RCId] += CI->second; 01138 } 01139 } 01140 } 01141 01142 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist 01143 /// the given loop invariant. 01144 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { 01145 if (MI.isImplicitDef()) 01146 return true; 01147 01148 // Besides removing computation from the loop, hoisting an instruction has 01149 // these effects: 01150 // 01151 // - The value defined by the instruction becomes live across the entire 01152 // loop. This increases register pressure in the loop. 01153 // 01154 // - If the value is used by a PHI in the loop, a copy will be required for 01155 // lowering the PHI after extending the live range. 01156 // 01157 // - When hoisting the last use of a value in the loop, that value no longer 01158 // needs to be live in the loop. This lowers register pressure in the loop. 01159 01160 bool CheapInstr = IsCheapInstruction(MI); 01161 bool CreatesCopy = HasLoopPHIUse(&MI); 01162 01163 // Don't hoist a cheap instruction if it would create a copy in the loop. 01164 if (CheapInstr && CreatesCopy) { 01165 DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); 01166 return false; 01167 } 01168 01169 // Rematerializable instructions should always be hoisted since the register 01170 // allocator can just pull them down again when needed. 01171 if (TII->isTriviallyReMaterializable(&MI, AA)) 01172 return true; 01173 01174 // Estimate register pressure to determine whether to LICM the instruction. 01175 // In low register pressure situation, we can be more aggressive about 01176 // hoisting. Also, favors hoisting long latency instructions even in 01177 // moderately high pressure situation. 01178 // Cheap instructions will only be hoisted if they don't increase register 01179 // pressure at all. 01180 // FIXME: If there are long latency loop-invariant instructions inside the 01181 // loop at this point, why didn't the optimizer's LICM hoist them? 01182 DenseMap<unsigned, int> Cost; 01183 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 01184 const MachineOperand &MO = MI.getOperand(i); 01185 if (!MO.isReg() || MO.isImplicit()) 01186 continue; 01187 unsigned Reg = MO.getReg(); 01188 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 01189 continue; 01190 01191 unsigned RCId, RCCost; 01192 getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); 01193 if (MO.isDef()) { 01194 if (HasHighOperandLatency(MI, i, Reg)) { 01195 DEBUG(dbgs() << "Hoist High Latency: " << MI); 01196 ++NumHighLatency; 01197 return true; 01198 } 01199 Cost[RCId] += RCCost; 01200 } else if (isOperandKill(MO, MRI)) { 01201 // Is a virtual register use is a kill, hoisting it out of the loop 01202 // may actually reduce register pressure or be register pressure 01203 // neutral. 01204 Cost[RCId] -= RCCost; 01205 } 01206 } 01207 01208 // Visit BBs from header to current BB, if hoisting this doesn't cause 01209 // high register pressure, then it's safe to proceed. 01210 if (!CanCauseHighRegPressure(Cost, CheapInstr)) { 01211 DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); 01212 ++NumLowRP; 01213 return true; 01214 } 01215 01216 // Don't risk increasing register pressure if it would create copies. 01217 if (CreatesCopy) { 01218 DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); 01219 return false; 01220 } 01221 01222 // Do not "speculate" in high register pressure situation. If an 01223 // instruction is not guaranteed to be executed in the loop, it's best to be 01224 // conservative. 01225 if (AvoidSpeculation && 01226 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { 01227 DEBUG(dbgs() << "Won't speculate: " << MI); 01228 return false; 01229 } 01230 01231 // High register pressure situation, only hoist if the instruction is going 01232 // to be remat'ed. 01233 if (!TII->isTriviallyReMaterializable(&MI, AA) && 01234 !MI.isInvariantLoad(AA)) { 01235 DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); 01236 return false; 01237 } 01238 01239 return true; 01240 } 01241 01242 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { 01243 // Don't unfold simple loads. 01244 if (MI->canFoldAsLoad()) 01245 return 0; 01246 01247 // If not, we may be able to unfold a load and hoist that. 01248 // First test whether the instruction is loading from an amenable 01249 // memory location. 01250 if (!MI->isInvariantLoad(AA)) 01251 return 0; 01252 01253 // Next determine the register class for a temporary register. 01254 unsigned LoadRegIndex; 01255 unsigned NewOpc = 01256 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 01257 /*UnfoldLoad=*/true, 01258 /*UnfoldStore=*/false, 01259 &LoadRegIndex); 01260 if (NewOpc == 0) return 0; 01261 const MCInstrDesc &MID = TII->get(NewOpc); 01262 if (MID.getNumDefs() != 1) return 0; 01263 MachineFunction &MF = *MI->getParent()->getParent(); 01264 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); 01265 // Ok, we're unfolding. Create a temporary register and do the unfold. 01266 unsigned Reg = MRI->createVirtualRegister(RC); 01267 01268 SmallVector<MachineInstr *, 2> NewMIs; 01269 bool Success = 01270 TII->unfoldMemoryOperand(MF, MI, Reg, 01271 /*UnfoldLoad=*/true, /*UnfoldStore=*/false, 01272 NewMIs); 01273 (void)Success; 01274 assert(Success && 01275 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 01276 "succeeded!"); 01277 assert(NewMIs.size() == 2 && 01278 "Unfolded a load into multiple instructions!"); 01279 MachineBasicBlock *MBB = MI->getParent(); 01280 MachineBasicBlock::iterator Pos = MI; 01281 MBB->insert(Pos, NewMIs[0]); 01282 MBB->insert(Pos, NewMIs[1]); 01283 // If unfolding produced a load that wasn't loop-invariant or profitable to 01284 // hoist, discard the new instructions and bail. 01285 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 01286 NewMIs[0]->eraseFromParent(); 01287 NewMIs[1]->eraseFromParent(); 01288 return 0; 01289 } 01290 01291 // Update register pressure for the unfolded instruction. 01292 UpdateRegPressure(NewMIs[1]); 01293 01294 // Otherwise we successfully unfolded a load that we can hoist. 01295 MI->eraseFromParent(); 01296 return NewMIs[0]; 01297 } 01298 01299 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { 01300 for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { 01301 const MachineInstr *MI = &*I; 01302 unsigned Opcode = MI->getOpcode(); 01303 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 01304 CI = CSEMap.find(Opcode); 01305 if (CI != CSEMap.end()) 01306 CI->second.push_back(MI); 01307 else { 01308 std::vector<const MachineInstr*> CSEMIs; 01309 CSEMIs.push_back(MI); 01310 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 01311 } 01312 } 01313 } 01314 01315 const MachineInstr* 01316 MachineLICM::LookForDuplicate(const MachineInstr *MI, 01317 std::vector<const MachineInstr*> &PrevMIs) { 01318 for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { 01319 const MachineInstr *PrevMI = PrevMIs[i]; 01320 if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) 01321 return PrevMI; 01322 } 01323 return 0; 01324 } 01325 01326 bool MachineLICM::EliminateCSE(MachineInstr *MI, 01327 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { 01328 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 01329 // the undef property onto uses. 01330 if (CI == CSEMap.end() || MI->isImplicitDef()) 01331 return false; 01332 01333 if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 01334 DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 01335 01336 // Replace virtual registers defined by MI by their counterparts defined 01337 // by Dup. 01338 SmallVector<unsigned, 2> Defs; 01339 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 01340 const MachineOperand &MO = MI->getOperand(i); 01341 01342 // Physical registers may not differ here. 01343 assert((!MO.isReg() || MO.getReg() == 0 || 01344 !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || 01345 MO.getReg() == Dup->getOperand(i).getReg()) && 01346 "Instructions with different phys regs are not identical!"); 01347 01348 if (MO.isReg() && MO.isDef() && 01349 !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 01350 Defs.push_back(i); 01351 } 01352 01353 SmallVector<const TargetRegisterClass*, 2> OrigRCs; 01354 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 01355 unsigned Idx = Defs[i]; 01356 unsigned Reg = MI->getOperand(Idx).getReg(); 01357 unsigned DupReg = Dup->getOperand(Idx).getReg(); 01358 OrigRCs.push_back(MRI->getRegClass(DupReg)); 01359 01360 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { 01361 // Restore old RCs if more than one defs. 01362 for (unsigned j = 0; j != i; ++j) 01363 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); 01364 return false; 01365 } 01366 } 01367 01368 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 01369 unsigned Idx = Defs[i]; 01370 unsigned Reg = MI->getOperand(Idx).getReg(); 01371 unsigned DupReg = Dup->getOperand(Idx).getReg(); 01372 MRI->replaceRegWith(Reg, DupReg); 01373 MRI->clearKillFlags(DupReg); 01374 } 01375 01376 MI->eraseFromParent(); 01377 ++NumCSEed; 01378 return true; 01379 } 01380 return false; 01381 } 01382 01383 /// MayCSE - Return true if the given instruction will be CSE'd if it's 01384 /// hoisted out of the loop. 01385 bool MachineLICM::MayCSE(MachineInstr *MI) { 01386 unsigned Opcode = MI->getOpcode(); 01387 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 01388 CI = CSEMap.find(Opcode); 01389 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 01390 // the undef property onto uses. 01391 if (CI == CSEMap.end() || MI->isImplicitDef()) 01392 return false; 01393 01394 return LookForDuplicate(MI, CI->second) != 0; 01395 } 01396 01397 /// Hoist - When an instruction is found to use only loop invariant operands 01398 /// that are safe to hoist, this instruction is called to do the dirty work. 01399 /// 01400 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 01401 // First check whether we should hoist this instruction. 01402 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 01403 // If not, try unfolding a hoistable load. 01404 MI = ExtractHoistableLoad(MI); 01405 if (!MI) return false; 01406 } 01407 01408 // Now move the instructions to the predecessor, inserting it before any 01409 // terminator instructions. 01410 DEBUG({ 01411 dbgs() << "Hoisting " << *MI; 01412 if (Preheader->getBasicBlock()) 01413 dbgs() << " to MachineBasicBlock " 01414 << Preheader->getName(); 01415 if (MI->getParent()->getBasicBlock()) 01416 dbgs() << " from MachineBasicBlock " 01417 << MI->getParent()->getName(); 01418 dbgs() << "\n"; 01419 }); 01420 01421 // If this is the first instruction being hoisted to the preheader, 01422 // initialize the CSE map with potential common expressions. 01423 if (FirstInLoop) { 01424 InitCSEMap(Preheader); 01425 FirstInLoop = false; 01426 } 01427 01428 // Look for opportunity to CSE the hoisted instruction. 01429 unsigned Opcode = MI->getOpcode(); 01430 DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator 01431 CI = CSEMap.find(Opcode); 01432 if (!EliminateCSE(MI, CI)) { 01433 // Otherwise, splice the instruction to the preheader. 01434 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 01435 01436 // Update register pressure for BBs from header to this block. 01437 UpdateBackTraceRegPressure(MI); 01438 01439 // Clear the kill flags of any register this instruction defines, 01440 // since they may need to be live throughout the entire loop 01441 // rather than just live for part of it. 01442 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 01443 MachineOperand &MO = MI->getOperand(i); 01444 if (MO.isReg() && MO.isDef() && !MO.isDead()) 01445 MRI->clearKillFlags(MO.getReg()); 01446 } 01447 01448 // Add to the CSE map. 01449 if (CI != CSEMap.end()) 01450 CI->second.push_back(MI); 01451 else { 01452 std::vector<const MachineInstr*> CSEMIs; 01453 CSEMIs.push_back(MI); 01454 CSEMap.insert(std::make_pair(Opcode, CSEMIs)); 01455 } 01456 } 01457 01458 ++NumHoisted; 01459 Changed = true; 01460 01461 return true; 01462 } 01463 01464 MachineBasicBlock *MachineLICM::getCurPreheader() { 01465 // Determine the block to which to hoist instructions. If we can't find a 01466 // suitable loop predecessor, we can't do any hoisting. 01467 01468 // If we've tried to get a preheader and failed, don't try again. 01469 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 01470 return 0; 01471 01472 if (!CurPreheader) { 01473 CurPreheader = CurLoop->getLoopPreheader(); 01474 if (!CurPreheader) { 01475 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 01476 if (!Pred) { 01477 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 01478 return 0; 01479 } 01480 01481 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); 01482 if (!CurPreheader) { 01483 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 01484 return 0; 01485 } 01486 } 01487 } 01488 return CurPreheader; 01489 }