LLVM API Documentation
00001 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This implements bottom-up and top-down register pressure reduction list 00011 // schedulers, using standard algorithms. The basic approach uses a priority 00012 // queue of available nodes to schedule. One at a time, nodes are taken from 00013 // the priority queue (thus in priority order), checked for legality to 00014 // schedule, and emitted if legal. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 00018 #define DEBUG_TYPE "pre-RA-sched" 00019 #include "llvm/CodeGen/SchedulerRegistry.h" 00020 #include "ScheduleDAGSDNodes.h" 00021 #include "llvm/ADT/STLExtras.h" 00022 #include "llvm/ADT/SmallSet.h" 00023 #include "llvm/ADT/Statistic.h" 00024 #include "llvm/CodeGen/MachineRegisterInfo.h" 00025 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 00026 #include "llvm/CodeGen/SelectionDAGISel.h" 00027 #include "llvm/IR/DataLayout.h" 00028 #include "llvm/IR/InlineAsm.h" 00029 #include "llvm/Support/Debug.h" 00030 #include "llvm/Support/ErrorHandling.h" 00031 #include "llvm/Support/raw_ostream.h" 00032 #include "llvm/Target/TargetInstrInfo.h" 00033 #include "llvm/Target/TargetLowering.h" 00034 #include "llvm/Target/TargetMachine.h" 00035 #include "llvm/Target/TargetRegisterInfo.h" 00036 #include <climits> 00037 using namespace llvm; 00038 00039 STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 00040 STATISTIC(NumUnfolds, "Number of nodes unfolded"); 00041 STATISTIC(NumDups, "Number of duplicated nodes"); 00042 STATISTIC(NumPRCopies, "Number of physical register copies"); 00043 00044 static RegisterScheduler 00045 burrListDAGScheduler("list-burr", 00046 "Bottom-up register reduction list scheduling", 00047 createBURRListDAGScheduler); 00048 static RegisterScheduler 00049 sourceListDAGScheduler("source", 00050 "Similar to list-burr but schedules in source " 00051 "order when possible", 00052 createSourceListDAGScheduler); 00053 00054 static RegisterScheduler 00055 hybridListDAGScheduler("list-hybrid", 00056 "Bottom-up register pressure aware list scheduling " 00057 "which tries to balance latency and register pressure", 00058 createHybridListDAGScheduler); 00059 00060 static RegisterScheduler 00061 ILPListDAGScheduler("list-ilp", 00062 "Bottom-up register pressure aware list scheduling " 00063 "which tries to balance ILP and register pressure", 00064 createILPListDAGScheduler); 00065 00066 static cl::opt<bool> DisableSchedCycles( 00067 "disable-sched-cycles", cl::Hidden, cl::init(false), 00068 cl::desc("Disable cycle-level precision during preRA scheduling")); 00069 00070 // Temporary sched=list-ilp flags until the heuristics are robust. 00071 // Some options are also available under sched=list-hybrid. 00072 static cl::opt<bool> DisableSchedRegPressure( 00073 "disable-sched-reg-pressure", cl::Hidden, cl::init(false), 00074 cl::desc("Disable regpressure priority in sched=list-ilp")); 00075 static cl::opt<bool> DisableSchedLiveUses( 00076 "disable-sched-live-uses", cl::Hidden, cl::init(true), 00077 cl::desc("Disable live use priority in sched=list-ilp")); 00078 static cl::opt<bool> DisableSchedVRegCycle( 00079 "disable-sched-vrcycle", cl::Hidden, cl::init(false), 00080 cl::desc("Disable virtual register cycle interference checks")); 00081 static cl::opt<bool> DisableSchedPhysRegJoin( 00082 "disable-sched-physreg-join", cl::Hidden, cl::init(false), 00083 cl::desc("Disable physreg def-use affinity")); 00084 static cl::opt<bool> DisableSchedStalls( 00085 "disable-sched-stalls", cl::Hidden, cl::init(true), 00086 cl::desc("Disable no-stall priority in sched=list-ilp")); 00087 static cl::opt<bool> DisableSchedCriticalPath( 00088 "disable-sched-critical-path", cl::Hidden, cl::init(false), 00089 cl::desc("Disable critical path priority in sched=list-ilp")); 00090 static cl::opt<bool> DisableSchedHeight( 00091 "disable-sched-height", cl::Hidden, cl::init(false), 00092 cl::desc("Disable scheduled-height priority in sched=list-ilp")); 00093 static cl::opt<bool> Disable2AddrHack( 00094 "disable-2addr-hack", cl::Hidden, cl::init(true), 00095 cl::desc("Disable scheduler's two-address hack")); 00096 00097 static cl::opt<int> MaxReorderWindow( 00098 "max-sched-reorder", cl::Hidden, cl::init(6), 00099 cl::desc("Number of instructions to allow ahead of the critical path " 00100 "in sched=list-ilp")); 00101 00102 static cl::opt<unsigned> AvgIPC( 00103 "sched-avg-ipc", cl::Hidden, cl::init(1), 00104 cl::desc("Average inst/cycle whan no target itinerary exists.")); 00105 00106 namespace { 00107 //===----------------------------------------------------------------------===// 00108 /// ScheduleDAGRRList - The actual register reduction list scheduler 00109 /// implementation. This supports both top-down and bottom-up scheduling. 00110 /// 00111 class ScheduleDAGRRList : public ScheduleDAGSDNodes { 00112 private: 00113 /// NeedLatency - True if the scheduler will make use of latency information. 00114 /// 00115 bool NeedLatency; 00116 00117 /// AvailableQueue - The priority queue to use for the available SUnits. 00118 SchedulingPriorityQueue *AvailableQueue; 00119 00120 /// PendingQueue - This contains all of the instructions whose operands have 00121 /// been issued, but their results are not ready yet (due to the latency of 00122 /// the operation). Once the operands becomes available, the instruction is 00123 /// added to the AvailableQueue. 00124 std::vector<SUnit*> PendingQueue; 00125 00126 /// HazardRec - The hazard recognizer to use. 00127 ScheduleHazardRecognizer *HazardRec; 00128 00129 /// CurCycle - The current scheduler state corresponds to this cycle. 00130 unsigned CurCycle; 00131 00132 /// MinAvailableCycle - Cycle of the soonest available instruction. 00133 unsigned MinAvailableCycle; 00134 00135 /// IssueCount - Count instructions issued in this cycle 00136 /// Currently valid only for bottom-up scheduling. 00137 unsigned IssueCount; 00138 00139 /// LiveRegDefs - A set of physical registers and their definition 00140 /// that are "live". These nodes must be scheduled before any other nodes that 00141 /// modifies the registers can be scheduled. 00142 unsigned NumLiveRegs; 00143 std::vector<SUnit*> LiveRegDefs; 00144 std::vector<SUnit*> LiveRegGens; 00145 00146 // Collect interferences between physical register use/defs. 00147 // Each interference is an SUnit and set of physical registers. 00148 SmallVector<SUnit*, 4> Interferences; 00149 typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT; 00150 LRegsMapT LRegsMap; 00151 00152 /// Topo - A topological ordering for SUnits which permits fast IsReachable 00153 /// and similar queries. 00154 ScheduleDAGTopologicalSort Topo; 00155 00156 // Hack to keep track of the inverse of FindCallSeqStart without more crazy 00157 // DAG crawling. 00158 DenseMap<SUnit*, SUnit*> CallSeqEndForStart; 00159 00160 public: 00161 ScheduleDAGRRList(MachineFunction &mf, bool needlatency, 00162 SchedulingPriorityQueue *availqueue, 00163 CodeGenOpt::Level OptLevel) 00164 : ScheduleDAGSDNodes(mf), 00165 NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), 00166 Topo(SUnits, NULL) { 00167 00168 const TargetMachine &tm = mf.getTarget(); 00169 if (DisableSchedCycles || !NeedLatency) 00170 HazardRec = new ScheduleHazardRecognizer(); 00171 else 00172 HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); 00173 } 00174 00175 ~ScheduleDAGRRList() { 00176 delete HazardRec; 00177 delete AvailableQueue; 00178 } 00179 00180 void Schedule(); 00181 00182 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } 00183 00184 /// IsReachable - Checks if SU is reachable from TargetSU. 00185 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 00186 return Topo.IsReachable(SU, TargetSU); 00187 } 00188 00189 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 00190 /// create a cycle. 00191 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 00192 return Topo.WillCreateCycle(SU, TargetSU); 00193 } 00194 00195 /// AddPred - adds a predecessor edge to SUnit SU. 00196 /// This returns true if this is a new predecessor. 00197 /// Updates the topological ordering if required. 00198 void AddPred(SUnit *SU, const SDep &D) { 00199 Topo.AddPred(SU, D.getSUnit()); 00200 SU->addPred(D); 00201 } 00202 00203 /// RemovePred - removes a predecessor edge from SUnit SU. 00204 /// This returns true if an edge was removed. 00205 /// Updates the topological ordering if required. 00206 void RemovePred(SUnit *SU, const SDep &D) { 00207 Topo.RemovePred(SU, D.getSUnit()); 00208 SU->removePred(D); 00209 } 00210 00211 private: 00212 bool isReady(SUnit *SU) { 00213 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || 00214 AvailableQueue->isReady(SU); 00215 } 00216 00217 void ReleasePred(SUnit *SU, const SDep *PredEdge); 00218 void ReleasePredecessors(SUnit *SU); 00219 void ReleasePending(); 00220 void AdvanceToCycle(unsigned NextCycle); 00221 void AdvancePastStalls(SUnit *SU); 00222 void EmitNode(SUnit *SU); 00223 void ScheduleNodeBottomUp(SUnit*); 00224 void CapturePred(SDep *PredEdge); 00225 void UnscheduleNodeBottomUp(SUnit*); 00226 void RestoreHazardCheckerBottomUp(); 00227 void BacktrackBottomUp(SUnit*, SUnit*); 00228 SUnit *CopyAndMoveSuccessors(SUnit*); 00229 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 00230 const TargetRegisterClass*, 00231 const TargetRegisterClass*, 00232 SmallVector<SUnit*, 2>&); 00233 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 00234 00235 void releaseInterferences(unsigned Reg = 0); 00236 00237 SUnit *PickNodeToScheduleBottomUp(); 00238 void ListScheduleBottomUp(); 00239 00240 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 00241 /// Updates the topological ordering if required. 00242 SUnit *CreateNewSUnit(SDNode *N) { 00243 unsigned NumSUnits = SUnits.size(); 00244 SUnit *NewNode = newSUnit(N); 00245 // Update the topological ordering. 00246 if (NewNode->NodeNum >= NumSUnits) 00247 Topo.InitDAGTopologicalSorting(); 00248 return NewNode; 00249 } 00250 00251 /// CreateClone - Creates a new SUnit from an existing one. 00252 /// Updates the topological ordering if required. 00253 SUnit *CreateClone(SUnit *N) { 00254 unsigned NumSUnits = SUnits.size(); 00255 SUnit *NewNode = Clone(N); 00256 // Update the topological ordering. 00257 if (NewNode->NodeNum >= NumSUnits) 00258 Topo.InitDAGTopologicalSorting(); 00259 return NewNode; 00260 } 00261 00262 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't 00263 /// need actual latency information but the hybrid scheduler does. 00264 bool forceUnitLatencies() const { 00265 return !NeedLatency; 00266 } 00267 }; 00268 } // end anonymous namespace 00269 00270 /// GetCostForDef - Looks up the register class and cost for a given definition. 00271 /// Typically this just means looking up the representative register class, 00272 /// but for untyped values (MVT::Untyped) it means inspecting the node's 00273 /// opcode to determine what register class is being generated. 00274 static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, 00275 const TargetLowering *TLI, 00276 const TargetInstrInfo *TII, 00277 const TargetRegisterInfo *TRI, 00278 unsigned &RegClass, unsigned &Cost, 00279 const MachineFunction &MF) { 00280 MVT VT = RegDefPos.GetValue(); 00281 00282 // Special handling for untyped values. These values can only come from 00283 // the expansion of custom DAG-to-DAG patterns. 00284 if (VT == MVT::Untyped) { 00285 const SDNode *Node = RegDefPos.GetNode(); 00286 00287 // Special handling for CopyFromReg of untyped values. 00288 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { 00289 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); 00290 const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); 00291 RegClass = RC->getID(); 00292 Cost = 1; 00293 return; 00294 } 00295 00296 unsigned Opcode = Node->getMachineOpcode(); 00297 if (Opcode == TargetOpcode::REG_SEQUENCE) { 00298 unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 00299 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); 00300 RegClass = RC->getID(); 00301 Cost = 1; 00302 return; 00303 } 00304 00305 unsigned Idx = RegDefPos.GetIdx(); 00306 const MCInstrDesc Desc = TII->get(Opcode); 00307 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); 00308 RegClass = RC->getID(); 00309 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a 00310 // better way to determine it. 00311 Cost = 1; 00312 } else { 00313 RegClass = TLI->getRepRegClassFor(VT)->getID(); 00314 Cost = TLI->getRepRegClassCostFor(VT); 00315 } 00316 } 00317 00318 /// Schedule - Schedule the DAG using list scheduling. 00319 void ScheduleDAGRRList::Schedule() { 00320 DEBUG(dbgs() 00321 << "********** List Scheduling BB#" << BB->getNumber() 00322 << " '" << BB->getName() << "' **********\n"); 00323 00324 CurCycle = 0; 00325 IssueCount = 0; 00326 MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; 00327 NumLiveRegs = 0; 00328 // Allocate slots for each physical register, plus one for a special register 00329 // to track the virtual resource of a calling sequence. 00330 LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); 00331 LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); 00332 CallSeqEndForStart.clear(); 00333 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); 00334 00335 // Build the scheduling graph. 00336 BuildSchedGraph(NULL); 00337 00338 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 00339 SUnits[su].dumpAll(this)); 00340 Topo.InitDAGTopologicalSorting(); 00341 00342 AvailableQueue->initNodes(SUnits); 00343 00344 HazardRec->Reset(); 00345 00346 // Execute the actual scheduling loop. 00347 ListScheduleBottomUp(); 00348 00349 AvailableQueue->releaseState(); 00350 00351 DEBUG({ 00352 dbgs() << "*** Final schedule ***\n"; 00353 dumpSchedule(); 00354 dbgs() << '\n'; 00355 }); 00356 } 00357 00358 //===----------------------------------------------------------------------===// 00359 // Bottom-Up Scheduling 00360 //===----------------------------------------------------------------------===// 00361 00362 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 00363 /// the AvailableQueue if the count reaches zero. Also update its cycle bound. 00364 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 00365 SUnit *PredSU = PredEdge->getSUnit(); 00366 00367 #ifndef NDEBUG 00368 if (PredSU->NumSuccsLeft == 0) { 00369 dbgs() << "*** Scheduling failed! ***\n"; 00370 PredSU->dump(this); 00371 dbgs() << " has been released too many times!\n"; 00372 llvm_unreachable(0); 00373 } 00374 #endif 00375 --PredSU->NumSuccsLeft; 00376 00377 if (!forceUnitLatencies()) { 00378 // Updating predecessor's height. This is now the cycle when the 00379 // predecessor can be scheduled without causing a pipeline stall. 00380 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); 00381 } 00382 00383 // If all the node's successors are scheduled, this node is ready 00384 // to be scheduled. Ignore the special EntrySU node. 00385 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 00386 PredSU->isAvailable = true; 00387 00388 unsigned Height = PredSU->getHeight(); 00389 if (Height < MinAvailableCycle) 00390 MinAvailableCycle = Height; 00391 00392 if (isReady(PredSU)) { 00393 AvailableQueue->push(PredSU); 00394 } 00395 // CapturePred and others may have left the node in the pending queue, avoid 00396 // adding it twice. 00397 else if (!PredSU->isPending) { 00398 PredSU->isPending = true; 00399 PendingQueue.push_back(PredSU); 00400 } 00401 } 00402 } 00403 00404 /// IsChainDependent - Test if Outer is reachable from Inner through 00405 /// chain dependencies. 00406 static bool IsChainDependent(SDNode *Outer, SDNode *Inner, 00407 unsigned NestLevel, 00408 const TargetInstrInfo *TII) { 00409 SDNode *N = Outer; 00410 for (;;) { 00411 if (N == Inner) 00412 return true; 00413 // For a TokenFactor, examine each operand. There may be multiple ways 00414 // to get to the CALLSEQ_BEGIN, but we need to find the path with the 00415 // most nesting in order to ensure that we find the corresponding match. 00416 if (N->getOpcode() == ISD::TokenFactor) { 00417 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 00418 if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII)) 00419 return true; 00420 return false; 00421 } 00422 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 00423 if (N->isMachineOpcode()) { 00424 if (N->getMachineOpcode() == 00425 (unsigned)TII->getCallFrameDestroyOpcode()) { 00426 ++NestLevel; 00427 } else if (N->getMachineOpcode() == 00428 (unsigned)TII->getCallFrameSetupOpcode()) { 00429 if (NestLevel == 0) 00430 return false; 00431 --NestLevel; 00432 } 00433 } 00434 // Otherwise, find the chain and continue climbing. 00435 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 00436 if (N->getOperand(i).getValueType() == MVT::Other) { 00437 N = N->getOperand(i).getNode(); 00438 goto found_chain_operand; 00439 } 00440 return false; 00441 found_chain_operand:; 00442 if (N->getOpcode() == ISD::EntryToken) 00443 return false; 00444 } 00445 } 00446 00447 /// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate 00448 /// the corresponding (lowered) CALLSEQ_BEGIN node. 00449 /// 00450 /// NestLevel and MaxNested are used in recursion to indcate the current level 00451 /// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum 00452 /// level seen so far. 00453 /// 00454 /// TODO: It would be better to give CALLSEQ_END an explicit operand to point 00455 /// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. 00456 static SDNode * 00457 FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, 00458 const TargetInstrInfo *TII) { 00459 for (;;) { 00460 // For a TokenFactor, examine each operand. There may be multiple ways 00461 // to get to the CALLSEQ_BEGIN, but we need to find the path with the 00462 // most nesting in order to ensure that we find the corresponding match. 00463 if (N->getOpcode() == ISD::TokenFactor) { 00464 SDNode *Best = 0; 00465 unsigned BestMaxNest = MaxNest; 00466 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 00467 unsigned MyNestLevel = NestLevel; 00468 unsigned MyMaxNest = MaxNest; 00469 if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), 00470 MyNestLevel, MyMaxNest, TII)) 00471 if (!Best || (MyMaxNest > BestMaxNest)) { 00472 Best = New; 00473 BestMaxNest = MyMaxNest; 00474 } 00475 } 00476 assert(Best); 00477 MaxNest = BestMaxNest; 00478 return Best; 00479 } 00480 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 00481 if (N->isMachineOpcode()) { 00482 if (N->getMachineOpcode() == 00483 (unsigned)TII->getCallFrameDestroyOpcode()) { 00484 ++NestLevel; 00485 MaxNest = std::max(MaxNest, NestLevel); 00486 } else if (N->getMachineOpcode() == 00487 (unsigned)TII->getCallFrameSetupOpcode()) { 00488 assert(NestLevel != 0); 00489 --NestLevel; 00490 if (NestLevel == 0) 00491 return N; 00492 } 00493 } 00494 // Otherwise, find the chain and continue climbing. 00495 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 00496 if (N->getOperand(i).getValueType() == MVT::Other) { 00497 N = N->getOperand(i).getNode(); 00498 goto found_chain_operand; 00499 } 00500 return 0; 00501 found_chain_operand:; 00502 if (N->getOpcode() == ISD::EntryToken) 00503 return 0; 00504 } 00505 } 00506 00507 /// Call ReleasePred for each predecessor, then update register live def/gen. 00508 /// Always update LiveRegDefs for a register dependence even if the current SU 00509 /// also defines the register. This effectively create one large live range 00510 /// across a sequence of two-address node. This is important because the 00511 /// entire chain must be scheduled together. Example: 00512 /// 00513 /// flags = (3) add 00514 /// flags = (2) addc flags 00515 /// flags = (1) addc flags 00516 /// 00517 /// results in 00518 /// 00519 /// LiveRegDefs[flags] = 3 00520 /// LiveRegGens[flags] = 1 00521 /// 00522 /// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid 00523 /// interference on flags. 00524 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { 00525 // Bottom up: release predecessors 00526 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 00527 I != E; ++I) { 00528 ReleasePred(SU, &*I); 00529 if (I->isAssignedRegDep()) { 00530 // This is a physical register dependency and it's impossible or 00531 // expensive to copy the register. Make sure nothing that can 00532 // clobber the register is scheduled between the predecessor and 00533 // this node. 00534 SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; 00535 assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && 00536 "interference on register dependence"); 00537 LiveRegDefs[I->getReg()] = I->getSUnit(); 00538 if (!LiveRegGens[I->getReg()]) { 00539 ++NumLiveRegs; 00540 LiveRegGens[I->getReg()] = SU; 00541 } 00542 } 00543 } 00544 00545 // If we're scheduling a lowered CALLSEQ_END, find the corresponding 00546 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between 00547 // these nodes, to prevent other calls from being interscheduled with them. 00548 unsigned CallResource = TRI->getNumRegs(); 00549 if (!LiveRegDefs[CallResource]) 00550 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) 00551 if (Node->isMachineOpcode() && 00552 Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { 00553 unsigned NestLevel = 0; 00554 unsigned MaxNest = 0; 00555 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); 00556 00557 SUnit *Def = &SUnits[N->getNodeId()]; 00558 CallSeqEndForStart[Def] = SU; 00559 00560 ++NumLiveRegs; 00561 LiveRegDefs[CallResource] = Def; 00562 LiveRegGens[CallResource] = SU; 00563 break; 00564 } 00565 } 00566 00567 /// Check to see if any of the pending instructions are ready to issue. If 00568 /// so, add them to the available queue. 00569 void ScheduleDAGRRList::ReleasePending() { 00570 if (DisableSchedCycles) { 00571 assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); 00572 return; 00573 } 00574 00575 // If the available queue is empty, it is safe to reset MinAvailableCycle. 00576 if (AvailableQueue->empty()) 00577 MinAvailableCycle = UINT_MAX; 00578 00579 // Check to see if any of the pending instructions are ready to issue. If 00580 // so, add them to the available queue. 00581 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { 00582 unsigned ReadyCycle = PendingQueue[i]->getHeight(); 00583 if (ReadyCycle < MinAvailableCycle) 00584 MinAvailableCycle = ReadyCycle; 00585 00586 if (PendingQueue[i]->isAvailable) { 00587 if (!isReady(PendingQueue[i])) 00588 continue; 00589 AvailableQueue->push(PendingQueue[i]); 00590 } 00591 PendingQueue[i]->isPending = false; 00592 PendingQueue[i] = PendingQueue.back(); 00593 PendingQueue.pop_back(); 00594 --i; --e; 00595 } 00596 } 00597 00598 /// Move the scheduler state forward by the specified number of Cycles. 00599 void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { 00600 if (NextCycle <= CurCycle) 00601 return; 00602 00603 IssueCount = 0; 00604 AvailableQueue->setCurCycle(NextCycle); 00605 if (!HazardRec->isEnabled()) { 00606 // Bypass lots of virtual calls in case of long latency. 00607 CurCycle = NextCycle; 00608 } 00609 else { 00610 for (; CurCycle != NextCycle; ++CurCycle) { 00611 HazardRec->RecedeCycle(); 00612 } 00613 } 00614 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the 00615 // available Q to release pending nodes at least once before popping. 00616 ReleasePending(); 00617 } 00618 00619 /// Move the scheduler state forward until the specified node's dependents are 00620 /// ready and can be scheduled with no resource conflicts. 00621 void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { 00622 if (DisableSchedCycles) 00623 return; 00624 00625 // FIXME: Nodes such as CopyFromReg probably should not advance the current 00626 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node 00627 // has predecessors the cycle will be advanced when they are scheduled. 00628 // But given the crude nature of modeling latency though such nodes, we 00629 // currently need to treat these nodes like real instructions. 00630 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; 00631 00632 unsigned ReadyCycle = SU->getHeight(); 00633 00634 // Bump CurCycle to account for latency. We assume the latency of other 00635 // available instructions may be hidden by the stall (not a full pipe stall). 00636 // This updates the hazard recognizer's cycle before reserving resources for 00637 // this instruction. 00638 AdvanceToCycle(ReadyCycle); 00639 00640 // Calls are scheduled in their preceding cycle, so don't conflict with 00641 // hazards from instructions after the call. EmitNode will reset the 00642 // scoreboard state before emitting the call. 00643 if (SU->isCall) 00644 return; 00645 00646 // FIXME: For resource conflicts in very long non-pipelined stages, we 00647 // should probably skip ahead here to avoid useless scoreboard checks. 00648 int Stalls = 0; 00649 while (true) { 00650 ScheduleHazardRecognizer::HazardType HT = 00651 HazardRec->getHazardType(SU, -Stalls); 00652 00653 if (HT == ScheduleHazardRecognizer::NoHazard) 00654 break; 00655 00656 ++Stalls; 00657 } 00658 AdvanceToCycle(CurCycle + Stalls); 00659 } 00660 00661 /// Record this SUnit in the HazardRecognizer. 00662 /// Does not update CurCycle. 00663 void ScheduleDAGRRList::EmitNode(SUnit *SU) { 00664 if (!HazardRec->isEnabled()) 00665 return; 00666 00667 // Check for phys reg copy. 00668 if (!SU->getNode()) 00669 return; 00670 00671 switch (SU->getNode()->getOpcode()) { 00672 default: 00673 assert(SU->getNode()->isMachineOpcode() && 00674 "This target-independent node should not be scheduled."); 00675 break; 00676 case ISD::MERGE_VALUES: 00677 case ISD::TokenFactor: 00678 case ISD::LIFETIME_START: 00679 case ISD::LIFETIME_END: 00680 case ISD::CopyToReg: 00681 case ISD::CopyFromReg: 00682 case ISD::EH_LABEL: 00683 // Noops don't affect the scoreboard state. Copies are likely to be 00684 // removed. 00685 return; 00686 case ISD::INLINEASM: 00687 // For inline asm, clear the pipeline state. 00688 HazardRec->Reset(); 00689 return; 00690 } 00691 if (SU->isCall) { 00692 // Calls are scheduled with their preceding instructions. For bottom-up 00693 // scheduling, clear the pipeline state before emitting. 00694 HazardRec->Reset(); 00695 } 00696 00697 HazardRec->EmitInstruction(SU); 00698 } 00699 00700 static void resetVRegCycle(SUnit *SU); 00701 00702 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 00703 /// count of its predecessors. If a predecessor pending count is zero, add it to 00704 /// the Available queue. 00705 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { 00706 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); 00707 DEBUG(SU->dump(this)); 00708 00709 #ifndef NDEBUG 00710 if (CurCycle < SU->getHeight()) 00711 DEBUG(dbgs() << " Height [" << SU->getHeight() 00712 << "] pipeline stall!\n"); 00713 #endif 00714 00715 // FIXME: Do not modify node height. It may interfere with 00716 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the 00717 // node its ready cycle can aid heuristics, and after scheduling it can 00718 // indicate the scheduled cycle. 00719 SU->setHeightToAtLeast(CurCycle); 00720 00721 // Reserve resources for the scheduled intruction. 00722 EmitNode(SU); 00723 00724 Sequence.push_back(SU); 00725 00726 AvailableQueue->scheduledNode(SU); 00727 00728 // If HazardRec is disabled, and each inst counts as one cycle, then 00729 // advance CurCycle before ReleasePredecessors to avoid useless pushes to 00730 // PendingQueue for schedulers that implement HasReadyFilter. 00731 if (!HazardRec->isEnabled() && AvgIPC < 2) 00732 AdvanceToCycle(CurCycle + 1); 00733 00734 // Update liveness of predecessors before successors to avoid treating a 00735 // two-address node as a live range def. 00736 ReleasePredecessors(SU); 00737 00738 // Release all the implicit physical register defs that are live. 00739 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 00740 I != E; ++I) { 00741 // LiveRegDegs[I->getReg()] != SU when SU is a two-address node. 00742 if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { 00743 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 00744 --NumLiveRegs; 00745 LiveRegDefs[I->getReg()] = NULL; 00746 LiveRegGens[I->getReg()] = NULL; 00747 releaseInterferences(I->getReg()); 00748 } 00749 } 00750 // Release the special call resource dependence, if this is the beginning 00751 // of a call. 00752 unsigned CallResource = TRI->getNumRegs(); 00753 if (LiveRegDefs[CallResource] == SU) 00754 for (const SDNode *SUNode = SU->getNode(); SUNode; 00755 SUNode = SUNode->getGluedNode()) { 00756 if (SUNode->isMachineOpcode() && 00757 SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { 00758 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 00759 --NumLiveRegs; 00760 LiveRegDefs[CallResource] = NULL; 00761 LiveRegGens[CallResource] = NULL; 00762 releaseInterferences(CallResource); 00763 } 00764 } 00765 00766 resetVRegCycle(SU); 00767 00768 SU->isScheduled = true; 00769 00770 // Conditions under which the scheduler should eagerly advance the cycle: 00771 // (1) No available instructions 00772 // (2) All pipelines full, so available instructions must have hazards. 00773 // 00774 // If HazardRec is disabled, the cycle was pre-advanced before calling 00775 // ReleasePredecessors. In that case, IssueCount should remain 0. 00776 // 00777 // Check AvailableQueue after ReleasePredecessors in case of zero latency. 00778 if (HazardRec->isEnabled() || AvgIPC > 1) { 00779 if (SU->getNode() && SU->getNode()->isMachineOpcode()) 00780 ++IssueCount; 00781 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) 00782 || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) 00783 AdvanceToCycle(CurCycle + 1); 00784 } 00785 } 00786 00787 /// CapturePred - This does the opposite of ReleasePred. Since SU is being 00788 /// unscheduled, incrcease the succ left count of its predecessors. Remove 00789 /// them from AvailableQueue if necessary. 00790 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 00791 SUnit *PredSU = PredEdge->getSUnit(); 00792 if (PredSU->isAvailable) { 00793 PredSU->isAvailable = false; 00794 if (!PredSU->isPending) 00795 AvailableQueue->remove(PredSU); 00796 } 00797 00798 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); 00799 ++PredSU->NumSuccsLeft; 00800 } 00801 00802 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 00803 /// its predecessor states to reflect the change. 00804 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 00805 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 00806 DEBUG(SU->dump(this)); 00807 00808 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 00809 I != E; ++I) { 00810 CapturePred(&*I); 00811 if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){ 00812 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 00813 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 00814 "Physical register dependency violated?"); 00815 --NumLiveRegs; 00816 LiveRegDefs[I->getReg()] = NULL; 00817 LiveRegGens[I->getReg()] = NULL; 00818 releaseInterferences(I->getReg()); 00819 } 00820 } 00821 00822 // Reclaim the special call resource dependence, if this is the beginning 00823 // of a call. 00824 unsigned CallResource = TRI->getNumRegs(); 00825 for (const SDNode *SUNode = SU->getNode(); SUNode; 00826 SUNode = SUNode->getGluedNode()) { 00827 if (SUNode->isMachineOpcode() && 00828 SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { 00829 ++NumLiveRegs; 00830 LiveRegDefs[CallResource] = SU; 00831 LiveRegGens[CallResource] = CallSeqEndForStart[SU]; 00832 } 00833 } 00834 00835 // Release the special call resource dependence, if this is the end 00836 // of a call. 00837 if (LiveRegGens[CallResource] == SU) 00838 for (const SDNode *SUNode = SU->getNode(); SUNode; 00839 SUNode = SUNode->getGluedNode()) { 00840 if (SUNode->isMachineOpcode() && 00841 SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { 00842 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 00843 --NumLiveRegs; 00844 LiveRegDefs[CallResource] = NULL; 00845 LiveRegGens[CallResource] = NULL; 00846 releaseInterferences(CallResource); 00847 } 00848 } 00849 00850 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 00851 I != E; ++I) { 00852 if (I->isAssignedRegDep()) { 00853 if (!LiveRegDefs[I->getReg()]) 00854 ++NumLiveRegs; 00855 // This becomes the nearest def. Note that an earlier def may still be 00856 // pending if this is a two-address node. 00857 LiveRegDefs[I->getReg()] = SU; 00858 if (LiveRegGens[I->getReg()] == NULL || 00859 I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) 00860 LiveRegGens[I->getReg()] = I->getSUnit(); 00861 } 00862 } 00863 if (SU->getHeight() < MinAvailableCycle) 00864 MinAvailableCycle = SU->getHeight(); 00865 00866 SU->setHeightDirty(); 00867 SU->isScheduled = false; 00868 SU->isAvailable = true; 00869 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { 00870 // Don't make available until backtracking is complete. 00871 SU->isPending = true; 00872 PendingQueue.push_back(SU); 00873 } 00874 else { 00875 AvailableQueue->push(SU); 00876 } 00877 AvailableQueue->unscheduledNode(SU); 00878 } 00879 00880 /// After backtracking, the hazard checker needs to be restored to a state 00881 /// corresponding the current cycle. 00882 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { 00883 HazardRec->Reset(); 00884 00885 unsigned LookAhead = std::min((unsigned)Sequence.size(), 00886 HazardRec->getMaxLookAhead()); 00887 if (LookAhead == 0) 00888 return; 00889 00890 std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); 00891 unsigned HazardCycle = (*I)->getHeight(); 00892 for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { 00893 SUnit *SU = *I; 00894 for (; SU->getHeight() > HazardCycle; ++HazardCycle) { 00895 HazardRec->RecedeCycle(); 00896 } 00897 EmitNode(SU); 00898 } 00899 } 00900 00901 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 00902 /// BTCycle in order to schedule a specific node. 00903 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { 00904 SUnit *OldSU = Sequence.back(); 00905 while (true) { 00906 Sequence.pop_back(); 00907 // FIXME: use ready cycle instead of height 00908 CurCycle = OldSU->getHeight(); 00909 UnscheduleNodeBottomUp(OldSU); 00910 AvailableQueue->setCurCycle(CurCycle); 00911 if (OldSU == BtSU) 00912 break; 00913 OldSU = Sequence.back(); 00914 } 00915 00916 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 00917 00918 RestoreHazardCheckerBottomUp(); 00919 00920 ReleasePending(); 00921 00922 ++NumBacktracks; 00923 } 00924 00925 static bool isOperandOf(const SUnit *SU, SDNode *N) { 00926 for (const SDNode *SUNode = SU->getNode(); SUNode; 00927 SUNode = SUNode->getGluedNode()) { 00928 if (SUNode->isOperandOf(N)) 00929 return true; 00930 } 00931 return false; 00932 } 00933 00934 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 00935 /// successors to the newly created node. 00936 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 00937 SDNode *N = SU->getNode(); 00938 if (!N) 00939 return NULL; 00940 00941 if (SU->getNode()->getGluedNode()) 00942 return NULL; 00943 00944 SUnit *NewSU; 00945 bool TryUnfold = false; 00946 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 00947 EVT VT = N->getValueType(i); 00948 if (VT == MVT::Glue) 00949 return NULL; 00950 else if (VT == MVT::Other) 00951 TryUnfold = true; 00952 } 00953 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 00954 const SDValue &Op = N->getOperand(i); 00955 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 00956 if (VT == MVT::Glue) 00957 return NULL; 00958 } 00959 00960 if (TryUnfold) { 00961 SmallVector<SDNode*, 2> NewNodes; 00962 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 00963 return NULL; 00964 00965 // unfolding an x86 DEC64m operation results in store, dec, load which 00966 // can't be handled here so quit 00967 if (NewNodes.size() == 3) 00968 return NULL; 00969 00970 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); 00971 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 00972 00973 N = NewNodes[1]; 00974 SDNode *LoadNode = NewNodes[0]; 00975 unsigned NumVals = N->getNumValues(); 00976 unsigned OldNumVals = SU->getNode()->getNumValues(); 00977 for (unsigned i = 0; i != NumVals; ++i) 00978 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 00979 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 00980 SDValue(LoadNode, 1)); 00981 00982 // LoadNode may already exist. This can happen when there is another 00983 // load from the same location and producing the same type of value 00984 // but it has different alignment or volatileness. 00985 bool isNewLoad = true; 00986 SUnit *LoadSU; 00987 if (LoadNode->getNodeId() != -1) { 00988 LoadSU = &SUnits[LoadNode->getNodeId()]; 00989 isNewLoad = false; 00990 } else { 00991 LoadSU = CreateNewSUnit(LoadNode); 00992 LoadNode->setNodeId(LoadSU->NodeNum); 00993 00994 InitNumRegDefsLeft(LoadSU); 00995 computeLatency(LoadSU); 00996 } 00997 00998 SUnit *NewSU = CreateNewSUnit(N); 00999 assert(N->getNodeId() == -1 && "Node already inserted!"); 01000 N->setNodeId(NewSU->NodeNum); 01001 01002 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 01003 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { 01004 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { 01005 NewSU->isTwoAddress = true; 01006 break; 01007 } 01008 } 01009 if (MCID.isCommutable()) 01010 NewSU->isCommutable = true; 01011 01012 InitNumRegDefsLeft(NewSU); 01013 computeLatency(NewSU); 01014 01015 // Record all the edges to and from the old SU, by category. 01016 SmallVector<SDep, 4> ChainPreds; 01017 SmallVector<SDep, 4> ChainSuccs; 01018 SmallVector<SDep, 4> LoadPreds; 01019 SmallVector<SDep, 4> NodePreds; 01020 SmallVector<SDep, 4> NodeSuccs; 01021 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 01022 I != E; ++I) { 01023 if (I->isCtrl()) 01024 ChainPreds.push_back(*I); 01025 else if (isOperandOf(I->getSUnit(), LoadNode)) 01026 LoadPreds.push_back(*I); 01027 else 01028 NodePreds.push_back(*I); 01029 } 01030 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 01031 I != E; ++I) { 01032 if (I->isCtrl()) 01033 ChainSuccs.push_back(*I); 01034 else 01035 NodeSuccs.push_back(*I); 01036 } 01037 01038 // Now assign edges to the newly-created nodes. 01039 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 01040 const SDep &Pred = ChainPreds[i]; 01041 RemovePred(SU, Pred); 01042 if (isNewLoad) 01043 AddPred(LoadSU, Pred); 01044 } 01045 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 01046 const SDep &Pred = LoadPreds[i]; 01047 RemovePred(SU, Pred); 01048 if (isNewLoad) 01049 AddPred(LoadSU, Pred); 01050 } 01051 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 01052 const SDep &Pred = NodePreds[i]; 01053 RemovePred(SU, Pred); 01054 AddPred(NewSU, Pred); 01055 } 01056 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 01057 SDep D = NodeSuccs[i]; 01058 SUnit *SuccDep = D.getSUnit(); 01059 D.setSUnit(SU); 01060 RemovePred(SuccDep, D); 01061 D.setSUnit(NewSU); 01062 AddPred(SuccDep, D); 01063 // Balance register pressure. 01064 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled 01065 && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) 01066 --NewSU->NumRegDefsLeft; 01067 } 01068 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 01069 SDep D = ChainSuccs[i]; 01070 SUnit *SuccDep = D.getSUnit(); 01071 D.setSUnit(SU); 01072 RemovePred(SuccDep, D); 01073 if (isNewLoad) { 01074 D.setSUnit(LoadSU); 01075 AddPred(SuccDep, D); 01076 } 01077 } 01078 01079 // Add a data dependency to reflect that NewSU reads the value defined 01080 // by LoadSU. 01081 SDep D(LoadSU, SDep::Data, 0); 01082 D.setLatency(LoadSU->Latency); 01083 AddPred(NewSU, D); 01084 01085 if (isNewLoad) 01086 AvailableQueue->addNode(LoadSU); 01087 AvailableQueue->addNode(NewSU); 01088 01089 ++NumUnfolds; 01090 01091 if (NewSU->NumSuccsLeft == 0) { 01092 NewSU->isAvailable = true; 01093 return NewSU; 01094 } 01095 SU = NewSU; 01096 } 01097 01098 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); 01099 NewSU = CreateClone(SU); 01100 01101 // New SUnit has the exact same predecessors. 01102 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 01103 I != E; ++I) 01104 if (!I->isArtificial()) 01105 AddPred(NewSU, *I); 01106 01107 // Only copy scheduled successors. Cut them from old node's successor 01108 // list and move them over. 01109 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 01110 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 01111 I != E; ++I) { 01112 if (I->isArtificial()) 01113 continue; 01114 SUnit *SuccSU = I->getSUnit(); 01115 if (SuccSU->isScheduled) { 01116 SDep D = *I; 01117 D.setSUnit(NewSU); 01118 AddPred(SuccSU, D); 01119 D.setSUnit(SU); 01120 DelDeps.push_back(std::make_pair(SuccSU, D)); 01121 } 01122 } 01123 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 01124 RemovePred(DelDeps[i].first, DelDeps[i].second); 01125 01126 AvailableQueue->updateNode(SU); 01127 AvailableQueue->addNode(NewSU); 01128 01129 ++NumDups; 01130 return NewSU; 01131 } 01132 01133 /// InsertCopiesAndMoveSuccs - Insert register copies and move all 01134 /// scheduled successors of the given SUnit to the last copy. 01135 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 01136 const TargetRegisterClass *DestRC, 01137 const TargetRegisterClass *SrcRC, 01138 SmallVector<SUnit*, 2> &Copies) { 01139 SUnit *CopyFromSU = CreateNewSUnit(NULL); 01140 CopyFromSU->CopySrcRC = SrcRC; 01141 CopyFromSU->CopyDstRC = DestRC; 01142 01143 SUnit *CopyToSU = CreateNewSUnit(NULL); 01144 CopyToSU->CopySrcRC = DestRC; 01145 CopyToSU->CopyDstRC = SrcRC; 01146 01147 // Only copy scheduled successors. Cut them from old node's successor 01148 // list and move them over. 01149 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 01150 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 01151 I != E; ++I) { 01152 if (I->isArtificial()) 01153 continue; 01154 SUnit *SuccSU = I->getSUnit(); 01155 if (SuccSU->isScheduled) { 01156 SDep D = *I; 01157 D.setSUnit(CopyToSU); 01158 AddPred(SuccSU, D); 01159 DelDeps.push_back(std::make_pair(SuccSU, *I)); 01160 } 01161 else { 01162 // Avoid scheduling the def-side copy before other successors. Otherwise 01163 // we could introduce another physreg interference on the copy and 01164 // continue inserting copies indefinitely. 01165 AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); 01166 } 01167 } 01168 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 01169 RemovePred(DelDeps[i].first, DelDeps[i].second); 01170 01171 SDep FromDep(SU, SDep::Data, Reg); 01172 FromDep.setLatency(SU->Latency); 01173 AddPred(CopyFromSU, FromDep); 01174 SDep ToDep(CopyFromSU, SDep::Data, 0); 01175 ToDep.setLatency(CopyFromSU->Latency); 01176 AddPred(CopyToSU, ToDep); 01177 01178 AvailableQueue->updateNode(SU); 01179 AvailableQueue->addNode(CopyFromSU); 01180 AvailableQueue->addNode(CopyToSU); 01181 Copies.push_back(CopyFromSU); 01182 Copies.push_back(CopyToSU); 01183 01184 ++NumPRCopies; 01185 } 01186 01187 /// getPhysicalRegisterVT - Returns the ValueType of the physical register 01188 /// definition of the specified node. 01189 /// FIXME: Move to SelectionDAG? 01190 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 01191 const TargetInstrInfo *TII) { 01192 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); 01193 assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 01194 unsigned NumRes = MCID.getNumDefs(); 01195 for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { 01196 if (Reg == *ImpDef) 01197 break; 01198 ++NumRes; 01199 } 01200 return N->getValueType(NumRes); 01201 } 01202 01203 /// CheckForLiveRegDef - Return true and update live register vector if the 01204 /// specified register def of the specified SUnit clobbers any "live" registers. 01205 static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, 01206 std::vector<SUnit*> &LiveRegDefs, 01207 SmallSet<unsigned, 4> &RegAdded, 01208 SmallVector<unsigned, 4> &LRegs, 01209 const TargetRegisterInfo *TRI) { 01210 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { 01211 01212 // Check if Ref is live. 01213 if (!LiveRegDefs[*AliasI]) continue; 01214 01215 // Allow multiple uses of the same def. 01216 if (LiveRegDefs[*AliasI] == SU) continue; 01217 01218 // Add Reg to the set of interfering live regs. 01219 if (RegAdded.insert(*AliasI)) { 01220 LRegs.push_back(*AliasI); 01221 } 01222 } 01223 } 01224 01225 /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered 01226 /// by RegMask, and add them to LRegs. 01227 static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, 01228 std::vector<SUnit*> &LiveRegDefs, 01229 SmallSet<unsigned, 4> &RegAdded, 01230 SmallVector<unsigned, 4> &LRegs) { 01231 // Look at all live registers. Skip Reg0 and the special CallResource. 01232 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { 01233 if (!LiveRegDefs[i]) continue; 01234 if (LiveRegDefs[i] == SU) continue; 01235 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; 01236 if (RegAdded.insert(i)) 01237 LRegs.push_back(i); 01238 } 01239 } 01240 01241 /// getNodeRegMask - Returns the register mask attached to an SDNode, if any. 01242 static const uint32_t *getNodeRegMask(const SDNode *N) { 01243 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 01244 if (const RegisterMaskSDNode *Op = 01245 dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) 01246 return Op->getRegMask(); 01247 return NULL; 01248 } 01249 01250 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 01251 /// scheduling of the given node to satisfy live physical register dependencies. 01252 /// If the specific node is the last one that's available to schedule, do 01253 /// whatever is necessary (i.e. backtracking or cloning) to make it possible. 01254 bool ScheduleDAGRRList:: 01255 DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { 01256 if (NumLiveRegs == 0) 01257 return false; 01258 01259 SmallSet<unsigned, 4> RegAdded; 01260 // If this node would clobber any "live" register, then it's not ready. 01261 // 01262 // If SU is the currently live definition of the same register that it uses, 01263 // then we are free to schedule it. 01264 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 01265 I != E; ++I) { 01266 if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) 01267 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 01268 RegAdded, LRegs, TRI); 01269 } 01270 01271 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { 01272 if (Node->getOpcode() == ISD::INLINEASM) { 01273 // Inline asm can clobber physical defs. 01274 unsigned NumOps = Node->getNumOperands(); 01275 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) 01276 --NumOps; // Ignore the glue operand. 01277 01278 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 01279 unsigned Flags = 01280 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 01281 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 01282 01283 ++i; // Skip the ID value. 01284 if (InlineAsm::isRegDefKind(Flags) || 01285 InlineAsm::isRegDefEarlyClobberKind(Flags) || 01286 InlineAsm::isClobberKind(Flags)) { 01287 // Check for def of register or earlyclobber register. 01288 for (; NumVals; --NumVals, ++i) { 01289 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 01290 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 01291 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 01292 } 01293 } else 01294 i += NumVals; 01295 } 01296 continue; 01297 } 01298 01299 if (!Node->isMachineOpcode()) 01300 continue; 01301 // If we're in the middle of scheduling a call, don't begin scheduling 01302 // another call. Also, don't allow any physical registers to be live across 01303 // the call. 01304 if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { 01305 // Check the special calling-sequence resource. 01306 unsigned CallResource = TRI->getNumRegs(); 01307 if (LiveRegDefs[CallResource]) { 01308 SDNode *Gen = LiveRegGens[CallResource]->getNode(); 01309 while (SDNode *Glued = Gen->getGluedNode()) 01310 Gen = Glued; 01311 if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) 01312 LRegs.push_back(CallResource); 01313 } 01314 } 01315 if (const uint32_t *RegMask = getNodeRegMask(Node)) 01316 CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); 01317 01318 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); 01319 if (!MCID.ImplicitDefs) 01320 continue; 01321 for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) 01322 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 01323 } 01324 01325 return !LRegs.empty(); 01326 } 01327 01328 void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { 01329 // Add the nodes that aren't ready back onto the available list. 01330 for (unsigned i = Interferences.size(); i > 0; --i) { 01331 SUnit *SU = Interferences[i-1]; 01332 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); 01333 if (Reg) { 01334 SmallVector<unsigned, 4> &LRegs = LRegsPos->second; 01335 if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) 01336 continue; 01337 } 01338 SU->isPending = false; 01339 // The interfering node may no longer be available due to backtracking. 01340 // Furthermore, it may have been made available again, in which case it is 01341 // now already in the AvailableQueue. 01342 if (SU->isAvailable && !SU->NodeQueueId) { 01343 DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); 01344 AvailableQueue->push(SU); 01345 } 01346 if (i < Interferences.size()) 01347 Interferences[i-1] = Interferences.back(); 01348 Interferences.pop_back(); 01349 LRegsMap.erase(LRegsPos); 01350 } 01351 } 01352 01353 /// Return a node that can be scheduled in this cycle. Requirements: 01354 /// (1) Ready: latency has been satisfied 01355 /// (2) No Hazards: resources are available 01356 /// (3) No Interferences: may unschedule to break register interferences. 01357 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { 01358 SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop(); 01359 while (CurSU) { 01360 SmallVector<unsigned, 4> LRegs; 01361 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 01362 break; 01363 DEBUG(dbgs() << " Interfering reg " << 01364 (LRegs[0] == TRI->getNumRegs() ? "CallResource" 01365 : TRI->getName(LRegs[0])) 01366 << " SU #" << CurSU->NodeNum << '\n'); 01367 std::pair<LRegsMapT::iterator, bool> LRegsPair = 01368 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 01369 if (LRegsPair.second) { 01370 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 01371 Interferences.push_back(CurSU); 01372 } 01373 else { 01374 assert(CurSU->isPending && "Intereferences are pending"); 01375 // Update the interference with current live regs. 01376 LRegsPair.first->second = LRegs; 01377 } 01378 CurSU = AvailableQueue->pop(); 01379 } 01380 if (CurSU) 01381 return CurSU; 01382 01383 // All candidates are delayed due to live physical reg dependencies. 01384 // Try backtracking, code duplication, or inserting cross class copies 01385 // to resolve it. 01386 for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { 01387 SUnit *TrySU = Interferences[i]; 01388 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 01389 01390 // Try unscheduling up to the point where it's safe to schedule 01391 // this node. 01392 SUnit *BtSU = NULL; 01393 unsigned LiveCycle = UINT_MAX; 01394 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 01395 unsigned Reg = LRegs[j]; 01396 if (LiveRegGens[Reg]->getHeight() < LiveCycle) { 01397 BtSU = LiveRegGens[Reg]; 01398 LiveCycle = BtSU->getHeight(); 01399 } 01400 } 01401 if (!WillCreateCycle(TrySU, BtSU)) { 01402 // BacktrackBottomUp mutates Interferences! 01403 BacktrackBottomUp(TrySU, BtSU); 01404 01405 // Force the current node to be scheduled before the node that 01406 // requires the physical reg dep. 01407 if (BtSU->isAvailable) { 01408 BtSU->isAvailable = false; 01409 if (!BtSU->isPending) 01410 AvailableQueue->remove(BtSU); 01411 } 01412 DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" 01413 << TrySU->NodeNum << ")\n"); 01414 AddPred(TrySU, SDep(BtSU, SDep::Artificial)); 01415 01416 // If one or more successors has been unscheduled, then the current 01417 // node is no longer available. 01418 if (!TrySU->isAvailable) 01419 CurSU = AvailableQueue->pop(); 01420 else { 01421 AvailableQueue->remove(TrySU); 01422 CurSU = TrySU; 01423 } 01424 // Interferences has been mutated. We must break. 01425 break; 01426 } 01427 } 01428 01429 if (!CurSU) { 01430 // Can't backtrack. If it's too expensive to copy the value, then try 01431 // duplicate the nodes that produces these "too expensive to copy" 01432 // values to break the dependency. In case even that doesn't work, 01433 // insert cross class copies. 01434 // If it's not too expensive, i.e. cost != -1, issue copies. 01435 SUnit *TrySU = Interferences[0]; 01436 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 01437 assert(LRegs.size() == 1 && "Can't handle this yet!"); 01438 unsigned Reg = LRegs[0]; 01439 SUnit *LRDef = LiveRegDefs[Reg]; 01440 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 01441 const TargetRegisterClass *RC = 01442 TRI->getMinimalPhysRegClass(Reg, VT); 01443 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 01444 01445 // If cross copy register class is the same as RC, then it must be possible 01446 // copy the value directly. Do not try duplicate the def. 01447 // If cross copy register class is not the same as RC, then it's possible to 01448 // copy the value but it require cross register class copies and it is 01449 // expensive. 01450 // If cross copy register class is null, then it's not possible to copy 01451 // the value at all. 01452 SUnit *NewDef = 0; 01453 if (DestRC != RC) { 01454 NewDef = CopyAndMoveSuccessors(LRDef); 01455 if (!DestRC && !NewDef) 01456 report_fatal_error("Can't handle live physical register dependency!"); 01457 } 01458 if (!NewDef) { 01459 // Issue copies, these can be expensive cross register class copies. 01460 SmallVector<SUnit*, 2> Copies; 01461 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 01462 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum 01463 << " to SU #" << Copies.front()->NodeNum << "\n"); 01464 AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); 01465 NewDef = Copies.back(); 01466 } 01467 01468 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum 01469 << " to SU #" << TrySU->NodeNum << "\n"); 01470 LiveRegDefs[Reg] = NewDef; 01471 AddPred(NewDef, SDep(TrySU, SDep::Artificial)); 01472 TrySU->isAvailable = false; 01473 CurSU = NewDef; 01474 } 01475 assert(CurSU && "Unable to resolve live physical register dependencies!"); 01476 return CurSU; 01477 } 01478 01479 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 01480 /// schedulers. 01481 void ScheduleDAGRRList::ListScheduleBottomUp() { 01482 // Release any predecessors of the special Exit node. 01483 ReleasePredecessors(&ExitSU); 01484 01485 // Add root to Available queue. 01486 if (!SUnits.empty()) { 01487 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 01488 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 01489 RootSU->isAvailable = true; 01490 AvailableQueue->push(RootSU); 01491 } 01492 01493 // While Available queue is not empty, grab the node with the highest 01494 // priority. If it is not ready put it back. Schedule the node. 01495 Sequence.reserve(SUnits.size()); 01496 while (!AvailableQueue->empty() || !Interferences.empty()) { 01497 DEBUG(dbgs() << "\nExamining Available:\n"; 01498 AvailableQueue->dump(this)); 01499 01500 // Pick the best node to schedule taking all constraints into 01501 // consideration. 01502 SUnit *SU = PickNodeToScheduleBottomUp(); 01503 01504 AdvancePastStalls(SU); 01505 01506 ScheduleNodeBottomUp(SU); 01507 01508 while (AvailableQueue->empty() && !PendingQueue.empty()) { 01509 // Advance the cycle to free resources. Skip ahead to the next ready SU. 01510 assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); 01511 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); 01512 } 01513 } 01514 01515 // Reverse the order if it is bottom up. 01516 std::reverse(Sequence.begin(), Sequence.end()); 01517 01518 #ifndef NDEBUG 01519 VerifyScheduledSequence(/*isBottomUp=*/true); 01520 #endif 01521 } 01522 01523 //===----------------------------------------------------------------------===// 01524 // RegReductionPriorityQueue Definition 01525 //===----------------------------------------------------------------------===// 01526 // 01527 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 01528 // to reduce register pressure. 01529 // 01530 namespace { 01531 class RegReductionPQBase; 01532 01533 struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { 01534 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } 01535 }; 01536 01537 #ifndef NDEBUG 01538 template<class SF> 01539 struct reverse_sort : public queue_sort { 01540 SF &SortFunc; 01541 reverse_sort(SF &sf) : SortFunc(sf) {} 01542 reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} 01543 01544 bool operator()(SUnit* left, SUnit* right) const { 01545 // reverse left/right rather than simply !SortFunc(left, right) 01546 // to expose different paths in the comparison logic. 01547 return SortFunc(right, left); 01548 } 01549 }; 01550 #endif // NDEBUG 01551 01552 /// bu_ls_rr_sort - Priority function for bottom up register pressure 01553 // reduction scheduler. 01554 struct bu_ls_rr_sort : public queue_sort { 01555 enum { 01556 IsBottomUp = true, 01557 HasReadyFilter = false 01558 }; 01559 01560 RegReductionPQBase *SPQ; 01561 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 01562 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 01563 01564 bool operator()(SUnit* left, SUnit* right) const; 01565 }; 01566 01567 // src_ls_rr_sort - Priority function for source order scheduler. 01568 struct src_ls_rr_sort : public queue_sort { 01569 enum { 01570 IsBottomUp = true, 01571 HasReadyFilter = false 01572 }; 01573 01574 RegReductionPQBase *SPQ; 01575 src_ls_rr_sort(RegReductionPQBase *spq) 01576 : SPQ(spq) {} 01577 src_ls_rr_sort(const src_ls_rr_sort &RHS) 01578 : SPQ(RHS.SPQ) {} 01579 01580 bool operator()(SUnit* left, SUnit* right) const; 01581 }; 01582 01583 // hybrid_ls_rr_sort - Priority function for hybrid scheduler. 01584 struct hybrid_ls_rr_sort : public queue_sort { 01585 enum { 01586 IsBottomUp = true, 01587 HasReadyFilter = false 01588 }; 01589 01590 RegReductionPQBase *SPQ; 01591 hybrid_ls_rr_sort(RegReductionPQBase *spq) 01592 : SPQ(spq) {} 01593 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) 01594 : SPQ(RHS.SPQ) {} 01595 01596 bool isReady(SUnit *SU, unsigned CurCycle) const; 01597 01598 bool operator()(SUnit* left, SUnit* right) const; 01599 }; 01600 01601 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) 01602 // scheduler. 01603 struct ilp_ls_rr_sort : public queue_sort { 01604 enum { 01605 IsBottomUp = true, 01606 HasReadyFilter = false 01607 }; 01608 01609 RegReductionPQBase *SPQ; 01610 ilp_ls_rr_sort(RegReductionPQBase *spq) 01611 : SPQ(spq) {} 01612 ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) 01613 : SPQ(RHS.SPQ) {} 01614 01615 bool isReady(SUnit *SU, unsigned CurCycle) const; 01616 01617 bool operator()(SUnit* left, SUnit* right) const; 01618 }; 01619 01620 class RegReductionPQBase : public SchedulingPriorityQueue { 01621 protected: 01622 std::vector<SUnit*> Queue; 01623 unsigned CurQueueId; 01624 bool TracksRegPressure; 01625 bool SrcOrder; 01626 01627 // SUnits - The SUnits for the current graph. 01628 std::vector<SUnit> *SUnits; 01629 01630 MachineFunction &MF; 01631 const TargetInstrInfo *TII; 01632 const TargetRegisterInfo *TRI; 01633 const TargetLowering *TLI; 01634 ScheduleDAGRRList *scheduleDAG; 01635 01636 // SethiUllmanNumbers - The SethiUllman number for each node. 01637 std::vector<unsigned> SethiUllmanNumbers; 01638 01639 /// RegPressure - Tracking current reg pressure per register class. 01640 /// 01641 std::vector<unsigned> RegPressure; 01642 01643 /// RegLimit - Tracking the number of allocatable registers per register 01644 /// class. 01645 std::vector<unsigned> RegLimit; 01646 01647 public: 01648 RegReductionPQBase(MachineFunction &mf, 01649 bool hasReadyFilter, 01650 bool tracksrp, 01651 bool srcorder, 01652 const TargetInstrInfo *tii, 01653 const TargetRegisterInfo *tri, 01654 const TargetLowering *tli) 01655 : SchedulingPriorityQueue(hasReadyFilter), 01656 CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), 01657 MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { 01658 if (TracksRegPressure) { 01659 unsigned NumRC = TRI->getNumRegClasses(); 01660 RegLimit.resize(NumRC); 01661 RegPressure.resize(NumRC); 01662 std::fill(RegLimit.begin(), RegLimit.end(), 0); 01663 std::fill(RegPressure.begin(), RegPressure.end(), 0); 01664 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 01665 E = TRI->regclass_end(); I != E; ++I) 01666 RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); 01667 } 01668 } 01669 01670 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 01671 scheduleDAG = scheduleDag; 01672 } 01673 01674 ScheduleHazardRecognizer* getHazardRec() { 01675 return scheduleDAG->getHazardRec(); 01676 } 01677 01678 void initNodes(std::vector<SUnit> &sunits); 01679 01680 void addNode(const SUnit *SU); 01681 01682 void updateNode(const SUnit *SU); 01683 01684 void releaseState() { 01685 SUnits = 0; 01686 SethiUllmanNumbers.clear(); 01687 std::fill(RegPressure.begin(), RegPressure.end(), 0); 01688 } 01689 01690 unsigned getNodePriority(const SUnit *SU) const; 01691 01692 unsigned getNodeOrdering(const SUnit *SU) const { 01693 if (!SU->getNode()) return 0; 01694 01695 return SU->getNode()->getIROrder(); 01696 } 01697 01698 bool empty() const { return Queue.empty(); } 01699 01700 void push(SUnit *U) { 01701 assert(!U->NodeQueueId && "Node in the queue already"); 01702 U->NodeQueueId = ++CurQueueId; 01703 Queue.push_back(U); 01704 } 01705 01706 void remove(SUnit *SU) { 01707 assert(!Queue.empty() && "Queue is empty!"); 01708 assert(SU->NodeQueueId != 0 && "Not in queue!"); 01709 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), 01710 SU); 01711 if (I != prior(Queue.end())) 01712 std::swap(*I, Queue.back()); 01713 Queue.pop_back(); 01714 SU->NodeQueueId = 0; 01715 } 01716 01717 bool tracksRegPressure() const { return TracksRegPressure; } 01718 01719 void dumpRegPressure() const; 01720 01721 bool HighRegPressure(const SUnit *SU) const; 01722 01723 bool MayReduceRegPressure(SUnit *SU) const; 01724 01725 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; 01726 01727 void scheduledNode(SUnit *SU); 01728 01729 void unscheduledNode(SUnit *SU); 01730 01731 protected: 01732 bool canClobber(const SUnit *SU, const SUnit *Op); 01733 void AddPseudoTwoAddrDeps(); 01734 void PrescheduleNodesWithMultipleUses(); 01735 void CalculateSethiUllmanNumbers(); 01736 }; 01737 01738 template<class SF> 01739 static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { 01740 std::vector<SUnit *>::iterator Best = Q.begin(); 01741 for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), 01742 E = Q.end(); I != E; ++I) 01743 if (Picker(*Best, *I)) 01744 Best = I; 01745 SUnit *V = *Best; 01746 if (Best != prior(Q.end())) 01747 std::swap(*Best, Q.back()); 01748 Q.pop_back(); 01749 return V; 01750 } 01751 01752 template<class SF> 01753 SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { 01754 #ifndef NDEBUG 01755 if (DAG->StressSched) { 01756 reverse_sort<SF> RPicker(Picker); 01757 return popFromQueueImpl(Q, RPicker); 01758 } 01759 #endif 01760 (void)DAG; 01761 return popFromQueueImpl(Q, Picker); 01762 } 01763 01764 template<class SF> 01765 class RegReductionPriorityQueue : public RegReductionPQBase { 01766 SF Picker; 01767 01768 public: 01769 RegReductionPriorityQueue(MachineFunction &mf, 01770 bool tracksrp, 01771 bool srcorder, 01772 const TargetInstrInfo *tii, 01773 const TargetRegisterInfo *tri, 01774 const TargetLowering *tli) 01775 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, 01776 tii, tri, tli), 01777 Picker(this) {} 01778 01779 bool isBottomUp() const { return SF::IsBottomUp; } 01780 01781 bool isReady(SUnit *U) const { 01782 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 01783 } 01784 01785 SUnit *pop() { 01786 if (Queue.empty()) return NULL; 01787 01788 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 01789 V->NodeQueueId = 0; 01790 return V; 01791 } 01792 01793 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 01794 void dump(ScheduleDAG *DAG) const { 01795 // Emulate pop() without clobbering NodeQueueIds. 01796 std::vector<SUnit*> DumpQueue = Queue; 01797 SF DumpPicker = Picker; 01798 while (!DumpQueue.empty()) { 01799 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 01800 dbgs() << "Height " << SU->getHeight() << ": "; 01801 SU->dump(DAG); 01802 } 01803 } 01804 #endif 01805 }; 01806 01807 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 01808 BURegReductionPriorityQueue; 01809 01810 typedef RegReductionPriorityQueue<src_ls_rr_sort> 01811 SrcRegReductionPriorityQueue; 01812 01813 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 01814 HybridBURRPriorityQueue; 01815 01816 typedef RegReductionPriorityQueue<ilp_ls_rr_sort> 01817 ILPBURRPriorityQueue; 01818 } // end anonymous namespace 01819 01820 //===----------------------------------------------------------------------===// 01821 // Static Node Priority for Register Pressure Reduction 01822 //===----------------------------------------------------------------------===// 01823 01824 // Check for special nodes that bypass scheduling heuristics. 01825 // Currently this pushes TokenFactor nodes down, but may be used for other 01826 // pseudo-ops as well. 01827 // 01828 // Return -1 to schedule right above left, 1 for left above right. 01829 // Return 0 if no bias exists. 01830 static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 01831 bool LSchedLow = left->isScheduleLow; 01832 bool RSchedLow = right->isScheduleLow; 01833 if (LSchedLow != RSchedLow) 01834 return LSchedLow < RSchedLow ? 1 : -1; 01835 return 0; 01836 } 01837 01838 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 01839 /// Smaller number is the higher priority. 01840 static unsigned 01841 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 01842 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 01843 if (SethiUllmanNumber != 0) 01844 return SethiUllmanNumber; 01845 01846 unsigned Extra = 0; 01847 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 01848 I != E; ++I) { 01849 if (I->isCtrl()) continue; // ignore chain preds 01850 SUnit *PredSU = I->getSUnit(); 01851 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 01852 if (PredSethiUllman > SethiUllmanNumber) { 01853 SethiUllmanNumber = PredSethiUllman; 01854 Extra = 0; 01855 } else if (PredSethiUllman == SethiUllmanNumber) 01856 ++Extra; 01857 } 01858 01859 SethiUllmanNumber += Extra; 01860 01861 if (SethiUllmanNumber == 0) 01862 SethiUllmanNumber = 1; 01863 01864 return SethiUllmanNumber; 01865 } 01866 01867 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 01868 /// scheduling units. 01869 void RegReductionPQBase::CalculateSethiUllmanNumbers() { 01870 SethiUllmanNumbers.assign(SUnits->size(), 0); 01871 01872 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 01873 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 01874 } 01875 01876 void RegReductionPQBase::addNode(const SUnit *SU) { 01877 unsigned SUSize = SethiUllmanNumbers.size(); 01878 if (SUnits->size() > SUSize) 01879 SethiUllmanNumbers.resize(SUSize*2, 0); 01880 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 01881 } 01882 01883 void RegReductionPQBase::updateNode(const SUnit *SU) { 01884 SethiUllmanNumbers[SU->NodeNum] = 0; 01885 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 01886 } 01887 01888 // Lower priority means schedule further down. For bottom-up scheduling, lower 01889 // priority SUs are scheduled before higher priority SUs. 01890 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { 01891 assert(SU->NodeNum < SethiUllmanNumbers.size()); 01892 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 01893 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 01894 // CopyToReg should be close to its uses to facilitate coalescing and 01895 // avoid spilling. 01896 return 0; 01897 if (Opc == TargetOpcode::EXTRACT_SUBREG || 01898 Opc == TargetOpcode::SUBREG_TO_REG || 01899 Opc == TargetOpcode::INSERT_SUBREG) 01900 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 01901 // close to their uses to facilitate coalescing. 01902 return 0; 01903 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 01904 // If SU does not have a register use, i.e. it doesn't produce a value 01905 // that would be consumed (e.g. store), then it terminates a chain of 01906 // computation. Give it a large SethiUllman number so it will be 01907 // scheduled right before its predecessors that it doesn't lengthen 01908 // their live ranges. 01909 return 0xffff; 01910 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 01911 // If SU does not have a register def, schedule it close to its uses 01912 // because it does not lengthen any live ranges. 01913 return 0; 01914 #if 1 01915 return SethiUllmanNumbers[SU->NodeNum]; 01916 #else 01917 unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; 01918 if (SU->isCallOp) { 01919 // FIXME: This assumes all of the defs are used as call operands. 01920 int NP = (int)Priority - SU->getNode()->getNumValues(); 01921 return (NP > 0) ? NP : 0; 01922 } 01923 return Priority; 01924 #endif 01925 } 01926 01927 //===----------------------------------------------------------------------===// 01928 // Register Pressure Tracking 01929 //===----------------------------------------------------------------------===// 01930 01931 void RegReductionPQBase::dumpRegPressure() const { 01932 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 01933 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 01934 E = TRI->regclass_end(); I != E; ++I) { 01935 const TargetRegisterClass *RC = *I; 01936 unsigned Id = RC->getID(); 01937 unsigned RP = RegPressure[Id]; 01938 if (!RP) continue; 01939 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] 01940 << '\n'); 01941 } 01942 #endif 01943 } 01944 01945 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { 01946 if (!TLI) 01947 return false; 01948 01949 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 01950 I != E; ++I) { 01951 if (I->isCtrl()) 01952 continue; 01953 SUnit *PredSU = I->getSUnit(); 01954 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 01955 // to cover the number of registers defined (they are all live). 01956 if (PredSU->NumRegDefsLeft == 0) { 01957 continue; 01958 } 01959 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 01960 RegDefPos.IsValid(); RegDefPos.Advance()) { 01961 unsigned RCId, Cost; 01962 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 01963 01964 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 01965 return true; 01966 } 01967 } 01968 return false; 01969 } 01970 01971 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { 01972 const SDNode *N = SU->getNode(); 01973 01974 if (!N->isMachineOpcode() || !SU->NumSuccs) 01975 return false; 01976 01977 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 01978 for (unsigned i = 0; i != NumDefs; ++i) { 01979 MVT VT = N->getSimpleValueType(i); 01980 if (!N->hasAnyUseOfValue(i)) 01981 continue; 01982 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 01983 if (RegPressure[RCId] >= RegLimit[RCId]) 01984 return true; 01985 } 01986 return false; 01987 } 01988 01989 // Compute the register pressure contribution by this instruction by count up 01990 // for uses that are not live and down for defs. Only count register classes 01991 // that are already under high pressure. As a side effect, compute the number of 01992 // uses of registers that are already live. 01993 // 01994 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure 01995 // so could probably be factored. 01996 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { 01997 LiveUses = 0; 01998 int PDiff = 0; 01999 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 02000 I != E; ++I) { 02001 if (I->isCtrl()) 02002 continue; 02003 SUnit *PredSU = I->getSUnit(); 02004 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 02005 // to cover the number of registers defined (they are all live). 02006 if (PredSU->NumRegDefsLeft == 0) { 02007 if (PredSU->getNode()->isMachineOpcode()) 02008 ++LiveUses; 02009 continue; 02010 } 02011 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 02012 RegDefPos.IsValid(); RegDefPos.Advance()) { 02013 MVT VT = RegDefPos.GetValue(); 02014 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02015 if (RegPressure[RCId] >= RegLimit[RCId]) 02016 ++PDiff; 02017 } 02018 } 02019 const SDNode *N = SU->getNode(); 02020 02021 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) 02022 return PDiff; 02023 02024 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 02025 for (unsigned i = 0; i != NumDefs; ++i) { 02026 MVT VT = N->getSimpleValueType(i); 02027 if (!N->hasAnyUseOfValue(i)) 02028 continue; 02029 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02030 if (RegPressure[RCId] >= RegLimit[RCId]) 02031 --PDiff; 02032 } 02033 return PDiff; 02034 } 02035 02036 void RegReductionPQBase::scheduledNode(SUnit *SU) { 02037 if (!TracksRegPressure) 02038 return; 02039 02040 if (!SU->getNode()) 02041 return; 02042 02043 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 02044 I != E; ++I) { 02045 if (I->isCtrl()) 02046 continue; 02047 SUnit *PredSU = I->getSUnit(); 02048 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 02049 // to cover the number of registers defined (they are all live). 02050 if (PredSU->NumRegDefsLeft == 0) { 02051 continue; 02052 } 02053 // FIXME: The ScheduleDAG currently loses information about which of a 02054 // node's values is consumed by each dependence. Consequently, if the node 02055 // defines multiple register classes, we don't know which to pressurize 02056 // here. Instead the following loop consumes the register defs in an 02057 // arbitrary order. At least it handles the common case of clustered loads 02058 // to the same class. For precise liveness, each SDep needs to indicate the 02059 // result number. But that tightly couples the ScheduleDAG with the 02060 // SelectionDAG making updates tricky. A simpler hack would be to attach a 02061 // value type or register class to SDep. 02062 // 02063 // The most important aspect of register tracking is balancing the increase 02064 // here with the reduction further below. Note that this SU may use multiple 02065 // defs in PredSU. The can't be determined here, but we've already 02066 // compensated by reducing NumRegDefsLeft in PredSU during 02067 // ScheduleDAGSDNodes::AddSchedEdges. 02068 --PredSU->NumRegDefsLeft; 02069 unsigned SkipRegDefs = PredSU->NumRegDefsLeft; 02070 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 02071 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 02072 if (SkipRegDefs) 02073 continue; 02074 02075 unsigned RCId, Cost; 02076 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 02077 RegPressure[RCId] += Cost; 02078 break; 02079 } 02080 } 02081 02082 // We should have this assert, but there may be dead SDNodes that never 02083 // materialize as SUnits, so they don't appear to generate liveness. 02084 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); 02085 int SkipRegDefs = (int)SU->NumRegDefsLeft; 02086 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); 02087 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 02088 if (SkipRegDefs > 0) 02089 continue; 02090 unsigned RCId, Cost; 02091 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 02092 if (RegPressure[RCId] < Cost) { 02093 // Register pressure tracking is imprecise. This can happen. But we try 02094 // hard not to let it happen because it likely results in poor scheduling. 02095 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); 02096 RegPressure[RCId] = 0; 02097 } 02098 else { 02099 RegPressure[RCId] -= Cost; 02100 } 02101 } 02102 dumpRegPressure(); 02103 } 02104 02105 void RegReductionPQBase::unscheduledNode(SUnit *SU) { 02106 if (!TracksRegPressure) 02107 return; 02108 02109 const SDNode *N = SU->getNode(); 02110 if (!N) return; 02111 02112 if (!N->isMachineOpcode()) { 02113 if (N->getOpcode() != ISD::CopyToReg) 02114 return; 02115 } else { 02116 unsigned Opc = N->getMachineOpcode(); 02117 if (Opc == TargetOpcode::EXTRACT_SUBREG || 02118 Opc == TargetOpcode::INSERT_SUBREG || 02119 Opc == TargetOpcode::SUBREG_TO_REG || 02120 Opc == TargetOpcode::REG_SEQUENCE || 02121 Opc == TargetOpcode::IMPLICIT_DEF) 02122 return; 02123 } 02124 02125 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 02126 I != E; ++I) { 02127 if (I->isCtrl()) 02128 continue; 02129 SUnit *PredSU = I->getSUnit(); 02130 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only 02131 // counts data deps. 02132 if (PredSU->NumSuccsLeft != PredSU->Succs.size()) 02133 continue; 02134 const SDNode *PN = PredSU->getNode(); 02135 if (!PN->isMachineOpcode()) { 02136 if (PN->getOpcode() == ISD::CopyFromReg) { 02137 MVT VT = PN->getSimpleValueType(0); 02138 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02139 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 02140 } 02141 continue; 02142 } 02143 unsigned POpc = PN->getMachineOpcode(); 02144 if (POpc == TargetOpcode::IMPLICIT_DEF) 02145 continue; 02146 if (POpc == TargetOpcode::EXTRACT_SUBREG || 02147 POpc == TargetOpcode::INSERT_SUBREG || 02148 POpc == TargetOpcode::SUBREG_TO_REG) { 02149 MVT VT = PN->getSimpleValueType(0); 02150 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02151 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 02152 continue; 02153 } 02154 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 02155 for (unsigned i = 0; i != NumDefs; ++i) { 02156 MVT VT = PN->getSimpleValueType(i); 02157 if (!PN->hasAnyUseOfValue(i)) 02158 continue; 02159 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02160 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 02161 // Register pressure tracking is imprecise. This can happen. 02162 RegPressure[RCId] = 0; 02163 else 02164 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 02165 } 02166 } 02167 02168 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 02169 // may transfer data dependencies to CopyToReg. 02170 if (SU->NumSuccs && N->isMachineOpcode()) { 02171 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 02172 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 02173 MVT VT = N->getSimpleValueType(i); 02174 if (VT == MVT::Glue || VT == MVT::Other) 02175 continue; 02176 if (!N->hasAnyUseOfValue(i)) 02177 continue; 02178 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 02179 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 02180 } 02181 } 02182 02183 dumpRegPressure(); 02184 } 02185 02186 //===----------------------------------------------------------------------===// 02187 // Dynamic Node Priority for Register Pressure Reduction 02188 //===----------------------------------------------------------------------===// 02189 02190 /// closestSucc - Returns the scheduled cycle of the successor which is 02191 /// closest to the current cycle. 02192 static unsigned closestSucc(const SUnit *SU) { 02193 unsigned MaxHeight = 0; 02194 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 02195 I != E; ++I) { 02196 if (I->isCtrl()) continue; // ignore chain succs 02197 unsigned Height = I->getSUnit()->getHeight(); 02198 // If there are bunch of CopyToRegs stacked up, they should be considered 02199 // to be at the same position. 02200 if (I->getSUnit()->getNode() && 02201 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 02202 Height = closestSucc(I->getSUnit())+1; 02203 if (Height > MaxHeight) 02204 MaxHeight = Height; 02205 } 02206 return MaxHeight; 02207 } 02208 02209 /// calcMaxScratches - Returns an cost estimate of the worse case requirement 02210 /// for scratch registers, i.e. number of data dependencies. 02211 static unsigned calcMaxScratches(const SUnit *SU) { 02212 unsigned Scratches = 0; 02213 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 02214 I != E; ++I) { 02215 if (I->isCtrl()) continue; // ignore chain preds 02216 Scratches++; 02217 } 02218 return Scratches; 02219 } 02220 02221 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are 02222 /// CopyFromReg from a virtual register. 02223 static bool hasOnlyLiveInOpers(const SUnit *SU) { 02224 bool RetVal = false; 02225 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 02226 I != E; ++I) { 02227 if (I->isCtrl()) continue; 02228 const SUnit *PredSU = I->getSUnit(); 02229 if (PredSU->getNode() && 02230 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { 02231 unsigned Reg = 02232 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); 02233 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 02234 RetVal = true; 02235 continue; 02236 } 02237 } 02238 return false; 02239 } 02240 return RetVal; 02241 } 02242 02243 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are 02244 /// CopyToReg to a virtual register. This SU def is probably a liveout and 02245 /// it has no other use. It should be scheduled closer to the terminator. 02246 static bool hasOnlyLiveOutUses(const SUnit *SU) { 02247 bool RetVal = false; 02248 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 02249 I != E; ++I) { 02250 if (I->isCtrl()) continue; 02251 const SUnit *SuccSU = I->getSUnit(); 02252 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 02253 unsigned Reg = 02254 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 02255 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 02256 RetVal = true; 02257 continue; 02258 } 02259 } 02260 return false; 02261 } 02262 return RetVal; 02263 } 02264 02265 // Set isVRegCycle for a node with only live in opers and live out uses. Also 02266 // set isVRegCycle for its CopyFromReg operands. 02267 // 02268 // This is only relevant for single-block loops, in which case the VRegCycle 02269 // node is likely an induction variable in which the operand and target virtual 02270 // registers should be coalesced (e.g. pre/post increment values). Setting the 02271 // isVRegCycle flag helps the scheduler prioritize other uses of the same 02272 // CopyFromReg so that this node becomes the virtual register "kill". This 02273 // avoids interference between the values live in and out of the block and 02274 // eliminates a copy inside the loop. 02275 static void initVRegCycle(SUnit *SU) { 02276 if (DisableSchedVRegCycle) 02277 return; 02278 02279 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 02280 return; 02281 02282 DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 02283 02284 SU->isVRegCycle = true; 02285 02286 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 02287 I != E; ++I) { 02288 if (I->isCtrl()) continue; 02289 I->getSUnit()->isVRegCycle = true; 02290 } 02291 } 02292 02293 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 02294 // CopyFromReg operands. We should no longer penalize other uses of this VReg. 02295 static void resetVRegCycle(SUnit *SU) { 02296 if (!SU->isVRegCycle) 02297 return; 02298 02299 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 02300 I != E; ++I) { 02301 if (I->isCtrl()) continue; // ignore chain preds 02302 SUnit *PredSU = I->getSUnit(); 02303 if (PredSU->isVRegCycle) { 02304 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && 02305 "VRegCycle def must be CopyFromReg"); 02306 I->getSUnit()->isVRegCycle = 0; 02307 } 02308 } 02309 } 02310 02311 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This 02312 // means a node that defines the VRegCycle has not been scheduled yet. 02313 static bool hasVRegCycleUse(const SUnit *SU) { 02314 // If this SU also defines the VReg, don't hoist it as a "use". 02315 if (SU->isVRegCycle) 02316 return false; 02317 02318 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 02319 I != E; ++I) { 02320 if (I->isCtrl()) continue; // ignore chain preds 02321 if (I->getSUnit()->isVRegCycle && 02322 I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { 02323 DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); 02324 return true; 02325 } 02326 } 02327 return false; 02328 } 02329 02330 // Check for either a dependence (latency) or resource (hazard) stall. 02331 // 02332 // Note: The ScheduleHazardRecognizer interface requires a non-const SU. 02333 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { 02334 if ((int)SPQ->getCurCycle() < Height) return true; 02335 if (SPQ->getHazardRec()->getHazardType(SU, 0) 02336 != ScheduleHazardRecognizer::NoHazard) 02337 return true; 02338 return false; 02339 } 02340 02341 // Return -1 if left has higher priority, 1 if right has higher priority. 02342 // Return 0 if latency-based priority is equivalent. 02343 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, 02344 RegReductionPQBase *SPQ) { 02345 // Scheduling an instruction that uses a VReg whose postincrement has not yet 02346 // been scheduled will induce a copy. Model this as an extra cycle of latency. 02347 int LPenalty = hasVRegCycleUse(left) ? 1 : 0; 02348 int RPenalty = hasVRegCycleUse(right) ? 1 : 0; 02349 int LHeight = (int)left->getHeight() + LPenalty; 02350 int RHeight = (int)right->getHeight() + RPenalty; 02351 02352 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && 02353 BUHasStall(left, LHeight, SPQ); 02354 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && 02355 BUHasStall(right, RHeight, SPQ); 02356 02357 // If scheduling one of the node will cause a pipeline stall, delay it. 02358 // If scheduling either one of the node will cause a pipeline stall, sort 02359 // them according to their height. 02360 if (LStall) { 02361 if (!RStall) 02362 return 1; 02363 if (LHeight != RHeight) 02364 return LHeight > RHeight ? 1 : -1; 02365 } else if (RStall) 02366 return -1; 02367 02368 // If either node is scheduling for latency, sort them by height/depth 02369 // and latency. 02370 if (!checkPref || (left->SchedulingPref == Sched::ILP || 02371 right->SchedulingPref == Sched::ILP)) { 02372 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer 02373 // is enabled, grouping instructions by cycle, then its height is already 02374 // covered so only its depth matters. We also reach this point if both stall 02375 // but have the same height. 02376 if (!SPQ->getHazardRec()->isEnabled()) { 02377 if (LHeight != RHeight) 02378 return LHeight > RHeight ? 1 : -1; 02379 } 02380 int LDepth = left->getDepth() - LPenalty; 02381 int RDepth = right->getDepth() - RPenalty; 02382 if (LDepth != RDepth) { 02383 DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum 02384 << ") depth " << LDepth << " vs SU (" << right->NodeNum 02385 << ") depth " << RDepth << "\n"); 02386 return LDepth < RDepth ? 1 : -1; 02387 } 02388 if (left->Latency != right->Latency) 02389 return left->Latency > right->Latency ? 1 : -1; 02390 } 02391 return 0; 02392 } 02393 02394 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { 02395 // Schedule physical register definitions close to their use. This is 02396 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as 02397 // long as shortening physreg live ranges is generally good, we can defer 02398 // creating a subtarget hook. 02399 if (!DisableSchedPhysRegJoin) { 02400 bool LHasPhysReg = left->hasPhysRegDefs; 02401 bool RHasPhysReg = right->hasPhysRegDefs; 02402 if (LHasPhysReg != RHasPhysReg) { 02403 #ifndef NDEBUG 02404 const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"}; 02405 #endif 02406 DEBUG(dbgs() << " SU (" << left->NodeNum << ") " 02407 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " 02408 << PhysRegMsg[RHasPhysReg] << "\n"); 02409 return LHasPhysReg < RHasPhysReg; 02410 } 02411 } 02412 02413 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. 02414 unsigned LPriority = SPQ->getNodePriority(left); 02415 unsigned RPriority = SPQ->getNodePriority(right); 02416 02417 // Be really careful about hoisting call operands above previous calls. 02418 // Only allows it if it would reduce register pressure. 02419 if (left->isCall && right->isCallOp) { 02420 unsigned RNumVals = right->getNode()->getNumValues(); 02421 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; 02422 } 02423 if (right->isCall && left->isCallOp) { 02424 unsigned LNumVals = left->getNode()->getNumValues(); 02425 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; 02426 } 02427 02428 if (LPriority != RPriority) 02429 return LPriority > RPriority; 02430 02431 // One or both of the nodes are calls and their sethi-ullman numbers are the 02432 // same, then keep source order. 02433 if (left->isCall || right->isCall) { 02434 unsigned LOrder = SPQ->getNodeOrdering(left); 02435 unsigned ROrder = SPQ->getNodeOrdering(right); 02436 02437 // Prefer an ordering where the lower the non-zero order number, the higher 02438 // the preference. 02439 if ((LOrder || ROrder) && LOrder != ROrder) 02440 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 02441 } 02442 02443 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 02444 // e.g. 02445 // t1 = op t2, c1 02446 // t3 = op t4, c2 02447 // 02448 // and the following instructions are both ready. 02449 // t2 = op c3 02450 // t4 = op c4 02451 // 02452 // Then schedule t2 = op first. 02453 // i.e. 02454 // t4 = op c4 02455 // t2 = op c3 02456 // t1 = op t2, c1 02457 // t3 = op t4, c2 02458 // 02459 // This creates more short live intervals. 02460 unsigned LDist = closestSucc(left); 02461 unsigned RDist = closestSucc(right); 02462 if (LDist != RDist) 02463 return LDist < RDist; 02464 02465 // How many registers becomes live when the node is scheduled. 02466 unsigned LScratch = calcMaxScratches(left); 02467 unsigned RScratch = calcMaxScratches(right); 02468 if (LScratch != RScratch) 02469 return LScratch > RScratch; 02470 02471 // Comparing latency against a call makes little sense unless the node 02472 // is register pressure-neutral. 02473 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) 02474 return (left->NodeQueueId > right->NodeQueueId); 02475 02476 // Do not compare latencies when one or both of the nodes are calls. 02477 if (!DisableSchedCycles && 02478 !(left->isCall || right->isCall)) { 02479 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 02480 if (result != 0) 02481 return result > 0; 02482 } 02483 else { 02484 if (left->getHeight() != right->getHeight()) 02485 return left->getHeight() > right->getHeight(); 02486 02487 if (left->getDepth() != right->getDepth()) 02488 return left->getDepth() < right->getDepth(); 02489 } 02490 02491 assert(left->NodeQueueId && right->NodeQueueId && 02492 "NodeQueueId cannot be zero"); 02493 return (left->NodeQueueId > right->NodeQueueId); 02494 } 02495 02496 // Bottom up 02497 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 02498 if (int res = checkSpecialNodes(left, right)) 02499 return res > 0; 02500 02501 return BURRSort(left, right, SPQ); 02502 } 02503 02504 // Source order, otherwise bottom up. 02505 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 02506 if (int res = checkSpecialNodes(left, right)) 02507 return res > 0; 02508 02509 unsigned LOrder = SPQ->getNodeOrdering(left); 02510 unsigned ROrder = SPQ->getNodeOrdering(right); 02511 02512 // Prefer an ordering where the lower the non-zero order number, the higher 02513 // the preference. 02514 if ((LOrder || ROrder) && LOrder != ROrder) 02515 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 02516 02517 return BURRSort(left, right, SPQ); 02518 } 02519 02520 // If the time between now and when the instruction will be ready can cover 02521 // the spill code, then avoid adding it to the ready queue. This gives long 02522 // stalls highest priority and allows hoisting across calls. It should also 02523 // speed up processing the available queue. 02524 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 02525 static const unsigned ReadyDelay = 3; 02526 02527 if (SPQ->MayReduceRegPressure(SU)) return true; 02528 02529 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 02530 02531 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 02532 != ScheduleHazardRecognizer::NoHazard) 02533 return false; 02534 02535 return true; 02536 } 02537 02538 // Return true if right should be scheduled with higher priority than left. 02539 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 02540 if (int res = checkSpecialNodes(left, right)) 02541 return res > 0; 02542 02543 if (left->isCall || right->isCall) 02544 // No way to compute latency of calls. 02545 return BURRSort(left, right, SPQ); 02546 02547 bool LHigh = SPQ->HighRegPressure(left); 02548 bool RHigh = SPQ->HighRegPressure(right); 02549 // Avoid causing spills. If register pressure is high, schedule for 02550 // register pressure reduction. 02551 if (LHigh && !RHigh) { 02552 DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 02553 << right->NodeNum << ")\n"); 02554 return true; 02555 } 02556 else if (!LHigh && RHigh) { 02557 DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 02558 << left->NodeNum << ")\n"); 02559 return false; 02560 } 02561 if (!LHigh && !RHigh) { 02562 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 02563 if (result != 0) 02564 return result > 0; 02565 } 02566 return BURRSort(left, right, SPQ); 02567 } 02568 02569 // Schedule as many instructions in each cycle as possible. So don't make an 02570 // instruction available unless it is ready in the current cycle. 02571 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 02572 if (SU->getHeight() > CurCycle) return false; 02573 02574 if (SPQ->getHazardRec()->getHazardType(SU, 0) 02575 != ScheduleHazardRecognizer::NoHazard) 02576 return false; 02577 02578 return true; 02579 } 02580 02581 static bool canEnableCoalescing(SUnit *SU) { 02582 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 02583 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 02584 // CopyToReg should be close to its uses to facilitate coalescing and 02585 // avoid spilling. 02586 return true; 02587 02588 if (Opc == TargetOpcode::EXTRACT_SUBREG || 02589 Opc == TargetOpcode::SUBREG_TO_REG || 02590 Opc == TargetOpcode::INSERT_SUBREG) 02591 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 02592 // close to their uses to facilitate coalescing. 02593 return true; 02594 02595 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 02596 // If SU does not have a register def, schedule it close to its uses 02597 // because it does not lengthen any live ranges. 02598 return true; 02599 02600 return false; 02601 } 02602 02603 // list-ilp is currently an experimental scheduler that allows various 02604 // heuristics to be enabled prior to the normal register reduction logic. 02605 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 02606 if (int res = checkSpecialNodes(left, right)) 02607 return res > 0; 02608 02609 if (left->isCall || right->isCall) 02610 // No way to compute latency of calls. 02611 return BURRSort(left, right, SPQ); 02612 02613 unsigned LLiveUses = 0, RLiveUses = 0; 02614 int LPDiff = 0, RPDiff = 0; 02615 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 02616 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 02617 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 02618 } 02619 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 02620 DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff 02621 << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); 02622 return LPDiff > RPDiff; 02623 } 02624 02625 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 02626 bool LReduce = canEnableCoalescing(left); 02627 bool RReduce = canEnableCoalescing(right); 02628 if (LReduce && !RReduce) return false; 02629 if (RReduce && !LReduce) return true; 02630 } 02631 02632 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 02633 DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 02634 << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); 02635 return LLiveUses < RLiveUses; 02636 } 02637 02638 if (!DisableSchedStalls) { 02639 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 02640 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 02641 if (LStall != RStall) 02642 return left->getHeight() > right->getHeight(); 02643 } 02644 02645 if (!DisableSchedCriticalPath) { 02646 int spread = (int)left->getDepth() - (int)right->getDepth(); 02647 if (std::abs(spread) > MaxReorderWindow) { 02648 DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 02649 << left->getDepth() << " != SU(" << right->NodeNum << "): " 02650 << right->getDepth() << "\n"); 02651 return left->getDepth() < right->getDepth(); 02652 } 02653 } 02654 02655 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 02656 int spread = (int)left->getHeight() - (int)right->getHeight(); 02657 if (std::abs(spread) > MaxReorderWindow) 02658 return left->getHeight() > right->getHeight(); 02659 } 02660 02661 return BURRSort(left, right, SPQ); 02662 } 02663 02664 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 02665 SUnits = &sunits; 02666 // Add pseudo dependency edges for two-address nodes. 02667 if (!Disable2AddrHack) 02668 AddPseudoTwoAddrDeps(); 02669 // Reroute edges to nodes with multiple uses. 02670 if (!TracksRegPressure && !SrcOrder) 02671 PrescheduleNodesWithMultipleUses(); 02672 // Calculate node priorities. 02673 CalculateSethiUllmanNumbers(); 02674 02675 // For single block loops, mark nodes that look like canonical IV increments. 02676 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { 02677 for (unsigned i = 0, e = sunits.size(); i != e; ++i) { 02678 initVRegCycle(&sunits[i]); 02679 } 02680 } 02681 } 02682 02683 //===----------------------------------------------------------------------===// 02684 // Preschedule for Register Pressure 02685 //===----------------------------------------------------------------------===// 02686 02687 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { 02688 if (SU->isTwoAddress) { 02689 unsigned Opc = SU->getNode()->getMachineOpcode(); 02690 const MCInstrDesc &MCID = TII->get(Opc); 02691 unsigned NumRes = MCID.getNumDefs(); 02692 unsigned NumOps = MCID.getNumOperands() - NumRes; 02693 for (unsigned i = 0; i != NumOps; ++i) { 02694 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { 02695 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 02696 if (DU->getNodeId() != -1 && 02697 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 02698 return true; 02699 } 02700 } 02701 } 02702 return false; 02703 } 02704 02705 /// canClobberReachingPhysRegUse - True if SU would clobber one of it's 02706 /// successor's explicit physregs whose definition can reach DepSU. 02707 /// i.e. DepSU should not be scheduled above SU. 02708 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, 02709 ScheduleDAGRRList *scheduleDAG, 02710 const TargetInstrInfo *TII, 02711 const TargetRegisterInfo *TRI) { 02712 const uint16_t *ImpDefs 02713 = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 02714 const uint32_t *RegMask = getNodeRegMask(SU->getNode()); 02715 if(!ImpDefs && !RegMask) 02716 return false; 02717 02718 for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); 02719 SI != SE; ++SI) { 02720 SUnit *SuccSU = SI->getSUnit(); 02721 for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), 02722 PE = SuccSU->Preds.end(); PI != PE; ++PI) { 02723 if (!PI->isAssignedRegDep()) 02724 continue; 02725 02726 if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && 02727 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) 02728 return true; 02729 02730 if (ImpDefs) 02731 for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) 02732 // Return true if SU clobbers this physical register use and the 02733 // definition of the register reaches from DepSU. IsReachable queries 02734 // a topological forward sort of the DAG (following the successors). 02735 if (TRI->regsOverlap(*ImpDef, PI->getReg()) && 02736 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) 02737 return true; 02738 } 02739 } 02740 return false; 02741 } 02742 02743 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 02744 /// physical register defs. 02745 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 02746 const TargetInstrInfo *TII, 02747 const TargetRegisterInfo *TRI) { 02748 SDNode *N = SuccSU->getNode(); 02749 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 02750 const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 02751 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 02752 for (const SDNode *SUNode = SU->getNode(); SUNode; 02753 SUNode = SUNode->getGluedNode()) { 02754 if (!SUNode->isMachineOpcode()) 02755 continue; 02756 const uint16_t *SUImpDefs = 02757 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 02758 const uint32_t *SURegMask = getNodeRegMask(SUNode); 02759 if (!SUImpDefs && !SURegMask) 02760 continue; 02761 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 02762 EVT VT = N->getValueType(i); 02763 if (VT == MVT::Glue || VT == MVT::Other) 02764 continue; 02765 if (!N->hasAnyUseOfValue(i)) 02766 continue; 02767 unsigned Reg = ImpDefs[i - NumDefs]; 02768 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) 02769 return true; 02770 if (!SUImpDefs) 02771 continue; 02772 for (;*SUImpDefs; ++SUImpDefs) { 02773 unsigned SUReg = *SUImpDefs; 02774 if (TRI->regsOverlap(Reg, SUReg)) 02775 return true; 02776 } 02777 } 02778 } 02779 return false; 02780 } 02781 02782 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 02783 /// are not handled well by the general register pressure reduction 02784 /// heuristics. When presented with code like this: 02785 /// 02786 /// N 02787 /// / | 02788 /// / | 02789 /// U store 02790 /// | 02791 /// ... 02792 /// 02793 /// the heuristics tend to push the store up, but since the 02794 /// operand of the store has another use (U), this would increase 02795 /// the length of that other use (the U->N edge). 02796 /// 02797 /// This function transforms code like the above to route U's 02798 /// dependence through the store when possible, like this: 02799 /// 02800 /// N 02801 /// || 02802 /// || 02803 /// store 02804 /// | 02805 /// U 02806 /// | 02807 /// ... 02808 /// 02809 /// This results in the store being scheduled immediately 02810 /// after N, which shortens the U->N live range, reducing 02811 /// register pressure. 02812 /// 02813 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 02814 // Visit all the nodes in topological order, working top-down. 02815 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 02816 SUnit *SU = &(*SUnits)[i]; 02817 // For now, only look at nodes with no data successors, such as stores. 02818 // These are especially important, due to the heuristics in 02819 // getNodePriority for nodes with no data successors. 02820 if (SU->NumSuccs != 0) 02821 continue; 02822 // For now, only look at nodes with exactly one data predecessor. 02823 if (SU->NumPreds != 1) 02824 continue; 02825 // Avoid prescheduling copies to virtual registers, which don't behave 02826 // like other nodes from the perspective of scheduling heuristics. 02827 if (SDNode *N = SU->getNode()) 02828 if (N->getOpcode() == ISD::CopyToReg && 02829 TargetRegisterInfo::isVirtualRegister 02830 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 02831 continue; 02832 02833 // Locate the single data predecessor. 02834 SUnit *PredSU = 0; 02835 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 02836 EE = SU->Preds.end(); II != EE; ++II) 02837 if (!II->isCtrl()) { 02838 PredSU = II->getSUnit(); 02839 break; 02840 } 02841 assert(PredSU); 02842 02843 // Don't rewrite edges that carry physregs, because that requires additional 02844 // support infrastructure. 02845 if (PredSU->hasPhysRegDefs) 02846 continue; 02847 // Short-circuit the case where SU is PredSU's only data successor. 02848 if (PredSU->NumSuccs == 1) 02849 continue; 02850 // Avoid prescheduling to copies from virtual registers, which don't behave 02851 // like other nodes from the perspective of scheduling heuristics. 02852 if (SDNode *N = SU->getNode()) 02853 if (N->getOpcode() == ISD::CopyFromReg && 02854 TargetRegisterInfo::isVirtualRegister 02855 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 02856 continue; 02857 02858 // Perform checks on the successors of PredSU. 02859 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 02860 EE = PredSU->Succs.end(); II != EE; ++II) { 02861 SUnit *PredSuccSU = II->getSUnit(); 02862 if (PredSuccSU == SU) continue; 02863 // If PredSU has another successor with no data successors, for 02864 // now don't attempt to choose either over the other. 02865 if (PredSuccSU->NumSuccs == 0) 02866 goto outer_loop_continue; 02867 // Don't break physical register dependencies. 02868 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 02869 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 02870 goto outer_loop_continue; 02871 // Don't introduce graph cycles. 02872 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 02873 goto outer_loop_continue; 02874 } 02875 02876 // Ok, the transformation is safe and the heuristics suggest it is 02877 // profitable. Update the graph. 02878 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum 02879 << " next to PredSU #" << PredSU->NodeNum 02880 << " to guide scheduling in the presence of multiple uses\n"); 02881 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 02882 SDep Edge = PredSU->Succs[i]; 02883 assert(!Edge.isAssignedRegDep()); 02884 SUnit *SuccSU = Edge.getSUnit(); 02885 if (SuccSU != SU) { 02886 Edge.setSUnit(PredSU); 02887 scheduleDAG->RemovePred(SuccSU, Edge); 02888 scheduleDAG->AddPred(SU, Edge); 02889 Edge.setSUnit(SU); 02890 scheduleDAG->AddPred(SuccSU, Edge); 02891 --i; 02892 } 02893 } 02894 outer_loop_continue:; 02895 } 02896 } 02897 02898 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 02899 /// it as a def&use operand. Add a pseudo control edge from it to the other 02900 /// node (if it won't create a cycle) so the two-address one will be scheduled 02901 /// first (lower in the schedule). If both nodes are two-address, favor the 02902 /// one that has a CopyToReg use (more likely to be a loop induction update). 02903 /// If both are two-address, but one is commutable while the other is not 02904 /// commutable, favor the one that's not commutable. 02905 void RegReductionPQBase::AddPseudoTwoAddrDeps() { 02906 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 02907 SUnit *SU = &(*SUnits)[i]; 02908 if (!SU->isTwoAddress) 02909 continue; 02910 02911 SDNode *Node = SU->getNode(); 02912 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) 02913 continue; 02914 02915 bool isLiveOut = hasOnlyLiveOutUses(SU); 02916 unsigned Opc = Node->getMachineOpcode(); 02917 const MCInstrDesc &MCID = TII->get(Opc); 02918 unsigned NumRes = MCID.getNumDefs(); 02919 unsigned NumOps = MCID.getNumOperands() - NumRes; 02920 for (unsigned j = 0; j != NumOps; ++j) { 02921 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) 02922 continue; 02923 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 02924 if (DU->getNodeId() == -1) 02925 continue; 02926 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 02927 if (!DUSU) continue; 02928 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 02929 E = DUSU->Succs.end(); I != E; ++I) { 02930 if (I->isCtrl()) continue; 02931 SUnit *SuccSU = I->getSUnit(); 02932 if (SuccSU == SU) 02933 continue; 02934 // Be conservative. Ignore if nodes aren't at roughly the same 02935 // depth and height. 02936 if (SuccSU->getHeight() < SU->getHeight() && 02937 (SU->getHeight() - SuccSU->getHeight()) > 1) 02938 continue; 02939 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 02940 // constrains whatever is using the copy, instead of the copy 02941 // itself. In the case that the copy is coalesced, this 02942 // preserves the intent of the pseudo two-address heurietics. 02943 while (SuccSU->Succs.size() == 1 && 02944 SuccSU->getNode()->isMachineOpcode() && 02945 SuccSU->getNode()->getMachineOpcode() == 02946 TargetOpcode::COPY_TO_REGCLASS) 02947 SuccSU = SuccSU->Succs.front().getSUnit(); 02948 // Don't constrain non-instruction nodes. 02949 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 02950 continue; 02951 // Don't constrain nodes with physical register defs if the 02952 // predecessor can clobber them. 02953 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 02954 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 02955 continue; 02956 } 02957 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 02958 // these may be coalesced away. We want them close to their uses. 02959 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 02960 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 02961 SuccOpc == TargetOpcode::INSERT_SUBREG || 02962 SuccOpc == TargetOpcode::SUBREG_TO_REG) 02963 continue; 02964 if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && 02965 (!canClobber(SuccSU, DUSU) || 02966 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 02967 (!SU->isCommutable && SuccSU->isCommutable)) && 02968 !scheduleDAG->IsReachable(SuccSU, SU)) { 02969 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 02970 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 02971 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); 02972 } 02973 } 02974 } 02975 } 02976 } 02977 02978 //===----------------------------------------------------------------------===// 02979 // Public Constructor Functions 02980 //===----------------------------------------------------------------------===// 02981 02982 llvm::ScheduleDAGSDNodes * 02983 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 02984 CodeGenOpt::Level OptLevel) { 02985 const TargetMachine &TM = IS->TM; 02986 const TargetInstrInfo *TII = TM.getInstrInfo(); 02987 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 02988 02989 BURegReductionPriorityQueue *PQ = 02990 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); 02991 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 02992 PQ->setScheduleDAG(SD); 02993 return SD; 02994 } 02995 02996 llvm::ScheduleDAGSDNodes * 02997 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 02998 CodeGenOpt::Level OptLevel) { 02999 const TargetMachine &TM = IS->TM; 03000 const TargetInstrInfo *TII = TM.getInstrInfo(); 03001 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 03002 03003 SrcRegReductionPriorityQueue *PQ = 03004 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); 03005 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 03006 PQ->setScheduleDAG(SD); 03007 return SD; 03008 } 03009 03010 llvm::ScheduleDAGSDNodes * 03011 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 03012 CodeGenOpt::Level OptLevel) { 03013 const TargetMachine &TM = IS->TM; 03014 const TargetInstrInfo *TII = TM.getInstrInfo(); 03015 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 03016 const TargetLowering *TLI = &IS->getTargetLowering(); 03017 03018 HybridBURRPriorityQueue *PQ = 03019 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 03020 03021 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 03022 PQ->setScheduleDAG(SD); 03023 return SD; 03024 } 03025 03026 llvm::ScheduleDAGSDNodes * 03027 llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 03028 CodeGenOpt::Level OptLevel) { 03029 const TargetMachine &TM = IS->TM; 03030 const TargetInstrInfo *TII = TM.getInstrInfo(); 03031 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 03032 const TargetLowering *TLI = &IS->getTargetLowering(); 03033 03034 ILPBURRPriorityQueue *PQ = 03035 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 03036 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 03037 PQ->setScheduleDAG(SD); 03038 return SD; 03039 }