File: | lib/Target/AMDGPU/GCNSchedStrategy.cpp |
Warning: | line 551, column 5 Called C++ object pointer is null |
1 | //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===// | |||||||||
2 | // | |||||||||
3 | // The LLVM Compiler Infrastructure | |||||||||
4 | // | |||||||||
5 | // This file is distributed under the University of Illinois Open Source | |||||||||
6 | // License. See LICENSE.TXT for details. | |||||||||
7 | // | |||||||||
8 | //===----------------------------------------------------------------------===// | |||||||||
9 | // | |||||||||
10 | /// \file | |||||||||
11 | /// This contains a MachineSchedStrategy implementation for maximizing wave | |||||||||
12 | /// occupancy on GCN hardware. | |||||||||
13 | //===----------------------------------------------------------------------===// | |||||||||
14 | ||||||||||
15 | #include "GCNSchedStrategy.h" | |||||||||
16 | #include "AMDGPUSubtarget.h" | |||||||||
17 | #include "SIInstrInfo.h" | |||||||||
18 | #include "SIMachineFunctionInfo.h" | |||||||||
19 | #include "SIRegisterInfo.h" | |||||||||
20 | #include "llvm/CodeGen/RegisterClassInfo.h" | |||||||||
21 | #include "llvm/Support/MathExtras.h" | |||||||||
22 | ||||||||||
// Debug category consulted by the DEBUG() macro in this file.
#define DEBUG_TYPE "misched"
24 | ||||||||||
25 | using namespace llvm; | |||||||||
26 | ||||||||||
27 | GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( | |||||||||
28 | const MachineSchedContext *C) : | |||||||||
29 | GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { } | |||||||||
30 | ||||||||||
31 | static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs, | |||||||||
32 | const MachineFunction &MF) { | |||||||||
33 | ||||||||||
34 | const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); | |||||||||
35 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | |||||||||
36 | unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs), | |||||||||
37 | ST.getOccupancyWithNumVGPRs(VGPRs)); | |||||||||
38 | return std::min(MinRegOccupancy, | |||||||||
39 | ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), | |||||||||
40 | *MF.getFunction())); | |||||||||
41 | } | |||||||||
42 | ||||||||||
43 | void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) { | |||||||||
44 | GenericScheduler::initialize(DAG); | |||||||||
45 | ||||||||||
46 | const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); | |||||||||
47 | ||||||||||
48 | MF = &DAG->MF; | |||||||||
49 | ||||||||||
50 | const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); | |||||||||
51 | ||||||||||
52 | // FIXME: This is also necessary, because some passes that run after | |||||||||
53 | // scheduling and before regalloc increase register pressure. | |||||||||
54 | const int ErrorMargin = 3; | |||||||||
55 | ||||||||||
56 | SGPRExcessLimit = Context->RegClassInfo | |||||||||
57 | ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin; | |||||||||
58 | VGPRExcessLimit = Context->RegClassInfo | |||||||||
59 | ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin; | |||||||||
60 | if (TargetOccupancy) { | |||||||||
61 | SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true); | |||||||||
62 | VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy); | |||||||||
63 | } else { | |||||||||
64 | SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, | |||||||||
65 | SRI->getSGPRPressureSet()); | |||||||||
66 | VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF, | |||||||||
67 | SRI->getVGPRPressureSet()); | |||||||||
68 | } | |||||||||
69 | ||||||||||
70 | SGPRCriticalLimit -= ErrorMargin; | |||||||||
71 | VGPRCriticalLimit -= ErrorMargin; | |||||||||
72 | } | |||||||||
73 | ||||||||||
74 | void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, | |||||||||
75 | bool AtTop, const RegPressureTracker &RPTracker, | |||||||||
76 | const SIRegisterInfo *SRI, | |||||||||
77 | unsigned SGPRPressure, | |||||||||
78 | unsigned VGPRPressure) { | |||||||||
79 | ||||||||||
80 | Cand.SU = SU; | |||||||||
81 | Cand.AtTop = AtTop; | |||||||||
82 | ||||||||||
83 | // getDownwardPressure() and getUpwardPressure() make temporary changes to | |||||||||
84 | // the the tracker, so we need to pass those function a non-const copy. | |||||||||
85 | RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); | |||||||||
86 | ||||||||||
87 | std::vector<unsigned> Pressure; | |||||||||
88 | std::vector<unsigned> MaxPressure; | |||||||||
89 | ||||||||||
90 | if (AtTop) | |||||||||
91 | TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); | |||||||||
92 | else { | |||||||||
93 | // FIXME: I think for bottom up scheduling, the register pressure is cached | |||||||||
94 | // and can be retrieved by DAG->getPressureDif(SU). | |||||||||
95 | TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); | |||||||||
96 | } | |||||||||
97 | ||||||||||
98 | unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()]; | |||||||||
99 | unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()]; | |||||||||
100 | ||||||||||
101 | // If two instructions increase the pressure of different register sets | |||||||||
102 | // by the same amount, the generic scheduler will prefer to schedule the | |||||||||
103 | // instruction that increases the set with the least amount of registers, | |||||||||
104 | // which in our case would be SGPRs. This is rarely what we want, so | |||||||||
105 | // when we report excess/critical register pressure, we do it either | |||||||||
106 | // only for VGPRs or only for SGPRs. | |||||||||
107 | ||||||||||
108 | // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs. | |||||||||
109 | const unsigned MaxVGPRPressureInc = 16; | |||||||||
110 | bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; | |||||||||
111 | bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit; | |||||||||
112 | ||||||||||
113 | ||||||||||
114 | // FIXME: We have to enter REG-EXCESS before we reach the actual threshold | |||||||||
115 | // to increase the likelihood we don't go over the limits. We should improve | |||||||||
116 | // the analysis to look through dependencies to find the path with the least | |||||||||
117 | // register pressure. | |||||||||
118 | ||||||||||
119 | // We only need to update the RPDelata for instructions that increase | |||||||||
120 | // register pressure. Instructions that decrease or keep reg pressure | |||||||||
121 | // the same will be marked as RegExcess in tryCandidate() when they | |||||||||
122 | // are compared with instructions that increase the register pressure. | |||||||||
123 | if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) { | |||||||||
124 | Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet()); | |||||||||
125 | Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit); | |||||||||
126 | } | |||||||||
127 | ||||||||||
128 | if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) { | |||||||||
129 | Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet()); | |||||||||
130 | Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit); | |||||||||
131 | } | |||||||||
132 | ||||||||||
133 | // Register pressure is considered 'CRITICAL' if it is approaching a value | |||||||||
134 | // that would reduce the wave occupancy for the execution unit. When | |||||||||
135 | // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both | |||||||||
136 | // has the same cost, so we don't need to prefer one over the other. | |||||||||
137 | ||||||||||
138 | int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit; | |||||||||
139 | int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit; | |||||||||
140 | ||||||||||
141 | if (SGPRDelta >= 0 || VGPRDelta >= 0) { | |||||||||
142 | if (SGPRDelta > VGPRDelta) { | |||||||||
143 | Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet()); | |||||||||
144 | Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta); | |||||||||
145 | } else { | |||||||||
146 | Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet()); | |||||||||
147 | Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta); | |||||||||
148 | } | |||||||||
149 | } | |||||||||
150 | } | |||||||||
151 | ||||||||||
152 | // This function is mostly cut and pasted from | |||||||||
153 | // GenericScheduler::pickNodeFromQueue() | |||||||||
154 | void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, | |||||||||
155 | const CandPolicy &ZonePolicy, | |||||||||
156 | const RegPressureTracker &RPTracker, | |||||||||
157 | SchedCandidate &Cand) { | |||||||||
158 | const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); | |||||||||
159 | ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos(); | |||||||||
160 | unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()]; | |||||||||
161 | unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()]; | |||||||||
162 | ReadyQueue &Q = Zone.Available; | |||||||||
163 | for (SUnit *SU : Q) { | |||||||||
164 | ||||||||||
165 | SchedCandidate TryCand(ZonePolicy); | |||||||||
166 | initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, | |||||||||
167 | SGPRPressure, VGPRPressure); | |||||||||
168 | // Pass SchedBoundary only when comparing nodes from the same boundary. | |||||||||
169 | SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; | |||||||||
170 | GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg); | |||||||||
171 | if (TryCand.Reason != NoCand) { | |||||||||
172 | // Initialize resource delta if needed in case future heuristics query it. | |||||||||
173 | if (TryCand.ResDelta == SchedResourceDelta()) | |||||||||
174 | TryCand.initResourceDelta(Zone.DAG, SchedModel); | |||||||||
175 | Cand.setBest(TryCand); | |||||||||
176 | } | |||||||||
177 | } | |||||||||
178 | } | |||||||||
179 | ||||||||||
180 | // This function is mostly cut and pasted from | |||||||||
181 | // GenericScheduler::pickNodeBidirectional() | |||||||||
182 | SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) { | |||||||||
183 | // Schedule as far as possible in the direction of no choice. This is most | |||||||||
184 | // efficient, but also provides the best heuristics for CriticalPSets. | |||||||||
185 | if (SUnit *SU = Bot.pickOnlyChoice()) { | |||||||||
186 | IsTopNode = false; | |||||||||
187 | return SU; | |||||||||
188 | } | |||||||||
189 | if (SUnit *SU = Top.pickOnlyChoice()) { | |||||||||
190 | IsTopNode = true; | |||||||||
191 | return SU; | |||||||||
192 | } | |||||||||
193 | // Set the bottom-up policy based on the state of the current bottom zone and | |||||||||
194 | // the instructions outside the zone, including the top zone. | |||||||||
195 | CandPolicy BotPolicy; | |||||||||
196 | setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); | |||||||||
197 | // Set the top-down policy based on the state of the current top zone and | |||||||||
198 | // the instructions outside the zone, including the bottom zone. | |||||||||
199 | CandPolicy TopPolicy; | |||||||||
200 | setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); | |||||||||
201 | ||||||||||
202 | // See if BotCand is still valid (because we previously scheduled from Top). | |||||||||
203 | DEBUG(dbgs() << "Picking from Bot:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Picking from Bot:\n"; } } while (false); | |||||||||
204 | if (!BotCand.isValid() || BotCand.SU->isScheduled || | |||||||||
205 | BotCand.Policy != BotPolicy) { | |||||||||
206 | BotCand.reset(CandPolicy()); | |||||||||
207 | pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); | |||||||||
208 | assert(BotCand.Reason != NoCand && "failed to find the first candidate")((BotCand.Reason != NoCand && "failed to find the first candidate" ) ? static_cast<void> (0) : __assert_fail ("BotCand.Reason != NoCand && \"failed to find the first candidate\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 208, __PRETTY_FUNCTION__)); | |||||||||
209 | } else { | |||||||||
210 | DEBUG(traceCandidate(BotCand))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { traceCandidate(BotCand); } } while (false); | |||||||||
211 | } | |||||||||
212 | ||||||||||
213 | // Check if the top Q has a better candidate. | |||||||||
214 | DEBUG(dbgs() << "Picking from Top:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Picking from Top:\n"; } } while (false); | |||||||||
215 | if (!TopCand.isValid() || TopCand.SU->isScheduled || | |||||||||
216 | TopCand.Policy != TopPolicy) { | |||||||||
217 | TopCand.reset(CandPolicy()); | |||||||||
218 | pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); | |||||||||
219 | assert(TopCand.Reason != NoCand && "failed to find the first candidate")((TopCand.Reason != NoCand && "failed to find the first candidate" ) ? static_cast<void> (0) : __assert_fail ("TopCand.Reason != NoCand && \"failed to find the first candidate\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 219, __PRETTY_FUNCTION__)); | |||||||||
220 | } else { | |||||||||
221 | DEBUG(traceCandidate(TopCand))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { traceCandidate(TopCand); } } while (false); | |||||||||
222 | } | |||||||||
223 | ||||||||||
224 | // Pick best from BotCand and TopCand. | |||||||||
225 | DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Top Cand: "; traceCandidate(TopCand ); dbgs() << "Bot Cand: "; traceCandidate(BotCand);; } } while (false) | |||||||||
226 | dbgs() << "Top Cand: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Top Cand: "; traceCandidate(TopCand ); dbgs() << "Bot Cand: "; traceCandidate(BotCand);; } } while (false) | |||||||||
227 | traceCandidate(TopCand);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Top Cand: "; traceCandidate(TopCand ); dbgs() << "Bot Cand: "; traceCandidate(BotCand);; } } while (false) | |||||||||
228 | dbgs() << "Bot Cand: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Top Cand: "; traceCandidate(TopCand ); dbgs() << "Bot Cand: "; traceCandidate(BotCand);; } } while (false) | |||||||||
229 | traceCandidate(BotCand);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Top Cand: "; traceCandidate(TopCand ); dbgs() << "Bot Cand: "; traceCandidate(BotCand);; } } while (false) | |||||||||
230 | )do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Top Cand: "; traceCandidate(TopCand ); dbgs() << "Bot Cand: "; traceCandidate(BotCand);; } } while (false); | |||||||||
231 | SchedCandidate Cand; | |||||||||
232 | if (TopCand.Reason == BotCand.Reason) { | |||||||||
233 | Cand = BotCand; | |||||||||
234 | GenericSchedulerBase::CandReason TopReason = TopCand.Reason; | |||||||||
235 | TopCand.Reason = NoCand; | |||||||||
236 | GenericScheduler::tryCandidate(Cand, TopCand, nullptr); | |||||||||
237 | if (TopCand.Reason != NoCand) { | |||||||||
238 | Cand.setBest(TopCand); | |||||||||
239 | } else { | |||||||||
240 | TopCand.Reason = TopReason; | |||||||||
241 | } | |||||||||
242 | } else { | |||||||||
243 | if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) { | |||||||||
244 | Cand = TopCand; | |||||||||
245 | } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) { | |||||||||
246 | Cand = BotCand; | |||||||||
247 | } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) { | |||||||||
248 | Cand = TopCand; | |||||||||
249 | } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) { | |||||||||
250 | Cand = BotCand; | |||||||||
251 | } else { | |||||||||
252 | if (BotCand.Reason > TopCand.Reason) { | |||||||||
253 | Cand = TopCand; | |||||||||
254 | } else { | |||||||||
255 | Cand = BotCand; | |||||||||
256 | } | |||||||||
257 | } | |||||||||
258 | } | |||||||||
259 | DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Picking: "; traceCandidate(Cand );; } } while (false) | |||||||||
260 | dbgs() << "Picking: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Picking: "; traceCandidate(Cand );; } } while (false) | |||||||||
261 | traceCandidate(Cand);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Picking: "; traceCandidate(Cand );; } } while (false) | |||||||||
262 | )do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Picking: "; traceCandidate(Cand );; } } while (false); | |||||||||
263 | ||||||||||
264 | IsTopNode = Cand.AtTop; | |||||||||
265 | return Cand.SU; | |||||||||
266 | } | |||||||||
267 | ||||||||||
268 | // This function is mostly cut and pasted from | |||||||||
269 | // GenericScheduler::pickNode() | |||||||||
270 | SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { | |||||||||
271 | if (DAG->top() == DAG->bottom()) { | |||||||||
272 | assert(Top.Available.empty() && Top.Pending.empty() &&((Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage") ? static_cast<void> (0) : __assert_fail ("Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && \"ReadyQ garbage\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 273, __PRETTY_FUNCTION__)) | |||||||||
273 | Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage")((Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage") ? static_cast<void> (0) : __assert_fail ("Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && \"ReadyQ garbage\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 273, __PRETTY_FUNCTION__)); | |||||||||
274 | return nullptr; | |||||||||
275 | } | |||||||||
276 | SUnit *SU; | |||||||||
277 | do { | |||||||||
278 | if (RegionPolicy.OnlyTopDown) { | |||||||||
279 | SU = Top.pickOnlyChoice(); | |||||||||
280 | if (!SU) { | |||||||||
281 | CandPolicy NoPolicy; | |||||||||
282 | TopCand.reset(NoPolicy); | |||||||||
283 | pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); | |||||||||
284 | assert(TopCand.Reason != NoCand && "failed to find a candidate")((TopCand.Reason != NoCand && "failed to find a candidate" ) ? static_cast<void> (0) : __assert_fail ("TopCand.Reason != NoCand && \"failed to find a candidate\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 284, __PRETTY_FUNCTION__)); | |||||||||
285 | SU = TopCand.SU; | |||||||||
286 | } | |||||||||
287 | IsTopNode = true; | |||||||||
288 | } else if (RegionPolicy.OnlyBottomUp) { | |||||||||
289 | SU = Bot.pickOnlyChoice(); | |||||||||
290 | if (!SU) { | |||||||||
291 | CandPolicy NoPolicy; | |||||||||
292 | BotCand.reset(NoPolicy); | |||||||||
293 | pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); | |||||||||
294 | assert(BotCand.Reason != NoCand && "failed to find a candidate")((BotCand.Reason != NoCand && "failed to find a candidate" ) ? static_cast<void> (0) : __assert_fail ("BotCand.Reason != NoCand && \"failed to find a candidate\"" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 294, __PRETTY_FUNCTION__)); | |||||||||
295 | SU = BotCand.SU; | |||||||||
296 | } | |||||||||
297 | IsTopNode = false; | |||||||||
298 | } else { | |||||||||
299 | SU = pickNodeBidirectional(IsTopNode); | |||||||||
300 | } | |||||||||
301 | } while (SU->isScheduled); | |||||||||
302 | ||||||||||
303 | if (SU->isTopReady()) | |||||||||
304 | Top.removeReady(SU); | |||||||||
305 | if (SU->isBottomReady()) | |||||||||
306 | Bot.removeReady(SU); | |||||||||
307 | ||||||||||
308 | DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Scheduling SU(" << SU-> NodeNum << ") " << *SU->getInstr(); } } while ( false); | |||||||||
309 | return SU; | |||||||||
310 | } | |||||||||
311 | ||||||||||
312 | GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, | |||||||||
313 | std::unique_ptr<MachineSchedStrategy> S) : | |||||||||
314 | ScheduleDAGMILive(C, std::move(S)), | |||||||||
315 | ST(MF.getSubtarget<SISubtarget>()), | |||||||||
316 | MFI(*MF.getInfo<SIMachineFunctionInfo>()), | |||||||||
317 | StartingOccupancy(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(), | |||||||||
318 | *MF.getFunction())), | |||||||||
319 | MinOccupancy(StartingOccupancy), Stage(0) { | |||||||||
320 | ||||||||||
321 | DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"; } } while (false); | |||||||||
322 | } | |||||||||
323 | ||||||||||
324 | void GCNScheduleDAGMILive::schedule() { | |||||||||
325 | std::vector<MachineInstr*> Unsched; | |||||||||
326 | Unsched.reserve(NumRegionInstrs); | |||||||||
327 | for (auto &I : *this) | |||||||||
328 | Unsched.push_back(&I); | |||||||||
329 | ||||||||||
330 | std::pair<unsigned, unsigned> PressureBefore; | |||||||||
331 | if (LIS) { | |||||||||
332 | DEBUG(dbgs() << "Pressure before scheduling:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Pressure before scheduling:\n" ; } } while (false); | |||||||||
333 | discoverLiveIns(); | |||||||||
334 | PressureBefore = getRealRegPressure(); | |||||||||
335 | } | |||||||||
336 | ||||||||||
337 | ScheduleDAGMILive::schedule(); | |||||||||
338 | if (Stage == 0) | |||||||||
339 | Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); | |||||||||
340 | ||||||||||
341 | if (!LIS) | |||||||||
342 | return; | |||||||||
343 | ||||||||||
344 | // Check the results of scheduling. | |||||||||
345 | GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; | |||||||||
346 | DEBUG(dbgs() << "Pressure after scheduling:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Pressure after scheduling:\n" ; } } while (false); | |||||||||
347 | auto PressureAfter = getRealRegPressure(); | |||||||||
348 | LiveIns.clear(); | |||||||||
349 | ||||||||||
350 | if (PressureAfter.first <= S.SGPRCriticalLimit && | |||||||||
351 | PressureAfter.second <= S.VGPRCriticalLimit) { | |||||||||
352 | DEBUG(dbgs() << "Pressure in desired limits, done.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Pressure in desired limits, done.\n" ; } } while (false); | |||||||||
353 | return; | |||||||||
354 | } | |||||||||
355 | unsigned WavesAfter = getMaxWaves(PressureAfter.first, | |||||||||
356 | PressureAfter.second, MF); | |||||||||
357 | unsigned WavesBefore = getMaxWaves(PressureBefore.first, | |||||||||
358 | PressureBefore.second, MF); | |||||||||
359 | DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore <<do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Occupancy before scheduling: " << WavesBefore << ", after " << WavesAfter << ".\n"; } } while (false) | |||||||||
360 | ", after " << WavesAfter << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Occupancy before scheduling: " << WavesBefore << ", after " << WavesAfter << ".\n"; } } while (false); | |||||||||
361 | ||||||||||
362 | // We could not keep current target occupancy because of the just scheduled | |||||||||
363 | // region. Record new occupancy for next scheduling cycle. | |||||||||
364 | unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); | |||||||||
365 | if (NewOccupancy < MinOccupancy) { | |||||||||
366 | MinOccupancy = NewOccupancy; | |||||||||
367 | DEBUG(dbgs() << "Occupancy lowered for the function to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Occupancy lowered for the function to " << MinOccupancy << ".\n"; } } while (false) | |||||||||
368 | << MinOccupancy << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Occupancy lowered for the function to " << MinOccupancy << ".\n"; } } while (false); | |||||||||
369 | } | |||||||||
370 | ||||||||||
371 | if (WavesAfter >= WavesBefore) | |||||||||
372 | return; | |||||||||
373 | ||||||||||
374 | DEBUG(dbgs() << "Attempting to revert scheduling.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Attempting to revert scheduling.\n" ; } } while (false); | |||||||||
375 | RegionEnd = RegionBegin; | |||||||||
376 | for (MachineInstr *MI : Unsched) { | |||||||||
377 | if (MI->getIterator() != RegionEnd) { | |||||||||
378 | BB->remove(MI); | |||||||||
379 | BB->insert(RegionEnd, MI); | |||||||||
380 | LIS->handleMove(*MI, true); | |||||||||
381 | } | |||||||||
382 | // Reset read-undef flags and update them later. | |||||||||
383 | for (auto &Op : MI->operands()) | |||||||||
384 | if (Op.isReg() && Op.isDef()) | |||||||||
385 | Op.setIsUndef(false); | |||||||||
386 | RegisterOperands RegOpers; | |||||||||
387 | RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); | |||||||||
388 | if (ShouldTrackLaneMasks) { | |||||||||
389 | // Adjust liveness and add missing dead+read-undef flags. | |||||||||
390 | SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); | |||||||||
391 | RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); | |||||||||
392 | } else { | |||||||||
393 | // Adjust for missing dead-def flags. | |||||||||
394 | RegOpers.detectDeadDefs(*MI, *LIS); | |||||||||
395 | } | |||||||||
396 | RegionEnd = MI->getIterator(); | |||||||||
397 | ++RegionEnd; | |||||||||
398 | DEBUG(dbgs() << "Scheduling " << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Scheduling " << *MI; } } while (false); | |||||||||
399 | } | |||||||||
400 | RegionBegin = Unsched.front()->getIterator(); | |||||||||
401 | if (Stage == 0) | |||||||||
402 | Regions.back() = std::make_pair(RegionBegin, RegionEnd); | |||||||||
403 | ||||||||||
404 | placeDebugValues(); | |||||||||
405 | } | |||||||||
406 | ||||||||||
407 | static inline void setMask(const MachineRegisterInfo &MRI, | |||||||||
408 | const SIRegisterInfo *SRI, unsigned Reg, | |||||||||
409 | LaneBitmask &PrevMask, LaneBitmask NewMask, | |||||||||
410 | unsigned &SGPRs, unsigned &VGPRs) { | |||||||||
411 | int NewRegs = countPopulation(NewMask.getAsInteger()) - | |||||||||
412 | countPopulation(PrevMask.getAsInteger()); | |||||||||
413 | if (SRI->isSGPRReg(MRI, Reg)) | |||||||||
414 | SGPRs += NewRegs; | |||||||||
415 | if (SRI->isVGPR(MRI, Reg)) | |||||||||
416 | VGPRs += NewRegs; | |||||||||
417 | assert ((int)SGPRs >= 0 && (int)VGPRs >= 0)(((int)SGPRs >= 0 && (int)VGPRs >= 0) ? static_cast <void> (0) : __assert_fail ("(int)SGPRs >= 0 && (int)VGPRs >= 0" , "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 417, __PRETTY_FUNCTION__)); | |||||||||
418 | PrevMask = NewMask; | |||||||||
419 | } | |||||||||
420 | ||||||||||
421 | void GCNScheduleDAGMILive::discoverLiveIns() { | |||||||||
422 | unsigned SGPRs = 0; | |||||||||
423 | unsigned VGPRs = 0; | |||||||||
424 | ||||||||||
425 | auto I = begin(); | |||||||||
426 | I = skipDebugInstructionsForward(I, I->getParent()->end()); | |||||||||
427 | const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); | |||||||||
428 | SlotIndex SI = LIS->getInstructionIndex(*I).getBaseIndex(); | |||||||||
429 | assert (SI.isValid())((SI.isValid()) ? static_cast<void> (0) : __assert_fail ("SI.isValid()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 429, __PRETTY_FUNCTION__)); | |||||||||
430 | ||||||||||
431 | DEBUG(dbgs() << "Region live-ins:")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Region live-ins:"; } } while ( false); | |||||||||
432 | for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { | |||||||||
433 | unsigned Reg = TargetRegisterInfo::index2VirtReg(I); | |||||||||
434 | if (MRI.reg_nodbg_empty(Reg)) | |||||||||
435 | continue; | |||||||||
436 | const LiveInterval &LI = LIS->getInterval(Reg); | |||||||||
437 | LaneBitmask LaneMask = LaneBitmask::getNone(); | |||||||||
438 | if (LI.hasSubRanges()) { | |||||||||
439 | for (const auto &S : LI.subranges()) | |||||||||
440 | if (S.liveAt(SI)) | |||||||||
441 | LaneMask |= S.LaneMask; | |||||||||
442 | } else if (LI.liveAt(SI)) { | |||||||||
443 | LaneMask = MRI.getMaxLaneMaskForVReg(Reg); | |||||||||
444 | } | |||||||||
445 | ||||||||||
446 | if (LaneMask.any()) { | |||||||||
447 | setMask(MRI, SRI, Reg, LiveIns[Reg], LaneMask, SGPRs, VGPRs); | |||||||||
448 | ||||||||||
449 | DEBUG(dbgs() << ' ' << PrintVRegOrUnit(Reg, SRI) << ':'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << ' ' << PrintVRegOrUnit(Reg , SRI) << ':' << PrintLaneMask(LiveIns[Reg]); } } while (false) | |||||||||
450 | << PrintLaneMask(LiveIns[Reg]))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << ' ' << PrintVRegOrUnit(Reg , SRI) << ':' << PrintLaneMask(LiveIns[Reg]); } } while (false); | |||||||||
451 | } | |||||||||
452 | } | |||||||||
453 | ||||||||||
454 | LiveInPressure = std::make_pair(SGPRs, VGPRs); | |||||||||
455 | ||||||||||
456 | DEBUG(dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRs << "\nVGPR = " << VGPRs << '\n' ; } } while (false) | |||||||||
457 | << "\nVGPR = " << VGPRs << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRs << "\nVGPR = " << VGPRs << '\n' ; } } while (false); | |||||||||
458 | } | |||||||||
459 | ||||||||||
460 | std::pair<unsigned, unsigned> | |||||||||
461 | GCNScheduleDAGMILive::getRealRegPressure() const { | |||||||||
462 | unsigned SGPRs, MaxSGPRs, VGPRs, MaxVGPRs; | |||||||||
463 | SGPRs = MaxSGPRs = LiveInPressure.first; | |||||||||
464 | VGPRs = MaxVGPRs = LiveInPressure.second; | |||||||||
465 | ||||||||||
466 | const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI); | |||||||||
467 | DenseMap<unsigned, LaneBitmask> LiveRegs(LiveIns); | |||||||||
468 | ||||||||||
469 | for (const MachineInstr &MI : *this) { | |||||||||
470 | if (MI.isDebugValue()) | |||||||||
471 | continue; | |||||||||
472 | SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex(); | |||||||||
473 | assert (SI.isValid())((SI.isValid()) ? static_cast<void> (0) : __assert_fail ("SI.isValid()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303041/lib/Target/AMDGPU/GCNSchedStrategy.cpp" , 473, __PRETTY_FUNCTION__)); | |||||||||
474 | ||||||||||
475 | // Remove dead registers or mask bits. | |||||||||
476 | for (auto &It : LiveRegs) { | |||||||||
477 | if (It.second.none()) | |||||||||
478 | continue; | |||||||||
479 | const LiveInterval &LI = LIS->getInterval(It.first); | |||||||||
480 | if (LI.hasSubRanges()) { | |||||||||
481 | for (const auto &S : LI.subranges()) | |||||||||
482 | if (!S.liveAt(SI)) | |||||||||
483 | setMask(MRI, SRI, It.first, It.second, It.second & ~S.LaneMask, | |||||||||
484 | SGPRs, VGPRs); | |||||||||
485 | } else if (!LI.liveAt(SI)) { | |||||||||
486 | setMask(MRI, SRI, It.first, It.second, LaneBitmask::getNone(), | |||||||||
487 | SGPRs, VGPRs); | |||||||||
488 | } | |||||||||
489 | } | |||||||||
490 | ||||||||||
491 | // Add new registers or mask bits. | |||||||||
492 | for (const auto &MO : MI.defs()) { | |||||||||
493 | if (!MO.isReg()) | |||||||||
494 | continue; | |||||||||
495 | unsigned Reg = MO.getReg(); | |||||||||
496 | if (!TargetRegisterInfo::isVirtualRegister(Reg)) | |||||||||
497 | continue; | |||||||||
498 | unsigned SubRegIdx = MO.getSubReg(); | |||||||||
499 | LaneBitmask LaneMask = SubRegIdx != 0 | |||||||||
500 | ? TRI->getSubRegIndexLaneMask(SubRegIdx) | |||||||||
501 | : MRI.getMaxLaneMaskForVReg(Reg); | |||||||||
502 | LaneBitmask &LM = LiveRegs[Reg]; | |||||||||
503 | setMask(MRI, SRI, Reg, LM, LM | LaneMask, SGPRs, VGPRs); | |||||||||
504 | } | |||||||||
505 | MaxSGPRs = std::max(MaxSGPRs, SGPRs); | |||||||||
506 | MaxVGPRs = std::max(MaxVGPRs, VGPRs); | |||||||||
507 | } | |||||||||
508 | ||||||||||
509 | DEBUG(dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRsdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRs << "\nVGPR = " << MaxVGPRs << '\n'; } } while (false) | |||||||||
510 | << "\nVGPR = " << MaxVGPRs << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRs << "\nVGPR = " << MaxVGPRs << '\n'; } } while (false); | |||||||||
511 | ||||||||||
512 | return std::make_pair(MaxSGPRs, MaxVGPRs); | |||||||||
513 | } | |||||||||
514 | ||||||||||
515 | void GCNScheduleDAGMILive::finalizeSchedule() { | |||||||||
516 | // Retry function scheduling if we found resulting occupancy and it is | |||||||||
517 | // lower than used for first pass scheduling. This will give more freedom | |||||||||
518 | // to schedule low register pressure blocks. | |||||||||
519 | // Code is partially copied from MachineSchedulerBase::scheduleRegions(). | |||||||||
520 | ||||||||||
521 | if (!LIS || StartingOccupancy <= MinOccupancy) | |||||||||
| ||||||||||
522 | return; | |||||||||
523 | ||||||||||
524 | DEBUG(dbgs() << "Retrying function scheduling with lowest recorded occupancy "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Retrying function scheduling with lowest recorded occupancy " << MinOccupancy << ".\n"; } } while (false) | |||||||||
525 | << MinOccupancy << ".\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Retrying function scheduling with lowest recorded occupancy " << MinOccupancy << ".\n"; } } while (false); | |||||||||
526 | ||||||||||
527 | Stage++; | |||||||||
528 | GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; | |||||||||
529 | S.setTargetOccupancy(MinOccupancy); | |||||||||
530 | ||||||||||
531 | MachineBasicBlock *MBB = nullptr; | |||||||||
532 | for (auto Region : Regions) { | |||||||||
533 | RegionBegin = Region.first; | |||||||||
534 | RegionEnd = Region.second; | |||||||||
535 | ||||||||||
536 | if (RegionBegin->getParent() != MBB) { | |||||||||
537 | if (MBB) finishBlock(); | |||||||||
538 | MBB = RegionBegin->getParent(); | |||||||||
539 | startBlock(MBB); | |||||||||
540 | } | |||||||||
541 | ||||||||||
542 | unsigned NumRegionInstrs = std::distance(begin(), end()); | |||||||||
543 | enterRegion(MBB, begin(), end(), NumRegionInstrs); | |||||||||
544 | ||||||||||
545 | // Skip empty scheduling regions (0 or 1 schedulable instructions). | |||||||||
546 | if (begin() == end() || begin() == std::prev(end())) { | |||||||||
547 | exitRegion(); | |||||||||
548 | continue; | |||||||||
549 | } | |||||||||
550 | DEBUG(dbgs() << "********** MI Scheduling **********\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "********** MI Scheduling **********\n" ; } } while (false); | |||||||||
551 | DEBUG(dbgs() << MF.getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << MF.getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *begin() << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'; } } while (false) | |||||||||
| ||||||||||
552 | << ":BB#" << MBB->getNumber() << " " << MBB->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << MF.getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *begin() << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'; } } while (false) | |||||||||
553 | << "\n From: " << *begin() << " To: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << MF.getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *begin() << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'; } } while (false) | |||||||||
554 | if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << MF.getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *begin() << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'; } } while (false) | |||||||||
555 | else dbgs() << "End";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << MF.getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *begin() << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'; } } while (false) | |||||||||
556 | dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << MF.getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *begin() << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'; } } while (false); | |||||||||
557 | ||||||||||
558 | schedule(); | |||||||||
559 | ||||||||||
560 | exitRegion(); | |||||||||
561 | } | |||||||||
562 | finishBlock(); | |||||||||
563 | LiveIns.shrink_and_clear(); | |||||||||
564 | } |