File: | lib/CodeGen/ScheduleDAGInstrs.cpp |
Location: | line 891, column 9 |
Description: | Called C++ object pointer is null |
1 | //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// | |||
2 | // | |||
3 | // The LLVM Compiler Infrastructure | |||
4 | // | |||
5 | // This file is distributed under the University of Illinois Open Source | |||
6 | // License. See LICENSE.TXT for details. | |||
7 | // | |||
8 | //===----------------------------------------------------------------------===// | |||
9 | // | |||
10 | // This implements the ScheduleDAGInstrs class, which implements re-scheduling | |||
11 | // of MachineInstrs. | |||
12 | // | |||
13 | //===----------------------------------------------------------------------===// | |||
14 | ||||
15 | #include "llvm/CodeGen/ScheduleDAGInstrs.h" | |||
16 | #include "llvm/ADT/IntEqClasses.h" | |||
17 | #include "llvm/ADT/MapVector.h" | |||
18 | #include "llvm/ADT/SmallPtrSet.h" | |||
19 | #include "llvm/ADT/SmallSet.h" | |||
20 | #include "llvm/Analysis/AliasAnalysis.h" | |||
21 | #include "llvm/Analysis/ValueTracking.h" | |||
22 | #include "llvm/CodeGen/MachineFunctionPass.h" | |||
23 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
24 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
25 | #include "llvm/CodeGen/MachineMemOperand.h" | |||
26 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
27 | #include "llvm/CodeGen/PseudoSourceValue.h" | |||
28 | #include "llvm/CodeGen/RegisterPressure.h" | |||
29 | #include "llvm/CodeGen/ScheduleDFS.h" | |||
30 | #include "llvm/IR/Operator.h" | |||
31 | #include "llvm/Support/CommandLine.h" | |||
32 | #include "llvm/Support/Debug.h" | |||
33 | #include "llvm/Support/Format.h" | |||
34 | #include "llvm/Support/raw_ostream.h" | |||
35 | #include "llvm/Target/TargetInstrInfo.h" | |||
36 | #include "llvm/Target/TargetMachine.h" | |||
37 | #include "llvm/Target/TargetRegisterInfo.h" | |||
38 | #include "llvm/Target/TargetSubtargetInfo.h" | |||
39 | #include <queue> | |||
40 | ||||
41 | using namespace llvm; | |||
42 | ||||
43 | #define DEBUG_TYPE"misched" "misched" | |||
44 | ||||
45 | static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, | |||
46 | cl::ZeroOrMore, cl::init(false), | |||
47 | cl::desc("Enable use of AA during MI DAG construction")); | |||
48 | ||||
49 | static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, | |||
50 | cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction")); | |||
51 | ||||
52 | ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, | |||
53 | const MachineLoopInfo *mli, | |||
54 | bool RemoveKillFlags) | |||
55 | : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), | |||
56 | RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), | |||
57 | TrackLaneMasks(false), FirstDbgValue(nullptr) { | |||
58 | DbgValues.clear(); | |||
59 | ||||
60 | const TargetSubtargetInfo &ST = mf.getSubtarget(); | |||
61 | SchedModel.init(ST.getSchedModel(), &ST, TII); | |||
62 | } | |||
63 | ||||
64 | /// getUnderlyingObjectFromInt - This is the function that does the work of | |||
65 | /// looking through basic ptrtoint+arithmetic+inttoptr sequences. | |||
66 | static const Value *getUnderlyingObjectFromInt(const Value *V) { | |||
67 | do { | |||
68 | if (const Operator *U = dyn_cast<Operator>(V)) { | |||
69 | // If we find a ptrtoint, we can transfer control back to the | |||
70 | // regular getUnderlyingObjectFromInt. | |||
71 | if (U->getOpcode() == Instruction::PtrToInt) | |||
72 | return U->getOperand(0); | |||
73 | // If we find an add of a constant, a multiplied value, or a phi, it's | |||
74 | // likely that the other operand will lead us to the base | |||
75 | // object. We don't have to worry about the case where the | |||
76 | // object address is somehow being computed by the multiply, | |||
77 | // because our callers only care when the result is an | |||
78 | // identifiable object. | |||
79 | if (U->getOpcode() != Instruction::Add || | |||
80 | (!isa<ConstantInt>(U->getOperand(1)) && | |||
81 | Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && | |||
82 | !isa<PHINode>(U->getOperand(1)))) | |||
83 | return V; | |||
84 | V = U->getOperand(0); | |||
85 | } else { | |||
86 | return V; | |||
87 | } | |||
88 | assert(V->getType()->isIntegerTy() && "Unexpected operand type!")((V->getType()->isIntegerTy() && "Unexpected operand type!" ) ? static_cast<void> (0) : __assert_fail ("V->getType()->isIntegerTy() && \"Unexpected operand type!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 88, __PRETTY_FUNCTION__)); | |||
89 | } while (1); | |||
90 | } | |||
91 | ||||
92 | /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects | |||
93 | /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. | |||
94 | static void getUnderlyingObjects(const Value *V, | |||
95 | SmallVectorImpl<Value *> &Objects, | |||
96 | const DataLayout &DL) { | |||
97 | SmallPtrSet<const Value *, 16> Visited; | |||
98 | SmallVector<const Value *, 4> Working(1, V); | |||
99 | do { | |||
100 | V = Working.pop_back_val(); | |||
101 | ||||
102 | SmallVector<Value *, 4> Objs; | |||
103 | GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); | |||
104 | ||||
105 | for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); | |||
106 | I != IE; ++I) { | |||
107 | V = *I; | |||
108 | if (!Visited.insert(V).second) | |||
109 | continue; | |||
110 | if (Operator::getOpcode(V) == Instruction::IntToPtr) { | |||
111 | const Value *O = | |||
112 | getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); | |||
113 | if (O->getType()->isPointerTy()) { | |||
114 | Working.push_back(O); | |||
115 | continue; | |||
116 | } | |||
117 | } | |||
118 | Objects.push_back(const_cast<Value *>(V)); | |||
119 | } | |||
120 | } while (!Working.empty()); | |||
121 | } | |||
122 | ||||
123 | typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType; | |||
124 | typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4> | |||
125 | UnderlyingObjectsVector; | |||
126 | ||||
127 | /// getUnderlyingObjectsForInstr - If this machine instr has memory reference | |||
128 | /// information and it can be tracked to a normal reference to a known | |||
129 | /// object, return the Value for that object. | |||
130 | static void getUnderlyingObjectsForInstr(const MachineInstr *MI, | |||
131 | const MachineFrameInfo *MFI, | |||
132 | UnderlyingObjectsVector &Objects, | |||
133 | const DataLayout &DL) { | |||
134 | if (!MI->hasOneMemOperand() || | |||
135 | (!(*MI->memoperands_begin())->getValue() && | |||
136 | !(*MI->memoperands_begin())->getPseudoValue()) || | |||
137 | (*MI->memoperands_begin())->isVolatile()) | |||
138 | return; | |||
139 | ||||
140 | if (const PseudoSourceValue *PSV = | |||
141 | (*MI->memoperands_begin())->getPseudoValue()) { | |||
142 | // Function that contain tail calls don't have unique PseudoSourceValue | |||
143 | // objects. Two PseudoSourceValues might refer to the same or overlapping | |||
144 | // locations. The client code calling this function assumes this is not the | |||
145 | // case. So return a conservative answer of no known object. | |||
146 | if (MFI->hasTailCall()) | |||
147 | return; | |||
148 | ||||
149 | // For now, ignore PseudoSourceValues which may alias LLVM IR values | |||
150 | // because the code that uses this function has no way to cope with | |||
151 | // such aliases. | |||
152 | if (!PSV->isAliased(MFI)) { | |||
153 | bool MayAlias = PSV->mayAlias(MFI); | |||
154 | Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); | |||
155 | } | |||
156 | return; | |||
157 | } | |||
158 | ||||
159 | const Value *V = (*MI->memoperands_begin())->getValue(); | |||
160 | if (!V) | |||
161 | return; | |||
162 | ||||
163 | SmallVector<Value *, 4> Objs; | |||
164 | getUnderlyingObjects(V, Objs, DL); | |||
165 | ||||
166 | for (Value *V : Objs) { | |||
167 | if (!isIdentifiedObject(V)) { | |||
168 | Objects.clear(); | |||
169 | return; | |||
170 | } | |||
171 | ||||
172 | Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); | |||
173 | } | |||
174 | } | |||
175 | ||||
176 | void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { | |||
177 | BB = bb; | |||
178 | } | |||
179 | ||||
180 | void ScheduleDAGInstrs::finishBlock() { | |||
181 | // Subclasses should no longer refer to the old block. | |||
182 | BB = nullptr; | |||
183 | } | |||
184 | ||||
185 | /// Initialize the DAG and common scheduler state for the current scheduling | |||
186 | /// region. This does not actually create the DAG, only clears it. The | |||
187 | /// scheduling driver may call BuildSchedGraph multiple times per scheduling | |||
188 | /// region. | |||
189 | void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, | |||
190 | MachineBasicBlock::iterator begin, | |||
191 | MachineBasicBlock::iterator end, | |||
192 | unsigned regioninstrs) { | |||
193 | assert(bb == BB && "startBlock should set BB")((bb == BB && "startBlock should set BB") ? static_cast <void> (0) : __assert_fail ("bb == BB && \"startBlock should set BB\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 193, __PRETTY_FUNCTION__)); | |||
194 | RegionBegin = begin; | |||
195 | RegionEnd = end; | |||
196 | NumRegionInstrs = regioninstrs; | |||
197 | } | |||
198 | ||||
199 | /// Close the current scheduling region. Don't clear any state in case the | |||
200 | /// driver wants to refer to the previous scheduling region. | |||
201 | void ScheduleDAGInstrs::exitRegion() { | |||
202 | // Nothing to do. | |||
203 | } | |||
204 | ||||
205 | /// addSchedBarrierDeps - Add dependencies from instructions in the current | |||
206 | /// list of instructions being scheduled to scheduling barrier by adding | |||
207 | /// the exit SU to the register defs and use list. This is because we want to | |||
208 | /// make sure instructions which define registers that are either used by | |||
209 | /// the terminator or are live-out are properly scheduled. This is | |||
210 | /// especially important when the definition latency of the return value(s) | |||
211 | /// are too high to be hidden by the branch or when the liveout registers | |||
212 | /// used by instructions in the fallthrough block. | |||
213 | void ScheduleDAGInstrs::addSchedBarrierDeps() { | |||
214 | MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr; | |||
215 | ExitSU.setInstr(ExitMI); | |||
216 | bool AllDepKnown = ExitMI && | |||
217 | (ExitMI->isCall() || ExitMI->isBarrier()); | |||
218 | if (ExitMI && AllDepKnown) { | |||
219 | // If it's a call or a barrier, add dependencies on the defs and uses of | |||
220 | // instruction. | |||
221 | for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { | |||
222 | const MachineOperand &MO = ExitMI->getOperand(i); | |||
223 | if (!MO.isReg() || MO.isDef()) continue; | |||
224 | unsigned Reg = MO.getReg(); | |||
225 | if (Reg == 0) continue; | |||
226 | ||||
227 | if (TRI->isPhysicalRegister(Reg)) | |||
228 | Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); | |||
229 | else if (MO.readsReg()) // ignore undef operands | |||
230 | addVRegUseDeps(&ExitSU, i); | |||
231 | } | |||
232 | } else { | |||
233 | // For others, e.g. fallthrough, conditional branch, assume the exit | |||
234 | // uses all the registers that are livein to the successor blocks. | |||
235 | assert(Uses.empty() && "Uses in set before adding deps?")((Uses.empty() && "Uses in set before adding deps?") ? static_cast<void> (0) : __assert_fail ("Uses.empty() && \"Uses in set before adding deps?\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 235, __PRETTY_FUNCTION__)); | |||
236 | for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), | |||
237 | SE = BB->succ_end(); SI != SE; ++SI) | |||
238 | for (const auto &LI : (*SI)->liveins()) { | |||
239 | if (!Uses.contains(LI.PhysReg)) | |||
240 | Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg)); | |||
241 | } | |||
242 | } | |||
243 | } | |||
244 | ||||
245 | /// MO is an operand of SU's instruction that defines a physical register. Add | |||
246 | /// data dependencies from SU to any uses of the physical register. | |||
247 | void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { | |||
248 | const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); | |||
249 | assert(MO.isDef() && "expect physreg def")((MO.isDef() && "expect physreg def") ? static_cast< void> (0) : __assert_fail ("MO.isDef() && \"expect physreg def\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 249, __PRETTY_FUNCTION__)); | |||
250 | ||||
251 | // Ask the target if address-backscheduling is desirable, and if so how much. | |||
252 | const TargetSubtargetInfo &ST = MF.getSubtarget(); | |||
253 | ||||
254 | for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); | |||
255 | Alias.isValid(); ++Alias) { | |||
256 | if (!Uses.contains(*Alias)) | |||
257 | continue; | |||
258 | for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { | |||
259 | SUnit *UseSU = I->SU; | |||
260 | if (UseSU == SU) | |||
261 | continue; | |||
262 | ||||
263 | // Adjust the dependence latency using operand def/use information, | |||
264 | // then allow the target to perform its own adjustments. | |||
265 | int UseOp = I->OpIdx; | |||
266 | MachineInstr *RegUse = nullptr; | |||
267 | SDep Dep; | |||
268 | if (UseOp < 0) | |||
269 | Dep = SDep(SU, SDep::Artificial); | |||
270 | else { | |||
271 | // Set the hasPhysRegDefs only for physreg defs that have a use within | |||
272 | // the scheduling region. | |||
273 | SU->hasPhysRegDefs = true; | |||
274 | Dep = SDep(SU, SDep::Data, *Alias); | |||
275 | RegUse = UseSU->getInstr(); | |||
276 | } | |||
277 | Dep.setLatency( | |||
278 | SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, | |||
279 | UseOp)); | |||
280 | ||||
281 | ST.adjustSchedDependency(SU, UseSU, Dep); | |||
282 | UseSU->addPred(Dep); | |||
283 | } | |||
284 | } | |||
285 | } | |||
286 | ||||
287 | /// addPhysRegDeps - Add register dependencies (data, anti, and output) from | |||
288 | /// this SUnit to following instructions in the same scheduling region that | |||
289 | /// depend the physical register referenced at OperIdx. | |||
290 | void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { | |||
291 | MachineInstr *MI = SU->getInstr(); | |||
292 | MachineOperand &MO = MI->getOperand(OperIdx); | |||
293 | ||||
294 | // Optionally add output and anti dependencies. For anti | |||
295 | // dependencies we use a latency of 0 because for a multi-issue | |||
296 | // target we want to allow the defining instruction to issue | |||
297 | // in the same cycle as the using instruction. | |||
298 | // TODO: Using a latency of 1 here for output dependencies assumes | |||
299 | // there's no cost for reusing registers. | |||
300 | SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; | |||
301 | for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); | |||
302 | Alias.isValid(); ++Alias) { | |||
303 | if (!Defs.contains(*Alias)) | |||
304 | continue; | |||
305 | for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { | |||
306 | SUnit *DefSU = I->SU; | |||
307 | if (DefSU == &ExitSU) | |||
308 | continue; | |||
309 | if (DefSU != SU && | |||
310 | (Kind != SDep::Output || !MO.isDead() || | |||
311 | !DefSU->getInstr()->registerDefIsDead(*Alias))) { | |||
312 | if (Kind == SDep::Anti) | |||
313 | DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); | |||
314 | else { | |||
315 | SDep Dep(SU, Kind, /*Reg=*/*Alias); | |||
316 | Dep.setLatency( | |||
317 | SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); | |||
318 | DefSU->addPred(Dep); | |||
319 | } | |||
320 | } | |||
321 | } | |||
322 | } | |||
323 | ||||
324 | if (!MO.isDef()) { | |||
325 | SU->hasPhysRegUses = true; | |||
326 | // Either insert a new Reg2SUnits entry with an empty SUnits list, or | |||
327 | // retrieve the existing SUnits list for this register's uses. | |||
328 | // Push this SUnit on the use list. | |||
329 | Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); | |||
330 | if (RemoveKillFlags) | |||
331 | MO.setIsKill(false); | |||
332 | } | |||
333 | else { | |||
334 | addPhysRegDataDeps(SU, OperIdx); | |||
335 | unsigned Reg = MO.getReg(); | |||
336 | ||||
337 | // clear this register's use list | |||
338 | if (Uses.contains(Reg)) | |||
339 | Uses.eraseAll(Reg); | |||
340 | ||||
341 | if (!MO.isDead()) { | |||
342 | Defs.eraseAll(Reg); | |||
343 | } else if (SU->isCall) { | |||
344 | // Calls will not be reordered because of chain dependencies (see | |||
345 | // below). Since call operands are dead, calls may continue to be added | |||
346 | // to the DefList making dependence checking quadratic in the size of | |||
347 | // the block. Instead, we leave only one call at the back of the | |||
348 | // DefList. | |||
349 | Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); | |||
350 | Reg2SUnitsMap::iterator B = P.first; | |||
351 | Reg2SUnitsMap::iterator I = P.second; | |||
352 | for (bool isBegin = I == B; !isBegin; /* empty */) { | |||
353 | isBegin = (--I) == B; | |||
354 | if (!I->SU->isCall) | |||
355 | break; | |||
356 | I = Defs.erase(I); | |||
357 | } | |||
358 | } | |||
359 | ||||
360 | // Defs are pushed in the order they are visited and never reordered. | |||
361 | Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); | |||
362 | } | |||
363 | } | |||
364 | ||||
365 | LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const | |||
366 | { | |||
367 | unsigned Reg = MO.getReg(); | |||
368 | // No point in tracking lanemasks if we don't have interesting subregisters. | |||
369 | const TargetRegisterClass &RC = *MRI.getRegClass(Reg); | |||
370 | if (!RC.HasDisjunctSubRegs) | |||
371 | return ~0u; | |||
372 | ||||
373 | unsigned SubReg = MO.getSubReg(); | |||
374 | if (SubReg == 0) | |||
375 | return RC.getLaneMask(); | |||
376 | return TRI->getSubRegIndexLaneMask(SubReg); | |||
377 | } | |||
378 | ||||
379 | /// addVRegDefDeps - Add register output and data dependencies from this SUnit | |||
380 | /// to instructions that occur later in the same scheduling region if they read | |||
381 | /// from or write to the virtual register defined at OperIdx. | |||
382 | /// | |||
383 | /// TODO: Hoist loop induction variable increments. This has to be | |||
384 | /// reevaluated. Generally, IV scheduling should be done before coalescing. | |||
385 | void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { | |||
386 | MachineInstr *MI = SU->getInstr(); | |||
387 | MachineOperand &MO = MI->getOperand(OperIdx); | |||
388 | unsigned Reg = MO.getReg(); | |||
389 | ||||
390 | LaneBitmask DefLaneMask; | |||
391 | LaneBitmask KillLaneMask; | |||
392 | if (TrackLaneMasks) { | |||
393 | bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); | |||
394 | DefLaneMask = getLaneMaskForMO(MO); | |||
395 | // If we have a <read-undef> flag, none of the lane values comes from an | |||
396 | // earlier instruction. | |||
397 | KillLaneMask = IsKill ? ~0u : DefLaneMask; | |||
398 | ||||
399 | // Clear undef flag, we'll re-add it later once we know which subregister | |||
400 | // Def is first. | |||
401 | MO.setIsUndef(false); | |||
402 | } else { | |||
403 | DefLaneMask = ~0u; | |||
404 | KillLaneMask = ~0u; | |||
405 | } | |||
406 | ||||
407 | if (MO.isDead()) { | |||
408 | assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&((CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && "Dead defs should have no uses") ? static_cast<void> ( 0) : __assert_fail ("CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && \"Dead defs should have no uses\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 409, __PRETTY_FUNCTION__)) | |||
409 | "Dead defs should have no uses")((CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && "Dead defs should have no uses") ? static_cast<void> ( 0) : __assert_fail ("CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && \"Dead defs should have no uses\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 409, __PRETTY_FUNCTION__)); | |||
410 | } else { | |||
411 | // Add data dependence to all uses we found so far. | |||
412 | const TargetSubtargetInfo &ST = MF.getSubtarget(); | |||
413 | for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), | |||
414 | E = CurrentVRegUses.end(); I != E; /*empty*/) { | |||
415 | LaneBitmask LaneMask = I->LaneMask; | |||
416 | // Ignore uses of other lanes. | |||
417 | if ((LaneMask & KillLaneMask) == 0) { | |||
418 | ++I; | |||
419 | continue; | |||
420 | } | |||
421 | ||||
422 | if ((LaneMask & DefLaneMask) != 0) { | |||
423 | SUnit *UseSU = I->SU; | |||
424 | MachineInstr *Use = UseSU->getInstr(); | |||
425 | SDep Dep(SU, SDep::Data, Reg); | |||
426 | Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, | |||
427 | I->OperandIndex)); | |||
428 | ST.adjustSchedDependency(SU, UseSU, Dep); | |||
429 | UseSU->addPred(Dep); | |||
430 | } | |||
431 | ||||
432 | LaneMask &= ~KillLaneMask; | |||
433 | // If we found a Def for all lanes of this use, remove it from the list. | |||
434 | if (LaneMask != 0) { | |||
435 | I->LaneMask = LaneMask; | |||
436 | ++I; | |||
437 | } else | |||
438 | I = CurrentVRegUses.erase(I); | |||
439 | } | |||
440 | } | |||
441 | ||||
442 | // Shortcut: Singly defined vregs do not have output/anti dependencies. | |||
443 | if (MRI.hasOneDef(Reg)) | |||
444 | return; | |||
445 | ||||
446 | // Add output dependence to the next nearest defs of this vreg. | |||
447 | // | |||
448 | // Unless this definition is dead, the output dependence should be | |||
449 | // transitively redundant with antidependencies from this definition's | |||
450 | // uses. We're conservative for now until we have a way to guarantee the uses | |||
451 | // are not eliminated sometime during scheduling. The output dependence edge | |||
452 | // is also useful if output latency exceeds def-use latency. | |||
453 | LaneBitmask LaneMask = DefLaneMask; | |||
454 | for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), | |||
455 | CurrentVRegDefs.end())) { | |||
456 | // Ignore defs for other lanes. | |||
457 | if ((V2SU.LaneMask & LaneMask) == 0) | |||
458 | continue; | |||
459 | // Add an output dependence. | |||
460 | SUnit *DefSU = V2SU.SU; | |||
461 | // Ignore additional defs of the same lanes in one instruction. This can | |||
462 | // happen because lanemasks are shared for targets with too many | |||
463 | // subregisters. We also use some representration tricks/hacks where we | |||
464 | // add super-register defs/uses, to imply that although we only access parts | |||
465 | // of the reg we care about the full one. | |||
466 | if (DefSU == SU) | |||
467 | continue; | |||
468 | SDep Dep(SU, SDep::Output, Reg); | |||
469 | Dep.setLatency( | |||
470 | SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); | |||
471 | DefSU->addPred(Dep); | |||
472 | ||||
473 | // Update current definition. This can get tricky if the def was about a | |||
474 | // bigger lanemask before. We then have to shrink it and create a new | |||
475 | // VReg2SUnit for the non-overlapping part. | |||
476 | LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; | |||
477 | LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; | |||
478 | if (NonOverlapMask != 0) | |||
479 | CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); | |||
480 | V2SU.SU = SU; | |||
481 | V2SU.LaneMask = OverlapMask; | |||
482 | } | |||
483 | // If there was no CurrentVRegDefs entry for some lanes yet, create one. | |||
484 | if (LaneMask != 0) | |||
485 | CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); | |||
486 | } | |||
487 | ||||
488 | /// addVRegUseDeps - Add a register data dependency if the instruction that | |||
489 | /// defines the virtual register used at OperIdx is mapped to an SUnit. Add a | |||
490 | /// register antidependency from this SUnit to instructions that occur later in | |||
491 | /// the same scheduling region if they write the virtual register. | |||
492 | /// | |||
493 | /// TODO: Handle ExitSU "uses" properly. | |||
494 | void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { | |||
495 | const MachineInstr *MI = SU->getInstr(); | |||
496 | const MachineOperand &MO = MI->getOperand(OperIdx); | |||
497 | unsigned Reg = MO.getReg(); | |||
498 | ||||
499 | // Remember the use. Data dependencies will be added when we find the def. | |||
500 | LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; | |||
501 | CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); | |||
502 | ||||
503 | // Add antidependences to the following defs of the vreg. | |||
504 | for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), | |||
505 | CurrentVRegDefs.end())) { | |||
506 | // Ignore defs for unrelated lanes. | |||
507 | LaneBitmask PrevDefLaneMask = V2SU.LaneMask; | |||
508 | if ((PrevDefLaneMask & LaneMask) == 0) | |||
509 | continue; | |||
510 | if (V2SU.SU == SU) | |||
511 | continue; | |||
512 | ||||
513 | V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); | |||
514 | } | |||
515 | } | |||
516 | ||||
517 | /// Return true if MI is an instruction we are unable to reason about | |||
518 | /// (like a call or something with unmodeled side effects). | |||
519 | static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { | |||
520 | return MI->isCall() || MI->hasUnmodeledSideEffects() || | |||
521 | (MI->hasOrderedMemoryRef() && | |||
522 | (!MI->mayLoad() || !MI->isInvariantLoad(AA))); | |||
523 | } | |||
524 | ||||
525 | // This MI might have either incomplete info, or known to be unsafe | |||
526 | // to deal with (i.e. volatile object). | |||
527 | static inline bool isUnsafeMemoryObject(MachineInstr *MI, | |||
528 | const MachineFrameInfo *MFI, | |||
529 | const DataLayout &DL) { | |||
530 | if (!MI || MI->memoperands_empty()) | |||
531 | return true; | |||
532 | // We purposefully do no check for hasOneMemOperand() here | |||
533 | // in hope to trigger an assert downstream in order to | |||
534 | // finish implementation. | |||
535 | if ((*MI->memoperands_begin())->isVolatile() || | |||
536 | MI->hasUnmodeledSideEffects()) | |||
537 | return true; | |||
538 | ||||
539 | if ((*MI->memoperands_begin())->getPseudoValue()) { | |||
540 | // Similarly to getUnderlyingObjectForInstr: | |||
541 | // For now, ignore PseudoSourceValues which may alias LLVM IR values | |||
542 | // because the code that uses this function has no way to cope with | |||
543 | // such aliases. | |||
544 | return true; | |||
545 | } | |||
546 | ||||
547 | const Value *V = (*MI->memoperands_begin())->getValue(); | |||
548 | if (!V) | |||
549 | return true; | |||
550 | ||||
551 | SmallVector<Value *, 4> Objs; | |||
552 | getUnderlyingObjects(V, Objs, DL); | |||
553 | for (Value *V : Objs) { | |||
554 | // Does this pointer refer to a distinct and identifiable object? | |||
555 | if (!isIdentifiedObject(V)) | |||
556 | return true; | |||
557 | } | |||
558 | ||||
559 | return false; | |||
560 | } | |||
561 | ||||
562 | /// This returns true if the two MIs need a chain edge between them. | |||
563 | /// If these are not even memory operations, we still may need | |||
564 | /// chain deps between them. The question really is - could | |||
565 | /// these two MIs be reordered during scheduling from memory dependency | |||
566 | /// point of view. | |||
567 | static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, | |||
568 | const DataLayout &DL, MachineInstr *MIa, | |||
569 | MachineInstr *MIb) { | |||
570 | const MachineFunction *MF = MIa->getParent()->getParent(); | |||
571 | const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); | |||
572 | ||||
573 | // Cover a trivial case - no edge is need to itself. | |||
574 | if (MIa == MIb) | |||
575 | return false; | |||
576 | ||||
577 | // Let the target decide if memory accesses cannot possibly overlap. | |||
578 | if ((MIa->mayLoad() || MIa->mayStore()) && | |||
579 | (MIb->mayLoad() || MIb->mayStore())) | |||
580 | if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA)) | |||
581 | return false; | |||
582 | ||||
583 | // FIXME: Need to handle multiple memory operands to support all targets. | |||
584 | if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) | |||
585 | return true; | |||
586 | ||||
587 | if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL)) | |||
588 | return true; | |||
589 | ||||
590 | // If we are dealing with two "normal" loads, we do not need an edge | |||
591 | // between them - they could be reordered. | |||
592 | if (!MIa->mayStore() && !MIb->mayStore()) | |||
593 | return false; | |||
594 | ||||
595 | // To this point analysis is generic. From here on we do need AA. | |||
596 | if (!AA) | |||
597 | return true; | |||
598 | ||||
599 | MachineMemOperand *MMOa = *MIa->memoperands_begin(); | |||
600 | MachineMemOperand *MMOb = *MIb->memoperands_begin(); | |||
601 | ||||
602 | if (!MMOa->getValue() || !MMOb->getValue()) | |||
603 | return true; | |||
604 | ||||
605 | // The following interface to AA is fashioned after DAGCombiner::isAlias | |||
606 | // and operates with MachineMemOperand offset with some important | |||
607 | // assumptions: | |||
608 | // - LLVM fundamentally assumes flat address spaces. | |||
609 | // - MachineOperand offset can *only* result from legalization and | |||
610 | // cannot affect queries other than the trivial case of overlap | |||
611 | // checking. | |||
612 | // - These offsets never wrap and never step outside | |||
613 | // of allocated objects. | |||
614 | // - There should never be any negative offsets here. | |||
615 | // | |||
616 | // FIXME: Modify API to hide this math from "user" | |||
617 | // FIXME: Even before we go to AA we can reason locally about some | |||
618 | // memory objects. It can save compile time, and possibly catch some | |||
619 | // corner cases not currently covered. | |||
620 | ||||
621 | assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset")(((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset" ) ? static_cast<void> (0) : __assert_fail ("(MMOa->getOffset() >= 0) && \"Negative MachineMemOperand offset\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 621, __PRETTY_FUNCTION__)); | |||
622 | assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset")(((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset" ) ? static_cast<void> (0) : __assert_fail ("(MMOb->getOffset() >= 0) && \"Negative MachineMemOperand offset\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 622, __PRETTY_FUNCTION__)); | |||
623 | ||||
624 | int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); | |||
625 | int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; | |||
626 | int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; | |||
627 | ||||
628 | AliasResult AAResult = | |||
629 | AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, | |||
630 | UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), | |||
631 | MemoryLocation(MMOb->getValue(), Overlapb, | |||
632 | UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); | |||
633 | ||||
634 | return (AAResult != NoAlias); | |||
635 | } | |||
636 | ||||
637 | /// This recursive function iterates over chain deps of SUb looking for | |||
638 | /// "latest" node that needs a chain edge to SUa. | |||
639 | static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, | |||
640 | const DataLayout &DL, SUnit *SUa, SUnit *SUb, | |||
641 | SUnit *ExitSU, unsigned *Depth, | |||
642 | SmallPtrSetImpl<const SUnit *> &Visited) { | |||
643 | if (!SUa || !SUb || SUb == ExitSU) | |||
644 | return *Depth; | |||
645 | ||||
646 | // Remember visited nodes. | |||
647 | if (!Visited.insert(SUb).second) | |||
648 | return *Depth; | |||
649 | // If there is _some_ dependency already in place, do not | |||
650 | // descend any further. | |||
651 | // TODO: Need to make sure that if that dependency got eliminated or ignored | |||
652 | // for any reason in the future, we would not violate DAG topology. | |||
653 | // Currently it does not happen, but makes an implicit assumption about | |||
654 | // future implementation. | |||
655 | // | |||
656 | // Independently, if we encounter node that is some sort of global | |||
657 | // object (like a call) we already have full set of dependencies to it | |||
658 | // and we can stop descending. | |||
659 | if (SUa->isSucc(SUb) || | |||
660 | isGlobalMemoryObject(AA, SUb->getInstr())) | |||
661 | return *Depth; | |||
662 | ||||
663 | // If we do need an edge, or we have exceeded depth budget, | |||
664 | // add that edge to the predecessors chain of SUb, | |||
665 | // and stop descending. | |||
666 | if (*Depth > 200 || | |||
667 | MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { | |||
668 | SUb->addPred(SDep(SUa, SDep::MayAliasMem)); | |||
669 | return *Depth; | |||
670 | } | |||
671 | // Track current depth. | |||
672 | (*Depth)++; | |||
673 | // Iterate over memory dependencies only. | |||
674 | for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); | |||
675 | I != E; ++I) | |||
676 | if (I->isNormalMemoryOrBarrier()) | |||
677 | iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited); | |||
678 | return *Depth; | |||
679 | } | |||
680 | ||||
681 | /// This function assumes that "downward" from SU there exist | |||
682 | /// tail/leaf of already constructed DAG. It iterates downward and | |||
683 | /// checks whether SU can be aliasing any node dominated | |||
684 | /// by it. | |||
685 | static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, | |||
686 | const DataLayout &DL, SUnit *SU, SUnit *ExitSU, | |||
687 | std::set<SUnit *> &CheckList, | |||
688 | unsigned LatencyToLoad) { | |||
689 | if (!SU) | |||
690 | return; | |||
691 | ||||
692 | SmallPtrSet<const SUnit*, 16> Visited; | |||
693 | unsigned Depth = 0; | |||
694 | ||||
695 | for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end(); | |||
696 | I != IE; ++I) { | |||
697 | if (SU == *I) | |||
698 | continue; | |||
699 | if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) { | |||
700 | SDep Dep(SU, SDep::MayAliasMem); | |||
701 | Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); | |||
702 | (*I)->addPred(Dep); | |||
703 | } | |||
704 | ||||
705 | // Iterate recursively over all previously added memory chain | |||
706 | // successors. Keep track of visited nodes. | |||
707 | for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), | |||
708 | JE = (*I)->Succs.end(); J != JE; ++J) | |||
709 | if (J->isNormalMemoryOrBarrier()) | |||
710 | iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth, | |||
711 | Visited); | |||
712 | } | |||
713 | } | |||
714 | ||||
715 | /// Check whether two objects need a chain edge, if so, add it | |||
716 | /// otherwise remember the rejected SU. | |||
717 | static inline void addChainDependency(AliasAnalysis *AA, | |||
718 | const MachineFrameInfo *MFI, | |||
719 | const DataLayout &DL, SUnit *SUa, | |||
720 | SUnit *SUb, std::set<SUnit *> &RejectList, | |||
721 | unsigned TrueMemOrderLatency = 0, | |||
722 | bool isNormalMemory = false) { | |||
723 | // If this is a false dependency, | |||
724 | // do not add the edge, but remember the rejected node. | |||
725 | if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { | |||
726 | SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); | |||
727 | Dep.setLatency(TrueMemOrderLatency); | |||
728 | SUb->addPred(Dep); | |||
729 | } | |||
730 | else { | |||
731 | // Duplicate entries should be ignored. | |||
732 | RejectList.insert(SUb); | |||
733 | DEBUG(dbgs() << "\tReject chain dep between SU("do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "\tReject chain dep between SU(" << SUa->NodeNum << ") and SU(" << SUb-> NodeNum << ")\n"; } } while (0) | |||
734 | << SUa->NodeNum << ") and SU("do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "\tReject chain dep between SU(" << SUa->NodeNum << ") and SU(" << SUb-> NodeNum << ")\n"; } } while (0) | |||
735 | << SUb->NodeNum << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "\tReject chain dep between SU(" << SUa->NodeNum << ") and SU(" << SUb-> NodeNum << ")\n"; } } while (0); | |||
736 | } | |||
737 | } | |||
738 | ||||
739 | /// Create an SUnit for each real instruction, numbered in top-down topological | |||
740 | /// order. The instruction order A < B, implies that no edge exists from B to A. | |||
741 | /// | |||
742 | /// Map each real instruction to its SUnit. | |||
743 | /// | |||
744 | /// After initSUnits, the SUnits vector cannot be resized and the scheduler may | |||
745 | /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs | |||
746 | /// instead of pointers. | |||
747 | /// | |||
748 | /// MachineScheduler relies on initSUnits numbering the nodes by their order in | |||
749 | /// the original instruction list. | |||
750 | void ScheduleDAGInstrs::initSUnits() { | |||
751 | // We'll be allocating one SUnit for each real instruction in the region, | |||
752 | // which is contained within a basic block. | |||
753 | SUnits.reserve(NumRegionInstrs); | |||
754 | ||||
755 | for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { | |||
756 | MachineInstr *MI = I; | |||
757 | if (MI->isDebugValue()) | |||
758 | continue; | |||
759 | ||||
760 | SUnit *SU = newSUnit(MI); | |||
761 | MISUnitMap[MI] = SU; | |||
762 | ||||
763 | SU->isCall = MI->isCall(); | |||
764 | SU->isCommutable = MI->isCommutable(); | |||
765 | ||||
766 | // Assign the Latency field of SU using target-provided information. | |||
767 | SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); | |||
768 | ||||
769 | // If this SUnit uses a reserved or unbuffered resource, mark it as such. | |||
770 | // | |||
771 | // Reserved resources block an instruction from issuing and stall the | |||
772 | // entire pipeline. These are identified by BufferSize=0. | |||
773 | // | |||
774 | // Unbuffered resources prevent execution of subsequent instructions that | |||
775 | // require the same resources. This is used for in-order execution pipelines | |||
776 | // within an out-of-order core. These are identified by BufferSize=1. | |||
777 | if (SchedModel.hasInstrSchedModel()) { | |||
778 | const MCSchedClassDesc *SC = getSchedClass(SU); | |||
779 | for (TargetSchedModel::ProcResIter | |||
780 | PI = SchedModel.getWriteProcResBegin(SC), | |||
781 | PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { | |||
782 | switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { | |||
783 | case 0: | |||
784 | SU->hasReservedResource = true; | |||
785 | break; | |||
786 | case 1: | |||
787 | SU->isUnbuffered = true; | |||
788 | break; | |||
789 | default: | |||
790 | break; | |||
791 | } | |||
792 | } | |||
793 | } | |||
794 | } | |||
795 | } | |||
796 | ||||
797 | void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { | |||
798 | const MachineInstr *MI = SU->getInstr(); | |||
799 | for (const MachineOperand &MO : MI->operands()) { | |||
800 | if (!MO.isReg()) | |||
801 | continue; | |||
802 | if (!MO.readsReg()) | |||
803 | continue; | |||
804 | if (TrackLaneMasks && !MO.isUse()) | |||
805 | continue; | |||
806 | ||||
807 | unsigned Reg = MO.getReg(); | |||
808 | if (!TargetRegisterInfo::isVirtualRegister(Reg)) | |||
809 | continue; | |||
810 | ||||
811 | // Record this local VReg use. | |||
812 | VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); | |||
813 | for (; UI != VRegUses.end(); ++UI) { | |||
814 | if (UI->SU == SU) | |||
815 | break; | |||
816 | } | |||
817 | if (UI == VRegUses.end()) | |||
818 | VRegUses.insert(VReg2SUnit(Reg, 0, SU)); | |||
819 | } | |||
820 | } | |||
821 | ||||
822 | /// If RegPressure is non-null, compute register pressure as a side effect. The | |||
823 | /// DAG builder is an efficient place to do it because it already visits | |||
824 | /// operands. | |||
825 | void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, | |||
826 | RegPressureTracker *RPTracker, | |||
827 | PressureDiffs *PDiffs, | |||
828 | bool TrackLaneMasks) { | |||
829 | const TargetSubtargetInfo &ST = MF.getSubtarget(); | |||
830 | bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI | |||
| ||||
831 | : ST.useAA(); | |||
832 | AliasAnalysis *AAForDep = UseAA ? AA : nullptr; | |||
833 | ||||
834 | this->TrackLaneMasks = TrackLaneMasks; | |||
835 | MISUnitMap.clear(); | |||
836 | ScheduleDAG::clearDAG(); | |||
837 | ||||
838 | // Create an SUnit for each real instruction. | |||
839 | initSUnits(); | |||
840 | ||||
841 | if (PDiffs) | |||
842 | PDiffs->init(SUnits.size()); | |||
843 | ||||
844 | // We build scheduling units by walking a block's instruction list from bottom | |||
845 | // to top. | |||
846 | ||||
847 | // Remember where a generic side-effecting instruction is as we proceed. | |||
848 | SUnit *BarrierChain = nullptr, *AliasChain = nullptr; | |||
849 | ||||
850 | // Memory references to specific known memory locations are tracked | |||
851 | // so that they can be given more precise dependencies. We track | |||
852 | // separately the known memory locations that may alias and those | |||
853 | // that are known not to alias | |||
854 | MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs; | |||
855 | MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; | |||
856 | std::set<SUnit*> RejectMemNodes; | |||
857 | ||||
858 | // Remove any stale debug info; sometimes BuildSchedGraph is called again | |||
859 | // without emitting the info from the previous call. | |||
860 | DbgValues.clear(); | |||
861 | FirstDbgValue = nullptr; | |||
862 | ||||
863 | assert(Defs.empty() && Uses.empty() &&((Defs.empty() && Uses.empty() && "Only BuildGraph should update Defs/Uses" ) ? static_cast<void> (0) : __assert_fail ("Defs.empty() && Uses.empty() && \"Only BuildGraph should update Defs/Uses\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 864, __PRETTY_FUNCTION__)) | |||
864 | "Only BuildGraph should update Defs/Uses")((Defs.empty() && Uses.empty() && "Only BuildGraph should update Defs/Uses" ) ? static_cast<void> (0) : __assert_fail ("Defs.empty() && Uses.empty() && \"Only BuildGraph should update Defs/Uses\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 864, __PRETTY_FUNCTION__)); | |||
865 | Defs.setUniverse(TRI->getNumRegs()); | |||
866 | Uses.setUniverse(TRI->getNumRegs()); | |||
867 | ||||
868 | assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs")((CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs" ) ? static_cast<void> (0) : __assert_fail ("CurrentVRegDefs.empty() && \"nobody else should use CurrentVRegDefs\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 868, __PRETTY_FUNCTION__)); | |||
869 | assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses")((CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses" ) ? static_cast<void> (0) : __assert_fail ("CurrentVRegUses.empty() && \"nobody else should use CurrentVRegUses\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 869, __PRETTY_FUNCTION__)); | |||
870 | unsigned NumVirtRegs = MRI.getNumVirtRegs(); | |||
871 | CurrentVRegDefs.setUniverse(NumVirtRegs); | |||
872 | CurrentVRegUses.setUniverse(NumVirtRegs); | |||
873 | ||||
874 | VRegUses.clear(); | |||
875 | VRegUses.setUniverse(NumVirtRegs); | |||
876 | ||||
877 | // Model data dependencies between instructions being scheduled and the | |||
878 | // ExitSU. | |||
879 | addSchedBarrierDeps(); | |||
880 | ||||
881 | // Walk the list of instructions, from bottom moving up. | |||
882 | MachineInstr *DbgMI = nullptr; | |||
883 | for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; | |||
884 | MII != MIE; --MII) { | |||
885 | MachineInstr *MI = std::prev(MII); | |||
886 | if (MI && DbgMI) { | |||
887 | DbgValues.push_back(std::make_pair(DbgMI, MI)); | |||
888 | DbgMI = nullptr; | |||
889 | } | |||
890 | ||||
891 | if (MI->isDebugValue()) { | |||
| ||||
892 | DbgMI = MI; | |||
893 | continue; | |||
894 | } | |||
895 | SUnit *SU = MISUnitMap[MI]; | |||
896 | assert(SU && "No SUnit mapped to this MI")((SU && "No SUnit mapped to this MI") ? static_cast< void> (0) : __assert_fail ("SU && \"No SUnit mapped to this MI\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 896, __PRETTY_FUNCTION__)); | |||
897 | ||||
898 | if (RPTracker) { | |||
899 | PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr; | |||
900 | RPTracker->recede(/*LiveUses=*/nullptr, PDiff); | |||
901 | assert(RPTracker->getPos() == std::prev(MII) &&((RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI" ) ? static_cast<void> (0) : __assert_fail ("RPTracker->getPos() == std::prev(MII) && \"RPTracker can't find MI\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 902, __PRETTY_FUNCTION__)) | |||
902 | "RPTracker can't find MI")((RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI" ) ? static_cast<void> (0) : __assert_fail ("RPTracker->getPos() == std::prev(MII) && \"RPTracker can't find MI\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 902, __PRETTY_FUNCTION__)); | |||
903 | collectVRegUses(SU); | |||
904 | } | |||
905 | ||||
906 | assert((((CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && "Cannot schedule terminators or labels!" ) ? static_cast<void> (0) : __assert_fail ("(CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && \"Cannot schedule terminators or labels!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 908, __PRETTY_FUNCTION__)) | |||
907 | (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&(((CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && "Cannot schedule terminators or labels!" ) ? static_cast<void> (0) : __assert_fail ("(CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && \"Cannot schedule terminators or labels!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 908, __PRETTY_FUNCTION__)) | |||
908 | "Cannot schedule terminators or labels!")(((CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && "Cannot schedule terminators or labels!" ) ? static_cast<void> (0) : __assert_fail ("(CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && \"Cannot schedule terminators or labels!\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 908, __PRETTY_FUNCTION__)); | |||
909 | ||||
910 | // Add register-based dependencies (data, anti, and output). | |||
911 | bool HasVRegDef = false; | |||
912 | for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { | |||
913 | const MachineOperand &MO = MI->getOperand(j); | |||
914 | if (!MO.isReg()) continue; | |||
915 | unsigned Reg = MO.getReg(); | |||
916 | if (Reg == 0) continue; | |||
917 | ||||
918 | if (TRI->isPhysicalRegister(Reg)) | |||
919 | addPhysRegDeps(SU, j); | |||
920 | else { | |||
921 | if (MO.isDef()) { | |||
922 | HasVRegDef = true; | |||
923 | addVRegDefDeps(SU, j); | |||
924 | } | |||
925 | else if (MO.readsReg()) // ignore undef operands | |||
926 | addVRegUseDeps(SU, j); | |||
927 | } | |||
928 | } | |||
929 | // If we haven't seen any uses in this scheduling region, create a | |||
930 | // dependence edge to ExitSU to model the live-out latency. This is required | |||
931 | // for vreg defs with no in-region use, and prefetches with no vreg def. | |||
932 | // | |||
933 | // FIXME: NumDataSuccs would be more precise than NumSuccs here. This | |||
934 | // check currently relies on being called before adding chain deps. | |||
935 | if (SU->NumSuccs == 0 && SU->Latency > 1 | |||
936 | && (HasVRegDef || MI->mayLoad())) { | |||
937 | SDep Dep(SU, SDep::Artificial); | |||
938 | Dep.setLatency(SU->Latency - 1); | |||
939 | ExitSU.addPred(Dep); | |||
940 | } | |||
941 | ||||
942 | // Add chain dependencies. | |||
943 | // Chain dependencies used to enforce memory order should have | |||
944 | // latency of 0 (except for true dependency of Store followed by | |||
945 | // aliased Load... we estimate that with a single cycle of latency | |||
946 | // assuming the hardware will bypass) | |||
947 | // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable | |||
948 | // after stack slots are lowered to actual addresses. | |||
949 | // TODO: Use an AliasAnalysis and do real alias-analysis queries, and | |||
950 | // produce more precise dependence information. | |||
951 | unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0; | |||
952 | if (isGlobalMemoryObject(AA, MI)) { | |||
953 | // Be conservative with these and add dependencies on all memory | |||
954 | // references, even those that are known to not alias. | |||
955 | for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = | |||
956 | NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { | |||
957 | for (unsigned i = 0, e = I->second.size(); i != e; ++i) { | |||
958 | I->second[i]->addPred(SDep(SU, SDep::Barrier)); | |||
959 | } | |||
960 | } | |||
961 | for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = | |||
962 | NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { | |||
963 | for (unsigned i = 0, e = I->second.size(); i != e; ++i) { | |||
964 | SDep Dep(SU, SDep::Barrier); | |||
965 | Dep.setLatency(TrueMemOrderLatency); | |||
966 | I->second[i]->addPred(Dep); | |||
967 | } | |||
968 | } | |||
969 | // Add SU to the barrier chain. | |||
970 | if (BarrierChain) | |||
971 | BarrierChain->addPred(SDep(SU, SDep::Barrier)); | |||
972 | BarrierChain = SU; | |||
973 | // This is a barrier event that acts as a pivotal node in the DAG, | |||
974 | // so it is safe to clear list of exposed nodes. | |||
975 | adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, | |||
976 | TrueMemOrderLatency); | |||
977 | RejectMemNodes.clear(); | |||
978 | NonAliasMemDefs.clear(); | |||
979 | NonAliasMemUses.clear(); | |||
980 | ||||
981 | // fall-through | |||
982 | new_alias_chain: | |||
983 | // Chain all possibly aliasing memory references through SU. | |||
984 | if (AliasChain) { | |||
985 | unsigned ChainLatency = 0; | |||
986 | if (AliasChain->getInstr()->mayLoad()) | |||
987 | ChainLatency = TrueMemOrderLatency; | |||
988 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, | |||
989 | RejectMemNodes, ChainLatency); | |||
990 | } | |||
991 | AliasChain = SU; | |||
992 | for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) | |||
993 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, | |||
994 | PendingLoads[k], RejectMemNodes, | |||
995 | TrueMemOrderLatency); | |||
996 | for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = | |||
997 | AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { | |||
998 | for (unsigned i = 0, e = I->second.size(); i != e; ++i) | |||
999 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, | |||
1000 | I->second[i], RejectMemNodes); | |||
1001 | } | |||
1002 | for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = | |||
1003 | AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { | |||
1004 | for (unsigned i = 0, e = I->second.size(); i != e; ++i) | |||
1005 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, | |||
1006 | I->second[i], RejectMemNodes, TrueMemOrderLatency); | |||
1007 | } | |||
1008 | // This call must come after calls to addChainDependency() since it | |||
1009 | // consumes the 'RejectMemNodes' list that addChainDependency() possibly | |||
1010 | // adds to. | |||
1011 | adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, | |||
1012 | TrueMemOrderLatency); | |||
1013 | PendingLoads.clear(); | |||
1014 | AliasMemDefs.clear(); | |||
1015 | AliasMemUses.clear(); | |||
1016 | } else if (MI->mayStore()) { | |||
1017 | // Add dependence on barrier chain, if needed. | |||
1018 | // There is no point to check aliasing on barrier event. Even if | |||
1019 | // SU and barrier _could_ be reordered, they should not. In addition, | |||
1020 | // we have lost all RejectMemNodes below barrier. | |||
1021 | if (BarrierChain) | |||
1022 | BarrierChain->addPred(SDep(SU, SDep::Barrier)); | |||
1023 | ||||
1024 | UnderlyingObjectsVector Objs; | |||
1025 | getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); | |||
1026 | ||||
1027 | if (Objs.empty()) { | |||
1028 | // Treat all other stores conservatively. | |||
1029 | goto new_alias_chain; | |||
1030 | } | |||
1031 | ||||
1032 | bool MayAlias = false; | |||
1033 | for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); | |||
1034 | K != KE; ++K) { | |||
1035 | ValueType V = K->getPointer(); | |||
1036 | bool ThisMayAlias = K->getInt(); | |||
1037 | if (ThisMayAlias) | |||
1038 | MayAlias = true; | |||
1039 | ||||
1040 | // A store to a specific PseudoSourceValue. Add precise dependencies. | |||
1041 | // Record the def in MemDefs, first adding a dep if there is | |||
1042 | // an existing def. | |||
1043 | MapVector<ValueType, std::vector<SUnit *> >::iterator I = | |||
1044 | ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); | |||
1045 | MapVector<ValueType, std::vector<SUnit *> >::iterator IE = | |||
1046 | ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); | |||
1047 | if (I != IE) { | |||
1048 | for (unsigned i = 0, e = I->second.size(); i != e; ++i) | |||
1049 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, | |||
1050 | I->second[i], RejectMemNodes, 0, true); | |||
1051 | ||||
1052 | // If we're not using AA, then we only need one store per object. | |||
1053 | if (!AAForDep) | |||
1054 | I->second.clear(); | |||
1055 | I->second.push_back(SU); | |||
1056 | } else { | |||
1057 | if (ThisMayAlias) { | |||
1058 | if (!AAForDep) | |||
1059 | AliasMemDefs[V].clear(); | |||
1060 | AliasMemDefs[V].push_back(SU); | |||
1061 | } else { | |||
1062 | if (!AAForDep) | |||
1063 | NonAliasMemDefs[V].clear(); | |||
1064 | NonAliasMemDefs[V].push_back(SU); | |||
1065 | } | |||
1066 | } | |||
1067 | // Handle the uses in MemUses, if there are any. | |||
1068 | MapVector<ValueType, std::vector<SUnit *> >::iterator J = | |||
1069 | ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); | |||
1070 | MapVector<ValueType, std::vector<SUnit *> >::iterator JE = | |||
1071 | ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); | |||
1072 | if (J != JE) { | |||
1073 | for (unsigned i = 0, e = J->second.size(); i != e; ++i) | |||
1074 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, | |||
1075 | J->second[i], RejectMemNodes, | |||
1076 | TrueMemOrderLatency, true); | |||
1077 | J->second.clear(); | |||
1078 | } | |||
1079 | } | |||
1080 | if (MayAlias) { | |||
1081 | // Add dependencies from all the PendingLoads, i.e. loads | |||
1082 | // with no underlying object. | |||
1083 | for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) | |||
1084 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, | |||
1085 | PendingLoads[k], RejectMemNodes, | |||
1086 | TrueMemOrderLatency); | |||
1087 | // Add dependence on alias chain, if needed. | |||
1088 | if (AliasChain) | |||
1089 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, | |||
1090 | RejectMemNodes); | |||
1091 | } | |||
1092 | // This call must come after calls to addChainDependency() since it | |||
1093 | // consumes the 'RejectMemNodes' list that addChainDependency() possibly | |||
1094 | // adds to. | |||
1095 | adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, | |||
1096 | TrueMemOrderLatency); | |||
1097 | } else if (MI->mayLoad()) { | |||
1098 | bool MayAlias = true; | |||
1099 | if (MI->isInvariantLoad(AA)) { | |||
1100 | // Invariant load, no chain dependencies needed! | |||
1101 | } else { | |||
1102 | UnderlyingObjectsVector Objs; | |||
1103 | getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); | |||
1104 | ||||
1105 | if (Objs.empty()) { | |||
1106 | // A load with no underlying object. Depend on all | |||
1107 | // potentially aliasing stores. | |||
1108 | for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = | |||
1109 | AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) | |||
1110 | for (unsigned i = 0, e = I->second.size(); i != e; ++i) | |||
1111 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, | |||
1112 | I->second[i], RejectMemNodes); | |||
1113 | ||||
1114 | PendingLoads.push_back(SU); | |||
1115 | MayAlias = true; | |||
1116 | } else { | |||
1117 | MayAlias = false; | |||
1118 | } | |||
1119 | ||||
1120 | for (UnderlyingObjectsVector::iterator | |||
1121 | J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { | |||
1122 | ValueType V = J->getPointer(); | |||
1123 | bool ThisMayAlias = J->getInt(); | |||
1124 | ||||
1125 | if (ThisMayAlias) | |||
1126 | MayAlias = true; | |||
1127 | ||||
1128 | // A load from a specific PseudoSourceValue. Add precise dependencies. | |||
1129 | MapVector<ValueType, std::vector<SUnit *> >::iterator I = | |||
1130 | ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); | |||
1131 | MapVector<ValueType, std::vector<SUnit *> >::iterator IE = | |||
1132 | ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); | |||
1133 | if (I != IE) | |||
1134 | for (unsigned i = 0, e = I->second.size(); i != e; ++i) | |||
1135 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, | |||
1136 | I->second[i], RejectMemNodes, 0, true); | |||
1137 | if (ThisMayAlias) | |||
1138 | AliasMemUses[V].push_back(SU); | |||
1139 | else | |||
1140 | NonAliasMemUses[V].push_back(SU); | |||
1141 | } | |||
1142 | // Add dependencies on alias and barrier chains, if needed. | |||
1143 | if (MayAlias && AliasChain) | |||
1144 | addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, | |||
1145 | RejectMemNodes); | |||
1146 | if (MayAlias) | |||
1147 | // This call must come after calls to addChainDependency() since it | |||
1148 | // consumes the 'RejectMemNodes' list that addChainDependency() | |||
1149 | // possibly adds to. | |||
1150 | adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, | |||
1151 | RejectMemNodes, /*Latency=*/0); | |||
1152 | if (BarrierChain) | |||
1153 | BarrierChain->addPred(SDep(SU, SDep::Barrier)); | |||
1154 | } | |||
1155 | } | |||
1156 | } | |||
1157 | if (DbgMI) | |||
1158 | FirstDbgValue = DbgMI; | |||
1159 | ||||
1160 | Defs.clear(); | |||
1161 | Uses.clear(); | |||
1162 | CurrentVRegDefs.clear(); | |||
1163 | CurrentVRegUses.clear(); | |||
1164 | PendingLoads.clear(); | |||
1165 | } | |||
1166 | ||||
1167 | /// \brief Initialize register live-range state for updating kills. | |||
1168 | void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { | |||
1169 | // Start with no live registers. | |||
1170 | LiveRegs.reset(); | |||
1171 | ||||
1172 | // Examine the live-in regs of all successors. | |||
1173 | for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), | |||
1174 | SE = BB->succ_end(); SI != SE; ++SI) { | |||
1175 | for (const auto &LI : (*SI)->liveins()) { | |||
1176 | // Repeat, for reg and all subregs. | |||
1177 | for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); | |||
1178 | SubRegs.isValid(); ++SubRegs) | |||
1179 | LiveRegs.set(*SubRegs); | |||
1180 | } | |||
1181 | } | |||
1182 | } | |||
1183 | ||||
1184 | /// \brief If we change a kill flag on the bundle instruction implicit register | |||
1185 | /// operands, then we also need to propagate that to any instructions inside | |||
1186 | /// the bundle which had the same kill state. | |||
1187 | static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, | |||
1188 | bool NewKillState) { | |||
1189 | if (MI->getOpcode() != TargetOpcode::BUNDLE) | |||
1190 | return; | |||
1191 | ||||
1192 | // Walk backwards from the last instruction in the bundle to the first. | |||
1193 | // Once we set a kill flag on an instruction, we bail out, as otherwise we | |||
1194 | // might set it on too many operands. We will clear as many flags as we | |||
1195 | // can though. | |||
1196 | MachineBasicBlock::instr_iterator Begin = MI->getIterator(); | |||
1197 | MachineBasicBlock::instr_iterator End = getBundleEnd(MI); | |||
1198 | while (Begin != End) { | |||
1199 | for (MachineOperand &MO : (--End)->operands()) { | |||
1200 | if (!MO.isReg() || MO.isDef() || Reg != MO.getReg()) | |||
1201 | continue; | |||
1202 | ||||
1203 | // DEBUG_VALUE nodes do not contribute to code generation and should | |||
1204 | // always be ignored. Failure to do so may result in trying to modify | |||
1205 | // KILL flags on DEBUG_VALUE nodes, which is distressing. | |||
1206 | if (MO.isDebug()) | |||
1207 | continue; | |||
1208 | ||||
1209 | // If the register has the internal flag then it could be killing an | |||
1210 | // internal def of the register. In this case, just skip. We only want | |||
1211 | // to toggle the flag on operands visible outside the bundle. | |||
1212 | if (MO.isInternalRead()) | |||
1213 | continue; | |||
1214 | ||||
1215 | if (MO.isKill() == NewKillState) | |||
1216 | continue; | |||
1217 | MO.setIsKill(NewKillState); | |||
1218 | if (NewKillState) | |||
1219 | return; | |||
1220 | } | |||
1221 | } | |||
1222 | } | |||
1223 | ||||
1224 | bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { | |||
1225 | // Setting kill flag... | |||
1226 | if (!MO.isKill()) { | |||
1227 | MO.setIsKill(true); | |||
1228 | toggleBundleKillFlag(MI, MO.getReg(), true); | |||
1229 | return false; | |||
1230 | } | |||
1231 | ||||
1232 | // If MO itself is live, clear the kill flag... | |||
1233 | if (LiveRegs.test(MO.getReg())) { | |||
1234 | MO.setIsKill(false); | |||
1235 | toggleBundleKillFlag(MI, MO.getReg(), false); | |||
1236 | return false; | |||
1237 | } | |||
1238 | ||||
1239 | // If any subreg of MO is live, then create an imp-def for that | |||
1240 | // subreg and keep MO marked as killed. | |||
1241 | MO.setIsKill(false); | |||
1242 | toggleBundleKillFlag(MI, MO.getReg(), false); | |||
1243 | bool AllDead = true; | |||
1244 | const unsigned SuperReg = MO.getReg(); | |||
1245 | MachineInstrBuilder MIB(MF, MI); | |||
1246 | for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { | |||
1247 | if (LiveRegs.test(*SubRegs)) { | |||
1248 | MIB.addReg(*SubRegs, RegState::ImplicitDefine); | |||
1249 | AllDead = false; | |||
1250 | } | |||
1251 | } | |||
1252 | ||||
1253 | if(AllDead) { | |||
1254 | MO.setIsKill(true); | |||
1255 | toggleBundleKillFlag(MI, MO.getReg(), true); | |||
1256 | } | |||
1257 | return false; | |||
1258 | } | |||
1259 | ||||
1260 | // FIXME: Reuse the LivePhysRegs utility for this. | |||
1261 | void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { | |||
1262 | DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'; } } while (0); | |||
1263 | ||||
1264 | LiveRegs.resize(TRI->getNumRegs()); | |||
1265 | BitVector killedRegs(TRI->getNumRegs()); | |||
1266 | ||||
1267 | startBlockForKills(MBB); | |||
1268 | ||||
1269 | // Examine block from end to start... | |||
1270 | unsigned Count = MBB->size(); | |||
1271 | for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); | |||
1272 | I != E; --Count) { | |||
1273 | MachineInstr *MI = --I; | |||
1274 | if (MI->isDebugValue()) | |||
1275 | continue; | |||
1276 | ||||
1277 | // Update liveness. Registers that are defed but not used in this | |||
1278 | // instruction are now dead. Mark register and all subregs as they | |||
1279 | // are completely defined. | |||
1280 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |||
1281 | MachineOperand &MO = MI->getOperand(i); | |||
1282 | if (MO.isRegMask()) | |||
1283 | LiveRegs.clearBitsNotInMask(MO.getRegMask()); | |||
1284 | if (!MO.isReg()) continue; | |||
1285 | unsigned Reg = MO.getReg(); | |||
1286 | if (Reg == 0) continue; | |||
1287 | if (!MO.isDef()) continue; | |||
1288 | // Ignore two-addr defs. | |||
1289 | if (MI->isRegTiedToUseOperand(i)) continue; | |||
1290 | ||||
1291 | // Repeat for reg and all subregs. | |||
1292 | for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); | |||
1293 | SubRegs.isValid(); ++SubRegs) | |||
1294 | LiveRegs.reset(*SubRegs); | |||
1295 | } | |||
1296 | ||||
1297 | // Examine all used registers and set/clear kill flag. When a | |||
1298 | // register is used multiple times we only set the kill flag on | |||
1299 | // the first use. Don't set kill flags on undef operands. | |||
1300 | killedRegs.reset(); | |||
1301 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |||
1302 | MachineOperand &MO = MI->getOperand(i); | |||
1303 | if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; | |||
1304 | unsigned Reg = MO.getReg(); | |||
1305 | if ((Reg == 0) || MRI.isReserved(Reg)) continue; | |||
1306 | ||||
1307 | bool kill = false; | |||
1308 | if (!killedRegs.test(Reg)) { | |||
1309 | kill = true; | |||
1310 | // A register is not killed if any subregs are live... | |||
1311 | for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { | |||
1312 | if (LiveRegs.test(*SubRegs)) { | |||
1313 | kill = false; | |||
1314 | break; | |||
1315 | } | |||
1316 | } | |||
1317 | ||||
1318 | // If subreg is not live, then register is killed if it became | |||
1319 | // live in this instruction | |||
1320 | if (kill) | |||
1321 | kill = !LiveRegs.test(Reg); | |||
1322 | } | |||
1323 | ||||
1324 | if (MO.isKill() != kill) { | |||
1325 | DEBUG(dbgs() << "Fixing " << MO << " in ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Fixing " << MO << " in "; } } while (0); | |||
1326 | // Warning: toggleKillFlag may invalidate MO. | |||
1327 | toggleKillFlag(MI, MO); | |||
1328 | DEBUG(MI->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { MI->dump(); } } while (0); | |||
1329 | DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { if (MI->getOpcode() == TargetOpcode::BUNDLE ) { MachineBasicBlock::instr_iterator Begin = MI->getIterator (); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType("misched")) { Begin->dump(); } } while (0); }; } } while (0) | |||
1330 | MachineBasicBlock::instr_iterator Begin = MI->getIterator();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { if (MI->getOpcode() == TargetOpcode::BUNDLE ) { MachineBasicBlock::instr_iterator Begin = MI->getIterator (); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType("misched")) { Begin->dump(); } } while (0); }; } } while (0) | |||
1331 | MachineBasicBlock::instr_iterator End = getBundleEnd(MI);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { if (MI->getOpcode() == TargetOpcode::BUNDLE ) { MachineBasicBlock::instr_iterator Begin = MI->getIterator (); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType("misched")) { Begin->dump(); } } while (0); }; } } while (0) | |||
1332 | while (++Begin != End)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { if (MI->getOpcode() == TargetOpcode::BUNDLE ) { MachineBasicBlock::instr_iterator Begin = MI->getIterator (); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType("misched")) { Begin->dump(); } } while (0); }; } } while (0) | |||
1333 | DEBUG(Begin->dump());do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { if (MI->getOpcode() == TargetOpcode::BUNDLE ) { MachineBasicBlock::instr_iterator Begin = MI->getIterator (); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType("misched")) { Begin->dump(); } } while (0); }; } } while (0) | |||
1334 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { if (MI->getOpcode() == TargetOpcode::BUNDLE ) { MachineBasicBlock::instr_iterator Begin = MI->getIterator (); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType("misched")) { Begin->dump(); } } while (0); }; } } while (0); | |||
1335 | } | |||
1336 | ||||
1337 | killedRegs.set(Reg); | |||
1338 | } | |||
1339 | ||||
1340 | // Mark any used register (that is not using undef) and subregs as | |||
1341 | // now live... | |||
1342 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |||
1343 | MachineOperand &MO = MI->getOperand(i); | |||
1344 | if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; | |||
1345 | unsigned Reg = MO.getReg(); | |||
1346 | if ((Reg == 0) || MRI.isReserved(Reg)) continue; | |||
1347 | ||||
1348 | for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); | |||
1349 | SubRegs.isValid(); ++SubRegs) | |||
1350 | LiveRegs.set(*SubRegs); | |||
1351 | } | |||
1352 | } | |||
1353 | } | |||
1354 | ||||
1355 | void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { | |||
1356 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
1357 | SU->getInstr()->dump(); | |||
1358 | #endif | |||
1359 | } | |||
1360 | ||||
1361 | std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { | |||
1362 | std::string s; | |||
1363 | raw_string_ostream oss(s); | |||
1364 | if (SU == &EntrySU) | |||
1365 | oss << "<entry>"; | |||
1366 | else if (SU == &ExitSU) | |||
1367 | oss << "<exit>"; | |||
1368 | else | |||
1369 | SU->getInstr()->print(oss, /*SkipOpers=*/true); | |||
1370 | return oss.str(); | |||
1371 | } | |||
1372 | ||||
1373 | /// Return the basic block label. It is not necessarilly unique because a block | |||
1374 | /// contains multiple scheduling regions. But it is fine for visualization. | |||
1375 | std::string ScheduleDAGInstrs::getDAGName() const { | |||
1376 | return "dag." + BB->getFullName(); | |||
1377 | } | |||
1378 | ||||
1379 | //===----------------------------------------------------------------------===// | |||
1380 | // SchedDFSResult Implementation | |||
1381 | //===----------------------------------------------------------------------===// | |||
1382 | ||||
1383 | namespace llvm { | |||
1384 | /// \brief Internal state used to compute SchedDFSResult. | |||
1385 | class SchedDFSImpl { | |||
1386 | SchedDFSResult &R; | |||
1387 | ||||
1388 | /// Join DAG nodes into equivalence classes by their subtree. | |||
1389 | IntEqClasses SubtreeClasses; | |||
1390 | /// List PredSU, SuccSU pairs that represent data edges between subtrees. | |||
1391 | std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; | |||
1392 | ||||
1393 | struct RootData { | |||
1394 | unsigned NodeID; | |||
1395 | unsigned ParentNodeID; // Parent node (member of the parent subtree). | |||
1396 | unsigned SubInstrCount; // Instr count in this tree only, not children. | |||
1397 | ||||
1398 | RootData(unsigned id): NodeID(id), | |||
1399 | ParentNodeID(SchedDFSResult::InvalidSubtreeID), | |||
1400 | SubInstrCount(0) {} | |||
1401 | ||||
1402 | unsigned getSparseSetIndex() const { return NodeID; } | |||
1403 | }; | |||
1404 | ||||
1405 | SparseSet<RootData> RootSet; | |||
1406 | ||||
1407 | public: | |||
1408 | SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) { | |||
1409 | RootSet.setUniverse(R.DFSNodeData.size()); | |||
1410 | } | |||
1411 | ||||
1412 | /// Return true if this node been visited by the DFS traversal. | |||
1413 | /// | |||
1414 | /// During visitPostorderNode the Node's SubtreeID is assigned to the Node | |||
1415 | /// ID. Later, SubtreeID is updated but remains valid. | |||
1416 | bool isVisited(const SUnit *SU) const { | |||
1417 | return R.DFSNodeData[SU->NodeNum].SubtreeID | |||
1418 | != SchedDFSResult::InvalidSubtreeID; | |||
1419 | } | |||
1420 | ||||
1421 | /// Initialize this node's instruction count. We don't need to flag the node | |||
1422 | /// visited until visitPostorder because the DAG cannot have cycles. | |||
1423 | void visitPreorder(const SUnit *SU) { | |||
1424 | R.DFSNodeData[SU->NodeNum].InstrCount = | |||
1425 | SU->getInstr()->isTransient() ? 0 : 1; | |||
1426 | } | |||
1427 | ||||
1428 | /// Called once for each node after all predecessors are visited. Revisit this | |||
1429 | /// node's predecessors and potentially join them now that we know the ILP of | |||
1430 | /// the other predecessors. | |||
1431 | void visitPostorderNode(const SUnit *SU) { | |||
1432 | // Mark this node as the root of a subtree. It may be joined with its | |||
1433 | // successors later. | |||
1434 | R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; | |||
1435 | RootData RData(SU->NodeNum); | |||
1436 | RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; | |||
1437 | ||||
1438 | // If any predecessors are still in their own subtree, they either cannot be | |||
1439 | // joined or are large enough to remain separate. If this parent node's | |||
1440 | // total instruction count is not greater than a child subtree by at least | |||
1441 | // the subtree limit, then try to join it now since splitting subtrees is | |||
1442 | // only useful if multiple high-pressure paths are possible. | |||
1443 | unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; | |||
1444 | for (SUnit::const_pred_iterator | |||
1445 | PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { | |||
1446 | if (PI->getKind() != SDep::Data) | |||
1447 | continue; | |||
1448 | unsigned PredNum = PI->getSUnit()->NodeNum; | |||
1449 | if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) | |||
1450 | joinPredSubtree(*PI, SU, /*CheckLimit=*/false); | |||
1451 | ||||
1452 | // Either link or merge the TreeData entry from the child to the parent. | |||
1453 | if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { | |||
1454 | // If the predecessor's parent is invalid, this is a tree edge and the | |||
1455 | // current node is the parent. | |||
1456 | if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) | |||
1457 | RootSet[PredNum].ParentNodeID = SU->NodeNum; | |||
1458 | } | |||
1459 | else if (RootSet.count(PredNum)) { | |||
1460 | // The predecessor is not a root, but is still in the root set. This | |||
1461 | // must be the new parent that it was just joined to. Note that | |||
1462 | // RootSet[PredNum].ParentNodeID may either be invalid or may still be | |||
1463 | // set to the original parent. | |||
1464 | RData.SubInstrCount += RootSet[PredNum].SubInstrCount; | |||
1465 | RootSet.erase(PredNum); | |||
1466 | } | |||
1467 | } | |||
1468 | RootSet[SU->NodeNum] = RData; | |||
1469 | } | |||
1470 | ||||
1471 | /// Called once for each tree edge after calling visitPostOrderNode on the | |||
1472 | /// predecessor. Increment the parent node's instruction count and | |||
1473 | /// preemptively join this subtree to its parent's if it is small enough. | |||
1474 | void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { | |||
1475 | R.DFSNodeData[Succ->NodeNum].InstrCount | |||
1476 | += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; | |||
1477 | joinPredSubtree(PredDep, Succ); | |||
1478 | } | |||
1479 | ||||
1480 | /// Add a connection for cross edges. | |||
1481 | void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { | |||
1482 | ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); | |||
1483 | } | |||
1484 | ||||
1485 | /// Set each node's subtree ID to the representative ID and record connections | |||
1486 | /// between trees. | |||
1487 | void finalize() { | |||
1488 | SubtreeClasses.compress(); | |||
1489 | R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); | |||
1490 | assert(SubtreeClasses.getNumClasses() == RootSet.size()((SubtreeClasses.getNumClasses() == RootSet.size() && "number of roots should match trees") ? static_cast<void> (0) : __assert_fail ("SubtreeClasses.getNumClasses() == RootSet.size() && \"number of roots should match trees\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 1491, __PRETTY_FUNCTION__)) | |||
1491 | && "number of roots should match trees")((SubtreeClasses.getNumClasses() == RootSet.size() && "number of roots should match trees") ? static_cast<void> (0) : __assert_fail ("SubtreeClasses.getNumClasses() == RootSet.size() && \"number of roots should match trees\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 1491, __PRETTY_FUNCTION__)); | |||
1492 | for (SparseSet<RootData>::const_iterator | |||
1493 | RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { | |||
1494 | unsigned TreeID = SubtreeClasses[RI->NodeID]; | |||
1495 | if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) | |||
1496 | R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; | |||
1497 | R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; | |||
1498 | // Note that SubInstrCount may be greater than InstrCount if we joined | |||
1499 | // subtrees across a cross edge. InstrCount will be attributed to the | |||
1500 | // original parent, while SubInstrCount will be attributed to the joined | |||
1501 | // parent. | |||
1502 | } | |||
1503 | R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); | |||
1504 | R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); | |||
1505 | DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << R.getNumSubtrees() << " subtrees:\n" ; } } while (0); | |||
1506 | for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { | |||
1507 | R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; | |||
1508 | DEBUG(dbgs() << " SU(" << Idx << ") in tree "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << " SU(" << Idx << ") in tree " << R.DFSNodeData[Idx].SubtreeID << '\n'; } } while (0) | |||
1509 | << R.DFSNodeData[Idx].SubtreeID << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << " SU(" << Idx << ") in tree " << R.DFSNodeData[Idx].SubtreeID << '\n'; } } while (0); | |||
1510 | } | |||
1511 | for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator | |||
1512 | I = ConnectionPairs.begin(), E = ConnectionPairs.end(); | |||
1513 | I != E; ++I) { | |||
1514 | unsigned PredTree = SubtreeClasses[I->first->NodeNum]; | |||
1515 | unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; | |||
1516 | if (PredTree == SuccTree) | |||
1517 | continue; | |||
1518 | unsigned Depth = I->first->getDepth(); | |||
1519 | addConnection(PredTree, SuccTree, Depth); | |||
1520 | addConnection(SuccTree, PredTree, Depth); | |||
1521 | } | |||
1522 | } | |||
1523 | ||||
1524 | protected: | |||
1525 | /// Join the predecessor subtree with the successor that is its DFS | |||
1526 | /// parent. Apply some heuristics before joining. | |||
1527 | bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, | |||
1528 | bool CheckLimit = true) { | |||
1529 | assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges")((PredDep.getKind() == SDep::Data && "Subtrees are for data edges" ) ? static_cast<void> (0) : __assert_fail ("PredDep.getKind() == SDep::Data && \"Subtrees are for data edges\"" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 1529, __PRETTY_FUNCTION__)); | |||
1530 | ||||
1531 | // Check if the predecessor is already joined. | |||
1532 | const SUnit *PredSU = PredDep.getSUnit(); | |||
1533 | unsigned PredNum = PredSU->NodeNum; | |||
1534 | if (R.DFSNodeData[PredNum].SubtreeID != PredNum) | |||
1535 | return false; | |||
1536 | ||||
1537 | // Four is the magic number of successors before a node is considered a | |||
1538 | // pinch point. | |||
1539 | unsigned NumDataSucs = 0; | |||
1540 | for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), | |||
1541 | SE = PredSU->Succs.end(); SI != SE; ++SI) { | |||
1542 | if (SI->getKind() == SDep::Data) { | |||
1543 | if (++NumDataSucs >= 4) | |||
1544 | return false; | |||
1545 | } | |||
1546 | } | |||
1547 | if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) | |||
1548 | return false; | |||
1549 | R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; | |||
1550 | SubtreeClasses.join(Succ->NodeNum, PredNum); | |||
1551 | return true; | |||
1552 | } | |||
1553 | ||||
1554 | /// Called by finalize() to record a connection between trees. | |||
1555 | void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { | |||
1556 | if (!Depth) | |||
1557 | return; | |||
1558 | ||||
1559 | do { | |||
1560 | SmallVectorImpl<SchedDFSResult::Connection> &Connections = | |||
1561 | R.SubtreeConnections[FromTree]; | |||
1562 | for (SmallVectorImpl<SchedDFSResult::Connection>::iterator | |||
1563 | I = Connections.begin(), E = Connections.end(); I != E; ++I) { | |||
1564 | if (I->TreeID == ToTree) { | |||
1565 | I->Level = std::max(I->Level, Depth); | |||
1566 | return; | |||
1567 | } | |||
1568 | } | |||
1569 | Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); | |||
1570 | FromTree = R.DFSTreeData[FromTree].ParentTreeID; | |||
1571 | } while (FromTree != SchedDFSResult::InvalidSubtreeID); | |||
1572 | } | |||
1573 | }; | |||
1574 | } // namespace llvm | |||
1575 | ||||
1576 | namespace { | |||
1577 | /// \brief Manage the stack used by a reverse depth-first search over the DAG. | |||
1578 | class SchedDAGReverseDFS { | |||
1579 | std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack; | |||
1580 | public: | |||
1581 | bool isComplete() const { return DFSStack.empty(); } | |||
1582 | ||||
1583 | void follow(const SUnit *SU) { | |||
1584 | DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); | |||
1585 | } | |||
1586 | void advance() { ++DFSStack.back().second; } | |||
1587 | ||||
1588 | const SDep *backtrack() { | |||
1589 | DFSStack.pop_back(); | |||
1590 | return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second); | |||
1591 | } | |||
1592 | ||||
1593 | const SUnit *getCurr() const { return DFSStack.back().first; } | |||
1594 | ||||
1595 | SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } | |||
1596 | ||||
1597 | SUnit::const_pred_iterator getPredEnd() const { | |||
1598 | return getCurr()->Preds.end(); | |||
1599 | } | |||
1600 | }; | |||
1601 | } // anonymous | |||
1602 | ||||
1603 | static bool hasDataSucc(const SUnit *SU) { | |||
1604 | for (SUnit::const_succ_iterator | |||
1605 | SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { | |||
1606 | if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) | |||
1607 | return true; | |||
1608 | } | |||
1609 | return false; | |||
1610 | } | |||
1611 | ||||
1612 | /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first | |||
1613 | /// search from this root. | |||
1614 | void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { | |||
1615 | if (!IsBottomUp) | |||
1616 | llvm_unreachable("Top-down ILP metric is unimplemnted")::llvm::llvm_unreachable_internal("Top-down ILP metric is unimplemnted" , "/tmp/buildd/llvm-toolchain-snapshot-3.8~svn257205/lib/CodeGen/ScheduleDAGInstrs.cpp" , 1616); | |||
1617 | ||||
1618 | SchedDFSImpl Impl(*this); | |||
1619 | for (ArrayRef<SUnit>::const_iterator | |||
1620 | SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { | |||
1621 | const SUnit *SU = &*SI; | |||
1622 | if (Impl.isVisited(SU) || hasDataSucc(SU)) | |||
1623 | continue; | |||
1624 | ||||
1625 | SchedDAGReverseDFS DFS; | |||
1626 | Impl.visitPreorder(SU); | |||
1627 | DFS.follow(SU); | |||
1628 | for (;;) { | |||
1629 | // Traverse the leftmost path as far as possible. | |||
1630 | while (DFS.getPred() != DFS.getPredEnd()) { | |||
1631 | const SDep &PredDep = *DFS.getPred(); | |||
1632 | DFS.advance(); | |||
1633 | // Ignore non-data edges. | |||
1634 | if (PredDep.getKind() != SDep::Data | |||
1635 | || PredDep.getSUnit()->isBoundaryNode()) { | |||
1636 | continue; | |||
1637 | } | |||
1638 | // An already visited edge is a cross edge, assuming an acyclic DAG. | |||
1639 | if (Impl.isVisited(PredDep.getSUnit())) { | |||
1640 | Impl.visitCrossEdge(PredDep, DFS.getCurr()); | |||
1641 | continue; | |||
1642 | } | |||
1643 | Impl.visitPreorder(PredDep.getSUnit()); | |||
1644 | DFS.follow(PredDep.getSUnit()); | |||
1645 | } | |||
1646 | // Visit the top of the stack in postorder and backtrack. | |||
1647 | const SUnit *Child = DFS.getCurr(); | |||
1648 | const SDep *PredDep = DFS.backtrack(); | |||
1649 | Impl.visitPostorderNode(Child); | |||
1650 | if (PredDep) | |||
1651 | Impl.visitPostorderEdge(*PredDep, DFS.getCurr()); | |||
1652 | if (DFS.isComplete()) | |||
1653 | break; | |||
1654 | } | |||
1655 | } | |||
1656 | Impl.finalize(); | |||
1657 | } | |||
1658 | ||||
1659 | /// The root of the given SubtreeID was just scheduled. For all subtrees | |||
1660 | /// connected to this tree, record the depth of the connection so that the | |||
1661 | /// nearest connected subtrees can be prioritized. | |||
1662 | void SchedDFSResult::scheduleTree(unsigned SubtreeID) { | |||
1663 | for (SmallVectorImpl<Connection>::const_iterator | |||
1664 | I = SubtreeConnections[SubtreeID].begin(), | |||
1665 | E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { | |||
1666 | SubtreeConnectLevels[I->TreeID] = | |||
1667 | std::max(SubtreeConnectLevels[I->TreeID], I->Level); | |||
1668 | DEBUG(dbgs() << " Tree: " << I->TreeIDdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << " Tree: " << I->TreeID << " @" << SubtreeConnectLevels[I->TreeID] << '\n'; } } while (0) | |||
1669 | << " @" << SubtreeConnectLevels[I->TreeID] << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << " Tree: " << I->TreeID << " @" << SubtreeConnectLevels[I->TreeID] << '\n'; } } while (0); | |||
1670 | } | |||
1671 | } | |||
1672 | ||||
1673 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) | |||
1674 | void ILPValue::print(raw_ostream &OS) const { | |||
1675 | OS << InstrCount << " / " << Length << " = "; | |||
1676 | if (!Length) | |||
1677 | OS << "BADILP"; | |||
1678 | else | |||
1679 | OS << format("%g", ((double)InstrCount / Length)); | |||
1680 | } | |||
1681 | ||||
1682 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) | |||
1683 | void ILPValue::dump() const { | |||
1684 | dbgs() << *this << '\n'; | |||
1685 | } | |||
1686 | ||||
1687 | namespace llvm { | |||
1688 | ||||
1689 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) | |||
1690 | raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { | |||
1691 | Val.print(OS); | |||
1692 | return OS; | |||
1693 | } | |||
1694 | ||||
1695 | } // namespace llvm |