File:        lib/CodeGen/ScheduleDAGInstrs.cpp
Location:    line 922, column 9
Description: Called C++ object pointer is null
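What the analyzer is reporting: in buildSchedGraph() the region is walked bottom-up (listing lines 913-925). Line 916 converts the iterator to a pointer (MachineInstr *MI = std::prev(MII);), and the guard on line 917, if (MI && DbgMI), is what teaches the analyzer that MI may be null. On the path where that guard fails because MI is null, line 922 calls MI->isDebugValue() through the null pointer, which is the dereference reported above. The sketch below is not the LLVM code; it reproduces the pattern with a hypothetical Instr type and shows one guarded variant that would satisfy the analyzer, assuming MI cannot actually be null at this point in the region walk.

// Minimal sketch of the flagged pattern (hypothetical Instr type, not
// the LLVM API). The early null test teaches the analyzer that MI may
// be null; the later unguarded dereference is then reported.
struct Instr {
  bool isDebugValue() const { return false; }
};

void visit(Instr *MI, Instr *DbgMI) {
  if (MI && DbgMI) {        // analyzer learns: MI may be null
    // ... pair up the debug value ...
  }
  if (MI->isDebugValue()) { // warning: called C++ object pointer is null
    // ...
  }
}

// One way to satisfy the analyzer, assuming MI can never really be
// null here: establish non-nullness once, before any dereference.
void visitGuarded(Instr *MI, Instr *DbgMI) {
  if (!MI)
    return; // or: assert(MI && "region iterator yielded null instruction");
  if (DbgMI) {
    // ... pair up the debug value ...
  }
  if (MI->isDebugValue()) {
    // ...
  }
}

Within a scheduling region the iterator-to-pointer conversion should not yield null, so the redundant null test on line 917 is the likely trigger; either dropping it or hoisting an explicit check or assert, as sketched above, would silence the report.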
1 | //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2 | //
3 | // The LLVM Compiler Infrastructure
4 | //
5 | // This file is distributed under the University of Illinois Open Source
6 | // License. See LICENSE.TXT for details.
7 | //
8 | //===----------------------------------------------------------------------===//
9 | //
10 | // This implements the ScheduleDAGInstrs class, which implements re-scheduling
11 | // of MachineInstrs.
12 | //
13 | //===----------------------------------------------------------------------===//
14 |
15 | #include "llvm/CodeGen/ScheduleDAGInstrs.h"
16 | #include "llvm/ADT/IntEqClasses.h"
17 | #include "llvm/ADT/SmallPtrSet.h"
18 | #include "llvm/ADT/SmallSet.h"
19 | #include "llvm/Analysis/AliasAnalysis.h"
20 | #include "llvm/Analysis/ValueTracking.h"
21 | #include "llvm/CodeGen/LiveIntervalAnalysis.h"
22 | #include "llvm/CodeGen/MachineFunctionPass.h"
23 | #include "llvm/CodeGen/MachineFrameInfo.h"
24 | #include "llvm/CodeGen/MachineInstrBuilder.h"
25 | #include "llvm/CodeGen/MachineMemOperand.h"
26 | #include "llvm/CodeGen/MachineRegisterInfo.h"
27 | #include "llvm/CodeGen/PseudoSourceValue.h"
28 | #include "llvm/CodeGen/RegisterPressure.h"
29 | #include "llvm/CodeGen/ScheduleDFS.h"
30 | #include "llvm/IR/Function.h"
31 | #include "llvm/IR/Type.h"
32 | #include "llvm/IR/Operator.h"
33 | #include "llvm/Support/CommandLine.h"
34 | #include "llvm/Support/Debug.h"
35 | #include "llvm/Support/Format.h"
36 | #include "llvm/Support/raw_ostream.h"
37 | #include "llvm/Target/TargetInstrInfo.h"
38 | #include "llvm/Target/TargetMachine.h"
39 | #include "llvm/Target/TargetRegisterInfo.h"
40 | #include "llvm/Target/TargetSubtargetInfo.h"
41 |
42 | using namespace llvm;
43 |
44 | #define DEBUG_TYPE "misched"
45 |
46 | static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
47 | cl::ZeroOrMore, cl::init(false),
48 | cl::desc("Enable use of AA during MI DAG construction"));
49 |
50 | static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
51 | cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
52 |
53 | // Note: the two options below might be used in tuning compile time vs
54 | // output quality. Setting HugeRegion so large that it will never be
55 | // reached means best-effort, but may be slow.
56 |
57 | // When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
58 | // together hold this many SUs, a reduction of maps will be done.
59 | static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
60 | cl::init(1000), cl::desc("The limit to use while constructing the DAG "
61 | "prior to scheduling, at which point a trade-off "
62 | "is made to avoid excessive compile time."));
63 |
64 | static cl::opt<unsigned> ReductionSize(
65 | "dag-maps-reduction-size", cl::Hidden,
66 | cl::desc("A huge scheduling region will have maps reduced by this many "
67 | "nodes at a time. Defaults to HugeRegion / 2."));
68 |
69 | static unsigned getReductionSize() {
70 | // Always reduce a huge region with half of the elements, except
71 | // when the user sets this number explicitly.
72 | if (ReductionSize.getNumOccurrences() == 0)
73 | return HugeRegion / 2;
74 | return ReductionSize;
75 | }
76 |
77 | static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
78 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
79 | dbgs() << "{ ";
80 | for (auto *su : L) {
81 | dbgs() << "SU(" << su->NodeNum << ")";
82 | if (su != L.back())
83 | dbgs() << ", ";
84 | }
85 | dbgs() << "}\n";
86 | #endif
87 | }
88 |
89 | ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
90 | const MachineLoopInfo *mli,
91 | bool RemoveKillFlags)
92 | : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
93 | RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
94 | TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
95 | UnknownValue(UndefValue::get(
96 | Type::getVoidTy(mf.getFunction()->getContext()))),
97 | FirstDbgValue(nullptr) {
98 | DbgValues.clear();
99 |
100 | const TargetSubtargetInfo &ST = mf.getSubtarget();
101 | SchedModel.init(ST.getSchedModel(), &ST, TII);
102 | }
103 |
104 | /// getUnderlyingObjectFromInt - This is the function that does the work of
105 | /// looking through basic ptrtoint+arithmetic+inttoptr sequences.
106 | static const Value *getUnderlyingObjectFromInt(const Value *V) {
107 | do {
108 | if (const Operator *U = dyn_cast<Operator>(V)) {
109 | // If we find a ptrtoint, we can transfer control back to the
110 | // regular getUnderlyingObjectFromInt.
111 | if (U->getOpcode() == Instruction::PtrToInt)
112 | return U->getOperand(0);
113 | // If we find an add of a constant, a multiplied value, or a phi, it's
114 | // likely that the other operand will lead us to the base
115 | // object. We don't have to worry about the case where the
116 | // object address is somehow being computed by the multiply,
117 | // because our callers only care when the result is an
118 | // identifiable object.
119 | if (U->getOpcode() != Instruction::Add ||
120 | (!isa<ConstantInt>(U->getOperand(1)) &&
121 | Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
122 | !isa<PHINode>(U->getOperand(1))))
123 | return V;
124 | V = U->getOperand(0);
125 | } else {
126 | return V;
127 | }
128 | assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
129 | } while (1);
130 | }
131 |
132 | /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
133 | /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
134 | static void getUnderlyingObjects(const Value *V,
135 | SmallVectorImpl<Value *> &Objects,
136 | const DataLayout &DL) {
137 | SmallPtrSet<const Value *, 16> Visited;
138 | SmallVector<const Value *, 4> Working(1, V);
139 | do {
140 | V = Working.pop_back_val();
141 |
142 | SmallVector<Value *, 4> Objs;
143 | GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
144 |
145 | for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
146 | I != IE; ++I) {
147 | V = *I;
148 | if (!Visited.insert(V).second)
149 | continue;
150 | if (Operator::getOpcode(V) == Instruction::IntToPtr) {
151 | const Value *O =
152 | getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
153 | if (O->getType()->isPointerTy()) {
154 | Working.push_back(O);
155 | continue;
156 | }
157 | }
158 | Objects.push_back(const_cast<Value *>(V));
159 | }
160 | } while (!Working.empty());
161 | }
162 |
163 | /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
164 | /// information and it can be tracked to a normal reference to a known
165 | /// object, return the Value for that object.
166 | static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
167 | const MachineFrameInfo *MFI,
168 | UnderlyingObjectsVector &Objects,
169 | const DataLayout &DL) {
170 | auto allMMOsOkay = [&]() {
171 | for (const MachineMemOperand *MMO : MI->memoperands()) {
172 | if (MMO->isVolatile())
173 | return false;
174 |
175 | if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
176 | // Functions that contain tail calls don't have unique PseudoSourceValue
177 | // objects. Two PseudoSourceValues might refer to the same or
178 | // overlapping locations. The client code calling this function assumes
179 | // this is not the case. So return a conservative answer of no known
180 | // object.
181 | if (MFI->hasTailCall())
182 | return false;
183 |
184 | // For now, ignore PseudoSourceValues which may alias LLVM IR values
185 | // because the code that uses this function has no way to cope with
186 | // such aliases.
187 | if (PSV->isAliased(MFI))
188 | return false;
189 |
190 | bool MayAlias = PSV->mayAlias(MFI);
191 | Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
192 | } else if (const Value *V = MMO->getValue()) {
193 | SmallVector<Value *, 4> Objs;
194 | getUnderlyingObjects(V, Objs, DL);
195 |
196 | for (Value *V : Objs) {
197 | if (!isIdentifiedObject(V))
198 | return false;
199 |
200 | Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
201 | }
202 | } else
203 | return false;
204 | }
205 | return true;
206 | };
207 |
208 | if (!allMMOsOkay())
209 | Objects.clear();
210 | }
211 |
212 | void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
213 | BB = bb;
214 | }
215 |
216 | void ScheduleDAGInstrs::finishBlock() {
217 | // Subclasses should no longer refer to the old block.
218 | BB = nullptr;
219 | }
220 |
221 | /// Initialize the DAG and common scheduler state for the current scheduling
222 | /// region. This does not actually create the DAG, only clears it. The
223 | /// scheduling driver may call BuildSchedGraph multiple times per scheduling
224 | /// region.
225 | void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
226 | MachineBasicBlock::iterator begin,
227 | MachineBasicBlock::iterator end,
228 | unsigned regioninstrs) {
229 | assert(bb == BB && "startBlock should set BB");
230 | RegionBegin = begin;
231 | RegionEnd = end;
232 | NumRegionInstrs = regioninstrs;
233 | }
234 |
235 | /// Close the current scheduling region. Don't clear any state in case the
236 | /// driver wants to refer to the previous scheduling region.
237 | void ScheduleDAGInstrs::exitRegion() {
238 | // Nothing to do.
239 | }
240 |
241 | /// addSchedBarrierDeps - Add dependencies from instructions in the current
242 | /// list of instructions being scheduled to the scheduling barrier by adding
243 | /// the exit SU to the register defs and use list. This is because we want to
244 | /// make sure instructions which define registers that are either used by
245 | /// the terminator or are live-out are properly scheduled. This is
246 | /// especially important when the definition latency of the return value(s)
247 | /// is too high to be hidden by the branch, or when the liveout registers
248 | /// are used by instructions in the fallthrough block.
249 | void ScheduleDAGInstrs::addSchedBarrierDeps() {
250 | MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
251 | ExitSU.setInstr(ExitMI);
252 | bool AllDepKnown = ExitMI &&
253 | (ExitMI->isCall() || ExitMI->isBarrier());
254 | if (ExitMI && AllDepKnown) {
255 | // If it's a call or a barrier, add dependencies on the defs and uses of
256 | // the instruction.
257 | for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
258 | const MachineOperand &MO = ExitMI->getOperand(i);
259 | if (!MO.isReg() || MO.isDef()) continue;
260 | unsigned Reg = MO.getReg();
261 | if (Reg == 0) continue;
262 |
263 | if (TRI->isPhysicalRegister(Reg))
264 | Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
265 | else if (MO.readsReg()) // ignore undef operands
266 | addVRegUseDeps(&ExitSU, i);
267 | }
268 | } else {
269 | // For others, e.g. fallthrough, conditional branch, assume the exit
270 | // uses all the registers that are livein to the successor blocks.
271 | assert(Uses.empty() && "Uses in set before adding deps?");
272 | for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
273 | SE = BB->succ_end(); SI != SE; ++SI)
274 | for (const auto &LI : (*SI)->liveins()) {
275 | if (!Uses.contains(LI.PhysReg))
276 | Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
277 | }
278 | }
279 | }
280 |
281 | /// MO is an operand of SU's instruction that defines a physical register. Add
282 | /// data dependencies from SU to any uses of the physical register.
283 | void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
284 | const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
285 | assert(MO.isDef() && "expect physreg def");
286 |
287 | // Ask the target if address-backscheduling is desirable, and if so how much.
288 | const TargetSubtargetInfo &ST = MF.getSubtarget();
289 |
290 | for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
291 | Alias.isValid(); ++Alias) {
292 | if (!Uses.contains(*Alias))
293 | continue;
294 | for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
295 | SUnit *UseSU = I->SU;
296 | if (UseSU == SU)
297 | continue;
298 |
299 | // Adjust the dependence latency using operand def/use information,
300 | // then allow the target to perform its own adjustments.
301 | int UseOp = I->OpIdx;
302 | MachineInstr *RegUse = nullptr;
303 | SDep Dep;
304 | if (UseOp < 0)
305 | Dep = SDep(SU, SDep::Artificial);
306 | else {
307 | // Set the hasPhysRegDefs only for physreg defs that have a use within
308 | // the scheduling region.
309 | SU->hasPhysRegDefs = true;
310 | Dep = SDep(SU, SDep::Data, *Alias);
311 | RegUse = UseSU->getInstr();
312 | }
313 | Dep.setLatency(
314 | SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
315 | UseOp));
316 |
317 | ST.adjustSchedDependency(SU, UseSU, Dep);
318 | UseSU->addPred(Dep);
319 | }
320 | }
321 | }
322 |
323 | /// addPhysRegDeps - Add register dependencies (data, anti, and output) from
324 | /// this SUnit to following instructions in the same scheduling region that
325 | /// depend on the physical register referenced at OperIdx.
326 | void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
327 | MachineInstr *MI = SU->getInstr();
328 | MachineOperand &MO = MI->getOperand(OperIdx);
329 |
330 | // Optionally add output and anti dependencies. For anti
331 | // dependencies we use a latency of 0 because for a multi-issue
332 | // target we want to allow the defining instruction to issue
333 | // in the same cycle as the using instruction.
334 | // TODO: Using a latency of 1 here for output dependencies assumes
335 | // there's no cost for reusing registers.
336 | SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
337 | for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
338 | Alias.isValid(); ++Alias) {
339 | if (!Defs.contains(*Alias))
340 | continue;
341 | for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
342 | SUnit *DefSU = I->SU;
343 | if (DefSU == &ExitSU)
344 | continue;
345 | if (DefSU != SU &&
346 | (Kind != SDep::Output || !MO.isDead() ||
347 | !DefSU->getInstr()->registerDefIsDead(*Alias))) {
348 | if (Kind == SDep::Anti)
349 | DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
350 | else {
351 | SDep Dep(SU, Kind, /*Reg=*/*Alias);
352 | Dep.setLatency(
353 | SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
354 | DefSU->addPred(Dep);
355 | }
356 | }
357 | }
358 | }
359 |
360 | if (!MO.isDef()) {
361 | SU->hasPhysRegUses = true;
362 | // Either insert a new Reg2SUnits entry with an empty SUnits list, or
363 | // retrieve the existing SUnits list for this register's uses.
364 | // Push this SUnit on the use list.
365 | Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
366 | if (RemoveKillFlags)
367 | MO.setIsKill(false);
368 | }
369 | else {
370 | addPhysRegDataDeps(SU, OperIdx);
371 | unsigned Reg = MO.getReg();
372 |
373 | // clear this register's use list
374 | if (Uses.contains(Reg))
375 | Uses.eraseAll(Reg);
376 |
377 | if (!MO.isDead()) {
378 | Defs.eraseAll(Reg);
379 | } else if (SU->isCall) {
380 | // Calls will not be reordered because of chain dependencies (see
381 | // below). Since call operands are dead, calls may continue to be added
382 | // to the DefList making dependence checking quadratic in the size of
383 | // the block. Instead, we leave only one call at the back of the
384 | // DefList.
385 | Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
386 | Reg2SUnitsMap::iterator B = P.first;
387 | Reg2SUnitsMap::iterator I = P.second;
388 | for (bool isBegin = I == B; !isBegin; /* empty */) {
389 | isBegin = (--I) == B;
390 | if (!I->SU->isCall)
391 | break;
392 | I = Defs.erase(I);
393 | }
394 | }
395 |
396 | // Defs are pushed in the order they are visited and never reordered.
397 | Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
398 | }
399 | }
400 |
401 | LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
402 | {
403 | unsigned Reg = MO.getReg();
404 | // No point in tracking lanemasks if we don't have interesting subregisters.
405 | const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
406 | if (!RC.HasDisjunctSubRegs)
407 | return ~0u;
408 |
409 | unsigned SubReg = MO.getSubReg();
410 | if (SubReg == 0)
411 | return RC.getLaneMask();
412 | return TRI->getSubRegIndexLaneMask(SubReg);
413 | }
414 |
415 | /// addVRegDefDeps - Add register output and data dependencies from this SUnit
416 | /// to instructions that occur later in the same scheduling region if they read
417 | /// from or write to the virtual register defined at OperIdx.
418 | ///
419 | /// TODO: Hoist loop induction variable increments. This has to be
420 | /// reevaluated. Generally, IV scheduling should be done before coalescing.
421 | void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
422 | MachineInstr *MI = SU->getInstr();
423 | MachineOperand &MO = MI->getOperand(OperIdx);
424 | unsigned Reg = MO.getReg();
425 |
426 | LaneBitmask DefLaneMask;
427 | LaneBitmask KillLaneMask;
428 | if (TrackLaneMasks) {
429 | bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
430 | DefLaneMask = getLaneMaskForMO(MO);
431 | // If we have a <read-undef> flag, none of the lane values comes from an
432 | // earlier instruction.
433 | KillLaneMask = IsKill ? ~0u : DefLaneMask;
434 |
435 | // Clear undef flag, we'll re-add it later once we know which subregister
436 | // Def is first.
437 | MO.setIsUndef(false);
438 | } else {
439 | DefLaneMask = ~0u;
440 | KillLaneMask = ~0u;
441 | }
442 |
443 | if (MO.isDead()) {
444 | assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
445 | "Dead defs should have no uses");
446 | } else {
447 | // Add data dependence to all uses we found so far.
448 | const TargetSubtargetInfo &ST = MF.getSubtarget();
449 | for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
450 | E = CurrentVRegUses.end(); I != E; /*empty*/) {
451 | LaneBitmask LaneMask = I->LaneMask;
452 | // Ignore uses of other lanes.
453 | if ((LaneMask & KillLaneMask) == 0) {
454 | ++I;
455 | continue;
456 | }
457 |
458 | if ((LaneMask & DefLaneMask) != 0) {
459 | SUnit *UseSU = I->SU;
460 | MachineInstr *Use = UseSU->getInstr();
461 | SDep Dep(SU, SDep::Data, Reg);
462 | Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
463 | I->OperandIndex));
464 | ST.adjustSchedDependency(SU, UseSU, Dep);
465 | UseSU->addPred(Dep);
466 | }
467 |
468 | LaneMask &= ~KillLaneMask;
469 | // If we found a Def for all lanes of this use, remove it from the list.
470 | if (LaneMask != 0) {
471 | I->LaneMask = LaneMask;
472 | ++I;
473 | } else
474 | I = CurrentVRegUses.erase(I);
475 | }
476 | }
477 |
478 | // Shortcut: Singly defined vregs do not have output/anti dependencies.
479 | if (MRI.hasOneDef(Reg))
480 | return;
481 |
482 | // Add output dependence to the next nearest defs of this vreg.
483 | //
484 | // Unless this definition is dead, the output dependence should be
485 | // transitively redundant with antidependencies from this definition's
486 | // uses. We're conservative for now until we have a way to guarantee the uses
487 | // are not eliminated sometime during scheduling. The output dependence edge
488 | // is also useful if output latency exceeds def-use latency.
489 | LaneBitmask LaneMask = DefLaneMask;
490 | for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
491 | CurrentVRegDefs.end())) {
492 | // Ignore defs for other lanes.
493 | if ((V2SU.LaneMask & LaneMask) == 0)
494 | continue;
495 | // Add an output dependence.
496 | SUnit *DefSU = V2SU.SU;
497 | // Ignore additional defs of the same lanes in one instruction. This can
498 | // happen because lanemasks are shared for targets with too many
499 | // subregisters. We also use some representation tricks/hacks where we
500 | // add super-register defs/uses, to imply that although we only access parts
501 | // of the reg we care about the full one.
502 | if (DefSU == SU)
503 | continue;
504 | SDep Dep(SU, SDep::Output, Reg);
505 | Dep.setLatency(
506 | SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
507 | DefSU->addPred(Dep);
508 |
509 | // Update current definition. This can get tricky if the def was about a
510 | // bigger lanemask before. We then have to shrink it and create a new
511 | // VReg2SUnit for the non-overlapping part.
512 | LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
513 | LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
514 | V2SU.SU = SU;
515 | V2SU.LaneMask = OverlapMask;
516 | if (NonOverlapMask != 0)
517 | CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
518 | }
519 | // If there was no CurrentVRegDefs entry for some lanes yet, create one.
520 | if (LaneMask != 0)
521 | CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
522 | }
523 |
524 | /// addVRegUseDeps - Add a register data dependency if the instruction that
525 | /// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
526 | /// register antidependency from this SUnit to instructions that occur later in
527 | /// the same scheduling region if they write the virtual register.
528 | ///
529 | /// TODO: Handle ExitSU "uses" properly.
530 | void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
531 | const MachineInstr *MI = SU->getInstr();
532 | const MachineOperand &MO = MI->getOperand(OperIdx);
533 | unsigned Reg = MO.getReg();
534 |
535 | // Remember the use. Data dependencies will be added when we find the def.
536 | LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
537 | CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
538 |
539 | // Add antidependences to the following defs of the vreg.
540 | for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
541 | CurrentVRegDefs.end())) {
542 | // Ignore defs for unrelated lanes.
543 | LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
544 | if ((PrevDefLaneMask & LaneMask) == 0)
545 | continue;
546 | if (V2SU.SU == SU)
547 | continue;
548 |
549 | V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
550 | }
551 | }
552 |
553 | /// Return true if MI is an instruction we are unable to reason about
554 | /// (like a call or something with unmodeled side effects).
555 | static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
556 | return MI->isCall() || MI->hasUnmodeledSideEffects() ||
557 | (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA));
558 | }
559 |
560 | /// This returns true if the two MIs need a chain edge between them.
561 | /// This is called on normal stores and loads.
562 | static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
563 | const DataLayout &DL, MachineInstr *MIa,
564 | MachineInstr *MIb) {
565 | const MachineFunction *MF = MIa->getParent()->getParent();
566 | const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
567 |
568 | assert ((MIa->mayStore() || MIb->mayStore()) &&
569 | "Dependency checked between two loads");
570 |
571 | // Let the target decide if memory accesses cannot possibly overlap.
572 | if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
573 | return false;
574 |
575 | // To this point analysis is generic. From here on we do need AA.
576 | if (!AA)
577 | return true;
578 |
579 | // FIXME: Need to handle multiple memory operands to support all targets.
580 | if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
581 | return true;
582 |
583 | MachineMemOperand *MMOa = *MIa->memoperands_begin();
584 | MachineMemOperand *MMOb = *MIb->memoperands_begin();
585 |
586 | if (!MMOa->getValue() || !MMOb->getValue())
587 | return true;
588 |
589 | // The following interface to AA is fashioned after DAGCombiner::isAlias
590 | // and operates with MachineMemOperand offset with some important
591 | // assumptions:
592 | //   - LLVM fundamentally assumes flat address spaces.
593 | //   - MachineOperand offset can *only* result from legalization and
594 | //     cannot affect queries other than the trivial case of overlap
595 | //     checking.
596 | //   - These offsets never wrap and never step outside
597 | //     of allocated objects.
598 | //   - There should never be any negative offsets here.
599 | //
600 | // FIXME: Modify API to hide this math from "user"
601 | // FIXME: Even before we go to AA we can reason locally about some
602 | // memory objects. It can save compile time, and possibly catch some
603 | // corner cases not currently covered.
604 |
605 | assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
606 | assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
607 |
608 | int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
609 | int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
610 | int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
611 |
612 | AliasResult AAResult =
613 | AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
614 | UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
615 | MemoryLocation(MMOb->getValue(), Overlapb,
616 | UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
617 |
618 | return (AAResult != NoAlias);
619 | }
620 |
621 | /// Check whether two objects need a chain edge and add it if needed.
622 | void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
623 | unsigned Latency) {
624 | if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
625 | SUb->getInstr())) {
626 | SDep Dep(SUa, SDep::MayAliasMem);
627 | Dep.setLatency(Latency);
628 | SUb->addPred(Dep);
629 | }
630 | }
631 |
632 | /// Create an SUnit for each real instruction, numbered in top-down topological
633 | /// order. The instruction order A < B implies that no edge exists from B to A.
634 | ///
635 | /// Map each real instruction to its SUnit.
636 | ///
637 | /// After initSUnits, the SUnits vector cannot be resized and the scheduler may
638 | /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
639 | /// instead of pointers.
640 | ///
641 | /// MachineScheduler relies on initSUnits numbering the nodes by their order in
642 | /// the original instruction list.
643 | void ScheduleDAGInstrs::initSUnits() {
644 | // We'll be allocating one SUnit for each real instruction in the region,
645 | // which is contained within a basic block.
646 | SUnits.reserve(NumRegionInstrs);
647 |
648 | for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
649 | MachineInstr *MI = I;
650 | if (MI->isDebugValue())
651 | continue;
652 |
653 | SUnit *SU = newSUnit(MI);
654 | MISUnitMap[MI] = SU;
655 |
656 | SU->isCall = MI->isCall();
657 | SU->isCommutable = MI->isCommutable();
658 |
659 | // Assign the Latency field of SU using target-provided information.
660 | SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
661 |
662 | // If this SUnit uses a reserved or unbuffered resource, mark it as such.
663 | //
664 | // Reserved resources block an instruction from issuing and stall the
665 | // entire pipeline. These are identified by BufferSize=0.
666 | //
667 | // Unbuffered resources prevent execution of subsequent instructions that
668 | // require the same resources. This is used for in-order execution pipelines
669 | // within an out-of-order core. These are identified by BufferSize=1.
670 | if (SchedModel.hasInstrSchedModel()) {
671 | const MCSchedClassDesc *SC = getSchedClass(SU);
672 | for (TargetSchedModel::ProcResIter
673 | PI = SchedModel.getWriteProcResBegin(SC),
674 | PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
675 | switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
676 | case 0:
677 | SU->hasReservedResource = true;
678 | break;
679 | case 1:
680 | SU->isUnbuffered = true;
681 | break;
682 | default:
683 | break;
684 | }
685 | }
686 | }
687 | }
688 | }
689 |
690 | void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
691 | const MachineInstr *MI = SU->getInstr();
692 | for (const MachineOperand &MO : MI->operands()) {
693 | if (!MO.isReg())
694 | continue;
695 | if (!MO.readsReg())
696 | continue;
697 | if (TrackLaneMasks && !MO.isUse())
698 | continue;
699 |
700 | unsigned Reg = MO.getReg();
701 | if (!TargetRegisterInfo::isVirtualRegister(Reg))
702 | continue;
703 |
704 | // Ignore re-defs.
705 | if (TrackLaneMasks) {
706 | bool FoundDef = false;
707 | for (const MachineOperand &MO2 : MI->operands()) {
708 | if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
709 | FoundDef = true;
710 | break;
711 | }
712 | }
713 | if (FoundDef)
714 | continue;
715 | }
716 |
717 | // Record this local VReg use.
718 | VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
719 | for (; UI != VRegUses.end(); ++UI) {
720 | if (UI->SU == SU)
721 | break;
722 | }
723 | if (UI == VRegUses.end())
724 | VRegUses.insert(VReg2SUnit(Reg, 0, SU));
725 | }
726 | }
727 |
728 | class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
729 |
730 | /// Current total number of SUs in map.
731 | unsigned NumNodes;
732 |
733 | /// 1 for loads, 0 for stores. (see comment in SUList)
734 | unsigned TrueMemOrderLatency;
735 | public:
736 |
737 | Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
738 |
739 | /// To keep NumNodes up to date, insert() is used instead of
740 | /// this operator w/ push_back().
741 | ValueType &operator[](const SUList &Key) {
742 | llvm_unreachable("Don't use. Use insert() instead."); };
743 |
744 | /// Add SU to the SUList of V. If Map grows huge, reduce its size
745 | /// by calling reduce().
746 | void inline insert(SUnit *SU, ValueType V) {
747 | MapVector::operator[](V).push_back(SU);
748 | NumNodes++;
749 | }
750 |
751 | /// Clears the list of SUs mapped to V.
752 | void inline clearList(ValueType V) {
753 | iterator Itr = find(V);
754 | if (Itr != end()) {
755 | assert (NumNodes >= Itr->second.size());
756 | NumNodes -= Itr->second.size();
757 |
758 | Itr->second.clear();
759 | }
760 | }
761 |
762 | /// Clears map from all contents.
763 | void clear() {
764 | MapVector<ValueType, SUList>::clear();
765 | NumNodes = 0;
766 | }
767 |
768 | unsigned inline size() const { return NumNodes; }
769 |
770 | /// Count the number of SUs in this map after a reduction.
771 | void reComputeSize(void) {
772 | NumNodes = 0;
773 | for (auto &I : *this)
774 | NumNodes += I.second.size();
775 | }
776 |
777 | unsigned inline getTrueMemOrderLatency() const {
778 | return TrueMemOrderLatency;
779 | }
780 |
781 | void dump();
782 | };
783 |
784 | void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
785 | Value2SUsMap &Val2SUsMap) {
786 | for (auto &I : Val2SUsMap)
787 | addChainDependencies(SU, I.second,
788 | Val2SUsMap.getTrueMemOrderLatency());
789 | }
790 |
791 | void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
792 | Value2SUsMap &Val2SUsMap,
793 | ValueType V) {
794 | Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
795 | if (Itr != Val2SUsMap.end())
796 | addChainDependencies(SU, Itr->second,
797 | Val2SUsMap.getTrueMemOrderLatency());
798 | }
799 |
800 | void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
801 | assert (BarrierChain != nullptr);
802 |
803 | for (auto &I : map) {
804 | SUList &sus = I.second;
805 | for (auto *SU : sus)
806 | SU->addPredBarrier(BarrierChain);
807 | }
808 | map.clear();
809 | }
810 |
811 | void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
812 | assert (BarrierChain != nullptr);
813 |
814 | // Go through all lists of SUs.
815 | for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
816 | Value2SUsMap::iterator CurrItr = I++;
817 | SUList &sus = CurrItr->second;
818 | SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
819 | for (; SUItr != SUEE; ++SUItr) {
820 | // Stop on BarrierChain or any instruction above it.
821 | if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
822 | break;
823 |
824 | (*SUItr)->addPredBarrier(BarrierChain);
825 | }
826 |
827 | // Also remove the BarrierChain from the list if present.
828 | if (SUItr != SUEE && *SUItr == BarrierChain)
829 | SUItr++;
830 |
831 | // Remove all SUs that are now successors of BarrierChain.
832 | if (SUItr != sus.begin())
833 | sus.erase(sus.begin(), SUItr);
834 | }
835 |
836 | // Remove all entries with empty su lists.
837 | map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
838 | return (mapEntry.second.empty()); });
839 |
840 | // Recompute the size of the map (NumNodes).
841 | map.reComputeSize();
842 | }
843 |
844 | /// If RegPressure is non-null, compute register pressure as a side effect. The
845 | /// DAG builder is an efficient place to do it because it already visits
846 | /// operands.
847 | void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
848 | RegPressureTracker *RPTracker,
849 | PressureDiffs *PDiffs,
850 | LiveIntervals *LIS,
851 | bool TrackLaneMasks) {
852 | const TargetSubtargetInfo &ST = MF.getSubtarget();
853 | bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
854 | : ST.useAA();
855 | AAForDep = UseAA ? AA : nullptr;
856 |
857 | BarrierChain = nullptr;
858 |
859 | this->TrackLaneMasks = TrackLaneMasks;
860 | MISUnitMap.clear();
861 | ScheduleDAG::clearDAG();
862 |
863 | // Create an SUnit for each real instruction.
864 | initSUnits();
865 |
866 | if (PDiffs)
867 | PDiffs->init(SUnits.size());
868 |
869 | // We build scheduling units by walking a block's instruction list
870 | // from bottom to top.
871 |
872 | // Each MI's memory operand(s) is analyzed into a list of underlying
873 | // objects. The SU is then inserted in the SUList(s) mapped from the
874 | // Value(s). Each Value thus gets mapped to lists of SUs depending
875 | // on it, stores and loads kept separately. Two SUs are trivially
876 | // non-aliasing if they both depend on only identified Values and do
877 | // not share any common Value.
878 | Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
879 |
880 | // Certain memory accesses are known to not alias any SU in Stores
881 | // or Loads, and have therefore their own 'NonAlias'
882 | // domain. E.g. spill / reload instructions never alias LLVM IR
883 | // Values. It would be nice to assume that these types of memory
884 | // accesses always have proper memory operand modelling, and are
885 | // therefore never unanalyzable, but this is conservatively not
886 | // done.
887 | Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
888 |
889 | // Remove any stale debug info; sometimes BuildSchedGraph is called again
890 | // without emitting the info from the previous call.
891 | DbgValues.clear();
892 | FirstDbgValue = nullptr;
893 |
894 | assert(Defs.empty() && Uses.empty() &&
895 | "Only BuildGraph should update Defs/Uses");
896 | Defs.setUniverse(TRI->getNumRegs());
897 | Uses.setUniverse(TRI->getNumRegs());
898 |
899 | assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
900 | assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
901 | unsigned NumVirtRegs = MRI.getNumVirtRegs();
902 | CurrentVRegDefs.setUniverse(NumVirtRegs);
903 | CurrentVRegUses.setUniverse(NumVirtRegs);
904 |
905 | VRegUses.clear();
906 | VRegUses.setUniverse(NumVirtRegs);
907 |
908 | // Model data dependencies between instructions being scheduled and the
909 | // ExitSU.
910 | addSchedBarrierDeps();
911 |
912 | // Walk the list of instructions, from bottom moving up.
913 | MachineInstr *DbgMI = nullptr;
914 | for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
915 | MII != MIE; --MII) {
916 | MachineInstr *MI = std::prev(MII);
917 | if (MI && DbgMI) {
918 | DbgValues.push_back(std::make_pair(DbgMI, MI));
919 | DbgMI = nullptr;
920 | }
921 |
922 | if (MI->isDebugValue()) {
      ^ Called C++ object pointer is null
923 | DbgMI = MI;
924 | continue;
925 | }
926 | SUnit *SU = MISUnitMap[MI];
927 | assert(SU && "No SUnit mapped to this MI");
928 |
929 | if (RPTracker) {
930 | collectVRegUses(SU);
931 |
932 | RegisterOperands RegOpers;
933 | RegOpers.collect(*MI, *TRI, MRI, TrackLaneMasks, false);
934 | if (TrackLaneMasks) {
935 | SlotIndex SlotIdx = LIS->getInstructionIndex(*MI);
936 | RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
937 | }
938 | if (PDiffs != nullptr)
939 | PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
940 |
941 | RPTracker->recedeSkipDebugValues();
942 | assert(&*RPTracker->getPos() == MI && "RPTracker in sync");
943 | RPTracker->recede(RegOpers);
944 | }
945 |
946 | assert(
947 | (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
948 | "Cannot schedule terminators or labels!");
949 |
950 | // Add register-based dependencies (data, anti, and output).
951 | // For some instructions (calls, returns, inline-asm, etc.) there can
952 | // be explicit uses and implicit defs, in which case the use will appear
953 | // on the operand list before the def. Do two passes over the operand
954 | // list to make sure that defs are processed before any uses.
955 | bool HasVRegDef = false;
956 | for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
957 | const MachineOperand &MO = MI->getOperand(j);
958 | if (!MO.isReg() || !MO.isDef())
959 | continue;
960 | unsigned Reg = MO.getReg();
961 | if (Reg == 0)
962 | continue;
963 |
964 | if (TRI->isPhysicalRegister(Reg))
965 | addPhysRegDeps(SU, j);
966 | else {
967 | HasVRegDef = true;
968 | addVRegDefDeps(SU, j);
969 | }
970 | }
971 | // Now process all uses.
972 | for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
973 | const MachineOperand &MO = MI->getOperand(j);
974 | // Only look at use operands.
975 | // We do not need to check for MO.readsReg() here because subsequent
976 | // subregister defs will get output dependence edges and need no
977 | // additional use dependencies.
978 | if (!MO.isReg() || !MO.isUse())
979 | continue;
980 | unsigned Reg = MO.getReg();
981 | if (Reg == 0)
982 | continue;
983 |
984 | if (TRI->isPhysicalRegister(Reg))
985 | addPhysRegDeps(SU, j);
986 | else if (MO.readsReg()) // ignore undef operands
987 | addVRegUseDeps(SU, j);
988 | }
989 |
990 | // If we haven't seen any uses in this scheduling region, create a
991 | // dependence edge to ExitSU to model the live-out latency. This is required
992 | // for vreg defs with no in-region use, and prefetches with no vreg def.
993 | //
994 | // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
995 | // check currently relies on being called before adding chain deps.
996 | if (SU->NumSuccs == 0 && SU->Latency > 1
997 | && (HasVRegDef || MI->mayLoad())) {
998 | SDep Dep(SU, SDep::Artificial);
999 | Dep.setLatency(SU->Latency - 1);
1000 | ExitSU.addPred(Dep);
1001 | }
1002 |
1003 | // Add memory dependencies (Note: isStoreToStackSlot and
1004 | // isLoadFromStackSlot are not usable after stack slots are lowered to
1005 | // actual addresses).
1006 |
1007 | // This is a barrier event that acts as a pivotal node in the DAG.
1008 | if (isGlobalMemoryObject(AA, MI)) {
1009 |
1010 | // Become the barrier chain.
1011 | if (BarrierChain)
1012 | BarrierChain->addPredBarrier(SU);
1013 | BarrierChain = SU;
1014 |
1015 | DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
1016 | << BarrierChain->NodeNum << ").\n";);
1017 |
1018 | // Add dependencies against everything below it and clear maps.
1019 | addBarrierChain(Stores);
1020 | addBarrierChain(Loads);
1021 | addBarrierChain(NonAliasStores);
1022 | addBarrierChain(NonAliasLoads);
1023 |
1024 | continue;
1025 | }
1026 |
1027 | // If it's not a store or a variant load, we're done.
1028 | if (!MI->mayStore() && !(MI->mayLoad() && !MI->isInvariantLoad(AA)))
1029 | continue;
1030 |
1031 | // Always add a dependency edge to BarrierChain if present.
1032 | if (BarrierChain) | |||
1033 | BarrierChain->addPredBarrier(SU); | |||
1034 | ||||
1035 | // Find the underlying objects for MI. The Objs vector is either | |||
1036 | // empty, or filled with the Values of memory locations which this | |||
1037 | // SU depends on. An empty vector means the memory location is | |||
1038 | // unknown, and may alias anything. | |||
1039 | UnderlyingObjectsVector Objs; | |||
1040 | getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); | |||
1041 | ||||
1042 | if (MI->mayStore()) { | |||
1043 | if (Objs.empty()) { | |||
1044 | // An unknown store depends on all stores and loads. | |||
1045 | addChainDependencies(SU, Stores); | |||
1046 | addChainDependencies(SU, NonAliasStores); | |||
1047 | addChainDependencies(SU, Loads); | |||
1048 | addChainDependencies(SU, NonAliasLoads); | |||
1049 | ||||
1050 | // Map this store to 'UnknownValue'. | |||
1051 | Stores.insert(SU, UnknownValue); | |||
1052 | } else { | |||
1053 | // Add precise dependencies against all previously seen memory | |||
1054 | // accesses mapped to the same Value(s). | |||
1055 | for (const UnderlyingObject &UnderlObj : Objs) { | |||
1056 | ValueType V = UnderlObj.getValue(); | |||
1057 | bool ThisMayAlias = UnderlObj.mayAlias(); | |||
1058 | ||||
1059 | // Add dependencies to previous stores and loads mapped to V. | |||
1060 | addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V); | |||
1061 | addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V); | |||
1062 | } | |||
1063 | // Update the store map after all chains have been added to avoid adding | |||
1064 | // self-loop edge if multiple underlying objects are present. | |||
1065 | for (const UnderlyingObject &UnderlObj : Objs) { | |||
1066 | ValueType V = UnderlObj.getValue(); | |||
1067 | bool ThisMayAlias = UnderlObj.mayAlias(); | |||
1068 | ||||
1069 | // Map this store to V. | |||
1070 | (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V); | |||
1071 | } | |||
1072 | // The store may have dependencies to unanalyzable loads and | |||
1073 | // stores. | |||
1074 | addChainDependencies(SU, Loads, UnknownValue); | |||
1075 | addChainDependencies(SU, Stores, UnknownValue); | |||
1076 | } | |||
1077 | } else { // SU is a load. | |||
1078 | if (Objs.empty()) { | |||
1079 | // An unknown load depends on all stores. | |||
1080 | addChainDependencies(SU, Stores); | |||
1081 | addChainDependencies(SU, NonAliasStores); | |||
1082 | ||||
1083 | Loads.insert(SU, UnknownValue); | |||
1084 | } else { | |||
1085 | for (const UnderlyingObject &UnderlObj : Objs) { | |||
1086 | ValueType V = UnderlObj.getValue(); | |||
1087 | bool ThisMayAlias = UnderlObj.mayAlias(); | |||
1088 | ||||
1089 | // Add precise dependencies against all previously seen stores | |||
1090 | // mapping to the same Value(s). | |||
1091 | addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V); | |||
1092 | ||||
1093 | // Map this load to V. | |||
1094 | (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V); | |||
1095 | } | |||
1096 | // The load may have dependencies to unanalyzable stores. | |||
1097 | addChainDependencies(SU, Stores, UnknownValue); | |||
1098 | } | |||
1099 | } | |||
1100 | ||||
1101 | // Reduce maps if they grow huge. | |||
1102 | if (Stores.size() + Loads.size() >= HugeRegion) { | |||
1103 | DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Reducing Stores and Loads maps.\n" ;; } } while (0); | |||
1104 | reduceHugeMemNodeMaps(Stores, Loads, getReductionSize()); | |||
1105 | } | |||
1106 | if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) { | |||
1107 | DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n" ;; } } while (0); | |||
1108 | reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize()); | |||
1109 | } | |||
1110 | } | |||
1111 | ||||
1112 | if (DbgMI) | |||
1113 | FirstDbgValue = DbgMI; | |||
1114 | ||||
1115 | Defs.clear(); | |||
1116 | Uses.clear(); | |||
1117 | CurrentVRegDefs.clear(); | |||
1118 | CurrentVRegUses.clear(); | |||
1119 | } | |||
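// Editor's sketch (not part of ScheduleDAGInstrs): the memory-dependence
// bookkeeping above in miniature. Each underlying object maps to the memory
// SUs last seen for it; a new access chains to every SU under its key, then
// registers itself. Names (SimpleSU, ChainMap, addChainsAndInsert) are
// hypothetical; the real Value2SUsMap keys on Value*/PseudoSourceValue*.
#include <map>
#include <vector>

struct SimpleSU { unsigned NodeNum; }; // stand-in for SUnit

using ChainMap = std::map<const void *, std::vector<SimpleSU *>>;

static void addChainsAndInsert(SimpleSU *SU, ChainMap &M, const void *V) {
  for (SimpleSU *Pred : M[V])
    (void)Pred;        // real code: SU->addPred(SDep(Pred, SDep::Order))
  M[V].push_back(SU);  // later accesses to V now chain to SU
}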
1120 | ||||
1121 | raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) { | |||
1122 | PSV->printCustom(OS); | |||
1123 | return OS; | |||
1124 | } | |||
1125 | ||||
1126 | void ScheduleDAGInstrs::Value2SUsMap::dump() { | |||
1127 | for (auto &Itr : *this) { | |||
1128 | if (Itr.first.is<const Value*>()) { | |||
1129 | const Value *V = Itr.first.get<const Value*>(); | |||
1130 | if (isa<UndefValue>(V)) | |||
1131 | dbgs() << "Unknown"; | |||
1132 | else | |||
1133 | V->printAsOperand(dbgs()); | |||
1134 | } | |||
1135 | else if (Itr.first.is<const PseudoSourceValue*>()) | |||
1136 | dbgs() << Itr.first.get<const PseudoSourceValue*>(); | |||
1137 | else | |||
1138 | llvm_unreachable("Unknown Value type.")::llvm::llvm_unreachable_internal("Unknown Value type.", "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/CodeGen/ScheduleDAGInstrs.cpp" , 1138); | |||
1139 | ||||
1140 | dbgs() << " : "; | |||
1141 | dumpSUList(Itr.second); | |||
1142 | } | |||
1143 | } | |||
1144 | ||||
1145 | /// Reduce maps in FIFO order, by N SUs. This is better than turning | |||
1146 | /// every Nth memory SU into BarrierChain in buildSchedGraph(), since | |||
1147 | /// it avoids unnecessary edges between seen SUs above the new | |||
1148 | /// BarrierChain, and those below it. | |||
1149 | void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, | |||
1150 | Value2SUsMap &loads, unsigned N) { | |||
1151 | DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Before reduction:\nStoring SUnits:\n" ; stores.dump(); dbgs() << "Loading SUnits:\n"; loads.dump (); } } while (0) | |||
1152 | stores.dump();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Before reduction:\nStoring SUnits:\n" ; stores.dump(); dbgs() << "Loading SUnits:\n"; loads.dump (); } } while (0) | |||
1153 | dbgs() << "Loading SUnits:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Before reduction:\nStoring SUnits:\n" ; stores.dump(); dbgs() << "Loading SUnits:\n"; loads.dump (); } } while (0) | |||
1154 | loads.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Before reduction:\nStoring SUnits:\n" ; stores.dump(); dbgs() << "Loading SUnits:\n"; loads.dump (); } } while (0); | |||
1155 | ||||
1156 | // Insert all SU's NodeNums into a vector and sort it. | |||
1157 | std::vector<unsigned> NodeNums; | |||
1158 | NodeNums.reserve(stores.size() + loads.size()); | |||
1159 | for (auto &I : stores) | |||
1160 | for (auto *SU : I.second) | |||
1161 | NodeNums.push_back(SU->NodeNum); | |||
1162 | for (auto &I : loads) | |||
1163 | for (auto *SU : I.second) | |||
1164 | NodeNums.push_back(SU->NodeNum); | |||
1165 | std::sort(NodeNums.begin(), NodeNums.end()); | |||
1166 | ||||
1167 | // The N last elements in NodeNums will be removed, and the SU with | |||
1168 | // the lowest NodeNum of them will become the new BarrierChain to | |||
1169 | // let the not yet seen SUs have a dependency to the removed SUs. | |||
1170 | assert (N <= NodeNums.size()); | |||
1171 | SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)]; | |||
1172 | if (BarrierChain) { | |||
1173 | // The aliasing and non-aliasing maps reduce independently of each | |||
1174 | // other, but share a common BarrierChain. Check if the | |||
1175 | // newBarrierChain is above the former one. If it is not, it may | |||
1176 | // introduce a loop to use newBarrierChain, so keep the old one. | |||
1177 | if (newBarrierChain->NodeNum < BarrierChain->NodeNum) { | |||
1178 | BarrierChain->addPredBarrier(newBarrierChain); | |||
1179 | BarrierChain = newBarrierChain; | |||
1180 | DEBUG(dbgs() << "Inserting new barrier chain: SU("do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Inserting new barrier chain: SU(" << BarrierChain->NodeNum << ").\n";; } } while (0) | |||
1181 | << BarrierChain->NodeNum << ").\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Inserting new barrier chain: SU(" << BarrierChain->NodeNum << ").\n";; } } while (0); | |||
1182 | } | |||
1183 | else | |||
1184 | DEBUG(dbgs() << "Keeping old barrier chain: SU("do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Keeping old barrier chain: SU(" << BarrierChain->NodeNum << ").\n";; } } while (0) | |||
1185 | << BarrierChain->NodeNum << ").\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Keeping old barrier chain: SU(" << BarrierChain->NodeNum << ").\n";; } } while (0); | |||
1186 | } | |||
1187 | else | |||
1188 | BarrierChain = newBarrierChain; | |||
1189 | ||||
1190 | insertBarrierChain(stores); | |||
1191 | insertBarrierChain(loads); | |||
1192 | ||||
1193 | DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "After reduction:\nStoring SUnits:\n" ; stores.dump(); dbgs() << "Loading SUnits:\n"; loads.dump (); } } while (0) | |||
1194 | stores.dump();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "After reduction:\nStoring SUnits:\n" ; stores.dump(); dbgs() << "Loading SUnits:\n"; loads.dump (); } } while (0) | |||
1195 | dbgs() << "Loading SUnits:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "After reduction:\nStoring SUnits:\n" ; stores.dump(); dbgs() << "Loading SUnits:\n"; loads.dump (); } } while (0) | |||
1196 | loads.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "After reduction:\nStoring SUnits:\n" ; stores.dump(); dbgs() << "Loading SUnits:\n"; loads.dump (); } } while (0); | |||
1197 | } | |||
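// Editor's sketch: the reduction policy above, isolated. Sort all NodeNums
// currently held by the maps; the SU owning the N-th-largest number becomes
// the new BarrierChain and the N largest are dropped from the maps. The
// helper name and flat containers are hypothetical.
#include <algorithm>
#include <vector>

static unsigned pickBarrierNodeNum(std::vector<unsigned> Nums, unsigned N) {
  // Caller guarantees N <= Nums.size(), mirroring the assert above.
  std::sort(Nums.begin(), Nums.end());
  return *(Nums.end() - N); // lowest NodeNum among the N removed SUs
}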
1198 | ||||
1199 | /// \brief Initialize register live-range state for updating kills. | |||
1200 | void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { | |||
1201 | // Start with no live registers. | |||
1202 | LiveRegs.reset(); | |||
1203 | ||||
1204 | // Examine the live-in regs of all successors. | |||
1205 | for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), | |||
1206 | SE = BB->succ_end(); SI != SE; ++SI) { | |||
1207 | for (const auto &LI : (*SI)->liveins()) { | |||
1208 | // Repeat, for reg and all subregs. | |||
1209 | for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); | |||
1210 | SubRegs.isValid(); ++SubRegs) | |||
1211 | LiveRegs.set(*SubRegs); | |||
1212 | } | |||
1213 | } | |||
1214 | } | |||
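// Editor's sketch: the seeding rule above without the machine-level detail.
// A block's live-out set is the union of its successors' live-in sets (the
// real code also sets every subregister). A fixed register count is assumed.
#include <bitset>
#include <vector>

static std::bitset<256>
computeLiveOut(const std::vector<std::bitset<256>> &SuccLiveIns) {
  std::bitset<256> Live; // start with no live registers
  for (const auto &LI : SuccLiveIns)
    Live |= LI;          // union over all successors
  return Live;
}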
1215 | ||||
1216 | /// \brief If we change a kill flag on the bundle instruction implicit register | |||
1217 | /// operands, then we also need to propagate that to any instructions inside | |||
1218 | /// the bundle which had the same kill state. | |||
1219 | static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, | |||
1220 | bool NewKillState, | |||
1221 | const TargetRegisterInfo *TRI) { | |||
1222 | if (MI->getOpcode() != TargetOpcode::BUNDLE) | |||
1223 | return; | |||
1224 | ||||
1225 | // Walk backwards from the last instruction in the bundle to the first. | |||
1226 | // Once we set a kill flag on an instruction, we bail out, as otherwise we | |||
1227 | // might set it on too many operands. We will clear as many flags as we | |||
1228 | // can though. | |||
1229 | MachineBasicBlock::instr_iterator Begin = MI->getIterator(); | |||
1230 | MachineBasicBlock::instr_iterator End = getBundleEnd(*MI); | |||
1231 | while (Begin != End) { | |||
1232 | if (NewKillState) { | |||
1233 | if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) | |||
1234 | return; | |||
1235 | } else | |||
1236 | (--End)->clearRegisterKills(Reg, TRI); | |||
1237 | } | |||
1238 | } | |||
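// Editor's sketch: the walk above in schematic form. Going from the last
// bundled instruction back to the first, a kill flag is set at most once
// (then we bail out) but cleared on every matching operand. FlagInstr and
// UsesReg are invented for the sketch.
#include <vector>

struct FlagInstr { bool UsesReg = false; bool Kill = false; };

static void toggleAcrossBundle(std::vector<FlagInstr> &Bundle, bool NewKill) {
  for (auto I = Bundle.rbegin(); I != Bundle.rend(); ++I) {
    if (!I->UsesReg)
      continue;         // no operand for this register; keep walking
    if (NewKill) {
      I->Kill = true;   // set the flag once...
      return;           // ...and bail, as the comment above explains
    }
    I->Kill = false;    // but clear as many flags as we can
  }
}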
1239 | ||||
1240 | bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { | |||
1241 | // Setting kill flag... | |||
1242 | if (!MO.isKill()) { | |||
1243 | MO.setIsKill(true); | |||
1244 | toggleBundleKillFlag(MI, MO.getReg(), true, TRI); | |||
1245 | return false; | |||
1246 | } | |||
1247 | ||||
1248 | // If MO itself is live, clear the kill flag... | |||
1249 | if (LiveRegs.test(MO.getReg())) { | |||
1250 | MO.setIsKill(false); | |||
1251 | toggleBundleKillFlag(MI, MO.getReg(), false, TRI); | |||
1252 | return false; | |||
1253 | } | |||
1254 | ||||
1255 | // If any subreg of MO is live, then create an imp-def for that | |||
1256 | // subreg and keep MO marked as killed. | |||
1257 | MO.setIsKill(false); | |||
1258 | toggleBundleKillFlag(MI, MO.getReg(), false, TRI); | |||
1259 | bool AllDead = true; | |||
1260 | const unsigned SuperReg = MO.getReg(); | |||
1261 | MachineInstrBuilder MIB(MF, MI); | |||
1262 | for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { | |||
1263 | if (LiveRegs.test(*SubRegs)) { | |||
1264 | MIB.addReg(*SubRegs, RegState::ImplicitDefine); | |||
1265 | AllDead = false; | |||
1266 | } | |||
1267 | } | |||
1268 | ||||
1269 | if(AllDead) { | |||
1270 | MO.setIsKill(true); | |||
1271 | toggleBundleKillFlag(MI, MO.getReg(), true, TRI); | |||
1272 | } | |||
1273 | return false; | |||
1274 | } | |||
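// Editor's sketch: the subregister rule just applied, as a predicate. The
// kill flag may be restored only when no subregister of the super-register
// remains live; otherwise each live subreg received an implicit-def instead.
// The flat containers stand in for LiveRegs and MCSubRegIterator.
#include <vector>

static bool allSubRegsDead(const std::vector<bool> &LiveRegs,
                           const std::vector<unsigned> &SubRegs) {
  for (unsigned Sub : SubRegs)
    if (LiveRegs[Sub])
      return false; // a live subreg blocks the kill
  return true;      // matches the AllDead case above
}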
1275 | ||||
1276 | // FIXME: Reuse the LivePhysRegs utility for this. | |||
1277 | void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { | |||
1278 | DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'; } } while (0); | |||
1279 | ||||
1280 | LiveRegs.resize(TRI->getNumRegs()); | |||
1281 | BitVector killedRegs(TRI->getNumRegs()); | |||
1282 | ||||
1283 | startBlockForKills(MBB); | |||
1284 | ||||
1285 | // Examine block from end to start... | |||
1286 | unsigned Count = MBB->size(); | |||
1287 | for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); | |||
1288 | I != E; --Count) { | |||
1289 | MachineInstr *MI = --I; | |||
1290 | if (MI->isDebugValue()) | |||
1291 | continue; | |||
1292 | ||||
1293 | // Update liveness. Registers that are defed but not used in this | |||
1294 | // instruction are now dead. Mark register and all subregs as they | |||
1295 | // are completely defined. | |||
1296 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |||
1297 | MachineOperand &MO = MI->getOperand(i); | |||
1298 | if (MO.isRegMask()) | |||
1299 | LiveRegs.clearBitsNotInMask(MO.getRegMask()); | |||
1300 | if (!MO.isReg()) continue; | |||
1301 | unsigned Reg = MO.getReg(); | |||
1302 | if (Reg == 0) continue; | |||
1303 | if (!MO.isDef()) continue; | |||
1304 | // Ignore two-addr defs. | |||
1305 | if (MI->isRegTiedToUseOperand(i)) continue; | |||
1306 | ||||
1307 | // Repeat for reg and all subregs. | |||
1308 | for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); | |||
1309 | SubRegs.isValid(); ++SubRegs) | |||
1310 | LiveRegs.reset(*SubRegs); | |||
1311 | } | |||
1312 | ||||
1313 | // Examine all used registers and set/clear kill flag. When a | |||
1314 | // register is used multiple times we only set the kill flag on | |||
1315 | // the first use. Don't set kill flags on undef operands. | |||
1316 | killedRegs.reset(); | |||
1317 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |||
1318 | MachineOperand &MO = MI->getOperand(i); | |||
1319 | if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; | |||
1320 | unsigned Reg = MO.getReg(); | |||
1321 | if ((Reg == 0) || MRI.isReserved(Reg)) continue; | |||
1322 | ||||
1323 | bool kill = false; | |||
1324 | if (!killedRegs.test(Reg)) { | |||
1325 | kill = true; | |||
1326 | // A register is not killed if any subregs are live... | |||
1327 | for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { | |||
1328 | if (LiveRegs.test(*SubRegs)) { | |||
1329 | kill = false; | |||
1330 | break; | |||
1331 | } | |||
1332 | } | |||
1333 | ||||
1334 | // If subreg is not live, then register is killed if it became | |||
1335 | // live in this instruction | |||
1336 | if (kill) | |||
1337 | kill = !LiveRegs.test(Reg); | |||
1338 | } | |||
1339 | ||||
1340 | if (MO.isKill() != kill) { | |||
1341 | DEBUG(dbgs() << "Fixing " << MO << " in ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << "Fixing " << MO << " in "; } } while (0); | |||
1342 | // Warning: toggleKillFlag may invalidate MO. | |||
1343 | toggleKillFlag(MI, MO); | |||
1344 | DEBUG(MI->dump()); | |||
1345 | DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) { | |||
1346 | MachineBasicBlock::instr_iterator Begin = MI->getIterator(); | |||
1347 | MachineBasicBlock::instr_iterator End = getBundleEnd(*MI); | |||
1348 | while (++Begin != End) | |||
1349 | DEBUG(Begin->dump()); | |||
1350 | }); | |||
1351 | } | |||
1352 | ||||
1353 | killedRegs.set(Reg); | |||
1354 | } | |||
1355 | ||||
1356 | // Mark any used register (that is not using undef) and subregs as | |||
1357 | // now live... | |||
1358 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { | |||
1359 | MachineOperand &MO = MI->getOperand(i); | |||
1360 | if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; | |||
1361 | unsigned Reg = MO.getReg(); | |||
1362 | if ((Reg == 0) || MRI.isReserved(Reg)) continue; | |||
1363 | ||||
1364 | for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); | |||
1365 | SubRegs.isValid(); ++SubRegs) | |||
1366 | LiveRegs.set(*SubRegs); | |||
1367 | } | |||
1368 | } | |||
1369 | } | |||
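// Editor's sketch: fixupKills reduced to its dataflow core. Scanning the
// block bottom-up, defs end liveness, and a use of a register that is not
// yet live is that register's kill. MiniInstr and the containers are
// invented; the real code additionally handles subregs, bundles, regmasks
// and reserved registers.
#include <set>
#include <utility>
#include <vector>

struct MiniInstr { std::vector<unsigned> Defs, Uses; };

static void markKills(std::vector<MiniInstr> &Block,
                      std::set<unsigned> Live, // live-out of the block
                      std::vector<std::pair<MiniInstr *, unsigned>> &Kills) {
  for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
    for (unsigned D : I->Defs)
      Live.erase(D);                // a def ends liveness above it
    for (unsigned U : I->Uses) {
      if (!Live.count(U))
        Kills.emplace_back(&*I, U); // first use seen going backwards
      Live.insert(U);               // the register is live above here
    }
  }
}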
1370 | ||||
1371 | void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { | |||
1372 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | |||
1373 | SU->getInstr()->dump(); | |||
1374 | #endif | |||
1375 | } | |||
1376 | ||||
1377 | std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { | |||
1378 | std::string s; | |||
1379 | raw_string_ostream oss(s); | |||
1380 | if (SU == &EntrySU) | |||
1381 | oss << "<entry>"; | |||
1382 | else if (SU == &ExitSU) | |||
1383 | oss << "<exit>"; | |||
1384 | else | |||
1385 | SU->getInstr()->print(oss, /*SkipOpers=*/true); | |||
1386 | return oss.str(); | |||
1387 | } | |||
1388 | ||||
1389 | /// Return the basic block label. It is not necessarily unique because a block | |||
1390 | /// contains multiple scheduling regions. But it is fine for visualization. | |||
1391 | std::string ScheduleDAGInstrs::getDAGName() const { | |||
1392 | return "dag." + BB->getFullName(); | |||
1393 | } | |||
1394 | ||||
1395 | //===----------------------------------------------------------------------===// | |||
1396 | // SchedDFSResult Implementation | |||
1397 | //===----------------------------------------------------------------------===// | |||
1398 | ||||
1399 | namespace llvm { | |||
1400 | /// \brief Internal state used to compute SchedDFSResult. | |||
1401 | class SchedDFSImpl { | |||
1402 | SchedDFSResult &R; | |||
1403 | ||||
1404 | /// Join DAG nodes into equivalence classes by their subtree. | |||
1405 | IntEqClasses SubtreeClasses; | |||
1406 | /// List PredSU, SuccSU pairs that represent data edges between subtrees. | |||
1407 | std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; | |||
1408 | ||||
1409 | struct RootData { | |||
1410 | unsigned NodeID; | |||
1411 | unsigned ParentNodeID; // Parent node (member of the parent subtree). | |||
1412 | unsigned SubInstrCount; // Instr count in this tree only, not children. | |||
1413 | ||||
1414 | RootData(unsigned id): NodeID(id), | |||
1415 | ParentNodeID(SchedDFSResult::InvalidSubtreeID), | |||
1416 | SubInstrCount(0) {} | |||
1417 | ||||
1418 | unsigned getSparseSetIndex() const { return NodeID; } | |||
1419 | }; | |||
1420 | ||||
1421 | SparseSet<RootData> RootSet; | |||
1422 | ||||
1423 | public: | |||
1424 | SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) { | |||
1425 | RootSet.setUniverse(R.DFSNodeData.size()); | |||
1426 | } | |||
1427 | ||||
1428 | /// Return true if this node has been visited by the DFS traversal. | |||
1429 | /// | |||
1430 | /// During visitPostorderNode the Node's SubtreeID is assigned to the Node | |||
1431 | /// ID. Later, SubtreeID is updated but remains valid. | |||
1432 | bool isVisited(const SUnit *SU) const { | |||
1433 | return R.DFSNodeData[SU->NodeNum].SubtreeID | |||
1434 | != SchedDFSResult::InvalidSubtreeID; | |||
1435 | } | |||
1436 | ||||
1437 | /// Initialize this node's instruction count. We don't need to flag the node | |||
1438 | /// visited until visitPostorder because the DAG cannot have cycles. | |||
1439 | void visitPreorder(const SUnit *SU) { | |||
1440 | R.DFSNodeData[SU->NodeNum].InstrCount = | |||
1441 | SU->getInstr()->isTransient() ? 0 : 1; | |||
1442 | } | |||
1443 | ||||
1444 | /// Called once for each node after all predecessors are visited. Revisit this | |||
1445 | /// node's predecessors and potentially join them now that we know the ILP of | |||
1446 | /// the other predecessors. | |||
1447 | void visitPostorderNode(const SUnit *SU) { | |||
1448 | // Mark this node as the root of a subtree. It may be joined with its | |||
1449 | // successors later. | |||
1450 | R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; | |||
1451 | RootData RData(SU->NodeNum); | |||
1452 | RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; | |||
1453 | ||||
1454 | // If any predecessors are still in their own subtree, they either cannot be | |||
1455 | // joined or are large enough to remain separate. If this parent node's | |||
1456 | // total instruction count is not greater than a child subtree by at least | |||
1457 | // the subtree limit, then try to join it now since splitting subtrees is | |||
1458 | // only useful if multiple high-pressure paths are possible. | |||
1459 | unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; | |||
1460 | for (SUnit::const_pred_iterator | |||
1461 | PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { | |||
1462 | if (PI->getKind() != SDep::Data) | |||
1463 | continue; | |||
1464 | unsigned PredNum = PI->getSUnit()->NodeNum; | |||
1465 | if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) | |||
1466 | joinPredSubtree(*PI, SU, /*CheckLimit=*/false); | |||
1467 | ||||
1468 | // Either link or merge the TreeData entry from the child to the parent. | |||
1469 | if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { | |||
1470 | // If the predecessor's parent is invalid, this is a tree edge and the | |||
1471 | // current node is the parent. | |||
1472 | if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) | |||
1473 | RootSet[PredNum].ParentNodeID = SU->NodeNum; | |||
1474 | } | |||
1475 | else if (RootSet.count(PredNum)) { | |||
1476 | // The predecessor is not a root, but is still in the root set. This | |||
1477 | // must be the new parent that it was just joined to. Note that | |||
1478 | // RootSet[PredNum].ParentNodeID may either be invalid or may still be | |||
1479 | // set to the original parent. | |||
1480 | RData.SubInstrCount += RootSet[PredNum].SubInstrCount; | |||
1481 | RootSet.erase(PredNum); | |||
1482 | } | |||
1483 | } | |||
1484 | RootSet[SU->NodeNum] = RData; | |||
1485 | } | |||
1486 | ||||
1487 | /// Called once for each tree edge after calling visitPostorderNode on the | |||
1488 | /// predecessor. Increment the parent node's instruction count and | |||
1489 | /// preemptively join this subtree to its parent's if it is small enough. | |||
1490 | void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { | |||
1491 | R.DFSNodeData[Succ->NodeNum].InstrCount | |||
1492 | += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; | |||
1493 | joinPredSubtree(PredDep, Succ); | |||
1494 | } | |||
1495 | ||||
1496 | /// Add a connection for cross edges. | |||
1497 | void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { | |||
1498 | ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); | |||
1499 | } | |||
1500 | ||||
1501 | /// Set each node's subtree ID to the representative ID and record connections | |||
1502 | /// between trees. | |||
1503 | void finalize() { | |||
1504 | SubtreeClasses.compress(); | |||
1505 | R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); | |||
1506 | assert(SubtreeClasses.getNumClasses() == RootSet.size() | |||
1507 | && "number of roots should match trees"); | |||
1508 | for (SparseSet<RootData>::const_iterator | |||
1509 | RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { | |||
1510 | unsigned TreeID = SubtreeClasses[RI->NodeID]; | |||
1511 | if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) | |||
1512 | R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; | |||
1513 | R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; | |||
1514 | // Note that SubInstrCount may be greater than InstrCount if we joined | |||
1515 | // subtrees across a cross edge. InstrCount will be attributed to the | |||
1516 | // original parent, while SubInstrCount will be attributed to the joined | |||
1517 | // parent. | |||
1518 | } | |||
1519 | R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); | |||
1520 | R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); | |||
1521 | DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); | |||
1522 | for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { | |||
1523 | R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; | |||
1524 | DEBUG(dbgs() << " SU(" << Idx << ") in tree "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << " SU(" << Idx << ") in tree " << R.DFSNodeData[Idx].SubtreeID << '\n'; } } while (0) | |||
1525 | << R.DFSNodeData[Idx].SubtreeID << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << " SU(" << Idx << ") in tree " << R.DFSNodeData[Idx].SubtreeID << '\n'; } } while (0); | |||
1526 | } | |||
1527 | for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator | |||
1528 | I = ConnectionPairs.begin(), E = ConnectionPairs.end(); | |||
1529 | I != E; ++I) { | |||
1530 | unsigned PredTree = SubtreeClasses[I->first->NodeNum]; | |||
1531 | unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; | |||
1532 | if (PredTree == SuccTree) | |||
1533 | continue; | |||
1534 | unsigned Depth = I->first->getDepth(); | |||
1535 | addConnection(PredTree, SuccTree, Depth); | |||
1536 | addConnection(SuccTree, PredTree, Depth); | |||
1537 | } | |||
1538 | } | |||
1539 | ||||
1540 | protected: | |||
1541 | /// Join the predecessor subtree with the successor that is its DFS | |||
1542 | /// parent. Apply some heuristics before joining. | |||
1543 | bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, | |||
1544 | bool CheckLimit = true) { | |||
1545 | assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges"); | |||
1546 | ||||
1547 | // Check if the predecessor is already joined. | |||
1548 | const SUnit *PredSU = PredDep.getSUnit(); | |||
1549 | unsigned PredNum = PredSU->NodeNum; | |||
1550 | if (R.DFSNodeData[PredNum].SubtreeID != PredNum) | |||
1551 | return false; | |||
1552 | ||||
1553 | // Four is the magic number of successors before a node is considered a | |||
1554 | // pinch point. | |||
1555 | unsigned NumDataSucs = 0; | |||
1556 | for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), | |||
1557 | SE = PredSU->Succs.end(); SI != SE; ++SI) { | |||
1558 | if (SI->getKind() == SDep::Data) { | |||
1559 | if (++NumDataSucs >= 4) | |||
1560 | return false; | |||
1561 | } | |||
1562 | } | |||
1563 | if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) | |||
1564 | return false; | |||
1565 | R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; | |||
1566 | SubtreeClasses.join(Succ->NodeNum, PredNum); | |||
1567 | return true; | |||
1568 | } | |||
1569 | ||||
1570 | /// Called by finalize() to record a connection between trees. | |||
1571 | void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { | |||
1572 | if (!Depth) | |||
1573 | return; | |||
1574 | ||||
1575 | do { | |||
1576 | SmallVectorImpl<SchedDFSResult::Connection> &Connections = | |||
1577 | R.SubtreeConnections[FromTree]; | |||
1578 | for (SmallVectorImpl<SchedDFSResult::Connection>::iterator | |||
1579 | I = Connections.begin(), E = Connections.end(); I != E; ++I) { | |||
1580 | if (I->TreeID == ToTree) { | |||
1581 | I->Level = std::max(I->Level, Depth); | |||
1582 | return; | |||
1583 | } | |||
1584 | } | |||
1585 | Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); | |||
1586 | FromTree = R.DFSTreeData[FromTree].ParentTreeID; | |||
1587 | } while (FromTree != SchedDFSResult::InvalidSubtreeID); | |||
1588 | } | |||
1589 | }; | |||
1590 | } // namespace llvm | |||
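// Editor's sketch: the subtree joining above leans on IntEqClasses, a
// union-find over node indices that is compressed once at the end. A plain
// path-halving equivalent (MiniEqClasses is hypothetical, not LLVM's API):
#include <numeric>
#include <vector>

struct MiniEqClasses {
  std::vector<unsigned> Parent;
  explicit MiniEqClasses(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u); // every node its own class
  }
  unsigned find(unsigned X) {
    while (Parent[X] != X) {
      Parent[X] = Parent[Parent[X]]; // path halving
      X = Parent[X];
    }
    return X;
  }
  void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
};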
1591 | ||||
1592 | namespace { | |||
1593 | /// \brief Manage the stack used by a reverse depth-first search over the DAG. | |||
1594 | class SchedDAGReverseDFS { | |||
1595 | std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack; | |||
1596 | public: | |||
1597 | bool isComplete() const { return DFSStack.empty(); } | |||
1598 | ||||
1599 | void follow(const SUnit *SU) { | |||
1600 | DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); | |||
1601 | } | |||
1602 | void advance() { ++DFSStack.back().second; } | |||
1603 | ||||
1604 | const SDep *backtrack() { | |||
1605 | DFSStack.pop_back(); | |||
1606 | return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second); | |||
1607 | } | |||
1608 | ||||
1609 | const SUnit *getCurr() const { return DFSStack.back().first; } | |||
1610 | ||||
1611 | SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } | |||
1612 | ||||
1613 | SUnit::const_pred_iterator getPredEnd() const { | |||
1614 | return getCurr()->Preds.end(); | |||
1615 | } | |||
1616 | }; | |||
1617 | } // anonymous | |||
1618 | ||||
1619 | static bool hasDataSucc(const SUnit *SU) { | |||
1620 | for (SUnit::const_succ_iterator | |||
1621 | SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { | |||
1622 | if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) | |||
1623 | return true; | |||
1624 | } | |||
1625 | return false; | |||
1626 | } | |||
1627 | ||||
1628 | /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first | |||
1629 | /// search from this root. | |||
1630 | void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { | |||
1631 | if (!IsBottomUp) | |||
1632 | llvm_unreachable("Top-down ILP metric is unimplemnted")::llvm::llvm_unreachable_internal("Top-down ILP metric is unimplemnted" , "/tmp/buildd/llvm-toolchain-snapshot-3.9~svn271203/lib/CodeGen/ScheduleDAGInstrs.cpp" , 1632); | |||
1633 | ||||
1634 | SchedDFSImpl Impl(*this); | |||
1635 | for (ArrayRef<SUnit>::const_iterator | |||
1636 | SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { | |||
1637 | const SUnit *SU = &*SI; | |||
1638 | if (Impl.isVisited(SU) || hasDataSucc(SU)) | |||
1639 | continue; | |||
1640 | ||||
1641 | SchedDAGReverseDFS DFS; | |||
1642 | Impl.visitPreorder(SU); | |||
1643 | DFS.follow(SU); | |||
1644 | for (;;) { | |||
1645 | // Traverse the leftmost path as far as possible. | |||
1646 | while (DFS.getPred() != DFS.getPredEnd()) { | |||
1647 | const SDep &PredDep = *DFS.getPred(); | |||
1648 | DFS.advance(); | |||
1649 | // Ignore non-data edges. | |||
1650 | if (PredDep.getKind() != SDep::Data | |||
1651 | || PredDep.getSUnit()->isBoundaryNode()) { | |||
1652 | continue; | |||
1653 | } | |||
1654 | // An already visited edge is a cross edge, assuming an acyclic DAG. | |||
1655 | if (Impl.isVisited(PredDep.getSUnit())) { | |||
1656 | Impl.visitCrossEdge(PredDep, DFS.getCurr()); | |||
1657 | continue; | |||
1658 | } | |||
1659 | Impl.visitPreorder(PredDep.getSUnit()); | |||
1660 | DFS.follow(PredDep.getSUnit()); | |||
1661 | } | |||
1662 | // Visit the top of the stack in postorder and backtrack. | |||
1663 | const SUnit *Child = DFS.getCurr(); | |||
1664 | const SDep *PredDep = DFS.backtrack(); | |||
1665 | Impl.visitPostorderNode(Child); | |||
1666 | if (PredDep) | |||
1667 | Impl.visitPostorderEdge(*PredDep, DFS.getCurr()); | |||
1668 | if (DFS.isComplete()) | |||
1669 | break; | |||
1670 | } | |||
1671 | } | |||
1672 | Impl.finalize(); | |||
1673 | } | |||
1674 | ||||
1675 | /// The root of the given SubtreeID was just scheduled. For all subtrees | |||
1676 | /// connected to this tree, record the depth of the connection so that the | |||
1677 | /// nearest connected subtrees can be prioritized. | |||
1678 | void SchedDFSResult::scheduleTree(unsigned SubtreeID) { | |||
1679 | for (SmallVectorImpl<Connection>::const_iterator | |||
1680 | I = SubtreeConnections[SubtreeID].begin(), | |||
1681 | E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { | |||
1682 | SubtreeConnectLevels[I->TreeID] = | |||
1683 | std::max(SubtreeConnectLevels[I->TreeID], I->Level); | |||
1684 | DEBUG(dbgs() << " Tree: " << I->TreeIDdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << " Tree: " << I->TreeID << " @" << SubtreeConnectLevels[I->TreeID] << '\n'; } } while (0) | |||
1685 | << " @" << SubtreeConnectLevels[I->TreeID] << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("misched")) { dbgs() << " Tree: " << I->TreeID << " @" << SubtreeConnectLevels[I->TreeID] << '\n'; } } while (0); | |||
1686 | } | |||
1687 | } | |||
1688 | ||||
1689 | LLVM_DUMP_METHOD | |||
1690 | void ILPValue::print(raw_ostream &OS) const { | |||
1691 | OS << InstrCount << " / " << Length << " = "; | |||
1692 | if (!Length) | |||
1693 | OS << "BADILP"; | |||
1694 | else | |||
1695 | OS << format("%g", ((double)InstrCount / Length)); | |||
1696 | } | |||
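// Editor's note: the value printed above is simply instruction count
// divided by critical-path length. A worked example with assumed numbers:
#include <cstdio>

int main() {
  unsigned InstrCount = 12, Length = 4; // 12 instrs, longest chain depth 4
  std::printf("%u / %u = %g\n", InstrCount, Length,
              double(InstrCount) / Length); // prints "12 / 4 = 3"
  return 0;
}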
1697 | ||||
1698 | LLVM_DUMP_METHOD | |||
1699 | void ILPValue::dump() const { | |||
1700 | dbgs() << *this << '\n'; | |||
1701 | } | |||
1702 | ||||
1703 | namespace llvm { | |||
1704 | ||||
1705 | LLVM_DUMP_METHOD | |||
1706 | raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { | |||
1707 | Val.print(OS); | |||
1708 | return OS; | |||
1709 | } | |||
1710 | ||||
1711 | } // namespace llvm |