Line data Source code
1 : //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file This implements the ScheduleDAGInstrs class, which supports
11 : /// re-scheduling of MachineInstrs.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "llvm/CodeGen/ScheduleDAGInstrs.h"
16 : #include "llvm/ADT/IntEqClasses.h"
17 : #include "llvm/ADT/MapVector.h"
18 : #include "llvm/ADT/SmallPtrSet.h"
19 : #include "llvm/ADT/SmallVector.h"
20 : #include "llvm/ADT/SparseSet.h"
21 : #include "llvm/ADT/iterator_range.h"
22 : #include "llvm/Analysis/AliasAnalysis.h"
23 : #include "llvm/Analysis/ValueTracking.h"
24 : #include "llvm/CodeGen/LiveIntervals.h"
25 : #include "llvm/CodeGen/LivePhysRegs.h"
26 : #include "llvm/CodeGen/MachineBasicBlock.h"
27 : #include "llvm/CodeGen/MachineFrameInfo.h"
28 : #include "llvm/CodeGen/MachineFunction.h"
29 : #include "llvm/CodeGen/MachineInstr.h"
30 : #include "llvm/CodeGen/MachineInstrBundle.h"
31 : #include "llvm/CodeGen/MachineMemOperand.h"
32 : #include "llvm/CodeGen/MachineOperand.h"
33 : #include "llvm/CodeGen/MachineRegisterInfo.h"
34 : #include "llvm/CodeGen/PseudoSourceValue.h"
35 : #include "llvm/CodeGen/RegisterPressure.h"
36 : #include "llvm/CodeGen/ScheduleDAG.h"
37 : #include "llvm/CodeGen/ScheduleDFS.h"
38 : #include "llvm/CodeGen/SlotIndexes.h"
39 : #include "llvm/CodeGen/TargetRegisterInfo.h"
40 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
41 : #include "llvm/Config/llvm-config.h"
42 : #include "llvm/IR/Constants.h"
43 : #include "llvm/IR/Function.h"
44 : #include "llvm/IR/Instruction.h"
45 : #include "llvm/IR/Instructions.h"
46 : #include "llvm/IR/Operator.h"
47 : #include "llvm/IR/Type.h"
48 : #include "llvm/IR/Value.h"
49 : #include "llvm/MC/LaneBitmask.h"
50 : #include "llvm/MC/MCRegisterInfo.h"
51 : #include "llvm/Support/Casting.h"
52 : #include "llvm/Support/CommandLine.h"
53 : #include "llvm/Support/Compiler.h"
54 : #include "llvm/Support/Debug.h"
55 : #include "llvm/Support/ErrorHandling.h"
56 : #include "llvm/Support/Format.h"
57 : #include "llvm/Support/raw_ostream.h"
58 : #include <algorithm>
59 : #include <cassert>
60 : #include <iterator>
61 : #include <string>
62 : #include <utility>
63 : #include <vector>
64 :
65 : using namespace llvm;
66 :
67 : #define DEBUG_TYPE "machine-scheduler"
68 :
69 : static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
70 : cl::ZeroOrMore, cl::init(false),
71 : cl::desc("Enable use of AA during MI DAG construction"));
72 :
73 : static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
74 : cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
75 :
76 : // Note: the two options below may be used to tune the trade-off between
77 : // compile time and output quality. Setting HugeRegion so large that it is
78 : // never reached gives best-effort behavior, but may be slow.
79 :
80 : // When the Stores and Loads maps (or NonAliasStores and NonAliasLoads)
81 : // together hold this many SUs, the maps are reduced.
82 : static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
83 : cl::init(1000), cl::desc("The limit to use while constructing the DAG "
84 : "prior to scheduling, at which point a trade-off "
85 : "is made to avoid excessive compile time."));
86 :
87 : static cl::opt<unsigned> ReductionSize(
88 : "dag-maps-reduction-size", cl::Hidden,
89 : cl::desc("A huge scheduling region will have maps reduced by this many "
90 : "nodes at a time. Defaults to HugeRegion / 2."));
91 :
92 : static unsigned getReductionSize() {
93 : // Always reduce a huge region by half of its elements, except
94 : // when the user sets this number explicitly.
95 7 : if (ReductionSize.getNumOccurrences() == 0)
96 7 : return HugeRegion / 2;
97 : return ReductionSize;
98 : }
99 :
100 0 : static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
101 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
102 : dbgs() << "{ ";
103 : for (const SUnit *su : L) {
104 : dbgs() << "SU(" << su->NodeNum << ")";
105 : if (su != L.back())
106 : dbgs() << ", ";
107 : }
108 : dbgs() << "}\n";
109 : #endif
110 0 : }
111 :
112 234334 : ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
113 : const MachineLoopInfo *mli,
114 234334 : bool RemoveKillFlags)
115 234334 : : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
116 : RemoveKillFlags(RemoveKillFlags),
117 234334 : UnknownValue(UndefValue::get(
118 703002 : Type::getVoidTy(mf.getFunction().getContext()))) {
119 : DbgValues.clear();
120 :
121 234334 : const TargetSubtargetInfo &ST = mf.getSubtarget();
122 234334 : SchedModel.init(&ST);
123 234334 : }
124 :
125 : /// If this machine instr has memory reference information and it can be
126 : /// tracked to a normal reference to a known object, return the Value
127 : /// for that object. This function returns false if the memory location is
128 : /// unknown or may alias anything.
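///
/// For example, a load whose only memory operand carries an IR Value that
/// resolves to an identified object (such as an alloca) yields that object,
/// while a volatile access, a missing Value, or an aliased PseudoSourceValue
/// makes the location unanalyzable and the function returns false.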
129 : static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
130 : const MachineFrameInfo &MFI,
131 : UnderlyingObjectsVector &Objects,
132 : const DataLayout &DL) {
133 : auto allMMOsOkay = [&]() {
134 : for (const MachineMemOperand *MMO : MI->memoperands()) {
135 : if (MMO->isVolatile())
136 : return false;
137 :
138 : if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
139 : // Functions that contain tail calls don't have unique PseudoSourceValue
140 : // objects. Two PseudoSourceValues might refer to the same or
141 : // overlapping locations. The client code calling this function assumes
142 : // this is not the case. So return a conservative answer of no known
143 : // object.
144 : if (MFI.hasTailCall())
145 : return false;
146 :
147 : // For now, ignore PseudoSourceValues which may alias LLVM IR values
148 : // because the code that uses this function has no way to cope with
149 : // such aliases.
150 : if (PSV->isAliased(&MFI))
151 : return false;
152 :
153 : bool MayAlias = PSV->mayAlias(&MFI);
154 : Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
155 : } else if (const Value *V = MMO->getValue()) {
156 : SmallVector<Value *, 4> Objs;
157 : if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
158 : return false;
159 :
160 : for (Value *V : Objs) {
161 : assert(isIdentifiedObject(V));
162 : Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
163 : }
164 : } else
165 : return false;
166 : }
167 : return true;
168 1457822 : };
169 :
170 1457822 : if (!allMMOsOkay()) {
171 : Objects.clear();
172 : return false;
173 : }
174 :
175 : return true;
176 : }
177 :
178 610079 : void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
179 610079 : BB = bb;
180 610079 : }
181 :
182 610771 : void ScheduleDAGInstrs::finishBlock() {
183 : // Subclasses should no longer refer to the old block.
184 610771 : BB = nullptr;
185 610771 : }
186 :
187 1455260 : void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
188 : MachineBasicBlock::iterator begin,
189 : MachineBasicBlock::iterator end,
190 : unsigned regioninstrs) {
191 : assert(bb == BB && "startBlock should set BB");
192 1455260 : RegionBegin = begin;
193 1455260 : RegionEnd = end;
194 1455260 : NumRegionInstrs = regioninstrs;
195 1455260 : }
196 :
197 1455260 : void ScheduleDAGInstrs::exitRegion() {
198 : // Nothing to do.
199 1455260 : }
200 :
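/// Adds dependencies toward ExitSU, which stands for the instruction just
/// past the current region (if any). The register uses of that boundary
/// instruction, plus the live-ins of successor blocks when the boundary is
/// absent or is neither a call nor a barrier, are recorded as uses of
/// ExitSU so that in-region defs of those registers get edges to ExitSU.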
201 895288 : void ScheduleDAGInstrs::addSchedBarrierDeps() {
202 1790576 : MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
203 : ExitSU.setInstr(ExitMI);
204 : // Add dependencies on the defs and uses of the instruction.
205 734444 : if (ExitMI) {
206 3116971 : for (const MachineOperand &MO : ExitMI->operands()) {
207 2382527 : if (!MO.isReg() || MO.isDef()) continue;
208 964481 : unsigned Reg = MO.getReg();
209 964481 : if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
210 1836940 : Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
211 46011 : } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
212 9454 : addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
213 : }
214 : }
215 : }
216 2233762 : if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
217 : // For others, e.g. fallthrough, conditional branch, assume the exit
218 : // uses all the registers that are livein to the successor blocks.
219 1111530 : for (const MachineBasicBlock *Succ : BB->successors()) {
220 1849107 : for (const auto &LI : Succ->liveins()) {
221 1240722 : if (!Uses.contains(LI.PhysReg))
222 1970572 : Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
223 : }
224 : }
225 : }
226 895288 : }
227 :
228 : /// MO is an operand of SU's instruction that defines a physical register. Adds
229 : /// data dependencies from SU to any uses of the physical register.
230 1907995 : void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
231 1907995 : const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
232 : assert(MO.isDef() && "expect physreg def");
233 :
234 : // Ask the target if address-backscheduling is desirable, and if so how much.
235 1907995 : const TargetSubtargetInfo &ST = MF.getSubtarget();
236 :
237 12935856 : for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
238 23963717 : Alias.isValid(); ++Alias) {
239 11027861 : if (!Uses.contains(*Alias))
240 : continue;
241 : for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
242 2420907 : SUnit *UseSU = I->SU;
243 2420907 : if (UseSU == SU)
244 0 : continue;
245 :
246 : // Adjust the dependence latency using operand def/use information,
247 : // then allow the target to perform its own adjustments.
248 2420907 : int UseOp = I->OpIdx;
249 : MachineInstr *RegUse = nullptr;
250 : SDep Dep;
251 2420907 : if (UseOp < 0)
252 916553 : Dep = SDep(SU, SDep::Artificial);
253 : else {
254 : // Set the hasPhysRegDefs only for physreg defs that have a use within
255 : // the scheduling region.
256 1504354 : SU->hasPhysRegDefs = true;
257 1504354 : Dep = SDep(SU, SDep::Data, *Alias);
258 1504354 : RegUse = UseSU->getInstr();
259 : }
260 4841814 : Dep.setLatency(
261 2420907 : SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
262 : UseOp));
263 :
264 2420907 : ST.adjustSchedDependency(SU, UseSU, Dep);
265 2420907 : UseSU->addPred(Dep);
266 : }
267 : }
268 1907995 : }
269 :
270 : /// Adds register dependencies (data, anti, and output) from this SUnit
271 : /// to following instructions in the same scheduling region that depend on the
272 : /// physical register referenced at OperIdx.
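///
/// Because the scheduling region is scanned bottom-up, Defs and Uses hold
/// operands of instructions already visited below the current one: a def
/// therefore gains data edges to the recorded uses and anti/output edges
/// to the recorded defs of any aliasing register.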
273 5098892 : void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
274 5098892 : MachineInstr *MI = SU->getInstr();
275 5098892 : MachineOperand &MO = MI->getOperand(OperIdx);
276 5098892 : unsigned Reg = MO.getReg();
277 : // We do not need to track any dependencies for constant registers.
278 5098892 : if (MRI.isConstantPhysReg(Reg))
279 : return;
280 :
281 : // Optionally add output and anti dependencies. For anti
282 : // dependencies we use a latency of 0 because for a multi-issue
283 : // target we want to allow the defining instruction to issue
284 : // in the same cycle as the using instruction.
285 : // TODO: Using a latency of 1 here for output dependencies assumes
286 : // there's no cost for reusing registers.
287 4250799 : SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
288 34836514 : for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
289 30585715 : if (!Defs.contains(*Alias))
290 : continue;
291 : for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
292 3322028 : SUnit *DefSU = I->SU;
293 3322028 : if (DefSU == &ExitSU)
294 : continue;
295 3322028 : if (DefSU != SU &&
296 3413668 : (Kind != SDep::Output || !MO.isDead() ||
297 1317958 : !DefSU->getInstr()->registerDefIsDead(*Alias))) {
298 1548966 : if (Kind == SDep::Anti)
299 588437 : DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
300 : else {
301 : SDep Dep(SU, Kind, /*Reg=*/*Alias);
302 1921058 : Dep.setLatency(
303 960529 : SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
304 960529 : DefSU->addPred(Dep);
305 : }
306 : }
307 : }
308 : }
309 :
310 4250799 : if (!MO.isDef()) {
311 2342804 : SU->hasPhysRegUses = true;
312 : // Either insert a new Reg2SUnits entry with an empty SUnits list, or
313 : // retrieve the existing SUnits list for this register's uses.
314 : // Push this SUnit on the use list.
315 4685608 : Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
316 2342804 : if (RemoveKillFlags)
317 : MO.setIsKill(false);
318 : } else {
319 1907995 : addPhysRegDataDeps(SU, OperIdx);
320 :
321 : // Clear previous uses and defs of this register and its subregisters.
322 7554848 : for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
323 3738858 : if (Uses.contains(*SubReg))
324 1264457 : Uses.eraseAll(*SubReg);
325 3738858 : if (!MO.isDead())
326 3110132 : Defs.eraseAll(*SubReg);
327 : }
328 1907995 : if (MO.isDead() && SU->isCall) {
329 : // Calls will not be reordered because of chain dependencies (see
330 : // below). Since call operands are dead, calls may continue to be added
331 : // to the DefList making dependence checking quadratic in the size of
332 : // the block. Instead, we leave only one call at the back of the
333 : // DefList.
334 1439 : Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
335 : Reg2SUnitsMap::iterator B = P.first;
336 1439 : Reg2SUnitsMap::iterator I = P.second;
337 2103 : for (bool isBegin = I == B; !isBegin; /* empty */) {
338 1158 : isBegin = (--I) == B;
339 1158 : if (!I->SU->isCall)
340 : break;
341 664 : I = Defs.erase(I);
342 : }
343 : }
344 :
345 : // Defs are pushed in the order they are visited and never reordered.
346 3815990 : Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
347 : }
348 : }
349 :
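/// Returns the lane mask touched by \p MO: the lanes of the accessed
/// subregister if the register class has disjunct subregisters, otherwise
/// all lanes. For example, a subregister def contributes only its own lanes.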
350 662266 : LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
351 : {
352 662266 : unsigned Reg = MO.getReg();
353 : // No point in tracking lanemasks if we don't have interesting subregisters.
354 662266 : const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
355 662266 : if (!RC.HasDisjunctSubRegs)
356 262613 : return LaneBitmask::getAll();
357 :
358 : unsigned SubReg = MO.getSubReg();
359 399653 : if (SubReg == 0)
360 176137 : return RC.getLaneMask();
361 223516 : return TRI->getSubRegIndexLaneMask(SubReg);
362 : }
363 :
364 : /// Adds register output and data dependencies from this SUnit to instructions
365 : /// that occur later in the same scheduling region if they read from or write to
366 : /// the virtual register defined at OperIdx.
367 : ///
368 : /// TODO: Hoist loop induction variable increments. This has to be
369 : /// reevaluated. Generally, IV scheduling should be done before coalescing.
370 1587666 : void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
371 1587666 : MachineInstr *MI = SU->getInstr();
372 1587666 : MachineOperand &MO = MI->getOperand(OperIdx);
373 1587666 : unsigned Reg = MO.getReg();
374 :
375 : LaneBitmask DefLaneMask;
376 : LaneBitmask KillLaneMask;
377 1587666 : if (TrackLaneMasks) {
378 303116 : bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
379 303116 : DefLaneMask = getLaneMaskForMO(MO);
380 : // If we have a <read-undef> flag, none of the lane values comes from an
381 : // earlier instruction.
382 303116 : KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
383 :
384 : // Clear undef flag, we'll re-add it later once we know which subregister
385 : // Def is first.
386 : MO.setIsUndef(false);
387 : } else {
388 : DefLaneMask = LaneBitmask::getAll();
389 : KillLaneMask = LaneBitmask::getAll();
390 : }
391 :
392 1587666 : if (MO.isDead()) {
393 : assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
394 : "Dead defs should have no uses");
395 : } else {
396 : // Add data dependence to all uses we found so far.
397 1576933 : const TargetSubtargetInfo &ST = MF.getSubtarget();
398 1576933 : for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
399 : E = CurrentVRegUses.end(); I != E; /*empty*/) {
400 1786282 : LaneBitmask LaneMask = I->LaneMask;
401 : // Ignore uses of other lanes.
402 1786282 : if ((LaneMask & KillLaneMask).none()) {
403 : ++I;
404 : continue;
405 : }
406 :
407 1736002 : if ((LaneMask & DefLaneMask).any()) {
408 1736002 : SUnit *UseSU = I->SU;
409 1736002 : MachineInstr *Use = UseSU->getInstr();
410 : SDep Dep(SU, SDep::Data, Reg);
411 1736002 : Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
412 : I->OperandIndex));
413 1736002 : ST.adjustSchedDependency(SU, UseSU, Dep);
414 1736002 : UseSU->addPred(Dep);
415 : }
416 :
417 : LaneMask &= ~KillLaneMask;
418 : // If we found a Def for all lanes of this use, remove it from the list.
419 1736002 : if (LaneMask.any()) {
420 102685 : I->LaneMask = LaneMask;
421 : ++I;
422 : } else
423 1633317 : I = CurrentVRegUses.erase(I);
424 : }
425 : }
426 :
427 : // Shortcut: Singly defined vregs do not have output/anti dependencies.
428 1587666 : if (MRI.hasOneDef(Reg))
429 : return;
430 :
431 : // Add output dependence to the next nearest defs of this vreg.
432 : //
433 : // Unless this definition is dead, the output dependence should be
434 : // transitively redundant with antidependencies from this definition's
435 : // uses. We're conservative for now until we have a way to guarantee the uses
436 : // are not eliminated sometime during scheduling. The output dependence edge
437 : // is also useful if output latency exceeds def-use latency.
438 : LaneBitmask LaneMask = DefLaneMask;
439 : for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
440 685074 : CurrentVRegDefs.end())) {
441 : // Ignore defs for other lanes.
442 1034980 : if ((V2SU.LaneMask & LaneMask).none())
443 169341 : continue;
444 : // Add an output dependence.
445 348149 : SUnit *DefSU = V2SU.SU;
446 : // Ignore additional defs of the same lanes in one instruction. This can
447 : // happen because lanemasks are shared for targets with too many
448 : // subregisters. We also use some representation tricks/hacks where we
449 : // add super-register defs/uses, to imply that although we only access parts
450 : // of the reg we care about the full one.
451 348149 : if (DefSU == SU)
452 : continue;
453 : SDep Dep(SU, SDep::Output, Reg);
454 696298 : Dep.setLatency(
455 348149 : SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
456 348149 : DefSU->addPred(Dep);
457 :
458 : // Update current definition. This can get tricky if the def previously
459 : // covered a bigger lanemask. We then have to shrink it and create a new
460 : // VReg2SUnit for the non-overlapping part.
461 348149 : LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
462 : LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
463 348149 : V2SU.SU = SU;
464 348149 : V2SU.LaneMask = OverlapMask;
465 348149 : if (NonOverlapMask.any())
466 202 : CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
467 : }
468 : // If there was no CurrentVRegDefs entry for some lanes yet, create one.
469 685074 : if (LaneMask.any())
470 685074 : CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
471 : }
472 :
473 : /// Adds a register data dependency if the instruction that defines the
474 : /// virtual register used at OperIdx is mapped to an SUnit. Adds a register
475 : /// antidependency from this SUnit to instructions that occur later in the same
476 : /// scheduling region if they write the virtual register.
477 : ///
478 : /// TODO: Handle ExitSU "uses" properly.
479 2392078 : void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
480 2392078 : const MachineInstr *MI = SU->getInstr();
481 2392078 : const MachineOperand &MO = MI->getOperand(OperIdx);
482 2392078 : unsigned Reg = MO.getReg();
483 :
484 : // Remember the use. Data dependencies will be added when we find the def.
485 359150 : LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
486 2392078 : : LaneBitmask::getAll();
487 4784156 : CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
488 :
489 : // Add antidependences to the following defs of the vreg.
490 : for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
491 2392078 : CurrentVRegDefs.end())) {
492 : // Ignore defs for unrelated lanes.
493 447082 : LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
494 447082 : if ((PrevDefLaneMask & LaneMask).none())
495 : continue;
496 423970 : if (V2SU.SU == SU)
497 : continue;
498 :
499 76392 : V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
500 : }
501 2392078 : }
502 :
503 : /// Returns true if MI is an instruction we are unable to reason about
504 : /// (like a call or something with unmodeled side effects).
505 3817026 : static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
506 7601406 : return MI->isCall() || MI->hasUnmodeledSideEffects() ||
507 3852872 : (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
508 : }
509 :
510 4132405 : void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
511 : unsigned Latency) {
512 8264810 : if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
513 : SDep Dep(SUa, SDep::MayAliasMem);
514 : Dep.setLatency(Latency);
515 2658832 : SUb->addPred(Dep);
516 : }
517 4132405 : }
518 :
519 : /// Creates an SUnit for each real instruction, numbered in top-down
520 : /// topological order. The instruction order A < B implies that no edge exists
521 : /// from B to A.
522 : ///
523 : /// Map each real instruction to its SUnit.
524 : ///
525 : /// After initSUnits, the SUnits vector cannot be resized and the scheduler may
526 : /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
527 : /// instead of pointers.
528 : ///
529 : /// MachineScheduler relies on initSUnits numbering the nodes by their order in
530 : /// the original instruction list.
531 895288 : void ScheduleDAGInstrs::initSUnits() {
532 : // We'll be allocating one SUnit for each real instruction in the region,
533 : // which is contained within a basic block.
534 895288 : SUnits.reserve(NumRegionInstrs);
535 :
536 4834990 : for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
537 : if (MI.isDebugInstr())
538 : continue;
539 :
540 3817026 : SUnit *SU = newSUnit(&MI);
541 3817026 : MISUnitMap[&MI] = SU;
542 :
543 3817026 : SU->isCall = MI.isCall();
544 3817026 : SU->isCommutable = MI.isCommutable();
545 :
546 : // Assign the Latency field of SU using target-provided information.
547 3817026 : SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
548 :
549 : // If this SUnit uses a reserved or unbuffered resource, mark it as such.
550 : //
551 : // Reserved resources block an instruction from issuing and stall the
552 : // entire pipeline. These are identified by BufferSize=0.
553 : //
554 : // Unbuffered resources prevent execution of subsequent instructions that
555 : // require the same resources. This is used for in-order execution pipelines
556 : // within an out-of-order core. These are identified by BufferSize=1.
557 3817026 : if (SchedModel.hasInstrSchedModel()) {
558 1331801 : const MCSchedClassDesc *SC = getSchedClass(SU);
559 2389928 : for (const MCWriteProcResEntry &PRE :
560 : make_range(SchedModel.getWriteProcResBegin(SC),
561 3721729 : SchedModel.getWriteProcResEnd(SC))) {
562 4779856 : switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
563 1131 : case 0:
564 1131 : SU->hasReservedResource = true;
565 1131 : break;
566 382121 : case 1:
567 382121 : SU->isUnbuffered = true;
568 382121 : break;
569 : default:
570 : break;
571 : }
572 : }
573 : }
574 : }
575 895288 : }
576 :
577 2685864 : class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
578 : /// Current total number of SUs in map.
579 : unsigned NumNodes = 0;
580 :
581 : /// 1 for loads, 0 for stores. (see comment in SUList)
582 : unsigned TrueMemOrderLatency;
583 :
584 : public:
585 895288 : Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
586 :
587 : /// To keep NumNodes up to date, insert() is used instead of
588 : /// this operator w/ push_back().
589 : ValueType &operator[](const SUList &Key) {
590 : llvm_unreachable("Don't use. Use insert() instead."); };
591 :
592 : /// Adds SU to the SUList of V. If the map grows huge, its size is reduced
593 : /// via reduceHugeMemNodeMaps().
594 1988811 : void inline insert(SUnit *SU, ValueType V) {
595 1988811 : MapVector::operator[](V).push_back(SU);
596 1988811 : NumNodes++;
597 1988811 : }
598 :
599 : /// Clears the list of SUs mapped to V.
600 : void inline clearList(ValueType V) {
601 : iterator Itr = find(V);
602 : if (Itr != end()) {
603 : assert(NumNodes >= Itr->second.size());
604 : NumNodes -= Itr->second.size();
605 :
606 : Itr->second.clear();
607 : }
608 : }
609 :
610 : /// Clears map from all contents.
611 : void clear() {
612 : MapVector<ValueType, SUList>::clear();
613 404552 : NumNodes = 0;
614 : }
615 :
616 0 : unsigned inline size() const { return NumNodes; }
617 :
618 : /// Counts the number of SUs in this map after a reduction.
619 : void reComputeSize() {
620 14 : NumNodes = 0;
621 1495 : for (auto &I : *this)
622 1481 : NumNodes += I.second.size();
623 : }
624 :
625 0 : unsigned inline getTrueMemOrderLatency() const {
626 0 : return TrueMemOrderLatency;
627 : }
628 :
629 : void dump();
630 : };
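// A rough usage sketch (SU, OtherSU and V here are placeholder names): a
// store SU whose underlying object is the IR Value V is recorded with
//   Stores.insert(SU, V);                    // appends SU to V's SUList
// and a later memory access picks up chain edges against that list via
//   addChainDependencies(OtherSU, Stores, V);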
631 :
632 724720 : void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
633 : Value2SUsMap &Val2SUsMap) {
634 903506 : for (auto &I : Val2SUsMap)
635 178786 : addChainDependencies(SU, I.second,
636 : Val2SUsMap.getTrueMemOrderLatency());
637 724720 : }
638 :
639 5433073 : void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
640 : Value2SUsMap &Val2SUsMap,
641 : ValueType V) {
642 5433073 : Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
643 5433073 : if (Itr != Val2SUsMap.end())
644 938946 : addChainDependencies(SU, Itr->second,
645 : Val2SUsMap.getTrueMemOrderLatency());
646 5433073 : }
647 :
648 404552 : void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
649 : assert(BarrierChain != nullptr);
650 :
651 425369 : for (auto &I : map) {
652 : SUList &sus = I.second;
653 56388 : for (auto *SU : sus)
654 35571 : SU->addPredBarrier(BarrierChain);
655 : }
656 : map.clear();
657 404552 : }
658 :
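/// Prunes \p map against the current BarrierChain: every SU in the map that
/// lies below BarrierChain in the region gets a barrier edge to it and is
/// dropped from its SUList, while SUs at or above BarrierChain are kept so
/// that precise per-Value dependencies can still be added for them.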
659 14 : void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
660 : assert(BarrierChain != nullptr);
661 :
662 : // Go through all lists of SUs.
663 2983 : for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
664 : Value2SUsMap::iterator CurrItr = I++;
665 2969 : SUList &sus = CurrItr->second;
666 2969 : SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
667 6462 : for (; SUItr != SUEE; ++SUItr) {
668 : // Stop on BarrierChain or any instruction above it.
669 4976 : if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
670 : break;
671 :
672 3493 : (*SUItr)->addPredBarrier(BarrierChain);
673 : }
674 :
675 : // Remove also the BarrierChain from list if present.
676 2969 : if (SUItr != SUEE && *SUItr == BarrierChain)
677 : SUItr++;
678 :
679 : // Remove all SUs that are now successors of BarrierChain.
680 2969 : if (SUItr != sus.begin())
681 1497 : sus.erase(sus.begin(), SUItr);
682 : }
683 :
684 : // Remove all entries with empty su lists.
685 14 : map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
686 : return (mapEntry.second.empty()); });
687 :
688 : // Recompute the size of the map (NumNodes).
689 : map.reComputeSize();
690 14 : }
691 :
692 895288 : void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
693 : RegPressureTracker *RPTracker,
694 : PressureDiffs *PDiffs,
695 : LiveIntervals *LIS,
696 : bool TrackLaneMasks) {
697 895288 : const TargetSubtargetInfo &ST = MF.getSubtarget();
698 895288 : bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
699 895175 : : ST.useAA();
700 895288 : AAForDep = UseAA ? AA : nullptr;
701 :
702 895288 : BarrierChain = nullptr;
703 :
704 895288 : this->TrackLaneMasks = TrackLaneMasks;
705 895288 : MISUnitMap.clear();
706 895288 : ScheduleDAG::clearDAG();
707 :
708 : // Create an SUnit for each real instruction.
709 895288 : initSUnits();
710 :
711 895288 : if (PDiffs)
712 248848 : PDiffs->init(SUnits.size());
713 :
714 : // We build scheduling units by walking a block's instruction list
715 : // from bottom to top.
716 :
717 : // Each MI's memory operand(s) is analyzed into a list of underlying
718 : // objects. The SU is then inserted in the SUList(s) mapped from the
719 : // Value(s). Each Value thus gets mapped to lists of SUs depending
720 : // on it, stores and loads kept separately. Two SUs are trivially
721 : // non-aliasing if they both depend on only identified Values and do
722 : // not share any common Value.
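// For example, two stores whose only underlying objects are distinct
// identified allocas land in different SULists and get no chain edge
// between them, whereas a store with an unknown underlying object is
// chained against all recorded loads and stores.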
723 : Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
724 :
725 : // Certain memory accesses are known to not alias any SU in Stores
726 : // or Loads, and therefore have their own 'NonAlias'
727 : // domain. E.g. spill / reload instructions never alias LLVM IR
728 : // Values. It would be nice to assume that this type of memory
729 : // access always has proper memory operand modelling, and is
730 : // therefore never unanalyzable, but this is conservatively not
731 : // done.
732 : Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
733 :
734 : // Remove any stale debug info; sometimes BuildSchedGraph is called again
735 : // without emitting the info from the previous call.
736 895288 : DbgValues.clear();
737 895288 : FirstDbgValue = nullptr;
738 :
739 : assert(Defs.empty() && Uses.empty() &&
740 : "Only BuildGraph should update Defs/Uses");
741 895288 : Defs.setUniverse(TRI->getNumRegs());
742 895288 : Uses.setUniverse(TRI->getNumRegs());
743 :
744 : assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
745 : assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
746 895288 : unsigned NumVirtRegs = MRI.getNumVirtRegs();
747 895288 : CurrentVRegDefs.setUniverse(NumVirtRegs);
748 895288 : CurrentVRegUses.setUniverse(NumVirtRegs);
749 :
750 : // Model data dependencies between instructions being scheduled and the
751 : // ExitSU.
752 895288 : addSchedBarrierDeps();
753 :
754 : // Walk the list of instructions, from bottom moving up.
755 : MachineInstr *DbgMI = nullptr;
756 895288 : for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
757 4834990 : MII != MIE; --MII) {
758 3939702 : MachineInstr &MI = *std::prev(MII);
759 3939702 : if (DbgMI) {
760 111549 : DbgValues.push_back(std::make_pair(DbgMI, &MI));
761 : DbgMI = nullptr;
762 : }
763 :
764 3939702 : if (MI.isDebugValue()) {
765 : DbgMI = &MI;
766 2481880 : continue;
767 : }
768 3817027 : if (MI.isDebugLabel())
769 : continue;
770 :
771 3817026 : SUnit *SU = MISUnitMap[&MI];
772 : assert(SU && "No SUnit mapped to this MI");
773 :
774 3817026 : if (RPTracker) {
775 1536618 : RegisterOperands RegOpers;
776 1536618 : RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
777 1536618 : if (TrackLaneMasks) {
778 341436 : SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
779 341436 : RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
780 : }
781 1536618 : if (PDiffs != nullptr)
782 1536618 : PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
783 :
784 1536618 : if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
785 1536618 : RPTracker->recedeSkipDebugValues();
786 : assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
787 1536618 : RPTracker->recede(RegOpers);
788 : }
789 :
790 : assert(
791 : (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
792 : "Cannot schedule terminators or labels!");
793 :
794 : // Add register-based dependencies (data, anti, and output).
795 : // For some instructions (calls, returns, inline-asm, etc.) there can
796 : // be explicit uses and implicit defs, in which case the use will appear
797 : // on the operand list before the def. Do two passes over the operand
798 : // list to make sure that defs are processed before any uses.
799 : bool HasVRegDef = false;
800 22743080 : for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
801 18926054 : const MachineOperand &MO = MI.getOperand(j);
802 18926054 : if (!MO.isReg() || !MO.isDef())
803 : continue;
804 3497342 : unsigned Reg = MO.getReg();
805 3497342 : if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
806 1909676 : addPhysRegDeps(SU, j);
807 1587666 : } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
808 : HasVRegDef = true;
809 1587666 : addVRegDefDeps(SU, j);
810 : }
811 : }
812 : // Now process all uses.
813 22743080 : for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
814 18926054 : const MachineOperand &MO = MI.getOperand(j);
815 : // Only look at use operands.
816 : // We do not need to check for MO.readsReg() here because subsequent
817 : // subregister defs will get output dependence edges and need no
818 : // additional use dependencies.
819 18926054 : if (!MO.isReg() || !MO.isUse())
820 : continue;
821 8788418 : unsigned Reg = MO.getReg();
822 8788418 : if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
823 3189216 : addPhysRegDeps(SU, j);
824 5599202 : } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
825 2382624 : addVRegUseDeps(SU, j);
826 : }
827 : }
828 :
829 : // If we haven't seen any uses in this scheduling region, create a
830 : // dependence edge to ExitSU to model the live-out latency. This is required
831 : // for vreg defs with no in-region use, and prefetches with no vreg def.
832 : //
833 : // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
834 : // check currently relies on being called before adding chain deps.
835 3817026 : if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
836 : SDep Dep(SU, SDep::Artificial);
837 410789 : Dep.setLatency(SU->Latency - 1);
838 410789 : ExitSU.addPred(Dep);
839 : }
840 :
841 : // Add memory dependencies (Note: isStoreToStackSlot and
842 : // isLoadFromStackSlot are not usable after stack slots are lowered to
843 : // actual addresses).
844 :
845 : // This is a barrier event that acts as a pivotal node in the DAG.
846 3817026 : if (isGlobalMemoryObject(AA, &MI)) {
847 :
848 : // Become the barrier chain.
849 101138 : if (BarrierChain)
850 52392 : BarrierChain->addPredBarrier(SU);
851 101138 : BarrierChain = SU;
852 :
853 : LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
854 : << BarrierChain->NodeNum << ").\n";);
855 :
856 : // Add dependencies against everything below it and clear maps.
857 101138 : addBarrierChain(Stores);
858 101138 : addBarrierChain(Loads);
859 101138 : addBarrierChain(NonAliasStores);
860 101138 : addBarrierChain(NonAliasLoads);
861 :
862 101138 : continue;
863 : }
864 :
865 : // If it's not a store or a variant load, we're done.
866 6341169 : if (!MI.mayStore() &&
867 3116322 : !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
868 2258066 : continue;
869 :
870 : // Always add dependecy edge to BarrierChain if present.
871 1457822 : if (BarrierChain)
872 37596 : BarrierChain->addPredBarrier(SU);
873 :
874 : // Find the underlying objects for MI. The Objs vector is either
875 : // empty, or filled with the Values of memory locations which this
876 : // SU depends on.
877 : UnderlyingObjectsVector Objs;
878 1457822 : bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
879 1457822 : MF.getDataLayout());
880 :
881 1457822 : if (MI.mayStore()) {
882 1090607 : if (!ObjsFound) {
883 : // An unknown store depends on all stores and loads.
884 111255 : addChainDependencies(SU, Stores);
885 111255 : addChainDependencies(SU, NonAliasStores);
886 111255 : addChainDependencies(SU, Loads);
887 111255 : addChainDependencies(SU, NonAliasLoads);
888 :
889 : // Map this store to 'UnknownValue'.
890 222510 : Stores.insert(SU, UnknownValue);
891 : } else {
892 : // Add precise dependencies against all previously seen memory
893 : // accesses mapped to the same Value(s).
894 2488650 : for (const UnderlyingObject &UnderlObj : Objs) {
895 1509298 : ValueType V = UnderlObj.getValue();
896 : bool ThisMayAlias = UnderlObj.mayAlias();
897 :
898 : // Add dependencies to previous stores and loads mapped to V.
899 1524543 : addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
900 1524543 : addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
901 : }
902 : // Update the store map after all chains have been added to avoid adding
903 : // self-loop edge if multiple underlying objects are present.
904 2488650 : for (const UnderlyingObject &UnderlObj : Objs) {
905 1509298 : ValueType V = UnderlObj.getValue();
906 : bool ThisMayAlias = UnderlObj.mayAlias();
907 :
908 : // Map this store to V.
909 1524543 : (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
910 : }
911 : // The store may have dependencies to unanalyzable loads and
912 : // stores.
913 1958704 : addChainDependencies(SU, Loads, UnknownValue);
914 1958704 : addChainDependencies(SU, Stores, UnknownValue);
915 : }
916 : } else { // SU is a load.
917 367215 : if (!ObjsFound) {
918 : // An unknown load depends on all stores.
919 139850 : addChainDependencies(SU, Stores);
920 139850 : addChainDependencies(SU, NonAliasStores);
921 :
922 279700 : Loads.insert(SU, UnknownValue);
923 : } else {
924 455773 : for (const UnderlyingObject &UnderlObj : Objs) {
925 228408 : ValueType V = UnderlObj.getValue();
926 : bool ThisMayAlias = UnderlObj.mayAlias();
927 :
928 : // Add precise dependencies against all previously seen stores
929 : // mapping to the same Value(s).
930 241051 : addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
931 :
932 : // Map this load to V.
933 241051 : (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
934 : }
935 : // The load may have dependencies to unanalyzable stores.
936 454730 : addChainDependencies(SU, Stores, UnknownValue);
937 : }
938 : }
939 :
940 : // Reduce maps if they grow huge.
941 2915644 : if (Stores.size() + Loads.size() >= HugeRegion) {
942 : LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
943 7 : reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
944 : }
945 2915644 : if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
946 : LLVM_DEBUG(
947 : dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
948 0 : reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
949 : }
950 : }
951 :
952 895288 : if (DbgMI)
953 11126 : FirstDbgValue = DbgMI;
954 :
955 : Defs.clear();
956 : Uses.clear();
957 : CurrentVRegDefs.clear();
958 : CurrentVRegUses.clear();
959 895288 : }
960 :
961 0 : raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
962 0 : PSV->printCustom(OS);
963 0 : return OS;
964 : }
965 :
966 0 : void ScheduleDAGInstrs::Value2SUsMap::dump() {
967 0 : for (auto &Itr : *this) {
968 0 : if (Itr.first.is<const Value*>()) {
969 : const Value *V = Itr.first.get<const Value*>();
970 0 : if (isa<UndefValue>(V))
971 0 : dbgs() << "Unknown";
972 : else
973 0 : V->printAsOperand(dbgs());
974 : }
975 : else if (Itr.first.is<const PseudoSourceValue*>())
976 0 : dbgs() << Itr.first.get<const PseudoSourceValue*>();
977 : else
978 : llvm_unreachable("Unknown Value type.");
979 :
980 0 : dbgs() << " : ";
981 : dumpSUList(Itr.second);
982 : }
983 0 : }
984 :
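/// Reduces the given maps when they grow past HugeRegion: the N
/// highest-numbered SUs recorded so far are pruned from the lists and the
/// lowest-numbered of them becomes the new BarrierChain, so SUs seen later
/// in the bottom-up scan need only a single barrier edge instead of edges
/// to every pruned SU.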
985 7 : void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
986 : Value2SUsMap &loads, unsigned N) {
987 : LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
988 : dbgs() << "Loading SUnits:\n"; loads.dump());
989 :
990 : // Insert all SU's NodeNums into a vector and sort it.
991 : std::vector<unsigned> NodeNums;
992 7 : NodeNums.reserve(stores.size() + loads.size());
993 2976 : for (auto &I : stores)
994 9969 : for (auto *SU : I.second)
995 7000 : NodeNums.push_back(SU->NodeNum);
996 7 : for (auto &I : loads)
997 0 : for (auto *SU : I.second)
998 0 : NodeNums.push_back(SU->NodeNum);
999 : llvm::sort(NodeNums);
1000 :
1001 : // The last N elements in NodeNums will be removed, and the SU with
1002 : // the lowest NodeNum among them will become the new BarrierChain, so that
1003 : // the not yet seen SUs have a dependency on the removed SUs.
1004 : assert(N <= NodeNums.size());
1005 7 : SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
1006 7 : if (BarrierChain) {
1007 : // The aliasing and non-aliasing maps reduce independently of each
1008 : // other, but share a common BarrierChain. Check if the
1009 : // newBarrierChain is above the former one. If it is not, using
1010 : // newBarrierChain may introduce a cycle, so keep the old one.
1011 2 : if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1012 2 : BarrierChain->addPredBarrier(newBarrierChain);
1013 2 : BarrierChain = newBarrierChain;
1014 : LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
1015 : << BarrierChain->NodeNum << ").\n";);
1016 : }
1017 : else
1018 : LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
1019 : << BarrierChain->NodeNum << ").\n";);
1020 : }
1021 : else
1022 5 : BarrierChain = newBarrierChain;
1023 :
1024 7 : insertBarrierChain(stores);
1025 7 : insertBarrierChain(loads);
1026 :
1027 : LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
1028 : dbgs() << "Loading SUnits:\n"; loads.dump());
1029 7 : }
1030 :
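/// Recomputes kill flags on the register reads of \p MI: a read kills its
/// register exactly when the register is not live below the instruction
/// (the block is scanned bottom-up), and when \p addToLiveRegs is set the
/// read then makes the register live for the instructions above it.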
1031 1345076 : static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
1032 : MachineInstr &MI, bool addToLiveRegs) {
1033 7559614 : for (MachineOperand &MO : MI.operands()) {
1034 6214538 : if (!MO.isReg() || !MO.readsReg())
1035 : continue;
1036 3006555 : unsigned Reg = MO.getReg();
1037 3006555 : if (!Reg)
1038 : continue;
1039 :
1040 : // Things that are available after the instruction are killed by it.
1041 1926108 : bool IsKill = LiveRegs.available(MRI, Reg);
1042 : MO.setIsKill(IsKill);
1043 1926108 : if (addToLiveRegs)
1044 1923976 : LiveRegs.addReg(Reg);
1045 : }
1046 1345076 : }
1047 :
1048 157649 : void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
1049 : LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
1050 :
1051 157649 : LiveRegs.init(*TRI);
1052 157649 : LiveRegs.addLiveOuts(MBB);
1053 :
1054 : // Examine block from end to start...
1055 1500684 : for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1056 : if (MI.isDebugInstr())
1057 : continue;
1058 :
1059 : // Update liveness. Registers that are defined but not used in this
1060 : // instruction are now dead. Mark the register and all of its subregs
1061 : // as completely defined.
1062 7560987 : for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
1063 : const MachineOperand &MO = *O;
1064 6218206 : if (MO.isReg()) {
1065 4163410 : if (!MO.isDef())
1066 : continue;
1067 1094437 : unsigned Reg = MO.getReg();
1068 1094437 : if (!Reg)
1069 : continue;
1070 1094437 : LiveRegs.removeReg(Reg);
1071 2054796 : } else if (MO.isRegMask()) {
1072 43878 : LiveRegs.removeRegsInMask(MO);
1073 : }
1074 : }
1075 :
1076 : // If there is a bundle header fix it up first.
1077 : if (!MI.isBundled()) {
1078 1341213 : toggleKills(MRI, LiveRegs, MI, true);
1079 : } else {
1080 1568 : MachineBasicBlock::instr_iterator First = MI.getIterator();
1081 1568 : if (MI.isBundle()) {
1082 1568 : toggleKills(MRI, LiveRegs, MI, false);
1083 : ++First;
1084 : }
1085 : // Some targets make the (questionable) assumption that the instructions
1086 : // inside the bundle are ordered and consequently only the last use of
1087 : // a register inside the bundle can kill it.
1088 : MachineBasicBlock::instr_iterator I = std::next(First);
1089 2296 : while (I->isBundledWithSucc())
1090 : ++I;
1091 : do {
1092 : if (!I->isDebugInstr())
1093 2295 : toggleKills(MRI, LiveRegs, *I, true);
1094 : --I;
1095 2296 : } while(I != First);
1096 : }
1097 : }
1098 157649 : }
1099 :
1100 0 : void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
1101 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1102 : dumpNodeName(SU);
1103 : dbgs() << ": ";
1104 : SU.getInstr()->dump();
1105 : #endif
1106 0 : }
1107 :
1108 0 : void ScheduleDAGInstrs::dump() const {
1109 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1110 : if (EntrySU.getInstr() != nullptr)
1111 : dumpNodeAll(EntrySU);
1112 : for (const SUnit &SU : SUnits)
1113 : dumpNodeAll(SU);
1114 : if (ExitSU.getInstr() != nullptr)
1115 : dumpNodeAll(ExitSU);
1116 : #endif
1117 0 : }
1118 :
1119 0 : std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1120 : std::string s;
1121 0 : raw_string_ostream oss(s);
1122 0 : if (SU == &EntrySU)
1123 0 : oss << "<entry>";
1124 0 : else if (SU == &ExitSU)
1125 0 : oss << "<exit>";
1126 : else
1127 0 : SU->getInstr()->print(oss, /*SkipOpers=*/true);
1128 0 : return oss.str();
1129 : }
1130 :
1131 : /// Return the basic block label. It is not necessarily unique because a block
1132 : /// contains multiple scheduling regions. But it is fine for visualization.
1133 0 : std::string ScheduleDAGInstrs::getDAGName() const {
1134 0 : return "dag." + BB->getFullName();
1135 : }
1136 :
1137 : //===----------------------------------------------------------------------===//
1138 : // SchedDFSResult Implementation
1139 : //===----------------------------------------------------------------------===//
1140 :
1141 : namespace llvm {
1142 :
1143 : /// Internal state used to compute SchedDFSResult.
1144 : class SchedDFSImpl {
1145 : SchedDFSResult &R;
1146 :
1147 : /// Join DAG nodes into equivalence classes by their subtree.
1148 : IntEqClasses SubtreeClasses;
1149 : /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1150 : std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1151 :
1152 : struct RootData {
1153 : unsigned NodeID;
1154 : unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1155 : unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1156 : /// children.
1157 :
1158 354 : RootData(unsigned id): NodeID(id),
1159 8 : ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1160 :
1161 0 : unsigned getSparseSetIndex() const { return NodeID; }
1162 : };
1163 :
1164 : SparseSet<RootData> RootSet;
1165 :
1166 : public:
1167 20 : SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1168 20 : RootSet.setUniverse(R.DFSNodeData.size());
1169 10 : }
1170 :
1171 : /// Returns true if this node has been visited by the DFS traversal.
1172 : ///
1173 : /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1174 : /// ID. Later, SubtreeID is updated but remains valid.
1175 0 : bool isVisited(const SUnit *SU) const {
1176 708 : return R.DFSNodeData[SU->NodeNum].SubtreeID
1177 0 : != SchedDFSResult::InvalidSubtreeID;
1178 : }
1179 :
1180 : /// Initializes this node's instruction count. We don't need to flag the node
1181 : /// visited until visitPostorder because the DAG cannot have cycles.
1182 0 : void visitPreorder(const SUnit *SU) {
1183 0 : R.DFSNodeData[SU->NodeNum].InstrCount =
1184 0 : SU->getInstr()->isTransient() ? 0 : 1;
1185 0 : }
1186 :
1187 : /// Called once for each node after all predecessors are visited. Revisit this
1188 : /// node's predecessors and potentially join them now that we know the ILP of
1189 : /// the other predecessors.
1190 176 : void visitPostorderNode(const SUnit *SU) {
1191 : // Mark this node as the root of a subtree. It may be joined with its
1192 : // successors later.
1193 176 : R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1194 176 : RootData RData(SU->NodeNum);
1195 176 : RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1196 :
1197 : // If any predecessors are still in their own subtree, they either cannot be
1198 : // joined or are large enough to remain separate. If this parent node's
1199 : // total instruction count does not exceed a child subtree's count by at
1200 : // least the subtree limit, then try to join it now, since splitting subtrees is
1201 : // only useful if multiple high-pressure paths are possible.
1202 352 : unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1203 548 : for (const SDep &PredDep : SU->Preds) {
1204 372 : if (PredDep.getKind() != SDep::Data)
1205 : continue;
1206 178 : unsigned PredNum = PredDep.getSUnit()->NodeNum;
1207 356 : if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1208 170 : joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
1209 :
1210 : // Either link or merge the TreeData entry from the child to the parent.
1211 356 : if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1212 : // If the predecessor's parent is invalid, this is a tree edge and the
1213 : // current node is the parent.
1214 32 : if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1215 8 : RootSet[PredNum].ParentNodeID = SU->NodeNum;
1216 : }
1217 146 : else if (RootSet.count(PredNum)) {
1218 : // The predecessor is not a root, but is still in the root set. This
1219 : // must be the new parent that it was just joined to. Note that
1220 : // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1221 : // set to the original parent.
1222 138 : RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1223 138 : RootSet.erase(PredNum);
1224 : }
1225 : }
1226 176 : RootSet[SU->NodeNum] = RData;
1227 176 : }
1228 :
1229 : /// Called once for each tree edge after calling visitPostOrderNode on
1230 : /// the predecessor. Increment the parent node's instruction count and
1231 : /// preemptively join this subtree to its parent's if it is small enough.
1232 146 : void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1233 146 : R.DFSNodeData[Succ->NodeNum].InstrCount
1234 292 : += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1235 146 : joinPredSubtree(PredDep, Succ);
1236 146 : }
1237 :
1238 : /// Adds a connection for cross edges.
1239 : void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1240 32 : ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
1241 : }
1242 :
1243 : /// Sets each node's subtree ID to the representative ID and record
1244 : /// connections between trees.
1245 10 : void finalize() {
1246 10 : SubtreeClasses.compress();
1247 10 : R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
1248 : assert(SubtreeClasses.getNumClasses() == RootSet.size()
1249 : && "number of roots should match trees");
1250 48 : for (const RootData &Root : RootSet) {
1251 38 : unsigned TreeID = SubtreeClasses[Root.NodeID];
1252 38 : if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1253 16 : R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
1254 76 : R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
1255 : // Note that SubInstrCount may be greater than InstrCount if we joined
1256 : // subtrees across a cross edge. InstrCount will be attributed to the
1257 : // original parent, while SubInstrCount will be attributed to the joined
1258 : // parent.
1259 : }
1260 10 : R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1261 10 : R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1262 : LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1263 196 : for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1264 352 : R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1265 : LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1266 : << R.DFSNodeData[Idx].SubtreeID << '\n');
1267 : }
1268 42 : for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
1269 32 : unsigned PredTree = SubtreeClasses[P.first->NodeNum];
1270 32 : unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
1271 32 : if (PredTree == SuccTree)
1272 : continue;
1273 : unsigned Depth = P.first->getDepth();
1274 32 : addConnection(PredTree, SuccTree, Depth);
1275 32 : addConnection(SuccTree, PredTree, Depth);
1276 : }
1277 10 : }
1278 :
1279 : protected:
1280 : /// Joins the predecessor subtree with the successor that is its DFS parent.
1281 : /// Applies some heuristics before joining.
1282 316 : bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1283 : bool CheckLimit = true) {
1284 : assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1285 :
1286 : // Check if the predecessor is already joined.
1287 : const SUnit *PredSU = PredDep.getSUnit();
1288 316 : unsigned PredNum = PredSU->NodeNum;
1289 632 : if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1290 : return false;
1291 :
1292 : // Four is the magic number of successors before a node is considered a
1293 : // pinch point.
1294 : unsigned NumDataSucs = 0;
1295 618 : for (const SDep &SuccDep : PredSU->Succs) {
1296 472 : if (SuccDep.getKind() == SDep::Data) {
1297 314 : if (++NumDataSucs >= 4)
1298 : return false;
1299 : }
1300 : }
1301 146 : if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1302 : return false;
1303 138 : R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1304 138 : SubtreeClasses.join(Succ->NodeNum, PredNum);
1305 138 : return true;
1306 : }
1307 :
1308 : /// Called by finalize() to record a connection between trees.
1309 64 : void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1310 64 : if (!Depth)
1311 : return;
1312 :
1313 : do {
1314 : SmallVectorImpl<SchedDFSResult::Connection> &Connections =
1315 2 : R.SubtreeConnections[FromTree];
1316 2 : for (SchedDFSResult::Connection &C : Connections) {
1317 0 : if (C.TreeID == ToTree) {
1318 0 : C.Level = std::max(C.Level, Depth);
1319 0 : return;
1320 : }
1321 : }
1322 4 : Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1323 2 : FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1324 2 : } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1325 : }
1326 : };
1327 :
1328 : } // end namespace llvm
1329 :
1330 : namespace {
1331 :
1332 : /// Manage the stack used by a reverse depth-first search over the DAG.
1333 : class SchedDAGReverseDFS {
1334 : std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1335 :
1336 : public:
1337 : bool isComplete() const { return DFSStack.empty(); }
1338 :
1339 : void follow(const SUnit *SU) {
1340 176 : DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
1341 : }
1342 372 : void advance() { ++DFSStack.back().second; }
1343 :
1344 : const SDep *backtrack() {
1345 : DFSStack.pop_back();
1346 176 : return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1347 : }
1348 :
1349 694 : const SUnit *getCurr() const { return DFSStack.back().first; }
1350 :
1351 548 : SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1352 :
1353 : SUnit::const_pred_iterator getPredEnd() const {
1354 : return getCurr()->Preds.end();
1355 : }
1356 : };
1357 :
1358 : } // end anonymous namespace
1359 :
1360 : static bool hasDataSucc(const SUnit *SU) {
1361 213 : for (const SDep &SuccDep : SU->Succs) {
1362 183 : if (SuccDep.getKind() == SDep::Data &&
1363 146 : !SuccDep.getSUnit()->isBoundaryNode())
1364 : return true;
1365 : }
1366 : return false;
1367 : }
1368 :
1369 : /// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1370 : /// search from this root.
1371 10 : void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1372 10 : if (!IsBottomUp)
1373 0 : llvm_unreachable("Top-down ILP metric is unimplemented");
1374 :
1375 20 : SchedDFSImpl Impl(*this);
1376 186 : for (const SUnit &SU : SUnits) {
1377 528 : if (Impl.isVisited(&SU) || hasDataSucc(&SU))
1378 146 : continue;
1379 :
1380 : SchedDAGReverseDFS DFS;
1381 30 : Impl.visitPreorder(&SU);
1382 : DFS.follow(&SU);
1383 : while (true) {
1384 : // Traverse the leftmost path as far as possible.
1385 548 : while (DFS.getPred() != DFS.getPredEnd()) {
1386 : const SDep &PredDep = *DFS.getPred();
1387 : DFS.advance();
1388 : // Ignore non-data edges.
1389 : if (PredDep.getKind() != SDep::Data
1390 372 : || PredDep.getSUnit()->isBoundaryNode()) {
1391 : continue;
1392 : }
1393 : // An edge to an already visited node is a cross edge, assuming an acyclic DAG.
1394 356 : if (Impl.isVisited(PredDep.getSUnit())) {
1395 : Impl.visitCrossEdge(PredDep, DFS.getCurr());
1396 32 : continue;
1397 : }
1398 146 : Impl.visitPreorder(PredDep.getSUnit());
1399 : DFS.follow(PredDep.getSUnit());
1400 : }
1401 : // Visit the top of the stack in postorder and backtrack.
1402 : const SUnit *Child = DFS.getCurr();
1403 : const SDep *PredDep = DFS.backtrack();
1404 176 : Impl.visitPostorderNode(Child);
1405 176 : if (PredDep)
1406 146 : Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1407 176 : if (DFS.isComplete())
1408 : break;
1409 : }
1410 : }
1411 10 : Impl.finalize();
1412 10 : }
1413 :
1414 : /// The root of the given SubtreeID was just scheduled. For all subtrees
1415 : /// connected to this tree, record the depth of the connection so that the
1416 : /// nearest connected subtrees can be prioritized.
1417 38 : void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1418 40 : for (const Connection &C : SubtreeConnections[SubtreeID]) {
1419 2 : SubtreeConnectLevels[C.TreeID] =
1420 4 : std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1421 : LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
1422 : << SubtreeConnectLevels[C.TreeID] << '\n');
1423 : }
1424 38 : }
1425 :
1426 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1427 : LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
1428 : OS << InstrCount << " / " << Length << " = ";
1429 : if (!Length)
1430 : OS << "BADILP";
1431 : else
1432 : OS << format("%g", ((double)InstrCount / Length));
1433 : }
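// For example, a subtree with InstrCount = 6 and Length = 3 prints
// "6 / 3 = 2", i.e. an ILP of two.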
1434 :
1435 : LLVM_DUMP_METHOD void ILPValue::dump() const {
1436 : dbgs() << *this << '\n';
1437 : }
1438 :
1439 : namespace llvm {
1440 :
1441 : LLVM_DUMP_METHOD
1442 : raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1443 : Val.print(OS);
1444 : return OS;
1445 : }
1446 :
1447 : } // end namespace llvm
1448 :
1449 : #endif
|