LLVM 19.0.0git
ScheduleDAGInstrs.cpp
1//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This implements the ScheduleDAGInstrs class, which implements
10/// re-scheduling of MachineInstrs.
11//
12//===----------------------------------------------------------------------===//
13
15
17#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/SparseSet.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/Function.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/Value.h"
45#include "llvm/MC/LaneBitmask.h"
50#include "llvm/Support/Debug.h"
52#include "llvm/Support/Format.h"
54#include <algorithm>
55#include <cassert>
56#include <iterator>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "machine-scheduler"
63
64static cl::opt<bool>
65 EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
66 cl::desc("Enable use of AA during MI DAG construction"));
67
68static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
69 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
70
71// Note: the two options below might be used in tuning compile time vs
72// output quality. Setting HugeRegion so large that it will never be
73// reached means best-effort, but may be slow.
74
75// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
76// together hold this many SUs, a reduction of maps will be done.
77static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
78 cl::init(1000), cl::desc("The limit to use while constructing the DAG "
79 "prior to scheduling, at which point a trade-off "
80 "is made to avoid excessive compile time."));
81
82static cl::opt<unsigned> ReductionSize(
83 "dag-maps-reduction-size", cl::Hidden,
84 cl::desc("A huge scheduling region will have maps reduced by this many "
85 "nodes at a time. Defaults to HugeRegion / 2."));
86
87#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
88static cl::opt<bool> SchedPrintCycles(
89 "sched-print-cycles", cl::Hidden, cl::init(false),
90 cl::desc("Report top/bottom cycles when dumping SUnit instances"));
91#endif
92
93static unsigned getReductionSize() {
94 // Always reduce a huge region with half of the elements, except
95 // when the user sets this number explicitly.
96 if (ReductionSize.getNumOccurrences() == 0)
97 return HugeRegion / 2;
98 return ReductionSize;
99}
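// Both thresholds above are ordinary cl::opt flags, so the compile-time vs.
// output-quality trade-off can be explored from the command line. A usage
// sketch (flag names come from the declarations above; how they are forwarded
// to the backend depends on the driver being used):
//
//   llc -dag-maps-huge-region=500 -dag-maps-reduction-size=100 test.ll
//   clang -O2 -c test.c -mllvm -dag-maps-huge-region=500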
100
101static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
102#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
103 dbgs() << "{ ";
104 for (const SUnit *SU : L) {
105 dbgs() << "SU(" << SU->NodeNum << ")";
106 if (SU != L.back())
107 dbgs() << ", ";
108 }
109 dbgs() << "}\n";
110#endif
111}
112
113ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
114 const MachineLoopInfo *mli,
115 bool RemoveKillFlags)
116 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
117 RemoveKillFlags(RemoveKillFlags),
118 UnknownValue(UndefValue::get(
119 Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
120 DbgValues.clear();
121
122 const TargetSubtargetInfo &ST = mf.getSubtarget();
123 SchedModel.init(&ST);
124}
125
126/// If this machine instr has memory reference information and it can be
127/// tracked to a normal reference to a known object, return the Value
128/// for that object. This function returns false if the memory location is
129/// unknown or may alias anything.
130static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
131 const MachineFrameInfo &MFI,
132 UnderlyingObjectsVector &Objects,
133 const DataLayout &DL) {
134 auto AllMMOsOkay = [&]() {
135 for (const MachineMemOperand *MMO : MI->memoperands()) {
136 // TODO: Figure out whether isAtomic is really necessary (see D57601).
137 if (MMO->isVolatile() || MMO->isAtomic())
138 return false;
139
140 if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
142 // Functions that contain tail calls don't have unique PseudoSourceValue
142 // objects. Two PseudoSourceValues might refer to the same or
143 // overlapping locations. The client code calling this function assumes
144 // this is not the case. So return a conservative answer of no known
145 // object.
146 if (MFI.hasTailCall())
147 return false;
148
149 // For now, ignore PseudoSourceValues which may alias LLVM IR values
150 // because the code that uses this function has no way to cope with
151 // such aliases.
152 if (PSV->isAliased(&MFI))
153 return false;
154
155 bool MayAlias = PSV->mayAlias(&MFI);
156 Objects.emplace_back(PSV, MayAlias);
157 } else if (const Value *V = MMO->getValue()) {
158 SmallVector<Value *, 4> Objs;
159 if (!getUnderlyingObjectsForCodeGen(V, Objs))
160 return false;
161
162 for (Value *V : Objs) {
163 assert(isIdentifiedObject(V));
164 Objects.emplace_back(V, true);
165 }
166 } else
167 return false;
168 }
169 return true;
170 };
171
172 if (!AllMMOsOkay()) {
173 Objects.clear();
174 return false;
175 }
176
177 return true;
178}
179
180void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
181 BB = bb;
182}
183
184void ScheduleDAGInstrs::finishBlock() {
185 // Subclasses should no longer refer to the old block.
186 BB = nullptr;
187}
188
189void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
190 MachineBasicBlock::iterator begin,
191 MachineBasicBlock::iterator end,
192 unsigned regioninstrs) {
193 assert(bb == BB && "startBlock should set BB");
194 RegionBegin = begin;
195 RegionEnd = end;
196 NumRegionInstrs = regioninstrs;
197}
198
199void ScheduleDAGInstrs::exitRegion() {
200 // Nothing to do.
201}
202
203void ScheduleDAGInstrs::addSchedBarrierDeps() {
204 MachineInstr *ExitMI =
205 RegionEnd != BB->end()
206 ? &*skipDebugInstructionsBackward(RegionEnd, RegionBegin)
207 : nullptr;
208 ExitSU.setInstr(ExitMI);
209 // Add dependencies on the defs and uses of the instruction.
210 if (ExitMI) {
211 for (const MachineOperand &MO : ExitMI->all_uses()) {
212 Register Reg = MO.getReg();
213 if (Reg.isPhysical()) {
214 for (MCRegUnit Unit : TRI->regunits(Reg))
215 Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
216 } else if (Reg.isVirtual() && MO.readsReg()) {
217 addVRegUseDeps(&ExitSU, MO.getOperandNo());
218 }
219 }
220 }
221 if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
222 // For others, e.g. fallthrough, conditional branch, assume the exit
223 // uses all the registers that are livein to the successor blocks.
224 for (const MachineBasicBlock *Succ : BB->successors()) {
225 for (const auto &LI : Succ->liveins()) {
226 for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
227 auto [Unit, Mask] = *U;
228 if ((Mask & LI.LaneMask).any() && !Uses.contains(Unit))
229 Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
230 }
231 }
232 }
233 }
234}
235
236/// MO is an operand of SU's instruction that defines a physical register. Adds
237/// data dependencies from SU to any uses of the physical register.
238void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
239 const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
240 assert(MO.isDef() && "expect physreg def");
241 Register Reg = MO.getReg();
242
243 // Ask the target if address-backscheduling is desirable, and if so how much.
244 const TargetSubtargetInfo &ST = MF.getSubtarget();
245
246 // Only use any non-zero latency for real defs/uses, in contrast to
247 // "fake" operands added by regalloc.
248 const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc();
249 bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() &&
250 !DefMIDesc.hasImplicitDefOfPhysReg(Reg));
251 for (MCRegUnit Unit : TRI->regunits(Reg)) {
252 for (RegUnit2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end();
253 ++I) {
254 SUnit *UseSU = I->SU;
255 if (UseSU == SU)
256 continue;
257
258 // Adjust the dependence latency using operand def/use information,
259 // then allow the target to perform its own adjustments.
260 MachineInstr *UseInstr = nullptr;
261 int UseOpIdx = I->OpIdx;
262 bool ImplicitPseudoUse = false;
263 SDep Dep;
264 if (UseOpIdx < 0) {
265 Dep = SDep(SU, SDep::Artificial);
266 } else {
267 // Set the hasPhysRegDefs only for physreg defs that have a use within
268 // the scheduling region.
269 SU->hasPhysRegDefs = true;
270
271 UseInstr = UseSU->getInstr();
272 Register UseReg = UseInstr->getOperand(UseOpIdx).getReg();
273 const MCInstrDesc &UseMIDesc = UseInstr->getDesc();
274 ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
275 !UseMIDesc.hasImplicitUseOfPhysReg(UseReg);
276
277 Dep = SDep(SU, SDep::Data, UseReg);
278 }
279 if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
280 Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
281 UseInstr, UseOpIdx));
282 } else {
283 Dep.setLatency(0);
284 }
285 ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep);
286 UseSU->addPred(Dep);
287 }
288 }
289}
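// The ST.adjustSchedDependency() call above is the hook a target can use to
// refine an edge's latency after the generic computation. A minimal sketch of
// such an override for a hypothetical MyTargetSubtarget (the exact virtual
// signature varies across LLVM versions; this mirrors the five arguments
// passed above and is not part of this file):
//
//   void MyTargetSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx,
//                                                 SUnit *Use, int UseOpIdx,
//                                                 SDep &Dep) const {
//     // E.g. model a forwarding path by shaving one cycle off plain data
//     // edges between back-to-back MAC instructions.
//     if (Dep.getKind() == SDep::Data && Dep.getLatency() > 0 &&
//         isMACPair(Def->getInstr(), Use->getInstr()))
//       Dep.setLatency(Dep.getLatency() - 1);
//   }
//
// Here isMACPair() is a made-up helper standing in for whatever target
// predicate identifies the forwarding pattern.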
290
291/// Adds register dependencies (data, anti, and output) from this SUnit
292/// to following instructions in the same scheduling region that depend on the
293/// physical register referenced at OperIdx.
294void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
295 MachineInstr *MI = SU->getInstr();
296 MachineOperand &MO = MI->getOperand(OperIdx);
297 Register Reg = MO.getReg();
298 // We do not need to track any dependencies for constant registers.
299 if (MRI.isConstantPhysReg(Reg))
300 return;
301
302 const TargetSubtargetInfo &ST = MF.getSubtarget();
303
304 // Optionally add output and anti dependencies. For anti
305 // dependencies we use a latency of 0 because for a multi-issue
306 // target we want to allow the defining instruction to issue
307 // in the same cycle as the using instruction.
308 // TODO: Using a latency of 1 here for output dependencies assumes
309 // there's no cost for reusing registers.
310 SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
311 for (MCRegUnit Unit : TRI->regunits(Reg)) {
312 for (RegUnit2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end();
313 ++I) {
314 SUnit *DefSU = I->SU;
315 if (DefSU == &ExitSU)
316 continue;
317 MachineInstr *DefInstr = DefSU->getInstr();
318 MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx);
319 if (DefSU != SU &&
320 (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) {
321 SDep Dep(SU, Kind, DefMO.getReg());
322 if (Kind != SDep::Anti) {
323 Dep.setLatency(
324 SchedModel.computeOutputLatency(MI, OperIdx, DefInstr));
325 }
326 ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep);
327 DefSU->addPred(Dep);
328 }
329 }
330 }
331
332 if (MO.isUse()) {
333 SU->hasPhysRegUses = true;
334 // Either insert a new Reg2SUnits entry with an empty SUnits list, or
335 // retrieve the existing SUnits list for this register's uses.
336 // Push this SUnit on the use list.
337 for (MCRegUnit Unit : TRI->regunits(Reg))
338 Uses.insert(PhysRegSUOper(SU, OperIdx, Unit));
339 if (RemoveKillFlags)
340 MO.setIsKill(false);
341 } else {
342 addPhysRegDataDeps(SU, OperIdx);
343
344 // Clear previous uses and defs of this register and its subregisters.
345 for (MCRegUnit Unit : TRI->regunits(Reg)) {
346 Uses.eraseAll(Unit);
347 if (!MO.isDead())
348 Defs.eraseAll(Unit);
349 }
350
351 if (MO.isDead() && SU->isCall) {
352 // Calls will not be reordered because of chain dependencies (see
353 // below). Since call operands are dead, calls may continue to be added
354 // to the DefList making dependence checking quadratic in the size of
355 // the block. Instead, we leave only one call at the back of the
356 // DefList.
357 for (MCRegUnit Unit : TRI->regunits(Reg)) {
358 RegUnit2SUnitsMap::RangePair P = Defs.equal_range(Unit);
359 RegUnit2SUnitsMap::iterator B = P.first;
360 RegUnit2SUnitsMap::iterator I = P.second;
361 for (bool isBegin = I == B; !isBegin; /* empty */) {
362 isBegin = (--I) == B;
363 if (!I->SU->isCall)
364 break;
365 I = Defs.erase(I);
366 }
367 }
368 }
369
370 // Defs are pushed in the order they are visited and never reordered.
371 for (MCRegUnit Unit : TRI->regunits(Reg))
372 Defs.insert(PhysRegSUOper(SU, OperIdx, Unit));
373 }
374}
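// As a worked illustration (hypothetical pseudo-MIR, not from a real target):
//
//   SU(0): $r0 = LOADimm 1
//   SU(1): $r1 = ADDrr $r0, $r2
//   SU(2): $r0 = LOADimm 2
//
// The region is walked bottom-up, so SU(2)'s def of $r0 is recorded in Defs
// first. Visiting SU(1)'s use of $r0 then adds an anti edge SU(1) -> SU(2)
// and records the use in Uses; finally SU(0)'s def adds an output edge
// SU(0) -> SU(2) here and a data edge SU(0) -> SU(1) via
// addPhysRegDataDeps().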
375
376LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
377{
378 Register Reg = MO.getReg();
379 // No point in tracking lanemasks if we don't have interesting subregisters.
380 const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
381 if (!RC.HasDisjunctSubRegs)
382 return LaneBitmask::getAll();
383
384 unsigned SubReg = MO.getSubReg();
385 if (SubReg == 0)
386 return RC.getLaneMask();
387 return TRI->getSubRegIndexLaneMask(SubReg);
388}
389
390bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
391 auto RegUse = CurrentVRegUses.find(MO.getReg());
392 if (RegUse == CurrentVRegUses.end())
393 return true;
394 return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
395}
396
397/// Adds register output and data dependencies from this SUnit to instructions
398/// that occur later in the same scheduling region if they read from or write to
399/// the virtual register defined at OperIdx.
400///
401/// TODO: Hoist loop induction variable increments. This has to be
402/// reevaluated. Generally, IV scheduling should be done before coalescing.
403void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
404 MachineInstr *MI = SU->getInstr();
405 MachineOperand &MO = MI->getOperand(OperIdx);
406 Register Reg = MO.getReg();
407
408 LaneBitmask DefLaneMask;
409 LaneBitmask KillLaneMask;
410 if (TrackLaneMasks) {
411 bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
412 DefLaneMask = getLaneMaskForMO(MO);
413 // If we have a <read-undef> flag, none of the lane values comes from an
414 // earlier instruction.
415 KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
416
417 if (MO.getSubReg() != 0 && MO.isUndef()) {
418 // There may be other subregister defs on the same instruction of the same
419 // register in later operands. The lanes of other defs will now be live
420 // after this instruction, so these should not be treated as killed by the
421 // instruction even though they appear to be killed in this one operand.
422 for (const MachineOperand &OtherMO :
423 llvm::drop_begin(MI->operands(), OperIdx + 1))
424 if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
425 KillLaneMask &= ~getLaneMaskForMO(OtherMO);
426 }
427
428 // Clear undef flag, we'll re-add it later once we know which subregister
429 // Def is first.
430 MO.setIsUndef(false);
431 } else {
432 DefLaneMask = LaneBitmask::getAll();
433 KillLaneMask = LaneBitmask::getAll();
434 }
435
436 if (MO.isDead()) {
437 assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
438 } else {
439 // Add data dependence to all uses we found so far.
440 const TargetSubtargetInfo &ST = MF.getSubtarget();
441 for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
442 E = CurrentVRegUses.end(); I != E; /*empty*/) {
443 LaneBitmask LaneMask = I->LaneMask;
444 // Ignore uses of other lanes.
445 if ((LaneMask & KillLaneMask).none()) {
446 ++I;
447 continue;
448 }
449
450 if ((LaneMask & DefLaneMask).any()) {
451 SUnit *UseSU = I->SU;
452 MachineInstr *Use = UseSU->getInstr();
453 SDep Dep(SU, SDep::Data, Reg);
454 Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
455 I->OperandIndex));
456 ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep);
457 UseSU->addPred(Dep);
458 }
459
460 LaneMask &= ~KillLaneMask;
461 // If we found a Def for all lanes of this use, remove it from the list.
462 if (LaneMask.any()) {
463 I->LaneMask = LaneMask;
464 ++I;
465 } else
466 CurrentVRegUses.erase(I);
467 }
468 }
469
470 // Shortcut: Singly defined vregs do not have output/anti dependencies.
471 if (MRI.hasOneDef(Reg))
472 return;
473
474 // Add output dependence to the next nearest defs of this vreg.
475 //
476 // Unless this definition is dead, the output dependence should be
477 // transitively redundant with antidependencies from this definition's
478 // uses. We're conservative for now until we have a way to guarantee the uses
479 // are not eliminated sometime during scheduling. The output dependence edge
480 // is also useful if output latency exceeds def-use latency.
481 LaneBitmask LaneMask = DefLaneMask;
482 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
483 CurrentVRegDefs.end())) {
484 // Ignore defs for other lanes.
485 if ((V2SU.LaneMask & LaneMask).none())
486 continue;
487 // Add an output dependence.
488 SUnit *DefSU = V2SU.SU;
489 // Ignore additional defs of the same lanes in one instruction. This can
490 // happen because lanemasks are shared for targets with too many
491 // subregisters. We also use some representation tricks/hacks where we
492 // add super-register defs/uses, to imply that although we only access parts
493 // of the reg we care about the full one.
494 if (DefSU == SU)
495 continue;
496 SDep Dep(SU, SDep::Output, Reg);
497 Dep.setLatency(
498 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
499 DefSU->addPred(Dep);
500
501 // Update current definition. This can get tricky if the def covered a
502 // bigger lanemask before. We then have to shrink it and create a new
503 // VReg2SUnit for the non-overlapping part.
504 LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
505 LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
506 V2SU.SU = SU;
507 V2SU.LaneMask = OverlapMask;
508 if (NonOverlapMask.any())
509 CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
510 }
511 // If there was no CurrentVRegDefs entry for some lanes yet, create one.
512 if (LaneMask.any())
513 CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
514}
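// Lane-mask illustration (hypothetical 64-bit vreg with sub_lo/sub_hi
// subregisters; the names are made up):
//
//   SU(0): %0.sub_lo = ...      ; DefLaneMask covers only the low lanes
//   SU(1): %0.sub_hi = ...      ; disjoint lanes, so no output dep on SU(0)
//   SU(2): ...       = use %0   ; data deps on both SU(0) and SU(1)
//
// With TrackLaneMasks enabled, the two partial defs keep separate
// CurrentVRegDefs entries, so only genuinely overlapping lanes produce
// output dependencies.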
515
516/// Adds a register data dependency if the instruction that defines the
517/// virtual register used at OperIdx is mapped to an SUnit. Add a register
518/// antidependency from this SUnit to instructions that occur later in the same
519/// scheduling region if they write the virtual register.
520///
521/// TODO: Handle ExitSU "uses" properly.
522void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
523 const MachineInstr *MI = SU->getInstr();
524 assert(!MI->isDebugOrPseudoInstr());
525
526 const MachineOperand &MO = MI->getOperand(OperIdx);
527 Register Reg = MO.getReg();
528
529 // Remember the use. Data dependencies will be added when we find the def.
530 LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
531 : LaneBitmask::getAll();
532 CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
533
534 // Add antidependences to the following defs of the vreg.
535 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
536 CurrentVRegDefs.end())) {
537 // Ignore defs for unrelated lanes.
538 LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
539 if ((PrevDefLaneMask & LaneMask).none())
540 continue;
541 if (V2SU.SU == SU)
542 continue;
543
544 V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
545 }
546}
547
548/// Returns true if MI is an instruction we are unable to reason about
549/// (like a call or something with unmodeled side effects).
550static bool isGlobalMemoryObject(MachineInstr *MI) {
551 return MI->isCall() || MI->hasUnmodeledSideEffects() ||
552 (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
553}
554
555void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
556 unsigned Latency) {
557 if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
558 SDep Dep(SUa, SDep::MayAliasMem);
559 Dep.setLatency(Latency);
560 SUb->addPred(Dep);
561 }
562}
563
564/// Creates an SUnit for each real instruction, numbered in top-down
565/// topological order. The instruction order A < B implies that no edge exists
566/// from B to A.
567///
568/// Map each real instruction to its SUnit.
569///
570/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
571/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
572/// instead of pointers.
573///
574/// MachineScheduler relies on initSUnits numbering the nodes by their order in
575/// the original instruction list.
576void ScheduleDAGInstrs::initSUnits() {
577 // We'll be allocating one SUnit for each real instruction in the region,
578 // which is contained within a basic block.
579 SUnits.reserve(NumRegionInstrs);
580
581 for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
582 if (MI.isDebugOrPseudoInstr())
583 continue;
584
585 SUnit *SU = newSUnit(&MI);
586 MISUnitMap[&MI] = SU;
587
588 SU->isCall = MI.isCall();
589 SU->isCommutable = MI.isCommutable();
590
591 // Assign the Latency field of SU using target-provided information.
592 SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
593
594 // If this SUnit uses a reserved or unbuffered resource, mark it as such.
595 //
596 // Reserved resources block an instruction from issuing and stall the
597 // entire pipeline. These are identified by BufferSize=0.
598 //
599 // Unbuffered resources prevent execution of subsequent instructions that
600 // require the same resources. This is used for in-order execution pipelines
601 // within an out-of-order core. These are identified by BufferSize=1.
602 if (SchedModel.hasInstrSchedModel()) {
603 const MCSchedClassDesc *SC = getSchedClass(SU);
604 for (const MCWriteProcResEntry &PRE :
605 make_range(SchedModel.getWriteProcResBegin(SC),
606 SchedModel.getWriteProcResEnd(SC))) {
607 switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
608 case 0:
609 SU->hasReservedResource = true;
610 break;
611 case 1:
612 SU->isUnbuffered = true;
613 break;
614 default:
615 break;
616 }
617 }
618 }
619 }
620}
621
622class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
623 /// Current total number of SUs in map.
624 unsigned NumNodes = 0;
625
626 /// 1 for loads, 0 for stores. (see comment in SUList)
627 unsigned TrueMemOrderLatency;
628
629public:
630 Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
631
632 /// To keep NumNodes up to date, insert() is used instead of
633 /// this operator w/ push_back().
634 ValueType &operator[](const SUList &Key) {
635 llvm_unreachable("Don't use. Use insert() instead."); };
636
637 /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
638 /// reduce().
639 void inline insert(SUnit *SU, ValueType V) {
640 MapVector::operator[](V).push_back(SU);
641 NumNodes++;
642 }
643
644 /// Clears the list of SUs mapped to V.
645 void inline clearList(ValueType V) {
646 iterator Itr = find(V);
647 if (Itr != end()) {
648 assert(NumNodes >= Itr->second.size());
649 NumNodes -= Itr->second.size();
650
651 Itr->second.clear();
652 }
653 }
654
655 /// Clears map from all contents.
656 void clear() {
657 MapVector::clear();
658 NumNodes = 0;
659 }
660
661 unsigned inline size() const { return NumNodes; }
662
663 /// Counts the number of SUs in this map after a reduction.
664 void reComputeSize() {
665 NumNodes = 0;
666 for (auto &I : *this)
667 NumNodes += I.second.size();
668 }
669
670 unsigned inline getTrueMemOrderLatency() const {
671 return TrueMemOrderLatency;
672 }
673
674 void dump();
675};
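// Usage sketch, mirroring how buildSchedGraph() below employs the map
// (illustrative only, not additional code in this file):
//
//   Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
//   Stores.insert(StoreSU, V);                 // remember a store to V
//   addChainDependencies(LoadSU, Stores, V);   // a later access to V gets a
//                                              // chain edge to that store
//   if (Stores.size() + Loads.size() >= HugeRegion)
//     reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());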
676
677void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
678 Value2SUsMap &Val2SUsMap) {
679 for (auto &I : Val2SUsMap)
680 addChainDependencies(SU, I.second,
681 Val2SUsMap.getTrueMemOrderLatency());
682}
683
684void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
685 Value2SUsMap &Val2SUsMap,
686 ValueType V) {
687 Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
688 if (Itr != Val2SUsMap.end())
689 addChainDependencies(SU, Itr->second,
690 Val2SUsMap.getTrueMemOrderLatency());
691}
692
693void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
694 assert(BarrierChain != nullptr);
695
696 for (auto &[V, SUs] : map) {
697 (void)V;
698 for (auto *SU : SUs)
699 SU->addPredBarrier(BarrierChain);
700 }
701 map.clear();
702}
703
704void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
705 assert(BarrierChain != nullptr);
706
707 // Go through all lists of SUs.
708 for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
709 Value2SUsMap::iterator CurrItr = I++;
710 SUList &sus = CurrItr->second;
711 SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
712 for (; SUItr != SUEE; ++SUItr) {
713 // Stop on BarrierChain or any instruction above it.
714 if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
715 break;
716
717 (*SUItr)->addPredBarrier(BarrierChain);
718 }
719
720 // Also remove the BarrierChain from the list if present.
721 if (SUItr != SUEE && *SUItr == BarrierChain)
722 SUItr++;
723
724 // Remove all SUs that are now successors of BarrierChain.
725 if (SUItr != sus.begin())
726 sus.erase(sus.begin(), SUItr);
727 }
728
729 // Remove all entries with empty su lists.
730 map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
731 return (mapEntry.second.empty()); });
732
733 // Recompute the size of the map (NumNodes).
734 map.reComputeSize();
735}
736
737void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
738 RegPressureTracker *RPTracker,
739 PressureDiffs *PDiffs,
740 LiveIntervals *LIS,
741 bool TrackLaneMasks) {
742 const TargetSubtargetInfo &ST = MF.getSubtarget();
743 bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
744 : ST.useAA();
745 AAForDep = UseAA ? AA : nullptr;
746
747 BarrierChain = nullptr;
748
749 this->TrackLaneMasks = TrackLaneMasks;
750 MISUnitMap.clear();
751 ScheduleDAG::clearDAG();
752
753 // Create an SUnit for each real instruction.
754 initSUnits();
755
756 if (PDiffs)
757 PDiffs->init(SUnits.size());
758
759 // We build scheduling units by walking a block's instruction list
760 // from bottom to top.
761
762 // Each MI's memory operand(s) is analyzed to a list of underlying
763 // objects. The SU is then inserted in the SUList(s) mapped from the
764 // Value(s). Each Value thus gets mapped to lists of SUs depending
765 // on it, stores and loads kept separately. Two SUs are trivially
766 // non-aliasing if they both depend on only identified Values and do
767 // not share any common Value.
768 Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
769
770 // Certain memory accesses are known to not alias any SU in Stores
771 // or Loads, and have therefore their own 'NonAlias'
772 // domain. E.g. spill / reload instructions never alias LLVM IR
773 // Values. It would be nice to assume that this type of memory
774 // accesses always have a proper memory operand modelling, and are
775 // therefore never unanalyzable, but this is conservatively not
776 // done.
777 Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
778
779 // Track all instructions that may raise floating-point exceptions.
780 // These do not depend on one another (or normal loads or stores), but
781 // must not be rescheduled across global barriers. Note that we don't
782 // really need a "map" here since we don't track those MIs by value;
783 // using the same Value2SUsMap data type here is simply a matter of
784 // convenience.
785 Value2SUsMap FPExceptions;
786
787 // Remove any stale debug info; sometimes BuildSchedGraph is called again
788 // without emitting the info from the previous call.
789 DbgValues.clear();
790 FirstDbgValue = nullptr;
791
792 assert(Defs.empty() && Uses.empty() &&
793 "Only BuildGraph should update Defs/Uses");
794 Defs.setUniverse(TRI->getNumRegs());
795 Uses.setUniverse(TRI->getNumRegs());
796
797 assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
798 assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
799 unsigned NumVirtRegs = MRI.getNumVirtRegs();
800 CurrentVRegDefs.setUniverse(NumVirtRegs);
801 CurrentVRegUses.setUniverse(NumVirtRegs);
802
803 // Model data dependencies between instructions being scheduled and the
804 // ExitSU.
805 addSchedBarrierDeps();
806
807 // Walk the list of instructions, from bottom moving up.
808 MachineInstr *DbgMI = nullptr;
809 for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
810 MII != MIE; --MII) {
811 MachineInstr &MI = *std::prev(MII);
812 if (DbgMI) {
813 DbgValues.emplace_back(DbgMI, &MI);
814 DbgMI = nullptr;
815 }
816
817 if (MI.isDebugValue() || MI.isDebugPHI()) {
818 DbgMI = &MI;
819 continue;
820 }
821
822 if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
823 continue;
824
825 SUnit *SU = MISUnitMap[&MI];
826 assert(SU && "No SUnit mapped to this MI");
827
828 if (RPTracker) {
829 RegisterOperands RegOpers;
830 RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
831 if (TrackLaneMasks) {
832 SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
833 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
834 }
835 if (PDiffs != nullptr)
836 PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
837
838 if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
839 RPTracker->recedeSkipDebugValues();
840 assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
841 RPTracker->recede(RegOpers);
842 }
843
844 assert(
845 (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
846 "Cannot schedule terminators or labels!");
847
848 // Add register-based dependencies (data, anti, and output).
849 // For some instructions (calls, returns, inline-asm, etc.) there can
850 // be explicit uses and implicit defs, in which case the use will appear
851 // on the operand list before the def. Do two passes over the operand
852 // list to make sure that defs are processed before any uses.
853 bool HasVRegDef = false;
854 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
855 const MachineOperand &MO = MI.getOperand(j);
856 if (!MO.isReg() || !MO.isDef())
857 continue;
858 Register Reg = MO.getReg();
859 if (Reg.isPhysical()) {
860 addPhysRegDeps(SU, j);
861 } else if (Reg.isVirtual()) {
862 HasVRegDef = true;
863 addVRegDefDeps(SU, j);
864 }
865 }
866 // Now process all uses.
867 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
868 const MachineOperand &MO = MI.getOperand(j);
869 // Only look at use operands.
870 // We do not need to check for MO.readsReg() here because subsequent
871 // subregister defs will get output dependence edges and need no
872 // additional use dependencies.
873 if (!MO.isReg() || !MO.isUse())
874 continue;
875 Register Reg = MO.getReg();
876 if (Reg.isPhysical()) {
877 addPhysRegDeps(SU, j);
878 } else if (Reg.isVirtual() && MO.readsReg()) {
879 addVRegUseDeps(SU, j);
880 }
881 }
882
883 // If we haven't seen any uses in this scheduling region, create a
884 // dependence edge to ExitSU to model the live-out latency. This is required
885 // for vreg defs with no in-region use, and prefetches with no vreg def.
886 //
887 // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
888 // check currently relies on being called before adding chain deps.
889 if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
890 SDep Dep(SU, SDep::Artificial);
891 Dep.setLatency(SU->Latency - 1);
892 ExitSU.addPred(Dep);
893 }
894
895 // Add memory dependencies (Note: isStoreToStackSlot and
896 // isLoadFromStackSlot are not usable after stack slots are lowered to
897 // actual addresses).
898
899 // This is a barrier event that acts as a pivotal node in the DAG.
900 if (isGlobalMemoryObject(&MI)) {
901
902 // Become the barrier chain.
903 if (BarrierChain)
904 BarrierChain->addPredBarrier(SU);
905 BarrierChain = SU;
906
907 LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
908 << BarrierChain->NodeNum << ").\n";);
909
910 // Add dependencies against everything below it and clear maps.
911 addBarrierChain(Stores);
912 addBarrierChain(Loads);
913 addBarrierChain(NonAliasStores);
914 addBarrierChain(NonAliasLoads);
915 addBarrierChain(FPExceptions);
916
917 continue;
918 }
919
920 // Instructions that may raise FP exceptions may not be moved
921 // across any global barriers.
922 if (MI.mayRaiseFPException()) {
923 if (BarrierChain)
924 BarrierChain->addPredBarrier(SU);
925
926 FPExceptions.insert(SU, UnknownValue);
927
928 if (FPExceptions.size() >= HugeRegion) {
929 LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
930 Value2SUsMap empty;
931 reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
932 }
933 }
934
935 // If it's not a store or a variant load, we're done.
936 if (!MI.mayStore() &&
937 !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
938 continue;
939
940 // Always add a dependency edge to BarrierChain if present.
941 if (BarrierChain)
942 BarrierChain->addPredBarrier(SU);
943
944 // Find the underlying objects for MI. The Objs vector is either
945 // empty, or filled with the Values of memory locations which this
946 // SU depends on.
947 UnderlyingObjectsVector Objs;
948 bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
949 MF.getDataLayout());
950
951 if (MI.mayStore()) {
952 if (!ObjsFound) {
953 // An unknown store depends on all stores and loads.
954 addChainDependencies(SU, Stores);
955 addChainDependencies(SU, NonAliasStores);
956 addChainDependencies(SU, Loads);
957 addChainDependencies(SU, NonAliasLoads);
958
959 // Map this store to 'UnknownValue'.
960 Stores.insert(SU, UnknownValue);
961 } else {
962 // Add precise dependencies against all previously seen memory
963 // accesses mapped to the same Value(s).
964 for (const UnderlyingObject &UnderlObj : Objs) {
965 ValueType V = UnderlObj.getValue();
966 bool ThisMayAlias = UnderlObj.mayAlias();
967
968 // Add dependencies to previous stores and loads mapped to V.
969 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
970 addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
971 }
972 // Update the store map after all chains have been added to avoid adding
973 // a self-loop edge if multiple underlying objects are present.
974 for (const UnderlyingObject &UnderlObj : Objs) {
975 ValueType V = UnderlObj.getValue();
976 bool ThisMayAlias = UnderlObj.mayAlias();
977
978 // Map this store to V.
979 (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
980 }
981 // The store may have dependencies to unanalyzable loads and
982 // stores.
983 addChainDependencies(SU, Loads, UnknownValue);
984 addChainDependencies(SU, Stores, UnknownValue);
985 }
986 } else { // SU is a load.
987 if (!ObjsFound) {
988 // An unknown load depends on all stores.
989 addChainDependencies(SU, Stores);
990 addChainDependencies(SU, NonAliasStores);
991
992 Loads.insert(SU, UnknownValue);
993 } else {
994 for (const UnderlyingObject &UnderlObj : Objs) {
995 ValueType V = UnderlObj.getValue();
996 bool ThisMayAlias = UnderlObj.mayAlias();
997
998 // Add precise dependencies against all previously seen stores
999 // mapping to the same Value(s).
1000 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
1001
1002 // Map this load to V.
1003 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
1004 }
1005 // The load may have dependencies to unanalyzable stores.
1006 addChainDependencies(SU, Stores, UnknownValue);
1007 }
1008 }
1009
1010 // Reduce maps if they grow huge.
1011 if (Stores.size() + Loads.size() >= HugeRegion) {
1012 LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
1013 reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
1014 }
1015 if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
1016 LLVM_DEBUG(
1017 dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
1018 reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
1019 }
1020 }
1021
1022 if (DbgMI)
1023 FirstDbgValue = DbgMI;
1024
1025 Defs.clear();
1026 Uses.clear();
1027 CurrentVRegDefs.clear();
1028 CurrentVRegUses.clear();
1029
1030 Topo.MarkDirty();
1031}
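// A rough sketch of how a scheduler subclass typically drives this method
// (simplified from the ScheduleDAGMI flow; the class name is hypothetical and
// details vary by scheduler):
//
//   void MyScheduleDAG::schedule() {
//     buildSchedGraph(AA);                // SUnits + all dependence edges
//     Topo.InitDAGTopologicalSorting();
//     // ... pick SUnits top-down/bottom-up and move instructions ...
//   }
//
// The RPTracker/PDiffs/LIS/TrackLaneMasks parameters are only needed when the
// caller also wants register-pressure bookkeeping while the graph is built.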
1032
1033static raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue *PSV) {
1034 PSV->printCustom(OS);
1035 return OS;
1036}
1037
1038void ScheduleDAGInstrs::Value2SUsMap::dump() {
1039 for (const auto &[ValType, SUs] : *this) {
1040 if (isa<const Value *>(ValType)) {
1041 const Value *V = cast<const Value *>(ValType);
1042 if (isa<UndefValue>(V))
1043 dbgs() << "Unknown";
1044 else
1045 V->printAsOperand(dbgs());
1046 } else if (isa<const PseudoSourceValue *>(ValType))
1047 dbgs() << cast<const PseudoSourceValue *>(ValType);
1048 else
1049 llvm_unreachable("Unknown Value type.");
1050
1051 dbgs() << " : ";
1052 dumpSUList(SUs);
1053 }
1054}
1055
1056void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
1057 Value2SUsMap &loads, unsigned N) {
1058 LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
1059 dbgs() << "Loading SUnits:\n"; loads.dump());
1060
1061 // Insert all SU's NodeNums into a vector and sort it.
1062 std::vector<unsigned> NodeNums;
1063 NodeNums.reserve(stores.size() + loads.size());
1064 for (const auto &[V, SUs] : stores) {
1065 (void)V;
1066 for (const auto *SU : SUs)
1067 NodeNums.push_back(SU->NodeNum);
1068 }
1069 for (const auto &[V, SUs] : loads) {
1070 (void)V;
1071 for (const auto *SU : SUs)
1072 NodeNums.push_back(SU->NodeNum);
1073 }
1074 llvm::sort(NodeNums);
1075
1076 // The last N elements in NodeNums will be removed, and the SU with
1077 // the lowest NodeNum among them will become the new BarrierChain to
1078 // give the not-yet-seen SUs a dependency on the removed SUs.
1079 assert(N <= NodeNums.size());
1080 SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
1081 if (BarrierChain) {
1082 // The aliasing and non-aliasing maps reduce independently of each
1083 // other, but share a common BarrierChain. Check if the
1084 // newBarrierChain is above the former one. If it is not, it may
1085 // introduce a loop to use newBarrierChain, so keep the old one.
1086 if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1087 BarrierChain->addPredBarrier(newBarrierChain);
1088 BarrierChain = newBarrierChain;
1089 LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
1090 << BarrierChain->NodeNum << ").\n";);
1091 }
1092 else
1093 LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
1094 << BarrierChain->NodeNum << ").\n";);
1095 }
1096 else
1097 BarrierChain = newBarrierChain;
1098
1099 insertBarrierChain(stores);
1100 insertBarrierChain(loads);
1101
1102 LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
1103 dbgs() << "Loading SUnits:\n"; loads.dump());
1104}
1105
1106static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs,
1107 MachineInstr &MI, bool addToLiveRegs) {
1108 for (MachineOperand &MO : MI.operands()) {
1109 if (!MO.isReg() || !MO.readsReg())
1110 continue;
1111 Register Reg = MO.getReg();
1112 if (!Reg)
1113 continue;
1114
1115 // Things that are available after the instruction are killed by it.
1116 bool IsKill = LiveRegs.available(Reg);
1117
1118 // Exception: Do not kill reserved registers
1119 MO.setIsKill(IsKill && !MRI.isReserved(Reg));
1120 if (addToLiveRegs)
1121 LiveRegs.addReg(Reg);
1122 }
1123}
1124
1125void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
1126 LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
1127
1128 LiveRegs.init(*TRI);
1129 LiveRegs.addLiveOuts(MBB);
1130
1131 // Examine block from end to start...
1132 for (MachineInstr &MI : llvm::reverse(MBB)) {
1133 if (MI.isDebugOrPseudoInstr())
1134 continue;
1135
1136 // Update liveness. Registers that are defed but not used in this
1137 // instruction are now dead. Mark register and all subregs as they
1138 // are completely defined.
1139 for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
1140 const MachineOperand &MO = *O;
1141 if (MO.isReg()) {
1142 if (!MO.isDef())
1143 continue;
1144 Register Reg = MO.getReg();
1145 if (!Reg)
1146 continue;
1147 LiveRegs.removeReg(Reg);
1148 } else if (MO.isRegMask()) {
1149 LiveRegs.removeRegsNotPreserved(MO.getRegMask());
1150 }
1151 }
1152
1153 // If there is a bundle header fix it up first.
1154 if (!MI.isBundled()) {
1155 toggleKills(MRI, LiveRegs, MI, true);
1156 } else {
1157 MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
1158 if (MI.isBundle())
1159 toggleKills(MRI, LiveRegs, MI, false);
1160
1161 // Some targets make the (questionable) assumption that the instructions
1162 // inside the bundle are ordered and consequently only the last use of
1163 // a register inside the bundle can kill it.
1164 MachineBasicBlock::instr_iterator I = std::next(Bundle);
1165 while (I->isBundledWithSucc())
1166 ++I;
1167 do {
1168 if (!I->isDebugOrPseudoInstr())
1169 toggleKills(MRI, LiveRegs, *I, true);
1170 --I;
1171 } while (I != Bundle);
1172 }
1173 }
1174}
1175
1176void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
1177#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1178 dumpNodeName(SU);
1179 if (SchedPrintCycles)
1180 dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
1181 << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
1182 dbgs() << ": ";
1183 SU.getInstr()->dump();
1184#endif
1185}
1186
1187void ScheduleDAGInstrs::dump() const {
1188#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1189 if (EntrySU.getInstr() != nullptr)
1190 dumpNodeAll(EntrySU);
1191 for (const SUnit &SU : SUnits)
1192 dumpNodeAll(SU);
1193 if (ExitSU.getInstr() != nullptr)
1194 dumpNodeAll(ExitSU);
1195#endif
1196}
1197
1198std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1199 std::string s;
1200 raw_string_ostream oss(s);
1201 if (SU == &EntrySU)
1202 oss << "<entry>";
1203 else if (SU == &ExitSU)
1204 oss << "<exit>";
1205 else
1206 SU->getInstr()->print(oss, /*IsStandalone=*/true);
1207 return oss.str();
1208}
1209
1210/// Return the basic block label. It is not necessarily unique because a block
1211/// contains multiple scheduling regions. But it is fine for visualization.
1212std::string ScheduleDAGInstrs::getDAGName() const {
1213 return "dag." + BB->getFullName();
1214}
1215
1216bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
1217 return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
1218}
1219
1220bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
1221 if (SuccSU != &ExitSU) {
1222 // Do not use WillCreateCycle, it assumes SD scheduling.
1223 // If Pred is reachable from Succ, then the edge creates a cycle.
1224 if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
1225 return false;
1226 Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
1227 }
1228 SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
1229 // Return true regardless of whether a new edge needed to be inserted.
1230 return true;
1231}
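// Usage sketch, e.g. from a DAG mutation that wants an ordering edge without
// creating a cycle (illustrative only):
//
//   if (DAG->canAddEdge(SuccSU, PredSU))
//     DAG->addEdge(SuccSU, SDep(PredSU, SDep::Artificial));
//
// addEdge() itself re-checks reachability, so a mutation may also simply call
// it and inspect the returned bool, which is false when the requested edge
// would have closed a cycle.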
1232
1233//===----------------------------------------------------------------------===//
1234// SchedDFSResult Implementation
1235//===----------------------------------------------------------------------===//
1236
1237namespace llvm {
1238
1239/// Internal state used to compute SchedDFSResult.
1240class SchedDFSImpl {
1241 SchedDFSResult &R;
1242
1243 /// Join DAG nodes into equivalence classes by their subtree.
1244 IntEqClasses SubtreeClasses;
1245 /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1246 std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1247
1248 struct RootData {
1249 unsigned NodeID;
1250 unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1251 unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1252 /// children.
1253
1254 RootData(unsigned id): NodeID(id),
1255 ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1256
1257 unsigned getSparseSetIndex() const { return NodeID; }
1258 };
1259
1260 SparseSet<RootData> RootSet;
1261
1262public:
1263 SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1264 RootSet.setUniverse(R.DFSNodeData.size());
1265 }
1266
1267 /// Returns true if this node has been visited by the DFS traversal.
1268 ///
1269 /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1270 /// ID. Later, SubtreeID is updated but remains valid.
1271 bool isVisited(const SUnit *SU) const {
1272 return R.DFSNodeData[SU->NodeNum].SubtreeID
1273 != SchedDFSResult::InvalidSubtreeID;
1274 }
1275
1276 /// Initializes this node's instruction count. We don't need to flag the node
1277 /// visited until visitPostorder because the DAG cannot have cycles.
1278 void visitPreorder(const SUnit *SU) {
1279 R.DFSNodeData[SU->NodeNum].InstrCount =
1280 SU->getInstr()->isTransient() ? 0 : 1;
1281 }
1282
1283 /// Called once for each node after all predecessors are visited. Revisit this
1284 /// node's predecessors and potentially join them now that we know the ILP of
1285 /// the other predecessors.
1286 void visitPostorderNode(const SUnit *SU) {
1287 // Mark this node as the root of a subtree. It may be joined with its
1288 // successors later.
1289 R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1290 RootData RData(SU->NodeNum);
1291 RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1292
1293 // If any predecessors are still in their own subtree, they either cannot be
1294 // joined or are large enough to remain separate. If this parent node's
1295 // total instruction count is not greater than a child subtree by at least
1296 // the subtree limit, then try to join it now since splitting subtrees is
1297 // only useful if multiple high-pressure paths are possible.
1298 unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1299 for (const SDep &PredDep : SU->Preds) {
1300 if (PredDep.getKind() != SDep::Data)
1301 continue;
1302 unsigned PredNum = PredDep.getSUnit()->NodeNum;
1303 if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1304 joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
1305
1306 // Either link or merge the TreeData entry from the child to the parent.
1307 if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1308 // If the predecessor's parent is invalid, this is a tree edge and the
1309 // current node is the parent.
1310 if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1311 RootSet[PredNum].ParentNodeID = SU->NodeNum;
1312 }
1313 else if (RootSet.count(PredNum)) {
1314 // The predecessor is not a root, but is still in the root set. This
1315 // must be the new parent that it was just joined to. Note that
1316 // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1317 // set to the original parent.
1318 RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1319 RootSet.erase(PredNum);
1320 }
1321 }
1322 RootSet[SU->NodeNum] = RData;
1323 }
1324
1325 /// Called once for each tree edge after calling visitPostOrderNode on
1326 /// the predecessor. Increment the parent node's instruction count and
1327 /// preemptively join this subtree to its parent's if it is small enough.
1328 void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1329 R.DFSNodeData[Succ->NodeNum].InstrCount
1330 += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1331 joinPredSubtree(PredDep, Succ);
1332 }
1333
1334 /// Adds a connection for cross edges.
1335 void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1336 ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
1337 }
1338
1339 /// Sets each node's subtree ID to the representative ID and record
1340 /// connections between trees.
1341 void finalize() {
1342 SubtreeClasses.compress();
1343 R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
1344 assert(SubtreeClasses.getNumClasses() == RootSet.size()
1345 && "number of roots should match trees");
1346 for (const RootData &Root : RootSet) {
1347 unsigned TreeID = SubtreeClasses[Root.NodeID];
1348 if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1349 R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
1350 R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
1351 // Note that SubInstrCount may be greater than InstrCount if we joined
1352 // subtrees across a cross edge. InstrCount will be attributed to the
1353 // original parent, while SubInstrCount will be attributed to the joined
1354 // parent.
1355 }
1356 R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1357 R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1358 LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1359 for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1360 R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1361 LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1362 << R.DFSNodeData[Idx].SubtreeID << '\n');
1363 }
1364 for (const auto &[Pred, Succ] : ConnectionPairs) {
1365 unsigned PredTree = SubtreeClasses[Pred->NodeNum];
1366 unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
1367 if (PredTree == SuccTree)
1368 continue;
1369 unsigned Depth = Pred->getDepth();
1370 addConnection(PredTree, SuccTree, Depth);
1371 addConnection(SuccTree, PredTree, Depth);
1372 }
1373 }
1374
1375protected:
1376 /// Joins the predecessor subtree with the successor that is its DFS parent.
1377 /// Applies some heuristics before joining.
1378 bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1379 bool CheckLimit = true) {
1380 assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1381
1382 // Check if the predecessor is already joined.
1383 const SUnit *PredSU = PredDep.getSUnit();
1384 unsigned PredNum = PredSU->NodeNum;
1385 if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1386 return false;
1387
1388 // Four is the magic number of successors before a node is considered a
1389 // pinch point.
1390 unsigned NumDataSucs = 0;
1391 for (const SDep &SuccDep : PredSU->Succs) {
1392 if (SuccDep.getKind() == SDep::Data) {
1393 if (++NumDataSucs >= 4)
1394 return false;
1395 }
1396 }
1397 if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1398 return false;
1399 R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1400 SubtreeClasses.join(Succ->NodeNum, PredNum);
1401 return true;
1402 }
1403
1404 /// Called by finalize() to record a connection between trees.
1405 void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1406 if (!Depth)
1407 return;
1408
1409 do {
1410 SmallVectorImpl<SchedDFSResult::Connection> &Connections =
1411 R.SubtreeConnections[FromTree];
1412 for (SchedDFSResult::Connection &C : Connections) {
1413 if (C.TreeID == ToTree) {
1414 C.Level = std::max(C.Level, Depth);
1415 return;
1416 }
1417 }
1418 Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1419 FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1420 } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1421 }
1422};
1423
1424} // end namespace llvm
1425
1426namespace {
1427
1428/// Manage the stack used by a reverse depth-first search over the DAG.
1429class SchedDAGReverseDFS {
1430 std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1431
1432public:
1433 bool isComplete() const { return DFSStack.empty(); }
1434
1435 void follow(const SUnit *SU) {
1436 DFSStack.emplace_back(SU, SU->Preds.begin());
1437 }
1438 void advance() { ++DFSStack.back().second; }
1439
1440 const SDep *backtrack() {
1441 DFSStack.pop_back();
1442 return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1443 }
1444
1445 const SUnit *getCurr() const { return DFSStack.back().first; }
1446
1447 SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1448
1449 SUnit::const_pred_iterator getPredEnd() const {
1450 return getCurr()->Preds.end();
1451 }
1452};
1453
1454} // end anonymous namespace
1455
1456static bool hasDataSucc(const SUnit *SU) {
1457 for (const SDep &SuccDep : SU->Succs) {
1458 if (SuccDep.getKind() == SDep::Data &&
1459 !SuccDep.getSUnit()->isBoundaryNode())
1460 return true;
1461 }
1462 return false;
1463}
1464
1465/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1466/// search from this root.
1467void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1468 if (!IsBottomUp)
1469 llvm_unreachable("Top-down ILP metric is unimplemented");
1470
1471 SchedDFSImpl Impl(*this);
1472 for (const SUnit &SU : SUnits) {
1473 if (Impl.isVisited(&SU) || hasDataSucc(&SU))
1474 continue;
1475
1476 SchedDAGReverseDFS DFS;
1477 Impl.visitPreorder(&SU);
1478 DFS.follow(&SU);
1479 while (true) {
1480 // Traverse the leftmost path as far as possible.
1481 while (DFS.getPred() != DFS.getPredEnd()) {
1482 const SDep &PredDep = *DFS.getPred();
1483 DFS.advance();
1484 // Ignore non-data edges.
1485 if (PredDep.getKind() != SDep::Data
1486 || PredDep.getSUnit()->isBoundaryNode()) {
1487 continue;
1488 }
1489 // An already visited edge is a cross edge, assuming an acyclic DAG.
1490 if (Impl.isVisited(PredDep.getSUnit())) {
1491 Impl.visitCrossEdge(PredDep, DFS.getCurr());
1492 continue;
1493 }
1494 Impl.visitPreorder(PredDep.getSUnit());
1495 DFS.follow(PredDep.getSUnit());
1496 }
1497 // Visit the top of the stack in postorder and backtrack.
1498 const SUnit *Child = DFS.getCurr();
1499 const SDep *PredDep = DFS.backtrack();
1500 Impl.visitPostorderNode(Child);
1501 if (PredDep)
1502 Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1503 if (DFS.isComplete())
1504 break;
1505 }
1506 }
1507 Impl.finalize();
1508}
1509
1510/// The root of the given SubtreeID was just scheduled. For all subtrees
1511/// connected to this tree, record the depth of the connection so that the
1512/// nearest connected subtrees can be prioritized.
1513void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1514 for (const Connection &C : SubtreeConnections[SubtreeID]) {
1515 SubtreeConnectLevels[C.TreeID] =
1516 std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1517 LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
1518 << SubtreeConnectLevels[C.TreeID] << '\n');
1519 }
1520}
1521
1522#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1523LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
1524 OS << InstrCount << " / " << Length << " = ";
1525 if (!Length)
1526 OS << "BADILP";
1527 else
1528 OS << format("%g", ((double)InstrCount / Length));
1529}
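// For example, an ILPValue with InstrCount == 8 and Length == 4 prints as
// "8 / 4 = 2", i.e. roughly two instructions available per step along the
// critical path; a zero Length is reported as BADILP instead of dividing.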
1530
1531LLVM_DUMP_METHOD void ILPValue::dump() const {
1532 dbgs() << *this << '\n';
1533}
1534
1535namespace llvm {
1536
1537LLVM_ATTRIBUTE_UNUSED
1538raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1539 Val.print(OS);
1540 return OS;
1541}
1542
1543} // end namespace llvm
1544
1545#endif
unsigned SubReg
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned InstrCount
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
bool End
Definition: ELF_riscv.cpp:480
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
static Register UseReg(const MachineOperand &MO)
hexagon widen stores
IRTranslator LLVM IR MI
Equivalence classes for small integers.
A common definition of LaneBitmask for use in TableGen and CodeGen.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
static bool hasDataSucc(const SUnit *SU)
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::desc("Enable use of AA during MI DAG construction"))
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
static unsigned getReductionSize()
static void dumpSUList(const ScheduleDAGInstrs::SUList &L)
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static bool isGlobalMemoryObject(MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
static cl::opt< bool > SchedPrintCycles("sched-print-cycles", cl::Hidden, cl::init(false), cl::desc("Report top/bottom cycles when dumping SUnit instances"))
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
void reComputeSize()
Counts the number of SUs in this map after a reduction.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
void clear()
Clears map from all contents.
void clearList(ValueType V)
Clears the list of SUs mapped to V.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
void compress()
compress - Compress equivalence classes by numbering them 0 .
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:72
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
Definition: LiveRegUnits.h:73
void addReg(MCPhysReg Reg)
Adds register units covered by physical register Reg.
Definition: LiveRegUnits.h:86
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeRegsNotPreserved(const uint32_t *RegMask)
Removes register units not preserved by the regmask RegMask.
void removeReg(MCPhysReg Reg)
Removes all register units covered by physical register Reg.
Definition: LiveRegUnits.h:102
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
bool hasImplicitUseOfPhysReg(unsigned Reg) const
Return true if this instruction implicitly uses the specified physical register.
Definition: MCInstrDesc.h:587
bool hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:32
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
iterator_range< MCRegUnitIterator > regunits(MCRegister Reg) const
Returns an iterator range over all regunits for Reg.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register informa...
Instructions::iterator instr_iterator
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
iterator_range< succ_iterator > successors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasTailCall() const
Returns true if the function contains a tail call.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediately following it.
Definition: MachineInstr.h:931
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:916
iterator_range< filtered_mop_iterator > all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
Definition: MachineInstr.h:741
bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:541
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during register allocation (such as copy-like instructions), or if this instruction doesn't have an execution-time cost.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
typename VectorType::iterator iterator
Definition: MapVector.h:49
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:98
iterator find(const ValueType &Key)
Definition: MapVector.h:167
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
iterator begin()
Definition: MapVector.h:69
void clear()
Definition: MapVector.h:88
Array of PressureDiffs.
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
void init(unsigned N)
Initialize an array of N PressureDiffs.
Special value supplied for machine level alias analysis.
Track the current register pressure at some position in the instruction stream, and remember the high water mark.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs lists based on the MachineOperand flags.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the RegisterOperands accordingly.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
@ MayAliasMem
Nonvolatile load/Store instructions that may alias.
Definition: ScheduleDAG.h:70
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for correctness.
Definition: ScheduleDAG.h:200
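To make the dependence kinds above concrete, here is a hedged sketch of how a DAG builder creates register and memory-order edges and attaches them with SUnit::addPred(); DefSU, UseSU and Reg are placeholders supplied by the caller, not values taken from the real builder:

#include "llvm/CodeGen/ScheduleDAG.h"

void addExampleEdges(llvm::SUnit *UseSU, llvm::SUnit *DefSU, unsigned Reg) {
  // True (RAW) register dependence: UseSU reads the value DefSU defines.
  llvm::SDep DataDep(DefSU, llvm::SDep::Data, Reg);
  DataDep.setLatency(2);                  // latency normally comes from the sched model
  UseSU->addPred(DataDep);

  // Anti (WAR) and output (WAW) dependences; Reg must be a non-zero
  // register number for these kinds.
  UseSU->addPred(llvm::SDep(DefSU, llvm::SDep::Anti, Reg));
  UseSU->addPred(llvm::SDep(DefSU, llvm::SDep::Output, Reg));

  // An order edge between possibly aliasing memory accesses.
  UseSU->addPred(llvm::SDep(DefSU, llvm::SDep::MayAliasMem));
}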
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store followed by a load.
Definition: ScheduleDAG.h:384
unsigned NumSuccs
Definition: ScheduleDAG.h:267
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:278
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
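A short example of traversing an SUnit's edges with the members documented above (printDataPreds and the OS parameter are illustrative, not part of this file):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/raw_ostream.h"

void printDataPreds(const llvm::SUnit &SU, llvm::raw_ostream &OS) {
  OS << "SU(" << SU.NodeNum << ") latency=" << SU.Latency << "\n";
  for (const llvm::SDep &Pred : SU.Preds) {
    if (Pred.getKind() != llvm::SDep::Data)
      continue;                                   // only true dependences
    const llvm::SUnit *PredSU = Pred.getSUnit();
    OS << "  data pred: SU(" << PredSU->NodeNum << ")";
    if (const llvm::MachineInstr *MI = PredSU->getInstr())
      OS << " opcode=" << MI->getOpcode();        // prints the opcode number
    OS << "\n";
  }
}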
Internal state used to compute SchedDFSResult.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
void finalize()
Sets each node's subtree ID to the representative ID and records connections between trees.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
bool isVisited(const SUnit *SU) const
Returns true if this node has been visited by the DFS traversal.
SchedDFSImpl(SchedDFSResult &r)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
LiveRegUnits LiveRegs
Set of live physical registers for updating kill flags.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to an SUnit.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx is mapped to an SUnit member of this ScheduleDAG.
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not create the necessary dependencies to prevent reordering.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clears the map.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the same scheduling region that depend on the physical register referenced at OperIdx.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches a resolved scheduling class for an SUnit.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU's instruction that defines a physical register.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and returns a pointer to it.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
RegUnit2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instructions.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
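The region-oriented entries here (startBlock, enterRegion, buildSchedGraph, exitRegion, finishBlock) are normally driven in a fixed sequence. The sketch below shows that sequence, assuming a concrete ScheduleDAGInstrs subclass and a chosen region are supplied by the caller; the actual scheduling policy is omitted:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"

void scheduleOneRegion(llvm::ScheduleDAGInstrs &DAG,
                       llvm::MachineBasicBlock *MBB,
                       llvm::MachineBasicBlock::iterator RegionBegin,
                       llvm::MachineBasicBlock::iterator RegionEnd,
                       unsigned NumRegionInstrs, llvm::AAResults *AA) {
  DAG.startBlock(MBB);
  DAG.enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
  DAG.buildSchedGraph(AA);      // creates SUnits and all dependence edges
  // ... run a scheduling algorithm over DAG.SUnits here ...
  DAG.exitRegion();
  DAG.finishBlock();
}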
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to the scheduling barrier by adding the exit SU to the register defs and use lists.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AAResults and Target fail to deny the dependency.
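A hedged sketch of the alias-guarded chain edge that addChainDependency describes: an order edge is added only when MachineInstr::mayAlias() cannot rule out a conflict. maybeAddChainDep and UseTBAAFlag are illustrative names, not the in-tree helper or flag:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"

void maybeAddChainDep(llvm::AAResults *AA, llvm::SUnit *SUa, llvm::SUnit *SUb,
                      unsigned Latency, bool UseTBAAFlag) {
  llvm::MachineInstr &MIa = *SUa->getInstr();
  llvm::MachineInstr &MIb = *SUb->getInstr();
  if (!MIa.mayAlias(AA, MIb, UseTBAAFlag))
    return;                                    // provably independent accesses
  llvm::SDep Dep(SUa, llvm::SDep::MayAliasMem);
  Dep.setLatency(Latency);
  SUb->addPred(Dep);                           // SUa must come before SUb
}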
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
const MachineFrameInfo & MFI
bool deadDefHasNoUse(const MachineOperand &MO)
Returns true if the def register in MO has no uses.
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:776
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:63
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:562
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
void dumpNodeAll(const SUnit &SU) const
void dumpNodeName(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:563
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
iterator find(const KeyT &Key)
Find an element by its key.
bool empty() const
Returns true if the set is empty.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void clear()
Clears the set.
iterator erase(iterator I)
Erases an existing element identified by a valid iterator.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void eraseAll(const KeyT &K)
Erase all elements with the given key.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:190
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:289
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise.
Definition: SparseSet.h:241
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
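A self-contained example of the SparseSet interface listed above, using plain unsigned keys (sparseSetExample is an illustrative name; the DAG builder itself keys such sets by register or SUnit indices):

#include "llvm/ADT/SparseSet.h"
#include <cassert>

void sparseSetExample() {
  llvm::SparseSet<unsigned> Set;
  Set.setUniverse(128);          // keys must be smaller than 128
  Set.insert(5);
  Set.insert(42);
  assert(Set.count(5) == 1 && Set.count(7) == 0);
  auto It = Set.find(42);
  if (It != Set.end())
    Set.erase(It);               // erase by iterator, as documented above
  assert(Set.size() == 1);
}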
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of register in this class.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
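A hedged sketch of how the lane-mask queries above can be combined to decide whether two subregister operands of the same virtual register touch common lanes; subRegsMayOverlap and its SubIdx parameters are illustrative, with subregister index 0 taken to mean the whole register:

#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"

bool subRegsMayOverlap(const llvm::TargetRegisterInfo &TRI,
                       unsigned SubIdxA, unsigned SubIdxB) {
  llvm::LaneBitmask A = SubIdxA ? TRI.getSubRegIndexLaneMask(SubIdxA)
                                : llvm::LaneBitmask::getAll();
  llvm::LaneBitmask B = SubIdxB ? TRI.getSubRegIndexLaneMask(SubIdxB)
                                : llvm::LaneBitmask::getAll();
  return (A & B).any();   // true if the two operands share lanes
}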
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
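A hedged sketch of how the latency queries above feed an edge's latency when a register dependence is created; setEdgeLatency and its parameters are placeholders for values the real builder already has in hand:

#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetSchedule.h"

void setEdgeLatency(const llvm::TargetSchedModel &SchedModel,
                    const llvm::MachineInstr *DefMI, unsigned DefOpIdx,
                    const llvm::MachineInstr *UseMI, unsigned UseOpIdx,
                    llvm::SDep &Dep) {
  if (Dep.getKind() == llvm::SDep::Data)
    Dep.setLatency(
        SchedModel.computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
  else if (Dep.getKind() == llvm::SDep::Output)
    Dep.setLatency(SchedModel.computeOutputLatency(DefMI, DefOpIdx, UseMI));
}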
TargetSubtargetInfo - Generic base class for all target subtargets.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
'undef' values are things that do not have specified contents.
Definition: Constants.h:1350
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
std::string & str()
Returns the string's reference.
Definition: raw_ostream.h:678
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Length
Definition: DWP.cpp:456
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:34
void print(raw_ostream &OS) const
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:82
constexpr bool any() const
Definition: LaneBitmask.h:53
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
Definition: MCSchedule.h:63
Record a physical register access.
Mapping from virtual register to SUnit including an operand index.
An individual mapping from virtual register number to SUnit.