//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This implements the ScheduleDAGInstrs class, which supports
/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));

static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.

// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
// together hold this many SUs, a reduction of maps will be done.
static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
                             "prior to scheduling, at which point a trade-off "
                             "is made to avoid excessive compile time."));

static cl::opt<unsigned> ReductionSize(
    "dag-maps-reduction-size", cl::Hidden,
    cl::desc("A huge scheduling region will have maps reduced by this many "
             "nodes at a time. Defaults to HugeRegion / 2."));

static unsigned getReductionSize() {
  // Always reduce a huge region with half of the elements, except
  // when user sets this number explicitly.
  if (ReductionSize.getNumOccurrences() == 0)
    return HugeRegion / 2;
  return ReductionSize;
}

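// For illustration: with the defaults above, once the Stores and Loads maps
// together hold 1000 SUs, getReductionSize() returns 500, so each reduction
// retires half of the tracked nodes. Both knobs are ordinary cl::opt flags,
// so a hypothetical tuning run could pass, e.g.:
//
//   llc -dag-maps-huge-region=10000 -dag-maps-reduction-size=1000 input.ll
//
// trading longer DAG construction for fewer conservative barrier edges.
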
static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "{ ";
  for (const SUnit *su : L) {
    dbgs() << "SU(" << su->NodeNum << ")";
    if (su != L.back())
      dbgs() << ", ";
  }
  dbgs() << "}\n";
#endif
}

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool RemoveKillFlags)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
      RemoveKillFlags(RemoveKillFlags),
      UnknownValue(UndefValue::get(
          Type::getVoidTy(mf.getFunction().getContext()))) {
  DbgValues.clear();

  const TargetSubtargetInfo &ST = mf.getSubtarget();
  SchedModel.init(&ST);
}

/// If this machine instr has memory reference information and it can be
/// tracked to a normal reference to a known object, return the Value
/// for that object. This function returns false if the memory location is
/// unknown or may alias anything.
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                         const MachineFrameInfo &MFI,
                                         UnderlyingObjectsVector &Objects,
                                         const DataLayout &DL) {
  auto allMMOsOkay = [&]() {
    for (const MachineMemOperand *MMO : MI->memoperands()) {
      if (MMO->isVolatile())
        return false;

      if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
        // Functions that contain tail calls don't have unique PseudoSourceValue
        // objects. Two PseudoSourceValues might refer to the same or
        // overlapping locations. The client code calling this function assumes
        // this is not the case. So return a conservative answer of no known
        // object.
        if (MFI.hasTailCall())
          return false;

        // For now, ignore PseudoSourceValues which may alias LLVM IR values
        // because the code that uses this function has no way to cope with
        // such aliases.
        if (PSV->isAliased(&MFI))
          return false;

        bool MayAlias = PSV->mayAlias(&MFI);
        Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
      } else if (const Value *V = MMO->getValue()) {
        SmallVector<Value *, 4> Objs;
        if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
          return false;

        for (Value *V : Objs) {
          assert(isIdentifiedObject(V));
          Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
        }
      } else
        return false;
    }
    return true;
  };

  if (!allMMOsOkay()) {
    Objects.clear();
    return false;
  }

  return true;
}

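// For illustration (schematic MIR, not verbatim): given
//
//   STORE %r, %stack.0   ; MMO carries a spill-slot PseudoSourceValue
//   STORE %s, @gvar      ; MMO carries the IR Value of @gvar
//
// the first store typically yields an Objects entry (PSV, MayAlias=false),
// routing it to the NonAlias maps, while the second yields the identified
// IR object @gvar with MayAlias=true.
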
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = nullptr;
}

void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  NumRegionInstrs = regioninstrs;
}

void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

200 
202  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
203  ExitSU.setInstr(ExitMI);
204  // Add dependencies on the defs and uses of the instruction.
205  if (ExitMI) {
206  for (const MachineOperand &MO : ExitMI->operands()) {
207  if (!MO.isReg() || MO.isDef()) continue;
208  unsigned Reg = MO.getReg();
210  Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
211  } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
212  addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
213  }
214  }
215  }
216  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
217  // For others, e.g. fallthrough, conditional branch, assume the exit
218  // uses all the registers that are livein to the successor blocks.
219  for (const MachineBasicBlock *Succ : BB->successors()) {
220  for (const auto &LI : Succ->liveins()) {
221  if (!Uses.contains(LI.PhysReg))
222  Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
223  }
224  }
225  }
226 }
227 
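// For illustration (schematic): if the region ends in a conditional branch,
//
//   $eax = ...            ; def inside the region
//   JCC %bb.1             ; RegionEnd, wrapped as ExitSU
//
// and $eax is live-in to %bb.1, the loops above record (ExitSU, -1, $eax)
// in Uses, so the def later gains an edge to ExitSU and cannot be
// scheduled past the region boundary.
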
/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = I->OpIdx;
      MachineInstr *RegUse = nullptr;
      SDep Dep;
      if (UseOp < 0)
        Dep = SDep(SU, SDep::Artificial);
      else {
        // Set the hasPhysRegDefs only for physreg defs that have a use within
        // the scheduling region.
        SU->hasPhysRegDefs = true;
        Dep = SDep(SU, SDep::Data, *Alias);
        RegUse = UseSU->getInstr();
      }
      Dep.setLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
                                         UseOp));

      ST.adjustSchedDependency(SU, UseSU, Dep);
      UseSU->addPred(Dep);
    }
  }
}

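// For illustration, a schematic physreg def/use pair (X86-like opcodes,
// hypothetical latency):
//
//   SU(0): $eax = MOV32ri 1
//   SU(1): $ebx = ADD32rr $ebx, $eax
//
// Processing SU(0)'s def of $eax finds SU(1) in Uses, so SU(1) receives
// SDep(SU(0), Data, $eax) with the latency from computeOperandLatency(),
// possibly adjusted by the target in adjustSchedDependency().
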
/// Adds register dependencies (data, anti, and output) from this SUnit
/// to following instructions in the same scheduling region that depend on the
/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();
  // We do not need to track any dependencies for constant registers.
  if (MRI.isConstantPhysReg(Reg))
    return;

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
      SUnit *DefSU = I->SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
        else {
          SDep Dep(SU, Kind, /*Reg=*/*Alias);
          Dep.setLatency(
            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
          DefSU->addPred(Dep);
        }
      }
    }
  }

  if (!MO.isDef()) {
    SU->hasPhysRegUses = true;
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
    if (RemoveKillFlags)
      MO.setIsKill(false);
  } else {
    addPhysRegDataDeps(SU, OperIdx);

    // Clear previous uses and defs of this register and its subregisters.
    for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
      if (Uses.contains(*SubReg))
        Uses.eraseAll(*SubReg);
      if (!MO.isDead())
        Defs.eraseAll(*SubReg);
    }
    if (MO.isDead() && SU->isCall) {
      // Calls will not be reordered because of chain dependencies (see
      // below). Since call operands are dead, calls may continue to be added
      // to the DefList making dependence checking quadratic in the size of
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
      Reg2SUnitsMap::iterator B = P.first;
      Reg2SUnitsMap::iterator I = P.second;
      for (bool isBegin = I == B; !isBegin; /* empty */) {
        isBegin = (--I) == B;
        if (!I->SU->isCall)
          break;
        I = Defs.erase(I);
      }
    }

    // Defs are pushed in the order they are visited and never reordered.
    Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
  }
}

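// For illustration (schematic, NodeNums assigned top-down, walk bottom-up):
//
//   SU(0): $eax = MOV32ri 7     ; def
//   SU(1): use of $eax          ; use
//   SU(2): $eax = MOV32ri 9     ; redef
//
// Visiting SU(1)'s use finds SU(2) in Defs and adds the anti edge
// SU(1) -> SU(2) with latency 0. Visiting SU(0)'s def finds SU(2) in Defs
// (output edge SU(0) -> SU(2) via computeOutputLatency()) and SU(1) in
// Uses (data edge SU(0) -> SU(1) via addPhysRegDataDeps() above).
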
LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
  unsigned Reg = MO.getReg();
  // No point in tracking lanemasks if we don't have interesting subregisters.
  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
  if (!RC.HasDisjunctSubRegs)
    return LaneBitmask::getAll();

  unsigned SubReg = MO.getSubReg();
  if (SubReg == 0)
    return RC.getLaneMask();
  return TRI->getSubRegIndexLaneMask(SubReg);
}

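// For illustration: on a target whose 64-bit register class is covered by
// two disjoint 32-bit subregisters (hypothetical indices sub_lo and sub_hi
// with lane masks 0x1 and 0x2), an operand "%0.sub_lo" yields 0x1, while a
// full def or use of %0 yields the whole-class mask 0x3.
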
/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  LaneBitmask DefLaneMask;
  LaneBitmask KillLaneMask;
  if (TrackLaneMasks) {
    bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
    DefLaneMask = getLaneMaskForMO(MO);
    // If we have a <read-undef> flag, none of the lane values comes from an
    // earlier instruction.
    KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;

    // Clear undef flag, we'll re-add it later once we know which subregister
    // Def is first.
    MO.setIsUndef(false);
  } else {
    DefLaneMask = LaneBitmask::getAll();
    KillLaneMask = LaneBitmask::getAll();
  }

  if (MO.isDead()) {
    assert(deadDefHasNoUse(MO) &&
           "Dead defs should have no uses");
  } else {
    // Add data dependence to all uses we found so far.
    const TargetSubtargetInfo &ST = MF.getSubtarget();
    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
         E = CurrentVRegUses.end(); I != E; /*empty*/) {
      LaneBitmask LaneMask = I->LaneMask;
      // Ignore uses of other lanes.
      if ((LaneMask & KillLaneMask).none()) {
        ++I;
        continue;
      }

      if ((LaneMask & DefLaneMask).any()) {
        SUnit *UseSU = I->SU;
        MachineInstr *Use = UseSU->getInstr();
        SDep Dep(SU, SDep::Data, Reg);
        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
                                                        I->OperandIndex));
        ST.adjustSchedDependency(SU, UseSU, Dep);
        UseSU->addPred(Dep);
      }

      LaneMask &= ~KillLaneMask;
      // If we found a Def for all lanes of this use, remove it from the list.
      if (LaneMask.any()) {
        I->LaneMask = LaneMask;
        ++I;
      } else
        I = CurrentVRegUses.erase(I);
    }
  }

  // Shortcut: Singly defined vregs do not have output/anti dependencies.
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest defs of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  LaneBitmask LaneMask = DefLaneMask;
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for other lanes.
    if ((V2SU.LaneMask & LaneMask).none())
      continue;
    // Add an output dependence.
    SUnit *DefSU = V2SU.SU;
    // Ignore additional defs of the same lanes in one instruction. This can
    // happen because lanemasks are shared for targets with too many
    // subregisters. We also use some representation tricks/hacks where we
    // add super-register defs/uses, to imply that although we only access parts
    // of the reg we care about the full one.
    if (DefSU == SU)
      continue;
    SDep Dep(SU, SDep::Output, Reg);
    Dep.setLatency(
      SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
    DefSU->addPred(Dep);

    // Update current definition. This can get tricky if the def was about a
    // bigger lanemask before. We then have to shrink it and create a new
    // VReg2SUnit for the non-overlapping part.
    LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
    LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
    V2SU.SU = SU;
    V2SU.LaneMask = OverlapMask;
    if (NonOverlapMask.any())
      CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
  }
  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
  if (LaneMask.any())
    CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}

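// For illustration of the lanemask split above (hypothetical masks): if
// CurrentVRegDefs holds (%0, 0x3, SU(5)) and the current SU(2) defines only
// %0.sub_lo (mask 0x1), SU(5) gets an output edge from SU(2), the existing
// entry shrinks to the overlap (%0, 0x1, SU(2)), and a new entry
// (%0, 0x2, SU(5)) is inserted for the untouched lanes.
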
/// Adds a register data dependency if the instruction that defines the
/// virtual register used at OperIdx is mapped to an SUnit. Add a register
/// antidependency from this SUnit to instructions that occur later in the same
/// scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  // Remember the use. Data dependencies will be added when we find the def.
  LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
                                        : LaneBitmask::getAll();
  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));

  // Add antidependences to the following defs of the vreg.
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for unrelated lanes.
    LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
    if ((PrevDefLaneMask & LaneMask).none())
      continue;
    if (V2SU.SU == SU)
      continue;

    V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
  }
}

/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}

void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
                                           unsigned Latency) {
  if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
    SDep Dep(SUa, SDep::MayAliasMem);
    Dep.setLatency(Latency);
    SUb->addPred(Dep);
  }
}

/// Creates an SUnit for each real instruction, numbered in top-down
/// topological order. The instruction order A < B, implies that no edge exists
/// from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(NumRegionInstrs);

  for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
    if (MI.isDebugInstr())
      continue;

    SUnit *SU = newSUnit(&MI);
    MISUnitMap[&MI] = SU;

    SU->isCall = MI.isCall();
    SU->isCommutable = MI.isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());

    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
    //
    // Reserved resources block an instruction from issuing and stall the
    // entire pipeline. These are identified by BufferSize=0.
    //
    // Unbuffered resources prevent execution of subsequent instructions that
    // require the same resources. This is used for in-order execution pipelines
    // within an out-of-order core. These are identified by BufferSize=1.
    if (SchedModel.hasInstrSchedModel()) {
      const MCSchedClassDesc *SC = getSchedClass(SU);
      for (const MCWriteProcResEntry &PRE :
             make_range(SchedModel.getWriteProcResBegin(SC),
                        SchedModel.getWriteProcResEnd(SC))) {
        switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
        case 0:
          SU->hasReservedResource = true;
          break;
        case 1:
          SU->isUnbuffered = true;
          break;
        default:
          break;
        }
      }
    }
  }
}

class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
  /// Current total number of SUs in map.
  unsigned NumNodes = 0;

  /// 1 for loads, 0 for stores. (see comment in SUList)
  unsigned TrueMemOrderLatency;

public:
  Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}

  /// To keep NumNodes up to date, insert() is used instead of
  /// this operator w/ push_back().
  ValueType &operator[](const SUList &Key) {
    llvm_unreachable("Don't use. Use insert() instead."); };

  /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
  /// reduce().
  void inline insert(SUnit *SU, ValueType V) {
    MapVector::operator[](V).push_back(SU);
    NumNodes++;
  }

  /// Clears the list of SUs mapped to V.
  void inline clearList(ValueType V) {
    iterator Itr = find(V);
    if (Itr != end()) {
      assert(NumNodes >= Itr->second.size());
      NumNodes -= Itr->second.size();

      Itr->second.clear();
    }
  }

  /// Clears map from all contents.
  void clear() {
    MapVector::clear();
    NumNodes = 0;
  }

  unsigned inline size() const { return NumNodes; }

  /// Counts the number of SUs in this map after a reduction.
  void reComputeSize() {
    NumNodes = 0;
    for (auto &I : *this)
      NumNodes += I.second.size();
  }

  unsigned inline getTrueMemOrderLatency() const {
    return TrueMemOrderLatency;
  }

  void dump();
};

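// For illustration, a typical sequence of operations on this map (schematic):
//
//   Value2SUsMap Loads(1 /*TrueMemOrderLatency*/);
//   Loads.insert(SU, V);   // appends SU to V's SUList, NumNodes++
//   Loads.size();          // O(1): returns NumNodes, i.e. SUs, not Values
//   Loads.clearList(V);    // empties V's list and deducts it from NumNodes
//
// size() deliberately counts SUs rather than map entries, since the
// huge-region threshold above is expressed in tracked nodes.
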
void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap) {
  for (auto &I : Val2SUsMap)
    addChainDependencies(SU, I.second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap,
                                             ValueType V) {
  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
  if (Itr != Val2SUsMap.end())
    addChainDependencies(SU, Itr->second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  for (auto &I : map) {
    SUList &sus = I.second;
    for (auto *SU : sus)
      SU->addPredBarrier(BarrierChain);
  }
  map.clear();
}

void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  // Go through all lists of SUs.
  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
    Value2SUsMap::iterator CurrItr = I++;
    SUList &sus = CurrItr->second;
    SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
    for (; SUItr != SUEE; ++SUItr) {
      // Stop on BarrierChain or any instruction above it.
      if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
        break;

      (*SUItr)->addPredBarrier(BarrierChain);
    }

    // Remove also the BarrierChain from list if present.
    if (SUItr != SUEE && *SUItr == BarrierChain)
      SUItr++;

    // Remove all SUs that are now successors of BarrierChain.
    if (SUItr != sus.begin())
      sus.erase(sus.begin(), SUItr);
  }

  // Remove all entries with empty su lists.
  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
    return (mapEntry.second.empty()); });

  // Recompute the size of the map (NumNodes).
  map.reComputeSize();
}

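// For illustration: with BarrierChain = SU(40) and a SUList holding
// SU(70), SU(55), SU(40), SU(10) (descending NodeNums, reflecting the
// bottom-up insertion order), the scan above adds barrier edges for SU(70)
// and SU(55), stops at SU(40), and trims the list down to just SU(10).
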
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker,
                                        PressureDiffs *PDiffs,
                                        LiveIntervals *LIS,
                                        bool TrackLaneMasks) {
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                       : ST.useAA();
  AAForDep = UseAA ? AA : nullptr;

  BarrierChain = nullptr;

  this->TrackLaneMasks = TrackLaneMasks;
  MISUnitMap.clear();
  ScheduleDAG::clearDAG();

  // Create an SUnit for each real instruction.
  initSUnits();

  if (PDiffs)
    PDiffs->init(SUnits.size());

  // We build scheduling units by walking a block's instruction list
  // from bottom to top.

  // Each MI's memory operand(s) is analyzed to a list of underlying
  // objects. The SU is then inserted in the SUList(s) mapped from the
  // Value(s). Each Value thus gets mapped to lists of SUs depending
  // on it, stores and loads kept separately. Two SUs are trivially
  // non-aliasing if they both depend on only identified Values and do
  // not share any common Value.
  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);

  // Certain memory accesses are known to not alias any SU in Stores
  // or Loads, and have therefore their own 'NonAlias'
  // domain. E.g. spill / reload instructions never alias LLVM IR
  // Values. It would be nice to assume that this type of memory
  // accesses always have a proper memory operand modelling, and are
  // therefore never unanalyzable, but this is conservatively not
  // done.
  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = nullptr;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setUniverse(TRI->getNumRegs());
  Uses.setUniverse(TRI->getNumRegs());

  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
  unsigned NumVirtRegs = MRI.getNumVirtRegs();
  CurrentVRegDefs.setUniverse(NumVirtRegs);
  CurrentVRegUses.setUniverse(NumVirtRegs);

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *DbgMI = nullptr;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (DbgMI) {
      DbgValues.push_back(std::make_pair(DbgMI, &MI));
      DbgMI = nullptr;
    }

    if (MI.isDebugValue()) {
      DbgMI = &MI;
      continue;
    }
    if (MI.isDebugLabel())
      continue;

    SUnit *SU = MISUnitMap[&MI];
    assert(SU && "No SUnit mapped to this MI");

    if (RPTracker) {
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
      if (TrackLaneMasks) {
        SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
      }
      if (PDiffs != nullptr)
        PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);

      if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
        RPTracker->recedeSkipDebugValues();
      assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
      RPTracker->recede(RegOpers);
    }

    assert(
        (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
        "Cannot schedule terminators or labels!");

    // Add register-based dependencies (data, anti, and output).
    // For some instructions (calls, returns, inline-asm, etc.) there can
    // be explicit uses and implicit defs, in which case the use will appear
    // on the operand list before the def. Do two passes over the operand
    // list to make sure that defs are processed before any uses.
    bool HasVRegDef = false;
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        HasVRegDef = true;
        addVRegDefDeps(SU, j);
      }
    }
    // Now process all uses.
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      // Only look at use operands.
      // We do not need to check for MO.readsReg() here because subsequent
      // subregister defs will get output dependence edges and need no
      // additional use dependencies.
      if (!MO.isReg() || !MO.isUse())
        continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(SU, j);
      }
    }

    // If we haven't seen any uses in this scheduling region, create a
    // dependence edge to ExitSU to model the live-out latency. This is required
    // for vreg defs with no in-region use, and prefetches with no vreg def.
    //
    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
    // check currently relies on being called before adding chain deps.
    if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
      SDep Dep(SU, SDep::Artificial);
      Dep.setLatency(SU->Latency - 1);
      ExitSU.addPred(Dep);
    }

    // Add memory dependencies (Note: isStoreToStackSlot and
    // isLoadFromStackSlot are not usable after stack slots are lowered to
    // actual addresses).

    // This is a barrier event that acts as a pivotal node in the DAG.
    if (isGlobalMemoryObject(AA, &MI)) {

      // Become the barrier chain.
      if (BarrierChain)
        BarrierChain->addPredBarrier(SU);
      BarrierChain = SU;

      LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);

      // Add dependencies against everything below it and clear maps.
      addBarrierChain(Stores);
      addBarrierChain(Loads);
      addBarrierChain(NonAliasStores);
      addBarrierChain(NonAliasLoads);

      continue;
    }

    // If it's not a store or a variant load, we're done.
    if (!MI.mayStore() &&
        !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
      continue;

    // Always add dependency edge to BarrierChain if present.
    if (BarrierChain)
      BarrierChain->addPredBarrier(SU);

    // Find the underlying objects for MI. The Objs vector is either
    // empty, or filled with the Values of memory locations which this
    // SU depends on.
    UnderlyingObjectsVector Objs;
    bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
                                                  MF.getDataLayout());

    if (MI.mayStore()) {
      if (!ObjsFound) {
        // An unknown store depends on all stores and loads.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);
        addChainDependencies(SU, Loads);
        addChainDependencies(SU, NonAliasLoads);

        // Map this store to 'UnknownValue'.
        Stores.insert(SU, UnknownValue);
      } else {
        // Add precise dependencies against all previously seen memory
        // accesses mapped to the same Value(s).
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add dependencies to previous stores and loads mapped to V.
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
          addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
        }
        // Update the store map after all chains have been added to avoid adding
        // a self-loop edge if multiple underlying objects are present.
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Map this store to V.
          (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
        }
        // The store may have dependencies to unanalyzable loads and
        // stores.
        addChainDependencies(SU, Loads, UnknownValue);
        addChainDependencies(SU, Stores, UnknownValue);
      }
    } else { // SU is a load.
      if (!ObjsFound) {
        // An unknown load depends on all stores.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);

        Loads.insert(SU, UnknownValue);
      } else {
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add precise dependencies against all previously seen stores
          // mapping to the same Value(s).
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);

          // Map this load to V.
          (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
        }
        // The load may have dependencies to unanalyzable stores.
        addChainDependencies(SU, Stores, UnknownValue);
      }
    }

    // Reduce maps if they grow huge.
    if (Stores.size() + Loads.size() >= HugeRegion) {
      LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
      reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
    }
    if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
      LLVM_DEBUG(
          dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
      reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
    }
  }

  if (DbgMI)
    FirstDbgValue = DbgMI;

  Defs.clear();
  Uses.clear();
  CurrentVRegDefs.clear();
  CurrentVRegUses.clear();
}

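// For illustration, the per-region driver sequence a scheduler typically
// runs (schematic; see MachineScheduler for the real driver):
//
//   SchedDAG->startBlock(&MBB);
//   SchedDAG->enterRegion(&MBB, RegionBegin, RegionEnd, NumRegionInstrs);
//   SchedDAG->buildSchedGraph(AA, RPTracker, PDiffs, LIS, TrackLaneMasks);
//   // ...pick an instruction order and move instructions...
//   SchedDAG->exitRegion();
//   SchedDAG->finishBlock();
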
raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue *PSV) {
  PSV->printCustom(OS);
  return OS;
}

void ScheduleDAGInstrs::Value2SUsMap::dump() {
  for (auto &Itr : *this) {
    if (Itr.first.is<const Value*>()) {
      const Value *V = Itr.first.get<const Value*>();
      if (isa<UndefValue>(V))
        dbgs() << "Unknown";
      else
        V->printAsOperand(dbgs());
    }
    else if (Itr.first.is<const PseudoSourceValue*>())
      dbgs() << Itr.first.get<const PseudoSourceValue*>();
    else
      llvm_unreachable("Unknown Value type.");

    dbgs() << " : ";
    dumpSUList(Itr.second);
  }
}

void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
                                              Value2SUsMap &loads, unsigned N) {
  LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
             dbgs() << "Loading SUnits:\n"; loads.dump());

  // Insert all SU's NodeNums into a vector and sort it.
  std::vector<unsigned> NodeNums;
  NodeNums.reserve(stores.size() + loads.size());
  for (auto &I : stores)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  for (auto &I : loads)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  llvm::sort(NodeNums.begin(), NodeNums.end());

  // The N last elements in NodeNums will be removed, and the SU with
  // the lowest NodeNum of them will become the new BarrierChain to
  // let the not yet seen SUs have a dependency to the removed SUs.
  assert(N <= NodeNums.size());
  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
  if (BarrierChain) {
    // The aliasing and non-aliasing maps reduce independently of each
    // other, but share a common BarrierChain. Check if the
    // newBarrierChain is above the former one. If it is not, it may
    // introduce a loop to use newBarrierChain, so keep the old one.
    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
      BarrierChain->addPredBarrier(newBarrierChain);
      BarrierChain = newBarrierChain;
      LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);
    }
    else
      LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);
  }
  else
    BarrierChain = newBarrierChain;

  insertBarrierChain(stores);
  insertBarrierChain(loads);

  LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
             dbgs() << "Loading SUnits:\n"; loads.dump());
}

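// For illustration: NodeNums is sorted ascending, so with N = 500 the
// expression *(NodeNums.end() - N) selects the smallest of the 500 highest
// NodeNums. That SU becomes (or is chained to) BarrierChain, every tracked
// SU with a higher NodeNum now hangs off the barrier via
// insertBarrierChain(), and not-yet-visited instructions need one edge to
// the barrier instead of edges to hundreds of individual nodes.
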
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
                        MachineInstr &MI, bool addToLiveRegs) {
  for (MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !MO.readsReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;

    // Things that are available after the instruction are killed by it.
    bool IsKill = LiveRegs.available(MRI, Reg);
    MO.setIsKill(IsKill);
    if (addToLiveRegs)
      LiveRegs.addReg(Reg);
  }
}

void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
  LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');

  LiveRegs.init(*TRI);
  LiveRegs.addLiveOuts(MBB);

  // Examine block from end to start...
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    if (MI.isDebugInstr())
      continue;

    // Update liveness. Registers that are def'd but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
      const MachineOperand &MO = *O;
      if (MO.isReg()) {
        if (!MO.isDef())
          continue;
        unsigned Reg = MO.getReg();
        if (!Reg)
          continue;
        LiveRegs.removeReg(Reg);
      } else if (MO.isRegMask()) {
        LiveRegs.removeRegsInMask(MO);
      }
    }

    // If there is a bundle header fix it up first.
    if (!MI.isBundled()) {
      toggleKills(MRI, LiveRegs, MI, true);
    } else {
      MachineBasicBlock::instr_iterator First = MI.getIterator();
      if (MI.isBundle()) {
        toggleKills(MRI, LiveRegs, MI, false);
        ++First;
      }
      // Some targets make the (questionable) assumption that the instructions
      // inside the bundle are ordered and consequently only the last use of
      // a register inside the bundle can kill it.
      MachineBasicBlock::instr_iterator I = std::next(First);
      while (I->isBundledWithSucc())
        ++I;
      do {
        if (!I->isDebugInstr())
          toggleKills(MRI, LiveRegs, *I, true);
        --I;
      } while (I != First);
    }
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  SU->getInstr()->dump();
#endif
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss, /*SkipOpers=*/true);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}

//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//

namespace llvm {

/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
  SchedDFSResult &R;

  /// Join DAG nodes into equivalence classes by their subtree.
  IntEqClasses SubtreeClasses;
  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
  std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;

  struct RootData {
    unsigned NodeID;
    unsigned ParentNodeID;      ///< Parent node (member of the parent subtree).
    unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
                                /// children.

    RootData(unsigned id): NodeID(id),
                           ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}

    unsigned getSparseSetIndex() const { return NodeID; }
  };

  SparseSet<RootData> RootSet;

public:
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }

  /// Returns true if this node has been visited by the DFS traversal.
  ///
  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
  /// ID. Later, SubtreeID is updated but remains valid.
  bool isVisited(const SUnit *SU) const {
    return R.DFSNodeData[SU->NodeNum].SubtreeID
      != SchedDFSResult::InvalidSubtreeID;
  }

  /// Initializes this node's instruction count. We don't need to flag the node
  /// visited until visitPostorder because the DAG cannot have cycles.
  void visitPreorder(const SUnit *SU) {
    R.DFSNodeData[SU->NodeNum].InstrCount =
      SU->getInstr()->isTransient() ? 0 : 1;
  }

  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (const SDep &PredDep : SU->Preds) {
      if (PredDep.getKind() != SDep::Data)
        continue;
      unsigned PredNum = PredDep.getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }

  /// Called once for each tree edge after calling visitPostOrderNode on
  /// the predecessor. Increment the parent node's instruction count and
  /// preemptively join this subtree to its parent's if it is small enough.
  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
    R.DFSNodeData[Succ->NodeNum].InstrCount
      += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
    joinPredSubtree(PredDep, Succ);
  }

  /// Adds a connection for cross edges.
  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
  }

  /// Sets each node's subtree ID to the representative ID and record
  /// connections between trees.
  void finalize() {
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    for (const RootData &Root : RootSet) {
      unsigned TreeID = SubtreeClasses[Root.NodeID];
      if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      LLVM_DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
                        << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
      unsigned PredTree = SubtreeClasses[P.first->NodeNum];
      unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = P.first->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }

protected:
  /// Joins the predecessor subtree with the successor that is its DFS parent.
  /// Applies some heuristics before joining.
  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
                       bool CheckLimit = true) {
    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");

    // Check if the predecessor is already joined.
    const SUnit *PredSU = PredDep.getSUnit();
    unsigned PredNum = PredSU->NodeNum;
    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
      return false;

    // Four is the magic number of successors before a node is considered a
    // pinch point.
    unsigned NumDataSucs = 0;
    for (const SDep &SuccDep : PredSU->Succs) {
      if (SuccDep.getKind() == SDep::Data) {
        if (++NumDataSucs >= 4)
          return false;
      }
    }
    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
      return false;
    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
    SubtreeClasses.join(Succ->NodeNum, PredNum);
    return true;
  }

  /// Called by finalize() to record a connection between trees.
  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
    if (!Depth)
      return;

    do {
      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
        R.SubtreeConnections[FromTree];
      for (SchedDFSResult::Connection &C : Connections) {
        if (C.TreeID == ToTree) {
          C.Level = std::max(C.Level, Depth);
          return;
        }
      }
      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
  }
};

} // end namespace llvm

namespace {

/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
  std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;

public:
  bool isComplete() const { return DFSStack.empty(); }

  void follow(const SUnit *SU) {
    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
  }
  void advance() { ++DFSStack.back().second; }

  const SDep *backtrack() {
    DFSStack.pop_back();
    return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
  }

  const SUnit *getCurr() const { return DFSStack.back().first; }

  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }

  SUnit::const_pred_iterator getPredEnd() const {
    return getCurr()->Preds.end();
  }
};

} // end anonymous namespace

static bool hasDataSucc(const SUnit *SU) {
  for (const SDep &SuccDep : SU->Succs) {
    if (SuccDep.getKind() == SDep::Data &&
        !SuccDep.getSUnit()->isBoundaryNode())
      return true;
  }
  return false;
}

/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
  if (!IsBottomUp)
    llvm_unreachable("Top-down ILP metric is unimplemented");

  SchedDFSImpl Impl(*this);
  for (const SUnit &SU : SUnits) {
    if (Impl.isVisited(&SU) || hasDataSucc(&SU))
      continue;

    SchedDAGReverseDFS DFS;
    Impl.visitPreorder(&SU);
    DFS.follow(&SU);
    while (true) {
      // Traverse the leftmost path as far as possible.
      while (DFS.getPred() != DFS.getPredEnd()) {
        const SDep &PredDep = *DFS.getPred();
        DFS.advance();
        // Ignore non-data edges.
        if (PredDep.getKind() != SDep::Data
            || PredDep.getSUnit()->isBoundaryNode()) {
          continue;
        }
        // An already visited edge is a cross edge, assuming an acyclic DAG.
        if (Impl.isVisited(PredDep.getSUnit())) {
          Impl.visitCrossEdge(PredDep, DFS.getCurr());
          continue;
        }
        Impl.visitPreorder(PredDep.getSUnit());
        DFS.follow(PredDep.getSUnit());
      }
      // Visit the top of the stack in postorder and backtrack.
      const SUnit *Child = DFS.getCurr();
      const SDep *PredDep = DFS.backtrack();
      Impl.visitPostorderNode(Child);
      if (PredDep)
        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
      if (DFS.isComplete())
        break;
    }
  }
  Impl.finalize();
}

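// For illustration: ILPValue::print() below emits InstrCount / Length, so a
// subtree with InstrCount = 6 and critical-path Length = 2 prints
// "6 / 2 = 3", i.e. up to three instructions per cycle of exploitable
// parallelism, while Length = 0 prints "BADILP".
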
/// The root of the given SubtreeID was just scheduled. For all subtrees
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
  for (const Connection &C : SubtreeConnections[SubtreeID]) {
    SubtreeConnectLevels[C.TreeID] =
      std::max(SubtreeConnectLevels[C.TreeID], C.Level);
    LLVM_DEBUG(dbgs() << "  Tree: " << C.TreeID << " @"
                      << SubtreeConnectLevels[C.TreeID] << '\n');
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
  OS << InstrCount << " / " << Length << " = ";
  if (!Length)
    OS << "BADILP";
  else
    OS << format("%g", ((double)InstrCount / Length));
}

LLVM_DUMP_METHOD void ILPValue::dump() const {
  dbgs() << *this << '\n';
}

namespace llvm {

LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
  Val.print(OS);
  return OS;
}

} // end namespace llvm

#endif
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
iterator end()
Returns an iterator past this container.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
void push_back(const T &Elt)
Definition: SmallVector.h:213
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
virtual void finishBlock()
Cleans up after scheduling in the given block.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
bool isDebugLabel() const
Definition: MachineInstr.h:850
A common definition of LaneBitmask for use in TableGen and CodeGen.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
void clear()
Definition: MapVector.h:89
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:485
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
bool available(const MachineRegisterInfo &MRI, unsigned Reg) const
Returns true if register Reg and no aliasing register is in the set.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:452
void init(unsigned N)
Initialize an array of N PressureDiffs.
Record a physical register access.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
LivePhysRegs LiveRegs
Set of live physical registers for updating kill flags.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit *SU) const override
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void removeRegsInMask(const MachineOperand &MO, SmallVectorImpl< std::pair< unsigned, const MachineOperand *>> *Clobbers=nullptr)
Removes physical registers clobbered by the regmask operand MO.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:411
void setIsUndef(bool Val=true)
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:35
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
unsigned Reg
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
void compress()
compress - Compress equivalence classes by numbering them 0 .
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
Definition: MachineInstr.h:938
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
bool hasOneDef(unsigned RegNo) const
Return true if there is exactly one operand defining the specified register.
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI DAG construction"))
static unsigned InstrCount
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:423
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:361
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU&#39;s instruction that defines a physical register.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
void clearList(ValueType V)
Clears the list of SUs mapped to V.
iterator_range< succ_iterator > successors()
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:570
void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
A description of a memory reference used in the backend.
unsigned NumSuccs
of SDep::Data sucss.
Definition: ScheduleDAG.h:272
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to scheduling...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
&#39;undef&#39; values are things that do not have specified contents.
Definition: Constants.h:1275
An individual mapping from virtual register number to SUnit.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:314
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:371
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:266
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise. ...
Definition: SparseSet.h:236
static bool hasDataSucc(const SUnit *SU)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
unsigned SubReg
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:284
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:84
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:501
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:99
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:66
Key
PAL metadata keys.
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:56
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:294
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx ...
void print(raw_ostream &OS) const
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:191
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
List of registers defined and used by a machine instruction.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of register in this class.
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:221
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
iterator find(const KeyT &Key)
Definition: MapVector.h:148
reverse_iterator rend()
void visitPreorder(const SUnit *SU)
Initializes this node&#39;s instruction count.
bool mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA)
Returns true if this instruction's memory access aliases the memory access of Other.
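A hedged sketch of how such a query can gate chain edges (SUa/SUb are hypothetical; addChainDependency is the helper documented further below):
  // Only draw a memory chain edge when AA cannot prove disjointness.
  if (SUa->getInstr()->mayAlias(AA, *SUb->getInstr(), /*UseTBAA=*/true))
    addChainDependency(SUa, SUb);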
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
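A sketch (MI hypothetical) of the kind of filtering this enables during DAG construction:
  if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))
    ; // an ordinary load: it must participate in memory chain edges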
reverse_iterator rbegin()
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:349
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:164
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:286
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Scheduling dependency.
Definition: ScheduleDAG.h:50
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:293
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:672
#define P(N)
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per-register information).
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
bool isCall
Is a function call.
Definition: ScheduleDAG.h:280
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:378
Array of PressureDiffs.
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:60
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static unsigned getReductionSize()
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
bool registerDefIsDead(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
Internal state used to compute SchedDFSResult.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void reComputeSize()
Counts the number of SUs in this map after a reduction.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:72
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
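A minimal sketch, assuming MF is the current MachineFunction and V the pointer operand of a memory access:
  SmallVector<Value *, 4> Objs;
  if (getUnderlyingObjectsForCodeGen(V, Objs, MF.getDataLayout()))
    for (Value *Obj : Objs)
      if (isIdentifiedObject(Obj))
        ; // Obj is distinct; it can key a precise Value2SUsMap entry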
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
MCRegAliasIterator enumerates all registers aliasing Reg.
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:156
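A sketch of the usual SparseSet setup, sized from the register info (names hypothetical):
  SparseSet<unsigned> Seen;
  Seen.setUniverse(TRI->getNumRegs()); // keys must stay below this bound
  Seen.insert(Reg);
  bool AlreadySeen = Seen.count(Reg);  // 1 if present, 0 otherwise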
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
const MachineFrameInfo & MFI
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
void clear()
Clears the set.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
bool isVisited(const SUnit *SU) const
Returns true if this node been visited by the DFS traversal.
MCSubRegIterator enumerates all sub-registers of Reg.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:929
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4143
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:493
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
void setIsKill(bool Val=true)
iterator find(const KeyT &Key)
Find an element by its key.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:859
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
The two locations may or may not alias. This is the least precise result.
Definition: AliasAnalysis.h:87
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the ...
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
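For instance, a sketch of merging classes and then reading the compressed IDs (sizes and elements hypothetical):
  IntEqClasses EC(NumKeys);   // every key starts in its own class
  EC.join(A, B);              // A and B now share one class
  EC.compress();              // renumber classes densely from 0
  unsigned ID = EC[A];        // equals EC[B]; IDs < EC.getNumClasses()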
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:283
Iterator for intrusive lists based on ilist_node.
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1032
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
bool isDebugValue() const
Definition: MachineInstr.h:849
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Reg2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instr...
void eraseAll(const KeyT &K)
Erase all elements with the given key.
CHAIN = SC CHAIN, Imm128 - System call.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx. ...
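A sketch of testing lane overlap between a sub-register operand and a previously recorded def mask (MO and DefMask hypothetical):
  LaneBitmask UseMask = MO.getSubReg()
                            ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
                            : LaneBitmask::getAll();
  if ((UseMask & DefMask).any())
    ; // the operand touches lanes written by the recorded def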
Nonvolatile load/Store instructions that may alias.
Definition: ScheduleDAG.h:71
typename std::vector<std::pair<ValueType, SUList>>::iterator iterator
Definition: MapVector.h:50
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store fol...
Definition: ScheduleDAG.h:389
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
Special value supplied for machine level alias analysis.
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostorderNode on the predecessor.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
iterator erase(iterator I)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:573
Representation of each machine instruction.
Definition: MachineInstr.h:60
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:574
void finalize()
Sets each node&#39;s subtree ID to the representative ID and record connections between trees...
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:569
void removeReg(unsigned Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set...
Definition: LivePhysRegs.h:89
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:49
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:62
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
constexpr bool any() const
Definition: LaneBitmask.h:53
bool empty() const
Returns true if the set is empty.
Mapping from virtual register to SUnit including an operand index.
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AliasAnalysis and Target fail to deny the dep...
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:496
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2032
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
iterator begin()
Definition: MapVector.h:70
const unsigned Kind
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and return a ptr to it.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clear the map.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:659
iterator end()
Definition: MapVector.h:72
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
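A minimal sketch of wiring one register data dependence by hand (DefSU, UseSU, and Reg hypothetical):
  SDep Dep(DefSU, SDep::Data, Reg); // true dependence on Reg
  Dep.setLatency(1);                // or a machine-model latency
  UseSU->addPred(Dep);              // returns false if the edge exists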
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:477
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
LLVM Value Representation.
Definition: Value.h:73
void clear()
Clears map from all contents.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
bool isPosition() const
Definition: MachineInstr.h:847
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
static bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
MachineBasicBlock * BB
The block in which to insert instructions.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:492
static void dumpSUList(ScheduleDAGInstrs::SUList &L)
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:571
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:572
SchedDFSImpl(SchedDFSResult &r)
#define LLVM_DEBUG(X)
Definition: Debug.h:119
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:316
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
bool hasTailCall() const
Returns true if the function contains a tail call.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
A discriminated union of two pointer types, with the discriminator in the low bit of the pointer...
Definition: PointerUnion.h:87
void addReg(unsigned Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
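Putting the LivePhysRegs pieces together, a sketch of a backward liveness walk over one block (MBB hypothetical; stepBackward is the standard companion API):
  LivePhysRegs LiveRegs;
  LiveRegs.init(*TRI);           // size the set for this target
  LiveRegs.addLiveOuts(MBB);     // seed with the block's live-outs
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
    LiveRegs.stepBackward(MI);   // kill defs, gen uses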
void resize(size_type N)
Definition: SmallVector.h:352