//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This implements the ScheduleDAGInstrs class, which implements
/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/IntEqClasses.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/SparseSet.h"
41 #include "llvm/IR/Constants.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/Operator.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/Value.h"
48 #include "llvm/MC/LaneBitmask.h"
49 #include "llvm/MC/MCRegisterInfo.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Compiler.h"
53 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/Format.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <iterator>
60 #include <string>
61 #include <utility>
62 #include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));

static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.

// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
// together hold this many SUs, a reduction of maps will be done.
static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
                             "prior to scheduling, at which point a trade-off "
                             "is made to avoid excessive compile time."));

static cl::opt<unsigned> ReductionSize(
    "dag-maps-reduction-size", cl::Hidden,
    cl::desc("A huge scheduling region will have maps reduced by this many "
             "nodes at a time. Defaults to HugeRegion / 2."));

static unsigned getReductionSize() {
  // Always reduce a huge region with half of the elements, except
  // when user sets this number explicitly.
  if (ReductionSize.getNumOccurrences() == 0)
    return HugeRegion / 2;
  return ReductionSize;
}

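// Illustrative usage (not part of the original file): both knobs above are
// ordinary cl::opt flags, so the compile-time vs. output-quality trade-off
// can be explored from the command line, e.g.:
//
//   llc -dag-maps-huge-region=50 -dag-maps-reduction-size=25 foo.ll
//
// With the defaults, a region whose Stores+Loads maps together reach 1000
// SUs is reduced by 500 nodes at a time (HugeRegion / 2).
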
LLVM_DUMP_METHOD static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "{ ";
  for (const SUnit *su : L) {
    dbgs() << "SU(" << su->NodeNum << ")";
    if (su != L.back())
      dbgs() << ", ";
  }
  dbgs() << "}\n";
#endif
}

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool RemoveKillFlags)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
      RemoveKillFlags(RemoveKillFlags),
      UnknownValue(UndefValue::get(
                     Type::getVoidTy(mf.getFunction().getContext()))) {
  DbgValues.clear();

  const TargetSubtargetInfo &ST = mf.getSubtarget();
  SchedModel.init(ST.getSchedModel(), &ST, ST.getInstrInfo());
}

/// If this machine instr has memory reference information and it can be
/// tracked to a normal reference to a known object, return the Value
/// for that object. This function returns false if the memory location is
/// unknown or may alias anything.
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                         const MachineFrameInfo &MFI,
                                         UnderlyingObjectsVector &Objects,
                                         const DataLayout &DL) {
  auto allMMOsOkay = [&]() {
    for (const MachineMemOperand *MMO : MI->memoperands()) {
      if (MMO->isVolatile())
        return false;

      if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
        // Functions that contain tail calls don't have unique PseudoSourceValue
        // objects. Two PseudoSourceValues might refer to the same or
        // overlapping locations. The client code calling this function assumes
        // this is not the case. So return a conservative answer of no known
        // object.
        if (MFI.hasTailCall())
          return false;

        // For now, ignore PseudoSourceValues which may alias LLVM IR values
        // because the code that uses this function has no way to cope with
        // such aliases.
        if (PSV->isAliased(&MFI))
          return false;

        bool MayAlias = PSV->mayAlias(&MFI);
        Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
      } else if (const Value *V = MMO->getValue()) {
        SmallVector<Value *, 4> Objs;
        if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
          return false;

        for (Value *V : Objs) {
          assert(isIdentifiedObject(V));
          Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
        }
      } else
        return false;
    }
    return true;
  };

  if (!allMMOsOkay()) {
    Objects.clear();
    return false;
  }

  return true;
}

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = nullptr;
}

void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  NumRegionInstrs = regioninstrs;
}

void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

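// Illustrative sketch (assumes the MachineScheduler driver; simplified): a
// scheduler walks each block and its regions through the hooks above,
// roughly:
//
//   Scheduler.startBlock(&MBB);
//   for (each scheduling region in MBB, bottom-up) {
//     Scheduler.enterRegion(&MBB, RegionBegin, RegionEnd, NumRegionInstrs);
//     Scheduler.schedule();   // calls buildSchedGraph() internally
//     Scheduler.exitRegion();
//   }
//   Scheduler.finishBlock();
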
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
  ExitSU.setInstr(ExitMI);
  // Add dependencies on the defs and uses of the instruction.
  if (ExitMI) {
    for (const MachineOperand &MO : ExitMI->operands()) {
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
      }
    }
  }
  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    for (const MachineBasicBlock *Succ : BB->successors()) {
      for (const auto &LI : Succ->liveins()) {
        if (!Uses.contains(LI.PhysReg))
          Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
      }
    }
  }
}

/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = I->OpIdx;
      MachineInstr *RegUse = nullptr;
      SDep Dep;
      if (UseOp < 0)
        Dep = SDep(SU, SDep::Artificial);
      else {
        // Set the hasPhysRegDefs only for physreg defs that have a use within
        // the scheduling region.
        SU->hasPhysRegDefs = true;
        Dep = SDep(SU, SDep::Data, *Alias);
        RegUse = UseSU->getInstr();
      }
      Dep.setLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
                                         UseOp));

      ST.adjustSchedDependency(SU, UseSU, Dep);
      UseSU->addPred(Dep);
    }
  }
}

/// \brief Adds register dependencies (data, anti, and output) from this SUnit
/// to following instructions in the same scheduling region that depend on the
/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();
  // We do not need to track any dependencies for constant registers.
  if (MRI.isConstantPhysReg(Reg))
    return;

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
      SUnit *DefSU = I->SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
        else {
          SDep Dep(SU, Kind, /*Reg=*/*Alias);
          Dep.setLatency(
            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
          DefSU->addPred(Dep);
        }
      }
    }
  }

  if (!MO.isDef()) {
    SU->hasPhysRegUses = true;
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
    if (RemoveKillFlags)
      MO.setIsKill(false);
  } else {
    addPhysRegDataDeps(SU, OperIdx);

    // Clear this register's use list.
    if (Uses.contains(Reg))
      Uses.eraseAll(Reg);

    if (!MO.isDead()) {
      Defs.eraseAll(Reg);
    } else if (SU->isCall) {
      // Calls will not be reordered because of chain dependencies (see
      // below). Since call operands are dead, calls may continue to be added
      // to the DefList making dependence checking quadratic in the size of
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
      Reg2SUnitsMap::iterator B = P.first;
      Reg2SUnitsMap::iterator I = P.second;
      for (bool isBegin = I == B; !isBegin; /* empty */) {
        isBegin = (--I) == B;
        if (!I->SU->isCall)
          break;
        I = Defs.erase(I);
      }
    }

    // Defs are pushed in the order they are visited and never reordered.
    Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
  }
}
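
// Example (illustrative x86-flavored pseudo machine code, not from the
// source): given
//   SU(0): $eax = MOV32ri 1
//   SU(1): CMP32rr $eax, $ebx     ; reads $eax
//   SU(2): $eax = MOV32ri 5       ; redefines $eax
// the code above yields a data edge SU(0)->SU(1), an anti (WAR) edge
// SU(1)->SU(2) with latency 0, and an output (WAW) edge SU(0)->SU(2) whose
// latency comes from computeOutputLatency().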

LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
  unsigned Reg = MO.getReg();
  // No point in tracking lanemasks if we don't have interesting subregisters.
  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
  if (!RC.HasDisjunctSubRegs)
    return LaneBitmask::getAll();

  unsigned SubReg = MO.getSubReg();
  if (SubReg == 0)
    return RC.getLaneMask();
  return TRI->getSubRegIndexLaneMask(SubReg);
}
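
// Example (illustrative; the concrete masks are target-dependent): for a
// target where a 64-bit register class has disjoint 32-bit subregisters
// sub_lo and sub_hi, an operand %0.sub_lo might map to LaneBitmask 0x1 and
// %0.sub_hi to 0x2, while an operand of %0 without a subregister index gets
// the class's full mask 0x3.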

/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  LaneBitmask DefLaneMask;
  LaneBitmask KillLaneMask;
  if (TrackLaneMasks) {
    bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
    DefLaneMask = getLaneMaskForMO(MO);
    // If we have a <read-undef> flag, none of the lane values comes from an
    // earlier instruction.
    KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;

    // Clear undef flag, we'll re-add it later once we know which subregister
    // Def is first.
    MO.setIsUndef(false);
  } else {
    DefLaneMask = LaneBitmask::getAll();
    KillLaneMask = LaneBitmask::getAll();
  }

  if (MO.isDead()) {
    assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
           "Dead defs should have no uses");
  } else {
    // Add data dependence to all uses we found so far.
    const TargetSubtargetInfo &ST = MF.getSubtarget();
    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
         E = CurrentVRegUses.end(); I != E; /*empty*/) {
      LaneBitmask LaneMask = I->LaneMask;
      // Ignore uses of other lanes.
      if ((LaneMask & KillLaneMask).none()) {
        ++I;
        continue;
      }

      if ((LaneMask & DefLaneMask).any()) {
        SUnit *UseSU = I->SU;
        MachineInstr *Use = UseSU->getInstr();
        SDep Dep(SU, SDep::Data, Reg);
        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
                                                        I->OperandIndex));
        ST.adjustSchedDependency(SU, UseSU, Dep);
        UseSU->addPred(Dep);
      }

      LaneMask &= ~KillLaneMask;
      // If we found a Def for all lanes of this use, remove it from the list.
      if (LaneMask.any()) {
        I->LaneMask = LaneMask;
        ++I;
      } else
        I = CurrentVRegUses.erase(I);
    }
  }

  // Shortcut: Singly defined vregs do not have output/anti dependencies.
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest defs of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  LaneBitmask LaneMask = DefLaneMask;
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for other lanes.
    if ((V2SU.LaneMask & LaneMask).none())
      continue;
    // Add an output dependence.
    SUnit *DefSU = V2SU.SU;
    // Ignore additional defs of the same lanes in one instruction. This can
    // happen because lanemasks are shared for targets with too many
    // subregisters. We also use some representation tricks/hacks where we
    // add super-register defs/uses, to imply that although we only access parts
    // of the reg we care about the full one.
    if (DefSU == SU)
      continue;
    SDep Dep(SU, SDep::Output, Reg);
    Dep.setLatency(
      SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
    DefSU->addPred(Dep);

    // Update current definition. This can get tricky if the def was about a
    // bigger lanemask before. We then have to shrink it and create a new
    // VReg2SUnit for the non-overlapping part.
    LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
    LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
    V2SU.SU = SU;
    V2SU.LaneMask = OverlapMask;
    if (NonOverlapMask.any())
      CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
  }
  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
  if (LaneMask.any())
    CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}

/// \brief Adds a register data dependency if the instruction that defines the
/// virtual register used at OperIdx is mapped to an SUnit. Add a register
/// antidependency from this SUnit to instructions that occur later in the same
/// scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  // Remember the use. Data dependencies will be added when we find the def.
  LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
                                        : LaneBitmask::getAll();
  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));

  // Add antidependences to the following defs of the vreg.
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for unrelated lanes.
    LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
    if ((PrevDefLaneMask & LaneMask).none())
      continue;
    if (V2SU.SU == SU)
      continue;

    V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
  }
}
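
// Example (illustrative pseudo-MIR): virtual registers can be rewritten in a
// region, e.g. by separate subregister writes:
//   SU(0): undef %0.sub0 = ...
//   SU(1): ...            = use of %0.sub0
//   SU(2): %0.sub1        = ...          ; writes the other lanes of %0
// The use at SU(1) is recorded in CurrentVRegUses; any later write of %0
// whose lanes overlap the use receives the anti edge added by the loop above
// (here none, when lane masks are tracked, since sub0 and sub1 are disjoint).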

/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}

void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
                                           unsigned Latency) {
  if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
    SDep Dep(SUa, SDep::MayAliasMem);
    Dep.setLatency(Latency);
    SUb->addPred(Dep);
  }
}

/// \brief Creates an SUnit for each real instruction, numbered in top-down
/// topological order. The instruction order A < B implies that no edge exists
/// from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(NumRegionInstrs);

  for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
    if (MI.isDebugValue())
      continue;

    SUnit *SU = newSUnit(&MI);
    MISUnitMap[&MI] = SU;

    SU->isCall = MI.isCall();
    SU->isCommutable = MI.isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());

    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
    //
    // Reserved resources block an instruction from issuing and stall the
    // entire pipeline. These are identified by BufferSize=0.
    //
    // Unbuffered resources prevent execution of subsequent instructions that
    // require the same resources. This is used for in-order execution pipelines
    // within an out-of-order core. These are identified by BufferSize=1.
    if (SchedModel.hasInstrSchedModel()) {
      const MCSchedClassDesc *SC = getSchedClass(SU);
      for (const MCWriteProcResEntry &PRE :
             make_range(SchedModel.getWriteProcResBegin(SC),
                        SchedModel.getWriteProcResEnd(SC))) {
        switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
        case 0:
          SU->hasReservedResource = true;
          break;
        case 1:
          SU->isUnbuffered = true;
          break;
        default:
          break;
        }
      }
    }
  }
}

class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
  /// Current total number of SUs in map.
  unsigned NumNodes = 0;

  /// 1 for loads, 0 for stores. (see comment in SUList)
  unsigned TrueMemOrderLatency;

public:
  Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}

  /// To keep NumNodes up to date, insert() is used instead of
  /// this operator w/ push_back().
  ValueType &operator[](const SUList &Key) {
    llvm_unreachable("Don't use. Use insert() instead."); };

  /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
  /// reduce().
  void inline insert(SUnit *SU, ValueType V) {
    MapVector::operator[](V).push_back(SU);
    NumNodes++;
  }

  /// Clears the list of SUs mapped to V.
  void inline clearList(ValueType V) {
    iterator Itr = find(V);
    if (Itr != end()) {
      assert(NumNodes >= Itr->second.size());
      NumNodes -= Itr->second.size();

      Itr->second.clear();
    }
  }

  /// Clears map from all contents.
  void clear() {
    MapVector::clear();
    NumNodes = 0;
  }

  unsigned inline size() const { return NumNodes; }

  /// Counts the number of SUs in this map after a reduction.
  void reComputeSize() {
    NumNodes = 0;
    for (auto &I : *this)
      NumNodes += I.second.size();
  }

  unsigned inline getTrueMemOrderLatency() const {
    return TrueMemOrderLatency;
  }

  void dump();
};
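
// Illustrative usage (sketch, not from the source): during the bottom-up walk
// in buildSchedGraph(), memory SUs are first recorded under their underlying
// Value, and SUs visited later (i.e. higher up in the region) then chain all
// previously recorded SUs for that Value after themselves:
//
//   Value2SUsMap Stores;                      // TrueMemOrderLatency == 0
//   Stores.insert(LowerSU, V);                // LowerSU (seen first, below)
//   addChainDependencies(UpperSU, Stores, V); // every SU stored for V now
//                                             // depends on UpperSU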

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap) {
  for (auto &I : Val2SUsMap)
    addChainDependencies(SU, I.second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap,
                                             ValueType V) {
  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
  if (Itr != Val2SUsMap.end())
    addChainDependencies(SU, Itr->second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  for (auto &I : map) {
    SUList &sus = I.second;
    for (auto *SU : sus)
      SU->addPredBarrier(BarrierChain);
  }
  map.clear();
}

void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  // Go through all lists of SUs.
  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
    Value2SUsMap::iterator CurrItr = I++;
    SUList &sus = CurrItr->second;
    SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
    for (; SUItr != SUEE; ++SUItr) {
      // Stop on BarrierChain or any instruction above it.
      if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
        break;

      (*SUItr)->addPredBarrier(BarrierChain);
    }

    // Remove also the BarrierChain from list if present.
    if (SUItr != SUEE && *SUItr == BarrierChain)
      SUItr++;

    // Remove all SUs that are now successors of BarrierChain.
    if (SUItr != sus.begin())
      sus.erase(sus.begin(), SUItr);
  }

  // Remove all entries with empty su lists.
  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
      return (mapEntry.second.empty()); });

  // Recompute the size of the map (NumNodes).
  map.reComputeSize();
}

void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker,
                                        PressureDiffs *PDiffs,
                                        LiveIntervals *LIS,
                                        bool TrackLaneMasks) {
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                       : ST.useAA();
  AAForDep = UseAA ? AA : nullptr;

  BarrierChain = nullptr;

  this->TrackLaneMasks = TrackLaneMasks;
  MISUnitMap.clear();
  ScheduleDAG::clearDAG();

  // Create an SUnit for each real instruction.
  initSUnits();

  if (PDiffs)
    PDiffs->init(SUnits.size());

  // We build scheduling units by walking a block's instruction list
  // from bottom to top.

  // Each MI's memory operand(s) are analyzed into a list of underlying
  // objects. The SU is then inserted in the SUList(s) mapped from the
  // Value(s). Each Value thus gets mapped to lists of SUs depending
  // on it, stores and loads kept separately. Two SUs are trivially
  // non-aliasing if they both depend on only identified Values and do
  // not share any common Value.
  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);

  // Certain memory accesses are known to not alias any SU in Stores
  // or Loads, and have therefore their own 'NonAlias'
  // domain. E.g. spill / reload instructions never alias LLVM IR
  // Values. It would be nice to assume that this type of memory
  // accesses always have a proper memory operand modelling, and are
  // therefore never unanalyzable, but this is conservatively not
  // done.
  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = nullptr;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setUniverse(TRI->getNumRegs());
  Uses.setUniverse(TRI->getNumRegs());

  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
  unsigned NumVirtRegs = MRI.getNumVirtRegs();
  CurrentVRegDefs.setUniverse(NumVirtRegs);
  CurrentVRegUses.setUniverse(NumVirtRegs);

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *DbgMI = nullptr;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (DbgMI) {
      DbgValues.push_back(std::make_pair(DbgMI, &MI));
      DbgMI = nullptr;
    }

    if (MI.isDebugValue()) {
      DbgMI = &MI;
      continue;
    }
    SUnit *SU = MISUnitMap[&MI];
    assert(SU && "No SUnit mapped to this MI");

    if (RPTracker) {
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
      if (TrackLaneMasks) {
        SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
      }
      if (PDiffs != nullptr)
        PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);

      if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
        RPTracker->recedeSkipDebugValues();
      assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
      RPTracker->recede(RegOpers);
    }

    assert(
        (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
        "Cannot schedule terminators or labels!");

    // Add register-based dependencies (data, anti, and output).
    // For some instructions (calls, returns, inline-asm, etc.) there can
    // be explicit uses and implicit defs, in which case the use will appear
    // on the operand list before the def. Do two passes over the operand
    // list to make sure that defs are processed before any uses.
    bool HasVRegDef = false;
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        HasVRegDef = true;
        addVRegDefDeps(SU, j);
      }
    }
    // Now process all uses.
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      // Only look at use operands.
      // We do not need to check for MO.readsReg() here because subsequent
      // subregister defs will get output dependence edges and need no
      // additional use dependencies.
      if (!MO.isReg() || !MO.isUse())
        continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(SU, j);
      }
    }

    // If we haven't seen any uses in this scheduling region, create a
    // dependence edge to ExitSU to model the live-out latency. This is required
    // for vreg defs with no in-region use, and prefetches with no vreg def.
    //
    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
    // check currently relies on being called before adding chain deps.
    if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
      SDep Dep(SU, SDep::Artificial);
      Dep.setLatency(SU->Latency - 1);
      ExitSU.addPred(Dep);
    }

    // Add memory dependencies (Note: isStoreToStackSlot and
    // isLoadFromStackSlot are not usable after stack slots are lowered to
    // actual addresses).

    // This is a barrier event that acts as a pivotal node in the DAG.
    if (isGlobalMemoryObject(AA, &MI)) {

      // Become the barrier chain.
      if (BarrierChain)
        BarrierChain->addPredBarrier(SU);
      BarrierChain = SU;

      DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);

      // Add dependencies against everything below it and clear maps.
      addBarrierChain(Stores);
      addBarrierChain(Loads);
      addBarrierChain(NonAliasStores);
      addBarrierChain(NonAliasLoads);

      continue;
    }

    // If it's not a store or a variant load, we're done.
    if (!MI.mayStore() &&
        !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
      continue;

    // Always add dependency edge to BarrierChain if present.
    if (BarrierChain)
      SU->addPredBarrier(BarrierChain);

    // Find the underlying objects for MI. The Objs vector is either
    // empty, or filled with the Values of memory locations which this
    // SU depends on.
    UnderlyingObjectsVector Objs;
    bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
                                                  MF.getDataLayout());

    if (MI.mayStore()) {
      if (!ObjsFound) {
        // An unknown store depends on all stores and loads.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);
        addChainDependencies(SU, Loads);
        addChainDependencies(SU, NonAliasLoads);

        // Map this store to 'UnknownValue'.
        Stores.insert(SU, UnknownValue);
      } else {
        // Add precise dependencies against all previously seen memory
        // accesses mapped to the same Value(s).
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add dependencies to previous stores and loads mapped to V.
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
          addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
        }
        // Update the store map after all chains have been added to avoid adding
        // self-loop edge if multiple underlying objects are present.
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Map this store to V.
          (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
        }
        // The store may have dependencies to unanalyzable loads and
        // stores.
        addChainDependencies(SU, Loads, UnknownValue);
        addChainDependencies(SU, Stores, UnknownValue);
      }
    } else { // SU is a load.
      if (!ObjsFound) {
        // An unknown load depends on all stores.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);

        Loads.insert(SU, UnknownValue);
      } else {
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add precise dependencies against all previously seen stores
          // mapping to the same Value(s).
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);

          // Map this load to V.
          (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
        }
        // The load may have dependencies to unanalyzable stores.
        addChainDependencies(SU, Stores, UnknownValue);
      }
    }

    // Reduce maps if they grow huge.
    if (Stores.size() + Loads.size() >= HugeRegion) {
      DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
      reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
    }
    if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
      DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
      reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
    }
  }

  if (DbgMI)
    FirstDbgValue = DbgMI;

  Defs.clear();
  Uses.clear();
  CurrentVRegDefs.clear();
  CurrentVRegUses.clear();
}
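
// Illustrative call site (sketch; actual drivers differ): a subclass's
// schedule() implementation typically starts with something like
//
//   buildSchedGraph(AA, RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
//
// after which SUnits and ExitSU hold the complete dependence graph for the
// current region and scheduling proper can begin.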

raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
  PSV->printCustom(OS);
  return OS;
}

void ScheduleDAGInstrs::Value2SUsMap::dump() {
  for (auto &Itr : *this) {
    if (Itr.first.is<const Value*>()) {
      const Value *V = Itr.first.get<const Value*>();
      if (isa<UndefValue>(V))
        dbgs() << "Unknown";
      else
        V->printAsOperand(dbgs());
    }
    else if (Itr.first.is<const PseudoSourceValue*>())
      dbgs() << Itr.first.get<const PseudoSourceValue*>();
    else
      llvm_unreachable("Unknown Value type.");

    dbgs() << " : ";
    dumpSUList(Itr.second);
  }
}

void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
                                              Value2SUsMap &loads, unsigned N) {
  DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
        stores.dump();
        dbgs() << "Loading SUnits:\n";
        loads.dump());

  // Insert all SU's NodeNums into a vector and sort it.
  std::vector<unsigned> NodeNums;
  NodeNums.reserve(stores.size() + loads.size());
  for (auto &I : stores)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  for (auto &I : loads)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  std::sort(NodeNums.begin(), NodeNums.end());

  // The N last elements in NodeNums will be removed, and the SU with
  // the lowest NodeNum of them will become the new BarrierChain to
  // let the not yet seen SUs have a dependency to the removed SUs.
  assert(N <= NodeNums.size());
  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
  if (BarrierChain) {
    // The aliasing and non-aliasing maps reduce independently of each
    // other, but share a common BarrierChain. Check if the
    // newBarrierChain is above the former one. If it is not, it may
    // introduce a loop to use newBarrierChain, so keep the old one.
    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
      BarrierChain->addPredBarrier(newBarrierChain);
      BarrierChain = newBarrierChain;
      DEBUG(dbgs() << "Inserting new barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);
    }
    else
      DEBUG(dbgs() << "Keeping old barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);
  }
  else
    BarrierChain = newBarrierChain;

  insertBarrierChain(stores);
  insertBarrierChain(loads);

  DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
        stores.dump();
        dbgs() << "Loading SUnits:\n";
        loads.dump());
}
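
// Worked example (illustrative): with the default -dag-maps-huge-region=1000
// and reduction size 500, once stores+loads together hold 1000 SUs the 500
// highest NodeNums are dropped from the maps; the lowest dropped NodeNum
// becomes the new BarrierChain, so SUs visited later still order with the
// dropped ones through a single barrier edge instead of one edge each.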

static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
                        MachineInstr &MI, bool addToLiveRegs) {
  for (MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !MO.readsReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;

    // Things that are available after the instruction are killed by it.
    bool IsKill = LiveRegs.available(MRI, Reg);
    MO.setIsKill(IsKill);
    if (addToLiveRegs)
      LiveRegs.addReg(Reg);
  }
}

void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
  DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');

  LiveRegs.init(*TRI);
  LiveRegs.addLiveOuts(MBB);

  // Examine block from end to start...
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    if (MI.isDebugValue())
      continue;

    // Update liveness. Registers that are defed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
      const MachineOperand &MO = *O;
      if (MO.isReg()) {
        if (!MO.isDef())
          continue;
        unsigned Reg = MO.getReg();
        if (!Reg)
          continue;
        LiveRegs.removeReg(Reg);
      } else if (MO.isRegMask()) {
        LiveRegs.removeRegsInMask(MO);
      }
    }

    // If there is a bundle header fix it up first.
    if (!MI.isBundled()) {
      toggleKills(MRI, LiveRegs, MI, true);
    } else {
      MachineBasicBlock::instr_iterator First = MI.getIterator();
      if (MI.isBundle()) {
        toggleKills(MRI, LiveRegs, MI, false);
        ++First;
      }
      // Some targets make the (questionable) assumption that the instructions
      // inside the bundle are ordered and consequently only the last use of
      // a register inside the bundle can kill it.
      MachineBasicBlock::instr_iterator I = std::next(First);
      while (I->isBundledWithSucc())
        ++I;
      do {
        if (!I->isDebugValue())
          toggleKills(MRI, LiveRegs, *I, true);
        --I;
      } while(I != First);
    }
  }
}
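
// Hypothetical example (illustrative): in
//   $eax = MOV32ri 1
//   CMP32rr killed $eax, $ebx
// the walk above recomputes liveness bottom-up; the CMP operand keeps its
// kill flag only if $eax is not live after the instruction, and a stale
// flag left over from before scheduling is cleared.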

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  SU->getInstr()->dump();
#endif
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss, /*SkipOpers=*/true);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}

//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//

namespace llvm {

/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
  SchedDFSResult &R;

  /// Join DAG nodes into equivalence classes by their subtree.
  IntEqClasses SubtreeClasses;
  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
  std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;

  struct RootData {
    unsigned NodeID;
    unsigned ParentNodeID;      ///< Parent node (member of the parent subtree).
    unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
                                /// children.

    RootData(unsigned id): NodeID(id),
                           ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}

    unsigned getSparseSetIndex() const { return NodeID; }
  };

  SparseSet<RootData> RootSet;

public:
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }

  /// Returns true if this node has been visited by the DFS traversal.
  ///
  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
  /// ID. Later, SubtreeID is updated but remains valid.
  bool isVisited(const SUnit *SU) const {
    return R.DFSNodeData[SU->NodeNum].SubtreeID
      != SchedDFSResult::InvalidSubtreeID;
  }

  /// Initializes this node's instruction count. We don't need to flag the node
  /// visited until visitPostorder because the DAG cannot have cycles.
  void visitPreorder(const SUnit *SU) {
    R.DFSNodeData[SU->NodeNum].InstrCount =
      SU->getInstr()->isTransient() ? 0 : 1;
  }

  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (const SDep &PredDep : SU->Preds) {
      if (PredDep.getKind() != SDep::Data)
        continue;
      unsigned PredNum = PredDep.getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }

  /// \brief Called once for each tree edge after calling visitPostOrderNode on
  /// the predecessor. Increment the parent node's instruction count and
  /// preemptively join this subtree to its parent's if it is small enough.
  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
    R.DFSNodeData[Succ->NodeNum].InstrCount
      += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
    joinPredSubtree(PredDep, Succ);
  }

  /// Adds a connection for cross edges.
  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
  }

  /// Sets each node's subtree ID to the representative ID and records
  /// connections between trees.
  void finalize() {
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    for (const RootData &Root : RootSet) {
      unsigned TreeID = SubtreeClasses[Root.NodeID];
      if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
            << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
      unsigned PredTree = SubtreeClasses[P.first->NodeNum];
      unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = P.first->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }

protected:
  /// Joins the predecessor subtree with the successor that is its DFS parent.
  /// Applies some heuristics before joining.
  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
                       bool CheckLimit = true) {
    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");

    // Check if the predecessor is already joined.
    const SUnit *PredSU = PredDep.getSUnit();
    unsigned PredNum = PredSU->NodeNum;
    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
      return false;

    // Four is the magic number of successors before a node is considered a
    // pinch point.
    unsigned NumDataSucs = 0;
    for (const SDep &SuccDep : PredSU->Succs) {
      if (SuccDep.getKind() == SDep::Data) {
        if (++NumDataSucs >= 4)
          return false;
      }
    }
    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
      return false;
    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
    SubtreeClasses.join(Succ->NodeNum, PredNum);
    return true;
  }

  /// Called by finalize() to record a connection between trees.
  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
    if (!Depth)
      return;

    do {
      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
        R.SubtreeConnections[FromTree];
      for (SchedDFSResult::Connection &C : Connections) {
        if (C.TreeID == ToTree) {
          C.Level = std::max(C.Level, Depth);
          return;
        }
      }
      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
  }
};

} // end namespace llvm

namespace {

/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
  std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;

public:
  bool isComplete() const { return DFSStack.empty(); }

  void follow(const SUnit *SU) {
    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
  }
  void advance() { ++DFSStack.back().second; }

  const SDep *backtrack() {
    DFSStack.pop_back();
    return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
  }

  const SUnit *getCurr() const { return DFSStack.back().first; }

  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }

  SUnit::const_pred_iterator getPredEnd() const {
    return getCurr()->Preds.end();
  }
};

} // end anonymous namespace

static bool hasDataSucc(const SUnit *SU) {
  for (const SDep &SuccDep : SU->Succs) {
    if (SuccDep.getKind() == SDep::Data &&
        !SuccDep.getSUnit()->isBoundaryNode())
      return true;
  }
  return false;
}

/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
  if (!IsBottomUp)
    llvm_unreachable("Top-down ILP metric is unimplemented");

  SchedDFSImpl Impl(*this);
  for (const SUnit &SU : SUnits) {
    if (Impl.isVisited(&SU) || hasDataSucc(&SU))
      continue;

    SchedDAGReverseDFS DFS;
    Impl.visitPreorder(&SU);
    DFS.follow(&SU);
    while (true) {
      // Traverse the leftmost path as far as possible.
      while (DFS.getPred() != DFS.getPredEnd()) {
        const SDep &PredDep = *DFS.getPred();
        DFS.advance();
        // Ignore non-data edges.
        if (PredDep.getKind() != SDep::Data
            || PredDep.getSUnit()->isBoundaryNode()) {
          continue;
        }
        // An already visited edge is a cross edge, assuming an acyclic DAG.
        if (Impl.isVisited(PredDep.getSUnit())) {
          Impl.visitCrossEdge(PredDep, DFS.getCurr());
          continue;
        }
        Impl.visitPreorder(PredDep.getSUnit());
        DFS.follow(PredDep.getSUnit());
      }
      // Visit the top of the stack in postorder and backtrack.
      const SUnit *Child = DFS.getCurr();
      const SDep *PredDep = DFS.backtrack();
      Impl.visitPostorderNode(Child);
      if (PredDep)
        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
      if (DFS.isComplete())
        break;
    }
  }
  Impl.finalize();
}
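
// Worked example (illustrative): a subtree containing 6 non-transient
// instructions whose longest data path has length 2 yields InstrCount == 6,
// and the resulting ILPValue prints as "6 / 2 = 3" via ILPValue::print()
// below, i.e. InstrCount divided by the critical path Length.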

/// The root of the given SubtreeID was just scheduled. For all subtrees
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
  for (const Connection &C : SubtreeConnections[SubtreeID]) {
    SubtreeConnectLevels[C.TreeID] =
      std::max(SubtreeConnectLevels[C.TreeID], C.Level);
    DEBUG(dbgs() << "  Tree: " << C.TreeID
          << " @" << SubtreeConnectLevels[C.TreeID] << '\n');
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
  OS << InstrCount << " / " << Length << " = ";
  if (!Length)
    OS << "BADILP";
  else
    OS << format("%g", ((double)InstrCount / Length));
}

LLVM_DUMP_METHOD void ILPValue::dump() const {
  dbgs() << *this << '\n';
}

namespace llvm {

LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
  Val.print(OS);
  return OS;
}

} // end namespace llvm

#endif
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
iterator end()
Returns an iterator past this container.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
void push_back(const T &Elt)
Definition: SmallVector.h:212
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
virtual void finishBlock()
Cleans up after scheduling in the given block.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
A common definition of LaneBitmask for use in TableGen and CodeGen.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
void clear()
Definition: MapVector.h:85
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:461
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
bool available(const MachineRegisterInfo &MRI, unsigned Reg) const
Returns true if register Reg and no aliasing register is in the set.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
void init(unsigned N)
Initialize an array of N PressureDiffs.
Record a physical register access.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
LivePhysRegs LiveRegs
Set of live physical registers for updating kill flags.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit *SU) const override
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void removeRegsInMask(const MachineOperand &MO, SmallVectorImpl< std::pair< unsigned, const MachineOperand *>> *Clobbers=nullptr)
Removes physical registers clobbered by the regmask operand MO.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:387
void setIsUndef(bool Val=true)
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:35
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void init(const MCSchedModel &sm, const TargetSubtargetInfo *sti, const TargetInstrInfo *tii)
Initialize the machine model for instruction scheduling.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
void compress()
compress - Compress equivalence classes by numbering them 0 .
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
Definition: MachineInstr.h:903
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
bool hasOneDef(unsigned RegNo) const
Return true if there is exactly one operand defining the specified register.
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI DAG construction"))
static unsigned InstrCount
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:399
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:335
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU&#39;s instruction that defines a physical register.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
The two locations may or may not alias. This is the least precise result.
Definition: AliasAnalysis.h:87
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
void clearList(ValueType V)
Clears the list of SUs mapped to V.
iterator_range< succ_iterator > successors()
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:570
void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
A description of a memory reference used in the backend.
unsigned NumSuccs
of SDep::Data sucss.
Definition: ScheduleDAG.h:272
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to scheduling...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
&#39;undef&#39; values are things that do not have specified contents.
Definition: Constants.h:1247
An individual mapping from virtual register number to SUnit.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:296
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:371
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:266
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise. ...
Definition: SparseSet.h:235
static bool hasDataSucc(const SUnit *SU)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
unsigned SubReg
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:284
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:84
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:477
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:95
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:66
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:56
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:294
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx ...
void print(raw_ostream &OS) const
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:190
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
List of registers defined and used by a machine instruction.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of register in this class.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
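Pulling the neighboring callbacks together, a hedged sketch of the per-region driver sequence as a scheduler typically issues it; DAG (a ScheduleDAGInstrs subclass), AA, and the region bounds are assumed to be set up elsewhere:
  DAG->startBlock(MBB);
  DAG->enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
  DAG->buildSchedGraph(AA);  // creates SUnits and all dependence edges
  // ... run the scheduling algorithm over DAG->SUnits ...
  DAG->exitRegion();
  DAG->finishBlock();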
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
iterator find(const KeyT &Key)
Definition: MapVector.h:144
reverse_iterator rend()
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
bool mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA)
Returns true if this instruction's memory access aliases the memory access of Other.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
reverse_iterator rbegin()
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:349
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:164
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:285
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Scheduling dependency.
Definition: ScheduleDAG.h:50
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:293
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:642
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register informa...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool isCall
Is a function call.
Definition: ScheduleDAG.h:280
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:378
Array of PressureDiffs.
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:59
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static unsigned getReductionSize()
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:55
bool registerDefIsDead(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
Definition: MachineInstr.h:975
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
Internal state used to compute SchedDFSResult.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void reComputeSize()
Counts the number of SUs in this map after a reduction.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:101
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:72
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
MCRegAliasIterator enumerates all registers aliasing Reg.
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
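A minimal usage sketch: setUniverse() must be called before any insertion, e.g. with the target's physical-register count when keys are register numbers. NumRegs is an assumed bound:
  #include "llvm/ADT/SparseSet.h"

  static void demoSparseSet(unsigned NumRegs) {
    llvm::SparseSet<unsigned> S;
    S.setUniverse(NumRegs); // must precede any insertion
    S.insert(42);           // assumes 42 < NumRegs
    if (S.count(42))        // O(1) membership test
      S.erase(S.find(42));
  }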
const MachineFrameInfo & MFI
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
void clear()
Clears the set.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
bool isVisited(const SUnit *SU) const
Returns true if this node has been visited by the DFS traversal.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:835
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:3575
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:478
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
void setIsKill(bool Val=true)
iterator find(const KeyT &Key)
Find an element by its key.
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the ...
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
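A small sketch of the join/compress workflow with arbitrary keys; classes are unioned first, then compress() renumbers them densely so they can index arrays:
  #include "llvm/ADT/IntEqClasses.h"

  static unsigned demoIntEqClasses() {
    llvm::IntEqClasses EC(8);  // keys 0..7 start in singleton classes
    EC.join(1, 3);
    EC.join(3, 5);             // 1, 3 and 5 now share one class
    EC.compress();             // renumber classes densely: 0..N-1
    return EC.getNumClasses(); // 6 here: two joins merged three keys
  }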
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:283
Iterator for intrusive lists based on ilist_node.
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
bool isDebugValue() const
Definition: MachineInstr.h:819
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
Reg2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instr...
void eraseAll(const KeyT &K)
Erase all elements with the given key.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx. ...
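A hedged sketch of how such masks are commonly used: two sub-registers of the same register overlap exactly when their lane masks intersect. TRI, SubIdxA and SubIdxB are assumed:
  llvm::LaneBitmask MaskA = TRI->getSubRegIndexLaneMask(SubIdxA);
  llvm::LaneBitmask MaskB = TRI->getSubRegIndexLaneMask(SubIdxB);
  if ((MaskA & MaskB).any()) {
    // the two sub-registers share at least one lane
  }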
Nonvolatile load/store instructions that may alias.
Definition: ScheduleDAG.h:71
typename std::vector<std::pair<ValueType, SUList>>::iterator iterator
Definition: MapVector.h:46
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store fol...
Definition: ScheduleDAG.h:389
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
Special value supplied for machine level alias analysis.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
iterator erase(iterator I)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:573
Representation of each machine instruction.
Definition: MachineInstr.h:60
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:574
void finalize()
Sets each node&#39;s subtree ID to the representative ID and record connections between trees...
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:569
void removeReg(unsigned Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set...
Definition: LivePhysRegs.h:89
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:49
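A minimal sketch of the usual backward walk, essentially what kill-flag maintenance does: seed the set with the block's live-outs, then step bottom-up over the instructions:
  #include "llvm/ADT/iterator_range.h"
  #include "llvm/CodeGen/LivePhysRegs.h"

  static void walkLiveness(llvm::MachineBasicBlock &MBB,
                           const llvm::TargetRegisterInfo &TRI) {
    llvm::LivePhysRegs LiveRegs;
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(MBB); // seed with the block's live-outs
    for (llvm::MachineInstr &MI : llvm::make_range(MBB.rbegin(), MBB.rend()))
      LiveRegs.stepBackward(MI); // defs leave the set, uses enter it
  }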
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
constexpr bool any() const
Definition: LaneBitmask.h:53
bool empty() const
Returns true if the set is empty.
Mapping from virtual register to SUnit including an operand index.
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AliasAnalysis and Target fail to deny the dep...
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:568
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:496
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2018
void print(raw_ostream &OS, bool SkipOpers=false, bool SkipDebugLoc=false, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
iterator begin()
Definition: MapVector.h:66
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and return a ptr to it.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clear the map.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:629
iterator end()
Definition: MapVector.h:68
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
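A hedged sketch of enumerating the model's resources; SM is an assumed MCSchedModel reference, and index 0 is the invalid sentinel, so iteration starts at 1:
  for (unsigned PIdx = 1, E = SM.getNumProcResourceKinds(); PIdx != E; ++PIdx) {
    const llvm::MCProcResourceDesc *PRD = SM.getProcResource(PIdx);
    llvm::errs() << PRD->Name << ": " << PRD->NumUnits << " units\n";
  }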
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
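A sketch connecting addPred() to the dependence kinds listed above: inserting an artificial ordering edge between two assumed SUnits, SUA and SUB:
  llvm::SDep Edge(SUA, llvm::SDep::Artificial); // order edge, no real dependence
  Edge.setLatency(0);
  SUB->addPred(Edge); // SUB must now be scheduled after SUA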
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:462
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
LLVM Value Representation.
Definition: Value.h:73
void clear()
Clears map from all contents.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
bool isPosition() const
Definition: MachineInstr.h:817
SmallVector< SDep, 4 > Succs
All SUnit successors.
Definition: ScheduleDAG.h:262
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
static bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
MachineBasicBlock * BB
The block in which to insert instructions.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:468
static void dumpSUList(ScheduleDAGInstrs::SUList &L)
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:571
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:572
SchedDFSImpl(SchedDFSResult &r)
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:298
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
bool hasTailCall() const
Returns true if the function contains a tail call.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
A discriminated union of two pointer types, with the discriminator in the low bit of the pointer...
Definition: PointerUnion.h:87
void addReg(unsigned Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
void resize(size_type N)
Definition: SmallVector.h:353