LLVM  6.0.0svn
ScheduleDAGInstrs.cpp
Go to the documentation of this file.
1 //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This implements the ScheduleDAGInstrs class, which implements
11 /// re-scheduling of MachineInstrs.
12 //
13 //===----------------------------------------------------------------------===//
14 
// NOTE(review): the extraction dropped include lines 15, 21-38, 49, 52 and
// 54-56 of the original file. The reconstructed entries below are those the
// visible code demonstrably needs (cl::opt, llvm_unreachable, raw_ostream,
// AliasAnalysis, getUnderlyingObjectsForCodeGen, the class's own header, ...);
// verify the exact set against upstream LLVM 6.0.
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
#include <vector>
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "machine-scheduler"
67 
68 static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
69  cl::ZeroOrMore, cl::init(false),
70  cl::desc("Enable use of AA during MI DAG construction"));
71 
72 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
73  cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
74 
75 // Note: the two options below might be used in tuning compile time vs
76 // output quality. Setting HugeRegion so large that it will never be
77 // reached means best-effort, but may be slow.
78 
79 // When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
80 // together hold this many SUs, a reduction of maps will be done.
81 static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
82  cl::init(1000), cl::desc("The limit to use while constructing the DAG "
83  "prior to scheduling, at which point a trade-off "
84  "is made to avoid excessive compile time."));
85 
87  "dag-maps-reduction-size", cl::Hidden,
88  cl::desc("A huge scheduling region will have maps reduced by this many "
89  "nodes at a time. Defaults to HugeRegion / 2."));
90 
91 static unsigned getReductionSize() {
92  // Always reduce a huge region with half of the elements, except
93  // when user sets this number explicitly.
94  if (ReductionSize.getNumOccurrences() == 0)
95  return HugeRegion / 2;
96  return ReductionSize;
97 }
98 
100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
101  dbgs() << "{ ";
102  for (const SUnit *su : L) {
103  dbgs() << "SU(" << su->NodeNum << ")";
104  if (su != L.back())
105  dbgs() << ", ";
106  }
107  dbgs() << "}\n";
108 #endif
109 }
110 
112  const MachineLoopInfo *mli,
113  bool RemoveKillFlags)
114  : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
115  RemoveKillFlags(RemoveKillFlags),
116  UnknownValue(UndefValue::get(
117  Type::getVoidTy(mf.getFunction()->getContext()))) {
118  DbgValues.clear();
119 
120  const TargetSubtargetInfo &ST = mf.getSubtarget();
122 }
123 
124 /// If this machine instr has memory reference information and it can be tracked
125 /// to a normal reference to a known object, return the Value for that object.
127  const MachineFrameInfo &MFI,
128  UnderlyingObjectsVector &Objects,
129  const DataLayout &DL) {
130  auto allMMOsOkay = [&]() {
131  for (const MachineMemOperand *MMO : MI->memoperands()) {
132  if (MMO->isVolatile())
133  return false;
134 
135  if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
136  // Function that contain tail calls don't have unique PseudoSourceValue
137  // objects. Two PseudoSourceValues might refer to the same or
138  // overlapping locations. The client code calling this function assumes
139  // this is not the case. So return a conservative answer of no known
140  // object.
141  if (MFI.hasTailCall())
142  return false;
143 
144  // For now, ignore PseudoSourceValues which may alias LLVM IR values
145  // because the code that uses this function has no way to cope with
146  // such aliases.
147  if (PSV->isAliased(&MFI))
148  return false;
149 
150  bool MayAlias = PSV->mayAlias(&MFI);
151  Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
152  } else if (const Value *V = MMO->getValue()) {
154  getUnderlyingObjectsForCodeGen(V, Objs, DL);
155 
156  for (Value *V : Objs) {
159  }
160  } else
161  return false;
162  }
163  return true;
164  };
165 
166  if (!allMMOsOkay())
167  Objects.clear();
168 }
169 
171  BB = bb;
172 }
173 
175  // Subclasses should no longer refer to the old block.
176  BB = nullptr;
177 }
178 
182  unsigned regioninstrs) {
183  assert(bb == BB && "startBlock should set BB");
184  RegionBegin = begin;
185  RegionEnd = end;
186  NumRegionInstrs = regioninstrs;
187 }
188 
190  // Nothing to do.
191 }
192 
194  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
195  ExitSU.setInstr(ExitMI);
196  // Add dependencies on the defs and uses of the instruction.
197  if (ExitMI) {
198  for (const MachineOperand &MO : ExitMI->operands()) {
199  if (!MO.isReg() || MO.isDef()) continue;
200  unsigned Reg = MO.getReg();
202  Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
203  } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
204  addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
205  }
206  }
207  }
208  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
209  // For others, e.g. fallthrough, conditional branch, assume the exit
210  // uses all the registers that are livein to the successor blocks.
211  for (const MachineBasicBlock *Succ : BB->successors()) {
212  for (const auto &LI : Succ->liveins()) {
213  if (!Uses.contains(LI.PhysReg))
214  Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
215  }
216  }
217  }
218 }
219 
220 /// MO is an operand of SU's instruction that defines a physical register. Adds
221 /// data dependencies from SU to any uses of the physical register.
222 void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
223  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
224  assert(MO.isDef() && "expect physreg def");
225 
226  // Ask the target if address-backscheduling is desirable, and if so how much.
228 
229  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
230  Alias.isValid(); ++Alias) {
231  if (!Uses.contains(*Alias))
232  continue;
233  for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
234  SUnit *UseSU = I->SU;
235  if (UseSU == SU)
236  continue;
237 
238  // Adjust the dependence latency using operand def/use information,
239  // then allow the target to perform its own adjustments.
240  int UseOp = I->OpIdx;
241  MachineInstr *RegUse = nullptr;
242  SDep Dep;
243  if (UseOp < 0)
244  Dep = SDep(SU, SDep::Artificial);
245  else {
246  // Set the hasPhysRegDefs only for physreg defs that have a use within
247  // the scheduling region.
248  SU->hasPhysRegDefs = true;
249  Dep = SDep(SU, SDep::Data, *Alias);
250  RegUse = UseSU->getInstr();
251  }
252  Dep.setLatency(
253  SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
254  UseOp));
255 
256  ST.adjustSchedDependency(SU, UseSU, Dep);
257  UseSU->addPred(Dep);
258  }
259  }
260 }
261 
262 /// \brief Adds register dependencies (data, anti, and output) from this SUnit
263 /// to following instructions in the same scheduling region that depend the
264 /// physical register referenced at OperIdx.
265 void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
266  MachineInstr *MI = SU->getInstr();
267  MachineOperand &MO = MI->getOperand(OperIdx);
268  unsigned Reg = MO.getReg();
269  // We do not need to track any dependencies for constant registers.
270  if (MRI.isConstantPhysReg(Reg))
271  return;
272 
273  // Optionally add output and anti dependencies. For anti
274  // dependencies we use a latency of 0 because for a multi-issue
275  // target we want to allow the defining instruction to issue
276  // in the same cycle as the using instruction.
277  // TODO: Using a latency of 1 here for output dependencies assumes
278  // there's no cost for reusing registers.
280  for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
281  if (!Defs.contains(*Alias))
282  continue;
283  for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
284  SUnit *DefSU = I->SU;
285  if (DefSU == &ExitSU)
286  continue;
287  if (DefSU != SU &&
288  (Kind != SDep::Output || !MO.isDead() ||
289  !DefSU->getInstr()->registerDefIsDead(*Alias))) {
290  if (Kind == SDep::Anti)
291  DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
292  else {
293  SDep Dep(SU, Kind, /*Reg=*/*Alias);
294  Dep.setLatency(
295  SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
296  DefSU->addPred(Dep);
297  }
298  }
299  }
300  }
301 
302  if (!MO.isDef()) {
303  SU->hasPhysRegUses = true;
304  // Either insert a new Reg2SUnits entry with an empty SUnits list, or
305  // retrieve the existing SUnits list for this register's uses.
306  // Push this SUnit on the use list.
307  Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
308  if (RemoveKillFlags)
309  MO.setIsKill(false);
310  } else {
311  addPhysRegDataDeps(SU, OperIdx);
312 
313  // clear this register's use list
314  if (Uses.contains(Reg))
315  Uses.eraseAll(Reg);
316 
317  if (!MO.isDead()) {
318  Defs.eraseAll(Reg);
319  } else if (SU->isCall) {
320  // Calls will not be reordered because of chain dependencies (see
321  // below). Since call operands are dead, calls may continue to be added
322  // to the DefList making dependence checking quadratic in the size of
323  // the block. Instead, we leave only one call at the back of the
324  // DefList.
326  Reg2SUnitsMap::iterator B = P.first;
327  Reg2SUnitsMap::iterator I = P.second;
328  for (bool isBegin = I == B; !isBegin; /* empty */) {
329  isBegin = (--I) == B;
330  if (!I->SU->isCall)
331  break;
332  I = Defs.erase(I);
333  }
334  }
335 
336  // Defs are pushed in the order they are visited and never reordered.
337  Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
338  }
339 }
340 
342 {
343  unsigned Reg = MO.getReg();
344  // No point in tracking lanemasks if we don't have interesting subregisters.
345  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
346  if (!RC.HasDisjunctSubRegs)
347  return LaneBitmask::getAll();
348 
349  unsigned SubReg = MO.getSubReg();
350  if (SubReg == 0)
351  return RC.getLaneMask();
352  return TRI->getSubRegIndexLaneMask(SubReg);
353 }
354 
355 /// Adds register output and data dependencies from this SUnit to instructions
356 /// that occur later in the same scheduling region if they read from or write to
357 /// the virtual register defined at OperIdx.
358 ///
359 /// TODO: Hoist loop induction variable increments. This has to be
360 /// reevaluated. Generally, IV scheduling should be done before coalescing.
361 void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
362  MachineInstr *MI = SU->getInstr();
363  MachineOperand &MO = MI->getOperand(OperIdx);
364  unsigned Reg = MO.getReg();
365 
366  LaneBitmask DefLaneMask;
367  LaneBitmask KillLaneMask;
368  if (TrackLaneMasks) {
369  bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
370  DefLaneMask = getLaneMaskForMO(MO);
371  // If we have a <read-undef> flag, none of the lane values comes from an
372  // earlier instruction.
373  KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
374 
375  // Clear undef flag, we'll re-add it later once we know which subregister
376  // Def is first.
377  MO.setIsUndef(false);
378  } else {
379  DefLaneMask = LaneBitmask::getAll();
380  KillLaneMask = LaneBitmask::getAll();
381  }
382 
383  if (MO.isDead()) {
385  "Dead defs should have no uses");
386  } else {
387  // Add data dependence to all uses we found so far.
390  E = CurrentVRegUses.end(); I != E; /*empty*/) {
391  LaneBitmask LaneMask = I->LaneMask;
392  // Ignore uses of other lanes.
393  if ((LaneMask & KillLaneMask).none()) {
394  ++I;
395  continue;
396  }
397 
398  if ((LaneMask & DefLaneMask).any()) {
399  SUnit *UseSU = I->SU;
400  MachineInstr *Use = UseSU->getInstr();
401  SDep Dep(SU, SDep::Data, Reg);
402  Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
403  I->OperandIndex));
404  ST.adjustSchedDependency(SU, UseSU, Dep);
405  UseSU->addPred(Dep);
406  }
407 
408  LaneMask &= ~KillLaneMask;
409  // If we found a Def for all lanes of this use, remove it from the list.
410  if (LaneMask.any()) {
411  I->LaneMask = LaneMask;
412  ++I;
413  } else
415  }
416  }
417 
418  // Shortcut: Singly defined vregs do not have output/anti dependencies.
419  if (MRI.hasOneDef(Reg))
420  return;
421 
422  // Add output dependence to the next nearest defs of this vreg.
423  //
424  // Unless this definition is dead, the output dependence should be
425  // transitively redundant with antidependencies from this definition's
426  // uses. We're conservative for now until we have a way to guarantee the uses
427  // are not eliminated sometime during scheduling. The output dependence edge
428  // is also useful if output latency exceeds def-use latency.
429  LaneBitmask LaneMask = DefLaneMask;
430  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
431  CurrentVRegDefs.end())) {
432  // Ignore defs for other lanes.
433  if ((V2SU.LaneMask & LaneMask).none())
434  continue;
435  // Add an output dependence.
436  SUnit *DefSU = V2SU.SU;
437  // Ignore additional defs of the same lanes in one instruction. This can
438  // happen because lanemasks are shared for targets with too many
439  // subregisters. We also use some representration tricks/hacks where we
440  // add super-register defs/uses, to imply that although we only access parts
441  // of the reg we care about the full one.
442  if (DefSU == SU)
443  continue;
444  SDep Dep(SU, SDep::Output, Reg);
445  Dep.setLatency(
446  SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
447  DefSU->addPred(Dep);
448 
449  // Update current definition. This can get tricky if the def was about a
450  // bigger lanemask before. We then have to shrink it and create a new
451  // VReg2SUnit for the non-overlapping part.
452  LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
453  LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
454  V2SU.SU = SU;
455  V2SU.LaneMask = OverlapMask;
456  if (NonOverlapMask.any())
457  CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
458  }
459  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
460  if (LaneMask.any())
461  CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
462 }
463 
464 /// \brief Adds a register data dependency if the instruction that defines the
465 /// virtual register used at OperIdx is mapped to an SUnit. Add a register
466 /// antidependency from this SUnit to instructions that occur later in the same
467 /// scheduling region if they write the virtual register.
468 ///
469 /// TODO: Handle ExitSU "uses" properly.
470 void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
471  const MachineInstr *MI = SU->getInstr();
472  const MachineOperand &MO = MI->getOperand(OperIdx);
473  unsigned Reg = MO.getReg();
474 
475  // Remember the use. Data dependencies will be added when we find the def.
478  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
479 
480  // Add antidependences to the following defs of the vreg.
481  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
482  CurrentVRegDefs.end())) {
483  // Ignore defs for unrelated lanes.
484  LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
485  if ((PrevDefLaneMask & LaneMask).none())
486  continue;
487  if (V2SU.SU == SU)
488  continue;
489 
490  V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
491  }
492 }
493 
494 /// Returns true if MI is an instruction we are unable to reason about
495 /// (like a call or something with unmodeled side effects).
497  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
499 }
500 
502  unsigned Latency) {
503  if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
504  SDep Dep(SUa, SDep::MayAliasMem);
505  Dep.setLatency(Latency);
506  SUb->addPred(Dep);
507  }
508 }
509 
510 /// \brief Creates an SUnit for each real instruction, numbered in top-down
511 /// topological order. The instruction order A < B, implies that no edge exists
512 /// from B to A.
513 ///
514 /// Map each real instruction to its SUnit.
515 ///
516 /// After initSUnits, the SUnits vector cannot be resized and the scheduler may
517 /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
518 /// instead of pointers.
519 ///
520 /// MachineScheduler relies on initSUnits numbering the nodes by their order in
521 /// the original instruction list.
523  // We'll be allocating one SUnit for each real instruction in the region,
524  // which is contained within a basic block.
525  SUnits.reserve(NumRegionInstrs);
526 
528  if (MI.isDebugValue())
529  continue;
530 
531  SUnit *SU = newSUnit(&MI);
532  MISUnitMap[&MI] = SU;
533 
534  SU->isCall = MI.isCall();
535  SU->isCommutable = MI.isCommutable();
536 
537  // Assign the Latency field of SU using target-provided information.
538  SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
539 
540  // If this SUnit uses a reserved or unbuffered resource, mark it as such.
541  //
542  // Reserved resources block an instruction from issuing and stall the
543  // entire pipeline. These are identified by BufferSize=0.
544  //
545  // Unbuffered resources prevent execution of subsequent instructions that
546  // require the same resources. This is used for in-order execution pipelines
547  // within an out-of-order core. These are identified by BufferSize=1.
549  const MCSchedClassDesc *SC = getSchedClass(SU);
550  for (const MCWriteProcResEntry &PRE :
553  switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
554  case 0:
555  SU->hasReservedResource = true;
556  break;
557  case 1:
558  SU->isUnbuffered = true;
559  break;
560  default:
561  break;
562  }
563  }
564  }
565  }
566 }
567 
568 class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
569  /// Current total number of SUs in map.
570  unsigned NumNodes = 0;
571 
572  /// 1 for loads, 0 for stores. (see comment in SUList)
573  unsigned TrueMemOrderLatency;
574 
575 public:
576  Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
577 
578  /// To keep NumNodes up to date, insert() is used instead of
579  /// this operator w/ push_back().
580  ValueType &operator[](const SUList &Key) {
581  llvm_unreachable("Don't use. Use insert() instead."); };
582 
583  /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
584  /// reduce().
585  void inline insert(SUnit *SU, ValueType V) {
586  MapVector::operator[](V).push_back(SU);
587  NumNodes++;
588  }
589 
590  /// Clears the list of SUs mapped to V.
591  void inline clearList(ValueType V) {
592  iterator Itr = find(V);
593  if (Itr != end()) {
594  assert(NumNodes >= Itr->second.size());
595  NumNodes -= Itr->second.size();
596 
597  Itr->second.clear();
598  }
599  }
600 
601  /// Clears map from all contents.
602  void clear() {
604  NumNodes = 0;
605  }
606 
607  unsigned inline size() const { return NumNodes; }
608 
609  /// Counts the number of SUs in this map after a reduction.
610  void reComputeSize() {
611  NumNodes = 0;
612  for (auto &I : *this)
613  NumNodes += I.second.size();
614  }
615 
616  unsigned inline getTrueMemOrderLatency() const {
617  return TrueMemOrderLatency;
618  }
619 
620  void dump();
621 };
622 
624  Value2SUsMap &Val2SUsMap) {
625  for (auto &I : Val2SUsMap)
626  addChainDependencies(SU, I.second,
627  Val2SUsMap.getTrueMemOrderLatency());
628 }
629 
631  Value2SUsMap &Val2SUsMap,
632  ValueType V) {
633  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
634  if (Itr != Val2SUsMap.end())
635  addChainDependencies(SU, Itr->second,
636  Val2SUsMap.getTrueMemOrderLatency());
637 }
638 
640  assert(BarrierChain != nullptr);
641 
642  for (auto &I : map) {
643  SUList &sus = I.second;
644  for (auto *SU : sus)
645  SU->addPredBarrier(BarrierChain);
646  }
647  map.clear();
648 }
649 
651  assert(BarrierChain != nullptr);
652 
653  // Go through all lists of SUs.
654  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
655  Value2SUsMap::iterator CurrItr = I++;
656  SUList &sus = CurrItr->second;
657  SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
658  for (; SUItr != SUEE; ++SUItr) {
659  // Stop on BarrierChain or any instruction above it.
660  if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
661  break;
662 
663  (*SUItr)->addPredBarrier(BarrierChain);
664  }
665 
666  // Remove also the BarrierChain from list if present.
667  if (SUItr != SUEE && *SUItr == BarrierChain)
668  SUItr++;
669 
670  // Remove all SUs that are now successors of BarrierChain.
671  if (SUItr != sus.begin())
672  sus.erase(sus.begin(), SUItr);
673  }
674 
675  // Remove all entries with empty su lists.
676  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
677  return (mapEntry.second.empty()); });
678 
679  // Recompute the size of the map (NumNodes).
680  map.reComputeSize();
681 }
682 
684  RegPressureTracker *RPTracker,
685  PressureDiffs *PDiffs,
686  LiveIntervals *LIS,
687  bool TrackLaneMasks) {
689  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
690  : ST.useAA();
691  AAForDep = UseAA ? AA : nullptr;
692 
693  BarrierChain = nullptr;
694 
695  this->TrackLaneMasks = TrackLaneMasks;
696  MISUnitMap.clear();
698 
699  // Create an SUnit for each real instruction.
700  initSUnits();
701 
702  if (PDiffs)
703  PDiffs->init(SUnits.size());
704 
705  // We build scheduling units by walking a block's instruction list
706  // from bottom to top.
707 
708  // Each MIs' memory operand(s) is analyzed to a list of underlying
709  // objects. The SU is then inserted in the SUList(s) mapped from the
710  // Value(s). Each Value thus gets mapped to lists of SUs depending
711  // on it, stores and loads kept separately. Two SUs are trivially
712  // non-aliasing if they both depend on only identified Values and do
713  // not share any common Value.
714  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
715 
716  // Certain memory accesses are known to not alias any SU in Stores
717  // or Loads, and have therefore their own 'NonAlias'
718  // domain. E.g. spill / reload instructions never alias LLVM I/R
719  // Values. It would be nice to assume that this type of memory
720  // accesses always have a proper memory operand modelling, and are
721  // therefore never unanalyzable, but this is conservatively not
722  // done.
723  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
724 
725  // Remove any stale debug info; sometimes BuildSchedGraph is called again
726  // without emitting the info from the previous call.
727  DbgValues.clear();
728  FirstDbgValue = nullptr;
729 
730  assert(Defs.empty() && Uses.empty() &&
731  "Only BuildGraph should update Defs/Uses");
734 
735  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
736  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
737  unsigned NumVirtRegs = MRI.getNumVirtRegs();
738  CurrentVRegDefs.setUniverse(NumVirtRegs);
739  CurrentVRegUses.setUniverse(NumVirtRegs);
740 
741  // Model data dependencies between instructions being scheduled and the
742  // ExitSU.
744 
745  // Walk the list of instructions, from bottom moving up.
746  MachineInstr *DbgMI = nullptr;
748  MII != MIE; --MII) {
749  MachineInstr &MI = *std::prev(MII);
750  if (DbgMI) {
751  DbgValues.push_back(std::make_pair(DbgMI, &MI));
752  DbgMI = nullptr;
753  }
754 
755  if (MI.isDebugValue()) {
756  DbgMI = &MI;
757  continue;
758  }
759  SUnit *SU = MISUnitMap[&MI];
760  assert(SU && "No SUnit mapped to this MI");
761 
762  if (RPTracker) {
763  RegisterOperands RegOpers;
764  RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
765  if (TrackLaneMasks) {
766  SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
767  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
768  }
769  if (PDiffs != nullptr)
770  PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
771 
772  RPTracker->recedeSkipDebugValues();
773  assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
774  RPTracker->recede(RegOpers);
775  }
776 
777  assert(
778  (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
779  "Cannot schedule terminators or labels!");
780 
781  // Add register-based dependencies (data, anti, and output).
782  // For some instructions (calls, returns, inline-asm, etc.) there can
783  // be explicit uses and implicit defs, in which case the use will appear
784  // on the operand list before the def. Do two passes over the operand
785  // list to make sure that defs are processed before any uses.
786  bool HasVRegDef = false;
787  for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
788  const MachineOperand &MO = MI.getOperand(j);
789  if (!MO.isReg() || !MO.isDef())
790  continue;
791  unsigned Reg = MO.getReg();
793  addPhysRegDeps(SU, j);
794  } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
795  HasVRegDef = true;
796  addVRegDefDeps(SU, j);
797  }
798  }
799  // Now process all uses.
800  for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
801  const MachineOperand &MO = MI.getOperand(j);
802  // Only look at use operands.
803  // We do not need to check for MO.readsReg() here because subsequent
804  // subregister defs will get output dependence edges and need no
805  // additional use dependencies.
806  if (!MO.isReg() || !MO.isUse())
807  continue;
808  unsigned Reg = MO.getReg();
810  addPhysRegDeps(SU, j);
811  } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
812  addVRegUseDeps(SU, j);
813  }
814  }
815 
816  // If we haven't seen any uses in this scheduling region, create a
817  // dependence edge to ExitSU to model the live-out latency. This is required
818  // for vreg defs with no in-region use, and prefetches with no vreg def.
819  //
820  // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
821  // check currently relies on being called before adding chain deps.
822  if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
823  SDep Dep(SU, SDep::Artificial);
824  Dep.setLatency(SU->Latency - 1);
825  ExitSU.addPred(Dep);
826  }
827 
828  // Add memory dependencies (Note: isStoreToStackSlot and
829  // isLoadFromStackSLot are not usable after stack slots are lowered to
830  // actual addresses).
831 
832  // This is a barrier event that acts as a pivotal node in the DAG.
833  if (isGlobalMemoryObject(AA, &MI)) {
834 
835  // Become the barrier chain.
836  if (BarrierChain)
838  BarrierChain = SU;
839 
840  DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
841  << BarrierChain->NodeNum << ").\n";);
842 
843  // Add dependencies against everything below it and clear maps.
844  addBarrierChain(Stores);
845  addBarrierChain(Loads);
846  addBarrierChain(NonAliasStores);
847  addBarrierChain(NonAliasLoads);
848 
849  continue;
850  }
851 
852  // If it's not a store or a variant load, we're done.
853  if (!MI.mayStore() &&
854  !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
855  continue;
856 
857  // Always add dependecy edge to BarrierChain if present.
858  if (BarrierChain)
860 
861  // Find the underlying objects for MI. The Objs vector is either
862  // empty, or filled with the Values of memory locations which this
863  // SU depends on. An empty vector means the memory location is
864  // unknown, and may alias anything.
867 
868  if (MI.mayStore()) {
869  if (Objs.empty()) {
870  // An unknown store depends on all stores and loads.
871  addChainDependencies(SU, Stores);
872  addChainDependencies(SU, NonAliasStores);
873  addChainDependencies(SU, Loads);
874  addChainDependencies(SU, NonAliasLoads);
875 
876  // Map this store to 'UnknownValue'.
877  Stores.insert(SU, UnknownValue);
878  } else {
879  // Add precise dependencies against all previously seen memory
880  // accesses mapped to the same Value(s).
881  for (const UnderlyingObject &UnderlObj : Objs) {
882  ValueType V = UnderlObj.getValue();
883  bool ThisMayAlias = UnderlObj.mayAlias();
884 
885  // Add dependencies to previous stores and loads mapped to V.
886  addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
887  addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
888  }
889  // Update the store map after all chains have been added to avoid adding
890  // self-loop edge if multiple underlying objects are present.
891  for (const UnderlyingObject &UnderlObj : Objs) {
892  ValueType V = UnderlObj.getValue();
893  bool ThisMayAlias = UnderlObj.mayAlias();
894 
895  // Map this store to V.
896  (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
897  }
898  // The store may have dependencies to unanalyzable loads and
899  // stores.
901  addChainDependencies(SU, Stores, UnknownValue);
902  }
903  } else { // SU is a load.
904  if (Objs.empty()) {
905  // An unknown load depends on all stores.
906  addChainDependencies(SU, Stores);
907  addChainDependencies(SU, NonAliasStores);
908 
909  Loads.insert(SU, UnknownValue);
910  } else {
911  for (const UnderlyingObject &UnderlObj : Objs) {
912  ValueType V = UnderlObj.getValue();
913  bool ThisMayAlias = UnderlObj.mayAlias();
914 
915  // Add precise dependencies against all previously seen stores
916  // mapping to the same Value(s).
917  addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
918 
919  // Map this load to V.
920  (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
921  }
922  // The load may have dependencies to unanalyzable stores.
923  addChainDependencies(SU, Stores, UnknownValue);
924  }
925  }
926 
927  // Reduce maps if they grow huge.
928  if (Stores.size() + Loads.size() >= HugeRegion) {
929  DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
930  reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
931  }
932  if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
933  DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
934  reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
935  }
936  }
937 
938  if (DbgMI)
939  FirstDbgValue = DbgMI;
940 
941  Defs.clear();
942  Uses.clear();
945 }
946 
948  PSV->printCustom(OS);
949  return OS;
950 }
951 
953  for (auto &Itr : *this) {
954  if (Itr.first.is<const Value*>()) {
955  const Value *V = Itr.first.get<const Value*>();
956  if (isa<UndefValue>(V))
957  dbgs() << "Unknown";
958  else
959  V->printAsOperand(dbgs());
960  }
961  else if (Itr.first.is<const PseudoSourceValue*>())
962  dbgs() << Itr.first.get<const PseudoSourceValue*>();
963  else
964  llvm_unreachable("Unknown Value type.");
965 
966  dbgs() << " : ";
967  dumpSUList(Itr.second);
968  }
969 }
970 
972  Value2SUsMap &loads, unsigned N) {
973  DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
974  stores.dump();
975  dbgs() << "Loading SUnits:\n";
976  loads.dump());
977 
978  // Insert all SU's NodeNums into a vector and sort it.
979  std::vector<unsigned> NodeNums;
980  NodeNums.reserve(stores.size() + loads.size());
981  for (auto &I : stores)
982  for (auto *SU : I.second)
983  NodeNums.push_back(SU->NodeNum);
984  for (auto &I : loads)
985  for (auto *SU : I.second)
986  NodeNums.push_back(SU->NodeNum);
987  std::sort(NodeNums.begin(), NodeNums.end());
988 
989  // The N last elements in NodeNums will be removed, and the SU with
990  // the lowest NodeNum of them will become the new BarrierChain to
991  // let the not yet seen SUs have a dependency to the removed SUs.
992  assert(N <= NodeNums.size());
993  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
994  if (BarrierChain) {
995  // The aliasing and non-aliasing maps reduce independently of each
996  // other, but share a common BarrierChain. Check if the
997  // newBarrierChain is above the former one. If it is not, it may
998  // introduce a loop to use newBarrierChain, so keep the old one.
999  if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1000  BarrierChain->addPredBarrier(newBarrierChain);
1001  BarrierChain = newBarrierChain;
1002  DEBUG(dbgs() << "Inserting new barrier chain: SU("
1003  << BarrierChain->NodeNum << ").\n";);
1004  }
1005  else
1006  DEBUG(dbgs() << "Keeping old barrier chain: SU("
1007  << BarrierChain->NodeNum << ").\n";);
1008  }
1009  else
1010  BarrierChain = newBarrierChain;
1011 
1012  insertBarrierChain(stores);
1013  insertBarrierChain(loads);
1014 
1015  DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
1016  stores.dump();
1017  dbgs() << "Loading SUnits:\n";
1018  loads.dump());
1019 }
1020 
1022  MachineInstr &MI, bool addToLiveRegs) {
1023  for (MachineOperand &MO : MI.operands()) {
1024  if (!MO.isReg() || !MO.readsReg())
1025  continue;
1026  unsigned Reg = MO.getReg();
1027  if (!Reg)
1028  continue;
1029 
1030  // Things that are available after the instruction are killed by it.
1031  bool IsKill = LiveRegs.available(MRI, Reg);
1032  MO.setIsKill(IsKill);
1033  if (addToLiveRegs)
1034  LiveRegs.addReg(Reg);
1035  }
1036 }
1037 
1039  DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n');
1040 
1041  LiveRegs.init(*TRI);
1042  LiveRegs.addLiveOuts(MBB);
1043 
1044  // Examine block from end to start...
1045  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1046  if (MI.isDebugValue())
1047  continue;
1048 
1049  // Update liveness. Registers that are defed but not used in this
1050  // instruction are now dead. Mark register and all subregs as they
1051  // are completely defined.
1052  for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
1053  const MachineOperand &MO = *O;
1054  if (MO.isReg()) {
1055  if (!MO.isDef())
1056  continue;
1057  unsigned Reg = MO.getReg();
1058  if (!Reg)
1059  continue;
1060  LiveRegs.removeReg(Reg);
1061  } else if (MO.isRegMask()) {
1063  }
1064  }
1065 
1066  // If there is a bundle header fix it up first.
1067  if (!MI.isBundled()) {
1068  toggleKills(MRI, LiveRegs, MI, true);
1069  } else {
1070  MachineBasicBlock::instr_iterator First = MI.getIterator();
1071  if (MI.isBundle()) {
1072  toggleKills(MRI, LiveRegs, MI, false);
1073  ++First;
1074  }
1075  // Some targets make the (questionable) assumtion that the instructions
1076  // inside the bundle are ordered and consequently only the last use of
1077  // a register inside the bundle can kill it.
1078  MachineBasicBlock::instr_iterator I = std::next(First);
1079  while (I->isBundledWithSucc())
1080  ++I;
1081  do {
1082  if (!I->isDebugValue())
1083  toggleKills(MRI, LiveRegs, *I, true);
1084  --I;
1085  } while(I != First);
1086  }
1087  }
1088 }
1089 
1090 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
1091  // Cannot completely remove virtual function even in release mode.
1092 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1093  SU->getInstr()->dump();
1094 #endif
1095 }
1096 
1097 std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1098  std::string s;
1099  raw_string_ostream oss(s);
1100  if (SU == &EntrySU)
1101  oss << "<entry>";
1102  else if (SU == &ExitSU)
1103  oss << "<exit>";
1104  else
1105  SU->getInstr()->print(oss, /*SkipOpers=*/true);
1106  return oss.str();
1107 }
1108 
1109 /// Return the basic block label. It is not necessarilly unique because a block
1110 /// contains multiple scheduling regions. But it is fine for visualization.
1111 std::string ScheduleDAGInstrs::getDAGName() const {
1112  return "dag." + BB->getFullName();
1113 }
1114 
1115 //===----------------------------------------------------------------------===//
1116 // SchedDFSResult Implementation
1117 //===----------------------------------------------------------------------===//
1118 
1119 namespace llvm {
1120 
1121 /// Internal state used to compute SchedDFSResult.
 // Result object being filled in; owns the per-node and per-tree arrays.
1123  SchedDFSResult &R;
1124 
1125  /// Join DAG nodes into equivalence classes by their subtree.
1126  IntEqClasses SubtreeClasses;
1127  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1128  std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1129 
 /// Per-root bookkeeping kept while the DFS is still forming subtrees.
1130  struct RootData {
1131  unsigned NodeID;
1132  unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1133  unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1134  /// children.
1135 
 // A fresh root starts with no parent subtree.
1136  RootData(unsigned id): NodeID(id),
1137  ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1138 
 /// Key hook used by SparseSet<RootData> below.
1139  unsigned getSparseSetIndex() const { return NodeID; }
1140  };
1141 
 // Current set of subtree roots, keyed by NodeID.
1142  SparseSet<RootData> RootSet;
1143 
1144 public:
 /// Sizes all DFS state to the number of DAG nodes recorded in \p r.
1145  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
 // The RootSet universe must admit every possible NodeID as a key.
1146  RootSet.setUniverse(R.DFSNodeData.size());
1147  }
1148 
1149  /// Returns true if this node been visited by the DFS traversal.
1150  ///
1151  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1152  /// ID. Later, SubtreeID is updated but remains valid.
1153  bool isVisited(const SUnit *SU) const {
1154  return R.DFSNodeData[SU->NodeNum].SubtreeID
1155  != SchedDFSResult::InvalidSubtreeID;
1156  }
1157 
1158  /// Initializes this node's instruction count. We don't need to flag the node
1159  /// visited until visitPostorder because the DAG cannot have cycles.
1160  void visitPreorder(const SUnit *SU) {
1161  R.DFSNodeData[SU->NodeNum].InstrCount =
1162  SU->getInstr()->isTransient() ? 0 : 1;
1163  }
1164 
1165  /// Called once for each node after all predecessors are visited. Revisit this
1166  /// node's predecessors and potentially join them now that we know the ILP of
1167  /// the other predecessors.
1168  void visitPostorderNode(const SUnit *SU) {
1169  // Mark this node as the root of a subtree. It may be joined with its
1170  // successors later.
1171  R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1172  RootData RData(SU->NodeNum);
 // Like visitPreorder, a transient instruction contributes zero to the count.
1173  RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1174 
1175  // If any predecessors are still in their own subtree, they either cannot be
1176  // joined or are large enough to remain separate. If this parent node's
1177  // total instruction count is not greater than a child subtree by at least
1178  // the subtree limit, then try to join it now since splitting subtrees is
1179  // only useful if multiple high-pressure paths are possible.
1180  unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
 // Only data edges define subtree membership; other edge kinds are ignored.
1181  for (const SDep &PredDep : SU->Preds) {
1182  if (PredDep.getKind() != SDep::Data)
1183  continue;
1184  unsigned PredNum = PredDep.getSUnit()->NodeNum;
1185  if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1186  joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
1187 
1188  // Either link or merge the TreeData entry from the child to the parent.
1189  if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1190  // If the predecessor's parent is invalid, this is a tree edge and the
1191  // current node is the parent.
1192  if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1193  RootSet[PredNum].ParentNodeID = SU->NodeNum;
1194  }
1195  else if (RootSet.count(PredNum)) {
1196  // The predecessor is not a root, but is still in the root set. This
1197  // must be the new parent that it was just joined to. Note that
1198  // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1199  // set to the original parent.
1200  RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1201  RootSet.erase(PredNum);
1202  }
1203  }
 // Publish this node's root entry now that merged children are folded in.
1204  RootSet[SU->NodeNum] = RData;
1205  }
1206 
1207  /// \brief Called once for each tree edge after calling visitPostOrderNode on
1208  /// the predecessor. Increment the parent node's instruction count and
1209  /// preemptively join this subtree to its parent's if it is small enough.
1210  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1211  R.DFSNodeData[Succ->NodeNum].InstrCount
1212  += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1213  joinPredSubtree(PredDep, Succ);
1214  }
1215 
1216  /// Adds a connection for cross edges.
1217  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1218  ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
1219  }
1220 
1221  /// Sets each node's subtree ID to the representative ID and record
1222  /// connections between trees.
1223  void finalize() {
1224  SubtreeClasses.compress();
1225  R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
 // After compression, each remaining root corresponds to exactly one class.
1226  assert(SubtreeClasses.getNumClasses() == RootSet.size()
1227  && "number of roots should match trees");
 // Wire each root's tree to its parent tree and record its local instr count.
1228  for (const RootData &Root : RootSet) {
1229  unsigned TreeID = SubtreeClasses[Root.NodeID];
1230  if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1231  R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
1232  R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
1233  // Note that SubInstrCount may be greater than InstrCount if we joined
1234  // subtrees across a cross edge. InstrCount will be attributed to the
1235  // original parent, while SubInstrCount will be attributed to the joined
1236  // parent.
1237  }
1238  R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1239  R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1240  DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
 // Rewrite every node's SubtreeID to its compressed representative.
1241  for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1242  R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1243  DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1244  << R.DFSNodeData[Idx].SubtreeID << '\n');
1245  }
 // Record symmetric cross-tree connections at the depth of the crossing edge.
1246  for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
1247  unsigned PredTree = SubtreeClasses[P.first->NodeNum];
1248  unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
1249  if (PredTree == SuccTree)
1250  continue;
1251  unsigned Depth = P.first->getDepth();
1252  addConnection(PredTree, SuccTree, Depth);
1253  addConnection(SuccTree, PredTree, Depth);
1254  }
1255  }
1256 
1257 protected:
1258  /// Joins the predecessor subtree with the successor that is its DFS parent.
1259  /// Applies some heuristics before joining.
1260  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1261  bool CheckLimit = true) {
1262  assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1263 
1264  // Check if the predecessor is already joined.
1265  const SUnit *PredSU = PredDep.getSUnit();
1266  unsigned PredNum = PredSU->NodeNum;
1267  if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1268  return false;
1269 
1270  // Four is the magic number of successors before a node is considered a
1271  // pinch point.
1272  unsigned NumDataSucs = 0;
1273  for (const SDep &SuccDep : PredSU->Succs) {
1274  if (SuccDep.getKind() == SDep::Data) {
1275  if (++NumDataSucs >= 4)
1276  return false;
1277  }
1278  }
1279  if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1280  return false;
1281  R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1282  SubtreeClasses.join(Succ->NodeNum, PredNum);
1283  return true;
1284  }
1285 
1286  /// Called by finalize() to record a connection between trees.
1287  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1288  if (!Depth)
1289  return;
1290 
1291  do {
1293  R.SubtreeConnections[FromTree];
1294  for (SchedDFSResult::Connection &C : Connections) {
1295  if (C.TreeID == ToTree) {
1296  C.Level = std::max(C.Level, Depth);
1297  return;
1298  }
1299  }
1300  Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1301  FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1302  } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1303  }
1304 };
1305 
1306 } // end namespace llvm
1307 
1308 namespace {
1309 
1310 /// Manage the stack used by a reverse depth-first search over the DAG.
1311 class SchedDAGReverseDFS {
1312  std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1313 
1314 public:
1315  bool isComplete() const { return DFSStack.empty(); }
1316 
1317  void follow(const SUnit *SU) {
1318  DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
1319  }
1320  void advance() { ++DFSStack.back().second; }
1321 
1322  const SDep *backtrack() {
1323  DFSStack.pop_back();
1324  return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1325  }
1326 
1327  const SUnit *getCurr() const { return DFSStack.back().first; }
1328 
1329  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1330 
1331  SUnit::const_pred_iterator getPredEnd() const {
1332  return getCurr()->Preds.end();
1333  }
1334 };
1335 
1336 } // end anonymous namespace
1337 
1338 static bool hasDataSucc(const SUnit *SU) {
1339  for (const SDep &SuccDep : SU->Succs) {
1340  if (SuccDep.getKind() == SDep::Data &&
1341  !SuccDep.getSUnit()->isBoundaryNode())
1342  return true;
1343  }
1344  return false;
1345 }
1346 
1347 /// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1348 /// search from this root.
1350  if (!IsBottomUp)
1351  llvm_unreachable("Top-down ILP metric is unimplemented");
1352 
1353  SchedDFSImpl Impl(*this);
1354  for (const SUnit &SU : SUnits) {
1355  if (Impl.isVisited(&SU) || hasDataSucc(&SU))
1356  continue;
1357 
1358  SchedDAGReverseDFS DFS;
1359  Impl.visitPreorder(&SU);
1360  DFS.follow(&SU);
1361  while (true) {
1362  // Traverse the leftmost path as far as possible.
1363  while (DFS.getPred() != DFS.getPredEnd()) {
1364  const SDep &PredDep = *DFS.getPred();
1365  DFS.advance();
1366  // Ignore non-data edges.
1367  if (PredDep.getKind() != SDep::Data
1368  || PredDep.getSUnit()->isBoundaryNode()) {
1369  continue;
1370  }
1371  // An already visited edge is a cross edge, assuming an acyclic DAG.
1372  if (Impl.isVisited(PredDep.getSUnit())) {
1373  Impl.visitCrossEdge(PredDep, DFS.getCurr());
1374  continue;
1375  }
1376  Impl.visitPreorder(PredDep.getSUnit());
1377  DFS.follow(PredDep.getSUnit());
1378  }
1379  // Visit the top of the stack in postorder and backtrack.
1380  const SUnit *Child = DFS.getCurr();
1381  const SDep *PredDep = DFS.backtrack();
1382  Impl.visitPostorderNode(Child);
1383  if (PredDep)
1384  Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1385  if (DFS.isComplete())
1386  break;
1387  }
1388  }
1389  Impl.finalize();
1390 }
1391 
1392 /// The root of the given SubtreeID was just scheduled. For all subtrees
1393 /// connected to this tree, record the depth of the connection so that the
1394 /// nearest connected subtrees can be prioritized.
1395 void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1396  for (const Connection &C : SubtreeConnections[SubtreeID]) {
1397  SubtreeConnectLevels[C.TreeID] =
1398  std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1399  DEBUG(dbgs() << " Tree: " << C.TreeID
1400  << " @" << SubtreeConnectLevels[C.TreeID] << '\n');
1401  }
1402 }
1403 
1404 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1406  OS << InstrCount << " / " << Length << " = ";
1407  if (!Length)
1408  OS << "BADILP";
1409  else
1410  OS << format("%g", ((double)InstrCount / Length));
1411 }
1412 
1414  dbgs() << *this << '\n';
1415 }
1416 
1417 namespace llvm {
1418 
1421  Val.print(OS);
1422  return OS;
1423 }
1424 
1425 } // end namespace llvm
1426 
1427 #endif
iterator end()
Returns an iterator past this container.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
void push_back(const T &Elt)
Definition: SmallVector.h:212
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
virtual void finishBlock()
Cleans up after scheduling in the given block.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
A common definition of LaneBitmask for use in TableGen and CodeGen.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
void clear()
Definition: MapVector.h:78
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:432
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
bool available(const MachineRegisterInfo &MRI, unsigned Reg) const
Returns true if register Reg and no aliasing register is in the set.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
void init(unsigned N)
Initialize an array of N PressureDiffs.
Record a physical register access.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
LivePhysRegs LiveRegs
Set of live physical registers for updating kill flags.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit *SU) const override
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
static LaneBitmask getAll()
Definition: LaneBitmask.h:84
void removeRegsInMask(const MachineOperand &MO, SmallVectorImpl< std::pair< unsigned, const MachineOperand *>> *Clobbers=nullptr)
Removes physical registers clobbered by the regmask operand MO.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:358
void setIsUndef(bool Val=true)
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:35
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void init(const MCSchedModel &sm, const TargetSubtargetInfo *sti, const TargetInstrInfo *tii)
Initialize the machine model for instruction scheduling.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
void compress()
compress - Compress equivalence classes by numbering them 0 .
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
Definition: MachineInstr.h:869
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
bool hasOneDef(unsigned RegNo) const
Return true if there is exactly one operand defining the specified register.
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI DAG construction"))
static unsigned InstrCount
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:370
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:306
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU&#39;s instruction that defines a physical register.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
The two locations may or may not alias. This is the least precise result.
Definition: AliasAnalysis.h:87
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
void clearList(ValueType V)
Clears the list of SUs mapped to V.
iterator_range< succ_iterator > successors()
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:570
void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
A description of a memory reference used in the backend.
unsigned NumSuccs
of SDep::Data sucss.
Definition: ScheduleDAG.h:272
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to scheduling...
&#39;undef&#39; values are things that do not have specified contents.
Definition: Constants.h:1247
An individual mapping from virtual register number to SUnit.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:282
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:371
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:266
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise. ...
Definition: SparseSet.h:235
static bool hasDataSucc(const SUnit *SU)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
unsigned SubReg
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:284
Reg
All possible values of the reg field in the ModR/M byte.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:448
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:88
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:66
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:56
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:294
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx ...
void print(raw_ostream &OS) const
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:190
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they&#39;re not in a MachineFuncti...
List of registers defined and used by a machine instruction.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of register in this class.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
iterator find(const KeyT &Key)
Definition: MapVector.h:137
reverse_iterator rend()
void visitPreorder(const SUnit *SU)
Initializes this node&#39;s instruction count.
bool mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA)
Returns true if this instruction&#39;s memory access aliases the memory access of Other.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
reverse_iterator rbegin()
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:349
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:164
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:285
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Scheduling dependency.
Definition: ScheduleDAG.h:50
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:293
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:613
#define P(N)
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register informa...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
bool isCall
Is a function call.
Definition: ScheduleDAG.h:280
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:378
Array of PressureDiffs.
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:59
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static unsigned getReductionSize()
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:55
bool registerDefIsDead(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
Definition: MachineInstr.h:941
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
void getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
Internal state used to compute SchedDFSResult.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void reComputeSize()
Counts the number of SUs in this map after a reduction.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:101
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:72
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
MCRegAliasIterator enumerates all registers aliasing Reg.
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
const MachineFrameInfo & MFI
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
void clear()
Clears the set.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
bool isVisited(const SUnit *SU) const
Returns true if this node been visited by the DFS traversal.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:3559
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:482
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
void setIsKill(bool Val=true)
iterator find(const KeyT &Key)
Find an element by its key.
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the ...
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:283
Iterator for intrusive lists based on ilist_node.
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:837
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
bool isDebugValue() const
Definition: MachineInstr.h:785
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Reg2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instr...
void eraseAll(const KeyT &K)
Erase all elements with the given key.
CHAIN = SC CHAIN, Imm128 - System call.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx. ...
Nonvolatile load/Store instructions that may alias.
Definition: ScheduleDAG.h:71
typename std::vector< std::pair< ValueType, SUList >> ::iterator iterator
Definition: MapVector.h:46
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store fol...
Definition: ScheduleDAG.h:389
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
Special value supplied for machine level alias analysis.
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
iterator erase(iterator I)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:573
Representation of each machine instruction.
Definition: MachineInstr.h:59
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:574
void finalize()
Sets each node's subtree ID to the representative ID and record connections between trees...
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:569
void removeReg(unsigned Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set...
Definition: LivePhysRegs.h:89
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:49
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
constexpr bool any() const
Definition: LaneBitmask.h:53
bool empty() const
Returns true if the set is empty.
Mapping from virtual register to SUnit including an operand index.
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AliasAnalysis and Target fail to deny the dep...
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:568
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:496
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2018
void print(raw_ostream &OS, bool SkipOpers=false, bool SkipDebugLoc=false, const TargetInstrInfo *TII=nullptr) const
Debugging support: Print this MI to OS.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
iterator begin()
Definition: MapVector.h:59
const unsigned Kind
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and return a ptr to it.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clear the map.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:600
iterator end()
Definition: MapVector.h:61
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:466
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
LLVM Value Representation.
Definition: Value.h:73
void clear()
Clears map from all contents.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
bool isPosition() const
Definition: MachineInstr.h:783
static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:262
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
static bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
MachineBasicBlock * BB
The block in which to insert instructions.
IRTranslator LLVM IR MI
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:439
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
static void dumpSUList(ScheduleDAGInstrs::SUList &L)
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:571
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:572
SchedDFSImpl(SchedDFSResult &r)
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:284
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
bool hasTailCall() const
Returns true if the function contains a tail call.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
hexagon widen stores
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
A discriminated union of two pointer types, with the discriminator in the low bit of the pointer...
Definition: PointerUnion.h:87
void addReg(unsigned Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
void resize(size_type N)
Definition: SmallVector.h:355