//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This implements the ScheduleDAGInstrs class, which implements
/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));

static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.

// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
// together hold this many SUs, a reduction of maps will be done.
static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
                             "prior to scheduling, at which point a trade-off "
                             "is made to avoid excessive compile time."));

static cl::opt<unsigned> ReductionSize(
    "dag-maps-reduction-size", cl::Hidden,
    cl::desc("A huge scheduling region will have maps reduced by this many "
             "nodes at a time. Defaults to HugeRegion / 2."));

static unsigned getReductionSize() {
  // Always reduce a huge region with half of the elements, except
  // when user sets this number explicitly.
  if (ReductionSize.getNumOccurrences() == 0)
    return HugeRegion / 2;
  return ReductionSize;
}
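
// Illustrative example: both knobs are ordinary cl::opts, so a tool that runs
// the machine scheduler can tune them directly, e.g.:
//
//   llc -dag-maps-huge-region=50 -dag-maps-reduction-size=10 foo.ll
//
// With these hypothetical values, as soon as the Stores and Loads maps
// together hold 50 SUs, the 10 SUs with the highest NodeNums are folded
// behind a new BarrierChain node by reduceHugeMemNodeMaps() below.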

static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "{ ";
  for (const SUnit *su : L) {
    dbgs() << "SU(" << su->NodeNum << ")";
    if (su != L.back())
      dbgs() << ", ";
  }
  dbgs() << "}\n";
#endif
}

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool RemoveKillFlags)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
      RemoveKillFlags(RemoveKillFlags),
      UnknownValue(UndefValue::get(
        Type::getVoidTy(mf.getFunction()->getContext()))) {
  DbgValues.clear();

  const TargetSubtargetInfo &ST = mf.getSubtarget();
  SchedModel.init(ST.getSchedModel(), &ST, TII);
}

/// This is the function that does the work of looking through basic
/// ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant, a multiplied value, or a phi, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
           !isa<PHINode>(U->getOperand(1))))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (true);
}
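
// Illustrative example: for IR such as
//
//   %i = ptrtoint i8* %obj to i64
//   %a = add i64 %i, 16
//   %p = inttoptr i64 %a to i8*
//
// the walk starts at %a, steps through the add to operand 0 (%i), and the
// PtrToInt case returns %obj, which GetUnderlyingObjects can analyze further.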

/// This is a wrapper around GetUnderlyingObjects and adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
                                 SmallVectorImpl<Value *> &Objects,
                                 const DataLayout &DL) {
  SmallPtrSet<const Value *, 4> Visited;
  SmallVector<const Value *, 4> Working(1, V);
  do {
    V = Working.pop_back_val();

    SmallVector<Value *, 4> Objs;
    GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);

    for (Value *V : Objs) {
      if (!Visited.insert(V).second)
        continue;
      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
        const Value *O =
          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
        if (O->getType()->isPointerTy()) {
          Working.push_back(O);
          continue;
        }
      }
      Objects.push_back(const_cast<Value *>(V));
    }
  } while (!Working.empty());
}

/// If this machine instr has memory reference information and it can be
/// tracked to a normal reference to a known object, return the Value
/// for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                         const MachineFrameInfo &MFI,
                                         UnderlyingObjectsVector &Objects,
                                         const DataLayout &DL) {
  auto allMMOsOkay = [&]() {
    for (const MachineMemOperand *MMO : MI->memoperands()) {
      if (MMO->isVolatile())
        return false;

      if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
        // Functions that contain tail calls don't have unique
        // PseudoSourceValue objects. Two PseudoSourceValues might refer to
        // the same or overlapping locations. The client code calling this
        // function assumes this is not the case. So return a conservative
        // answer of no known object.
        if (MFI.hasTailCall())
          return false;

        // For now, ignore PseudoSourceValues which may alias LLVM IR values
        // because the code that uses this function has no way to cope with
        // such aliases.
        if (PSV->isAliased(&MFI))
          return false;

        bool MayAlias = PSV->mayAlias(&MFI);
        Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
      } else if (const Value *V = MMO->getValue()) {
        SmallVector<Value *, 4> Objs;
        getUnderlyingObjects(V, Objs, DL);

        for (Value *V : Objs) {
          if (!isIdentifiedObject(V))
            return false;

          Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
        }
      } else
        return false;
    }
    return true;
  };

  if (!allMMOsOkay())
    Objects.clear();
}
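
// Illustrative example: a reload from a spill slot typically carries a
// PseudoSourceValue for its fixed stack slot; such a slot is not aliased with
// LLVM IR values, so PSV->isAliased(&MFI) is false and PSV->mayAlias(&MFI) is
// typically false as well. The pair (PSV, /*MayAlias=*/false) then lands in
// Objects and the SU is later filed under the NonAlias maps instead of the
// general Stores/Loads maps.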

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = nullptr;
}

void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  NumRegionInstrs = regioninstrs;
}

void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
  ExitSU.setInstr(ExitMI);
  // Add dependencies on the defs and uses of the instruction.
  if (ExitMI) {
    for (const MachineOperand &MO : ExitMI->operands()) {
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
      }
    }
  }
  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    for (const MachineBasicBlock *Succ : BB->successors()) {
      for (const auto &LI : Succ->liveins()) {
        if (!Uses.contains(LI.PhysReg))
          Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
      }
    }
  }
}

/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = I->OpIdx;
      MachineInstr *RegUse = nullptr;
      SDep Dep;
      if (UseOp < 0)
        Dep = SDep(SU, SDep::Artificial);
      else {
        // Set the hasPhysRegDefs only for physreg defs that have a use within
        // the scheduling region.
        SU->hasPhysRegDefs = true;
        Dep = SDep(SU, SDep::Data, *Alias);
        RegUse = UseSU->getInstr();
      }
      Dep.setLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
                                         UseOp));

      ST.adjustSchedDependency(SU, UseSU, Dep);
      UseSU->addPred(Dep);
    }
  }
}
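
// Illustrative example: if SU defines register R and a later instruction in
// the region reads R through an aliasing physreg, the loop above creates an
// SDep::Data edge from SU to that reader whose latency comes from
// SchedModel.computeOperandLatency(); a UseOp of -1 (e.g. the ExitSU live-out
// entries added by addSchedBarrierDeps()) yields an artificial edge instead.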

/// \brief Adds register dependencies (data, anti, and output) from this SUnit
/// to following instructions in the same scheduling region that depend on the
/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();
  // We do not need to track any dependencies for constant registers.
  if (MRI.isConstantPhysReg(Reg))
    return;

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
      SUnit *DefSU = I->SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
        else {
          SDep Dep(SU, Kind, /*Reg=*/*Alias);
          Dep.setLatency(
            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
          DefSU->addPred(Dep);
        }
      }
    }
  }

  if (!MO.isDef()) {
    SU->hasPhysRegUses = true;
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
    if (RemoveKillFlags)
      MO.setIsKill(false);
  } else {
    addPhysRegDataDeps(SU, OperIdx);

    // Clear this register's use list.
    if (Uses.contains(Reg))
      Uses.eraseAll(Reg);

    if (!MO.isDead()) {
      Defs.eraseAll(Reg);
    } else if (SU->isCall) {
      // Calls will not be reordered because of chain dependencies (see
      // below). Since call operands are dead, calls may continue to be added
      // to the DefList making dependence checking quadratic in the size of
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
      Reg2SUnitsMap::iterator B = P.first;
      Reg2SUnitsMap::iterator I = P.second;
      for (bool isBegin = I == B; !isBegin; /* empty */) {
        isBegin = (--I) == B;
        if (!I->SU->isCall)
          break;
        I = Defs.erase(I);
      }
    }

    // Defs are pushed in the order they are visited and never reordered.
    Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
  }
}
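
// Illustrative example: with instructions numbered top-down,
//
//   SU(0): def of R
//   SU(1): use of R
//   SU(2): redef of R
//
// the bottom-up walk has SU(2) in Defs and SU(1) in Uses when SU(0) is
// visited, so the code above produces a data edge SU(0)->SU(1), an anti edge
// SU(1)->SU(2) with latency 0, and an output edge SU(0)->SU(2).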

LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
  unsigned Reg = MO.getReg();
  // No point in tracking lanemasks if we don't have interesting subregisters.
  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
  if (!RC.HasDisjunctSubRegs)
    return LaneBitmask::getAll();

  unsigned SubReg = MO.getSubReg();
  if (SubReg == 0)
    return RC.getLaneMask();
  return TRI->getSubRegIndexLaneMask(SubReg);
}
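
// Illustrative example (hypothetical register class, pseudo-notation): for a
// 64-bit class with disjunct 32-bit subregisters sub_lo and sub_hi,
//
//   %0.sub_lo = ...   // returns only sub_lo's lane mask
//   %0 = ...          // no subreg index: the class's full lane mask
//
// while classes without disjunct subregisters always get
// LaneBitmask::getAll().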

/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  LaneBitmask DefLaneMask;
  LaneBitmask KillLaneMask;
  if (TrackLaneMasks) {
    bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
    DefLaneMask = getLaneMaskForMO(MO);
    // If we have a <read-undef> flag, none of the lane values comes from an
    // earlier instruction.
    KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;

    // Clear undef flag, we'll re-add it later once we know which subregister
    // Def is first.
    MO.setIsUndef(false);
  } else {
    DefLaneMask = LaneBitmask::getAll();
    KillLaneMask = LaneBitmask::getAll();
  }

  if (MO.isDead()) {
    assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
           "Dead defs should have no uses");
  } else {
    // Add data dependence to all uses we found so far.
    const TargetSubtargetInfo &ST = MF.getSubtarget();
    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
         E = CurrentVRegUses.end(); I != E; /*empty*/) {
      LaneBitmask LaneMask = I->LaneMask;
      // Ignore uses of other lanes.
      if ((LaneMask & KillLaneMask).none()) {
        ++I;
        continue;
      }

      if ((LaneMask & DefLaneMask).any()) {
        SUnit *UseSU = I->SU;
        MachineInstr *Use = UseSU->getInstr();
        SDep Dep(SU, SDep::Data, Reg);
        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
                                                        I->OperandIndex));
        ST.adjustSchedDependency(SU, UseSU, Dep);
        UseSU->addPred(Dep);
      }

      LaneMask &= ~KillLaneMask;
      // If we found a Def for all lanes of this use, remove it from the list.
      if (LaneMask.any()) {
        I->LaneMask = LaneMask;
        ++I;
      } else
        I = CurrentVRegUses.erase(I);
    }
  }

  // Shortcut: Singly defined vregs do not have output/anti dependencies.
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest defs of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  LaneBitmask LaneMask = DefLaneMask;
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for other lanes.
    if ((V2SU.LaneMask & LaneMask).none())
      continue;
    // Add an output dependence.
    SUnit *DefSU = V2SU.SU;
    // Ignore additional defs of the same lanes in one instruction. This can
    // happen because lanemasks are shared for targets with too many
    // subregisters. We also use some representation tricks/hacks where we
    // add super-register defs/uses, to imply that although we only access
    // parts of the reg we care about the full one.
    if (DefSU == SU)
      continue;
    SDep Dep(SU, SDep::Output, Reg);
    Dep.setLatency(
      SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
    DefSU->addPred(Dep);

    // Update current definition. This can get tricky if the def was about a
    // bigger lanemask before. We then have to shrink it and create a new
    // VReg2SUnit for the non-overlapping part.
    LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
    LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
    V2SU.SU = SU;
    V2SU.LaneMask = OverlapMask;
    if (NonOverlapMask.any())
      CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
  }
  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
  if (LaneMask.any())
    CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
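
// Illustrative example: if the def above writes only sub_lo of %0 while a
// CurrentVRegDefs entry still covers sub_lo and sub_hi from an earlier def,
// the overlapping sub_lo lanes are re-pointed at this SU and a new VReg2SUnit
// carrying just the sub_hi lanes is inserted for the old defining SU.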

/// \brief Adds a register data dependency if the instruction that defines the
/// virtual register used at OperIdx is mapped to an SUnit. Add a register
/// antidependency from this SUnit to instructions that occur later in the same
/// scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  // Remember the use. Data dependencies will be added when we find the def.
  LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
                                        : LaneBitmask::getAll();
  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));

  // Add antidependences to the following defs of the vreg.
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for unrelated lanes.
    LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
    if ((PrevDefLaneMask & LaneMask).none())
      continue;
    if (V2SU.SU == SU)
      continue;

    V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
  }
}

/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}

void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
                                           unsigned Latency) {
  if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
    SDep Dep(SUa, SDep::MayAliasMem);
    Dep.setLatency(Latency);
    SUb->addPred(Dep);
  }
}
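
// Illustrative example: an earlier store followed by a possibly aliasing
// load gets a MayAliasMem edge with latency 1 (the Loads map's
// TrueMemOrderLatency), while store-to-store ordering edges use latency 0.
// If MachineInstr::mayAlias() proves the accesses independent (e.g. AA is
// enabled and the pointers are noalias), no edge is added at all.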

/// \brief Creates an SUnit for each real instruction, numbered in top-down
/// topological order. The instruction order A < B implies that no edge exists
/// from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(NumRegionInstrs);

  for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
    if (MI.isDebugValue())
      continue;

    SUnit *SU = newSUnit(&MI);
    MISUnitMap[&MI] = SU;

    SU->isCall = MI.isCall();
    SU->isCommutable = MI.isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());

    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
    //
    // Reserved resources block an instruction from issuing and stall the
    // entire pipeline. These are identified by BufferSize=0.
    //
    // Unbuffered resources prevent execution of subsequent instructions that
    // require the same resources. This is used for in-order execution pipelines
    // within an out-of-order core. These are identified by BufferSize=1.
    if (SchedModel.hasInstrSchedModel()) {
      const MCSchedClassDesc *SC = getSchedClass(SU);
      for (const MCWriteProcResEntry &PRE :
           make_range(SchedModel.getWriteProcResBegin(SC),
                      SchedModel.getWriteProcResEnd(SC))) {
        switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
        case 0:
          SU->hasReservedResource = true;
          break;
        case 1:
          SU->isUnbuffered = true;
          break;
        default:
          break;
        }
      }
    }
  }
}
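
// Illustrative example (hypothetical machine model): a ProcResource declared
// with BufferSize = 0, say a non-pipelined divider, marks every SUnit writing
// it with hasReservedResource; BufferSize = 1 models an in-order pipe inside
// an out-of-order core and sets isUnbuffered; the default fully buffered
// resources set neither flag.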

class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
  /// Current total number of SUs in map.
  unsigned NumNodes = 0;

  /// 1 for loads, 0 for stores. (see comment in SUList)
  unsigned TrueMemOrderLatency;

public:
  Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}

  /// To keep NumNodes up to date, insert() is used instead of
  /// this operator w/ push_back().
  ValueType &operator[](const SUList &Key) {
    llvm_unreachable("Don't use. Use insert() instead.");
  }

  /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
  /// reduce().
  void inline insert(SUnit *SU, ValueType V) {
    MapVector::operator[](V).push_back(SU);
    NumNodes++;
  }

  /// Clears the list of SUs mapped to V.
  void inline clearList(ValueType V) {
    iterator Itr = find(V);
    if (Itr != end()) {
      assert(NumNodes >= Itr->second.size());
      NumNodes -= Itr->second.size();

      Itr->second.clear();
    }
  }

  /// Clears map from all contents.
  void clear() {
    MapVector::clear();
    NumNodes = 0;
  }

  unsigned inline size() const { return NumNodes; }

  /// Counts the number of SUs in this map after a reduction.
  void reComputeSize() {
    NumNodes = 0;
    for (auto &I : *this)
      NumNodes += I.second.size();
  }

  unsigned inline getTrueMemOrderLatency() const {
    return TrueMemOrderLatency;
  }

  void dump();
};
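
// Illustrative usage sketch: NumNodes stays consistent only when the wrappers
// are used instead of MapVector's own operator[]:
//
//   Value2SUsMap Loads(1 /*TrueMemOrderLatency*/);
//   Loads.insert(SU, V);  // appends SU to V's SUList and bumps NumNodes
//   Loads.clearList(V);   // empties V's list and subtracts its size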

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap) {
  for (auto &I : Val2SUsMap)
    addChainDependencies(SU, I.second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap,
                                             ValueType V) {
  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
  if (Itr != Val2SUsMap.end())
    addChainDependencies(SU, Itr->second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  for (auto &I : map) {
    SUList &sus = I.second;
    for (auto *SU : sus)
      SU->addPredBarrier(BarrierChain);
  }
  map.clear();
}

void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  // Go through all lists of SUs.
  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
    Value2SUsMap::iterator CurrItr = I++;
    SUList &sus = CurrItr->second;
    SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
    for (; SUItr != SUEE; ++SUItr) {
      // Stop on BarrierChain or any instruction above it.
      if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
        break;

      (*SUItr)->addPredBarrier(BarrierChain);
    }

    // Also remove the BarrierChain from the list if present.
    if (SUItr != SUEE && *SUItr == BarrierChain)
      SUItr++;

    // Remove all SUs that are now successors of BarrierChain.
    if (SUItr != sus.begin())
      sus.erase(sus.begin(), SUItr);
  }

  // Remove all entries with empty su lists.
  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
    return (mapEntry.second.empty()); });

  // Recompute the size of the map (NumNodes).
  map.reComputeSize();
}

void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker,
                                        PressureDiffs *PDiffs,
                                        LiveIntervals *LIS,
                                        bool TrackLaneMasks) {
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                       : ST.useAA();
  AAForDep = UseAA ? AA : nullptr;

  BarrierChain = nullptr;

  this->TrackLaneMasks = TrackLaneMasks;
  MISUnitMap.clear();
  ScheduleDAG::clearDAG();

  // Create an SUnit for each real instruction.
  initSUnits();

  if (PDiffs)
    PDiffs->init(SUnits.size());

  // We build scheduling units by walking a block's instruction list
  // from bottom to top.

  // Each MI's memory operand(s) is analyzed to a list of underlying
  // objects. The SU is then inserted in the SUList(s) mapped from the
  // Value(s). Each Value thus gets mapped to lists of SUs depending
  // on it, stores and loads kept separately. Two SUs are trivially
  // non-aliasing if they both depend on only identified Values and do
  // not share any common Value.
  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);

  // Certain memory accesses are known to not alias any SU in Stores
  // or Loads, and have therefore their own 'NonAlias'
  // domain. E.g. spill / reload instructions never alias LLVM IR
  // Values. It would be nice to assume that this type of memory
  // accesses always have a proper memory operand modelling, and are
  // therefore never unanalyzable, but this is conservatively not
  // done.
  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = nullptr;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setUniverse(TRI->getNumRegs());
  Uses.setUniverse(TRI->getNumRegs());

  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
  unsigned NumVirtRegs = MRI.getNumVirtRegs();
  CurrentVRegDefs.setUniverse(NumVirtRegs);
  CurrentVRegUses.setUniverse(NumVirtRegs);

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *DbgMI = nullptr;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (DbgMI) {
      DbgValues.push_back(std::make_pair(DbgMI, &MI));
      DbgMI = nullptr;
    }

    if (MI.isDebugValue()) {
      DbgMI = &MI;
      continue;
    }
    SUnit *SU = MISUnitMap[&MI];
    assert(SU && "No SUnit mapped to this MI");

    if (RPTracker) {
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
      if (TrackLaneMasks) {
        SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
      }
      if (PDiffs != nullptr)
        PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);

      RPTracker->recedeSkipDebugValues();
      assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
      RPTracker->recede(RegOpers);
    }

    assert(
        (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
        "Cannot schedule terminators or labels!");

    // Add register-based dependencies (data, anti, and output).
    // For some instructions (calls, returns, inline-asm, etc.) there can
    // be explicit uses and implicit defs, in which case the use will appear
    // on the operand list before the def. Do two passes over the operand
    // list to make sure that defs are processed before any uses.
    bool HasVRegDef = false;
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        HasVRegDef = true;
        addVRegDefDeps(SU, j);
      }
    }
    // Now process all uses.
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      // Only look at use operands.
      // We do not need to check for MO.readsReg() here because subsequent
      // subregister defs will get output dependence edges and need no
      // additional use dependencies.
      if (!MO.isReg() || !MO.isUse())
        continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(SU, j);
      }
    }

    // If we haven't seen any uses in this scheduling region, create a
    // dependence edge to ExitSU to model the live-out latency. This is required
    // for vreg defs with no in-region use, and prefetches with no vreg def.
    //
    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
    // check currently relies on being called before adding chain deps.
    if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
      SDep Dep(SU, SDep::Artificial);
      Dep.setLatency(SU->Latency - 1);
      ExitSU.addPred(Dep);
    }

    // Add memory dependencies (Note: isStoreToStackSlot and
    // isLoadFromStackSlot are not usable after stack slots are lowered to
    // actual addresses).

    // This is a barrier event that acts as a pivotal node in the DAG.
    if (isGlobalMemoryObject(AA, &MI)) {

      // Become the barrier chain.
      if (BarrierChain)
        BarrierChain->addPredBarrier(SU);
      BarrierChain = SU;

      DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);

      // Add dependencies against everything below it and clear maps.
      addBarrierChain(Stores);
      addBarrierChain(Loads);
      addBarrierChain(NonAliasStores);
      addBarrierChain(NonAliasLoads);

      continue;
    }

    // If it's not a store or a variant load, we're done.
    if (!MI.mayStore() &&
        !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
      continue;

    // Always add a dependency edge to BarrierChain if present.
    if (BarrierChain)
      SU->addPredBarrier(BarrierChain);

    // Find the underlying objects for MI. The Objs vector is either
    // empty, or filled with the Values of memory locations which this
    // SU depends on. An empty vector means the memory location is
    // unknown, and may alias anything.
    UnderlyingObjectsVector Objs;
    getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout());

    if (MI.mayStore()) {
      if (Objs.empty()) {
        // An unknown store depends on all stores and loads.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);
        addChainDependencies(SU, Loads);
        addChainDependencies(SU, NonAliasLoads);

        // Map this store to 'UnknownValue'.
        Stores.insert(SU, UnknownValue);
      } else {
        // Add precise dependencies against all previously seen memory
        // accesses mapped to the same Value(s).
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add dependencies to previous stores and loads mapped to V.
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
          addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
        }
        // Update the store map after all chains have been added to avoid adding
        // self-loop edge if multiple underlying objects are present.
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Map this store to V.
          (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
        }
        // The store may have dependencies to unanalyzable loads and
        // stores.
        addChainDependencies(SU, Loads, UnknownValue);
        addChainDependencies(SU, Stores, UnknownValue);
      }
    } else { // SU is a load.
      if (Objs.empty()) {
        // An unknown load depends on all stores.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);

        Loads.insert(SU, UnknownValue);
      } else {
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add precise dependencies against all previously seen stores
          // mapping to the same Value(s).
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);

          // Map this load to V.
          (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
        }
        // The load may have dependencies to unanalyzable stores.
        addChainDependencies(SU, Stores, UnknownValue);
      }
    }

    // Reduce maps if they grow huge.
    if (Stores.size() + Loads.size() >= HugeRegion) {
      DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
      reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
    }
    if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
      DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
      reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
    }
  }

  if (DbgMI)
    FirstDbgValue = DbgMI;

  Defs.clear();
  Uses.clear();
  CurrentVRegDefs.clear();
  CurrentVRegUses.clear();
}
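
// Illustrative usage sketch: a scheduler driver (e.g. ScheduleDAGMI) wraps
// the graph construction per region, roughly:
//
//   startBlock(MBB);
//   enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
//   buildSchedGraph(AA, &RPTracker, &PDiffs, LIS, /*TrackLaneMasks=*/true);
//   // ... run the scheduling algorithm over SUnits ...
//   exitRegion();
//   finishBlock();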

static raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
  PSV->printCustom(OS);
  return OS;
}

void ScheduleDAGInstrs::Value2SUsMap::dump() {
  for (auto &Itr : *this) {
    if (Itr.first.is<const Value*>()) {
      const Value *V = Itr.first.get<const Value*>();
      if (isa<UndefValue>(V))
        dbgs() << "Unknown";
      else
        V->printAsOperand(dbgs());
    }
    else if (Itr.first.is<const PseudoSourceValue*>())
      dbgs() << Itr.first.get<const PseudoSourceValue*>();
    else
      llvm_unreachable("Unknown Value type.");

    dbgs() << " : ";
    dumpSUList(Itr.second);
  }
}

void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
                                              Value2SUsMap &loads, unsigned N) {
  DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
        stores.dump();
        dbgs() << "Loading SUnits:\n";
        loads.dump());

  // Insert all SU's NodeNums into a vector and sort it.
  std::vector<unsigned> NodeNums;
  NodeNums.reserve(stores.size() + loads.size());
  for (auto &I : stores)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  for (auto &I : loads)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  std::sort(NodeNums.begin(), NodeNums.end());

  // The N last elements in NodeNums will be removed, and the SU with
  // the lowest NodeNum of them will become the new BarrierChain to
  // let the not yet seen SUs have a dependency to the removed SUs.
  assert(N <= NodeNums.size());
  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
  if (BarrierChain) {
    // The aliasing and non-aliasing maps reduce independently of each
    // other, but share a common BarrierChain. Check if the
    // newBarrierChain is above the former one. If it is not, it may
    // introduce a loop to use newBarrierChain, so keep the old one.
    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
      BarrierChain->addPredBarrier(newBarrierChain);
      BarrierChain = newBarrierChain;
      DEBUG(dbgs() << "Inserting new barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);
    }
    else
      DEBUG(dbgs() << "Keeping old barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);
  }
  else
    BarrierChain = newBarrierChain;

  insertBarrierChain(stores);
  insertBarrierChain(loads);

  DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
        stores.dump();
        dbgs() << "Loading SUnits:\n";
        loads.dump());
}
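
// Illustrative example: with the default -dag-maps-huge-region=1000 and a
// reduction size of 500, once stores+loads reach 1000 SUs the 500 highest
// NodeNums are cut off; the lowest of them becomes the (possibly new)
// BarrierChain, and insertBarrierChain() drops every SU below it from the
// maps, so roughly half the nodes remain to be tracked.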

static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
                        MachineInstr &MI, bool addToLiveRegs) {
  for (MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !MO.readsReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;

    // Things that are available after the instruction are killed by it.
    bool IsKill = LiveRegs.available(MRI, Reg);
    MO.setIsKill(IsKill);
    if (addToLiveRegs)
      LiveRegs.addReg(Reg);
  }
}

void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
  DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n');

  LiveRegs.init(*TRI);
  LiveRegs.addLiveOuts(MBB);

  // Examine block from end to start...
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    if (MI.isDebugValue())
      continue;

    // Update liveness. Registers that are defed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
      const MachineOperand &MO = *O;
      if (MO.isReg()) {
        if (!MO.isDef())
          continue;
        unsigned Reg = MO.getReg();
        if (!Reg)
          continue;
        LiveRegs.removeReg(Reg);
      } else if (MO.isRegMask()) {
        LiveRegs.removeRegsInMask(MO);
      }
    }

    // If there is a bundle header fix it up first.
    if (!MI.isBundled()) {
      toggleKills(MRI, LiveRegs, MI, true);
    } else {
      MachineBasicBlock::instr_iterator First = MI.getIterator();
      if (MI.isBundle()) {
        toggleKills(MRI, LiveRegs, MI, false);
        ++First;
      }
      // Some targets make the (questionable) assumption that the instructions
      // inside the bundle are ordered and consequently only the last use of
      // a register inside the bundle can kill it.
      MachineBasicBlock::instr_iterator I = std::next(First);
      while (I->isBundledWithSucc())
        ++I;
      do {
        if (!I->isDebugValue())
          toggleKills(MRI, LiveRegs, *I, true);
        --I;
      } while (I != First);
    }
  }
}
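
// Illustrative example: if scheduling reordered two reads of R, the backward
// walk above marks a read as the kill exactly when R is not yet in LiveRegs
// (LiveRegs.available() is true), i.e. when it is the last read in program
// order; a stale kill flag left on an earlier read is cleared by the same
// toggleKills() call.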

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  SU->getInstr()->dump();
#endif
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss, /*SkipOpers=*/true);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}

//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//

namespace llvm {

/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
  SchedDFSResult &R;

  /// Join DAG nodes into equivalence classes by their subtree.
  IntEqClasses SubtreeClasses;
  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
  std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;

  struct RootData {
    unsigned NodeID;
    unsigned ParentNodeID;      ///< Parent node (member of the parent subtree).
    unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
                                /// children.

    RootData(unsigned id): NodeID(id),
                           ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}

    unsigned getSparseSetIndex() const { return NodeID; }
  };

  SparseSet<RootData> RootSet;

public:
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }

  /// Returns true if this node has been visited by the DFS traversal.
  ///
  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
  /// ID. Later, SubtreeID is updated but remains valid.
  bool isVisited(const SUnit *SU) const {
    return R.DFSNodeData[SU->NodeNum].SubtreeID
      != SchedDFSResult::InvalidSubtreeID;
  }

  /// Initializes this node's instruction count. We don't need to flag the node
  /// visited until visitPostorder because the DAG cannot have cycles.
  void visitPreorder(const SUnit *SU) {
    R.DFSNodeData[SU->NodeNum].InstrCount =
      SU->getInstr()->isTransient() ? 0 : 1;
  }

  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (const SDep &PredDep : SU->Preds) {
      if (PredDep.getKind() != SDep::Data)
        continue;
      unsigned PredNum = PredDep.getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }

  /// \brief Called once for each tree edge after calling visitPostOrderNode on
  /// the predecessor. Increment the parent node's instruction count and
  /// preemptively join this subtree to its parent's if it is small enough.
  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
    R.DFSNodeData[Succ->NodeNum].InstrCount
      += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
    joinPredSubtree(PredDep, Succ);
  }

  /// Adds a connection for cross edges.
  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
  }

  /// Sets each node's subtree ID to the representative ID and record
  /// connections between trees.
  void finalize() {
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    for (const RootData &Root : RootSet) {
      unsigned TreeID = SubtreeClasses[Root.NodeID];
      if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
            << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
      unsigned PredTree = SubtreeClasses[P.first->NodeNum];
      unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = P.first->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }

protected:
  /// Joins the predecessor subtree with the successor that is its DFS parent.
  /// Applies some heuristics before joining.
  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
                       bool CheckLimit = true) {
    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");

    // Check if the predecessor is already joined.
    const SUnit *PredSU = PredDep.getSUnit();
    unsigned PredNum = PredSU->NodeNum;
    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
      return false;

    // Four is the magic number of successors before a node is considered a
    // pinch point.
    unsigned NumDataSucs = 0;
    for (const SDep &SuccDep : PredSU->Succs) {
      if (SuccDep.getKind() == SDep::Data) {
        if (++NumDataSucs >= 4)
          return false;
      }
    }
    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
      return false;
    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
    SubtreeClasses.join(Succ->NodeNum, PredNum);
    return true;
  }

  /// Called by finalize() to record a connection between trees.
  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
    if (!Depth)
      return;

    do {
      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
        R.SubtreeConnections[FromTree];
      for (SchedDFSResult::Connection &C : Connections) {
        if (C.TreeID == ToTree) {
          C.Level = std::max(C.Level, Depth);
          return;
        }
      }
      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
  }
};

} // end namespace llvm

namespace {

/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
  std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;

public:
  bool isComplete() const { return DFSStack.empty(); }

  void follow(const SUnit *SU) {
    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
  }
  void advance() { ++DFSStack.back().second; }

  const SDep *backtrack() {
    DFSStack.pop_back();
    return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
  }

  const SUnit *getCurr() const { return DFSStack.back().first; }

  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }

  SUnit::const_pred_iterator getPredEnd() const {
    return getCurr()->Preds.end();
  }
};

} // end anonymous namespace

static bool hasDataSucc(const SUnit *SU) {
  for (const SDep &SuccDep : SU->Succs) {
    if (SuccDep.getKind() == SDep::Data &&
        !SuccDep.getSUnit()->isBoundaryNode())
      return true;
  }
  return false;
}

/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
  if (!IsBottomUp)
    llvm_unreachable("Top-down ILP metric is unimplemented");

  SchedDFSImpl Impl(*this);
  for (const SUnit &SU : SUnits) {
    if (Impl.isVisited(&SU) || hasDataSucc(&SU))
      continue;

    SchedDAGReverseDFS DFS;
    Impl.visitPreorder(&SU);
    DFS.follow(&SU);
    while (true) {
      // Traverse the leftmost path as far as possible.
      while (DFS.getPred() != DFS.getPredEnd()) {
        const SDep &PredDep = *DFS.getPred();
        DFS.advance();
        // Ignore non-data edges.
        if (PredDep.getKind() != SDep::Data
            || PredDep.getSUnit()->isBoundaryNode()) {
          continue;
        }
        // An already visited edge is a cross edge, assuming an acyclic DAG.
        if (Impl.isVisited(PredDep.getSUnit())) {
          Impl.visitCrossEdge(PredDep, DFS.getCurr());
          continue;
        }
        Impl.visitPreorder(PredDep.getSUnit());
        DFS.follow(PredDep.getSUnit());
      }
      // Visit the top of the stack in postorder and backtrack.
      const SUnit *Child = DFS.getCurr();
      const SDep *PredDep = DFS.backtrack();
      Impl.visitPostorderNode(Child);
      if (PredDep)
        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
      if (DFS.isComplete())
        break;
    }
  }
  Impl.finalize();
}
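
// Illustrative example: for a pure data-edge chain SU(0) <- SU(1) <- SU(2),
// the reverse DFS starts at the root SU(2), reaches SU(0) first in postorder,
// and each visitPostorderEdge adds the child's InstrCount into the parent, so
// the root ends with InstrCount = 3; predecessors whose subtree stays under
// SubtreeLimit are joined into the parent's equivalence class along the way.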

/// The root of the given SubtreeID was just scheduled. For all subtrees
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
  for (const Connection &C : SubtreeConnections[SubtreeID]) {
    SubtreeConnectLevels[C.TreeID] =
      std::max(SubtreeConnectLevels[C.TreeID], C.Level);
    DEBUG(dbgs() << "  Tree: " << C.TreeID
          << " @" << SubtreeConnectLevels[C.TreeID] << '\n');
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
  OS << InstrCount << " / " << Length << " = ";
  if (!Length)
    OS << "BADILP";
  else
    OS << format("%g", ((double)InstrCount / Length));
}

LLVM_DUMP_METHOD void ILPValue::dump() const {
  dbgs() << *this << '\n';
}

namespace llvm {

LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
  Val.print(OS);
  return OS;
}

} // end namespace llvm

#endif
iterator end()
Returns an iterator past this container.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
virtual void finishBlock()
Cleans up after scheduling in the given block.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
A common definition of LaneBitmask for use in TableGen and CodeGen.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
void clear()
Definition: MapVector.h:78
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:429
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
bool available(const MachineRegisterInfo &MRI, unsigned Reg) const
Returns true if register Reg and no aliasing register is in the set.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:449
void init(unsigned N)
Initialize an array of N PressureDiffs.
Record a physical register access.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
LivePhysRegs LiveRegs
Set of live physical registers for updating kill flags.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit *SU) const override
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
static LaneBitmask getAll()
Definition: LaneBitmask.h:84
void removeRegsInMask(const MachineOperand &MO, SmallVectorImpl< std::pair< unsigned, const MachineOperand *>> *Clobbers=nullptr)
Removes physical registers clobbered by the regmask operand MO.
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
unsigned getReg() const
getReg - Returns the register number.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:358
void setIsUndef(bool Val=true)
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:35
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void init(const MCSchedModel &sm, const TargetSubtargetInfo *sti, const TargetInstrInfo *tii)
Initialize the machine model for instruction scheduling.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
void compress()
compress - Compress equivalence classes by numbering them 0 .
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
Definition: MachineInstr.h:863
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
bool hasOneDef(unsigned RegNo) const
Return true if there is exactly one operand defining the specified register.
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI DAG construction"))
static unsigned InstrCount
iterator_range< mmo_iterator > memoperands()
Definition: MachineInstr.h:370
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:306
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU&#39;s instruction that defines a physical register.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:261
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
The two locations may or may not alias. This is the least precise result.
Definition: AliasAnalysis.h:81
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
void clearList(ValueType V)
Clears the list of SUs mapped to V.
iterator_range< succ_iterator > successors()
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:570
void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
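A minimal sketch of the per-region driver protocol, assembled only from the hooks listed on this page; Scheduler stands for a ScheduleDAGInstrs subclass, and AA, MBB (a MachineBasicBlock*), RegionBegin, RegionEnd, and NumRegionInstrs are assumed to come from the pass driving scheduling.
Scheduler.startBlock(MBB);
Scheduler.enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
Scheduler.buildSchedGraph(AA);   // creates SUnits plus all dependency edges
// ... run the scheduling algorithm over Scheduler.SUnits ...
Scheduler.exitRegion();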
A description of a memory reference used in the backend.
unsigned NumSuccs
Number of SDep::Data successors.
Definition: ScheduleDAG.h:272
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to an SUnit.
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to the scheduling barrier by adding the exit SU to the register defs and use list.
'undef' values are things that do not have specified contents.
Definition: Constants.h:1247
An individual mapping from virtual register number to SUnit.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:282
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:371
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:266
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise. ...
Definition: SparseSet.h:235
static bool hasDataSucc(const SUnit *SU)
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
unsigned SubReg
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:284
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:445
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:88
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:285
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:66
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:56
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
bool isValid() const
isValid - Returns true until all the operands have been visited.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:294
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx is mapped to an SUnit.
void print(raw_ostream &OS) const
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:190
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
List of registers defined and used by a machine instruction.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:490
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of register in this class.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
iterator find(const KeyT &Key)
Definition: MapVector.h:137
reverse_iterator rend()
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
bool mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA)
Returns true if this instruction's memory access aliases the memory access of Other.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't change during the execution of this function.
reverse_iterator rbegin()
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:349
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:164
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:285
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Scheduling dependency.
Definition: ScheduleDAG.h:50
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:293
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:610
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register information).
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:404
bool isCall
Is a function call.
Definition: ScheduleDAG.h:280
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:378
Array of PressureDiffs.
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:59
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
static unsigned getReductionSize()
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:278
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
Definition: MCSchedule.h:55
bool registerDefIsDead(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
Definition: MachineInstr.h:934
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
Internal state used to compute SchedDFSResult.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
void reComputeSize()
Counts the number of SUs in this map after a reduction.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:101
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:221
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:72
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:372
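A standalone illustration of the insert-if-new idiom this return value enables: the bool half of the pair doubles as a "first visit" test, which is how worklist algorithms avoid revisiting nodes.
#include "llvm/ADT/SmallPtrSet.h"

// Returns true only the first time P is seen.
static bool visitOnce(llvm::SmallPtrSetImpl<const void *> &Visited,
                      const void *P) {
  return Visited.insert(P).second;
}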
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
MCRegAliasIterator enumerates all registers aliasing Reg.
static const unsigned End
Track the current register pressure at some position in the instruction stream, and remember the high water mark.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
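A standalone sketch of the contract: the universe must be set before any insertion, and every key must be smaller than it. NumRegs and the key 5 are arbitrary demo values.
#include "llvm/ADT/SparseSet.h"
#include <cassert>

static void trackRegs(unsigned NumRegs) {
  assert(NumRegs > 5 && "demo key must fit inside the universe");
  llvm::SparseSet<unsigned> LiveRegs;
  LiveRegs.setUniverse(NumRegs);   // keys must satisfy Key < NumRegs
  LiveRegs.insert(5);
  if (LiveRegs.count(5))
    LiveRegs.erase(LiveRegs.find(5));
}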
const MachineFrameInfo & MFI
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
void clear()
Clears the set.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
bool isVisited(const SUnit *SU) const
Returns true if this node has been visited by the DFS traversal.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:3538
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:479
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
static void getUnderlyingObjects(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
void setIsKill(bool Val=true)
iterator find(const KeyT &Key)
Find an element by its key.
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the same scheduling region that depend on the physical register referenced at OperIdx.
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
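A standalone example tying join() and compress() together: join merges classes, and compress renumbers them densely so class IDs can be used as array indices.
#include "llvm/ADT/IntEqClasses.h"
#include <cassert>

static void demoEqClasses() {
  llvm::IntEqClasses EC(4); // elements 0..3, initially 4 singleton classes
  EC.join(0, 2);
  EC.join(1, 3);
  EC.compress();            // classes renumbered 0 .. M-1
  assert(EC.getNumClasses() == 2);
  assert(EC[0] == EC[2] && EC[1] == EC[3]);
}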
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the RegisterOperands accordingly.
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:283
Iterator for intrusive lists based on ilist_node.
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:423
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:839
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
bool isDebugValue() const
Definition: MachineInstr.h:782
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:864
This is a utility class that provides an abstraction for the common functionality between Instruction...
Definition: Operator.h:31
Reg2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instructions.
void eraseAll(const KeyT &K)
Erase all elements with the given key.
const size_t N
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx. ...
Nonvolatile load/Store instructions that may alias.
Definition: ScheduleDAG.h:71
typename std::vector< std::pair< ValueType, SUList >> ::iterator iterator
Definition: MapVector.h:46
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store followed by a load.
Definition: ScheduleDAG.h:389
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
Special value supplied for machine level alias analysis.
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
iterator erase(iterator I)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:573
Representation of each machine instruction.
Definition: MachineInstr.h:59
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:574
void finalize()
Sets each node's subtree ID to the representative ID and records connections between trees.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:569
void removeReg(unsigned Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set...
Definition: LivePhysRegs.h:89
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
A set of physical registers with utility functions to track liveness when walking backward/forward through a basic block.
Definition: LivePhysRegs.h:49
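A hedged sketch of the usual backward-walk pattern (the same shape kill-flag fixup uses): seed with the block's live-outs, then recede one instruction at a time. LivePhysRegs::stepBackward() is assumed from the class's public interface even though it is not listed on this page.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

static void walkBackward(const llvm::TargetRegisterInfo &TRI,
                         llvm::MachineBasicBlock &MBB) {
  llvm::LivePhysRegs LiveRegs;
  LiveRegs.init(TRI);        // (re-)initialize and clear the set
  LiveRegs.addLiveOuts(MBB); // seed with registers live out of MBB
  for (llvm::MachineInstr &MI : llvm::make_range(MBB.rbegin(), MBB.rend()))
    LiveRegs.stepBackward(MI); // uses become live, defs die above MI
}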
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
constexpr bool any() const
Definition: LaneBitmask.h:53
bool empty() const
Returns true if the set is empty.
Mapping from virtual register to SUnit including an operand index.
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AliasAnalysis and Target fail to deny the dependency.
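Given the two descriptions above, addChainDependencies() is plausibly nothing more than this loop; a sketch written as if inside ScheduleDAGInstrs, where both protected members are accessible:
void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, SUList &SUs,
                                             unsigned Latency) {
  for (SUnit *Entry : SUs)                  // every SU mapped to this value
    addChainDependency(SU, Entry, Latency); // filtered by AA and the target
}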
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:568
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:496
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2018
void print(raw_ostream &OS, bool SkipOpers=false, bool SkipDebugLoc=false, const TargetInstrInfo *TII=nullptr) const
Debugging support: print this MI to OS.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:269
iterator begin()
Definition: MapVector.h:59
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instructions and return multiple objects.
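A hedged usage sketch combining this with isIdentifiedObject() from earlier in the list: decide whether every object a pointer may be based on is distinct and identifiable, the precondition for keying a memory access by Value. The helper name allObjectsIdentified is an assumption for illustration.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h" // isIdentifiedObject()
#include "llvm/Analysis/ValueTracking.h" // GetUnderlyingObjects()
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

static bool allObjectsIdentified(llvm::Value *V, const llvm::DataLayout &DL) {
  llvm::SmallVector<llvm::Value *, 4> Objs;
  llvm::GetUnderlyingObjects(V, Objs, DL);
  if (Objs.empty())
    return false; // nothing recoverable to reason about
  for (llvm::Value *Obj : Objs)
    if (!llvm::isIdentifiedObject(Obj))
      return false;
  return true;
}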
const unsigned Kind
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and returns a pointer to it.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clears the map.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:597
iterator end()
Definition: MapVector.h:61
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:463
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
LLVM Value Representation.
Definition: Value.h:73
void clear()
Clears map from all contents.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:41
bool isPosition() const
Definition: MachineInstr.h:780
static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to a known memory location, look for it and add it to the list of values.
SmallVector< SDep, 4 > Succs
All SUnit successors.
Definition: ScheduleDAG.h:262
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
#define DEBUG(X)
Definition: Debug.h:118
static bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unmodeled side effects).
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore, etc.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediately following it.
Definition: MachineInstr.h:436
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:201
static void dumpSUList(ScheduleDAGInstrs::SUList &L)
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:571
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:572
SchedDFSImpl(SchedDFSResult &r)
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:284
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:84
static const Value * getUnderlyingObjectFromInt(const Value *V)
This is the function that does the work of looking through basic ptrtoint+arithmetic+inttoptr sequences.
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
bool hasTailCall() const
Returns true if the function contains a tail call.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:247
A discriminated union of two pointer types, with the discriminator in the low bit of the pointer...
Definition: PointerUnion.h:87
void addReg(unsigned Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
void resize(size_type N)
Definition: SmallVector.h:355