//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This implements the ScheduleDAGInstrs class, which implements
/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));

static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.

// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
// together hold this many SUs, a reduction of maps will be done.
static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
                             "prior to scheduling, at which point a trade-off "
                             "is made to avoid excessive compile time."));

static cl::opt<unsigned> ReductionSize(
    "dag-maps-reduction-size", cl::Hidden,
    cl::desc("A huge scheduling region will have maps reduced by this many "
             "nodes at a time. Defaults to HugeRegion / 2."));

static unsigned getReductionSize() {
  // Always reduce a huge region with half of the elements, except
  // when user sets this number explicitly.
  if (ReductionSize.getNumOccurrences() == 0)
    return HugeRegion / 2;
  return ReductionSize;
}
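
// With the defaults above (-dag-maps-huge-region=1000 and
// -dag-maps-reduction-size unset), each reduction thus removes
// 1000 / 2 = 500 nodes from the maps.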

static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "{ ";
  for (const SUnit *su : L) {
    dbgs() << "SU(" << su->NodeNum << ")";
    if (su != L.back())
      dbgs() << ", ";
  }
  dbgs() << "}\n";
#endif
}

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool RemoveKillFlags)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
      RemoveKillFlags(RemoveKillFlags),
      UnknownValue(UndefValue::get(
          Type::getVoidTy(mf.getFunction().getContext()))),
      Topo(SUnits, &ExitSU) {
  DbgValues.clear();

  const TargetSubtargetInfo &ST = mf.getSubtarget();
  SchedModel.init(&ST);
}

/// If this machine instr has memory reference information and it can be
/// tracked to a normal reference to a known object, return the Value
/// for that object. This function returns false if the memory location is
/// unknown or may alias anything.
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                         const MachineFrameInfo &MFI,
                                         UnderlyingObjectsVector &Objects,
                                         const DataLayout &DL) {
  auto allMMOsOkay = [&]() {
    for (const MachineMemOperand *MMO : MI->memoperands()) {
      // TODO: Figure out whether isAtomic is really necessary (see D57601).
      if (MMO->isVolatile() || MMO->isAtomic())
        return false;

      if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
        // Functions that contain tail calls don't have unique
        // PseudoSourceValue objects. Two PseudoSourceValues might refer to
        // the same or overlapping locations. The client code calling this
        // function assumes this is not the case. So return a conservative
        // answer of no known object.
        if (MFI.hasTailCall())
          return false;

        // For now, ignore PseudoSourceValues which may alias LLVM IR values
        // because the code that uses this function has no way to cope with
        // such aliases.
        if (PSV->isAliased(&MFI))
          return false;

        bool MayAlias = PSV->mayAlias(&MFI);
        Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
      } else if (const Value *V = MMO->getValue()) {
        SmallVector<Value *, 4> Objs;
        if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
          return false;

        for (Value *V : Objs) {
          assert(isIdentifiedObject(V));
          Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
        }
      } else
        return false;
    }
    return true;
  };

  if (!allMMOsOkay()) {
    Objects.clear();
    return false;
  }

  return true;
}
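
// Note the all-or-nothing contract above: if any memory operand cannot be
// analyzed, Objects is cleared and the caller must fall back to
// conservative chain dependencies for this instruction.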

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = nullptr;
}

void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  NumRegionInstrs = regioninstrs;
}

void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
  ExitSU.setInstr(ExitMI);
  // Add dependencies on the defs and uses of the instruction.
  if (ExitMI) {
    for (const MachineOperand &MO : ExitMI->operands()) {
      if (!MO.isReg() || MO.isDef()) continue;
      Register Reg = MO.getReg();
      if (Register::isPhysicalRegister(Reg)) {
        Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
      }
    }
  }
  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    for (const MachineBasicBlock *Succ : BB->successors()) {
      for (const auto &LI : Succ->liveins()) {
        if (!Uses.contains(LI.PhysReg))
          Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
      }
    }
  }
}
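
// For example, when a region ends in a fallthrough or conditional branch,
// every register live into a successor block becomes a use on ExitSU, so
// in-region defs of those registers get a dependence edge to ExitSU and
// are treated as live-out rather than dead.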

/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  // Only use any non-zero latency for real defs/uses, in contrast to
  // "fake" operands added by regalloc.
  const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
  bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
                            !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = I->OpIdx;
      MachineInstr *RegUse = nullptr;
      SDep Dep;
      if (UseOp < 0)
        Dep = SDep(SU, SDep::Artificial);
      else {
        // Set the hasPhysRegDefs only for physreg defs that have a use within
        // the scheduling region.
        SU->hasPhysRegDefs = true;
        Dep = SDep(SU, SDep::Data, *Alias);
        RegUse = UseSU->getInstr();
      }
      const MCInstrDesc *UseMIDesc =
          (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
      bool ImplicitPseudoUse =
          (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
           !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
      if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
        Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                                        RegUse, UseOp));
        ST.adjustSchedDependency(SU, UseSU, Dep);
      } else
        Dep.setLatency(0);

      UseSU->addPred(Dep);
    }
  }
}

/// Adds register dependencies (data, anti, and output) from this SUnit
/// to following instructions in the same scheduling region that depend
/// on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  Register Reg = MO.getReg();
  // We do not need to track any dependencies for constant registers.
  if (MRI.isConstantPhysReg(Reg))
    return;

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
      SUnit *DefSU = I->SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
        else {
          SDep Dep(SU, Kind, /*Reg=*/*Alias);
          Dep.setLatency(
              SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
          DefSU->addPred(Dep);
        }
      }
    }
  }

  if (!MO.isDef()) {
    SU->hasPhysRegUses = true;
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
    if (RemoveKillFlags)
      MO.setIsKill(false);
  } else {
    addPhysRegDataDeps(SU, OperIdx);

    // Clear previous uses and defs of this register and its subregisters.
    for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
      if (Uses.contains(*SubReg))
        Uses.eraseAll(*SubReg);
      if (!MO.isDead())
        Defs.eraseAll(*SubReg);
    }
    if (MO.isDead() && SU->isCall) {
      // Calls will not be reordered because of chain dependencies (see
      // below). Since call operands are dead, calls may continue to be added
      // to the DefList making dependence checking quadratic in the size of
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
      Reg2SUnitsMap::iterator B = P.first;
      Reg2SUnitsMap::iterator I = P.second;
      for (bool isBegin = I == B; !isBegin; /* empty */) {
        isBegin = (--I) == B;
        if (!I->SU->isCall)
          break;
        I = Defs.erase(I);
      }
    }

    // Defs are pushed in the order they are visited and never reordered.
    Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
  }
}
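
// Illustrative scenario (register names are only for exposition): given
//   $eax = <def>   ...   use of $eax   ...   $eax = <redef>
// the later redef receives an anti edge (latency 0) from the use and an
// output edge from the earlier def, while the use gets a data edge whose
// latency comes from the machine model via addPhysRegDataDeps().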

LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
  Register Reg = MO.getReg();
  // No point in tracking lanemasks if we don't have interesting subregisters.
  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
  if (!RC.HasDisjunctSubRegs)
    return LaneBitmask::getAll();

  unsigned SubReg = MO.getSubReg();
  if (SubReg == 0)
    return RC.getLaneMask();
  return TRI->getSubRegIndexLaneMask(SubReg);
}

bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
  auto RegUse = CurrentVRegUses.find(MO.getReg());
  if (RegUse == CurrentVRegUses.end())
    return true;
  return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
}

/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  Register Reg = MO.getReg();

  LaneBitmask DefLaneMask;
  LaneBitmask KillLaneMask;
  if (TrackLaneMasks) {
    bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
    DefLaneMask = getLaneMaskForMO(MO);
    // If we have a <read-undef> flag, none of the lane values comes from an
    // earlier instruction.
    KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;

    if (MO.getSubReg() != 0 && MO.isUndef()) {
      // There may be other subregister defs on the same instruction of the
      // same register in later operands. The lanes of other defs will now be
      // live after this instruction, so these should not be treated as killed
      // by the instruction even though they appear to be killed in this one
      // operand.
      for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) {
        const MachineOperand &OtherMO = MI->getOperand(I);
        if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
          KillLaneMask &= ~getLaneMaskForMO(OtherMO);
      }
    }

    // Clear undef flag, we'll re-add it later once we know which subregister
    // Def is first.
    MO.setIsUndef(false);
  } else {
    DefLaneMask = LaneBitmask::getAll();
    KillLaneMask = LaneBitmask::getAll();
  }

  if (MO.isDead()) {
    assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
  } else {
    // Add data dependence to all uses we found so far.
    const TargetSubtargetInfo &ST = MF.getSubtarget();
    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
         E = CurrentVRegUses.end(); I != E; /*empty*/) {
      LaneBitmask LaneMask = I->LaneMask;
      // Ignore uses of other lanes.
      if ((LaneMask & KillLaneMask).none()) {
        ++I;
        continue;
      }

      if ((LaneMask & DefLaneMask).any()) {
        SUnit *UseSU = I->SU;
        MachineInstr *Use = UseSU->getInstr();
        SDep Dep(SU, SDep::Data, Reg);
        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
                                                        I->OperandIndex));
        ST.adjustSchedDependency(SU, UseSU, Dep);
        UseSU->addPred(Dep);
      }

      LaneMask &= ~KillLaneMask;
      // If we found a Def for all lanes of this use, remove it from the list.
      if (LaneMask.any()) {
        I->LaneMask = LaneMask;
        ++I;
      } else
        I = CurrentVRegUses.erase(I);
    }
  }

  // Shortcut: Singly defined vregs do not have output/anti dependencies.
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest defs of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  LaneBitmask LaneMask = DefLaneMask;
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for other lanes.
    if ((V2SU.LaneMask & LaneMask).none())
      continue;
    // Add an output dependence.
    SUnit *DefSU = V2SU.SU;
    // Ignore additional defs of the same lanes in one instruction. This can
    // happen because lanemasks are shared for targets with too many
    // subregisters. We also use some representation tricks/hacks where we
    // add super-register defs/uses, to imply that although we only access
    // parts of the reg we care about the full one.
    if (DefSU == SU)
      continue;
    SDep Dep(SU, SDep::Output, Reg);
    Dep.setLatency(
        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
    DefSU->addPred(Dep);

    // Update current definition. This can get tricky if the def was about a
    // bigger lanemask before. We then have to shrink it and create a new
    // VReg2SUnit for the non-overlapping part.
    LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
    LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
    V2SU.SU = SU;
    V2SU.LaneMask = OverlapMask;
    if (NonOverlapMask.any())
      CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
  }
  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
  if (LaneMask.any())
    CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
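
// Lane-mask sketch: for a vreg %0 with subregisters sub0/sub1, a def of
// %0.sub0 only satisfies (and removes) pending uses whose lane mask
// overlaps sub0; a pending use of %0.sub1 stays in CurrentVRegUses until
// a def covering sub1 (or the whole register) is reached.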

/// Adds a register data dependency if the instruction that defines the
/// virtual register used at OperIdx is mapped to an SUnit. Add a register
/// antidependency from this SUnit to instructions that occur later in the same
/// scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);
  Register Reg = MO.getReg();

  // Remember the use. Data dependencies will be added when we find the def.
  LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
                                        : LaneBitmask::getAll();
  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));

  // Add antidependences to the following defs of the vreg.
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for unrelated lanes.
    LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
    if ((PrevDefLaneMask & LaneMask).none())
      continue;
    if (V2SU.SU == SU)
      continue;

    V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
  }
}

/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}

void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
                                           unsigned Latency) {
  if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
    SDep Dep(SUa, SDep::MayAliasMem);
    Dep.setLatency(Latency);
    SUb->addPred(Dep);
  }
}

/// Creates an SUnit for each real instruction, numbered in top-down
/// topological order. The instruction order A < B implies that no edge exists
/// from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(NumRegionInstrs);

  for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
    if (MI.isDebugInstr())
      continue;

    SUnit *SU = newSUnit(&MI);
    MISUnitMap[&MI] = SU;

    SU->isCall = MI.isCall();
    SU->isCommutable = MI.isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());

    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
    //
    // Reserved resources block an instruction from issuing and stall the
    // entire pipeline. These are identified by BufferSize=0.
    //
    // Unbuffered resources prevent execution of subsequent instructions that
    // require the same resources. This is used for in-order execution pipelines
    // within an out-of-order core. These are identified by BufferSize=1.
    if (SchedModel.hasInstrSchedModel()) {
      const MCSchedClassDesc *SC = getSchedClass(SU);
      for (const MCWriteProcResEntry &PRE :
           make_range(SchedModel.getWriteProcResBegin(SC),
                      SchedModel.getWriteProcResEnd(SC))) {
        switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
        case 0:
          SU->hasReservedResource = true;
          break;
        case 1:
          SU->isUnbuffered = true;
          break;
        default:
          break;
        }
      }
    }
  }
}
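
// As an example of the BufferSize rules above: a non-pipelined divide unit
// that stalls the whole pipeline would be modeled with BufferSize=0
// (hasReservedResource), while an in-order unit inside an out-of-order core
// would be modeled with BufferSize=1 (isUnbuffered). The concrete units here
// are illustrative, not taken from any particular target.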

class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
  /// Current total number of SUs in map.
  unsigned NumNodes = 0;

  /// 1 for loads, 0 for stores. (see comment in SUList)
  unsigned TrueMemOrderLatency;

public:
  Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}

  /// To keep NumNodes up to date, insert() is used instead of
  /// this operator w/ push_back().
  ValueType &operator[](const SUList &Key) {
    llvm_unreachable("Don't use. Use insert() instead."); }

  /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
  /// reduce().
  void inline insert(SUnit *SU, ValueType V) {
    MapVector::operator[](V).push_back(SU);
    NumNodes++;
  }

  /// Clears the list of SUs mapped to V.
  void inline clearList(ValueType V) {
    iterator Itr = find(V);
    if (Itr != end()) {
      assert(NumNodes >= Itr->second.size());
      NumNodes -= Itr->second.size();

      Itr->second.clear();
    }
  }

  /// Clears map from all contents.
  void clear() {
    MapVector::clear();
    NumNodes = 0;
  }

  unsigned inline size() const { return NumNodes; }

  /// Counts the number of SUs in this map after a reduction.
  void reComputeSize() {
    NumNodes = 0;
    for (auto &I : *this)
      NumNodes += I.second.size();
  }

  unsigned inline getTrueMemOrderLatency() const {
    return TrueMemOrderLatency;
  }

  void dump();
};

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap) {
  for (auto &I : Val2SUsMap)
    addChainDependencies(SU, I.second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap,
                                             ValueType V) {
  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
  if (Itr != Val2SUsMap.end())
    addChainDependencies(SU, Itr->second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  for (auto &I : map) {
    SUList &sus = I.second;
    for (auto *SU : sus)
      SU->addPredBarrier(BarrierChain);
  }
  map.clear();
}

void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  // Go through all lists of SUs.
  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
    Value2SUsMap::iterator CurrItr = I++;
    SUList &sus = CurrItr->second;
    SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
    for (; SUItr != SUEE; ++SUItr) {
      // Stop on BarrierChain or any instruction above it.
      if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
        break;

      (*SUItr)->addPredBarrier(BarrierChain);
    }

    // Remove also the BarrierChain from list if present.
    if (SUItr != SUEE && *SUItr == BarrierChain)
      SUItr++;

    // Remove all SUs that are now successors of BarrierChain.
    if (SUItr != sus.begin())
      sus.erase(sus.begin(), SUItr);
  }

  // Remove all entries with empty su lists.
  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
      return (mapEntry.second.empty()); });

  // Recompute the size of the map (NumNodes).
  map.reComputeSize();
}

void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
                                        RegPressureTracker *RPTracker,
                                        PressureDiffs *PDiffs,
                                        LiveIntervals *LIS,
                                        bool TrackLaneMasks) {
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                       : ST.useAA();
  AAForDep = UseAA ? AA : nullptr;

  BarrierChain = nullptr;

  this->TrackLaneMasks = TrackLaneMasks;
  MISUnitMap.clear();
  ScheduleDAG::clearDAG();

  // Create an SUnit for each real instruction.
  initSUnits();

  if (PDiffs)
    PDiffs->init(SUnits.size());

  // We build scheduling units by walking a block's instruction list
  // from bottom to top.

  // Each MI's memory operand(s) is analyzed to a list of underlying
  // objects. The SU is then inserted in the SUList(s) mapped from the
  // Value(s). Each Value thus gets mapped to lists of SUs depending
  // on it, stores and loads kept separately. Two SUs are trivially
  // non-aliasing if they both depend on only identified Values and do
  // not share any common Value.
  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
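
  // For instance, two stores whose only underlying objects are distinct
  // identified Values land in different SULists and get no chain edge
  // between them, while a store with an unanalyzable address is chained
  // against every list (see the mayStore handling below).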

  // Certain memory accesses are known to not alias any SU in Stores
  // or Loads, and have therefore their own 'NonAlias'
  // domain. E.g. spill / reload instructions never alias LLVM IR
  // Values. It would be nice to assume that this type of memory
  // accesses always have a proper memory operand modelling, and are
  // therefore never unanalyzable, but this is conservatively not
  // done.
  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);

  // Track all instructions that may raise floating-point exceptions.
  // These do not depend on one another (or normal loads or stores), but
  // must not be rescheduled across global barriers. Note that we don't
  // really need a "map" here since we don't track those MIs by value;
  // using the same Value2SUsMap data type here is simply a matter of
  // convenience.
  Value2SUsMap FPExceptions;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = nullptr;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setUniverse(TRI->getNumRegs());
  Uses.setUniverse(TRI->getNumRegs());

  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
  unsigned NumVirtRegs = MRI.getNumVirtRegs();
  CurrentVRegDefs.setUniverse(NumVirtRegs);
  CurrentVRegUses.setUniverse(NumVirtRegs);

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *DbgMI = nullptr;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (DbgMI) {
      DbgValues.push_back(std::make_pair(DbgMI, &MI));
      DbgMI = nullptr;
    }

    if (MI.isDebugValue()) {
      DbgMI = &MI;
      continue;
    }
    if (MI.isDebugLabel())
      continue;

    SUnit *SU = MISUnitMap[&MI];
    assert(SU && "No SUnit mapped to this MI");

    if (RPTracker) {
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
      if (TrackLaneMasks) {
        SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
      }
      if (PDiffs != nullptr)
        PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);

      if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
        RPTracker->recedeSkipDebugValues();
      assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
      RPTracker->recede(RegOpers);
    }

    assert(
        (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
        "Cannot schedule terminators or labels!");

    // Add register-based dependencies (data, anti, and output).
    // For some instructions (calls, returns, inline-asm, etc.) there can
    // be explicit uses and implicit defs, in which case the use will appear
    // on the operand list before the def. Do two passes over the operand
    // list to make sure that defs are processed before any uses.
    bool HasVRegDef = false;
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (Register::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (Register::isVirtualRegister(Reg)) {
        HasVRegDef = true;
        addVRegDefDeps(SU, j);
      }
    }
    // Now process all uses.
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      // Only look at use operands.
      // We do not need to check for MO.readsReg() here because subsequent
      // subregister defs will get output dependence edges and need no
      // additional use dependencies.
      if (!MO.isReg() || !MO.isUse())
        continue;
      Register Reg = MO.getReg();
      if (Register::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(SU, j);
      }
    }

    // If we haven't seen any uses in this scheduling region, create a
    // dependence edge to ExitSU to model the live-out latency. This is required
    // for vreg defs with no in-region use, and prefetches with no vreg def.
    //
    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
    // check currently relies on being called before adding chain deps.
    if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
      SDep Dep(SU, SDep::Artificial);
      Dep.setLatency(SU->Latency - 1);
      ExitSU.addPred(Dep);
    }

    // Add memory dependencies (Note: isStoreToStackSlot and
    // isLoadFromStackSlot are not usable after stack slots are lowered to
    // actual addresses).

    // This is a barrier event that acts as a pivotal node in the DAG.
    if (isGlobalMemoryObject(AA, &MI)) {

      // Become the barrier chain.
      if (BarrierChain)
        BarrierChain->addPredBarrier(SU);
      BarrierChain = SU;

      LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);

      // Add dependencies against everything below it and clear maps.
      addBarrierChain(Stores);
      addBarrierChain(Loads);
      addBarrierChain(NonAliasStores);
      addBarrierChain(NonAliasLoads);
      addBarrierChain(FPExceptions);

      continue;
    }

    // Instructions that may raise FP exceptions may not be moved
    // across any global barriers.
    if (MI.mayRaiseFPException()) {
      if (BarrierChain)
        BarrierChain->addPredBarrier(SU);

      FPExceptions.insert(SU, UnknownValue);

      if (FPExceptions.size() >= HugeRegion) {
        LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
        Value2SUsMap empty;
        reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
      }
    }

    // If it's not a store or a variant load, we're done.
    if (!MI.mayStore() &&
        !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
      continue;

    // Always add dependency edge to BarrierChain if present.
    if (BarrierChain)
      BarrierChain->addPredBarrier(SU);

    // Find the underlying objects for MI. The Objs vector is either
    // empty, or filled with the Values of memory locations which this
    // SU depends on.
    UnderlyingObjectsVector Objs;
    bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
                                                  MF.getDataLayout());

    if (MI.mayStore()) {
      if (!ObjsFound) {
        // An unknown store depends on all stores and loads.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);
        addChainDependencies(SU, Loads);
        addChainDependencies(SU, NonAliasLoads);

        // Map this store to 'UnknownValue'.
        Stores.insert(SU, UnknownValue);
      } else {
        // Add precise dependencies against all previously seen memory
        // accesses mapped to the same Value(s).
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add dependencies to previous stores and loads mapped to V.
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
          addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
        }
        // Update the store map after all chains have been added to avoid adding
        // self-loop edge if multiple underlying objects are present.
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Map this store to V.
          (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
        }
        // The store may have dependencies to unanalyzable loads and
        // stores.
        addChainDependencies(SU, Loads, UnknownValue);
        addChainDependencies(SU, Stores, UnknownValue);
      }
    } else { // SU is a load.
      if (!ObjsFound) {
        // An unknown load depends on all stores.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);

        Loads.insert(SU, UnknownValue);
      } else {
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add precise dependencies against all previously seen stores
          // mapping to the same Value(s).
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);

          // Map this load to V.
          (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
        }
        // The load may have dependencies to unanalyzable stores.
        addChainDependencies(SU, Stores, UnknownValue);
      }
    }

    // Reduce maps if they grow huge.
    if (Stores.size() + Loads.size() >= HugeRegion) {
      LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
      reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
    }
    if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
      LLVM_DEBUG(
          dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
      reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
    }
  }

  if (DbgMI)
    FirstDbgValue = DbgMI;

  Defs.clear();
  Uses.clear();
  CurrentVRegDefs.clear();
  CurrentVRegUses.clear();

  Topo.MarkDirty();
}

raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
  PSV->printCustom(OS);
  return OS;
}

void ScheduleDAGInstrs::Value2SUsMap::dump() {
  for (auto &Itr : *this) {
    if (Itr.first.is<const Value*>()) {
      const Value *V = Itr.first.get<const Value*>();
      if (isa<UndefValue>(V))
        dbgs() << "Unknown";
      else
        V->printAsOperand(dbgs());
    }
    else if (Itr.first.is<const PseudoSourceValue*>())
      dbgs() << Itr.first.get<const PseudoSourceValue*>();
    else
      llvm_unreachable("Unknown Value type.");

    dbgs() << " : ";
    dumpSUList(Itr.second);
  }
}

void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
                                              Value2SUsMap &loads, unsigned N) {
  LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
             dbgs() << "Loading SUnits:\n"; loads.dump());

  // Insert all SU's NodeNums into a vector and sort it.
  std::vector<unsigned> NodeNums;
  NodeNums.reserve(stores.size() + loads.size());
  for (auto &I : stores)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  for (auto &I : loads)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  llvm::sort(NodeNums);

  // The N last elements in NodeNums will be removed, and the SU with
  // the lowest NodeNum of them will become the new BarrierChain to
  // let the not yet seen SUs have a dependency to the removed SUs.
  assert(N <= NodeNums.size());
  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
  if (BarrierChain) {
    // The aliasing and non-aliasing maps reduce independently of each
    // other, but share a common BarrierChain. Check if the
    // newBarrierChain is above the former one. If it is not, it may
    // introduce a loop to use newBarrierChain, so keep the old one.
    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
      BarrierChain->addPredBarrier(newBarrierChain);
      BarrierChain = newBarrierChain;
      LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);
    }
    else
      LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);
  }
  else
    BarrierChain = newBarrierChain;

  insertBarrierChain(stores);
  insertBarrierChain(loads);

  LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
             dbgs() << "Loading SUnits:\n"; loads.dump());
}
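
// For example, with N = 500 the 500 highest NodeNums (the bottom-most SUs
// in the region) are purged from both maps; the lowest-numbered of them
// becomes the new BarrierChain, each purged SU becomes its successor, and
// SUs seen later in the bottom-up walk are ordered against it instead of
// against each purged SU individually.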

static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
                        MachineInstr &MI, bool addToLiveRegs) {
  for (MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !MO.readsReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;

    // Things that are available after the instruction are killed by it.
    bool IsKill = LiveRegs.available(MRI, Reg);
    MO.setIsKill(IsKill);
    if (addToLiveRegs)
      LiveRegs.addReg(Reg);
  }
}

void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
  LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');

  LiveRegs.init(*TRI);
  LiveRegs.addLiveOuts(MBB);

  // Examine block from end to start...
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    if (MI.isDebugInstr())
      continue;

    // Update liveness. Registers that are defed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
      const MachineOperand &MO = *O;
      if (MO.isReg()) {
        if (!MO.isDef())
          continue;
        Register Reg = MO.getReg();
        if (!Reg)
          continue;
        LiveRegs.removeReg(Reg);
      } else if (MO.isRegMask()) {
        LiveRegs.removeRegsInMask(MO);
      }
    }

    // If there is a bundle header, fix it up first.
    if (!MI.isBundled()) {
      toggleKills(MRI, LiveRegs, MI, true);
    } else {
      MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
      if (MI.isBundle())
        toggleKills(MRI, LiveRegs, MI, false);

      // Some targets make the (questionable) assumption that the instructions
      // inside the bundle are ordered and consequently only the last use of
      // a register inside the bundle can kill it.
      MachineBasicBlock::instr_iterator I = std::next(Bundle);
      while (I->isBundledWithSucc())
        ++I;
      do {
        if (!I->isDebugInstr())
          toggleKills(MRI, LiveRegs, *I, true);
        --I;
      } while (I != Bundle);
    }
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dumpNodeName(SU);
  dbgs() << ": ";
  SU.getInstr()->dump();
#endif
}

void ScheduleDAGInstrs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (EntrySU.getInstr() != nullptr)
    dumpNodeAll(EntrySU);
  for (const SUnit &SU : SUnits)
    dumpNodeAll(SU);
  if (ExitSU.getInstr() != nullptr)
    dumpNodeAll(ExitSU);
#endif
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss, /*SkipOpers=*/true);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}

bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
}

bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}
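
// Targets and DAG mutations use canAddEdge/addEdge to install artificial
// edges; the Topo.IsReachable check above rejects any edge that would close
// a cycle in the DAG.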

//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//

namespace llvm {

/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
  SchedDFSResult &R;

  /// Join DAG nodes into equivalence classes by their subtree.
  IntEqClasses SubtreeClasses;
  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
  std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;

  struct RootData {
    unsigned NodeID;
    unsigned ParentNodeID;      ///< Parent node (member of the parent subtree).
    unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
                                /// children.

    RootData(unsigned id): NodeID(id),
                           ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}

    unsigned getSparseSetIndex() const { return NodeID; }
  };

  SparseSet<RootData> RootSet;

public:
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }

  /// Returns true if this node has been visited by the DFS traversal.
  ///
  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
  /// ID. Later, SubtreeID is updated but remains valid.
  bool isVisited(const SUnit *SU) const {
    return R.DFSNodeData[SU->NodeNum].SubtreeID
        != SchedDFSResult::InvalidSubtreeID;
  }

  /// Initializes this node's instruction count. We don't need to flag the node
  /// visited until visitPostorder because the DAG cannot have cycles.
  void visitPreorder(const SUnit *SU) {
    R.DFSNodeData[SU->NodeNum].InstrCount =
        SU->getInstr()->isTransient() ? 0 : 1;
  }

  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (const SDep &PredDep : SU->Preds) {
      if (PredDep.getKind() != SDep::Data)
        continue;
      unsigned PredNum = PredDep.getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }

  /// Called once for each tree edge after calling visitPostOrderNode on
  /// the predecessor. Increment the parent node's instruction count and
  /// preemptively join this subtree to its parent's if it is small enough.
  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
    R.DFSNodeData[Succ->NodeNum].InstrCount
        += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
    joinPredSubtree(PredDep, Succ);
  }

  /// Adds a connection for cross edges.
  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
  }

  /// Sets each node's subtree ID to the representative ID and records
  /// connections between trees.
  void finalize() {
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    for (const RootData &Root : RootSet) {
      unsigned TreeID = SubtreeClasses[Root.NodeID];
      if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      LLVM_DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
                        << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
      unsigned PredTree = SubtreeClasses[P.first->NodeNum];
      unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = P.first->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }

protected:
  /// Joins the predecessor subtree with the successor that is its DFS parent.
  /// Applies some heuristics before joining.
  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
                       bool CheckLimit = true) {
    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");

    // Check if the predecessor is already joined.
    const SUnit *PredSU = PredDep.getSUnit();
    unsigned PredNum = PredSU->NodeNum;
    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
      return false;

    // Four is the magic number of successors before a node is considered a
    // pinch point.
    unsigned NumDataSucs = 0;
    for (const SDep &SuccDep : PredSU->Succs) {
      if (SuccDep.getKind() == SDep::Data) {
        if (++NumDataSucs >= 4)
          return false;
      }
    }
    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
      return false;
    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
    SubtreeClasses.join(Succ->NodeNum, PredNum);
    return true;
  }

  /// Called by finalize() to record a connection between trees.
  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
    if (!Depth)
      return;

    do {
      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
          R.SubtreeConnections[FromTree];
      for (SchedDFSResult::Connection &C : Connections) {
        if (C.TreeID == ToTree) {
          C.Level = std::max(C.Level, Depth);
          return;
        }
      }
      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
  }
};

} // end namespace llvm

namespace {

/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
  std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;

public:
  bool isComplete() const { return DFSStack.empty(); }

  void follow(const SUnit *SU) {
    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
  }
  void advance() { ++DFSStack.back().second; }

  const SDep *backtrack() {
    DFSStack.pop_back();
    return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
  }

  const SUnit *getCurr() const { return DFSStack.back().first; }

  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }

  SUnit::const_pred_iterator getPredEnd() const {
    return getCurr()->Preds.end();
  }
};

} // end anonymous namespace

static bool hasDataSucc(const SUnit *SU) {
  for (const SDep &SuccDep : SU->Succs) {
    if (SuccDep.getKind() == SDep::Data &&
        !SuccDep.getSUnit()->isBoundaryNode())
      return true;
  }
  return false;
}

/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
  if (!IsBottomUp)
    llvm_unreachable("Top-down ILP metric is unimplemented");

  SchedDFSImpl Impl(*this);
  for (const SUnit &SU : SUnits) {
    if (Impl.isVisited(&SU) || hasDataSucc(&SU))
      continue;

    SchedDAGReverseDFS DFS;
    Impl.visitPreorder(&SU);
    DFS.follow(&SU);
    while (true) {
      // Traverse the leftmost path as far as possible.
      while (DFS.getPred() != DFS.getPredEnd()) {
        const SDep &PredDep = *DFS.getPred();
        DFS.advance();
        // Ignore non-data edges.
        if (PredDep.getKind() != SDep::Data
            || PredDep.getSUnit()->isBoundaryNode()) {
          continue;
        }
        // An already visited edge is a cross edge, assuming an acyclic DAG.
        if (Impl.isVisited(PredDep.getSUnit())) {
          Impl.visitCrossEdge(PredDep, DFS.getCurr());
          continue;
        }
        Impl.visitPreorder(PredDep.getSUnit());
        DFS.follow(PredDep.getSUnit());
      }
      // Visit the top of the stack in postorder and backtrack.
      const SUnit *Child = DFS.getCurr();
      const SDep *PredDep = DFS.backtrack();
      Impl.visitPostorderNode(Child);
      if (PredDep)
        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
      if (DFS.isComplete())
        break;
    }
  }
  Impl.finalize();
}
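
// Typical use (a sketch; the actual driver lives in the machine scheduler,
// not in this file): after the DAG is built, compute() is run once over
// SUnits, candidates are then compared by their ILPValue, and
// scheduleTree() below is called as each subtree root is scheduled so that
// subtrees connected to it are prioritized next.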

/// The root of the given SubtreeID was just scheduled. For all subtrees
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
  for (const Connection &C : SubtreeConnections[SubtreeID]) {
    SubtreeConnectLevels[C.TreeID] =
        std::max(SubtreeConnectLevels[C.TreeID], C.Level);
    LLVM_DEBUG(dbgs() << "  Tree: " << C.TreeID << " @"
                      << SubtreeConnectLevels[C.TreeID] << '\n');
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
  OS << InstrCount << " / " << Length << " = ";
  if (!Length)
    OS << "BADILP";
  else
    OS << format("%g", ((double)InstrCount / Length));
}

LLVM_DUMP_METHOD void ILPValue::dump() const {
  dbgs() << *this << '\n';
}

namespace llvm {

LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
  Val.print(OS);
  return OS;
}

} // end namespace llvm

#endif
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
iterator end()
Returns an iterator past this container.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
virtual void finishBlock()
Cleans up after scheduling in the given block.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
bool isDebugLabel() const
A common definition of LaneBitmask for use in TableGen and CodeGen.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
void clear()
Definition: MapVector.h:88
bool deadDefHasNoUse(const MachineOperand &MO)
Returns true if the def register in MO has no uses.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:656
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void dump() const override
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:484
void init(unsigned N)
Initialize an array of N PressureDiffs.
Record a physical register access.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
LivePhysRegs LiveRegs
Set of live physical registers for updating kill flags.
bool TrackLaneMasks
Whether lane masks should get tracked.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void push_back(const T &Elt)
Definition: SmallVector.h:211
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:63
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:179
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:526
void setIsUndef(bool Val=true)
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:34
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
unsigned Reg
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
void compress()
compress - Compress equivalence classes by numbering them 0 .
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:37
bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
bool hasOneDef(unsigned RegNo) const
Return true if there is exactly one operand defining the specified register.
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI DAG construction"))
static unsigned InstrCount
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:476
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU's instruction that defines a physical register.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
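
The VReg2SUnit multimaps are queried with this equal_range idiom; a self-contained stand-in using std::multimap (unsigned vreg keys and int SUnit numbers are assumptions for illustration) shows the shape of the loop:

#include <cstdio>
#include <map>

int main() {
  std::multimap<unsigned, int> VRegUses;  // vreg -> SUnit number (stand-in)
  VRegUses.insert({5, 0});
  VRegUses.insert({5, 2});
  auto Range = VRegUses.equal_range(5);   // bounds of all items with key 5
  for (auto It = Range.first; It != Range.second; ++It)
    std::printf("SU(%d) uses vreg %u\n", It->second, It->first);
}
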
SmallVector< SDep, 4 > Preds
All SUnit predecessors.
Definition: ScheduleDAG.h:256
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
void clearList(ValueType V)
Clears the list of SUs mapped to V.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
iterator_range< succ_iterator > successors()
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
A description of a memory reference used in the backend.
unsigned NumSuccs
Number of SDep::Data successors.
Definition: ScheduleDAG.h:267
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to an SUnit.
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to the scheduling barrier.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this machine instruction.
Definition: MCInstrDesc.h:226
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
'undef' values are things that do not have specified contents.
Definition: Constants.h:1285
An individual mapping from virtual register number to SUnit.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:413
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise.
Definition: SparseSet.h:235
static bool hasDataSucc(const SUnit *SU)
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: APFloat.h:41
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
unsigned SubReg
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:83
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:672
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:98
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
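
A hedged sketch of the region lifecycle a scheduler driver follows around buildSchedGraph() (compare the MachineScheduler pass); scheduleRegion is a hypothetical wrapper and all arguments are assumed to be supplied by the owning pass.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"

void scheduleRegion(llvm::ScheduleDAGInstrs &DAG, llvm::MachineBasicBlock *MBB,
                    llvm::MachineBasicBlock::iterator Begin,
                    llvm::MachineBasicBlock::iterator End,
                    unsigned NumRegionInstrs, llvm::AAResults *AA) {
  DAG.startBlock(MBB);                               // per-block setup
  DAG.enterRegion(MBB, Begin, End, NumRegionInstrs);
  DAG.buildSchedGraph(AA);                           // SUnits + dependence edges
  // ... reorder instructions by walking DAG.SUnits here ...
  DAG.exitRegion();
}
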
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:407
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:863
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx is mapped to an SUnit.
void print(raw_ostream &OS) const
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:190
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
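
Together with addEdge() below, this supports safely inserting extra ordering constraints; a sketch with a hypothetical helper (the Artificial kind marks the edge as a heuristic rather than a correctness constraint):

#include "llvm/CodeGen/ScheduleDAGInstrs.h"

bool addArtificialEdge(llvm::ScheduleDAGInstrs &DAG, llvm::SUnit *PredSU,
                       llvm::SUnit *SuccSU) {
  if (!DAG.canAddEdge(SuccSU, PredSU))
    return false;  // the new edge would close a cycle
  return DAG.addEdge(SuccSU, llvm::SDep(PredSU, llvm::SDep::Artificial));
}
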
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
List of registers defined and used by a machine instruction.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of registers in this class.
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
iterator find(const KeyT &Key)
Definition: MapVector.h:147
reverse_iterator rend()
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
reverse_iterator rbegin()
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:285
void dumpNodeAll(const SUnit &SU) const
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Scheduling dependency.
Definition: ScheduleDAG.h:49
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:843
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per-register information).
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for correctness.
Definition: ScheduleDAG.h:200
Array of PressureDiffs.
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:64
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
static unsigned getReductionSize()
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
Definition: MCSchedule.h:64
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
Internal state used to compute SchedDFSResult.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
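
A hedged sketch of the backward liveness walk that kill-flag fixup performs: seed the set with the block's live-outs, then step bottom-up so the set always reflects liveness below the current position. walkLivenessBackward is illustrative only.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

void walkLivenessBackward(const llvm::TargetRegisterInfo &TRI,
                          llvm::MachineBasicBlock &MBB) {
  llvm::LivePhysRegs LiveRegs;
  LiveRegs.init(TRI);
  LiveRegs.addLiveOuts(MBB);            // registers live on exit from MBB
  for (llvm::MachineInstr &MI : llvm::reverse(MBB))
    LiveRegs.stepBackward(MI);          // remove defs, add uses
}
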
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
void reComputeSize()
Counts the number of SUs in this map after a reduction.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:110
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:71
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
MCRegAliasIterator enumerates all registers aliasing Reg.
Track the current register pressure at some position in the instruction stream, and remember the high water mark.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
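
A minimal, self-contained sketch of the SparseSet protocol documented here: the universe must be set before any insertion (the DAG builder sizes these sets by register counts). sparseSetDemo is a hypothetical function.

#include "llvm/ADT/SparseSet.h"

void sparseSetDemo(unsigned NumRegs) {
  llvm::SparseSet<unsigned> Set;
  Set.setUniverse(NumRegs);      // keys must be smaller than NumRegs
  Set.insert(3);
  if (Set.count(3))              // 1 if present, 0 otherwise
    Set.erase(Set.find(3));      // erase takes a valid iterator
}
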
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
const MachineFrameInfo & MFI
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
void clear()
Clears the set.
void dumpNodeName(const SUnit &SU) const
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
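
A sketch of the convention this query encodes, under simplified inputs (the real function also handles operands without a register class): a subregister operand touches only that index's lanes, while a full-register operand covers the class's whole lane mask. laneMaskFor is a hypothetical helper.

#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"

llvm::LaneBitmask laneMaskFor(const llvm::TargetRegisterInfo &TRI,
                              const llvm::TargetRegisterClass &RC,
                              unsigned SubRegIdx) {
  return SubRegIdx ? TRI.getSubRegIndexLaneMask(SubRegIdx)  // partial access
                   : RC.getLaneMask();                      // full register
}
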
bool isVisited(const SUnit *SU) const
Returns true if this node has been visited by the DFS traversal.
MCSubRegIterator enumerates all sub-registers of Reg.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1186
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4356
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:519
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
void setIsKill(bool Val=true)
iterator find(const KeyT &Key)
Find an element by its key.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:197
The two locations may or may not alias. This is the least precise result.
Definition: AliasAnalysis.h:86
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the same scheduling region that depend on the physical register referenced at OperIdx.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the RegisterOperands accordingly.
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:278
Iterator for intrusive lists based on ilist_node.
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
bool isDereferenceableInvariantLoad(AAResults *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't change during the execution of this function.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
bool isDebugValue() const
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:837
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
Reg2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instructions.
void eraseAll(const KeyT &K)
Erase all elements with the given key.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
Nonvolatile load/Store instructions that may alias.
Definition: ScheduleDAG.h:70
typename std::vector< std::pair< ValueType, SUList >> ::iterator iterator
Definition: MapVector.h:49
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store followed by a load.
Definition: ScheduleDAG.h:384
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:759
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
Special value supplied for machine level alias analysis.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
void dumpNode(const SUnit &SU) const override
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
iterator erase(iterator I)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:563
Representation of each machine instruction.
Definition: MachineInstr.h:63
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:564
void finalize()
Sets each node's subtree ID to the representative ID and records connections between trees.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
A set of physical registers with utility functions to track liveness when walking backward/forward through a basic block.
Definition: LivePhysRegs.h:48
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
constexpr bool any() const
Definition: LaneBitmask.h:52
bool empty() const
Returns true if the set is empty.
Mapping from virtual register to SUnit including an operand index.
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AAResults and Target fail to deny the dependency.
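
A simplified sketch of that alias-guarded edge, reduced to MachineInstr::mayAlias (the in-tree version consults the target and more state). maybeAddChainDep is a hypothetical helper, not an in-tree API.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"

void maybeAddChainDep(llvm::AAResults *AA, llvm::SUnit *SUa, llvm::SUnit *SUb,
                      unsigned Latency = 0) {
  // Only memory accesses that may alias need an ordering edge.
  if (SUa->getInstr()->mayAlias(AA, *SUb->getInstr(), /*UseTBAA=*/true)) {
    llvm::SDep Dep(SUa, llvm::SDep::MayAliasMem);
    Dep.setLatency(Latency);
    SUb->addPred(Dep);
  }
}
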
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2047
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
iterator begin()
Definition: MapVector.h:69
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and returns a pointer to it.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clear the map.
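
The fan-out addBarrierChain() describes, sketched over a plain SUnit list (the real code iterates a Value2SUsMap): every recorded SUnit gets the barrier as a predecessor, after which the list can be dropped, since reaching the barrier is enough for later ordering.

#include "llvm/CodeGen/ScheduleDAG.h"
#include <list>

void chainListToBarrier(std::list<llvm::SUnit *> &SUs, llvm::SUnit *Barrier) {
  for (llvm::SUnit *SU : SUs)
    SU->addPredBarrier(Barrier);  // barrier edge: latency 0, or 1 store->load
  SUs.clear();
}
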
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:830
iterator end()
Definition: MapVector.h:71
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
bool hasImplicitUseOfPhysReg(unsigned Reg) const
Return true if this instruction implicitly uses the specified physical register.
Definition: MCInstrDesc.h:584
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
LLVM Value Representation.
Definition: Value.h:74
void removeRegsInMask(const MachineOperand &MO, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand *>> *Clobbers=nullptr)
Removes physical registers clobbered by the regmask operand MO.
void clear()
Clears map from all contents.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
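
The shape of that numbering pass, sketched as a hypothetical helper on a ScheduleDAGInstrs subclass (so the protected region iterators are reachable): walk the region top-down, skip debug values, and give each real instruction a fresh SUnit.

#include "llvm/CodeGen/ScheduleDAGInstrs.h"

struct MyDAGBuilder : llvm::ScheduleDAGInstrs {
  using llvm::ScheduleDAGInstrs::ScheduleDAGInstrs;

  void numberRegionInstrs() {
    for (auto It = RegionBegin; It != RegionEnd; ++It) {
      llvm::MachineInstr &MI = *It;
      if (MI.isDebugValue())
        continue;                    // debug values never become DAG nodes
      llvm::SUnit *SU = newSUnit(&MI);
      SU->isCall = MI.isCall();      // per-node flags are set while numbering
    }
  }
};
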
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
bool isPosition() const
SmallVector< SDep, 4 > Succs
All SUnit successors.
Definition: ScheduleDAG.h:257
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:45
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediately following it.
Definition: MachineInstr.h:663
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set.
Definition: LivePhysRegs.h:89
Register getReg() const
getReg - Returns the register number.
static void dumpSUList(ScheduleDAGInstrs::SUList &L)
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
SchedDFSImpl(SchedDFSResult &r)
bool hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:44
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:415
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
static bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unmodeled side effects).
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
bool hasTailCall() const
Returns true if the function contains a tail call.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
A discriminated union of two or more pointer types, with the discriminator in the low bit of the pointer.
Definition: PointerUnion.h:156
void resize(size_type N)
Definition: SmallVector.h:344