//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This implements the ScheduleDAGInstrs class, which implements
/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));

static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.

// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
// together hold this many SUs, a reduction of maps will be done.
static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
                             "prior to scheduling, at which point a trade-off "
                             "is made to avoid excessive compile time."));

static cl::opt<unsigned> ReductionSize(
    "dag-maps-reduction-size", cl::Hidden,
    cl::desc("A huge scheduling region will have maps reduced by this many "
             "nodes at a time. Defaults to HugeRegion / 2."));

static unsigned getReductionSize() {
  // Always reduce a huge region with half of the elements, except
  // when the user sets this number explicitly.
  if (ReductionSize.getNumOccurrences() == 0)
    return HugeRegion / 2;
  return ReductionSize;
}

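// Illustrative example: with the defaults (HugeRegion = 1000, ReductionSize
// unset), once the Stores and Loads maps together hold 1000 SUs, the 500
// highest-numbered SUs are pruned from the maps and replaced by edges to a
// single barrier-chain SU (see reduceHugeMemNodeMaps below).
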
static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "{ ";
  for (const SUnit *su : L) {
    dbgs() << "SU(" << su->NodeNum << ")";
    if (su != L.back())
      dbgs() << ", ";
  }
  dbgs() << "}\n";
#endif
}

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool RemoveKillFlags)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
      RemoveKillFlags(RemoveKillFlags),
      UnknownValue(UndefValue::get(
                     Type::getVoidTy(mf.getFunction().getContext()))),
      Topo(SUnits, &ExitSU) {
  DbgValues.clear();

  const TargetSubtargetInfo &ST = mf.getSubtarget();
  SchedModel.init(&ST);
}

/// If this machine instr has memory reference information and it can be
/// tracked to a normal reference to a known object, return the Value
/// for that object. This function returns false if the memory location is
/// unknown or may alias anything.
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                         const MachineFrameInfo &MFI,
                                         UnderlyingObjectsVector &Objects,
                                         const DataLayout &DL) {
  auto allMMOsOkay = [&]() {
    for (const MachineMemOperand *MMO : MI->memoperands()) {
      // TODO: Figure out whether isAtomic is really necessary (see D57601).
      if (MMO->isVolatile() || MMO->isAtomic())
        return false;

      if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
        // Functions that contain tail calls don't have unique PseudoSourceValue
        // objects. Two PseudoSourceValues might refer to the same or
        // overlapping locations. The client code calling this function assumes
        // this is not the case. So return a conservative answer of no known
        // object.
        if (MFI.hasTailCall())
          return false;

        // For now, ignore PseudoSourceValues which may alias LLVM IR values
        // because the code that uses this function has no way to cope with
        // such aliases.
        if (PSV->isAliased(&MFI))
          return false;

        bool MayAlias = PSV->mayAlias(&MFI);
        Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
      } else if (const Value *V = MMO->getValue()) {
        SmallVector<Value *, 4> Objs;
        if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
          return false;

        for (Value *V : Objs) {
          assert(isIdentifiedObject(V));
          Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
        }
      } else
        return false;
    }
    return true;
  };

  if (!allMMOsOkay()) {
    Objects.clear();
    return false;
  }

  return true;
}

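// For example, a reload from a spill slot yields a PseudoSourceValue that
// does not alias LLVM IR values, so it is recorded with MayAlias = false
// and later lands in the NonAlias maps in buildSchedGraph.
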
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = nullptr;
}

void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  NumRegionInstrs = regioninstrs;
}

void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
  ExitSU.setInstr(ExitMI);
  // Add dependencies on the defs and uses of the instruction.
  if (ExitMI) {
    for (const MachineOperand &MO : ExitMI->operands()) {
      if (!MO.isReg() || MO.isDef()) continue;
      Register Reg = MO.getReg();
      if (Register::isPhysicalRegister(Reg)) {
        Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
      }
    }
  }
  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    for (const MachineBasicBlock *Succ : BB->successors()) {
      for (const auto &LI : Succ->liveins()) {
        if (!Uses.contains(LI.PhysReg))
          Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
      }
    }
  }
}

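// For example, if the region ends in a conditional branch, ExitSU is treated
// as reading every register live into both successor blocks, so in-region
// defs of those registers get data edges to ExitSU.
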
/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  // Only use any non-zero latency for real defs/uses, in contrast to
  // "fake" operands added by regalloc.
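  // (Illustrative note: an operand index at or beyond the MCInstrDesc's
  // declared operands that is not a declared implicit def/use of this
  // physreg is treated as such a "fake" operand and gets latency 0 below.)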
  const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
  bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
                            !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = I->OpIdx;
      MachineInstr *RegUse = nullptr;
      SDep Dep;
      if (UseOp < 0)
        Dep = SDep(SU, SDep::Artificial);
      else {
        // Set the hasPhysRegDefs only for physreg defs that have a use within
        // the scheduling region.
        SU->hasPhysRegDefs = true;
        Dep = SDep(SU, SDep::Data, *Alias);
        RegUse = UseSU->getInstr();
      }
      const MCInstrDesc *UseMIDesc =
          (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
      bool ImplicitPseudoUse =
          (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
           !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
      if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
        Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                                        RegUse, UseOp));
        ST.adjustSchedDependency(SU, UseSU, Dep);
      } else
        Dep.setLatency(0);

      UseSU->addPred(Dep);
    }
  }
}

/// Adds register dependencies (data, anti, and output) from this SUnit
/// to following instructions in the same scheduling region that depend on the
/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  Register Reg = MO.getReg();
  // We do not need to track any dependencies for constant registers.
  if (MRI.isConstantPhysReg(Reg))
    return;

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
      SUnit *DefSU = I->SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
        else {
          SDep Dep(SU, Kind, /*Reg=*/*Alias);
          Dep.setLatency(
            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
          DefSU->addPred(Dep);
        }
      }
    }
  }

  if (!MO.isDef()) {
    SU->hasPhysRegUses = true;
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
    if (RemoveKillFlags)
      MO.setIsKill(false);
  } else {
    addPhysRegDataDeps(SU, OperIdx);

    // Clear previous uses and defs of this register and its subregisters.
    for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
      if (Uses.contains(*SubReg))
        Uses.eraseAll(*SubReg);
      if (!MO.isDead())
        Defs.eraseAll(*SubReg);
    }
    if (MO.isDead() && SU->isCall) {
      // Calls will not be reordered because of chain dependencies (see
      // below). Since call operands are dead, calls may continue to be added
      // to the DefList making dependence checking quadratic in the size of
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
      Reg2SUnitsMap::iterator B = P.first;
      Reg2SUnitsMap::iterator I = P.second;
      for (bool isBegin = I == B; !isBegin; /* empty */) {
        isBegin = (--I) == B;
        if (!I->SU->isCall)
          break;
        I = Defs.erase(I);
      }
    }

    // Defs are pushed in the order they are visited and never reordered.
    Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
  }
}

LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
  Register Reg = MO.getReg();
  // No point in tracking lanemasks if we don't have interesting subregisters.
  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
  if (!RC.HasDisjunctSubRegs)
    return LaneBitmask::getAll();

  unsigned SubReg = MO.getSubReg();
  if (SubReg == 0)
    return RC.getLaneMask();
  return TRI->getSubRegIndexLaneMask(SubReg);
}
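
// For example, on a target whose 64-bit vregs have two 32-bit subregisters,
// a def of one subregister yields just that subregister's lane mask, while a
// def of the full register (SubReg == 0) yields the register class's
// complete lane mask. (Concrete mask values are target-specific.)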

/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  Register Reg = MO.getReg();

  LaneBitmask DefLaneMask;
  LaneBitmask KillLaneMask;
  if (TrackLaneMasks) {
    bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
    DefLaneMask = getLaneMaskForMO(MO);
    // If we have a <read-undef> flag, none of the lane values comes from an
    // earlier instruction.
    KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;

    // Clear undef flag, we'll re-add it later once we know which subregister
    // Def is first.
    MO.setIsUndef(false);
  } else {
    DefLaneMask = LaneBitmask::getAll();
    KillLaneMask = LaneBitmask::getAll();
  }

  if (MO.isDead()) {
    assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
  } else {
    // Add data dependence to all uses we found so far.
    const TargetSubtargetInfo &ST = MF.getSubtarget();
    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
         E = CurrentVRegUses.end(); I != E; /*empty*/) {
      LaneBitmask LaneMask = I->LaneMask;
      // Ignore uses of other lanes.
      if ((LaneMask & KillLaneMask).none()) {
        ++I;
        continue;
      }

      if ((LaneMask & DefLaneMask).any()) {
        SUnit *UseSU = I->SU;
        MachineInstr *Use = UseSU->getInstr();
        SDep Dep(SU, SDep::Data, Reg);
        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
                                                        I->OperandIndex));
        ST.adjustSchedDependency(SU, UseSU, Dep);
        UseSU->addPred(Dep);
      }

      LaneMask &= ~KillLaneMask;
      // If we found a Def for all lanes of this use, remove it from the list.
      if (LaneMask.any()) {
        I->LaneMask = LaneMask;
        ++I;
      } else
        I = CurrentVRegUses.erase(I);
    }
  }

  // Shortcut: Singly defined vregs do not have output/anti dependencies.
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest defs of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  LaneBitmask LaneMask = DefLaneMask;
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for other lanes.
    if ((V2SU.LaneMask & LaneMask).none())
      continue;
    // Add an output dependence.
    SUnit *DefSU = V2SU.SU;
    // Ignore additional defs of the same lanes in one instruction. This can
    // happen because lanemasks are shared for targets with too many
    // subregisters. We also use some representation tricks/hacks where we
    // add super-register defs/uses, to imply that although we only access parts
    // of the reg we care about the full one.
    if (DefSU == SU)
      continue;
    SDep Dep(SU, SDep::Output, Reg);
    Dep.setLatency(
      SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
    DefSU->addPred(Dep);

    // Update current definition. This can get tricky if the def was about a
    // bigger lanemask before. We then have to shrink it and create a new
    // VReg2SUnit for the non-overlapping part.
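    // Illustrative example (mask values are target-specific): if the entry
    // covered lanes 0b0011 for DefSU and this def writes lanes 0b0001, the
    // entry is retargeted to this SU with mask 0b0001 and a new entry
    // {DefSU, 0b0010} keeps the untouched lanes.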
    LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
    LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
    V2SU.SU = SU;
    V2SU.LaneMask = OverlapMask;
    if (NonOverlapMask.any())
      CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
  }
  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
  if (LaneMask.any())
    CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}

/// Adds a register data dependency if the instruction that defines the
/// virtual register used at OperIdx is mapped to an SUnit. Add a register
/// antidependency from this SUnit to instructions that occur later in the same
/// scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);
  Register Reg = MO.getReg();

  // Remember the use. Data dependencies will be added when we find the def.
  LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
                                        : LaneBitmask::getAll();
  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));

  // Add antidependences to the following defs of the vreg.
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for unrelated lanes.
    LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
    if ((PrevDefLaneMask & LaneMask).none())
      continue;
    if (V2SU.SU == SU)
      continue;

    V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
  }
}

/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}

void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
                                           unsigned Latency) {
  if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
    SDep Dep(SUa, SDep::MayAliasMem);
    Dep.setLatency(Latency);
    SUb->addPred(Dep);
  }
}

/// Creates an SUnit for each real instruction, numbered in top-down
/// topological order. The instruction order A < B implies that no edge exists
/// from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(NumRegionInstrs);

  for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
    if (MI.isDebugInstr())
      continue;

    SUnit *SU = newSUnit(&MI);
    MISUnitMap[&MI] = SU;

    SU->isCall = MI.isCall();
    SU->isCommutable = MI.isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());

    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
    //
    // Reserved resources block an instruction from issuing and stall the
    // entire pipeline. These are identified by BufferSize=0.
    //
    // Unbuffered resources prevent execution of subsequent instructions that
    // require the same resources. This is used for in-order execution pipelines
    // within an out-of-order core. These are identified by BufferSize=1.
    if (SchedModel.hasInstrSchedModel()) {
      const MCSchedClassDesc *SC = getSchedClass(SU);
      for (const MCWriteProcResEntry &PRE :
           make_range(SchedModel.getWriteProcResBegin(SC),
                      SchedModel.getWriteProcResEnd(SC))) {
        switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
        case 0:
          SU->hasReservedResource = true;
          break;
        case 1:
          SU->isUnbuffered = true;
          break;
        default:
          break;
        }
      }
    }
  }
}

class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
  /// Current total number of SUs in map.
  unsigned NumNodes = 0;

  /// 1 for loads, 0 for stores. (see comment in SUList)
  unsigned TrueMemOrderLatency;

public:
  Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}

  /// To keep NumNodes up to date, insert() is used instead of
  /// this operator w/ push_back().
  ValueType &operator[](const SUList &Key) {
    llvm_unreachable("Don't use. Use insert() instead."); };

  /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
  /// reduce().
  void inline insert(SUnit *SU, ValueType V) {
    MapVector::operator[](V).push_back(SU);
    NumNodes++;
  }

  /// Clears the list of SUs mapped to V.
  void inline clearList(ValueType V) {
    iterator Itr = find(V);
    if (Itr != end()) {
      assert(NumNodes >= Itr->second.size());
      NumNodes -= Itr->second.size();

      Itr->second.clear();
    }
  }

  /// Clears map from all contents.
  void clear() {
    MapVector::clear();
    NumNodes = 0;
  }

  unsigned inline size() const { return NumNodes; }

  /// Counts the number of SUs in this map after a reduction.
  void reComputeSize() {
    NumNodes = 0;
    for (auto &I : *this)
      NumNodes += I.second.size();
  }

  unsigned inline getTrueMemOrderLatency() const {
    return TrueMemOrderLatency;
  }

  void dump();
};

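// Usage sketch: buildSchedGraph() keeps one Value2SUsMap for stores and one
// for loads (plus NonAlias variants). A store to underlying object V first
// calls addChainDependencies(SU, Stores, V) against earlier accesses, then
// records itself with Stores.insert(SU, V).
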
void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap) {
  for (auto &I : Val2SUsMap)
    addChainDependencies(SU, I.second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap,
                                             ValueType V) {
  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
  if (Itr != Val2SUsMap.end())
    addChainDependencies(SU, Itr->second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  for (auto &I : map) {
    SUList &sus = I.second;
    for (auto *SU : sus)
      SU->addPredBarrier(BarrierChain);
  }
  map.clear();
}

void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  // Go through all lists of SUs.
  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
    Value2SUsMap::iterator CurrItr = I++;
    SUList &sus = CurrItr->second;
    SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
    for (; SUItr != SUEE; ++SUItr) {
      // Stop on BarrierChain or any instruction above it.
      if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
        break;

      (*SUItr)->addPredBarrier(BarrierChain);
    }

    // Also remove the BarrierChain from the list if present.
    if (SUItr != SUEE && *SUItr == BarrierChain)
      SUItr++;

    // Remove all SUs that are now successors of BarrierChain.
    if (SUItr != sus.begin())
      sus.erase(sus.begin(), SUItr);
  }

  // Remove all entries with empty su lists.
  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
    return (mapEntry.second.empty()); });

  // Recompute the size of the map (NumNodes).
  map.reComputeSize();
}

void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
                                        RegPressureTracker *RPTracker,
                                        PressureDiffs *PDiffs,
                                        LiveIntervals *LIS,
                                        bool TrackLaneMasks) {
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                       : ST.useAA();
  AAForDep = UseAA ? AA : nullptr;

  BarrierChain = nullptr;

  this->TrackLaneMasks = TrackLaneMasks;
  MISUnitMap.clear();
  ScheduleDAG::clearDAG();

  // Create an SUnit for each real instruction.
  initSUnits();

  if (PDiffs)
    PDiffs->init(SUnits.size());

  // We build scheduling units by walking a block's instruction list
  // from bottom to top.

  // Each MI's memory operands are analyzed into a list of underlying
  // objects. The SU is then inserted in the SUList(s) mapped from the
  // Value(s). Each Value thus gets mapped to lists of SUs depending
  // on it, stores and loads kept separately. Two SUs are trivially
  // non-aliasing if they both depend on only identified Values and do
  // not share any common Value.
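  // For example, a store to stack object %stack.0 and a load from an
  // unrelated identified global each map only to their own Value, so no
  // chain edge is needed between them. (The %stack.0 name is illustrative.)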
  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);

  // Certain memory accesses are known to not alias any SU in Stores
  // or Loads, and have therefore their own 'NonAlias'
  // domain. E.g. spill / reload instructions never alias LLVM IR
  // Values. It would be nice to assume that this type of memory
  // accesses always have a proper memory operand modelling, and are
  // therefore never unanalyzable, but this is conservatively not
  // done.
  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);

  // Track all instructions that may raise floating-point exceptions.
  // These do not depend on one another (or normal loads or stores), but
  // must not be rescheduled across global barriers. Note that we don't
  // really need a "map" here since we don't track those MIs by value;
  // using the same Value2SUsMap data type here is simply a matter of
  // convenience.
  Value2SUsMap FPExceptions;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = nullptr;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setUniverse(TRI->getNumRegs());
  Uses.setUniverse(TRI->getNumRegs());

  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
  unsigned NumVirtRegs = MRI.getNumVirtRegs();
  CurrentVRegDefs.setUniverse(NumVirtRegs);
  CurrentVRegUses.setUniverse(NumVirtRegs);

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *DbgMI = nullptr;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (DbgMI) {
      DbgValues.push_back(std::make_pair(DbgMI, &MI));
      DbgMI = nullptr;
    }

    if (MI.isDebugValue()) {
      DbgMI = &MI;
      continue;
    }
    if (MI.isDebugLabel())
      continue;

    SUnit *SU = MISUnitMap[&MI];
    assert(SU && "No SUnit mapped to this MI");

    if (RPTracker) {
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
      if (TrackLaneMasks) {
        SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
      }
      if (PDiffs != nullptr)
        PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);

      if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
        RPTracker->recedeSkipDebugValues();
      assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
      RPTracker->recede(RegOpers);
    }

    assert(
        (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
        "Cannot schedule terminators or labels!");

    // Add register-based dependencies (data, anti, and output).
    // For some instructions (calls, returns, inline-asm, etc.) there can
    // be explicit uses and implicit defs, in which case the use will appear
    // on the operand list before the def. Do two passes over the operand
    // list to make sure that defs are processed before any uses.
    bool HasVRegDef = false;
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (Register::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (Register::isVirtualRegister(Reg)) {
        HasVRegDef = true;
        addVRegDefDeps(SU, j);
      }
    }
    // Now process all uses.
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      // Only look at use operands.
      // We do not need to check for MO.readsReg() here because subsequent
      // subregister defs will get output dependence edges and need no
      // additional use dependencies.
      if (!MO.isReg() || !MO.isUse())
        continue;
      Register Reg = MO.getReg();
      if (Register::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(SU, j);
      }
    }

    // If we haven't seen any uses in this scheduling region, create a
    // dependence edge to ExitSU to model the live-out latency. This is required
    // for vreg defs with no in-region use, and prefetches with no vreg def.
    //
    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
    // check currently relies on being called before adding chain deps.
    if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
      SDep Dep(SU, SDep::Artificial);
      Dep.setLatency(SU->Latency - 1);
      ExitSU.addPred(Dep);
    }

    // Add memory dependencies (Note: isStoreToStackSlot and
    // isLoadFromStackSlot are not usable after stack slots are lowered to
    // actual addresses).

    // This is a barrier event that acts as a pivotal node in the DAG.
    if (isGlobalMemoryObject(AA, &MI)) {

      // Become the barrier chain.
      if (BarrierChain)
        BarrierChain->addPredBarrier(SU);
      BarrierChain = SU;

      LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);

      // Add dependencies against everything below it and clear maps.
      addBarrierChain(Stores);
      addBarrierChain(Loads);
      addBarrierChain(NonAliasStores);
      addBarrierChain(NonAliasLoads);
      addBarrierChain(FPExceptions);

      continue;
    }

    // Instructions that may raise FP exceptions may not be moved
    // across any global barriers.
    if (MI.mayRaiseFPException()) {
      if (BarrierChain)
        BarrierChain->addPredBarrier(SU);

      FPExceptions.insert(SU, UnknownValue);

      if (FPExceptions.size() >= HugeRegion) {
        LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
        Value2SUsMap empty;
        reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
      }
    }

    // If it's not a store or a variant load, we're done.
    if (!MI.mayStore() &&
        !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
      continue;

    // Always add a dependency edge to BarrierChain if present.
    if (BarrierChain)
      BarrierChain->addPredBarrier(SU);

    // Find the underlying objects for MI. The Objs vector is either
    // empty, or filled with the Values of memory locations which this
    // SU depends on.
    UnderlyingObjectsVector Objs;
    bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
                                                  MF.getDataLayout());

    if (MI.mayStore()) {
      if (!ObjsFound) {
        // An unknown store depends on all stores and loads.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);
        addChainDependencies(SU, Loads);
        addChainDependencies(SU, NonAliasLoads);

        // Map this store to 'UnknownValue'.
        Stores.insert(SU, UnknownValue);
      } else {
        // Add precise dependencies against all previously seen memory
        // accesses mapped to the same Value(s).
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add dependencies to previous stores and loads mapped to V.
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
          addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
        }
        // Update the store map after all chains have been added to avoid adding
        // self-loop edge if multiple underlying objects are present.
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Map this store to V.
          (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
        }
        // The store may have dependencies to unanalyzable loads and
        // stores.
        addChainDependencies(SU, Loads, UnknownValue);
        addChainDependencies(SU, Stores, UnknownValue);
      }
    } else { // SU is a load.
      if (!ObjsFound) {
        // An unknown load depends on all stores.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);

        Loads.insert(SU, UnknownValue);
      } else {
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add precise dependencies against all previously seen stores
          // mapping to the same Value(s).
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);

          // Map this load to V.
          (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
        }
        // The load may have dependencies to unanalyzable stores.
        addChainDependencies(SU, Stores, UnknownValue);
      }
    }

    // Reduce maps if they grow huge.
    if (Stores.size() + Loads.size() >= HugeRegion) {
      LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
      reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
    }
    if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
      LLVM_DEBUG(
          dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
      reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
    }
  }

  if (DbgMI)
    FirstDbgValue = DbgMI;

  Defs.clear();
  Uses.clear();
  CurrentVRegDefs.clear();
  CurrentVRegUses.clear();

  Topo.MarkDirty();
}

raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
  PSV->printCustom(OS);
  return OS;
}

void ScheduleDAGInstrs::Value2SUsMap::dump() {
  for (auto &Itr : *this) {
    if (Itr.first.is<const Value*>()) {
      const Value *V = Itr.first.get<const Value*>();
      if (isa<UndefValue>(V))
        dbgs() << "Unknown";
      else
        V->printAsOperand(dbgs());
    }
    else if (Itr.first.is<const PseudoSourceValue*>())
      dbgs() << Itr.first.get<const PseudoSourceValue*>();
    else
      llvm_unreachable("Unknown Value type.");

    dbgs() << " : ";
    dumpSUList(Itr.second);
  }
}

void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
                                              Value2SUsMap &loads, unsigned N) {
  LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
             dbgs() << "Loading SUnits:\n"; loads.dump());

  // Insert all SU's NodeNums into a vector and sort it.
  std::vector<unsigned> NodeNums;
  NodeNums.reserve(stores.size() + loads.size());
  for (auto &I : stores)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  for (auto &I : loads)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  llvm::sort(NodeNums);

  // The N last elements in NodeNums will be removed, and the SU with
  // the lowest NodeNum of them will become the new BarrierChain to
  // let the not yet seen SUs have a dependency to the removed SUs.
  assert(N <= NodeNums.size());
  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
  if (BarrierChain) {
    // The aliasing and non-aliasing maps reduce independently of each
    // other, but share a common BarrierChain. Check if the
    // newBarrierChain is above the former one. If it is not, it may
    // introduce a loop to use newBarrierChain, so keep the old one.
    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
      BarrierChain->addPredBarrier(newBarrierChain);
      BarrierChain = newBarrierChain;
      LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);
    }
    else
      LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);
  }
  else
    BarrierChain = newBarrierChain;

  insertBarrierChain(stores);
  insertBarrierChain(loads);

  LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
             dbgs() << "Loading SUnits:\n"; loads.dump());
}

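// Illustrative example: if the sorted NodeNums are {2, 5, 7, 9, 12} and
// N = 2, the SU numbered 9 becomes the new barrier chain, SU 12 gets a
// barrier edge to it, and both are pruned from the maps.
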
static void toggleKills(const MachineRegisterInfo &MRI,
                        LivePhysRegs &LiveRegs,
                        MachineInstr &MI, bool addToLiveRegs) {
  for (MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !MO.readsReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;

    // Things that are available after the instruction are killed by it.
    bool IsKill = LiveRegs.available(MRI, Reg);
    MO.setIsKill(IsKill);
    if (addToLiveRegs)
      LiveRegs.addReg(Reg);
  }
}

void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
  LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');

  LiveRegs.init(*TRI);
  LiveRegs.addLiveOuts(MBB);

  // Examine block from end to start...
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    if (MI.isDebugInstr())
      continue;

    // Update liveness. Registers that are defed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
      const MachineOperand &MO = *O;
      if (MO.isReg()) {
        if (!MO.isDef())
          continue;
        Register Reg = MO.getReg();
        if (!Reg)
          continue;
        LiveRegs.removeReg(Reg);
      } else if (MO.isRegMask()) {
        LiveRegs.removeRegsInMask(MO);
      }
    }

    // If there is a bundle header fix it up first.
    if (!MI.isBundled()) {
      toggleKills(MRI, LiveRegs, MI, true);
    } else {
      MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
      if (MI.isBundle())
        toggleKills(MRI, LiveRegs, MI, false);

      // Some targets make the (questionable) assumption that the instructions
      // inside the bundle are ordered and consequently only the last use of
      // a register inside the bundle can kill it.
      MachineBasicBlock::instr_iterator I = std::next(Bundle);
      while (I->isBundledWithSucc())
        ++I;
      do {
        if (!I->isDebugInstr())
          toggleKills(MRI, LiveRegs, *I, true);
        --I;
      } while (I != Bundle);
    }
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dumpNodeName(SU);
  dbgs() << ": ";
  SU.getInstr()->dump();
#endif
}

void ScheduleDAGInstrs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (EntrySU.getInstr() != nullptr)
    dumpNodeAll(EntrySU);
  for (const SUnit &SU : SUnits)
    dumpNodeAll(SU);
  if (ExitSU.getInstr() != nullptr)
    dumpNodeAll(ExitSU);
#endif
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss, /*SkipOpers=*/true);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}

bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
}

bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

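// Usage sketch (illustrative): a DAG mutation can order two SUnits without
// implying a data dependence via
//   bool Added = DAG->addEdge(&SuccSU, SDep(&PredSU, SDep::Artificial));
// addEdge() refuses (returns false) if the new edge would create a cycle.
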
//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//

namespace llvm {

/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
  SchedDFSResult &R;

  /// Join DAG nodes into equivalence classes by their subtree.
  IntEqClasses SubtreeClasses;
  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
  std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;

  struct RootData {
    unsigned NodeID;
    unsigned ParentNodeID;  ///< Parent node (member of the parent subtree).
    unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
                                /// children.

    RootData(unsigned id): NodeID(id),
                           ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}

    unsigned getSparseSetIndex() const { return NodeID; }
  };

  SparseSet<RootData> RootSet;

public:
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }

  /// Returns true if this node has been visited by the DFS traversal.
  ///
  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
  /// ID. Later, SubtreeID is updated but remains valid.
  bool isVisited(const SUnit *SU) const {
    return R.DFSNodeData[SU->NodeNum].SubtreeID
      != SchedDFSResult::InvalidSubtreeID;
  }

  /// Initializes this node's instruction count. We don't need to flag the node
  /// visited until visitPostorder because the DAG cannot have cycles.
  void visitPreorder(const SUnit *SU) {
    R.DFSNodeData[SU->NodeNum].InstrCount =
      SU->getInstr()->isTransient() ? 0 : 1;
  }

  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (const SDep &PredDep : SU->Preds) {
      if (PredDep.getKind() != SDep::Data)
        continue;
      unsigned PredNum = PredDep.getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }

  /// Called once for each tree edge after calling visitPostOrderNode on
  /// the predecessor. Increment the parent node's instruction count and
  /// preemptively join this subtree to its parent's if it is small enough.
  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
    R.DFSNodeData[Succ->NodeNum].InstrCount
      += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
    joinPredSubtree(PredDep, Succ);
  }

  /// Adds a connection for cross edges.
  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
  }

  /// Sets each node's subtree ID to the representative ID and records
  /// connections between trees.
  void finalize() {
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    for (const RootData &Root : RootSet) {
      unsigned TreeID = SubtreeClasses[Root.NodeID];
      if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      LLVM_DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
                        << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
      unsigned PredTree = SubtreeClasses[P.first->NodeNum];
      unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = P.first->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }

protected:
  /// Joins the predecessor subtree with the successor that is its DFS parent.
  /// Applies some heuristics before joining.
  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
                       bool CheckLimit = true) {
    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");

    // Check if the predecessor is already joined.
    const SUnit *PredSU = PredDep.getSUnit();
    unsigned PredNum = PredSU->NodeNum;
    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
      return false;

    // Four is the magic number of successors before a node is considered a
    // pinch point.
    unsigned NumDataSucs = 0;
    for (const SDep &SuccDep : PredSU->Succs) {
      if (SuccDep.getKind() == SDep::Data) {
        if (++NumDataSucs >= 4)
          return false;
      }
    }
    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
      return false;
    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
    SubtreeClasses.join(Succ->NodeNum, PredNum);
    return true;
  }

  /// Called by finalize() to record a connection between trees.
  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
    if (!Depth)
      return;

    do {
      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
        R.SubtreeConnections[FromTree];
      for (SchedDFSResult::Connection &C : Connections) {
        if (C.TreeID == ToTree) {
          C.Level = std::max(C.Level, Depth);
          return;
        }
      }
      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
  }
};

} // end namespace llvm

namespace {

/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
  std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;

public:
  bool isComplete() const { return DFSStack.empty(); }

  void follow(const SUnit *SU) {
    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
  }
  void advance() { ++DFSStack.back().second; }

  const SDep *backtrack() {
    DFSStack.pop_back();
    return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
  }

  const SUnit *getCurr() const { return DFSStack.back().first; }

  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }

  SUnit::const_pred_iterator getPredEnd() const {
    return getCurr()->Preds.end();
  }
};

} // end anonymous namespace

static bool hasDataSucc(const SUnit *SU) {
  for (const SDep &SuccDep : SU->Succs) {
    if (SuccDep.getKind() == SDep::Data &&
        !SuccDep.getSUnit()->isBoundaryNode())
      return true;
  }
  return false;
}

/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
  if (!IsBottomUp)
    llvm_unreachable("Top-down ILP metric is unimplemented");

  SchedDFSImpl Impl(*this);
  for (const SUnit &SU : SUnits) {
    if (Impl.isVisited(&SU) || hasDataSucc(&SU))
      continue;

    SchedDAGReverseDFS DFS;
    Impl.visitPreorder(&SU);
    DFS.follow(&SU);
    while (true) {
      // Traverse the leftmost path as far as possible.
      while (DFS.getPred() != DFS.getPredEnd()) {
        const SDep &PredDep = *DFS.getPred();
        DFS.advance();
        // Ignore non-data edges.
        if (PredDep.getKind() != SDep::Data
            || PredDep.getSUnit()->isBoundaryNode()) {
          continue;
        }
        // An already visited edge is a cross edge, assuming an acyclic DAG.
        if (Impl.isVisited(PredDep.getSUnit())) {
          Impl.visitCrossEdge(PredDep, DFS.getCurr());
          continue;
        }
        Impl.visitPreorder(PredDep.getSUnit());
        DFS.follow(PredDep.getSUnit());
      }
      // Visit the top of the stack in postorder and backtrack.
      const SUnit *Child = DFS.getCurr();
      const SDep *PredDep = DFS.backtrack();
      Impl.visitPostorderNode(Child);
      if (PredDep)
        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
      if (DFS.isComplete())
        break;
    }
  }
  Impl.finalize();
}

/// The root of the given SubtreeID was just scheduled. For all subtrees
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
  for (const Connection &C : SubtreeConnections[SubtreeID]) {
    SubtreeConnectLevels[C.TreeID] =
      std::max(SubtreeConnectLevels[C.TreeID], C.Level);
    LLVM_DEBUG(dbgs() << "  Tree: " << C.TreeID << " @"
                      << SubtreeConnectLevels[C.TreeID] << '\n');
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
  OS << InstrCount << " / " << Length << " = ";
  if (!Length)
    OS << "BADILP";
  else
    OS << format("%g", ((double)InstrCount / Length));
}

LLVM_DUMP_METHOD void ILPValue::dump() const {
  dbgs() << *this << '\n';
}

namespace llvm {

LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
  Val.print(OS);
  return OS;
}

} // end namespace llvm

#endif
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
iterator end()
Returns an iterator past this container.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
uint64_t CallInst * C
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
virtual void finishBlock()
Cleans up after scheduling in the given block.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
bool isDebugLabel() const
A common definition of LaneBitmask for use in TableGen and CodeGen.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
void clear()
Definition: MapVector.h:88
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:651
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void dump() const override
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:476
void init(unsigned N)
Initialize an array of N PressureDiffs.
Record a physical register access.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
LivePhysRegs LiveRegs
Set of live physical registers for updating kill flags.
bool TrackLaneMasks
Whether lane masks should get tracked.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void push_back(const T &Elt)
Definition: SmallVector.h:211
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:63
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:178
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:527
void setIsUndef(bool Val=true)
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:34
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
unsigned Reg
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
void compress()
compress - Compress equivalence classes by numbering them 0 .
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:37
bool hasOneDef(unsigned RegNo) const
Return true if there is exactly one operand defining the specified register.
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI DAG construction"))
static unsigned InstrCount
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU&#39;s instruction that defines a physical register.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
void clearList(ValueType V)
Clears the list of SUs mapped to V.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
iterator_range< succ_iterator > successors()
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
A description of a memory reference used in the backend.
unsigned NumSuccs
Number of SDep::Data successors.
Definition: ScheduleDAG.h:267
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to an SUnit.
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to scheduling barrier by adding the exit SU to the register defs and use list.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:225
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
'undef' values are things that do not have specified contents.
Definition: Constants.h:1285
An individual mapping from virtual register number to SUnit.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:414
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise.
Definition: SparseSet.h:235
static bool hasDataSucc(const SUnit *SU)
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: APFloat.h:41
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
unsigned SubReg
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:83
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:667
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:98
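A minimal sketch of why MapVector matters here: iteration follows insertion order, so walking the map (as the DAG builder's Value2SUsMap does) is deterministic across runs. The names below are illustrative only:

#include "llvm/ADT/MapVector.h"
#include <string>
using namespace llvm;

void demoMapVector() {
  MapVector<int, std::string> M;
  M[2] = "second"; // operator[] default-constructs and inserts on first use
  M[1] = "first";
  // Visits (2, "second") then (1, "first"): insertion order, not key order.
  for (auto &KV : M)
    (void)KV.first;
}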
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
Key
PAL metadata keys.
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:408
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:858
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx is mapped to an SUnit.
void print(raw_ostream &OS) const
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:190
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: APInt.h:32
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
List of registers defined and used by a machine instruction.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of register in this class.
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:258
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
iterator find(const KeyT &Key)
Definition: MapVector.h:147
reverse_iterator rend()
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't change during the execution of this function.
reverse_iterator rbegin()
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:285
void dumpNodeAll(const SUnit &SU) const
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Scheduling dependency.
Definition: ScheduleDAG.h:49
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:838
#define P(N)
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register information).
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for correctness.
Definition: ScheduleDAG.h:200
Array of PressureDiffs.
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:64
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:534
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static unsigned getReductionSize()
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
Definition: MCSchedule.h:64
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
Internal state used to compute SchedDFSResult.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
void reComputeSize()
Counts the number of SUs in this map after a reduction.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:110
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:71
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL)
This is a wrapper around GetUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:66
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
MCRegAliasIterator enumerates all registers aliasing Reg.
Track the current register pressure at some position in the instruction stream, and remember the high water mark.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
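A minimal sketch of the SparseSet protocol, assuming keys are register units drawn from a fixed-size universe; setUniverse() must be called before any insertion, and the helper name demoSparseSet is hypothetical:

#include "llvm/ADT/SparseSet.h"
using namespace llvm;

void demoSparseSet(unsigned NumRegUnits) {
  SparseSet<unsigned> Live;
  Live.setUniverse(NumRegUnits); // size the set before inserting keys
  Live.insert(3);
  Live.insert(7);
  if (Live.count(3))             // O(1) membership test
    Live.erase(Live.find(3));    // erase takes a valid iterator
}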
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
const MachineFrameInfo & MFI
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
void clear()
Clears the set.
void dumpNodeName(const SUnit &SU) const
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
bool isVisited(const SUnit *SU) const
Returns true if this node has been visited by the DFS traversal.
MCSubRegIterator enumerates all sub-registers of Reg.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1186
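A minimal sketch of the range-based llvm::find wrapper; the container and value here are arbitrary illustrations:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

bool containsValue(const SmallVectorImpl<int> &V, int Val) {
  // Equivalent to std::find(V.begin(), V.end(), Val) != V.end().
  return llvm::find(V, Val) != V.end();
}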
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4355
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:519
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
bool isConstantPhysReg(unsigned PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
void setIsKill(bool Val=true)
iterator find(const KeyT &Key)
Find an element by its key.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:197
The two locations may or may not alias. This is the least precise result.
Definition: AliasAnalysis.h:86
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the same scheduling region that depend on the physical register referenced at OperIdx.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
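A minimal sketch of make_range, wrapping a (begin, end) iterator pair so it can drive a range-based for loop; skipping the first element is just an arbitrary illustration:

#include "llvm/ADT/iterator_range.h"
#include <iterator>
#include <vector>
using namespace llvm;

int sumAfterFirst(const std::vector<int> &V) {
  int Sum = 0;
  if (V.empty())
    return Sum;
  // Iterate over the sub-range [begin+1, end) without writing an index loop.
  for (int X : make_range(std::next(V.begin()), V.end()))
    Sum += X;
  return Sum;
}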
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the RegisterOperands accordingly.
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:278
Iterator for intrusive lists based on ilist_node.
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
bool isDebugValue() const
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:837
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
Reg2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instructions.
void eraseAll(const KeyT &K)
Erase all elements with the given key.
CHAIN = SC CHAIN, Imm128 - System call.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
Nonvolatile load/Store instructions that may alias.
Definition: ScheduleDAG.h:70
typename std::vector<std::pair<ValueType, SUList>>::iterator iterator
Definition: MapVector.h:49
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store followed by a load.
Definition: ScheduleDAG.h:384
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:759
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
Special value supplied for machine level alias analysis.
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
void dumpNode(const SUnit &SU) const override
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
iterator erase(iterator I)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:563
Representation of each machine instruction.
Definition: MachineInstr.h:64
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:564
void finalize()
Sets each node's subtree ID to the representative ID and records connections between trees.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
A set of physical registers with utility functions to track liveness when walking backward/forward through a basic block.
Definition: LivePhysRegs.h:48
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
constexpr bool any() const
Definition: LaneBitmask.h:52
bool empty() const
Returns true if the set is empty.
Mapping from virtual register to SUnit including an operand index.
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AliasAnalysis and Target fail to deny the dependency.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
raw_ostream & operator<<(raw_ostream &OS, const APInt &I)
Definition: APInt.h:2045
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
iterator begin()
Definition: MapVector.h:69
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and returns a pointer to it.
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clears the map.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:825
iterator end()
Definition: MapVector.h:71
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
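A minimal sketch of how a data edge is typically assembled from these pieces (an SDep of kind Data, setLatency, then addPred); DefSU, UseSU, Reg, and Latency are assumed inputs rather than values computed here, and the helper name addDataEdge is hypothetical:

#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

// Record that UseSU reads the value DefSU defines in register Reg.
static void addDataEdge(SUnit *DefSU, SUnit *UseSU, unsigned Reg,
                        unsigned Latency) {
  SDep Dep(DefSU, SDep::Data, Reg); // true (read-after-write) dependence
  Dep.setLatency(Latency);          // cycles until the value is available
  UseSU->addPred(Dep);              // no-op if an equivalent edge exists
}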
bool hasImplicitUseOfPhysReg(unsigned Reg) const
Return true if this instruction implicitly uses the specified physical register.
Definition: MCInstrDesc.h:579
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
LLVM Value Representation.
Definition: Value.h:73
void removeRegsInMask(const MachineOperand &MO, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand *>> *Clobbers=nullptr)
Removes physical registers clobbered by the regmask operand MO.
void clear()
Clears map from all contents.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
bool isPosition() const
SmallVector< SDep, 4 > Succs
All SUnit successors.
Definition: ScheduleDAG.h:257
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
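A minimal sketch of the bottom-up liveness walk these LivePhysRegs helpers support, the same direction fixupKills() works in; the function name walkLiveness is hypothetical, and it assumes stepBackward(), which is part of LivePhysRegs though not listed above:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

static void walkLiveness(MachineBasicBlock &MBB,
                         const TargetRegisterInfo &TRI) {
  LivePhysRegs LiveRegs;
  LiveRegs.init(TRI);          // (re-)initialize for this target's registers
  LiveRegs.addLiveOuts(MBB);   // seed with the block's live-out set
  for (MachineInstr &MI : llvm::reverse(MBB))
    LiveRegs.stepBackward(MI); // remove defs, then add uses
}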
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:45
static bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unmodeled side effects).
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore, etc.
MachineBasicBlock * BB
The block in which to insert instructions.
IRTranslator LLVM IR MI
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediately following it.
Definition: MachineInstr.h:658
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set.
Definition: LivePhysRegs.h:89
Register getReg() const
getReg - Returns the register number.
static void dumpSUList(ScheduleDAGInstrs::SUList &L)
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
SchedDFSImpl(SchedDFSResult &r)
bool hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:44
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
bool hasTailCall() const
Returns true if the function contains a tail call.
bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
hexagon widen stores
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
A discriminated union of two or more pointer types, with the discriminator in the low bit of the pointer.
Definition: PointerUnion.h:156
void resize(size_type N)
Definition: SmallVector.h:344