LLVM  14.0.0git
ScheduleDAGInstrs.cpp
Go to the documentation of this file.
1 //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This implements the ScheduleDAGInstrs class, which implements
10 /// re-scheduling of MachineInstrs.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/IntEqClasses.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/SparseSet.h"
40 #include "llvm/Config/llvm-config.h"
41 #include "llvm/IR/Constants.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/Operator.h"
46 #include "llvm/IR/Type.h"
47 #include "llvm/IR/Value.h"
48 #include "llvm/MC/LaneBitmask.h"
49 #include "llvm/MC/MCRegisterInfo.h"
50 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Compiler.h"
53 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/Format.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <iterator>
60 #include <string>
61 #include <utility>
62 #include <vector>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "machine-scheduler"
67 
68 static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
69  cl::ZeroOrMore, cl::init(false),
70  cl::desc("Enable use of AA during MI DAG construction"));
71 
72 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
73  cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
74 
75 // Note: the two options below might be used in tuning compile time vs
76 // output quality. Setting HugeRegion so large that it will never be
77 // reached means best-effort, but may be slow.
78 
79 // When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
80 // together hold this many SUs, a reduction of maps will be done.
81 static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
82  cl::init(1000), cl::desc("The limit to use while constructing the DAG "
83  "prior to scheduling, at which point a trade-off "
84  "is made to avoid excessive compile time."));
85 
87  "dag-maps-reduction-size", cl::Hidden,
88  cl::desc("A huge scheduling region will have maps reduced by this many "
89  "nodes at a time. Defaults to HugeRegion / 2."));
90 
91 static unsigned getReductionSize() {
92  // Always reduce a huge region with half of the elements, except
93  // when user sets this number explicitly.
94  if (ReductionSize.getNumOccurrences() == 0)
95  return HugeRegion / 2;
96  return ReductionSize;
97 }
98 
100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
101  dbgs() << "{ ";
102  for (const SUnit *su : L) {
103  dbgs() << "SU(" << su->NodeNum << ")";
104  if (su != L.back())
105  dbgs() << ", ";
106  }
107  dbgs() << "}\n";
108 #endif
109 }
110 
112  const MachineLoopInfo *mli,
113  bool RemoveKillFlags)
114  : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
115  RemoveKillFlags(RemoveKillFlags),
116  UnknownValue(UndefValue::get(
117  Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
118  DbgValues.clear();
119 
120  const TargetSubtargetInfo &ST = mf.getSubtarget();
121  SchedModel.init(&ST);
122 }
123 
124 /// If this machine instr has memory reference information and it can be
125 /// tracked to a normal reference to a known object, return the Value
126 /// for that object. This function returns false if the memory location is
127 /// unknown or may alias anything.
129  const MachineFrameInfo &MFI,
130  UnderlyingObjectsVector &Objects,
131  const DataLayout &DL) {
132  auto allMMOsOkay = [&]() {
133  for (const MachineMemOperand *MMO : MI->memoperands()) {
134  // TODO: Figure out whether isAtomic is really necessary (see D57601).
135  if (MMO->isVolatile() || MMO->isAtomic())
136  return false;
137 
138  if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
139  // Function that contain tail calls don't have unique PseudoSourceValue
140  // objects. Two PseudoSourceValues might refer to the same or
141  // overlapping locations. The client code calling this function assumes
142  // this is not the case. So return a conservative answer of no known
143  // object.
144  if (MFI.hasTailCall())
145  return false;
146 
147  // For now, ignore PseudoSourceValues which may alias LLVM IR values
148  // because the code that uses this function has no way to cope with
149  // such aliases.
150  if (PSV->isAliased(&MFI))
151  return false;
152 
153  bool MayAlias = PSV->mayAlias(&MFI);
154  Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
155  } else if (const Value *V = MMO->getValue()) {
157  if (!getUnderlyingObjectsForCodeGen(V, Objs))
158  return false;
159 
160  for (Value *V : Objs) {
162  Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
163  }
164  } else
165  return false;
166  }
167  return true;
168  };
169 
170  if (!allMMOsOkay()) {
171  Objects.clear();
172  return false;
173  }
174 
175  return true;
176 }
177 
179  BB = bb;
180 }
181 
183  // Subclasses should no longer refer to the old block.
184  BB = nullptr;
185 }
186 
190  unsigned regioninstrs) {
191  assert(bb == BB && "startBlock should set BB");
192  RegionBegin = begin;
193  RegionEnd = end;
194  NumRegionInstrs = regioninstrs;
195 }
196 
198  // Nothing to do.
199 }
200 
202  MachineInstr *ExitMI =
203  RegionEnd != BB->end()
205  : nullptr;
206  ExitSU.setInstr(ExitMI);
207  // Add dependencies on the defs and uses of the instruction.
208  if (ExitMI) {
209  for (const MachineOperand &MO : ExitMI->operands()) {
210  if (!MO.isReg() || MO.isDef()) continue;
211  Register Reg = MO.getReg();
214  } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
215  addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
216  }
217  }
218  }
219  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
220  // For others, e.g. fallthrough, conditional branch, assume the exit
221  // uses all the registers that are livein to the successor blocks.
222  for (const MachineBasicBlock *Succ : BB->successors()) {
223  for (const auto &LI : Succ->liveins()) {
224  if (!Uses.contains(LI.PhysReg))
225  Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
226  }
227  }
228  }
229 }
230 
231 /// MO is an operand of SU's instruction that defines a physical register. Adds
232 /// data dependencies from SU to any uses of the physical register.
// Walks all uses (recorded in the Uses map) of every register aliasing the
// physreg defined at OperIdx, and adds a data (or artificial) dependence
// edge from SU to each such use, with a latency adjusted by the target.
233 void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
234  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
235  assert(MO.isDef() && "expect physreg def");
236 
237  // Ask the target if address-backscheduling is desirable, and if so how much.
// NOTE(review): extraction dropped original line 238 here — presumably the
// definition of the subtarget reference 'ST' used below; recover from
// upstream before compiling.
239 
240  // Only use any non-zero latency for real defs/uses, in contrast to
241  // "fake" operands added by regalloc.
242  const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
// A def operand past the MCInstrDesc operand list that is not a declared
// implicit def is regalloc bookkeeping, not a real def.
243  bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
244  !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
245  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
246  Alias.isValid(); ++Alias) {
247  for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
248  SUnit *UseSU = I->SU;
249  if (UseSU == SU)
250  continue;
251 
252  // Adjust the dependence latency using operand def/use information,
253  // then allow the target to perform its own adjustments.
254  int UseOp = I->OpIdx;
255  MachineInstr *RegUse = nullptr;
256  SDep Dep;
// A negative operand index marks a use recorded without a concrete operand
// (e.g. ExitSU live-outs) — model it as an artificial edge.
257  if (UseOp < 0)
258  Dep = SDep(SU, SDep::Artificial);
259  else {
260  // Set the hasPhysRegDefs only for physreg defs that have a use within
261  // the scheduling region.
262  SU->hasPhysRegDefs = true;
263  Dep = SDep(SU, SDep::Data, *Alias);
264  RegUse = UseSU->getInstr();
265  }
266  const MCInstrDesc *UseMIDesc =
267  (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
268  bool ImplicitPseudoUse =
269  (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
270  !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
271  if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
// NOTE(review): extraction dropped original line 272 — the start of the
// Dep.setLatency(...) call whose argument list continues on the next line.
273  RegUse, UseOp));
274  ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
275  } else {
// Fake (regalloc-added) operand on either end: zero latency.
276  Dep.setLatency(0);
277  // FIXME: We could always let target to adjustSchedDependency(), and
278  // remove this condition, but that currently asserts in Hexagon BE.
279  if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle()))
280  ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
281  }
282 
283  UseSU->addPred(Dep);
284  }
285  }
286 }
287 
288 /// Adds register dependencies (data, anti, and output) from this SUnit
289 /// to following instructions in the same scheduling region that depend the
290 /// physical register referenced at OperIdx.
// For the physreg operand at OperIdx: adds output/anti dependencies against
// prior defs of aliasing registers, then either records the operand as a use
// (def edges are added later when the def is seen, walking bottom-up) or, for
// a def, emits data deps to recorded uses and takes over the Defs map entry.
291 void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
292  MachineInstr *MI = SU->getInstr();
293  MachineOperand &MO = MI->getOperand(OperIdx);
294  Register Reg = MO.getReg();
295  // We do not need to track any dependencies for constant registers.
// NOTE(review): extraction dropped original line 296 — the condition guarding
// this early return (presumably a constant-physreg check on Reg).
297  return;
298 
// NOTE(review): extraction dropped original line 299 — presumably the
// definition of the subtarget reference 'ST' used below.
300 
301  // Optionally add output and anti dependencies. For anti
302  // dependencies we use a latency of 0 because for a multi-issue
303  // target we want to allow the defining instruction to issue
304  // in the same cycle as the using instruction.
305  // TODO: Using a latency of 1 here for output dependencies assumes
306  // there's no cost for reusing registers.
// NOTE(review): extraction dropped original line 307 — the definition of
// 'Kind' (Output for defs, Anti for uses) referenced in the loop below.
308  for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
309  if (!Defs.contains(*Alias))
310  continue;
311  for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
312  SUnit *DefSU = I->SU;
313  if (DefSU == &ExitSU)
314  continue;
// Skip dead-def-on-dead-def output edges: no value actually flows.
315  if (DefSU != SU &&
316  (Kind != SDep::Output || !MO.isDead() ||
317  !DefSU->getInstr()->registerDefIsDead(*Alias))) {
318  SDep Dep(SU, Kind, /*Reg=*/*Alias);
319  if (Kind != SDep::Anti)
320  Dep.setLatency(
321  SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
322  ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep);
323  DefSU->addPred(Dep);
324  }
325  }
326  }
327 
328  if (!MO.isDef()) {
329  SU->hasPhysRegUses = true;
330  // Either insert a new Reg2SUnits entry with an empty SUnits list, or
331  // retrieve the existing SUnits list for this register's uses.
332  // Push this SUnit on the use list.
333  Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
334  if (RemoveKillFlags)
335  MO.setIsKill(false);
336  } else {
337  addPhysRegDataDeps(SU, OperIdx);
338 
339  // Clear previous uses and defs of this register and its subregisters.
340  for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
341  if (Uses.contains(*SubReg))
342  Uses.eraseAll(*SubReg);
343  if (!MO.isDead())
344  Defs.eraseAll(*SubReg);
345  }
346  if (MO.isDead() && SU->isCall) {
347  // Calls will not be reordered because of chain dependencies (see
348  // below). Since call operands are dead, calls may continue to be added
349  // to the DefList making dependence checking quadratic in the size of
350  // the block. Instead, we leave only one call at the back of the
351  // DefList.
// NOTE(review): extraction dropped original line 352 — the equal_range()
// call producing the iterator pair 'P' consumed on the next two lines.
353  Reg2SUnitsMap::iterator B = P.first;
354  Reg2SUnitsMap::iterator I = P.second;
// Walk backwards from the end, erasing trailing call entries so only the
// newest call remains in the DefList.
355  for (bool isBegin = I == B; !isBegin; /* empty */) {
356  isBegin = (--I) == B;
357  if (!I->SU->isCall)
358  break;
359  I = Defs.erase(I);
360  }
361  }
362 
363  // Defs are pushed in the order they are visited and never reordered.
364  Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
365  }
366 }
367 
369 {
370  Register Reg = MO.getReg();
371  // No point in tracking lanemasks if we don't have interesting subregisters.
372  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
373  if (!RC.HasDisjunctSubRegs)
374  return LaneBitmask::getAll();
375 
376  unsigned SubReg = MO.getSubReg();
377  if (SubReg == 0)
378  return RC.getLaneMask();
380 }
381 
383  auto RegUse = CurrentVRegUses.find(MO.getReg());
384  if (RegUse == CurrentVRegUses.end())
385  return true;
386  return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
387 }
388 
389 /// Adds register output and data dependencies from this SUnit to instructions
390 /// that occur later in the same scheduling region if they read from or write to
391 /// the virtual register defined at OperIdx.
392 ///
393 /// TODO: Hoist loop induction variable increments. This has to be
394 /// reevaluated. Generally, IV scheduling should be done before coalescing.
395 void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
396  MachineInstr *MI = SU->getInstr();
397  MachineOperand &MO = MI->getOperand(OperIdx);
398  Register Reg = MO.getReg();
399 
// DefLaneMask: lanes written by this operand. KillLaneMask: lanes whose
// previous value dies at this instruction (a superset when <read-undef>).
400  LaneBitmask DefLaneMask;
401  LaneBitmask KillLaneMask;
402  if (TrackLaneMasks) {
403  bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
404  DefLaneMask = getLaneMaskForMO(MO);
405  // If we have a <read-undef> flag, none of the lane values comes from an
406  // earlier instruction.
407  KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
408 
409  if (MO.getSubReg() != 0 && MO.isUndef()) {
410  // There may be other subregister defs on the same instruction of the same
411  // register in later operands. The lanes of other defs will now be live
412  // after this instruction, so these should not be treated as killed by the
413  // instruction even though they appear to be killed in this one operand.
414  for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) {
415  const MachineOperand &OtherMO = MI->getOperand(I);
416  if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
417  KillLaneMask &= ~getLaneMaskForMO(OtherMO);
418  }
419  }
420 
421  // Clear undef flag, we'll re-add it later once we know which subregister
422  // Def is first.
423  MO.setIsUndef(false);
424  } else {
// Lanemask tracking disabled: treat the whole register as one unit.
425  DefLaneMask = LaneBitmask::getAll();
426  KillLaneMask = LaneBitmask::getAll();
427  }
428 
429  if (MO.isDead()) {
430  assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
431  } else {
432  // Add data dependence to all uses we found so far.
// NOTE(review): extraction dropped original lines 433-434 — the for-loop
// header over CurrentVRegUses whose continuation appears on the next line.
435  E = CurrentVRegUses.end(); I != E; /*empty*/) {
436  LaneBitmask LaneMask = I->LaneMask;
437  // Ignore uses of other lanes.
438  if ((LaneMask & KillLaneMask).none()) {
439  ++I;
440  continue;
441  }
442 
443  if ((LaneMask & DefLaneMask).any()) {
444  SUnit *UseSU = I->SU;
445  MachineInstr *Use = UseSU->getInstr();
446  SDep Dep(SU, SDep::Data, Reg);
// NOTE(review): extraction dropped original line 447 — the start of the
// Dep.setLatency(...) call whose argument list continues on the next line.
448  I->OperandIndex));
449  ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep);
450  UseSU->addPred(Dep);
451  }
452 
453  LaneMask &= ~KillLaneMask;
454  // If we found a Def for all lanes of this use, remove it from the list.
455  if (LaneMask.any()) {
456  I->LaneMask = LaneMask;
457  ++I;
458  } else
// NOTE(review): extraction dropped original line 459 — the else-branch
// statement (presumably erasing the fully-covered use entry from the map).
460  }
461  }
462 
463  // Shortcut: Singly defined vregs do not have output/anti dependencies.
464  if (MRI.hasOneDef(Reg))
465  return;
466 
467  // Add output dependence to the next nearest defs of this vreg.
468  //
469  // Unless this definition is dead, the output dependence should be
470  // transitively redundant with antidependencies from this definition's
471  // uses. We're conservative for now until we have a way to guarantee the uses
472  // are not eliminated sometime during scheduling. The output dependence edge
473  // is also useful if output latency exceeds def-use latency.
474  LaneBitmask LaneMask = DefLaneMask;
// NOTE(review): extraction dropped original line 475 — the ranged-for header
// over CurrentVRegDefs whose continuation appears on the next line.
476  CurrentVRegDefs.end())) {
477  // Ignore defs for other lanes.
478  if ((V2SU.LaneMask & LaneMask).none())
479  continue;
480  // Add an output dependence.
481  SUnit *DefSU = V2SU.SU;
482  // Ignore additional defs of the same lanes in one instruction. This can
483  // happen because lanemasks are shared for targets with too many
484  // subregisters. We also use some representation tricks/hacks where we
485  // add super-register defs/uses, to imply that although we only access parts
486  // of the reg we care about the full one.
487  if (DefSU == SU)
488  continue;
489  SDep Dep(SU, SDep::Output, Reg);
490  Dep.setLatency(
491  SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
492  DefSU->addPred(Dep);
493 
494  // Update current definition. This can get tricky if the def was about a
495  // bigger lanemask before. We then have to shrink it and create a new
496  // VReg2SUnit for the non-overlapping part.
497  LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
498  LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
499  V2SU.SU = SU;
500  V2SU.LaneMask = OverlapMask;
501  if (NonOverlapMask.any())
502  CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
503  }
504  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
505  if (LaneMask.any())
506  CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
507 }
508 
509 /// Adds a register data dependency if the instruction that defines the
510 /// virtual register used at OperIdx is mapped to an SUnit. Add a register
511 /// antidependency from this SUnit to instructions that occur later in the same
512 /// scheduling region if they write the virtual register.
513 ///
514 /// TODO: Handle ExitSU "uses" properly.
515 void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
516  const MachineInstr *MI = SU->getInstr();
517  assert(!MI->isDebugOrPseudoInstr());
518 
519  const MachineOperand &MO = MI->getOperand(OperIdx);
520  Register Reg = MO.getReg();
521 
522  // Remember the use. Data dependencies will be added when we find the def.
// NOTE(review): extraction dropped original lines 523-524 — the computation
// of 'LaneMask' for this use (lane-mask tracking dependent), used below.
525  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
526 
527  // Add antidependences to the following defs of the vreg.
// NOTE(review): extraction dropped original line 528 — the ranged-for header
// over CurrentVRegDefs whose continuation appears on the next line.
529  CurrentVRegDefs.end())) {
530  // Ignore defs for unrelated lanes.
531  LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
532  if ((PrevDefLaneMask & LaneMask).none())
533  continue;
534  if (V2SU.SU == SU)
535  continue;
536 
// Anti edge: the recorded (later-in-program-order) def must not be scheduled
// above this use. Anti deps carry the default latency of 0.
537  V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
538  }
539 }
540 
541 /// Returns true if MI is an instruction we are unable to reason about
542 /// (like a call or something with unmodeled side effects).
543 static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
544  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
545  (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
546 }
547 
549  unsigned Latency) {
550  if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
551  SDep Dep(SUa, SDep::MayAliasMem);
552  Dep.setLatency(Latency);
553  SUb->addPred(Dep);
554  }
555 }
556 
557 /// Creates an SUnit for each real instruction, numbered in top-down
558 /// topological order. The instruction order A < B, implies that no edge exists
559 /// from B to A.
560 ///
561 /// Map each real instruction to its SUnit.
562 ///
563 /// After initSUnits, the SUnits vector cannot be resized and the scheduler may
564 /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
565 /// instead of pointers.
566 ///
567 /// MachineScheduler relies on initSUnits numbering the nodes by their order in
568 /// the original instruction list.
570  // We'll be allocating one SUnit for each real instruction in the region,
571  // which is contained within a basic block.
572  SUnits.reserve(NumRegionInstrs);
573 
575  if (MI.isDebugOrPseudoInstr())
576  continue;
577 
578  SUnit *SU = newSUnit(&MI);
579  MISUnitMap[&MI] = SU;
580 
581  SU->isCall = MI.isCall();
582  SU->isCommutable = MI.isCommutable();
583 
584  // Assign the Latency field of SU using target-provided information.
585  SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
586 
587  // If this SUnit uses a reserved or unbuffered resource, mark it as such.
588  //
589  // Reserved resources block an instruction from issuing and stall the
590  // entire pipeline. These are identified by BufferSize=0.
591  //
592  // Unbuffered resources prevent execution of subsequent instructions that
593  // require the same resources. This is used for in-order execution pipelines
594  // within an out-of-order core. These are identified by BufferSize=1.
596  const MCSchedClassDesc *SC = getSchedClass(SU);
597  for (const MCWriteProcResEntry &PRE :
600  switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
601  case 0:
602  SU->hasReservedResource = true;
603  break;
604  case 1:
605  SU->isUnbuffered = true;
606  break;
607  default:
608  break;
609  }
610  }
611  }
612  }
613 }
614 
// Maps each underlying Value to the list of SUnits (stores or loads) that
// access it, while maintaining a running count of all SUs across all lists
// so huge regions can be detected cheaply.
615 class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
616  /// Current total number of SUs in map.
617  unsigned NumNodes = 0;
618 
619  /// 1 for loads, 0 for stores. (see comment in SUList)
620  unsigned TrueMemOrderLatency;
621 
622 public:
623  Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
624 
625  /// To keep NumNodes up to date, insert() is used instead of
626  /// this operator w/ push_back().
// NOTE(review): extraction dropped original line 627 — the operator[]
// signature whose body (the llvm_unreachable) follows on the next line.
628  llvm_unreachable("Don't use. Use insert() instead."); };
629 
630  /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
631  /// reduce().
632  void inline insert(SUnit *SU, ValueType V) {
633  MapVector::operator[](V).push_back(SU);
634  NumNodes++;
635  }
636 
637  /// Clears the list of SUs mapped to V.
638  void inline clearList(ValueType V) {
639  iterator Itr = find(V);
640  if (Itr != end()) {
641  assert(NumNodes >= Itr->second.size());
642  NumNodes -= Itr->second.size();
643 
// Only the list is emptied; the (now empty) map entry remains until the
// map-wide remove_if/clear.
644  Itr->second.clear();
645  }
646  }
647 
648  /// Clears map from all contents.
649  void clear() {
// NOTE(review): extraction dropped original line 650 — presumably the call
// clearing the underlying MapVector storage before the counter reset.
651  NumNodes = 0;
652  }
653 
// Size is the total number of SUs across all lists, not the number of keys.
654  unsigned inline size() const { return NumNodes; }
655 
656  /// Counts the number of SUs in this map after a reduction.
657  void reComputeSize() {
658  NumNodes = 0;
659  for (auto &I : *this)
660  NumNodes += I.second.size();
661  }
662 
663  unsigned inline getTrueMemOrderLatency() const {
664  return TrueMemOrderLatency;
665  }
666 
// Out-of-line dump for debugging; definition not visible in this chunk.
667  void dump();
668 };
669 
671  Value2SUsMap &Val2SUsMap) {
672  for (auto &I : Val2SUsMap)
673  addChainDependencies(SU, I.second,
674  Val2SUsMap.getTrueMemOrderLatency());
675 }
676 
678  Value2SUsMap &Val2SUsMap,
679  ValueType V) {
680  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
681  if (Itr != Val2SUsMap.end())
682  addChainDependencies(SU, Itr->second,
683  Val2SUsMap.getTrueMemOrderLatency());
684 }
685 
687  assert(BarrierChain != nullptr);
688 
689  for (auto &I : map) {
690  SUList &sus = I.second;
691  for (auto *SU : sus)
692  SU->addPredBarrier(BarrierChain);
693  }
694  map.clear();
695 }
696 
698  assert(BarrierChain != nullptr);
699 
700  // Go through all lists of SUs.
701  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
702  Value2SUsMap::iterator CurrItr = I++;
703  SUList &sus = CurrItr->second;
704  SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
705  for (; SUItr != SUEE; ++SUItr) {
706  // Stop on BarrierChain or any instruction above it.
707  if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
708  break;
709 
710  (*SUItr)->addPredBarrier(BarrierChain);
711  }
712 
713  // Remove also the BarrierChain from list if present.
714  if (SUItr != SUEE && *SUItr == BarrierChain)
715  SUItr++;
716 
717  // Remove all SUs that are now successors of BarrierChain.
718  if (SUItr != sus.begin())
719  sus.erase(sus.begin(), SUItr);
720  }
721 
722  // Remove all entries with empty su lists.
723  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
724  return (mapEntry.second.empty()); });
725 
726  // Recompute the size of the map (NumNodes).
727  map.reComputeSize();
728 }
729 
731  RegPressureTracker *RPTracker,
732  PressureDiffs *PDiffs,
733  LiveIntervals *LIS,
734  bool TrackLaneMasks) {
737  : ST.useAA();
738  AAForDep = UseAA ? AA : nullptr;
739 
740  BarrierChain = nullptr;
741 
742  this->TrackLaneMasks = TrackLaneMasks;
743  MISUnitMap.clear();
745 
746  // Create an SUnit for each real instruction.
747  initSUnits();
748 
749  if (PDiffs)
750  PDiffs->init(SUnits.size());
751 
752  // We build scheduling units by walking a block's instruction list
753  // from bottom to top.
754 
755  // Each MIs' memory operand(s) is analyzed to a list of underlying
756  // objects. The SU is then inserted in the SUList(s) mapped from the
757  // Value(s). Each Value thus gets mapped to lists of SUs depending
758  // on it, stores and loads kept separately. Two SUs are trivially
759  // non-aliasing if they both depend on only identified Values and do
760  // not share any common Value.
761  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
762 
763  // Certain memory accesses are known to not alias any SU in Stores
764  // or Loads, and have therefore their own 'NonAlias'
765  // domain. E.g. spill / reload instructions never alias LLVM I/R
766  // Values. It would be nice to assume that this type of memory
767  // accesses always have a proper memory operand modelling, and are
768  // therefore never unanalyzable, but this is conservatively not
769  // done.
770  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
771 
772  // Track all instructions that may raise floating-point exceptions.
773  // These do not depend on one other (or normal loads or stores), but
774  // must not be rescheduled across global barriers. Note that we don't
775  // really need a "map" here since we don't track those MIs by value;
776  // using the same Value2SUsMap data type here is simply a matter of
777  // convenience.
778  Value2SUsMap FPExceptions;
779 
780  // Remove any stale debug info; sometimes BuildSchedGraph is called again
781  // without emitting the info from the previous call.
782  DbgValues.clear();
783  FirstDbgValue = nullptr;
784 
785  assert(Defs.empty() && Uses.empty() &&
786  "Only BuildGraph should update Defs/Uses");
789 
790  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
791  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
792  unsigned NumVirtRegs = MRI.getNumVirtRegs();
793  CurrentVRegDefs.setUniverse(NumVirtRegs);
794  CurrentVRegUses.setUniverse(NumVirtRegs);
795 
796  // Model data dependencies between instructions being scheduled and the
797  // ExitSU.
799 
800  // Walk the list of instructions, from bottom moving up.
801  MachineInstr *DbgMI = nullptr;
803  MII != MIE; --MII) {
804  MachineInstr &MI = *std::prev(MII);
805  if (DbgMI) {
806  DbgValues.push_back(std::make_pair(DbgMI, &MI));
807  DbgMI = nullptr;
808  }
809 
810  if (MI.isDebugValue() || MI.isDebugPHI()) {
811  DbgMI = &MI;
812  continue;
813  }
814 
815  if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
816  continue;
817 
818  SUnit *SU = MISUnitMap[&MI];
819  assert(SU && "No SUnit mapped to this MI");
820 
821  if (RPTracker) {
822  RegisterOperands RegOpers;
823  RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
824  if (TrackLaneMasks) {
825  SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
826  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
827  }
828  if (PDiffs != nullptr)
829  PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
830 
831  if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
832  RPTracker->recedeSkipDebugValues();
833  assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
834  RPTracker->recede(RegOpers);
835  }
836 
837  assert(
838  (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
839  "Cannot schedule terminators or labels!");
840 
841  // Add register-based dependencies (data, anti, and output).
842  // For some instructions (calls, returns, inline-asm, etc.) there can
843  // be explicit uses and implicit defs, in which case the use will appear
844  // on the operand list before the def. Do two passes over the operand
845  // list to make sure that defs are processed before any uses.
846  bool HasVRegDef = false;
847  for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
848  const MachineOperand &MO = MI.getOperand(j);
849  if (!MO.isReg() || !MO.isDef())
850  continue;
851  Register Reg = MO.getReg();
853  addPhysRegDeps(SU, j);
854  } else if (Register::isVirtualRegister(Reg)) {
855  HasVRegDef = true;
856  addVRegDefDeps(SU, j);
857  }
858  }
859  // Now process all uses.
860  for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
861  const MachineOperand &MO = MI.getOperand(j);
862  // Only look at use operands.
863  // We do not need to check for MO.readsReg() here because subsequent
864  // subregister defs will get output dependence edges and need no
865  // additional use dependencies.
866  if (!MO.isReg() || !MO.isUse())
867  continue;
868  Register Reg = MO.getReg();
870  addPhysRegDeps(SU, j);
871  } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
872  addVRegUseDeps(SU, j);
873  }
874  }
875 
876  // If we haven't seen any uses in this scheduling region, create a
877  // dependence edge to ExitSU to model the live-out latency. This is required
878  // for vreg defs with no in-region use, and prefetches with no vreg def.
879  //
880  // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
881  // check currently relies on being called before adding chain deps.
882  if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
883  SDep Dep(SU, SDep::Artificial);
884  Dep.setLatency(SU->Latency - 1);
885  ExitSU.addPred(Dep);
886  }
887 
888  // Add memory dependencies (Note: isStoreToStackSlot and
889  // isLoadFromStackSLot are not usable after stack slots are lowered to
890  // actual addresses).
891 
892  // This is a barrier event that acts as a pivotal node in the DAG.
893  if (isGlobalMemoryObject(AA, &MI)) {
894 
895  // Become the barrier chain.
896  if (BarrierChain)
898  BarrierChain = SU;
899 
900  LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
901  << BarrierChain->NodeNum << ").\n";);
902 
903  // Add dependencies against everything below it and clear maps.
904  addBarrierChain(Stores);
905  addBarrierChain(Loads);
906  addBarrierChain(NonAliasStores);
907  addBarrierChain(NonAliasLoads);
908  addBarrierChain(FPExceptions);
909 
910  continue;
911  }
912 
913  // Instructions that may raise FP exceptions may not be moved
914  // across any global barriers.
915  if (MI.mayRaiseFPException()) {
916  if (BarrierChain)
918 
919  FPExceptions.insert(SU, UnknownValue);
920 
921  if (FPExceptions.size() >= HugeRegion) {
922  LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
924  reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
925  }
926  }
927 
928  // If it's not a store or a variant load, we're done.
929  if (!MI.mayStore() &&
930  !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
931  continue;
932 
933  // Always add dependecy edge to BarrierChain if present.
934  if (BarrierChain)
936 
937  // Find the underlying objects for MI. The Objs vector is either
938  // empty, or filled with the Values of memory locations which this
939  // SU depends on.
941  bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
942  MF.getDataLayout());
943 
944  if (MI.mayStore()) {
945  if (!ObjsFound) {
946  // An unknown store depends on all stores and loads.
947  addChainDependencies(SU, Stores);
948  addChainDependencies(SU, NonAliasStores);
949  addChainDependencies(SU, Loads);
950  addChainDependencies(SU, NonAliasLoads);
951 
952  // Map this store to 'UnknownValue'.
953  Stores.insert(SU, UnknownValue);
954  } else {
955  // Add precise dependencies against all previously seen memory
956  // accesses mapped to the same Value(s).
957  for (const UnderlyingObject &UnderlObj : Objs) {
958  ValueType V = UnderlObj.getValue();
959  bool ThisMayAlias = UnderlObj.mayAlias();
960 
961  // Add dependencies to previous stores and loads mapped to V.
962  addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
963  addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
964  }
965  // Update the store map after all chains have been added to avoid adding
966  // self-loop edge if multiple underlying objects are present.
967  for (const UnderlyingObject &UnderlObj : Objs) {
968  ValueType V = UnderlObj.getValue();
969  bool ThisMayAlias = UnderlObj.mayAlias();
970 
971  // Map this store to V.
972  (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
973  }
974  // The store may have dependencies to unanalyzable loads and
975  // stores.
977  addChainDependencies(SU, Stores, UnknownValue);
978  }
979  } else { // SU is a load.
980  if (!ObjsFound) {
981  // An unknown load depends on all stores.
982  addChainDependencies(SU, Stores);
983  addChainDependencies(SU, NonAliasStores);
984 
985  Loads.insert(SU, UnknownValue);
986  } else {
987  for (const UnderlyingObject &UnderlObj : Objs) {
988  ValueType V = UnderlObj.getValue();
989  bool ThisMayAlias = UnderlObj.mayAlias();
990 
991  // Add precise dependencies against all previously seen stores
992  // mapping to the same Value(s).
993  addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
994 
995  // Map this load to V.
996  (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
997  }
998  // The load may have dependencies to unanalyzable stores.
999  addChainDependencies(SU, Stores, UnknownValue);
1000  }
1001  }
1002 
1003  // Reduce maps if they grow huge.
1004  if (Stores.size() + Loads.size() >= HugeRegion) {
1005  LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
1006  reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
1007  }
1008  if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
1009  LLVM_DEBUG(
1010  dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
1011  reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
1012  }
1013  }
1014 
1015  if (DbgMI)
1016  FirstDbgValue = DbgMI;
1017 
1018  Defs.clear();
1019  Uses.clear();
1022 
1023  Topo.MarkDirty();
1024 }
1025 
1027  PSV->printCustom(OS);
1028  return OS;
1029 }
1030 
1032  for (auto &Itr : *this) {
1033  if (Itr.first.is<const Value*>()) {
1034  const Value *V = Itr.first.get<const Value*>();
1035  if (isa<UndefValue>(V))
1036  dbgs() << "Unknown";
1037  else
1038  V->printAsOperand(dbgs());
1039  }
1040  else if (Itr.first.is<const PseudoSourceValue*>())
1041  dbgs() << Itr.first.get<const PseudoSourceValue*>();
1042  else
1043  llvm_unreachable("Unknown Value type.");
1044 
1045  dbgs() << " : ";
1046  dumpSUList(Itr.second);
1047  }
1048 }
1049 
/// Reduces huge Value2SUsMaps (stores/loads) to keep DAG construction from
/// becoming quadratic in very large regions: the N highest-numbered SUs are
/// dropped from the maps and replaced by a single BarrierChain node that all
/// later SUs will depend on instead.
void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
                                              Value2SUsMap &loads, unsigned N) {
  LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
             dbgs() << "Loading SUnits:\n"; loads.dump());

  // Insert all SU's NodeNums into a vector and sort it.
  std::vector<unsigned> NodeNums;
  NodeNums.reserve(stores.size() + loads.size());
  for (auto &I : stores)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  for (auto &I : loads)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  llvm::sort(NodeNums);

  // The N last elements in NodeNums will be removed, and the SU with
  // the lowest NodeNum of them will become the new BarrierChain to
  // let the not yet seen SUs have a dependency to the removed SUs.
  assert(N <= NodeNums.size());
  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
  if (BarrierChain) {
    // The aliasing and non-aliasing maps reduce independently of each
    // other, but share a common BarrierChain. Check if the
    // newBarrierChain is above the former one. If it is not, it may
    // introduce a loop to use newBarrierChain, so keep the old one.
    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
      // Chain the old barrier after the new one so no ordering is lost.
      BarrierChain->addPredBarrier(newBarrierChain);
      BarrierChain = newBarrierChain;
      LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);
    }
    else
      LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
                        << BarrierChain->NodeNum << ").\n";);
  }
  else
    BarrierChain = newBarrierChain;

  // Make the removed map entries depend on BarrierChain and shrink the maps.
  insertBarrierChain(stores);
  insertBarrierChain(loads);

  LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
             dbgs() << "Loading SUnits:\n"; loads.dump());
}
1095 
1096 static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
1097  MachineInstr &MI, bool addToLiveRegs) {
1098  for (MachineOperand &MO : MI.operands()) {
1099  if (!MO.isReg() || !MO.readsReg())
1100  continue;
1101  Register Reg = MO.getReg();
1102  if (!Reg)
1103  continue;
1104 
1105  // Things that are available after the instruction are killed by it.
1106  bool IsKill = LiveRegs.available(MRI, Reg);
1107  MO.setIsKill(IsKill);
1108  if (addToLiveRegs)
1109  LiveRegs.addReg(Reg);
1110  }
1111 }
1112 
/// Recomputes kill flags for all instructions in \p MBB by walking the block
/// from bottom to top with a backward liveness set.
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
  LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');

  // Seed liveness with the registers live out of the block.
  LiveRegs.init(*TRI);
  LiveRegs.addLiveOuts(MBB);

  // Examine block from end to start...
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    // Debug/pseudo instructions must not affect kill flags or liveness.
    if (MI.isDebugOrPseudoInstr())
      continue;

    // Update liveness. Registers that are defed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
      const MachineOperand &MO = *O;
      if (MO.isReg()) {
        if (!MO.isDef())
          continue;
        Register Reg = MO.getReg();
        if (!Reg)
          continue;
        LiveRegs.removeReg(Reg);
      } else if (MO.isRegMask()) {
        // Calls etc. clobber everything in the mask.
        LiveRegs.removeRegsInMask(MO);
      }
    }

    // If there is a bundle header fix it up first.
    if (!MI.isBundled()) {
      toggleKills(MRI, LiveRegs, MI, true);
    } else {
      MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
      // The header only gets its flags toggled; liveness is added when the
      // bundled instructions themselves are visited below.
      if (MI.isBundle())
        toggleKills(MRI, LiveRegs, MI, false);

      // Some targets make the (questionable) assumption that the instructions
      // inside the bundle are ordered and consequently only the last use of
      // a register inside the bundle can kill it.
      MachineBasicBlock::instr_iterator I = std::next(Bundle);
      while (I->isBundledWithSucc())
        ++I;
      // Walk the bundled instructions last-to-first so the final use of a
      // register is the one that receives the kill flag.
      do {
        if (!I->isDebugOrPseudoInstr())
          toggleKills(MRI, LiveRegs, *I, true);
        --I;
      } while (I != Bundle);
    }
  }
}
1163 
/// Prints "SU(<n>): <instruction>" for debugging. Compiled out entirely in
/// release builds unless LLVM_ENABLE_DUMP is defined.
void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dumpNodeName(SU);
  dbgs() << ": ";
  SU.getInstr()->dump();
#endif
}
1171 
1173 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1174  if (EntrySU.getInstr() != nullptr)
1175  dumpNodeAll(EntrySU);
1176  for (const SUnit &SU : SUnits)
1177  dumpNodeAll(SU);
1178  if (ExitSU.getInstr() != nullptr)
1179  dumpNodeAll(ExitSU);
1180 #endif
1181 }
1182 
1183 std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1184  std::string s;
1185  raw_string_ostream oss(s);
1186  if (SU == &EntrySU)
1187  oss << "<entry>";
1188  else if (SU == &ExitSU)
1189  oss << "<exit>";
1190  else
1191  SU->getInstr()->print(oss, /*IsStandalone=*/true);
1192  return oss.str();
1193 }
1194 
1195 /// Return the basic block label. It is not necessarilly unique because a block
1196 /// contains multiple scheduling regions. But it is fine for visualization.
1197 std::string ScheduleDAGInstrs::getDAGName() const {
1198  return "dag." + BB->getFullName();
1199 }
1200 
1201 bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
1202  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
1203 }
1204 
1205 bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
1206  if (SuccSU != &ExitSU) {
1207  // Do not use WillCreateCycle, it assumes SD scheduling.
1208  // If Pred is reachable from Succ, then the edge creates a cycle.
1209  if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
1210  return false;
1211  Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
1212  }
1213  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
1214  // Return true regardless of whether a new edge needed to be inserted.
1215  return true;
1216 }
1217 
1218 //===----------------------------------------------------------------------===//
1219 // SchedDFSResult Implementation
1220 //===----------------------------------------------------------------------===//
1221 
1222 namespace llvm {
1223 
1224 /// Internal state used to compute SchedDFSResult.
1226  SchedDFSResult &R;
1227 
1228  /// Join DAG nodes into equivalence classes by their subtree.
1229  IntEqClasses SubtreeClasses;
1230  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1231  std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1232 
1233  struct RootData {
1234  unsigned NodeID;
1235  unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1236  unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1237  /// children.
1238 
1239  RootData(unsigned id): NodeID(id),
1240  ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1241 
1242  unsigned getSparseSetIndex() const { return NodeID; }
1243  };
1244 
1245  SparseSet<RootData> RootSet;
1246 
1247 public:
  /// Binds this helper to the result object \p r, sizing the equivalence
  /// classes and the root-set universe to the number of DAG nodes.
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }
1251 
1252  /// Returns true if this node been visited by the DFS traversal.
1253  ///
1254  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1255  /// ID. Later, SubtreeID is updated but remains valid.
1256  bool isVisited(const SUnit *SU) const {
1257  return R.DFSNodeData[SU->NodeNum].SubtreeID
1258  != SchedDFSResult::InvalidSubtreeID;
1259  }
1260 
1261  /// Initializes this node's instruction count. We don't need to flag the node
1262  /// visited until visitPostorder because the DAG cannot have cycles.
1263  void visitPreorder(const SUnit *SU) {
1264  R.DFSNodeData[SU->NodeNum].InstrCount =
1265  SU->getInstr()->isTransient() ? 0 : 1;
1266  }
1267 
  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (const SDep &PredDep : SU->Preds) {
      // Only data edges define the subtree structure.
      if (PredDep.getKind() != SDep::Data)
        continue;
      unsigned PredNum = PredDep.getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }
1309 
1310  /// Called once for each tree edge after calling visitPostOrderNode on
1311  /// the predecessor. Increment the parent node's instruction count and
1312  /// preemptively join this subtree to its parent's if it is small enough.
1313  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1314  R.DFSNodeData[Succ->NodeNum].InstrCount
1315  += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1316  joinPredSubtree(PredDep, Succ);
1317  }
1318 
1319  /// Adds a connection for cross edges.
1320  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1321  ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
1322  }
1323 
  /// Sets each node's subtree ID to the representative ID and record
  /// connections between trees.
  void finalize() {
    // Renumber equivalence classes to a dense 0..N-1 range.
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    // Transfer per-root bookkeeping into the per-tree result arrays.
    for (const RootData &Root : RootSet) {
      unsigned TreeID = SubtreeClasses[Root.NodeID];
      if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    // Rewrite each node's SubtreeID from "representative node" to tree index.
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      LLVM_DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
                        << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    // Resolve the cross-edge pairs recorded during the DFS into symmetric
    // tree-to-tree connections, skipping pairs that ended up in one tree.
    for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
      unsigned PredTree = SubtreeClasses[P.first->NodeNum];
      unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = P.first->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }
1359 
1360 protected:
1361  /// Joins the predecessor subtree with the successor that is its DFS parent.
1362  /// Applies some heuristics before joining.
1363  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1364  bool CheckLimit = true) {
1365  assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1366 
1367  // Check if the predecessor is already joined.
1368  const SUnit *PredSU = PredDep.getSUnit();
1369  unsigned PredNum = PredSU->NodeNum;
1370  if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1371  return false;
1372 
1373  // Four is the magic number of successors before a node is considered a
1374  // pinch point.
1375  unsigned NumDataSucs = 0;
1376  for (const SDep &SuccDep : PredSU->Succs) {
1377  if (SuccDep.getKind() == SDep::Data) {
1378  if (++NumDataSucs >= 4)
1379  return false;
1380  }
1381  }
1382  if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1383  return false;
1384  R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1385  SubtreeClasses.join(Succ->NodeNum, PredNum);
1386  return true;
1387  }
1388 
1389  /// Called by finalize() to record a connection between trees.
1390  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1391  if (!Depth)
1392  return;
1393 
1394  do {
1396  R.SubtreeConnections[FromTree];
1397  for (SchedDFSResult::Connection &C : Connections) {
1398  if (C.TreeID == ToTree) {
1399  C.Level = std::max(C.Level, Depth);
1400  return;
1401  }
1402  }
1403  Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1404  FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1405  } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1406  }
1407 };
1408 
1409 } // end namespace llvm
1410 
1411 namespace {
1412 
1413 /// Manage the stack used by a reverse depth-first search over the DAG.
1414 class SchedDAGReverseDFS {
1415  std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1416 
1417 public:
1418  bool isComplete() const { return DFSStack.empty(); }
1419 
1420  void follow(const SUnit *SU) {
1421  DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
1422  }
1423  void advance() { ++DFSStack.back().second; }
1424 
1425  const SDep *backtrack() {
1426  DFSStack.pop_back();
1427  return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1428  }
1429 
1430  const SUnit *getCurr() const { return DFSStack.back().first; }
1431 
1432  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1433 
1434  SUnit::const_pred_iterator getPredEnd() const {
1435  return getCurr()->Preds.end();
1436  }
1437 };
1438 
1439 } // end anonymous namespace
1440 
1441 static bool hasDataSucc(const SUnit *SU) {
1442  for (const SDep &SuccDep : SU->Succs) {
1443  if (SuccDep.getKind() == SDep::Data &&
1444  !SuccDep.getSUnit()->isBoundaryNode())
1445  return true;
1446  }
1447  return false;
1448 }
1449 
/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
  if (!IsBottomUp)
    llvm_unreachable("Top-down ILP metric is unimplemented");

  SchedDFSImpl Impl(*this);
  // Start a DFS only from nodes that are subDAG roots: not yet visited and
  // with no (non-boundary) data successors.
  for (const SUnit &SU : SUnits) {
    if (Impl.isVisited(&SU) || hasDataSucc(&SU))
      continue;

    SchedDAGReverseDFS DFS;
    Impl.visitPreorder(&SU);
    DFS.follow(&SU);
    while (true) {
      // Traverse the leftmost path as far as possible.
      while (DFS.getPred() != DFS.getPredEnd()) {
        const SDep &PredDep = *DFS.getPred();
        DFS.advance();
        // Ignore non-data edges.
        if (PredDep.getKind() != SDep::Data
            || PredDep.getSUnit()->isBoundaryNode()) {
          continue;
        }
        // An already visited edge is a cross edge, assuming an acyclic DAG.
        if (Impl.isVisited(PredDep.getSUnit())) {
          Impl.visitCrossEdge(PredDep, DFS.getCurr());
          continue;
        }
        // First visit: descend along this tree edge.
        Impl.visitPreorder(PredDep.getSUnit());
        DFS.follow(PredDep.getSUnit());
      }
      // Visit the top of the stack in postorder and backtrack.
      const SUnit *Child = DFS.getCurr();
      const SDep *PredDep = DFS.backtrack();
      Impl.visitPostorderNode(Child);
      // PredDep is null only when the stack just emptied (root backtracked).
      if (PredDep)
        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
      if (DFS.isComplete())
        break;
    }
  }
  Impl.finalize();
}
1494 
1495 /// The root of the given SubtreeID was just scheduled. For all subtrees
1496 /// connected to this tree, record the depth of the connection so that the
1497 /// nearest connected subtrees can be prioritized.
1498 void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1499  for (const Connection &C : SubtreeConnections[SubtreeID]) {
1500  SubtreeConnectLevels[C.TreeID] =
1501  std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1502  LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
1503  << SubtreeConnectLevels[C.TreeID] << '\n');
1504  }
1505 }
1506 
1507 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1509  OS << InstrCount << " / " << Length << " = ";
1510  if (!Length)
1511  OS << "BADILP";
1512  else
1513  OS << format("%g", ((double)InstrCount / Length));
1514 }
1515 
1517  dbgs() << *this << '\n';
1518 }
1519 
1520 namespace llvm {
1521 
1524  Val.print(OS);
1525  return OS;
1526 }
1527 
1528 } // end namespace llvm
1529 
1530 #endif
llvm::ScheduleDAGInstrs::initSUnits
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
Definition: ScheduleDAGInstrs.cpp:569
llvm::ScheduleDAGInstrs::FirstDbgValue
MachineInstr * FirstDbgValue
Definition: ScheduleDAGInstrs.h:249
llvm::MapVector< ValueType, SUList >::iterator
typename std::vector< std::pair< ValueType, SUList >> ::iterator iterator
Definition: MapVector.h:49
llvm::LaneBitmask
Definition: LaneBitmask.h:40
llvm::SparseMultiSet::setUniverse
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Definition: SparseMultiSet.h:202
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::TargetSchedModel::getWriteProcResBegin
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:134
llvm::MCProcResourceDesc::BufferSize
int BufferSize
Definition: MCSchedule.h:49
ScheduleDAG.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm::MachineInstr::getOperandNo
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
Definition: MachineInstr.h:683
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:491
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MapVector::remove_if
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::ScheduleDAGInstrs::addBarrierChain
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clear the map.
Definition: ScheduleDAGInstrs.cpp:686
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::LivePhysRegs::addReg
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
getUnderlyingObjectsForInstr
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
Definition: ScheduleDAGInstrs.cpp:128
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::TargetRegisterClass::getLaneMask
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of register in this class.
Definition: TargetRegisterInfo.h:206
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
ReductionSize
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
llvm::ScheduleDAGInstrs::CurrentVRegUses
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
Definition: ScheduleDAGInstrs.h:175
llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:34
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
getFunction
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:255
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:730
llvm::ScheduleDAGInstrs::begin
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
Definition: ScheduleDAGInstrs.h:277
llvm::SparseMultiSet::find
iterator find(const KeyT &Key)
Find an element by its key.
Definition: SparseMultiSet.h:375
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition: RegisterPressure.h:167
llvm::UnderlyingObject
Definition: ScheduleDAGInstrs.h:108
ScheduleDAGInstrs.h
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
isGlobalMemoryObject
static bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
Definition: ScheduleDAGInstrs.cpp:543
addEdge
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
Definition: LazyCallGraph.cpp:63
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:500
llvm::MapVector::clear
void clear()
Definition: MapVector.h:88
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
hasDataSucc
static bool hasDataSucc(const SUnit *SU)
Definition: ScheduleDAGInstrs.cpp:1441
llvm::ScheduleDAGInstrs::Value2SUsMap::size
unsigned size() const
Definition: ScheduleDAGInstrs.cpp:654
llvm::ScheduleDAGInstrs::Value2SUsMap::clear
void clear()
Clears map from all contents.
Definition: ScheduleDAGInstrs.cpp:649
llvm::ScheduleDAGInstrs::NumRegionInstrs
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
Definition: ScheduleDAGInstrs.h:154
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:119
llvm::SchedDFSImpl
Internal state used to compute SchedDFSResult.
Definition: ScheduleDAGInstrs.cpp:1225
llvm::SparseMultiSet::clear
void clear()
Clears the set.
Definition: SparseMultiSet.h:342
ErrorHandling.h
llvm::MCRegisterInfo::getNumRegs
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register informa...
Definition: MCRegisterInfo.h:491
MapVector.h
llvm::SUnit::const_pred_iterator
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
llvm::SUnit::isCommutable
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:278
llvm::SchedDFSImpl::addConnection
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
Definition: ScheduleDAGInstrs.cpp:1390
ValueTracking.h
llvm::SUnit::isCall
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
MachineBasicBlock.h
llvm::LivePhysRegs
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:48
stores
hexagon widen stores
Definition: HexagonStoreWidening.cpp:118
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::ScheduleDAGInstrs::Topo
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
Definition: ScheduleDAGInstrs.h:241
llvm::ScheduleDAGInstrs::SUList
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
Definition: ScheduleDAGInstrs.h:190
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
InstrCount
static unsigned InstrCount
Definition: DFAPacketizer.cpp:53
llvm::TargetSchedModel::computeOutputLatency
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
Definition: TargetSchedule.cpp:290
llvm::SDep::Kind
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
IntEqClasses.h
llvm::SparseMultiSet::eraseAll
void eraseAll(const KeyT &K)
Erase all elements with the given key.
Definition: SparseMultiSet.h:482
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:358
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:486
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::ScheduleDAGInstrs::Value2SUsMap::insert
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
Definition: ScheduleDAGInstrs.cpp:632
llvm::ScheduleDAGInstrs::startBlock
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:178
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
llvm::PressureDiffs
Array of PressureDiffs.
Definition: RegisterPressure.h:198
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:757
llvm::SchedDFSImpl::SchedDFSImpl
SchedDFSImpl(SchedDFSResult &r)
Definition: ScheduleDAGInstrs.cpp:1248
Operator.h
llvm::ScheduleDAGInstrs::getLaneMaskForMO
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
Definition: ScheduleDAGInstrs.cpp:368
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
toggleKills
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
Definition: ScheduleDAGInstrs.cpp:1096
EnableAASchedMI
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI DAG construction"))
llvm::ScheduleDAGInstrs::SchedModel
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
Definition: ScheduleDAGInstrs.h:125
Format.h
llvm::PressureDiffs::init
void init(unsigned N)
Initialize an array of N PressureDiffs.
Definition: RegisterPressure.cpp:648
HugeRegion
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::IntEqClasses::join
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
Definition: IntEqClasses.cpp:32
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::ScheduleDAGInstrs::end
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
Definition: ScheduleDAGInstrs.h:280
llvm::LegalityPredicates::any
Predicate any(Predicate P0, Predicate P1)
True iff P0 or P1 are true.
Definition: LegalizerInfo.h:241
llvm::ScheduleDAGInstrs::Value2SUsMap::clearList
void clearList(ValueType V)
Clears the list of SUs mapped to V.
Definition: ScheduleDAGInstrs.cpp:638
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
RegisterPressure.h
llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition: LiveIntervals.h:226
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::TargetSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
Definition: TargetSchedule.h:118
MachineRegisterInfo.h
llvm::ScheduleDAGInstrs::Value2SUsMap::reComputeSize
void reComputeSize()
Counts the number of SUs in this map after a reduction.
Definition: ScheduleDAGInstrs.cpp:657
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
llvm::TargetSchedModel::init
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
Definition: TargetSchedule.cpp:63
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::PressureDiffs::addInstruction
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
Definition: RegisterPressure.cpp:659
llvm::ScheduleDAGTopologicalSort::MarkDirty
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:768
llvm::SparseSet::size
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:190
llvm::SparseMultiSet< PhysRegSUOper, identity< unsigned >, uint16_t >::RangePair
std::pair< iterator, iterator > RangePair
Definition: SparseMultiSet.h:315
Instruction.h
CommandLine.h
llvm::SUnit::isBoundaryNode
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::ScheduleDAGInstrs::ScheduleDAGInstrs
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
Definition: ScheduleDAGInstrs.cpp:111
llvm::MapVector::begin
iterator begin()
Definition: MapVector.h:69
llvm::ScheduleDAGInstrs::addChainDependencies
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
Definition: ScheduleDAGInstrs.h:210
llvm::MachineInstr::isCall
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:823
llvm::ScheduleDAGInstrs::addVRegDefDeps
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
Definition: ScheduleDAGInstrs.cpp:395
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:410
Constants.h
llvm::SUnit::isUnbuffered
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
llvm::TargetRegisterInfo::getSubRegIndexLaneMask
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
Definition: TargetRegisterInfo.h:377
llvm::AAResults
Definition: AliasAnalysis.h:508
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::ScheduleDAGInstrs::addSchedBarrierDeps
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to scheduling...
Definition: ScheduleDAGInstrs.cpp:201
llvm::ScheduleDAGInstrs::UnknownValue
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
Definition: ScheduleDAGInstrs.h:236
llvm::PhysRegSUOper
Record a physical register access.
Definition: ScheduleDAGInstrs.h:76
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:370
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::ScheduleDAGInstrs::CanHandleTerminators
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
Definition: ScheduleDAGInstrs.h:136
llvm::ScheduleDAGInstrs::exitRegion
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
Definition: ScheduleDAGInstrs.cpp:197
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::ScheduleDAGInstrs::BarrierChain
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
Definition: ScheduleDAGInstrs.h:182
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:110
llvm::SparseMultiSet::empty
bool empty() const
Returns true if the set is empty.
Definition: SparseMultiSet.h:328
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::MachineInstr::mayAlias
bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
Definition: MachineInstr.cpp:1328
llvm::ScheduleDAGInstrs::addPhysRegDeps
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the ...
Definition: ScheduleDAGInstrs.cpp:291
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::ILPValue
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:34
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:481
PseudoSourceValue.h
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::SUnit::setInstr
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
MachineInstrBundle.h
llvm::MachineBasicBlock::rend
reverse_iterator rend()
Definition: MachineBasicBlock.h:278
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
llvm::SDep::Output
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
SmallPtrSet.h
llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:151
llvm::TargetSchedModel::getWriteProcResEnd
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:138
llvm::PseudoSourceValue
Special value supplied for machine level alias analysis.
Definition: PseudoSourceValue.h:35
llvm::ILPValue::print
void print(raw_ostream &OS) const
Definition: ScheduleDAGInstrs.cpp:1508
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
llvm::VReg2SUnit
An individual mapping from virtual register number to SUnit.
Definition: ScheduleDAGInstrs.h:52
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::MapVector::operator[]
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:98
Type.h
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::RegisterOperands::collect
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Definition: RegisterPressure.cpp:570
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::MCInstrDesc::hasImplicitDefOfPhysReg
bool hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:33
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::ScheduleDAGInstrs::Value2SUsMap::Value2SUsMap
Value2SUsMap(unsigned lat=0)
Definition: ScheduleDAGInstrs.cpp:623
llvm::SparseMultiSet::end
iterator end()
Returns an iterator past this container.
Definition: SparseMultiSet.h:319
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::RegPressureTracker::recedeSkipDebugValues
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Definition: RegisterPressure.cpp:853
llvm::cl::opt< bool >
llvm::SparseMultiSet::insert
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
Definition: SparseMultiSet.h:419
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:197
llvm::MachineOperand::isUndef
bool isUndef() const
Definition: MachineOperand.h:395
llvm::MapVector< ValueType, SUList >::find
iterator find(const ValueType &Key)
Definition: MapVector.h:147
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
LiveIntervals.h
getReductionSize
static unsigned getReductionSize()
Definition: ScheduleDAGInstrs.cpp:91
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::IntEqClasses::compress
void compress()
compress - Compress equivalence classes by numbering them 0 .
Definition: IntEqClasses.cpp:60
llvm::LivePhysRegs::available
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
Definition: LivePhysRegs.cpp:139
llvm::MachineOperand::isDead
bool isDead() const
Definition: MachineOperand.h:385
llvm::TargetRegisterClass::HasDisjunctSubRegs
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
Definition: TargetRegisterInfo.h:63
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::LaneBitmask::any
constexpr bool any() const
Definition: LaneBitmask.h:53
llvm::UndefValue
'undef' values are things that do not have specified contents.
Definition: Constants.h:1348
llvm::SparseMultiSet< PhysRegSUOper, identity< unsigned >, uint16_t >::iterator
iterator_base< SparseMultiSet * > iterator
Definition: SparseMultiSet.h:311
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
MCRegisterInfo.h
llvm::SDep::MayAliasMem
@ MayAliasMem
Nonvolatile load/Store instructions that may alias.
Definition: ScheduleDAG.h:70
size
i< reg-> size
Definition: README.txt:166
llvm::ScheduleDAGInstrs::Defs
Reg2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instr...
Definition: ScheduleDAGInstrs.h:167
llvm::SchedDFSImpl::visitPostorderEdge
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
Definition: ScheduleDAGInstrs.cpp:1313
llvm::ScheduleDAGInstrs::MFI
const MachineFrameInfo & MFI
Definition: ScheduleDAGInstrs.h:122
llvm::SchedDFSImpl::joinPredSubtree
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
Definition: ScheduleDAGInstrs.cpp:1363
llvm::SparseSet::setUniverse
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::SUnit::hasPhysRegUses
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
llvm::SchedDFSImpl::finalize
void finalize()
Sets each node's subtree ID to the representative ID and record connections between trees.
Definition: ScheduleDAGInstrs.cpp:1326
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SchedDFSImpl::visitPostorderNode
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
Definition: ScheduleDAGInstrs.cpp:1271
llvm::SchedDFSImpl::visitPreorder
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
Definition: ScheduleDAGInstrs.cpp:1263
llvm::ScheduleDAGInstrs::TrackLaneMasks
bool TrackLaneMasks
Whether lane masks should get tracked.
Definition: ScheduleDAGInstrs.h:139
llvm::getUnderlyingObjectsForCodeGen
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
Definition: ValueTracking.cpp:4494
iterator_range.h
llvm::MachineOperand::isRegMask
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
Definition: MachineOperand.h:345
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MachineRegisterInfo::hasOneDef
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
Definition: MachineRegisterInfo.h:444
llvm::ScheduleDAGInstrs::DbgValues
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
Definition: ScheduleDAGInstrs.h:248
llvm::TargetSchedModel::computeOperandLatency
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
Definition: TargetSchedule.cpp:184
UseTBAA
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
llvm::Value::printAsOperand
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4675
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1540
llvm::SparseMultiSet::equal_range
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
Definition: SparseMultiSet.h:411
llvm::ScheduleDAG
Definition: ScheduleDAG.h:555
llvm::SparseMultiSet::contains
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
Definition: SparseMultiSet.h:395
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::VReg2SUnitOperIdx
Mapping from virtual register to SUnit including an operand index.
Definition: ScheduleDAGInstrs.h:66
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:355
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:272
llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::SparseMultiSet::erase
iterator erase(iterator I)
Erases an existing element identified by a valid iterator.
Definition: SparseMultiSet.h:466
llvm::MachineOperand::setIsUndef
void setIsUndef(bool Val=true)
Definition: MachineOperand.h:511
Compiler.h
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MachineInstr::print
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
Definition: MachineInstr.cpp:1577
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:375
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::RegPressureTracker::getPos
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
Definition: RegisterPressure.h:414
llvm::ScheduleDAGInstrs::getSchedClass
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
Definition: ScheduleDAGInstrs.h:265
SparseSet.h
llvm::ScheduleDAGInstrs::Value2SUsMap::dump
void dump()
Definition: ScheduleDAGInstrs.cpp:1031
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::ScheduleDAGInstrs::MISUnitMap
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
Definition: ScheduleDAGInstrs.h:158
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::PointerUnion< const Value *, const PseudoSourceValue * >
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ScheduleDAGInstrs::enterRegion
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
Definition: ScheduleDAGInstrs.cpp:187
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:365
llvm::ScheduleDAGInstrs::addChainDependency
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AAResults and Target fail to deny the depende...
Definition: ScheduleDAGInstrs.cpp:548
llvm::MapVector< ValueType, SUList >::end
iterator end()
Definition: MapVector.h:71
llvm::ScheduleDAGInstrs::Value2SUsMap::getTrueMemOrderLatency
unsigned getTrueMemOrderLatency() const
Definition: ScheduleDAGInstrs.cpp:663
llvm::ScheduleDAGInstrs::newSUnit
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and return a ptr to it.
Definition: ScheduleDAGInstrs.h:379
llvm::MachineRegisterInfo::isConstantPhysReg
bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
Definition: MachineRegisterInfo.cpp:513
llvm::ScheduleDAGInstrs::RemoveKillFlags
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
Definition: ScheduleDAGInstrs.h:129
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::ScheduleDAGInstrs::AAForDep
AAResults * AAForDep
Definition: ScheduleDAGInstrs.h:177
j
return j(j<< 16)
llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
llvm::ScheduleDAGInstrs::RegionBegin
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:148
llvm::isIdentifiedObject
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Definition: AliasAnalysis.cpp:973
llvm::MachineOperand::readsReg
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
Definition: MachineOperand.h:458
llvm::SUnit::NumSuccs
unsigned NumSuccs
Definition: ScheduleDAG.h:267
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::MCInstrDesc::hasImplicitUseOfPhysReg
bool hasImplicitUseOfPhysReg(unsigned Reg) const
Return true if this instruction implicitly uses the specified physical register.
Definition: MCInstrDesc.h:595
llvm::ConstMIBundleOperands
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
Definition: MachineInstrBundle.h:185
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
MachineFrameInfo.h
llvm::SDep::setLatency
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
Casting.h
Function.h
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1492
llvm::SparseSet::count
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise.
Definition: SparseSet.h:240
llvm::SchedDFSResult
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
llvm::ScheduleDAGInstrs::Value2SUsMap
Definition: ScheduleDAGInstrs.cpp:615
llvm::ScheduleDAGInstrs::deadDefHasNoUse
bool deadDefHasNoUse(const MachineOperand &MO)
Returns true if the def register in MO has no uses.
Definition: ScheduleDAGInstrs.cpp:382
llvm::SUnit::addPred
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Definition: ScheduleDAG.cpp:107
llvm::SparseSet::erase
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:288
llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::SUnit::addPredBarrier
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store fol...
Definition: ScheduleDAG.h:384
llvm::ScheduleDAGInstrs::Uses
Reg2SUnitsMap Uses
Definition: ScheduleDAGInstrs.h:168
llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
llvm::MachineInstr::isBundle
bool isBundle() const
Definition: MachineInstr.h:1287
llvm::RegisterOperands::adjustLaneLiveness
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
Definition: RegisterPressure.cpp:601
llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:145
llvm::MCSubRegIterator
MCSubRegIterator enumerates all sub-registers of Reg.
Definition: MCRegisterInfo.h:594
llvm::ScheduleDAGInstrs::Value2SUsMap::operator[]
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
Definition: ScheduleDAGInstrs.cpp:627
llvm::MCRegAliasIterator::isValid
bool isValid() const
Definition: MCRegisterInfo.h:805
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
Instructions.h
llvm::SUnit::hasReservedResource
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
llvm::skipDebugInstructionsBackward
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
Definition: MachineBasicBlock.h:1219
SmallVector.h
UseAA
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:182
llvm::ScheduleDAGInstrs::addPhysRegDataDeps
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU's instruction that defines a physical register.
Definition: ScheduleDAGInstrs.cpp:233
ScheduleDFS.h
llvm::ScheduleDAGInstrs::CurrentVRegDefs
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
Definition: ScheduleDAGInstrs.h:173
N
#define N
llvm::ScheduleDAGInstrs::addVRegUseDeps
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx ...
Definition: ScheduleDAGInstrs.cpp:515
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
LaneBitmask.h
llvm::MachineFunction::getDataLayout
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Definition: MachineFunction.cpp:260
llvm::IntEqClasses::getNumClasses
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:71
llvm::MachineInstr::isTransient
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
Definition: MachineInstr.h:1341
llvm::SchedDFSImpl::visitCrossEdge
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
Definition: ScheduleDAGInstrs.cpp:1320
MachineMemOperand.h
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
dumpSUList
static void dumpSUList(ScheduleDAGInstrs::SUList &L)
Definition: ScheduleDAGInstrs.cpp:99
MachineOperand.h
llvm::SparseSet< RootData >
llvm::SchedDFSImpl::isVisited
bool isVisited(const SUnit *SU) const
Returns true if this node been visited by the DFS traversal.
Definition: ScheduleDAGInstrs.cpp:1256
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::IntEqClasses
Definition: IntEqClasses.h:27
llvm::MachineFrameInfo::hasTailCall
bool hasTailCall() const
Returns true if the function contains a tail call.
Definition: MachineFrameInfo.h:606
SlotIndexes.h
llvm::cl::desc
Definition: CommandLine.h:414
llvm::ScheduleDAGInstrs::insertBarrierChain
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
Definition: ScheduleDAGInstrs.cpp:697
raw_ostream.h
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
llvm::MachineInstr::registerDefIsDead
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
Definition: MachineInstr.h:1414
llvm::MachineInstrBundleIterator< MachineInstr >
Value.h
llvm::raw_string_ostream::str
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:643
llvm::MachineInstr::isBarrier
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:838
llvm::ScheduleDAGInstrs::reduceHugeMemNodeMaps
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
Definition: ScheduleDAGInstrs.cpp:1050
llvm::ScheduleDAG::clearDAG
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:64
llvm::MCInstrDesc::getNumOperands
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:228
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:618
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
TargetRegisterInfo.h
llvm::RegPressureTracker::recede
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
Definition: RegisterPressure.cpp:874
Debug.h
llvm::ScheduleDAG::ExitSU
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:564
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::LaneBitmask::getAll
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:84
SubReg
unsigned SubReg
Definition: AArch64AdvSIMDScalarPass.cpp:104
llvm::TargetSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
Definition: TargetSchedule.cpp:39
llvm::MCRegAliasIterator
MCRegAliasIterator enumerates all registers aliasing Reg.
Definition: MCRegisterInfo.h:780
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
LivePhysRegs.h