//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

#define DEBUG_TYPE "misched"

static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));

static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.

// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
// together hold this many SUs, a reduction of maps will be done.
static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
                             "prior to scheduling, at which point a trade-off "
                             "is made to avoid excessive compile time."));

static cl::opt<unsigned> ReductionSize(
    "dag-maps-reduction-size", cl::Hidden,
    cl::desc("A huge scheduling region will have maps reduced by this many "
             "nodes at a time. Defaults to HugeRegion / 2."));

static unsigned getReductionSize() {
  // Always reduce a huge region with half of the elements, except
  // when user sets this number explicitly.
  if (ReductionSize.getNumOccurrences() == 0)
    return HugeRegion / 2;
  return ReductionSize;
}
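
// NOTE (illustrative, added in this edited listing; not part of the original
// file): both knobs above are plain cl::opt flags, so they can be tuned from
// the llc command line to trade compile time against DAG precision, e.g.:
//
//   llc -dag-maps-huge-region=50 -dag-maps-reduction-size=25 foo.ll
//
// With these hypothetical values, once the Stores and Loads maps together
// hold 50 SUs, the 25 bottom-most of them (highest NodeNums) are folded
// behind a BarrierChain node by reduceHugeMemNodeMaps() below.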

static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "{ ";
  for (const SUnit *su : L) {
    dbgs() << "SU(" << su->NodeNum << ")";
    if (su != L.back())
      dbgs() << ", ";
  }
  dbgs() << "}\n";
#endif
}

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool RemoveKillFlags)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
      RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
      TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
      UnknownValue(UndefValue::get(
                     Type::getVoidTy(mf.getFunction()->getContext()))),
      FirstDbgValue(nullptr) {
  DbgValues.clear();

  const TargetSubtargetInfo &ST = mf.getSubtarget();
  SchedModel.init(ST.getSchedModel(), &ST, TII);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant, a multiplied value, or a phi, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
           !isa<PHINode>(U->getOperand(1))))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}
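
// NOTE (illustrative, added in this edited listing; not part of the original
// file): the loop above walks IR of this shape, assuming %base is a pointer:
//
//   %int = ptrtoint i8* %base to i64
//   %sum = add i64 %int, 16            ; constant-offset arithmetic
//   %ptr = inttoptr i64 %sum to i8*
//
// Starting from %sum (the inttoptr operand handed over by the wrapper
// below), the add's constant RHS lets the walk follow the LHS to %int,
// whose ptrtoint operand %base is returned as the underlying object.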

/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
                                 SmallVectorImpl<Value *> &Objects,
                                 const DataLayout &DL) {
  SmallPtrSet<const Value *, 4> Visited;
  SmallVector<const Value *, 4> Working(1, V);
  do {
    V = Working.pop_back_val();

    SmallVector<Value *, 4> Objs;
    GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);

    for (Value *V : Objs) {
      if (!Visited.insert(V).second)
        continue;
      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
        const Value *O =
          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
        if (O->getType()->isPointerTy()) {
          Working.push_back(O);
          continue;
        }
      }
      Objects.push_back(const_cast<Value *>(V));
    }
  } while (!Working.empty());
}

/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                         const MachineFrameInfo &MFI,
                                         UnderlyingObjectsVector &Objects,
                                         const DataLayout &DL) {
  auto allMMOsOkay = [&]() {
    for (const MachineMemOperand *MMO : MI->memoperands()) {
      if (MMO->isVolatile())
        return false;

      if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
        // Functions that contain tail calls don't have unique
        // PseudoSourceValue objects. Two PseudoSourceValues might refer to
        // the same or overlapping locations. The client code calling this
        // function assumes this is not the case. So return a conservative
        // answer of no known object.
        if (MFI.hasTailCall())
          return false;

        // For now, ignore PseudoSourceValues which may alias LLVM IR values
        // because the code that uses this function has no way to cope with
        // such aliases.
        if (PSV->isAliased(&MFI))
          return false;

        bool MayAlias = PSV->mayAlias(&MFI);
        Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
      } else if (const Value *V = MMO->getValue()) {
        SmallVector<Value *, 4> Objs;
        getUnderlyingObjects(V, Objs, DL);

        for (Value *V : Objs) {
          if (!isIdentifiedObject(V))
            return false;

          Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
        }
      } else
        return false;
    }
    return true;
  };

  if (!allMMOsOkay())
    Objects.clear();
}
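
// NOTE (illustrative, added in this edited listing; not part of the original
// file): a typical hit for the PseudoSourceValue path above is a spill or
// reload, whose memory operand references a stack-slot PSV rather than an IR
// Value. Such a PSV typically reports isAliased(&MFI) == false and
// mayAlias(&MFI) == false, so during buildSchedGraph() the SU lands in the
// NonAliasStores/NonAliasLoads maps and never receives chain edges against
// SUs that access ordinary IR values.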

void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = nullptr;
}

/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  NumRegionInstrs = regioninstrs;
}

/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch, or when the live-out registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
  ExitSU.setInstr(ExitMI);
  // Add dependencies on the defs and uses of the instruction.
  if (ExitMI) {
    for (const MachineOperand &MO : ExitMI->operands()) {
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
      }
    }
  }
  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    for (const MachineBasicBlock *Succ : BB->successors()) {
      for (const auto &LI : Succ->liveins()) {
        if (!Uses.contains(LI.PhysReg))
          Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
      }
    }
  }
}
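
// NOTE (illustrative, added in this edited listing; not part of the original
// file; the x86 instruction names are hypothetical MIR): for a region ending
// in a conditional branch, e.g.
//
//   %EAX<def> = ADD32ri %EAX, 1
//   JNE_1 <BB#2>
//
// the successors' live-ins (say %EAX) are recorded as uses of ExitSU, so the
// ADD later receives a data edge to ExitSU and cannot be scheduled as if its
// result were dead within this region.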

/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = I->OpIdx;
      MachineInstr *RegUse = nullptr;
      SDep Dep;
      if (UseOp < 0)
        Dep = SDep(SU, SDep::Artificial);
      else {
        // Set the hasPhysRegDefs only for physreg defs that have a use within
        // the scheduling region.
        SU->hasPhysRegDefs = true;
        Dep = SDep(SU, SDep::Data, *Alias);
        RegUse = UseSU->getInstr();
      }
      Dep.setLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
                                         UseOp));

      ST.adjustSchedDependency(SU, UseSU, Dep);
      UseSU->addPred(Dep);
    }
  }
}

/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();
  // We do not need to track any dependencies for constant registers.
  if (MRI.isConstantPhysReg(Reg))
    return;

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
      SUnit *DefSU = I->SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
        else {
          SDep Dep(SU, Kind, /*Reg=*/*Alias);
          Dep.setLatency(
            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
          DefSU->addPred(Dep);
        }
      }
    }
  }

  if (!MO.isDef()) {
    SU->hasPhysRegUses = true;
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
    if (RemoveKillFlags)
      MO.setIsKill(false);
  } else {
    addPhysRegDataDeps(SU, OperIdx);

    // Clear this register's use list.
    if (Uses.contains(Reg))
      Uses.eraseAll(Reg);

    if (!MO.isDead()) {
      Defs.eraseAll(Reg);
    } else if (SU->isCall) {
      // Calls will not be reordered because of chain dependencies (see
      // below). Since call operands are dead, calls may continue to be added
      // to the DefList making dependence checking quadratic in the size of
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
      Reg2SUnitsMap::iterator B = P.first;
      Reg2SUnitsMap::iterator I = P.second;
      for (bool isBegin = I == B; !isBegin; /* empty */) {
        isBegin = (--I) == B;
        if (!I->SU->isCall)
          break;
        I = Defs.erase(I);
      }
    }

    // Defs are pushed in the order they are visited and never reordered.
    Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
  }
}
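
// NOTE (illustrative, added in this edited listing; not part of the original
// file; the x86 MIR below is hypothetical): given this region, visited
// bottom-up by buildSchedGraph(),
//
//   SU(0): %EAX<def> = MOV32ri 1
//   SU(1): %ECX<def> = ADD32rr %ECX, %EAX    ; reads EAX
//   SU(2): %EAX<def> = MOV32ri 2
//
// SU(2) is processed first and records its def of EAX. Processing SU(1)'s
// use adds an anti edge SU(1)->SU(2). Processing SU(0)'s def finally adds a
// data edge SU(0)->SU(1) (from the Uses map) and an output edge
// SU(0)->SU(2) (from the Defs map).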

LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
  unsigned Reg = MO.getReg();
  // No point in tracking lanemasks if we don't have interesting subregisters.
  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
  if (!RC.HasDisjunctSubRegs)
    return LaneBitmask::getAll();

  unsigned SubReg = MO.getSubReg();
  if (SubReg == 0)
    return RC.getLaneMask();
  return TRI->getSubRegIndexLaneMask(SubReg);
}
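
// NOTE (illustrative, added in this edited listing; not part of the original
// file; the subregister names and mask values are made up): on a target
// where %vreg1 belongs to a 128-bit class with two disjoint 64-bit
// subregisters sub_lo and sub_hi, an operand "%vreg1:sub_hi" returns only
// sub_hi's lane mask (say 0xC out of the class's full mask 0xF), while an
// operand with no subregister index returns the full mask 0xF.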

/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  LaneBitmask DefLaneMask;
  LaneBitmask KillLaneMask;
  if (TrackLaneMasks) {
    bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
    DefLaneMask = getLaneMaskForMO(MO);
    // If we have a <read-undef> flag, none of the lane values comes from an
    // earlier instruction.
    KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;

    // Clear undef flag, we'll re-add it later once we know which subregister
    // Def is first.
    MO.setIsUndef(false);
  } else {
    DefLaneMask = LaneBitmask::getAll();
    KillLaneMask = LaneBitmask::getAll();
  }

  if (MO.isDead()) {
    assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
           "Dead defs should have no uses");
  } else {
    // Add data dependence to all uses we found so far.
    const TargetSubtargetInfo &ST = MF.getSubtarget();
    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
         E = CurrentVRegUses.end(); I != E; /*empty*/) {
      LaneBitmask LaneMask = I->LaneMask;
      // Ignore uses of other lanes.
      if ((LaneMask & KillLaneMask).none()) {
        ++I;
        continue;
      }

      if ((LaneMask & DefLaneMask).any()) {
        SUnit *UseSU = I->SU;
        MachineInstr *Use = UseSU->getInstr();
        SDep Dep(SU, SDep::Data, Reg);
        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
                                                        I->OperandIndex));
        ST.adjustSchedDependency(SU, UseSU, Dep);
        UseSU->addPred(Dep);
      }

      LaneMask &= ~KillLaneMask;
      // If we found a Def for all lanes of this use, remove it from the list.
      if (LaneMask.any()) {
        I->LaneMask = LaneMask;
        ++I;
      } else
        I = CurrentVRegUses.erase(I);
    }
  }

  // Shortcut: Singly defined vregs do not have output/anti dependencies.
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest defs of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  LaneBitmask LaneMask = DefLaneMask;
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for other lanes.
    if ((V2SU.LaneMask & LaneMask).none())
      continue;
    // Add an output dependence.
    SUnit *DefSU = V2SU.SU;
    // Ignore additional defs of the same lanes in one instruction. This can
    // happen because lanemasks are shared for targets with too many
    // subregisters. We also use some representation tricks/hacks where we
    // add super-register defs/uses, to imply that although we only access
    // parts of the reg we care about the full one.
    if (DefSU == SU)
      continue;
    SDep Dep(SU, SDep::Output, Reg);
    Dep.setLatency(
      SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
    DefSU->addPred(Dep);

    // Update current definition. This can get tricky if the def was about a
    // bigger lanemask before. We then have to shrink it and create a new
    // VReg2SUnit for the non-overlapping part.
    LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
    LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
    V2SU.SU = SU;
    V2SU.LaneMask = OverlapMask;
    if (NonOverlapMask.any())
      CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
  }
  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
  if (LaneMask.any())
    CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}

/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);
  unsigned Reg = MO.getReg();

  // Remember the use. Data dependencies will be added when we find the def.
  LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
                                        : LaneBitmask::getAll();
  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));

  // Add antidependences to the following defs of the vreg.
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for unrelated lanes.
    LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
    if ((PrevDefLaneMask & LaneMask).none())
      continue;
    if (V2SU.SU == SU)
      continue;

    V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
  }
}

/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}

/// This returns true if the two MIs need a chain edge between them.
/// This is called on normal stores and loads.
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                             const DataLayout &DL, MachineInstr *MIa,
                             MachineInstr *MIb) {
  const MachineFunction *MF = MIa->getParent()->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  assert((MIa->mayStore() || MIb->mayStore()) &&
         "Dependency checked between two loads");

  // Let the target decide if memory accesses cannot possibly overlap.
  if (TII->areMemAccessesTriviallyDisjoint(*MIa, *MIb, AA))
    return false;

  // To this point analysis is generic. From here on we do need AA.
  if (!AA)
    return true;

  // FIXME: Need to handle multiple memory operands to support all targets.
  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
    return true;

  MachineMemOperand *MMOa = *MIa->memoperands_begin();
  MachineMemOperand *MMOb = *MIb->memoperands_begin();

  if (!MMOa->getValue() || !MMOb->getValue())
    return true;

  // The following interface to AA is fashioned after DAGCombiner::isAlias
  // and operates with MachineMemOperand offset with some important
  // assumptions:
  //   - LLVM fundamentally assumes flat address spaces.
  //   - MachineOperand offset can *only* result from legalization and
  //     cannot affect queries other than the trivial case of overlap
  //     checking.
  //   - These offsets never wrap and never step outside
  //     of allocated objects.
  //   - There should never be any negative offsets here.
  //
  // FIXME: Modify API to hide this math from "user"
  // FIXME: Even before we go to AA we can reason locally about some
  // memory objects. It can save compile time, and possibly catch some
  // corner cases not currently covered.

  assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
  assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");

  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;

  AliasResult AAResult =
      AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
                               UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
                MemoryLocation(MMOb->getValue(), Overlapb,
                               UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));

  return (AAResult != NoAlias);
}
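
// NOTE (illustrative, added in this edited listing; not part of the original
// file): a worked instance of the overlap math above. For MMOa with
// {Offset 8, Size 4} and MMOb with {Offset 0, Size 8} on the same Value:
//
//   MinOffset = min(8, 0) = 0
//   Overlapa  = 4 + 8 - 0 = 12
//   Overlapb  = 8 + 0 - 0 = 8
//
// so AA compares two locations of sizes 12 and 8 anchored at the shared
// base value, which is conservative but correct for legalization-produced
// offsets.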

/// Check whether two objects need a chain edge and add it if needed.
void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
                                           unsigned Latency) {
  if (MIsNeedChainEdge(AAForDep, &MFI, MF.getDataLayout(), SUa->getInstr(),
                       SUb->getInstr())) {
    SDep Dep(SUa, SDep::MayAliasMem);
    Dep.setLatency(Latency);
    SUb->addPred(Dep);
  }
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(NumRegionInstrs);

  for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
    if (MI.isDebugValue())
      continue;

    SUnit *SU = newSUnit(&MI);
    MISUnitMap[&MI] = SU;

    SU->isCall = MI.isCall();
    SU->isCommutable = MI.isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());

    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
    //
    // Reserved resources block an instruction from issuing and stall the
    // entire pipeline. These are identified by BufferSize=0.
    //
    // Unbuffered resources prevent execution of subsequent instructions that
    // require the same resources. This is used for in-order execution pipelines
    // within an out-of-order core. These are identified by BufferSize=1.
    if (SchedModel.hasInstrSchedModel()) {
      const MCSchedClassDesc *SC = getSchedClass(SU);
      for (const MCWriteProcResEntry &PRE :
           make_range(SchedModel.getWriteProcResBegin(SC),
                      SchedModel.getWriteProcResEnd(SC))) {
        switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
        case 0:
          SU->hasReservedResource = true;
          break;
        case 1:
          SU->isUnbuffered = true;
          break;
        default:
          break;
        }
      }
    }
  }
}

/// A map from ValueType to a list of SUnits, used during DAG construction to
/// track the memory accesses seen so far per underlying object.
class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {

  /// Current total number of SUs in map.
  unsigned NumNodes;

  /// 1 for loads, 0 for stores. (see comment in SUList)
  unsigned TrueMemOrderLatency;
public:

  Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}

  /// To keep NumNodes up to date, insert() should be used instead of
  /// this operator w/ push_back().
  ValueType &operator[](const SUList &Key) {
    llvm_unreachable("Don't use. Use insert() instead.");
  }

  /// Add SU to the SUList of V. If Map grows huge, reduce its size
  /// by calling reduce().
  void inline insert(SUnit *SU, ValueType V) {
    MapVector::operator[](V).push_back(SU);
    NumNodes++;
  }

  /// Clears the list of SUs mapped to V.
  void inline clearList(ValueType V) {
    iterator Itr = find(V);
    if (Itr != end()) {
      assert(NumNodes >= Itr->second.size());
      NumNodes -= Itr->second.size();

      Itr->second.clear();
    }
  }

  /// Clears map from all contents.
  void clear() {
    MapVector::clear();
    NumNodes = 0;
  }

  unsigned inline size() const { return NumNodes; }

  /// Count the number of SUs in this map after a reduction.
  void reComputeSize(void) {
    NumNodes = 0;
    for (auto &I : *this)
      NumNodes += I.second.size();
  }

  unsigned inline getTrueMemOrderLatency() const {
    return TrueMemOrderLatency;
  }

  void dump();
};
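
// NOTE (illustrative, added in this edited listing; not part of the original
// file): a sketch of how buildSchedGraph() below drives this map when it
// sees a store SU for underlying object V:
//
//   Value2SUsMap Stores;                       // TrueMemOrderLatency == 0
//   Stores.insert(SU, V);                      // appends SU to V's SUList
//   ...
//   if (Stores.size() + Loads.size() >= HugeRegion)  // NumNodes totals
//     reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
//
// operator[] is deliberately unusable so that every insertion goes through
// insert(), keeping the NumNodes count in sync with the per-value lists.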

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap) {
  for (auto &I : Val2SUsMap)
    addChainDependencies(SU, I.second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
                                             Value2SUsMap &Val2SUsMap,
                                             ValueType V) {
  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
  if (Itr != Val2SUsMap.end())
    addChainDependencies(SU, Itr->second,
                         Val2SUsMap.getTrueMemOrderLatency());
}

void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  for (auto &I : map) {
    SUList &sus = I.second;
    for (auto *SU : sus)
      SU->addPredBarrier(BarrierChain);
  }
  map.clear();
}

void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
  assert(BarrierChain != nullptr);

  // Go through all lists of SUs.
  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
    Value2SUsMap::iterator CurrItr = I++;
    SUList &sus = CurrItr->second;
    SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
    for (; SUItr != SUEE; ++SUItr) {
      // Stop on BarrierChain or any instruction above it.
      if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
        break;

      (*SUItr)->addPredBarrier(BarrierChain);
    }

    // Also remove the BarrierChain from the list, if present.
    if (SUItr != SUEE && *SUItr == BarrierChain)
      SUItr++;

    // Remove all SUs that are now successors of BarrierChain.
    if (SUItr != sus.begin())
      sus.erase(sus.begin(), SUItr);
  }

  // Remove all entries with empty su lists.
  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
    return (mapEntry.second.empty()); });

  // Recompute the size of the map (NumNodes).
  map.reComputeSize();
}

/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker,
                                        PressureDiffs *PDiffs,
                                        LiveIntervals *LIS,
                                        bool TrackLaneMasks) {
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                       : ST.useAA();
  AAForDep = UseAA ? AA : nullptr;

  BarrierChain = nullptr;

  this->TrackLaneMasks = TrackLaneMasks;
  MISUnitMap.clear();
  ScheduleDAG::clearDAG();

  // Create an SUnit for each real instruction.
  initSUnits();

  if (PDiffs)
    PDiffs->init(SUnits.size());

  // We build scheduling units by walking a block's instruction list
  // from bottom to top.

  // Each MI's memory operand(s) is analyzed to a list of underlying
  // objects. The SU is then inserted in the SUList(s) mapped from the
  // Value(s). Each Value thus gets mapped to lists of SUs depending
  // on it, stores and loads kept separately. Two SUs are trivially
  // non-aliasing if they both depend on only identified Values and do
  // not share any common Value.
  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);

  // Certain memory accesses are known to not alias any SU in Stores
  // or Loads, and have therefore their own 'NonAlias'
  // domain. E.g. spill / reload instructions never alias LLVM IR
  // Values. It would be nice to assume that this type of memory
  // accesses always have a proper memory operand modelling, and are
  // therefore never unanalyzable, but this is conservatively not
  // done.
  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = nullptr;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setUniverse(TRI->getNumRegs());
  Uses.setUniverse(TRI->getNumRegs());

  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
  unsigned NumVirtRegs = MRI.getNumVirtRegs();
  CurrentVRegDefs.setUniverse(NumVirtRegs);
  CurrentVRegUses.setUniverse(NumVirtRegs);

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *DbgMI = nullptr;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (DbgMI) {
      DbgValues.push_back(std::make_pair(DbgMI, &MI));
      DbgMI = nullptr;
    }

    if (MI.isDebugValue()) {
      DbgMI = &MI;
      continue;
    }
    SUnit *SU = MISUnitMap[&MI];
    assert(SU && "No SUnit mapped to this MI");

    if (RPTracker) {
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
      if (TrackLaneMasks) {
        SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
      }
      if (PDiffs != nullptr)
        PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);

      RPTracker->recedeSkipDebugValues();
      assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
      RPTracker->recede(RegOpers);
    }

    assert(
        (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
        "Cannot schedule terminators or labels!");

    // Add register-based dependencies (data, anti, and output).
    // For some instructions (calls, returns, inline-asm, etc.) there can
    // be explicit uses and implicit defs, in which case the use will appear
    // on the operand list before the def. Do two passes over the operand
    // list to make sure that defs are processed before any uses.
    bool HasVRegDef = false;
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        HasVRegDef = true;
        addVRegDefDeps(SU, j);
      }
    }
    // Now process all uses.
    for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI.getOperand(j);
      // Only look at use operands.
      // We do not need to check for MO.readsReg() here because subsequent
      // subregister defs will get output dependence edges and need no
      // additional use dependencies.
      if (!MO.isReg() || !MO.isUse())
        continue;
      unsigned Reg = MO.getReg();
      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
        addPhysRegDeps(SU, j);
      } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
        addVRegUseDeps(SU, j);
      }
    }

    // If we haven't seen any uses in this scheduling region, create a
    // dependence edge to ExitSU to model the live-out latency. This is required
    // for vreg defs with no in-region use, and prefetches with no vreg def.
    //
    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
    // check currently relies on being called before adding chain deps.
    if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
      SDep Dep(SU, SDep::Artificial);
      Dep.setLatency(SU->Latency - 1);
      ExitSU.addPred(Dep);
    }

    // Add memory dependencies (Note: isStoreToStackSlot and
    // isLoadFromStackSlot are not usable after stack slots are lowered to
    // actual addresses).

    // This is a barrier event that acts as a pivotal node in the DAG.
    if (isGlobalMemoryObject(AA, &MI)) {

      // Become the barrier chain.
      if (BarrierChain)
        BarrierChain->addPredBarrier(SU);
      BarrierChain = SU;

      DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);

      // Add dependencies against everything below it and clear maps.
      addBarrierChain(Stores);
      addBarrierChain(Loads);
      addBarrierChain(NonAliasStores);
      addBarrierChain(NonAliasLoads);

      continue;
    }

    // If it's not a store or a variant load, we're done.
    if (!MI.mayStore() &&
        !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
      continue;

    // Always add a dependency edge to BarrierChain if present.
    if (BarrierChain)
      BarrierChain->addPredBarrier(SU);

    // Find the underlying objects for MI. The Objs vector is either
    // empty, or filled with the Values of memory locations which this
    // SU depends on. An empty vector means the memory location is
    // unknown, and may alias anything.
    UnderlyingObjectsVector Objs;
    getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout());

    if (MI.mayStore()) {
      if (Objs.empty()) {
        // An unknown store depends on all stores and loads.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);
        addChainDependencies(SU, Loads);
        addChainDependencies(SU, NonAliasLoads);

        // Map this store to 'UnknownValue'.
        Stores.insert(SU, UnknownValue);
      } else {
        // Add precise dependencies against all previously seen memory
        // accesses mapped to the same Value(s).
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add dependencies to previous stores and loads mapped to V.
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
          addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
        }
        // Update the store map after all chains have been added to avoid adding
        // self-loop edge if multiple underlying objects are present.
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Map this store to V.
          (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
        }
        // The store may have dependencies to unanalyzable loads and
        // stores.
        addChainDependencies(SU, Loads, UnknownValue);
        addChainDependencies(SU, Stores, UnknownValue);
      }
    } else { // SU is a load.
      if (Objs.empty()) {
        // An unknown load depends on all stores.
        addChainDependencies(SU, Stores);
        addChainDependencies(SU, NonAliasStores);

        Loads.insert(SU, UnknownValue);
      } else {
        for (const UnderlyingObject &UnderlObj : Objs) {
          ValueType V = UnderlObj.getValue();
          bool ThisMayAlias = UnderlObj.mayAlias();

          // Add precise dependencies against all previously seen stores
          // mapping to the same Value(s).
          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);

          // Map this load to V.
          (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
        }
        // The load may have dependencies to unanalyzable stores.
        addChainDependencies(SU, Stores, UnknownValue);
      }
    }

    // Reduce maps if they grow huge.
    if (Stores.size() + Loads.size() >= HugeRegion) {
      DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
      reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
    }
    if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
      DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
      reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
    }
  }

  if (DbgMI)
    FirstDbgValue = DbgMI;

  Defs.clear();
  Uses.clear();
  CurrentVRegDefs.clear();
  CurrentVRegUses.clear();
}

raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
  PSV->printCustom(OS);
  return OS;
}

void ScheduleDAGInstrs::Value2SUsMap::dump() {
  for (auto &Itr : *this) {
    if (Itr.first.is<const Value*>()) {
      const Value *V = Itr.first.get<const Value*>();
      if (isa<UndefValue>(V))
        dbgs() << "Unknown";
      else
        V->printAsOperand(dbgs());
    }
    else if (Itr.first.is<const PseudoSourceValue*>())
      dbgs() << Itr.first.get<const PseudoSourceValue*>();
    else
      llvm_unreachable("Unknown Value type.");

    dbgs() << " : ";
    dumpSUList(Itr.second);
  }
}

/// Reduce maps in FIFO order, by N SUs. This is better than turning
/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
/// it avoids unnecessary edges between seen SUs above the new
/// BarrierChain, and those below it.
void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
                                              Value2SUsMap &loads, unsigned N) {
  DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
        stores.dump();
        dbgs() << "Loading SUnits:\n";
        loads.dump());

  // Insert all SU's NodeNums into a vector and sort it.
  std::vector<unsigned> NodeNums;
  NodeNums.reserve(stores.size() + loads.size());
  for (auto &I : stores)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  for (auto &I : loads)
    for (auto *SU : I.second)
      NodeNums.push_back(SU->NodeNum);
  std::sort(NodeNums.begin(), NodeNums.end());

  // The N last elements in NodeNums will be removed, and the SU with
  // the lowest NodeNum of them will become the new BarrierChain to
  // let the not yet seen SUs have a dependency to the removed SUs.
  assert(N <= NodeNums.size());
  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
  if (BarrierChain) {
    // The aliasing and non-aliasing maps reduce independently of each
    // other, but share a common BarrierChain. Check if the
    // newBarrierChain is above the former one. If it is not, it may
    // introduce a loop to use newBarrierChain, so keep the old one.
    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
      BarrierChain->addPredBarrier(newBarrierChain);
      BarrierChain = newBarrierChain;
      DEBUG(dbgs() << "Inserting new barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);
    }
    else
      DEBUG(dbgs() << "Keeping old barrier chain: SU("
            << BarrierChain->NodeNum << ").\n";);
  }
  else
    BarrierChain = newBarrierChain;

  insertBarrierChain(stores);
  insertBarrierChain(loads);

  DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
        stores.dump();
        dbgs() << "Loading SUnits:\n";
        loads.dump());
}
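
// NOTE (illustrative, added in this edited listing; not part of the original
// file): with N = 3 and sorted NodeNums = {2, 5, 7, 11, 12, 14},
// *(NodeNums.end() - N) is 11, so SU(11) becomes the new BarrierChain
// (assuming no existing BarrierChain with a lower NodeNum blocks it).
// insertBarrierChain() then makes SU(11) a predecessor of SU(12) and SU(14),
// erases all three from the maps, and every SU processed from here on gets
// its edge to SU(11) instead of to the erased nodes.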

/// \brief Initialize register live-range state for updating kills.
void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
  // Start with no live registers.
  LiveRegs.reset();

  // Examine the live-in regs of all successors.
  for (const MachineBasicBlock *Succ : BB->successors()) {
    for (const auto &LI : Succ->liveins()) {
      // Repeat, for reg and all subregs.
      for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
           SubRegs.isValid(); ++SubRegs)
        LiveRegs.set(*SubRegs);
    }
  }
}

/// \brief If we change a kill flag on the bundle instruction implicit register
/// operands, then we also need to propagate that to any instructions inside
/// the bundle which had the same kill state.
static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
                                 bool NewKillState,
                                 const TargetRegisterInfo *TRI) {
  if (MI->getOpcode() != TargetOpcode::BUNDLE)
    return;

  // Walk backwards from the last instruction in the bundle to the first.
  // Once we set a kill flag on an instruction, we bail out, as otherwise we
  // might set it on too many operands. We will clear as many flags as we
  // can though.
  MachineBasicBlock::instr_iterator Begin = MI->getIterator();
  MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
  while (Begin != End) {
    if (NewKillState) {
      if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
        return;
    } else
      (--End)->clearRegisterKills(Reg, TRI);
  }
}

bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
  // Setting kill flag...
  if (!MO.isKill()) {
    MO.setIsKill(true);
    toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
    return false;
  }

  // If MO itself is live, clear the kill flag...
  if (LiveRegs.test(MO.getReg())) {
    MO.setIsKill(false);
    toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
    return false;
  }

  // If any subreg of MO is live, then create an imp-def for that
  // subreg and keep MO marked as killed.
  MO.setIsKill(false);
  toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
  bool AllDead = true;
  const unsigned SuperReg = MO.getReg();
  MachineInstrBuilder MIB(MF, MI);
  for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
    if (LiveRegs.test(*SubRegs)) {
      MIB.addReg(*SubRegs, RegState::ImplicitDefine);
      AllDead = false;
    }
  }

  if (AllDead) {
    MO.setIsKill(true);
    toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
  }
  return false;
}

// FIXME: Reuse the LivePhysRegs utility for this.
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');

  LiveRegs.resize(TRI->getNumRegs());
  BitVector killedRegs(TRI->getNumRegs());

  startBlockForKills(MBB);

  // Examine block from end to start...
  unsigned Count = MBB->size();
  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
       I != E; --Count) {
    MachineInstr &MI = *--I;
    if (MI.isDebugValue())
      continue;

    // Update liveness. Registers that are defed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      if (MO.isRegMask())
        LiveRegs.clearBitsNotInMask(MO.getRegMask());
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI.isRegTiedToUseOperand(i)) continue;

      // Repeat for reg and all subregs.
      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
           SubRegs.isValid(); ++SubRegs)
        LiveRegs.reset(*SubRegs);
    }

    // Examine all used registers and set/clear kill flag. When a
    // register is used multiple times we only set the kill flag on
    // the first use. Don't set kill flags on undef operands.
    killedRegs.reset();

    // toggleKillFlag can append new operands (implicit defs), so using
    // a range-based loop is not safe. The new operands will be appended
    // at the end of the operand list and they don't need to be visited,
    // so iterating until the currently last operand is ok.
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || MRI.isReserved(Reg)) continue;

      bool kill = false;
      if (!killedRegs.test(Reg)) {
        kill = true;
        // A register is not killed if any subregs are live...
        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
          if (LiveRegs.test(*SubRegs)) {
            kill = false;
            break;
          }
        }

        // If subreg is not live, then register is killed if it became
        // live in this instruction
        if (kill)
          kill = !LiveRegs.test(Reg);
      }

      if (MO.isKill() != kill) {
        DEBUG(dbgs() << "Fixing " << MO << " in ");
        toggleKillFlag(&MI, MO);
        DEBUG(MI.dump());
        DEBUG({
          if (MI.getOpcode() == TargetOpcode::BUNDLE) {
            MachineBasicBlock::instr_iterator Begin = MI.getIterator();
            MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
            while (++Begin != End)
              DEBUG(Begin->dump());
          }
        });
      }

      killedRegs.set(Reg);
    }

    // Mark any used register (that is not using undef) and subregs as
    // now live...
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
      unsigned Reg = MO.getReg();
      if ((Reg == 0) || MRI.isReserved(Reg)) continue;

      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
           SubRegs.isValid(); ++SubRegs)
        LiveRegs.set(*SubRegs);
    }
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  SU->getInstr()->dump();
#endif
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss, /*SkipOpers=*/true);
  return oss.str();
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}

//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//

namespace llvm {
/// \brief Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
  SchedDFSResult &R;

  /// Join DAG nodes into equivalence classes by their subtree.
  IntEqClasses SubtreeClasses;
  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
  std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;

  struct RootData {
    unsigned NodeID;
    unsigned ParentNodeID;  // Parent node (member of the parent subtree).
    unsigned SubInstrCount; // Instr count in this tree only, not children.

    RootData(unsigned id): NodeID(id),
                           ParentNodeID(SchedDFSResult::InvalidSubtreeID),
                           SubInstrCount(0) {}

    unsigned getSparseSetIndex() const { return NodeID; }
  };

  SparseSet<RootData> RootSet;

public:
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }

  /// Return true if this node has been visited by the DFS traversal.
  ///
  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
  /// ID. Later, SubtreeID is updated but remains valid.
  bool isVisited(const SUnit *SU) const {
    return R.DFSNodeData[SU->NodeNum].SubtreeID
      != SchedDFSResult::InvalidSubtreeID;
  }

  /// Initialize this node's instruction count. We don't need to flag the node
  /// visited until visitPostorder because the DAG cannot have cycles.
  void visitPreorder(const SUnit *SU) {
    R.DFSNodeData[SU->NodeNum].InstrCount =
      SU->getInstr()->isTransient() ? 0 : 1;
  }

  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (const SDep &PredDep : SU->Preds) {
      if (PredDep.getKind() != SDep::Data)
        continue;
      unsigned PredNum = PredDep.getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }

  /// Called once for each tree edge after calling visitPostOrderNode on the
  /// predecessor. Increment the parent node's instruction count and
  /// preemptively join this subtree to its parent's if it is small enough.
  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
    R.DFSNodeData[Succ->NodeNum].InstrCount
      += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
    joinPredSubtree(PredDep, Succ);
  }

  /// Add a connection for cross edges.
  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
  }

  /// Set each node's subtree ID to the representative ID and record connections
  /// between trees.
  void finalize() {
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    for (const RootData &Root : RootSet) {
      unsigned TreeID = SubtreeClasses[Root.NodeID];
      if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
            << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
      unsigned PredTree = SubtreeClasses[P.first->NodeNum];
      unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = P.first->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }

protected:
  /// Join the predecessor subtree with the successor that is its DFS
  /// parent. Apply some heuristics before joining.
  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
                       bool CheckLimit = true) {
    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");

    // Check if the predecessor is already joined.
    const SUnit *PredSU = PredDep.getSUnit();
    unsigned PredNum = PredSU->NodeNum;
    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
      return false;

    // Four is the magic number of successors before a node is considered a
    // pinch point.
    unsigned NumDataSucs = 0;
    for (const SDep &SuccDep : PredSU->Succs) {
      if (SuccDep.getKind() == SDep::Data) {
        if (++NumDataSucs >= 4)
          return false;
      }
    }
    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
      return false;
    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
    SubtreeClasses.join(Succ->NodeNum, PredNum);
    return true;
  }

  /// Called by finalize() to record a connection between trees.
  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
    if (!Depth)
      return;

    do {
      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
        R.SubtreeConnections[FromTree];
      for (SchedDFSResult::Connection &C : Connections) {
        if (C.TreeID == ToTree) {
          C.Level = std::max(C.Level, Depth);
          return;
        }
      }
      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
  }
};
} // namespace llvm

namespace {
/// \brief Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
  std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
public:
  bool isComplete() const { return DFSStack.empty(); }

  void follow(const SUnit *SU) {
    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
  }
  void advance() { ++DFSStack.back().second; }

  const SDep *backtrack() {
    DFSStack.pop_back();
    return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
  }

  const SUnit *getCurr() const { return DFSStack.back().first; }

  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }

  SUnit::const_pred_iterator getPredEnd() const {
    return getCurr()->Preds.end();
  }
};
} // anonymous
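
// NOTE (illustrative, added in this edited listing; not part of the original
// file; SU(0..2) is a hypothetical three-node DAG): for data edges
// SU(0)->SU(2) and SU(1)->SU(2), compute() below starts at SU(2), the only
// node with no data successor. It follows the first pred to SU(0),
// backtracks and visits SU(0) in postorder, does the same for SU(1), and
// finally visits SU(2) itself; visitPostorderNode/visitPostorderEdge
// accumulate InstrCount bottom-up along the way.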
1562 
1563 static bool hasDataSucc(const SUnit *SU) {
1564  for (const SDep &SuccDep : SU->Succs) {
1565  if (SuccDep.getKind() == SDep::Data &&
1566  !SuccDep.getSUnit()->isBoundaryNode())
1567  return true;
1568  }
1569  return false;
1570 }
1571 
1572 /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
1573 /// search from this root.
1574 void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1575  if (!IsBottomUp)
1576  llvm_unreachable("Top-down ILP metric is unimplemnted");
1577 
1578  SchedDFSImpl Impl(*this);
1579  for (const SUnit &SU : SUnits) {
1580  if (Impl.isVisited(&SU) || hasDataSucc(&SU))
1581  continue;
1582 
1583  SchedDAGReverseDFS DFS;
1584  Impl.visitPreorder(&SU);
1585  DFS.follow(&SU);
1586  for (;;) {
1587  // Traverse the leftmost path as far as possible.
1588  while (DFS.getPred() != DFS.getPredEnd()) {
1589  const SDep &PredDep = *DFS.getPred();
1590  DFS.advance();
1591  // Ignore non-data edges.
1592  if (PredDep.getKind() != SDep::Data
1593  || PredDep.getSUnit()->isBoundaryNode()) {
1594  continue;
1595  }
1596  // An already visited edge is a cross edge, assuming an acyclic DAG.
1597  if (Impl.isVisited(PredDep.getSUnit())) {
1598  Impl.visitCrossEdge(PredDep, DFS.getCurr());
1599  continue;
1600  }
1601  Impl.visitPreorder(PredDep.getSUnit());
1602  DFS.follow(PredDep.getSUnit());
1603  }
1604  // Visit the top of the stack in postorder and backtrack.
1605  const SUnit *Child = DFS.getCurr();
1606  const SDep *PredDep = DFS.backtrack();
1607  Impl.visitPostorderNode(Child);
1608  if (PredDep)
1609  Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1610  if (DFS.isComplete())
1611  break;
1612  }
1613  }
1614  Impl.finalize();
1615 }
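// A minimal usage sketch (editorial; the enclosing scheduler's SUnits array
// is assumed): the result is sized to the region, compute() assigns each
// node a subtree ID and instruction count, and getILP() yields the metric
// printed by ILPValue below.
//
//   SchedDFSResult DFSResult(/*IsBU=*/true, /*lim=*/8);
//   DFSResult.resize(SUnits.size());
//   DFSResult.compute(SUnits);
//   ILPValue ILP = DFSResult.getILP(&SUnits.front());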
1616 
1617 /// The root of the given SubtreeID was just scheduled. For all subtrees
1618 /// connected to this tree, record the depth of the connection so that the
1619 /// nearest connected subtrees can be prioritized.
1620 void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1621  for (const Connection &C : SubtreeConnections[SubtreeID]) {
1622  SubtreeConnectLevels[C.TreeID] =
1623  std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1624  DEBUG(dbgs() << " Tree: " << C.TreeID
1625  << " @" << SubtreeConnectLevels[C.TreeID] << '\n');
1626  }
1627 }
1628 
1629 LLVM_DUMP_METHOD
1630 void ILPValue::print(raw_ostream &OS) const {
1631  OS << InstrCount << " / " << Length << " = ";
1632  if (!Length)
1633  OS << "BADILP";
1634  else
1635  OS << format("%g", ((double)InstrCount / Length));
1636 }
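// For example: a subtree with InstrCount = 8 and critical-path Length = 4
// prints as "8 / 4 = 2", i.e. on average two instructions are available per
// cycle along the critical path; Length == 0 is reported as "BADILP".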
1637 
1638 LLVM_DUMP_METHOD
1639 void ILPValue::dump() const {
1640  dbgs() << *this << '\n';
1641 }
1642 
1643 namespace llvm {
1644 
1645 LLVM_DUMP_METHOD
1646 raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1647  Val.print(OS);
1648  return OS;
1649 }
1650 
1651 } // namespace llvm