LLVM  9.0.0svn
SystemZHazardRecognizer.cpp
Go to the documentation of this file.
1 //=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a hazard recognizer for the SystemZ scheduler.
10 //
11 // This class is used by the SystemZ scheduling strategy to maintain
12 // the state during scheduling, and provide cost functions for
13 // scheduling candidates. This includes:
14 //
15 // * Decoder grouping. A decoder group can maximally hold 3 uops, and
16 // instructions that always begin a new group should be scheduled when
17 // the current decoder group is empty.
18 // * Processor resources usage. It is beneficial to balance the use of
19 // resources.
20 //
21 // A goal is to consider all instructions, also those outside of any
22 // scheduling region. Such instructions are "advanced" past and include
23 // single instructions before a scheduling region, branches etc.
24 //
25 // A block that has only one predecessor continues scheduling with that
26 // predecessor's state (which may be updated by emitting branches).
27 //
28 // ===---------------------------------------------------------------------===//
29 
31 #include "llvm/ADT/Statistic.h"
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "machine-scheduler"
36 
37 // This is the limit of processor resource usage at which the
38 // scheduler should try to look for other instructions (not using the
39 // critical resource).
// A resource whose usage counter rises above this value becomes a candidate
// for the critical resource. The option is hidden and defaults to 8.
40 static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
41  cl::desc("The OOO window for processor "
42  "resources during scheduling."),
43  cl::init(8));
44 
45 unsigned SystemZHazardRecognizer::
46 getNumDecoderSlots(SUnit *SU) const {
47  const MCSchedClassDesc *SC = getSchedClass(SU);
48  if (!SC->isValid())
49  return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
50 
51  assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
52  "Only cracked instruction can have 2 uops.");
53  assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
54  "Expanded instructions always group alone.");
55  assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
56  "Expanded instructions fill the group(s).");
57 
58  return SC->NumMicroOps;
59 }
60 
61 unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
62  unsigned Idx = CurrGroupSize;
63  if (GrpCount % 2)
64  Idx += 3;
65 
66  if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {
67  if (Idx == 1 || Idx == 2)
68  Idx = 3;
69  else if (Idx == 4 || Idx == 5)
70  Idx = 0;
71  }
72 
73  return Idx;
74 }
75 
// NOTE(review): the listing line that precedes this one (return type and
// class qualifier of the definition) is missing from this extract.
// Reports NoHazard when m fits into the current decoder group and Hazard
// otherwise. The Stalls argument is not used.
77 getHazardType(SUnit *m, int Stalls) {
78  return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
79 }
80 
// NOTE(review): the signature line of this definition is missing from this
// extract; presumably this is the body of Reset() -- confirm.
// Puts the recognizer back into its initial state: empty decoder group,
// zeroed resource counters, no group counted, no FPd op and no instruction
// recorded.
82  CurrGroupSize = 0;
83  CurrGroupHas4RegOps = false;
84  clearProcResCounters();
85  GrpCount = 0;
// UINT_MAX marks "no FPd op seen yet".
86  LastFPdOpCycleIdx = UINT_MAX;
87  LastEmittedMI = nullptr;
// The textual description of the current group only exists in debug builds.
88  LLVM_DEBUG(CurGroupDbg = "";);
89 }
90 
91 bool
92 SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
93  const MCSchedClassDesc *SC = getSchedClass(SU);
94  if (!SC->isValid())
95  return true;
96 
97  // A cracked instruction only fits into schedule if the current
98  // group is empty.
99  if (SC->BeginGroup)
100  return (CurrGroupSize == 0);
101 
102  // An instruction with 4 register operands will not fit in last slot.
103  assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
104  "Current decoder group is already full!");
105  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
106  return false;
107 
108  // Since a full group is handled immediately in EmitInstruction(),
109  // SU should fit into current group. NumSlots should be 1 or 0,
110  // since it is not a cracked or expanded instruction.
111  assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
112  "Expected normal instruction to fit in non-full group!");
113 
114  return true;
115 }
116 
117 bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
118  const MachineFunction &MF = *MI->getParent()->getParent();
119  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
120  const MCInstrDesc &MID = MI->getDesc();
121  unsigned Count = 0;
122  for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
123  const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
124  if (RC == nullptr)
125  continue;
126  if (OpIdx >= MID.getNumDefs() &&
127  MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
128  continue;
129  Count++;
130  }
131  return Count >= 4;
132 }
133 
// Closes the current decoder group, if it holds anything, and updates the
// group count and the processor resource counters accordingly.
134 void SystemZHazardRecognizer::nextGroup() {
// Nothing to close if the group is empty.
135  if (CurrGroupSize == 0)
136  return;
137 
138  LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
139  LLVM_DEBUG(CurGroupDbg = "";);
140 
// A size above 3 must be a multiple of 3 and counts as that many whole
// groups; any other non-zero size completes exactly one group.
141  int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
142  assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
143  "Current decoder group bad.");
144 
145  // Reset counter for next group.
146  CurrGroupSize = 0;
147  CurrGroupHas4RegOps = false;
148 
149  GrpCount += ((unsigned) NumGroups);
150 
151  // Decrease counters for execution units.
// Each counter drops by NumGroups and is clamped at 0.
152  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
153  ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
154  ? (ProcResourceCounters[i] - NumGroups)
155  : 0);
156 
157  // Clear CriticalResourceIdx if it is now below the threshold.
// NOTE(review): listing line 160, the right-hand side of the `<=` comparison
// below, is missing from this extract; it has to be restored before this
// compiles. Presumably it is the ProcResCostLim threshold -- confirm.
158  if (CriticalResourceIdx != UINT_MAX &&
159  (ProcResourceCounters[CriticalResourceIdx] <=
161  CriticalResourceIdx = UINT_MAX;
162 
163  LLVM_DEBUG(dumpState(););
164 }
165 
166 #ifndef NDEBUG // Debug output
// NOTE(review): the signature line of this definition (listing line 167) is
// missing from this extract; presumably this is the body of
// dumpSU(SUnit *SU, raw_ostream &OS) const -- confirm.
// Writes a one-line description of SU to OS: node number, opcode name, the
// processor resources it uses and its grouping properties.
168  OS << "SU(" << SU->NodeNum << "):";
169  OS << TII->getName(SU->getInstr()->getOpcode());
170 
// Without a valid scheduling class there is nothing more to print.
171  const MCSchedClassDesc *SC = getSchedClass(SU);
172  if (!SC->isValid())
173  return;
174 
// NOTE(review): the first line of the for statement (listing line 175) is
// missing from this extract. The loop visits every processor resource entry
// of SC.
176  PI = SchedModel->getWriteProcResBegin(SC),
177  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
178  const MCProcResourceDesc &PRD =
179  *SchedModel->getProcResource(PI->ProcResourceIdx);
180  std::string FU(PRD.Name);
181  // trim e.g. Z13_FXaUnit -> FXa
// If the name has no "_", find() returns npos and npos + 1 wraps to 0, so
// the whole name is kept.
182  FU = FU.substr(FU.find("_") + 1);
183  size_t Pos = FU.find("Unit");
184  if (Pos != std::string::npos)
185  FU.resize(Pos);
186  if (FU == "LS") // LSUnit -> LSU
187  FU = "LSU";
188  OS << "/" << FU;
189 
// The cycle count is only shown when the resource is used for more than one
// cycle.
190  if (PI->Cycles > 1)
191  OS << "(" << PI->Cycles << "cyc)";
192  }
193 
// Append the micro-op count and the grouping / operand properties.
194  if (SC->NumMicroOps > 1)
195  OS << "/" << SC->NumMicroOps << "uops";
196  if (SC->BeginGroup && SC->EndGroup)
197  OS << "/GroupsAlone";
198  else if (SC->BeginGroup)
199  OS << "/BeginsGroup";
200  else if (SC->EndGroup)
201  OS << "/EndsGroup";
202  if (SU->isUnbuffered)
203  OS << "/Unbuffered";
204  if (has4RegOps(SU->getInstr()))
205  OS << "/4RegOps";
206 }
207 
208 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
209  dbgs() << "++ " << Msg;
210  dbgs() << ": ";
211 
212  if (CurGroupDbg.empty())
213  dbgs() << " <empty>\n";
214  else {
215  dbgs() << "{ " << CurGroupDbg << " }";
216  dbgs() << " (" << CurrGroupSize << " decoder slot"
217  << (CurrGroupSize > 1 ? "s":"")
218  << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
219  << ")\n";
220  }
221 }
222 
// NOTE(review): the signature line of this definition (listing line 223) is
// missing from this extract.
// Prints every non-zero processor resource counter, and the critical resource
// if one is set, to the debug stream. Prints nothing when all counters are 0.
224  bool any = false;
225 
// First pass: find out whether there is anything to print at all.
226  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
227  if (ProcResourceCounters[i] > 0) {
228  any = true;
229  break;
230  }
231 
232  if (!any)
233  return;
234 
// Second pass: print "<name>:<count>" for each resource in use.
235  dbgs() << "++ | Resource counters: ";
236  for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
237  if (ProcResourceCounters[i] > 0)
238  dbgs() << SchedModel->getProcResource(i)->Name
239  << ":" << ProcResourceCounters[i] << " ";
240  dbgs() << "\n";
241 
// UINT_MAX means that no resource is currently critical.
242  if (CriticalResourceIdx != UINT_MAX)
243  dbgs() << "++ | Critical resource: "
244  << SchedModel->getProcResource(CriticalResourceIdx)->Name
245  << "\n";
246 }
247 
// NOTE(review): the signature line of this definition (listing line 248) is
// missing from this extract; presumably this is the body of dumpState() --
// confirm.
// Prints the current decoder group, the current cycle index and, if an FPd op
// has been recorded, the cycle index of the last one.
249  dumpCurrGroup("| Current decoder group");
250  dbgs() << "++ | Current cycle index: "
251  << getCurrCycleIdx() << "\n";
// NOTE(review): listing line 252 is missing from this extract at this point.
253  if (LastFPdOpCycleIdx != UINT_MAX)
254  dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
255 }
256 
257 #endif //NDEBUG
258 
259 void SystemZHazardRecognizer::clearProcResCounters() {
260  ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
261  CriticalResourceIdx = UINT_MAX;
262 }
263 
264 static inline bool isBranchRetTrap(MachineInstr *MI) {
265  return (MI->isBranch() || MI->isReturn() ||
266  MI->getOpcode() == SystemZ::CondTrap);
267 }
268 
269 // Update state with SU as the next scheduled unit.
// NOTE(review): the signature lines of this definition (listing lines 270-271)
// are missing from this extract; presumably this is EmitInstruction(SUnit *SU)
// -- confirm.
272  const MCSchedClassDesc *SC = getSchedClass(SU);
273  LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
274  dbgs() << "\n";);
275  LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
276 
277  // If scheduling an SU that must begin a new decoder group, move on
278  // to next group.
279  if (!fitsIntoCurrentGroup(SU))
280  nextGroup();
281 
// NOTE(review): listing line 282, the start of this debug statement, which
// must declare the stream `cgd`, is missing from this extract.
// The statement appends the description of SU to the group text,
// comma-separated.
283  if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
284 
285  LastEmittedMI = SU->getInstr();
286 
287  // After returning from a call, we don't know much about the state.
288  if (SU->isCall) {
289  LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
290  Reset();
// Set again after Reset(), presumably because Reset() clears LastEmittedMI.
291  LastEmittedMI = SU->getInstr();
292  return;
293  }
294 
295  // Increase counter for execution unit(s).
// NOTE(review): the first line of the for statement (listing line 296) is
// missing from this extract. The loop visits every processor resource entry
// of SC.
297  PI = SchedModel->getWriteProcResBegin(SC),
298  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
299  // Don't handle FPd together with the other resources.
// Resources with BufferSize == 1 are skipped here.
300  if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
301  continue;
302  int &CurrCounter =
303  ProcResourceCounters[PI->ProcResourceIdx];
304  CurrCounter += PI->Cycles;
305  // Check if this is now the new critical resource.
// The counter must exceed ProcResCostLim. If a different resource is
// already critical, the counter must also exceed that resource's counter.
306  if ((CurrCounter > ProcResCostLim) &&
307  (CriticalResourceIdx == UINT_MAX ||
308  (PI->ProcResourceIdx != CriticalResourceIdx &&
309  CurrCounter >
310  ProcResourceCounters[CriticalResourceIdx]))) {
311  LLVM_DEBUG(
312  dbgs() << "++ New critical resource: "
313  << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
314  << "\n";);
315  CriticalResourceIdx = PI->ProcResourceIdx;
316  }
317  }
318 
319  // Make note of an instruction that uses a blocking resource (FPd).
// The cycle index is taken before SU is added to the group below.
320  if (SU->isUnbuffered) {
321  LastFPdOpCycleIdx = getCurrCycleIdx(SU);
322  LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
323  << "\n";);
324  }
325 
326  // Insert SU into current group by increasing number of slots used
327  // in current group.
328  CurrGroupSize += getNumDecoderSlots(SU);
329  CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
// A group containing an instruction with 4 register operands is limited to 2
// slots instead of 3.
330  unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
// The limit may only be exceeded by an SU that is alone in the group.
331  assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
332  && "SU does not fit into decoder group!");
333 
334  // Check if current group is now full/ended. If so, move on to next
335  // group to be ready to evaluate more candidates.
336  if (CurrGroupSize >= GroupLim || SC->EndGroup)
337  nextGroup();
338 }
339 
// NOTE(review): the signature line of this definition (listing line 340) is
// missing from this extract; presumably this is groupingCost(SUnit *SU) const
// -- confirm.
// Returns the decoder grouping cost of SU given the current group:
// -1 when SU fits well, 0 when the position does not matter, and a positive
// value when SU would not fill the group.
341  const MCSchedClassDesc *SC = getSchedClass(SU);
342  if (!SC->isValid())
343  return 0;
344 
345  // If SU begins new group, it can either break a current group early
346  // or fit naturally if current group is empty (negative cost).
347  if (SC->BeginGroup) {
// The cost is the number of slots left unused in a group of 3.
348  if (CurrGroupSize)
349  return 3 - CurrGroupSize;
350  return -1;
351  }
352 
353  // Similarly, a group-ending SU may either fit well (last in group), or
354  // end the group prematurely.
355  if (SC->EndGroup) {
356  unsigned resultingGroupSize =
357  (CurrGroupSize + getNumDecoderSlots(SU));
358  if (resultingGroupSize < 3)
359  return (3 - resultingGroupSize);
360  return -1;
361  }
362 
363  // An instruction with 4 register operands will not fit in last slot.
364  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
365  return 1;
366 
367  // Most instructions can be placed in any decoder slot.
368  return 0;
369 }
370 
371 bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
372  assert (SU->isUnbuffered);
373  // If this is the first FPd op, it should be scheduled high.
374  if (LastFPdOpCycleIdx == UINT_MAX)
375  return true;
376  // If this is not the first PFd op, it should go into the other side
377  // of the processor to use the other FPd unit there. This should
378  // generally happen if two FPd ops are placed with 2 other
379  // instructions between them (modulo 6).
380  unsigned SUCycleIdx = getCurrCycleIdx(SU);
381  if (LastFPdOpCycleIdx > SUCycleIdx)
382  return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);
383  return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);
384 }
385 
// NOTE(review): the signature lines of this definition (listing lines 386-387)
// are missing from this extract; presumably this is resourcesCost(SUnit *SU)
// -- confirm.
// Returns the cost of SU with regard to processor resource usage; 0 means
// no cost.
388  int Cost = 0;
389 
390  const MCSchedClassDesc *SC = getSchedClass(SU);
391  if (!SC->isValid())
392  return 0;
393 
394  // For a FPd op, either return min or max value as indicated by the
395  // distance to any prior FPd op.
396  if (SU->isUnbuffered)
397  Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
398  // For other instructions, give a cost to the use of the critical resource.
399  else if (CriticalResourceIdx != UINT_MAX) {
// NOTE(review): the first line of the for statement (listing line 400) is
// missing from this extract. The loop visits every processor resource entry
// of SC.
401  PI = SchedModel->getWriteProcResBegin(SC),
402  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
// The cost is the number of cycles SU uses the critical resource.
403  if (PI->ProcResourceIdx == CriticalResourceIdx)
404  Cost = PI->Cycles;
405  }
406 
407  return Cost;
408 }
409 
// NOTE(review): the first line of this definition's signature (listing line
// 410) is missing from this extract; presumably this is
// emitInstruction(MachineInstr *MI, bool TakenBranch) -- confirm.
// Wraps MI in a temporary SUnit, emits it, and then ends the decoder group
// where a branch requires it.
411  bool TakenBranch) {
412  // Make a temporary SUnit.
413  SUnit SU(MI, 0);
414 
415  // Set interesting flags.
416  SU.isCall = MI->isCall();
417 
// Derive the resource flags from the BufferSize of each processor resource
// MI uses: 0 marks a reserved resource, 1 an unbuffered one.
418  const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
419  for (const MCWriteProcResEntry &PRE :
420  make_range(SchedModel->getWriteProcResBegin(SC),
421  SchedModel->getWriteProcResEnd(SC))) {
422  switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
423  case 0:
424  SU.hasReservedResource = true;
425  break;
426  case 1:
427  SU.isUnbuffered = true;
428  break;
429  default:
430  break;
431  }
432  }
433 
// Remember the group size before emission; it is needed below to detect a
// branch that was placed in the second slot.
434  unsigned GroupSizeBeforeEmit = CurrGroupSize;
435  EmitInstruction(&SU);
436 
437  if (!TakenBranch && isBranchRetTrap(MI)) {
438  // NT Branch on second slot ends group.
439  if (GroupSizeBeforeEmit == 1)
440  nextGroup();
441  }
442 
// A taken branch ends a non-empty group.
443  if (TakenBranch && CurrGroupSize > 0)
444  nextGroup();
445 
446  assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
447  "Scheduler: unhandled terminator!");
448 }
449 
// NOTE(review): the signature lines of this definition (listing lines 450-451)
// are missing from this extract; presumably this is
// copyState(SystemZHazardRecognizer *Incoming) -- confirm.
// Copies the decoder group size, the resource counters and the FPd / group
// count state from Incoming into this recognizer.
// NOTE(review): CurrGroupHas4RegOps and LastEmittedMI are not copied here --
// confirm that this is intentional.
452  // Current decoder group
453  CurrGroupSize = Incoming->CurrGroupSize;
454  LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
455 
456  // Processor resources
457  ProcResourceCounters = Incoming->ProcResourceCounters;
458  CriticalResourceIdx = Incoming->CriticalResourceIdx;
459 
460  // FPd
461  LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
462  GrpCount = Incoming->GrpCount;
463 }
const SystemZRegisterInfo & getRegisterInfo() const
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:634
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
unsigned const TargetRegisterInfo * TRI
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void Reset() override
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:211
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction is part of the terminator for a basic block.
Definition: MachineInstr.h:650
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:410
static cl::opt< int > ProcResCostLim("procres-cost-lim", cl::Hidden, cl::desc("The OOO window for processor " "resources during scheduling."), cl::init(8))
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:412
SystemZHazardRecognizer maintains the state for one MBB during scheduling.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:407
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
HazardType getHazardType(SUnit *m, int Stalls=0) override
getHazardType - Return the hazard type of emitting this node.
void EmitInstruction(SUnit *SU) override
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
bool isValid() const
Definition: MCSchedule.h:127
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:658
static bool isBranchRetTrap(MachineInstr *MI)
void copyState(SystemZHazardRecognizer *Incoming)
Copy counters from end of single predecessor.
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:624
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
void dumpCurrGroup(std::string Msg="") const
void emitInstruction(MachineInstr *MI, bool TakenBranch=false)
Wrap a non-scheduled instruction in an SU and emit it.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:188
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
CHAIN = SC CHAIN, Imm128 - System call.
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:226
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:255
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
IRTranslator LLVM IR MI
#define LLVM_DEBUG(X)
Definition: Debug.h:122
void dumpSU(SUnit *SU, raw_ostream &OS) const
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242