LLVM 18.0.0git
TargetSchedule.cpp
Go to the documentation of this file.
1//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a wrapper around MCSchedModel that allows the interface
10// to benefit from information currently only available in TargetInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <numeric>
30
31using namespace llvm;
32
34 cl::desc("Use TargetSchedModel for latency lookup"));
35
37 cl::desc("Use InstrItineraryData for latency lookup"));
38
40 "sched-model-force-enable-intervals", cl::Hidden, cl::init(false),
41 cl::desc("Force the use of resource intervals in the schedule model"));
42
44 return EnableSchedModel && SchedModel.hasInstrSchedModel();
45}
46
48 return EnableSchedItins && !InstrItins.isEmpty();
49}
50
52 STI = TSInfo;
53 SchedModel = TSInfo->getSchedModel();
54 TII = TSInfo->getInstrInfo();
55 STI->initInstrItins(InstrItins);
56
57 unsigned NumRes = SchedModel.getNumProcResourceKinds();
58 ResourceFactors.resize(NumRes);
59 ResourceLCM = SchedModel.IssueWidth;
60 for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
61 unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
62 if (NumUnits > 0)
63 ResourceLCM = std::lcm(ResourceLCM, NumUnits);
64 }
65 MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
66 for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
67 unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
68 ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
69 }
70}
71
72/// Returns true only if instruction is specified as single issue.
74 const MCSchedClassDesc *SC) const {
75 if (hasInstrSchedModel()) {
76 if (!SC)
78 if (SC->isValid())
79 return SC->BeginGroup;
80 }
81 return false;
82}
83
85 const MCSchedClassDesc *SC) const {
86 if (hasInstrSchedModel()) {
87 if (!SC)
89 if (SC->isValid())
90 return SC->EndGroup;
91 }
92 return false;
93}
94
96 const MCSchedClassDesc *SC) const {
97 if (hasInstrItineraries()) {
98 int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
99 return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
100 }
101 if (hasInstrSchedModel()) {
102 if (!SC)
103 SC = resolveSchedClass(MI);
104 if (SC->isValid())
105 return SC->NumMicroOps;
106 }
107 return MI->isTransient() ? 0 : 1;
108}
109
// The machine model may explicitly specify an invalid latency, which
// effectively means infinite latency. Since users of the TargetSchedule API
// don't know how to handle this, we convert it to a very large latency that is
// easy to distinguish when debugging the DAG but won't induce overflow.
static unsigned capLatency(int Cycles) {
  if (Cycles < 0)
    return 1000; // "Infinite" sentinel that is still overflow-safe.
  return Cycles;
}
117
118/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
119/// evaluation of predicates that depend on instruction operands or flags.
121resolveSchedClass(const MachineInstr *MI) const {
122 // Get the definition's scheduling class descriptor from this machine model.
123 unsigned SchedClass = MI->getDesc().getSchedClass();
124 const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
125 if (!SCDesc->isValid())
126 return SCDesc;
127
128#ifndef NDEBUG
129 unsigned NIter = 0;
130#endif
131 while (SCDesc->isVariant()) {
132 assert(++NIter < 6 && "Variants are nested deeper than the magic number");
133
134 SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
135 SCDesc = SchedModel.getSchedClassDesc(SchedClass);
136 }
137 return SCDesc;
138}
139
140/// Find the def index of this operand. This index maps to the machine model and
141/// is independent of use operands. Def operands may be reordered with uses or
142/// merged with uses without affecting the def index (e.g. before/after
143/// regalloc). However, an instruction's def operands must never be reordered
144/// with respect to each other.
145static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
146 unsigned DefIdx = 0;
147 for (unsigned i = 0; i != DefOperIdx; ++i) {
148 const MachineOperand &MO = MI->getOperand(i);
149 if (MO.isReg() && MO.isDef())
150 ++DefIdx;
151 }
152 return DefIdx;
153}
154
155/// Find the use index of this operand. This is independent of the instruction's
156/// def operands.
157///
158/// Note that uses are not determined by the operand's isUse property, which
159/// is simply the inverse of isDef. Here we consider any readsReg operand to be
160/// a "use". The machine model allows an operand to be both a Def and Use.
161static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
162 unsigned UseIdx = 0;
163 for (unsigned i = 0; i != UseOperIdx; ++i) {
164 const MachineOperand &MO = MI->getOperand(i);
165 if (MO.isReg() && MO.readsReg() && !MO.isDef())
166 ++UseIdx;
167 }
168 return UseIdx;
169}
170
171// Top-level API for clients that know the operand indices.
173 const MachineInstr *DefMI, unsigned DefOperIdx,
174 const MachineInstr *UseMI, unsigned UseOperIdx) const {
175
177 return TII->defaultDefLatency(SchedModel, *DefMI);
178
179 if (hasInstrItineraries()) {
180 int OperLatency = 0;
181 if (UseMI) {
182 OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
183 *UseMI, UseOperIdx);
184 }
185 else {
186 unsigned DefClass = DefMI->getDesc().getSchedClass();
187 OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
188 }
189 if (OperLatency >= 0)
190 return OperLatency;
191
192 // No operand latency was found.
193 unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);
194
195 // Expected latency is the max of the stage latency and itinerary props.
196 // Rather than directly querying InstrItins stage latency, we call a TII
197 // hook to allow subtargets to specialize latency. This hook is only
198 // applicable to the InstrItins model. InstrSchedModel should model all
199 // special cases without TII hooks.
200 InstrLatency =
201 std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
202 return InstrLatency;
203 }
204 // hasInstrSchedModel()
206 unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
207 if (DefIdx < SCDesc->NumWriteLatencyEntries) {
208 // Lookup the definition's write latency in SubtargetInfo.
209 const MCWriteLatencyEntry *WLEntry =
210 STI->getWriteLatencyEntry(SCDesc, DefIdx);
211 unsigned WriteID = WLEntry->WriteResourceID;
212 unsigned Latency = capLatency(WLEntry->Cycles);
213 if (!UseMI)
214 return Latency;
215
216 // Lookup the use's latency adjustment in SubtargetInfo.
217 const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
218 if (UseDesc->NumReadAdvanceEntries == 0)
219 return Latency;
220 unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
221 int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
222 if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
223 return 0;
224 return Latency - Advance;
225 }
226 // If DefIdx does not exist in the model (e.g. implicit defs), then return
227 // unit latency (defaultDefLatency may be too conservative).
228#ifndef NDEBUG
229 if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
230 !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
231 SchedModel.isComplete()) {
232 errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
233 << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
234 llvm_unreachable("incomplete machine model");
235 }
236#endif
237 // FIXME: Automatically giving all implicit defs defaultDefLatency is
238 // undesirable. We should only do it for defs that are known to the MC
239 // desc like flags. Truly implicit defs should get 1 cycle latency.
240 return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
241}
242
243unsigned
244TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
245 return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc));
246}
247
248unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
249 assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
250 unsigned SCIdx = TII->get(Opcode).getSchedClass();
251 return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx));
252}
253
254unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const {
255 if (hasInstrSchedModel())
256 return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst));
257 return computeInstrLatency(Inst.getOpcode());
258}
259
260unsigned
261TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
262 bool UseDefaultDefLatency) const {
263 // For the itinerary model, fall back to the old subtarget hook.
264 // Allow subtargets to compute Bundle latencies outside the machine model.
265 if (hasInstrItineraries() || MI->isBundle() ||
266 (!hasInstrSchedModel() && !UseDefaultDefLatency))
267 return TII->getInstrLatency(&InstrItins, *MI);
268
269 if (hasInstrSchedModel()) {
270 const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
271 if (SCDesc->isValid())
272 return computeInstrLatency(*SCDesc);
273 }
274 return TII->defaultDefLatency(SchedModel, *MI);
275}
276
278computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
279 const MachineInstr *DepMI) const {
280 if (!SchedModel.isOutOfOrder())
281 return 1;
282
283 // Out-of-order processor can dispatch WAW dependencies in the same cycle.
284
285 // Treat predication as a data dependency for out-of-order cpus. In-order
286 // cpus do not need to treat predicated writes specially.
287 //
288 // TODO: The following hack exists because predication passes do not
289 // correctly append imp-use operands, and readsReg() strangely returns false
290 // for predicated defs.
291 Register Reg = DefMI->getOperand(DefOperIdx).getReg();
292 const MachineFunction &MF = *DefMI->getMF();
294 if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
295 return computeInstrLatency(DefMI);
296
297 // If we have a per operand scheduling model, check if this def is writing
298 // an unbuffered resource. If so, it treated like an in-order cpu.
299 if (hasInstrSchedModel()) {
301 if (SCDesc->isValid()) {
302 for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
303 *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
304 if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
305 return 1;
306 }
307 }
308 }
309 return 0;
310}
311
312double
314 if (hasInstrItineraries()) {
315 unsigned SchedClass = MI->getDesc().getSchedClass();
318 }
319
320 if (hasInstrSchedModel())
322
323 return 0.0;
324}
325
326double
328 unsigned SchedClass = TII->get(Opcode).getSchedClass();
332 if (hasInstrSchedModel()) {
333 const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass);
334 if (SCDesc.isValid() && !SCDesc.isVariant())
335 return MCSchedModel::getReciprocalThroughput(*STI, SCDesc);
336 }
337
338 return 0.0;
339}
340
341double
343 if (hasInstrSchedModel())
344 return SchedModel.getReciprocalThroughput(*STI, *TII, MI);
345 return computeReciprocalThroughput(MI.getOpcode());
346}
347
350 return true;
351
352 return SchedModel.EnableIntervals;
353}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx)
Find the use index of this operand.
static unsigned capLatency(int Cycles)
static cl::opt< bool > EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), cl::desc("Use TargetSchedModel for latency lookup"))
static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx)
Find the def index of this operand.
static cl::opt< bool > ForceEnableIntervals("sched-model-force-enable-intervals", cl::Hidden, cl::init(false), cl::desc("Force the use of resource intervals in the schedule model"))
static cl::opt< bool > EnableSchedItins("scheditins", cl::Hidden, cl::init(true), cl::desc("Use InstrItineraryData for latency lookup"))
int getNumMicroOps(unsigned ItinClassIndx) const
Return the number of micro-ops that the given class decodes to.
int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
unsigned getOpcode() const
Definition: MCInst.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
int getReadAdvanceCycles(const MCSchedClassDesc *SC, unsigned UseIdx, unsigned WriteResID) const
const MCWriteLatencyEntry * getWriteLatencyEntry(const MCSchedClassDesc *SC, unsigned DefIdx) const
void initInstrItins(InstrItineraryData &InstrItins) const
Initialize an InstrItineraryData instance.
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:540
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
MachineOperand class - Representation of each machine instruction operand.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
void resize(size_type N)
Definition: SmallVector.h:642
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr &MI) const
Return the number of u-operations the given machine instruction will be decoded to on the target cpu.
virtual int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const
virtual bool isPredicated(const MachineInstr &MI) const
Returns true if the instruction is already predicated.
virtual unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const
Compute the instruction latency of a given instruction.
unsigned defaultDefLatency(const MCSchedModel &SchedModel, const MachineInstr &DefMI) const
Return the default expected latency for a def based on its opcode.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
bool mustBeginGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if new group must begin.
const MCSchedClassDesc * resolveSchedClass(const MachineInstr *MI) const
Return the MCSchedClassDesc for this instruction.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
double computeReciprocalThroughput(const MachineInstr *MI) const
Compute the reciprocal throughput of the given instruction.
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
const InstrItineraryData * getInstrItineraries() const
bool hasInstrItineraries() const
Return true if this machine model includes cycle-to-cycle itinerary data.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
virtual unsigned resolveSchedClass(unsigned SchedClass, const MachineInstr *MI, const TargetSchedModel *SchedModel) const
Resolve a SchedClass at runtime, where SchedClass identifies an MCSchedClassDesc with the isVariant p...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:118
bool isValid() const
Definition: MCSchedule.h:136
bool isVariant() const
Definition: MCSchedule.h:139
uint16_t NumReadAdvanceEntries
Definition: MCSchedule.h:134
bool isOutOfOrder() const
Return true if machine supports out of order execution.
Definition: MCSchedule.h:347
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:360
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:349
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:334
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
Definition: MCSchedule.cpp:42
unsigned IssueWidth
Definition: MCSchedule.h:265
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:353
static double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Definition: MCSchedule.cpp:90
bool isComplete() const
Return true if this machine model data for all instructions with a scheduling class (itinerary class ...
Definition: MCSchedule.h:344
Specify the latency in cpu cycles for a particular scheduling class and def index.
Definition: MCSchedule.h:86
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63