Line data Source code
1 : //===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file implements a wrapper around MCSchedModel that allows the interface
11 : // to benefit from information currently only available in TargetInstrInfo.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "llvm/CodeGen/TargetSchedule.h"
16 : #include "llvm/CodeGen/MachineFunction.h"
17 : #include "llvm/CodeGen/MachineInstr.h"
18 : #include "llvm/CodeGen/MachineOperand.h"
19 : #include "llvm/CodeGen/TargetInstrInfo.h"
20 : #include "llvm/CodeGen/TargetRegisterInfo.h"
21 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
22 : #include "llvm/MC/MCInstrDesc.h"
23 : #include "llvm/MC/MCInstrItineraries.h"
24 : #include "llvm/MC/MCSchedule.h"
25 : #include "llvm/Support/CommandLine.h"
26 : #include "llvm/Support/ErrorHandling.h"
27 : #include "llvm/Support/raw_ostream.h"
28 : #include <algorithm>
29 : #include <cassert>
30 : #include <cstdint>
31 :
32 : using namespace llvm;
33 :
// -schedmodel: when false, ignore the subtarget's per-operand MCSchedModel
// even if one is available; latency queries then fall back to itineraries
// or TII default latencies.
static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
  cl::desc("Use TargetSchedModel for latency lookup"));

// -scheditins: when false, ignore InstrItineraryData even if the subtarget
// provides itineraries.
static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
  cl::desc("Use InstrItineraryData for latency lookup"));
39 :
40 61691156 : bool TargetSchedModel::hasInstrSchedModel() const {
41 61691156 : return EnableSchedModel && SchedModel.hasInstrSchedModel();
42 : }
43 :
44 24289308 : bool TargetSchedModel::hasInstrItineraries() const {
45 24289308 : return EnableSchedItins && !InstrItins.isEmpty();
46 : }
47 :
/// Greatest common divisor by Euclid's algorithm. gcd(x, 0) == x, so a zero
/// divisor terminates the recursion immediately.
static unsigned gcd(unsigned Dividend, unsigned Divisor) {
  // Each step swaps the roles of the two values via the remainder.
  return Divisor ? gcd(Divisor, Dividend % Divisor) : Dividend;
}
57 :
58 : static unsigned lcm(unsigned A, unsigned B) {
59 11770088 : unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
60 : assert((LCM >= A && LCM >= B) && "LCM overflow");
61 : return LCM;
62 : }
63 :
64 1286468 : void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) {
65 1286468 : STI = TSInfo;
66 1286468 : SchedModel = TSInfo->getSchedModel();
67 1286468 : TII = TSInfo->getInstrInfo();
68 1286468 : STI->initInstrItins(InstrItins);
69 :
70 1286468 : unsigned NumRes = SchedModel.getNumProcResourceKinds();
71 1286468 : ResourceFactors.resize(NumRes);
72 1286468 : ResourceLCM = SchedModel.IssueWidth;
73 7580592 : for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
74 6294124 : unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
75 6294124 : if (NumUnits > 0)
76 11770088 : ResourceLCM = lcm(ResourceLCM, NumUnits);
77 : }
78 1286468 : MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
79 7580592 : for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
80 6294124 : unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
81 6294124 : ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
82 : }
83 1286468 : }
84 :
85 : /// Returns true only if instruction is specified as single issue.
86 2463773 : bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
87 : const MCSchedClassDesc *SC) const {
88 2463773 : if (hasInstrSchedModel()) {
89 1036589 : if (!SC)
90 1036589 : SC = resolveSchedClass(MI);
91 1036589 : if (SC->isValid())
92 1029988 : return SC->BeginGroup;
93 : }
94 : return false;
95 : }
96 :
97 8626058 : bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
98 : const MCSchedClassDesc *SC) const {
99 8626058 : if (hasInstrSchedModel()) {
100 3171254 : if (!SC)
101 3171254 : SC = resolveSchedClass(MI);
102 3171254 : if (SC->isValid())
103 3167962 : return SC->EndGroup;
104 : }
105 : return false;
106 : }
107 :
108 15284014 : unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
109 : const MCSchedClassDesc *SC) const {
110 15284014 : if (hasInstrItineraries()) {
111 331990 : int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
112 331990 : return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
113 : }
114 14952024 : if (hasInstrSchedModel()) {
115 6806846 : if (!SC)
116 5718716 : SC = resolveSchedClass(MI);
117 6806846 : if (SC->isValid())
118 6782834 : return SC->NumMicroOps;
119 : }
120 : return MI->isTransient() ? 0 : 1;
121 : }
122 :
// The machine model may explicitly specify an invalid latency, which
// effectively means infinite latency. Since users of the TargetSchedule API
// don't know how to handle this, we convert it to a very large latency that is
// easy to distinguish when debugging the DAG but won't induce overflow.
static unsigned capLatency(int Cycles) {
  if (Cycles < 0)
    return 1000;
  return static_cast<unsigned>(Cycles);
}
130 :
131 : /// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
132 : /// evaluation of predicates that depend on instruction operands or flags.
133 15565205 : const MCSchedClassDesc *TargetSchedModel::
134 : resolveSchedClass(const MachineInstr *MI) const {
135 : // Get the definition's scheduling class descriptor from this machine model.
136 15565205 : unsigned SchedClass = MI->getDesc().getSchedClass();
137 15565205 : const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
138 15565205 : if (!SCDesc->isValid())
139 : return SCDesc;
140 :
141 : #ifndef NDEBUG
142 : unsigned NIter = 0;
143 : #endif
144 16677874 : while (SCDesc->isVariant()) {
145 : assert(++NIter < 6 && "Variants are nested deeper than the magic number");
146 :
147 1181663 : SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
148 1181663 : SCDesc = SchedModel.getSchedClassDesc(SchedClass);
149 : }
150 : return SCDesc;
151 : }
152 :
153 : /// Find the def index of this operand. This index maps to the machine model and
154 : /// is independent of use operands. Def operands may be reordered with uses or
155 : /// merged with uses without affecting the def index (e.g. before/after
156 : /// regalloc). However, an instruction's def operands must never be reordered
157 : /// with respect to each other.
158 : static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
159 : unsigned DefIdx = 0;
160 1913271 : for (unsigned i = 0; i != DefOperIdx; ++i) {
161 281255 : const MachineOperand &MO = MI->getOperand(i);
162 281255 : if (MO.isReg() && MO.isDef())
163 90234 : ++DefIdx;
164 : }
165 : return DefIdx;
166 : }
167 :
168 : /// Find the use index of this operand. This is independent of the instruction's
169 : /// def operands.
170 : ///
171 : /// Note that uses are not determined by the operand's isUse property, which
172 : /// is simply the inverse of isDef. Here we consider any readsReg operand to be
173 : /// a "use". The machine model allows an operand to be both a Def and Use.
174 16939 : static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
175 : unsigned UseIdx = 0;
176 41909 : for (unsigned i = 0; i != UseOperIdx; ++i) {
177 24970 : const MachineOperand &MO = MI->getOperand(i);
178 32921 : if (MO.isReg() && MO.readsReg() && !MO.isDef())
179 7854 : ++UseIdx;
180 : }
181 16939 : return UseIdx;
182 : }
183 :
// Top-level API for clients that know the operand indices.
//
// Computes the latency from DefMI's operand DefOperIdx to UseMI's operand
// UseOperIdx. UseMI may be null, in which case only the def's write latency
// is returned. Lookup order: itineraries, then the per-operand machine
// model, then TII's default latency.
unsigned TargetSchedModel::computeOperandLatency(
  const MachineInstr *DefMI, unsigned DefOperIdx,
  const MachineInstr *UseMI, unsigned UseOperIdx) const {

  // No model of any kind: fall back to the target hook's default.
  if (!hasInstrSchedModel() && !hasInstrItineraries())
    return TII->defaultDefLatency(SchedModel, *DefMI);

  if (hasInstrItineraries()) {
    int OperLatency = 0;
    if (UseMI) {
      OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
                                           *UseMI, UseOperIdx);
    }
    else {
      // No use instruction: query the def operand's cycle directly.
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
    }
    if (OperLatency >= 0)
      return OperLatency;

    // No operand latency was found.
    unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);

    // Expected latency is the max of the stage latency and itinerary props.
    // Rather than directly querying InstrItins stage latency, we call a TII
    // hook to allow subtargets to specialize latency. This hook is only
    // applicable to the InstrItins model. InstrSchedModel should model all
    // special cases without TII hooks.
    InstrLatency =
      std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
    return InstrLatency;
  }
  // hasInstrSchedModel()
  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
  unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
    // Lookup the definition's write latency in SubtargetInfo.
    const MCWriteLatencyEntry *WLEntry =
      STI->getWriteLatencyEntry(SCDesc, DefIdx);
    unsigned WriteID = WLEntry->WriteResourceID;
    unsigned Latency = capLatency(WLEntry->Cycles);
    if (!UseMI)
      return Latency;

    // Lookup the use's latency adjustment in SubtargetInfo.
    const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
    if (UseDesc->NumReadAdvanceEntries == 0)
      return Latency;
    unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
    // A positive ReadAdvance shortens the effective latency; clamp at zero
    // rather than letting the unsigned subtraction wrap.
    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
    if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
      return 0;
    return Latency - Advance;
  }
  // If DefIdx does not exist in the model (e.g. implicit defs), then return
  // unit latency (defaultDefLatency may be too conservative).
#ifndef NDEBUG
  // Debug builds diagnose explicit defs missing from a supposedly complete
  // model instead of silently falling back.
  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
      && SchedModel.isComplete()) {
    errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
           << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
    llvm_unreachable("incomplete machine model");
  }
#endif
  // FIXME: Automatically giving all implicit defs defaultDefLatency is
  // undesirable. We should only do it for defs that are known to the MC
  // desc like flags. Truly implicit defs should get 1 cycle latency.
  return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
}
255 :
256 : unsigned
257 1348744 : TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
258 1348744 : return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc));
259 : }
260 :
261 16164 : unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
262 : assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
263 32328 : unsigned SCIdx = TII->get(Opcode).getSchedClass();
264 16164 : return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx));
265 : }
266 :
267 60861 : unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const {
268 60861 : if (hasInstrSchedModel())
269 60861 : return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst));
270 0 : return computeInstrLatency(Inst.getOpcode());
271 : }
272 :
273 : unsigned
274 3927024 : TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
275 : bool UseDefaultDefLatency) const {
276 : // For the itinerary model, fall back to the old subtarget hook.
277 : // Allow subtargets to compute Bundle latencies outside the machine model.
278 7418800 : if (hasInstrItineraries() || MI->isBundle() ||
279 5620651 : (!hasInstrSchedModel() && !UseDefaultDefLatency))
280 445957 : return TII->getInstrLatency(&InstrItins, *MI);
281 :
282 3481067 : if (hasInstrSchedModel()) {
283 1362901 : const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
284 1362901 : if (SCDesc->isValid())
285 1348744 : return computeInstrLatency(*SCDesc);
286 : }
287 2132323 : return TII->defaultDefLatency(SchedModel, *MI);
288 : }
289 :
290 1308678 : unsigned TargetSchedModel::
291 : computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
292 : const MachineInstr *DepMI) const {
293 1308678 : if (!SchedModel.isOutOfOrder())
294 : return 1;
295 :
296 : // Out-of-order processor can dispatch WAW dependencies in the same cycle.
297 :
298 : // Treat predication as a data dependency for out-of-order cpus. In-order
299 : // cpus do not need to treat predicated writes specially.
300 : //
301 : // TODO: The following hack exists because predication passes do not
302 : // correctly append imp-use operands, and readsReg() strangely returns false
303 : // for predicated defs.
304 984661 : unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
305 984661 : const MachineFunction &MF = *DefMI->getMF();
306 984661 : const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
307 984661 : if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
308 1 : return computeInstrLatency(DefMI);
309 :
310 : // If we have a per operand scheduling model, check if this def is writing
311 : // an unbuffered resource. If so, it treated like an in-order cpu.
312 984660 : if (hasInstrSchedModel()) {
313 151147 : const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
314 151147 : if (SCDesc->isValid()) {
315 490492 : for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
316 490492 : *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
317 682210 : if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
318 : return 1;
319 : }
320 : }
321 : }
322 : return 0;
323 : }
324 :
325 : double
326 8 : TargetSchedModel::computeReciprocalThroughput(const MachineInstr *MI) const {
327 8 : if (hasInstrItineraries()) {
328 0 : unsigned SchedClass = MI->getDesc().getSchedClass();
329 0 : return MCSchedModel::getReciprocalThroughput(SchedClass,
330 0 : *getInstrItineraries());
331 : }
332 :
333 8 : if (hasInstrSchedModel())
334 8 : return MCSchedModel::getReciprocalThroughput(*STI, *resolveSchedClass(MI));
335 :
336 : return 0.0;
337 : }
338 :
339 : double
340 0 : TargetSchedModel::computeReciprocalThroughput(unsigned Opcode) const {
341 0 : unsigned SchedClass = TII->get(Opcode).getSchedClass();
342 0 : if (hasInstrItineraries())
343 0 : return MCSchedModel::getReciprocalThroughput(SchedClass,
344 0 : *getInstrItineraries());
345 0 : if (hasInstrSchedModel()) {
346 0 : const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass);
347 0 : if (SCDesc.isValid() && !SCDesc.isVariant())
348 0 : return MCSchedModel::getReciprocalThroughput(*STI, SCDesc);
349 : }
350 :
351 : return 0.0;
352 : }
353 :
354 : double
355 60861 : TargetSchedModel::computeReciprocalThroughput(const MCInst &MI) const {
356 60861 : if (hasInstrSchedModel())
357 60861 : return SchedModel.getReciprocalThroughput(*STI, *TII, MI);
358 0 : return computeReciprocalThroughput(MI.getOpcode());
359 : }
360 :
|