LLVM 20.0.0git
MachineTraceMetrics.h
Go to the documentation of this file.
1//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interface for the MachineTraceMetrics analysis pass
10// that estimates CPU resource usage and critical data dependency paths through
11// preferred traces. This is useful for super-scalar CPUs where execution speed
12// can be limited both by data dependencies and by limited execution resources.
13//
14// Out-of-order CPUs will often be executing instructions from multiple basic
15// blocks at the same time. This makes it difficult to estimate the resource
16// usage accurately in a single basic block. Resources can be estimated better
17// by looking at a trace through the current basic block.
18//
19// For every block, the MachineTraceMetrics pass will pick a preferred trace
20// that passes through the block. The trace is chosen based on loop structure,
21// branch probabilities, and resource usage. The intention is to pick likely
22// traces that would be the most affected by code transformations.
23//
24// It is expensive to compute a full arbitrary trace for every block, so to
25// save some computations, traces are chosen to be convergent. This means that
26// if the traces through basic blocks A and B ever cross when moving away from
27// A and B, they never diverge again. This applies in both directions - If the
28// traces meet above A and B, they won't diverge when going further back.
29//
30// Traces tend to align with loops. The trace through a block in an inner loop
31// will begin at the loop entry block and end at a back edge. If there are
32// nested loops, the trace may begin and end at those instead.
33//
34// For each trace, we compute the critical path length, which is the number of
35// cycles required to execute the trace when execution is limited by data
36// dependencies only. We also compute the resource height, which is the number
37// of cycles required to execute all instructions in the trace when ignoring
38// data dependencies.
39//
40// Every instruction in the current block has a slack - the number of cycles
41// execution of the instruction can be delayed without extending the critical
42// path.
43//
44//===----------------------------------------------------------------------===//
45
46#ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
47#define LLVM_CODEGEN_MACHINETRACEMETRICS_H
48
49#include "llvm/ADT/SparseSet.h"
50#include "llvm/ADT/ArrayRef.h"
51#include "llvm/ADT/DenseMap.h"
56
57namespace llvm {
58
59class AnalysisUsage;
60class MachineFunction;
61class MachineInstr;
62class MachineLoop;
63class MachineLoopInfo;
64class MachineRegisterInfo;
65struct MCSchedClassDesc;
66class raw_ostream;
67class TargetInstrInfo;
68class TargetRegisterInfo;
69
70// Keep track of physreg data dependencies by recording each live register unit.
71// Associate each regunit with an instruction operand. Depending on the
72// direction instructions are scanned, it could be the operand that defined the
73// regunit, or the highest operand to read the regunit.
75 unsigned RegUnit;
76 unsigned Cycle = 0;
77 const MachineInstr *MI = nullptr;
78 unsigned Op = 0;
79
80 unsigned getSparseSetIndex() const { return RegUnit; }
81
82 LiveRegUnit(unsigned RU) : RegUnit(RU) {}
83};
84
85/// Strategies for selecting traces.
87 /// Select the trace through a block that has the fewest instructions.
89 /// Select the trace that contains only the current basic block. For instance,
90 /// this strategy can be used by MachineCombiner to make better decisions when
91 /// we estimate critical path for in-order cores.
94};
95
97 const MachineFunction *MF = nullptr;
98 const TargetInstrInfo *TII = nullptr;
99 const TargetRegisterInfo *TRI = nullptr;
100 const MachineRegisterInfo *MRI = nullptr;
101 const MachineLoopInfo *Loops = nullptr;
102 TargetSchedModel SchedModel;
103
104public:
105 friend class Ensemble;
106 friend class Trace;
107
108 class Ensemble;
109
110 static char ID;
111
113
114 void getAnalysisUsage(AnalysisUsage&) const override;
116 void releaseMemory() override;
117 void verifyAnalysis() const override;
118
119 /// Per-basic block information that doesn't depend on the trace through the
120 /// block.
122 /// The number of non-trivial instructions in the block.
123 /// Doesn't count PHI and COPY instructions that are likely to be removed.
124 unsigned InstrCount = ~0u;
125
126 /// True when the block contains calls.
127 bool HasCalls = false;
128
129 FixedBlockInfo() = default;
130
131 /// Returns true when resource information for this block has been computed.
132 bool hasResources() const { return InstrCount != ~0u; }
133
134 /// Invalidate resource information.
135 void invalidate() { InstrCount = ~0u; }
136 };
137
138 /// Get the fixed resource information about MBB. Compute it on demand.
139 const FixedBlockInfo *getResources(const MachineBasicBlock*);
140
141 /// Get the scaled number of cycles used per processor resource in MBB.
142 /// This is an array with SchedModel.getNumProcResourceKinds() entries.
143 /// The getResources() function above must have been called first.
144 ///
145 /// These numbers have already been scaled by SchedModel.getResourceFactor().
146 ArrayRef<unsigned> getProcReleaseAtCycles(unsigned MBBNum) const;
147
148 /// A virtual register or regunit required by a basic block or its trace
149 /// successors.
150 struct LiveInReg {
151 /// The virtual register required, or a register unit.
153
154 /// For virtual registers: Minimum height of the defining instruction.
155 /// For regunits: Height of the highest user in the trace.
156 unsigned Height;
157
159 };
160
161 /// Per-basic block information that relates to a specific trace through the
162 /// block. Convergent traces means that only one of these is required per
163 /// block in a trace ensemble.
165 /// Trace predecessor, or NULL for the first block in the trace.
166 /// Valid when hasValidDepth().
167 const MachineBasicBlock *Pred = nullptr;
168
169 /// Trace successor, or NULL for the last block in the trace.
170 /// Valid when hasValidHeight().
171 const MachineBasicBlock *Succ = nullptr;
172
173 /// The block number of the head of the trace. (When hasValidDepth()).
174 unsigned Head;
175
176 /// The block number of the tail of the trace. (When hasValidHeight()).
177 unsigned Tail;
178
179 /// Accumulated number of instructions in the trace above this block.
180 /// Does not include instructions in this block.
181 unsigned InstrDepth = ~0u;
182
183 /// Accumulated number of instructions in the trace below this block.
184 /// Includes instructions in this block.
185 unsigned InstrHeight = ~0u;
186
187 TraceBlockInfo() = default;
188
189 /// Returns true if the depth resources have been computed from the trace
190 /// above this block.
191 bool hasValidDepth() const { return InstrDepth != ~0u; }
192
193 /// Returns true if the height resources have been computed from the trace
194 /// below this block.
195 bool hasValidHeight() const { return InstrHeight != ~0u; }
196
197 /// Invalidate depth resources when some block above this one has changed.
199
200 /// Invalidate height resources when a block below this one has changed.
202
203 /// Assuming that this is a dominator of TBI, determine if it contains
204 /// useful instruction depths. A dominating block can be above the current
205 /// trace head, and any dependencies from such a far away dominator are not
206 /// expected to affect the critical path.
207 ///
208 /// Also returns true when TBI == this.
209 bool isUsefulDominator(const TraceBlockInfo &TBI) const {
210 // The trace for TBI may not even be calculated yet.
211 if (!hasValidDepth() || !TBI.hasValidDepth())
212 return false;
213 // Instruction depths are only comparable if the traces share a head.
214 if (Head != TBI.Head)
215 return false;
216 // It is almost always the case that TBI belongs to the same trace as
217 // this block, but rare convoluted cases involving irreducible control
218 // flow, a dominator may share a trace head without actually being on the
219 // same trace as TBI. This is not a big problem as long as it doesn't
220 // increase the instruction depth.
222 }
223
224 // Data-dependency-related information. Per-instruction depth and height
225 // are computed from data dependencies in the current trace, using
226 // itinerary data.
227
228 /// Instruction depths have been computed. This implies hasValidDepth().
230
231 /// Instruction heights have been computed. This implies hasValidHeight().
233
234 /// Critical path length. This is the number of cycles in the longest data
235 /// dependency chain through the trace. This is only valid when both
236 /// HasValidInstrDepths and HasValidInstrHeights are set.
237 unsigned CriticalPath;
238
239 /// Live-in registers. These registers are defined above the current block
240 /// and used by this block or a block below it.
241 /// This does not include PHI uses in the current block, but it does
242 /// include PHI uses in deeper blocks.
244
245 void print(raw_ostream&) const;
246 void dump() const { print(dbgs()); }
247 };
248
249 /// InstrCycles represents the cycle height and depth of an instruction in a
250 /// trace.
251 struct InstrCycles {
252 /// Earliest issue cycle as determined by data dependencies and instruction
253 /// latencies from the beginning of the trace. Data dependencies from
254 /// before the trace are not included.
255 unsigned Depth;
256
257 /// Minimum number of cycles from this instruction is issued to the of the
258 /// trace, as determined by data dependencies and instruction latencies.
259 unsigned Height;
260 };
261
262 /// A trace represents a plausible sequence of executed basic blocks that
263 /// passes through the current basic block one. The Trace class serves as a
264 /// handle to internal cached data structures.
265 class Trace {
266 Ensemble &TE;
267 TraceBlockInfo &TBI;
268
269 unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
270
271 public:
272 explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
273
274 void print(raw_ostream&) const;
275 void dump() const { print(dbgs()); }
276
277 /// Compute the total number of instructions in the trace.
278 unsigned getInstrCount() const {
279 return TBI.InstrDepth + TBI.InstrHeight;
280 }
281
282 /// Return the resource depth of the top/bottom of the trace center block.
283 /// This is the number of cycles required to execute all instructions from
284 /// the trace head to the trace center block. The resource depth only
285 /// considers execution resources, it ignores data dependencies.
286 /// When Bottom is set, instructions in the trace center block are included.
287 unsigned getResourceDepth(bool Bottom) const;
288
289 /// Return the resource length of the trace. This is the number of cycles
290 /// required to execute the instructions in the trace if they were all
291 /// independent, exposing the maximum instruction-level parallelism.
292 ///
293 /// Any blocks in Extrablocks are included as if they were part of the
294 /// trace. Likewise, extra resources required by the specified scheduling
295 /// classes are included. For the caller to account for extra machine
296 /// instructions, it must first resolve each instruction's scheduling class.
297 unsigned getResourceLength(
298 ArrayRef<const MachineBasicBlock *> Extrablocks = std::nullopt,
299 ArrayRef<const MCSchedClassDesc *> ExtraInstrs = std::nullopt,
300 ArrayRef<const MCSchedClassDesc *> RemoveInstrs = std::nullopt) const;
301
302 /// Return the length of the (data dependency) critical path through the
303 /// trace.
304 unsigned getCriticalPath() const { return TBI.CriticalPath; }
305
306 /// Return the depth and height of MI. The depth is only valid for
307 /// instructions in or above the trace center block. The height is only
308 /// valid for instructions in or below the trace center block.
310 return TE.Cycles.lookup(&MI);
311 }
312
313 /// Return the slack of MI. This is the number of cycles MI can be delayed
314 /// before the critical path becomes longer.
315 /// MI must be an instruction in the trace center block.
316 unsigned getInstrSlack(const MachineInstr &MI) const;
317
318 /// Return the Depth of a PHI instruction in a trace center block successor.
319 /// The PHI does not have to be part of the trace.
320 unsigned getPHIDepth(const MachineInstr &PHI) const;
321
322 /// A dependence is useful if the basic block of the defining instruction
323 /// is part of the trace of the user instruction. It is assumed that DefMI
324 /// dominates UseMI (see also isUsefulDominator).
325 bool isDepInTrace(const MachineInstr &DefMI,
326 const MachineInstr &UseMI) const;
327 };
328
329 /// A trace ensemble is a collection of traces selected using the same
330 /// strategy, for example 'minimum resource height'. There is one trace for
331 /// every block in the function.
332 class Ensemble {
333 friend class Trace;
334
337 SmallVector<unsigned, 0> ProcResourceDepths;
338 SmallVector<unsigned, 0> ProcResourceHeights;
339
340 void computeTrace(const MachineBasicBlock*);
341 void computeDepthResources(const MachineBasicBlock*);
342 void computeHeightResources(const MachineBasicBlock*);
343 unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
344 void computeInstrDepths(const MachineBasicBlock*);
345 void computeInstrHeights(const MachineBasicBlock*);
346 void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
348
349 protected:
351
352 explicit Ensemble(MachineTraceMetrics*);
353
356 const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
359 ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
360 ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
361
362 public:
363 virtual ~Ensemble();
364
365 virtual const char *getName() const = 0;
366 void print(raw_ostream &) const;
367 void dump() const { print(dbgs()); }
368 void invalidate(const MachineBasicBlock *MBB);
369 void verify() const;
370
371 /// Get the trace that passes through MBB.
372 /// The trace is computed on demand.
374
375 /// Updates the depth of an machine instruction, given RegUnits.
376 void updateDepth(TraceBlockInfo &TBI, const MachineInstr&,
377 SparseSet<LiveRegUnit> &RegUnits);
378 void updateDepth(const MachineBasicBlock *, const MachineInstr&,
379 SparseSet<LiveRegUnit> &RegUnits);
380
381 /// Updates the depth of the instructions from Start to End.
384 SparseSet<LiveRegUnit> &RegUnits);
385
386 };
387
388 /// Get the trace ensemble representing the given trace selection strategy.
389 /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
390 /// and valid for the lifetime of the analysis pass.
392
393 /// Invalidate cached information about MBB. This must be called *before* MBB
394 /// is erased, or the CFG is otherwise changed.
395 ///
396 /// This invalidates per-block information about resource usage for MBB only,
397 /// and it invalidates per-trace information for any trace that passes
398 /// through MBB.
399 ///
400 /// Call Ensemble::getTrace() again to update any trace handles.
401 void invalidate(const MachineBasicBlock *MBB);
402
403private:
404 // One entry per basic block, indexed by block number.
406
407 // Cycles consumed on each processor resource per block.
408 // The number of processor resource kinds is constant for a given subtarget,
409 // but it is not known at compile time. The number of cycles consumed by
410 // block B on processor resource R is at ProcReleaseAtCycles[B*Kinds + R]
411 // where Kinds = SchedModel.getNumProcResourceKinds().
412 SmallVector<unsigned, 0> ProcReleaseAtCycles;
413
414 // One ensemble per strategy.
415 Ensemble
416 *Ensembles[static_cast<size_t>(MachineTraceStrategy::TS_NumStrategies)];
417
418 // Convert scaled resource usage to a cycle count that can be compared with
419 // latencies.
420 unsigned getCycles(unsigned Scaled) {
421 unsigned Factor = SchedModel.getLatencyFactor();
422 return (Scaled + Factor - 1) / Factor;
423 }
424};
425
427 const MachineTraceMetrics::Trace &Tr) {
428 Tr.print(OS);
429 return OS;
430}
431
434 En.print(OS);
435 return OS;
436}
437
438} // end namespace llvm
439
440#endif // LLVM_CODEGEN_MACHINETRACEMETRICS_H
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
@ Scaled
MachineBasicBlock & MBB
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
IRTranslator LLVM IR MI
LiveInRegUnits addLiveIns(MBB)
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
This class represents an Operation in the Expression.
A possibly irreducible generalization of a Loop.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
void invalidate(const MachineBasicBlock *MBB)
Invalidate traces through BadMBB.
ArrayRef< unsigned > getProcResourceHeights(unsigned MBBNum) const
Get an array of processor resource heights for MBB.
virtual const MachineBasicBlock * pickTracePred(const MachineBasicBlock *)=0
void updateDepth(TraceBlockInfo &TBI, const MachineInstr &, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of an machine instruction, given RegUnits.
const MachineLoop * getLoopFor(const MachineBasicBlock *) const
const TraceBlockInfo * getHeightResources(const MachineBasicBlock *) const
void updateDepths(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, SparseSet< LiveRegUnit > &RegUnits)
Updates the depth of the instructions from Start to End.
const TraceBlockInfo * getDepthResources(const MachineBasicBlock *) const
virtual const char * getName() const =0
ArrayRef< unsigned > getProcResourceDepths(unsigned MBBNum) const
Get an array of processor resource depths for MBB.
virtual const MachineBasicBlock * pickTraceSucc(const MachineBasicBlock *)=0
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned getInstrCount() const
Compute the total number of instructions in the trace.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getInstrSlack(const MachineInstr &MI) const
Return the slack of MI.
bool isDepInTrace(const MachineInstr &DefMI, const MachineInstr &UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=std::nullopt, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=std::nullopt, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=std::nullopt) const
Return the resource length of the trace.
Trace(Ensemble &te, TraceBlockInfo &tbi)
unsigned getCriticalPath() const
Return the length of the (data dependency) critical path through the trace.
unsigned getPHIDepth(const MachineInstr &PHI) const
Return the Depth of a PHI instruction in a trace center block successor.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
bool runOnMachineFunction(MachineFunction &) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
void getAnalysisUsage(AnalysisUsage &) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
void verifyAnalysis() const override
verifyAnalysis() - This member can be implemented by a analysis pass to check state of analysis infor...
const FixedBlockInfo * getResources(const MachineBasicBlock *)
Get the fixed resource information about MBB. Compute it on demand.
ArrayRef< unsigned > getProcReleaseAtCycles(unsigned MBBNum) const
Get the scaled number of cycles used per processor resource in MBB.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
unsigned getLatencyFactor() const
Multiply cycle count by this factor to normalize it relative to other resources.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineTraceStrategy
Strategies for selecting traces.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
@ TS_Local
Select the trace that contains only the current basic block.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
unsigned getSparseSetIndex() const
const MachineInstr * MI
LiveRegUnit(unsigned RU)
Per-basic block information that doesn't depend on the trace through the block.
bool hasResources() const
Returns true when resource information for this block has been computed.
unsigned InstrCount
The number of non-trivial instructions in the block.
void invalidate()
Invalidate resource information.
bool HasCalls
True when the block contains calls.
InstrCycles represents the cycle height and depth of an instruction in a trace.
unsigned Height
Minimum number of cycles from this instruction is issued to the of the trace, as determined by data d...
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
A virtual register or regunit required by a basic block or its trace successors.
unsigned Height
For virtual registers: Minimum height of the defining instruction.
Register Reg
The virtual register required, or a register unit.
LiveInReg(Register Reg, unsigned Height=0)
Per-basic block information that relates to a specific trace through the block.
unsigned InstrDepth
Accumulated number of instructions in the trace above this block.
void invalidateDepth()
Invalidate depth resources when some block above this one has changed.
const MachineBasicBlock * Pred
Trace predecessor, or NULL for the first block in the trace.
unsigned InstrHeight
Accumulated number of instructions in the trace below this block.
SmallVector< LiveInReg, 4 > LiveIns
Live-in registers.
const MachineBasicBlock * Succ
Trace successor, or NULL for the last block in the trace.
bool hasValidDepth() const
Returns true if the depth resources have been computed from the trace above this block.
bool isUsefulDominator(const TraceBlockInfo &TBI) const
Assuming that this is a dominator of TBI, determine if it contains useful instruction depths.
void invalidateHeight()
Invalidate height resources when a block below this one has changed.
unsigned CriticalPath
Critical path length.
unsigned Head
The block number of the head of the trace. (When hasValidDepth()).
bool HasValidInstrDepths
Instruction depths have been computed. This implies hasValidDepth().
bool hasValidHeight() const
Returns true if the height resources have been computed from the trace below this block.
unsigned Tail
The block number of the tail of the trace. (When hasValidHeight()).
bool HasValidInstrHeights
Instruction heights have been computed. This implies hasValidHeight().