LLVM  3.7.0
MachineCombiner.cpp
Go to the documentation of this file.
1 //===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The machine combiner pass uses machine trace metrics to ensure the combined
11 // instructions does not lengthen the critical path or the resource depth.
12 //===----------------------------------------------------------------------===//
13 #define DEBUG_TYPE "machine-combiner"
14 
15 #include "llvm/ADT/Statistic.h"
16 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/CodeGen/Passes.h"
27 #include "llvm/Support/Debug.h"
32 
33 using namespace llvm;
34 
35 STATISTIC(NumInstCombined, "Number of machineinst combined");
36 
37 namespace {
38 class MachineCombiner : public MachineFunctionPass {
39  const TargetInstrInfo *TII;
40  const TargetRegisterInfo *TRI;
41  MCSchedModel SchedModel;
43  MachineTraceMetrics *Traces;
45 
46  TargetSchedModel TSchedModel;
47 
48  /// True if optimizing for code size.
49  bool OptSize;
50 
51 public:
52  static char ID;
53  MachineCombiner() : MachineFunctionPass(ID) {
55  }
56  void getAnalysisUsage(AnalysisUsage &AU) const override;
57  bool runOnMachineFunction(MachineFunction &MF) override;
58  const char *getPassName() const override { return "Machine InstCombiner"; }
59 
60 private:
61  bool doSubstitute(unsigned NewSize, unsigned OldSize);
62  bool combineInstructions(MachineBasicBlock *);
63  MachineInstr *getOperandDef(const MachineOperand &MO);
64  unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
65  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
66  MachineTraceMetrics::Trace BlockTrace);
67  unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
68  MachineTraceMetrics::Trace BlockTrace);
69  bool
70  improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
71  MachineTraceMetrics::Trace BlockTrace,
73  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
74  bool NewCodeHasLessInsts);
75  bool preservesResourceLen(MachineBasicBlock *MBB,
76  MachineTraceMetrics::Trace BlockTrace,
79  void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
81 };
82 }
83 
84 char MachineCombiner::ID = 0;
86 
87 INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
88  "Machine InstCombiner", false, false)
90 INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
91  false, false)
92 
93 void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
94  AU.setPreservesCFG();
95  AU.addPreserved<MachineDominatorTree>();
96  AU.addPreserved<MachineLoopInfo>();
97  AU.addRequired<MachineTraceMetrics>();
98  AU.addPreserved<MachineTraceMetrics>();
100 }
101 
102 MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
103  MachineInstr *DefInstr = nullptr;
104  // We need a virtual register definition.
106  DefInstr = MRI->getUniqueVRegDef(MO.getReg());
107  // PHI's have no depth etc.
108  if (DefInstr && DefInstr->isPHI())
109  DefInstr = nullptr;
110  return DefInstr;
111 }
112 
113 /// Computes depth of instructions in vector \InsInstr.
114 ///
115 /// \param InsInstrs is a vector of machine instructions
116 /// \param InstrIdxForVirtReg is a dense map of virtual register to index
117 /// of defining machine instruction in \p InsInstrs
118 /// \param BlockTrace is a trace of machine instructions
119 ///
120 /// \returns Depth of last instruction in \InsInstrs ("NewRoot")
121 unsigned
122 MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
123  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
124  MachineTraceMetrics::Trace BlockTrace) {
125 
126  SmallVector<unsigned, 16> InstrDepth;
127  assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
128 
129  // For each instruction in the new sequence compute the depth based on the
130  // operands. Use the trace information when possible. For new operands which
131  // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
132  for (auto *InstrPtr : InsInstrs) { // for each Use
133  unsigned IDepth = 0;
134  DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
135  for (const MachineOperand &MO : InstrPtr->operands()) {
136  // Check for virtual register operand.
138  continue;
139  if (!MO.isUse())
140  continue;
141  unsigned DepthOp = 0;
142  unsigned LatencyOp = 0;
144  InstrIdxForVirtReg.find(MO.getReg());
145  if (II != InstrIdxForVirtReg.end()) {
146  // Operand is new virtual register not in trace
147  assert(II->second < InstrDepth.size() && "Bad Index");
148  MachineInstr *DefInstr = InsInstrs[II->second];
149  assert(DefInstr &&
150  "There must be a definition for a new virtual register");
151  DepthOp = InstrDepth[II->second];
152  LatencyOp = TSchedModel.computeOperandLatency(
153  DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
154  InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
155  } else {
156  MachineInstr *DefInstr = getOperandDef(MO);
157  if (DefInstr) {
158  DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth;
159  LatencyOp = TSchedModel.computeOperandLatency(
160  DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
161  InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
162  }
163  }
164  IDepth = std::max(IDepth, DepthOp + LatencyOp);
165  }
166  InstrDepth.push_back(IDepth);
167  }
168  unsigned NewRootIdx = InsInstrs.size() - 1;
169  return InstrDepth[NewRootIdx];
170 }
171 
172 /// Computes instruction latency as max of latency of defined operands.
173 ///
174 /// \param Root is a machine instruction that could be replaced by NewRoot.
175 /// It is used to compute a more accurate latency information for NewRoot in
176 /// case there is a dependent instruction in the same trace (\p BlockTrace)
177 /// \param NewRoot is the instruction for which the latency is computed
178 /// \param BlockTrace is a trace of machine instructions
179 ///
180 /// \returns Latency of \p NewRoot
181 unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
182  MachineTraceMetrics::Trace BlockTrace) {
183 
184  assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
185 
186  // Check each definition in NewRoot and compute the latency
187  unsigned NewRootLatency = 0;
188 
189  for (const MachineOperand &MO : NewRoot->operands()) {
190  // Check for virtual register operand.
192  continue;
193  if (!MO.isDef())
194  continue;
195  // Get the first instruction that uses MO
196  MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
197  RI++;
198  MachineInstr *UseMO = RI->getParent();
199  unsigned LatencyOp = 0;
200  if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) {
201  LatencyOp = TSchedModel.computeOperandLatency(
202  NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
203  UseMO->findRegisterUseOperandIdx(MO.getReg()));
204  } else {
205  LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
206  }
207  NewRootLatency = std::max(NewRootLatency, LatencyOp);
208  }
209  return NewRootLatency;
210 }
211 
212 /// True when the new instruction sequence does not lengthen the critical path
213 /// and the new sequence has less instructions or the new sequence improves the
214 /// critical path.
215 /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
216 /// The new code sequence ends in MI NewRoot. A necessary condition for the new
217 /// sequence to replace the old sequence is that it cannot lengthen the critical
218 /// path. This is decided by the formula:
219 /// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)).
220 /// If the new sequence has an equal length critical path but does not reduce
221 /// the number of instructions (NewCodeHasLessInsts is false), then it is not
222 /// considered an improvement. The slack is the number of cycles Root can be
223 /// delayed before the critical patch becomes longer.
224 bool MachineCombiner::improvesCriticalPathLen(
225  MachineBasicBlock *MBB, MachineInstr *Root,
226  MachineTraceMetrics::Trace BlockTrace,
228  DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
229  bool NewCodeHasLessInsts) {
230 
231  assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
232  // NewRoot is the last instruction in the \p InsInstrs vector.
233  // Get depth and latency of NewRoot.
234  unsigned NewRootIdx = InsInstrs.size() - 1;
235  MachineInstr *NewRoot = InsInstrs[NewRootIdx];
236  unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
237  unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
238 
239  // Get depth, latency and slack of Root.
240  unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
241  unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
242  unsigned RootSlack = BlockTrace.getInstrSlack(Root);
243 
244  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
245  dbgs() << " NewRootDepth: " << NewRootDepth
246  << " NewRootLatency: " << NewRootLatency << "\n";
247  dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
248  << " RootSlack: " << RootSlack << "\n";
249  dbgs() << " NewRootDepth + NewRootLatency "
250  << NewRootDepth + NewRootLatency << "\n";
251  dbgs() << " RootDepth + RootLatency + RootSlack "
252  << RootDepth + RootLatency + RootSlack << "\n";);
253 
254  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
255  unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
256 
257  if (NewCodeHasLessInsts)
258  return NewCycleCount <= OldCycleCount;
259  else
260  return NewCycleCount < OldCycleCount;
261 }
262 
263 /// helper routine to convert instructions into SC
264 void MachineCombiner::instr2instrSC(
267  for (auto *InstrPtr : Instrs) {
268  unsigned Opc = InstrPtr->getOpcode();
269  unsigned Idx = TII->get(Opc).getSchedClass();
270  const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
271  InstrsSC.push_back(SC);
272  }
273 }
274 /// True when the new instructions do not increase resource length
275 bool MachineCombiner::preservesResourceLen(
278  SmallVectorImpl<MachineInstr *> &DelInstrs) {
279 
280  // Compute current resource length
281 
282  //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
284  MBBarr.push_back(MBB);
285  unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
286 
287  // Deal with SC rather than Instructions.
290 
291  instr2instrSC(InsInstrs, InsInstrsSC);
292  instr2instrSC(DelInstrs, DelInstrsSC);
293 
294  ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
295  ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
296 
297  // Compute new resource length.
298  unsigned ResLenAfterCombine =
299  BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
300 
301  DEBUG(dbgs() << "RESOURCE DATA: \n";
302  dbgs() << " resource len before: " << ResLenBeforeCombine
303  << " after: " << ResLenAfterCombine << "\n";);
304 
305  return ResLenAfterCombine <= ResLenBeforeCombine;
306 }
307 
308 /// \returns true when new instruction sequence should be generated
309 /// independent if it lengthens critical path or not
310 bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
311  if (OptSize && (NewSize < OldSize))
312  return true;
313  if (!TSchedModel.hasInstrSchedModel())
314  return true;
315  return false;
316 }
317 
318 /// Substitute a slow code sequence with a faster one by
319 /// evaluating instruction combining pattern.
320 /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
321 /// combining based on machine trace metrics. Only combine a sequence of
322 /// instructions when this neither lengthens the critical path nor increases
323 /// resource pressure. When optimizing for codesize always combine when the new
324 /// sequence is shorter.
325 bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
326  bool Changed = false;
327  DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
328 
329  auto BlockIter = MBB->begin();
330 
331  while (BlockIter != MBB->end()) {
332  auto &MI = *BlockIter++;
333 
334  DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
336  // The motivating example is:
337  //
338  // MUL Other MUL_op1 MUL_op2 Other
339  // \ / \ | /
340  // ADD/SUB => MADD/MSUB
341  // (=Root) (=NewRoot)
342 
343  // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
344  // usually beneficial for code size it unfortunately can hurt performance
345  // when the ADD is on the critical path, but the MUL is not. With the
346  // substitution the MUL becomes part of the critical path (in form of the
347  // MADD) and can lengthen it on architectures where the MADD latency is
348  // longer than the ADD latency.
349  //
350  // For each instruction we check if it can be the root of a combiner
351  // pattern. Then for each pattern the new code sequence in form of MI is
352  // generated and evaluated. When the efficiency criteria (don't lengthen
353  // critical path, don't use more resources) is met the new sequence gets
354  // hooked up into the basic block before the old sequence is removed.
355  //
356  // The algorithm does not try to evaluate all patterns and pick the best.
357  // This is only an artificial restriction though. In practice there is
358  // mostly one pattern, and getMachineCombinerPatterns() can order patterns
359  // based on an internal cost heuristic.
360 
361  if (TII->getMachineCombinerPatterns(MI, Patterns)) {
362  for (auto P : Patterns) {
365  DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
366  if (!MinInstr)
367  MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
368  MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
369  Traces->verifyAnalysis();
370  TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
371  InstrIdxForVirtReg);
372  unsigned NewInstCount = InsInstrs.size();
373  unsigned OldInstCount = DelInstrs.size();
374  // Found pattern, but did not generate alternative sequence.
375  // This can happen e.g. when an immediate could not be materialized
376  // in a single instruction.
377  if (!NewInstCount)
378  continue;
379  // Substitute when we optimize for codesize and the new sequence has
380  // fewer instructions OR
381  // the new sequence neither lengthens the critical path nor increases
382  // resource pressure.
383  if (doSubstitute(NewInstCount, OldInstCount) ||
384  (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
385  InstrIdxForVirtReg,
386  NewInstCount < OldInstCount) &&
387  preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
388  for (auto *InstrPtr : InsInstrs)
389  MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
390  for (auto *InstrPtr : DelInstrs)
391  InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
392 
393  Changed = true;
394  ++NumInstCombined;
395 
396  Traces->invalidate(MBB);
397  Traces->verifyAnalysis();
398  // Eagerly stop after the first pattern fires.
399  break;
400  } else {
401  // Cleanup instructions of the alternative code sequence. There is no
402  // use for them.
403  MachineFunction *MF = MBB->getParent();
404  for (auto *InstrPtr : InsInstrs)
405  MF->DeleteMachineInstr(InstrPtr);
406  }
407  InstrIdxForVirtReg.clear();
408  }
409  }
410  }
411 
412  return Changed;
413 }
414 
415 bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
416  const TargetSubtargetInfo &STI = MF.getSubtarget();
417  TII = STI.getInstrInfo();
418  TRI = STI.getRegisterInfo();
419  SchedModel = STI.getSchedModel();
420  TSchedModel.init(SchedModel, &STI, TII);
421  MRI = &MF.getRegInfo();
422  Traces = &getAnalysis<MachineTraceMetrics>();
423  MinInstr = 0;
424 
426 
427  DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
428  if (!TII->useMachineCombiner()) {
429  DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
430  return false;
431  }
432 
433  bool Changed = false;
434 
435  // Try to combine instructions.
436  for (auto &MBB : MF)
437  Changed |= combineInstructions(&MBB);
438 
439  return Changed;
440 }
void push_back(const T &Elt)
Definition: SmallVector.h:222
char & MachineCombinerID
This pass performs instruction combining using trace metrics to estimate critical-path and resource d...
const MachineFunction * getParent() const
getParent - Return the MachineFunction containing this basic block.
machine combiner
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
bool isDepInTrace(const MachineInstr *DefMI, const MachineInstr *UseMI) const
A dependence is useful if the basic block of the defining instruction is part of the trace of the use...
static bool isVirtualRegister(unsigned Reg)
isVirtualRegister - Return true if the specified register number is in the virtual register namespace...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
machine Machine false
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:295
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:70
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:308
bool isPHI() const
Definition: MachineInstr.h:757
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:75
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
bool isReg() const
isReg - Tests if this is a MO_Register operand.
The core instruction combiner logic.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Select the trace through a block that has the fewest instructions.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:301
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:267
TargetInstrInfo - Interface to description of machine instruction set.
bundle_iterator< MachineInstr, instr_iterator > iterator
#define P(N)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
int findRegisterDefOperandIdx(unsigned Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found...
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:101
unsigned getResourceLength(ArrayRef< const MachineBasicBlock * > Extrablocks=None, ArrayRef< const MCSchedClassDesc * > ExtraInstrs=None, ArrayRef< const MCSchedClassDesc * > RemoveInstrs=None) const
Return the resource length of the trace.
Represent the analysis usage information of a pass.
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
int findRegisterUseOperandIdx(unsigned Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found...
MachineOperand class - Representation of each machine instruction operand.
CHAIN = SC CHAIN, Imm128 - System call.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
StringRef getName() const
getName - Return the name of the corresponding LLVM basic block, or "(null)".
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
InstrCycles getInstrCycles(const MachineInstr *MI) const
Return the depth and height of MI.
Representation of each machine instruction.
Definition: MachineInstr.h:51
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:217
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
iterator end()
Definition: DenseMap.h:68
iterator find(const KeyT &Val)
Definition: DenseMap.h:124
unsigned getReg() const
getReg - Returns the register number.
virtual const TargetInstrInfo * getInstrInfo() const
#define DEBUG(X)
Definition: Debug.h:92
void initializeMachineCombinerPass(PassRegistry &)
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:136
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
INITIALIZE_PASS_BEGIN(MachineCombiner,"machine-combiner","Machine InstCombiner", false, false) INITIALIZE_PASS_END(MachineCombiner
unsigned getInstrSlack(const MachineInstr *MI) const
Return the slack of MI.