LLVM  6.0.0svn
MLxExpansionPass.cpp
Go to the documentation of this file.
1 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
11 // multiply and add / sub instructions) when special VMLx hazards are detected.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ARM.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMSubtarget.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Support/Debug.h"
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "mlx-expansion"
31 
// Hidden command-line switch "expand-all-fp-mlx" (default: false). When set,
// FindMLxHazard reports a hazard for every candidate it is asked about, so
// every fp MLA / MLS is expanded (still subject to ExpandLimit below).
// NOTE(review): the identifier looks like a misspelling of "ForceExpand".
32 static cl::opt<bool>
33 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
// Hidden command-line value "expand-limit" (default: ~0U). FindMLxHazard stops
// reporting hazards once the NumExpand counter reaches this value.
34 static cl::opt<unsigned>
35 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
36 
// Statistic counter; incremented once per instruction rewritten by
// ExpandFPMLxInstruction and compared against ExpandLimit in FindMLxHazard.
37 STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
38 
39 namespace {
40  struct MLxExpansion : public MachineFunctionPass {
41  static char ID;
42  MLxExpansion() : MachineFunctionPass(ID) {}
43 
44  bool runOnMachineFunction(MachineFunction &Fn) override;
45 
46  StringRef getPassName() const override {
47  return "ARM MLA / MLS expansion pass";
48  }
49 
50  private:
51  const ARMBaseInstrInfo *TII;
52  const TargetRegisterInfo *TRI;
54 
55  bool isLikeA9;
56  bool isSwift;
57  unsigned MIIdx;
58  MachineInstr* LastMIs[4];
60 
61  void clearStack();
62  void pushStack(MachineInstr *MI);
63  MachineInstr *getAccDefMI(MachineInstr *MI) const;
64  unsigned getDefReg(MachineInstr *MI) const;
65  bool hasLoopHazard(MachineInstr *MI) const;
66  bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
67  bool FindMLxHazard(MachineInstr *MI);
68  void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
69  unsigned MulOpc, unsigned AddSubOpc,
70  bool NegAcc, bool HasLane);
71  bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
72  };
73  char MLxExpansion::ID = 0;
74 }
75 
76 void MLxExpansion::clearStack() {
77  std::fill(LastMIs, LastMIs + 4, nullptr);
78  MIIdx = 0;
79 }
80 
81 void MLxExpansion::pushStack(MachineInstr *MI) {
82  LastMIs[MIIdx] = MI;
83  if (++MIIdx == 4)
84  MIIdx = 0;
85 }
86 
87 MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
88  // Look past COPY and INSERT_SUBREG instructions to find the
89  // real definition MI. This is important for _sfp instructions.
90  unsigned Reg = MI->getOperand(1).getReg();
92  return nullptr;
93 
94  MachineBasicBlock *MBB = MI->getParent();
95  MachineInstr *DefMI = MRI->getVRegDef(Reg);
96  while (true) {
97  if (DefMI->getParent() != MBB)
98  break;
99  if (DefMI->isCopyLike()) {
100  Reg = DefMI->getOperand(1).getReg();
102  DefMI = MRI->getVRegDef(Reg);
103  continue;
104  }
105  } else if (DefMI->isInsertSubreg()) {
106  Reg = DefMI->getOperand(2).getReg();
108  DefMI = MRI->getVRegDef(Reg);
109  continue;
110  }
111  }
112  break;
113  }
114  return DefMI;
115 }
116 
117 unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
118  unsigned Reg = MI->getOperand(0).getReg();
120  !MRI->hasOneNonDBGUse(Reg))
121  return Reg;
122 
123  MachineBasicBlock *MBB = MI->getParent();
124  MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg);
125  if (UseMI->getParent() != MBB)
126  return Reg;
127 
128  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
129  Reg = UseMI->getOperand(0).getReg();
131  !MRI->hasOneNonDBGUse(Reg))
132  return Reg;
133  UseMI = &*MRI->use_instr_nodbg_begin(Reg);
134  if (UseMI->getParent() != MBB)
135  return Reg;
136  }
137 
138  return Reg;
139 }
140 
141 /// hasLoopHazard - Check whether an MLx instruction is chained to itself across
142 /// a single-MBB loop.
143 bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
144  unsigned Reg = MI->getOperand(1).getReg();
146  return false;
147 
148  MachineBasicBlock *MBB = MI->getParent();
149  MachineInstr *DefMI = MRI->getVRegDef(Reg);
150  while (true) {
151 outer_continue:
152  if (DefMI->getParent() != MBB)
153  break;
154 
155  if (DefMI->isPHI()) {
156  for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
157  if (DefMI->getOperand(i + 1).getMBB() == MBB) {
158  unsigned SrcReg = DefMI->getOperand(i).getReg();
160  DefMI = MRI->getVRegDef(SrcReg);
161  goto outer_continue;
162  }
163  }
164  }
165  } else if (DefMI->isCopyLike()) {
166  Reg = DefMI->getOperand(1).getReg();
168  DefMI = MRI->getVRegDef(Reg);
169  continue;
170  }
171  } else if (DefMI->isInsertSubreg()) {
172  Reg = DefMI->getOperand(2).getReg();
174  DefMI = MRI->getVRegDef(Reg);
175  continue;
176  }
177  }
178 
179  break;
180  }
181 
182  return DefMI == MI;
183 }
184 
185 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
186  // FIXME: Detect integer instructions properly.
187  const MCInstrDesc &MCID = MI->getDesc();
188  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
189  if (MI->mayStore())
190  return false;
191  unsigned Opcode = MCID.getOpcode();
192  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
193  return false;
194  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
195  return MI->readsRegister(Reg, TRI);
196  return false;
197 }
198 
199 static bool isFpMulInstruction(unsigned Opcode) {
200  switch (Opcode) {
201  case ARM::VMULS:
202  case ARM::VMULfd:
203  case ARM::VMULfq:
204  case ARM::VMULD:
205  case ARM::VMULslfd:
206  case ARM::VMULslfq:
207  return true;
208  default:
209  return false;
210  }
211 }
212 
213 bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
214  if (NumExpand >= ExpandLimit)
215  return false;
216 
217  if (ForceExapnd)
218  return true;
219 
220  MachineInstr *DefMI = getAccDefMI(MI);
221  if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
222  // r0 = vmla
223  // r3 = vmla r0, r1, r2
224  // takes 16 - 17 cycles
225  //
226  // r0 = vmla
227  // r4 = vmul r1, r2
228  // r3 = vadd r0, r4
229  // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
230  IgnoreStall.insert(DefMI);
231  return true;
232  }
233 
234  // On Swift, we mostly care about hazards from multiplication instructions
235  // writing the accumulator and the pipelining of loop iterations by out-of-
236  // order execution.
237  if (isSwift)
238  return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
239 
240  if (IgnoreStall.count(MI))
241  return false;
242 
243  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
244  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
245  // preserves the in-order retirement of the instructions.
246  // Look at the next few instructions, if *most* of them can cause hazards,
247  // then the scheduler can't *fix* this, we'd better break up the VMLA.
248  unsigned Limit1 = isLikeA9 ? 1 : 4;
249  unsigned Limit2 = isLikeA9 ? 1 : 4;
250  for (unsigned i = 1; i <= 4; ++i) {
251  int Idx = ((int)MIIdx - i + 4) % 4;
252  MachineInstr *NextMI = LastMIs[Idx];
253  if (!NextMI)
254  continue;
255 
256  if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
257  if (i <= Limit1)
258  return true;
259  }
260 
261  // Look for VMLx RAW hazard.
262  if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
263  return true;
264  }
265 
266  return false;
267 }
268 
269 /// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
270 /// of MUL + ADD / SUB instructions.
271 void
272 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
273  unsigned MulOpc, unsigned AddSubOpc,
274  bool NegAcc, bool HasLane) {
275  unsigned DstReg = MI->getOperand(0).getReg();
276  bool DstDead = MI->getOperand(0).isDead();
277  unsigned AccReg = MI->getOperand(1).getReg();
278  unsigned Src1Reg = MI->getOperand(2).getReg();
279  unsigned Src2Reg = MI->getOperand(3).getReg();
280  bool Src1Kill = MI->getOperand(2).isKill();
281  bool Src2Kill = MI->getOperand(3).isKill();
282  unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
283  unsigned NextOp = HasLane ? 5 : 4;
284  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
285  unsigned PredReg = MI->getOperand(++NextOp).getReg();
286 
287  const MCInstrDesc &MCID1 = TII->get(MulOpc);
288  const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
289  const MachineFunction &MF = *MI->getParent()->getParent();
290  unsigned TmpReg = MRI->createVirtualRegister(
291  TII->getRegClass(MCID1, 0, TRI, MF));
292 
293  MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
294  .addReg(Src1Reg, getKillRegState(Src1Kill))
295  .addReg(Src2Reg, getKillRegState(Src2Kill));
296  if (HasLane)
297  MIB.addImm(LaneImm);
298  MIB.addImm(Pred).addReg(PredReg);
299 
300  MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
301  .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
302 
303  if (NegAcc) {
304  bool AccKill = MRI->hasOneNonDBGUse(AccReg);
305  MIB.addReg(TmpReg, getKillRegState(true))
306  .addReg(AccReg, getKillRegState(AccKill));
307  } else {
308  MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
309  }
310  MIB.addImm(Pred).addReg(PredReg);
311 
312  DEBUG({
313  dbgs() << "Expanding: " << *MI;
314  dbgs() << " to:\n";
316  MII = std::prev(MII);
317  MachineInstr &MI2 = *MII;
318  MII = std::prev(MII);
319  MachineInstr &MI1 = *MII;
320  dbgs() << " " << MI1;
321  dbgs() << " " << MI2;
322  });
323 
324  MI->eraseFromParent();
325  ++NumExpand;
326 }
327 
328 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
329  bool Changed = false;
330 
331  clearStack();
332  IgnoreStall.clear();
333 
334  unsigned Skip = 0;
335  MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
336  while (MII != E) {
337  MachineInstr *MI = &*MII++;
338 
339  if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy())
340  continue;
341 
342  const MCInstrDesc &MCID = MI->getDesc();
343  if (MI->isBarrier()) {
344  clearStack();
345  Skip = 0;
346  continue;
347  }
348 
349  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
350  if (Domain == ARMII::DomainGeneral) {
351  if (++Skip == 2)
352  // Assume dual issues of non-VFP / NEON instructions.
353  pushStack(nullptr);
354  } else {
355  Skip = 0;
356 
357  unsigned MulOpc, AddSubOpc;
358  bool NegAcc, HasLane;
359  if (!TII->isFpMLxInstruction(MCID.getOpcode(),
360  MulOpc, AddSubOpc, NegAcc, HasLane) ||
361  !FindMLxHazard(MI))
362  pushStack(MI);
363  else {
364  ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
365  Changed = true;
366  }
367  }
368  }
369 
370  return Changed;
371 }
372 
373 bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
374  if (skipFunction(*Fn.getFunction()))
375  return false;
376 
377  TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
378  TRI = Fn.getSubtarget().getRegisterInfo();
379  MRI = &Fn.getRegInfo();
380  const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
381  if (!STI->expandMLx())
382  return false;
383  isLikeA9 = STI->isLikeA9() || STI->isSwift();
384  isSwift = STI->isSwift();
385 
386  bool Modified = false;
387  for (MachineBasicBlock &MBB : Fn)
388  Modified |= ExpandFPMLxInstructions(MBB);
389 
390  return Modified;
391 }
392 
394  return new MLxExpansion();
395 }
FunctionPass * createMLxExpansionPass()
MachineBasicBlock * getMBB() const
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:268
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
STATISTIC(NumFunctions, "Total number of functions")
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:871
bool isPHI() const
Definition: MachineInstr.h:826
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:293
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Reg
All possible values of the reg field in the ModR/M byte.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:290
static cl::opt< unsigned > ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:287
static cl::opt< bool > ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden)
virtual const TargetInstrInfo * getInstrInfo() const
reverse_iterator rend()
reverse_iterator rbegin()
unsigned getKillRegState(bool B)
unsigned getDeadRegState(bool B)
unsigned getDefRegState(bool B)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:639
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
bool isCopy() const
Definition: MachineInstr.h:857
bool isImplicitDef() const
Definition: MachineInstr.h:831
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, const TargetRegisterInfo &TRI)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
bool isInsertSubreg() const
Definition: MachineInstr.h:841
MachineInstrBuilder MachineInstrBuilder & DefMI
int64_t getImm() const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
Definition: MachineInstr.h:927
bool expandMLx() const
Definition: ARMSubtarget.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:139
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:59
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
bool isSwift() const
Definition: ARMSubtarget.h:520
bool isPosition() const
Definition: MachineInstr.h:814
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:203
#define DEBUG(X)
Definition: Debug.h:118
IRTranslator LLVM IR MI
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:465
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:295
static bool isFpMulInstruction(unsigned Opcode)
bool isLikeA9() const
Definition: ARMSubtarget.h:522