LLVM  9.0.0svn
MLxExpansionPass.cpp
Go to the documentation of this file.
1 //===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
10 // multiply and add / sub instructions) when special VMLx hazards are detected.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARM.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMSubtarget.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Support/Debug.h"
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "mlx-expansion"
30 
// Hidden switch: when set, FindMLxHazard reports a hazard for every candidate
// it is asked about (the ExpandLimit check still runs first).
31 static cl::opt<bool>
32 ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
// Hidden cap on the number of expansions: FindMLxHazard returns false once
// NumExpand has reached this value. The default is ~0U.
33 static cl::opt<unsigned>
34 ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
35 
// Incremented once per instruction rewritten by ExpandFPMLxInstruction.
36 STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
37 
38 namespace {
// Machine function pass that rewrites fp MLA / MLS instructions as a
// multiply followed by an add / sub when FindMLxHazard reports a hazard.
//
// NOTE(review): the member functions use `MRI` and `IgnoreStall`, but no
// declaration for either is visible in this listing (listing lines 52 and 58
// are absent) -- confirm against the upstream file.
39  struct MLxExpansion : public MachineFunctionPass {
40  static char ID;
41  MLxExpansion() : MachineFunctionPass(ID) {}
42 
// Entry point: processes every basic block of Fn; returns true if any
// instruction was expanded.
43  bool runOnMachineFunction(MachineFunction &Fn) override;
44 
45  StringRef getPassName() const override {
46  return "ARM MLA / MLS expansion pass";
47  }
48 
49  private:
// Both pointers are dereferenced by the helpers below; TII is assigned in
// runOnMachineFunction.
50  const ARMBaseInstrInfo *TII;
51  const TargetRegisterInfo *TRI;
53 
// Subtarget flags cached by runOnMachineFunction. isLikeA9 is also set
// when the subtarget is Swift.
54  bool isLikeA9;
55  bool isSwift;
// LastMIs is a four-entry ring of the most recently pushed instructions;
// MIIdx is the slot pushStack writes next. A nullptr entry is either an
// empty slot or a placeholder pushed by ExpandFPMLxInstructions.
56  unsigned MIIdx;
57  MachineInstr* LastMIs[4];
59 
// Ring maintenance.
60  void clearStack();
61  void pushStack(MachineInstr *MI);
// Register / definition chasing helpers used by the hazard checks.
62  MachineInstr *getAccDefMI(MachineInstr *MI) const;
63  unsigned getDefReg(MachineInstr *MI) const;
// Hazard predicates.
64  bool hasLoopHazard(MachineInstr *MI) const;
65  bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
66  bool FindMLxHazard(MachineInstr *MI);
// Rewriting: the first expands one instruction, the second scans a block.
67  void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
68  unsigned MulOpc, unsigned AddSubOpc,
69  bool NegAcc, bool HasLane);
70  bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
71  };
72  char MLxExpansion::ID = 0;
73 }
74 
/// clearStack - Reset the instruction ring: every slot of LastMIs becomes
/// nullptr and the write index goes back to slot 0.
75 void MLxExpansion::clearStack() {
76  std::fill(LastMIs, LastMIs + 4, nullptr);
77  MIIdx = 0;
78 }
79 
/// pushStack - Record MI (which may be nullptr) in the current ring slot and
/// advance the write index, wrapping from 3 back to 0 so the oldest entry is
/// overwritten next.
80 void MLxExpansion::pushStack(MachineInstr *MI) {
81  LastMIs[MIIdx] = MI;
82  if (++MIIdx == 4)
83  MIIdx = 0;
84 }
85 
/// getAccDefMI - Find the instruction that defines the accumulator operand
/// (operand 1) of MI. The search starts at the register's vreg definition
/// and stops as soon as the definition lives in a different basic block than
/// MI. Can return nullptr (see the early return below).
///
/// NOTE(review): listing lines 90, 100 and 106 are absent from this copy. The
/// condition guarding the early `return nullptr` and the conditions that open
/// the inner blocks closed on listing lines 103 and 109 are therefore not
/// visible -- confirm against the upstream file before editing the code.
86 MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
87  // Look past COPY and INSERT_SUBREG instructions to find the
88  // real definition MI. This is important for _sfp instructions.
89  unsigned Reg = MI->getOperand(1).getReg();
91  return nullptr;
92 
93  MachineBasicBlock *MBB = MI->getParent();
94  MachineInstr *DefMI = MRI->getVRegDef(Reg);
95  while (true) {
96  if (DefMI->getParent() != MBB)
97  break;
// Copy-like: continue from the source register (operand 1).
98  if (DefMI->isCopyLike()) {
99  Reg = DefMI->getOperand(1).getReg();
101  DefMI = MRI->getVRegDef(Reg);
102  continue;
103  }
// INSERT_SUBREG: continue from operand 2.
104  } else if (DefMI->isInsertSubreg()) {
105  Reg = DefMI->getOperand(2).getReg();
107  DefMI = MRI->getVRegDef(Reg);
108  continue;
109  }
110  }
111  break;
112  }
113  return DefMI;
114 }
115 
/// getDefReg - Return the register that finally carries the value defined by
/// MI (operand 0). While the current register's user is a COPY or
/// INSERT_SUBREG in the same basic block as MI, the search moves on to that
/// user's result register; it stops at the first register whose user is in
/// another block or fails the single-use test.
///
/// NOTE(review): listing lines 118 and 129 are absent from this copy; they
/// hold the first half of the two conditions that end in
/// `!MRI->hasOneNonDBGUse(Reg))` -- confirm against the upstream file.
116 unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
117  unsigned Reg = MI->getOperand(0).getReg();
119  !MRI->hasOneNonDBGUse(Reg))
120  return Reg;
121 
122  MachineBasicBlock *MBB = MI->getParent();
123  MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg);
124  if (UseMI->getParent() != MBB)
125  return Reg;
126 
// Step through copies / subreg inserts to the register they define.
127  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
128  Reg = UseMI->getOperand(0).getReg();
130  !MRI->hasOneNonDBGUse(Reg))
131  return Reg;
132  UseMI = &*MRI->use_instr_nodbg_begin(Reg);
133  if (UseMI->getParent() != MBB)
134  return Reg;
135  }
136 
137  return Reg;
138 }
139 
140 /// hasLoopHazard - Check whether an MLx instruction is chained to itself across
141 /// a single-MBB loop.
///
/// Starting from the definition of the accumulator (operand 1), the walk
/// follows PHI inputs that come from MI's own block, copy-like instructions
/// and INSERT_SUBREGs, and stops when the definition leaves the block. The
/// result is true only when the walk ends on MI itself.
///
/// NOTE(review): listing lines 144, 158, 166 and 172 are absent from this
/// copy. The guard for the early `return false` and the conditions opening
/// the inner blocks closed on listing lines 161, 169 and 175 are not
/// visible -- confirm against the upstream file.
142 bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
143  unsigned Reg = MI->getOperand(1).getReg();
145  return false;
146 
147  MachineBasicBlock *MBB = MI->getParent();
148  MachineInstr *DefMI = MRI->getVRegDef(Reg);
149  while (true) {
150 outer_continue:
151  if (DefMI->getParent() != MBB)
152  break;
153 
154  if (DefMI->isPHI()) {
// The loop reads operand i as a register and operand i + 1 as its block,
// starting at i = 1; only the input arriving from MBB itself is followed.
155  for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
156  if (DefMI->getOperand(i + 1).getMBB() == MBB) {
157  unsigned SrcReg = DefMI->getOperand(i).getReg();
159  DefMI = MRI->getVRegDef(SrcReg);
160  goto outer_continue;
161  }
162  }
163  }
164  } else if (DefMI->isCopyLike()) {
165  Reg = DefMI->getOperand(1).getReg();
167  DefMI = MRI->getVRegDef(Reg);
168  continue;
169  }
170  } else if (DefMI->isInsertSubreg()) {
171  Reg = DefMI->getOperand(2).getReg();
173  DefMI = MRI->getVRegDef(Reg);
174  continue;
175  }
176  }
177 
178  break;
179  }
180 
181  return DefMI == MI;
182 }
183 
/// hasRAWHazard - Return true if MI is a VFP / NEON domain instruction that
/// reads Reg. Stores and the VMOVRS / VMOVRRD opcodes always yield false, as
/// does any instruction outside the VFP / NEON domains.
184 bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
185  // FIXME: Detect integer instructions properly.
186  const MCInstrDesc &MCID = MI->getDesc();
187  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
188  if (MI->mayStore())
189  return false;
190  unsigned Opcode = MCID.getOpcode();
191  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
192  return false;
193  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
194  return MI->readsRegister(Reg, TRI);
195  return false;
196 }
197 
/// isFpMulInstruction - Return true if Opcode is one of the six VMUL opcodes
/// listed below; every other opcode yields false.
198 static bool isFpMulInstruction(unsigned Opcode) {
199  switch (Opcode) {
200  case ARM::VMULS:
201  case ARM::VMULfd:
202  case ARM::VMULfq:
203  case ARM::VMULD:
204  case ARM::VMULslfd:
205  case ARM::VMULslfq:
206  return true;
207  default:
208  return false;
209  }
210 }
211 
/// FindMLxHazard - Decide whether the fp MLA / MLS instruction MI should be
/// expanded into a multiply followed by an add / sub.
///
/// The checks run in this order:
///  1. Never expand once NumExpand has reached ExpandLimit.
///  2. Always expand when ForceExapnd is set.
///  3. Expand when the accumulator is itself produced by an fp MLx
///     instruction; that producer is added to IgnoreStall.
///  4. On Swift, expand only when the accumulator comes from an fp multiply
///     or MI feeds itself around a single-block loop.
///  5. Otherwise, unless MI is in IgnoreStall, inspect the ring of recently
///     pushed instructions for one that can stall behind an MLx or that
///     reads MI's result.
///
/// getAccDefMI has a `return nullptr` path, so DefMI is checked before each
/// dereference; with no known defining instruction the definition-based
/// checks (3 and the multiply half of 4) simply do not fire.
212 bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
213  if (NumExpand >= ExpandLimit)
214  return false;
215 
216  if (ForceExapnd)
217  return true;
218 
219  MachineInstr *DefMI = getAccDefMI(MI);
220  if (DefMI && TII->isFpMLxInstruction(DefMI->getOpcode())) {
221  // r0 = vmla
222  // r3 = vmla r0, r1, r2
223  // takes 16 - 17 cycles
224  //
225  // r0 = vmla
226  // r4 = vmul r1, r2
227  // r3 = vadd r0, r4
228  // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
229  IgnoreStall.insert(DefMI);
230  return true;
231  }
232 
233  // On Swift, we mostly care about hazards from multiplication instructions
234  // writing the accumulator and the pipelining of loop iterations by out-of-
235  // order execution.
236  if (isSwift)
237  return (DefMI && isFpMulInstruction(DefMI->getOpcode())) || hasLoopHazard(MI);
238 
239  if (IgnoreStall.count(MI))
240  return false;
241 
242  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
243  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
244  // preserves the in-order retirement of the instructions.
245  // Look at the next few instructions, if *most* of them can cause hazards,
246  // then the scheduler can't *fix* this, we'd better break up the VMLA.
// With isLikeA9 set only the most recently pushed slot is considered;
// otherwise all four slots are.
247  unsigned Limit1 = isLikeA9 ? 1 : 4;
248  unsigned Limit2 = isLikeA9 ? 1 : 4;
249  for (unsigned i = 1; i <= 4; ++i) {
// i == 1 selects the most recently pushed slot. The block is scanned in
// reverse, so that instruction comes after MI in program order.
250  int Idx = ((int)MIIdx - i + 4) % 4;
251  MachineInstr *NextMI = LastMIs[Idx];
252  if (!NextMI)
253  continue;
254 
255  if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
256  if (i <= Limit1)
257  return true;
258  }
259 
260  // Look for VMLx RAW hazard.
261  if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
262  return true;
263  }
264 
265  return false;
266 }
267 
268 /// ExpandFPMLxInstruction - Expand a MLA / MLS instruction into a pair
269 /// of MUL + ADD / SUB instructions.
///
/// MI's operands are read as: 0 = destination, 1 = accumulator, 2 and 3 =
/// multiplicands, 4 = lane immediate (only when HasLane), followed by the
/// predicate condition code and the predicate register. Both new
/// instructions are inserted before MI and carry MI's debug location and
/// predicate; MI is then erased and NumExpand is incremented.
///
/// \param MulOpc     opcode of the multiply to emit.
/// \param AddSubOpc  opcode of the add / sub to emit.
/// \param NegAcc     when true the add / sub takes (product, accumulator)
///                   instead of (accumulator, product).
/// \param HasLane    MI carries a lane immediate that is copied to the
///                   multiply.
///
/// NOTE(review): listing line 314 is absent from this copy; `MII`, used in
/// the LLVM_DEBUG block, has no visible declaration here -- confirm against
/// the upstream file.
270 void
271 MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
272  unsigned MulOpc, unsigned AddSubOpc,
273  bool NegAcc, bool HasLane) {
274  unsigned DstReg = MI->getOperand(0).getReg();
275  bool DstDead = MI->getOperand(0).isDead();
276  unsigned AccReg = MI->getOperand(1).getReg();
277  unsigned Src1Reg = MI->getOperand(2).getReg();
278  unsigned Src2Reg = MI->getOperand(3).getReg();
279  bool Src1Kill = MI->getOperand(2).isKill();
280  bool Src2Kill = MI->getOperand(3).isKill();
281  unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
// The predicate operands follow the optional lane immediate.
282  unsigned NextOp = HasLane ? 5 : 4;
283  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
284  unsigned PredReg = MI->getOperand(++NextOp).getReg();
285 
286  const MCInstrDesc &MCID1 = TII->get(MulOpc);
287  const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
288  const MachineFunction &MF = *MI->getParent()->getParent();
// The product goes into a fresh virtual register whose class is taken
// from operand 0 of the multiply's descriptor.
289  unsigned TmpReg = MRI->createVirtualRegister(
290  TII->getRegClass(MCID1, 0, TRI, MF));
291 
// Multiply: TmpReg = Src1Reg * Src2Reg, preserving the original kill flags.
292  MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
293  .addReg(Src1Reg, getKillRegState(Src1Kill))
294  .addReg(Src2Reg, getKillRegState(Src2Kill));
295  if (HasLane)
296  MIB.addImm(LaneImm);
297  MIB.addImm(Pred).addReg(PredReg);
298 
// Add / sub: defines DstReg, preserving the original dead flag.
299  MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
300  .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
301 
302  if (NegAcc) {
// Product first; the accumulator is marked killed only when
// hasOneNonDBGUse reports true for it.
303  bool AccKill = MRI->hasOneNonDBGUse(AccReg);
304  MIB.addReg(TmpReg, getKillRegState(true))
305  .addReg(AccReg, getKillRegState(AccKill));
306  } else {
307  MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
308  }
309  MIB.addImm(Pred).addReg(PredReg);
310 
311  LLVM_DEBUG({
312  dbgs() << "Expanding: " << *MI;
313  dbgs() << " to:\n";
315  MII = std::prev(MII);
316  MachineInstr &MI2 = *MII;
317  MII = std::prev(MII);
318  MachineInstr &MI1 = *MII;
319  dbgs() << " " << MI1;
320  dbgs() << " " << MI2;
321  });
322 
323  MI->eraseFromParent();
324  ++NumExpand;
325 }
326 
/// ExpandFPMLxInstructions - Scan MBB from its last instruction to its first
/// and expand every fp MLx instruction for which FindMLxHazard reports a
/// hazard. The instruction ring and IgnoreStall are reset on entry.
/// Returns true if at least one instruction was expanded.
327 bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
328  bool Changed = false;
329 
330  clearStack();
331  IgnoreStall.clear();
332 
// Skip counts consecutive general-domain instructions.
333  unsigned Skip = 0;
334  MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
335  while (MII != E) {
// The iterator is advanced before MI is examined (and possibly erased).
336  MachineInstr *MI = &*MII++;
337 
// Position markers, IMPLICIT_DEFs and COPYs are ignored: they are neither
// pushed on the ring nor do they reset Skip.
338  if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy())
339  continue;
340 
341  const MCInstrDesc &MCID = MI->getDesc();
// A barrier discards everything recorded so far.
342  if (MI->isBarrier()) {
343  clearStack();
344  Skip = 0;
345  continue;
346  }
347 
348  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
349  if (Domain == ARMII::DomainGeneral) {
// A placeholder slot is pushed when the second consecutive general-domain
// instruction is seen. Skip keeps counting past 2, so a longer run
// pushes nothing more until Skip is reset.
350  if (++Skip == 2)
351  // Assume dual issues of non-VFP / NEON instructions.
352  pushStack(nullptr);
353  } else {
354  Skip = 0;
355 
356  unsigned MulOpc, AddSubOpc;
357  bool NegAcc, HasLane;
// Instructions that are not expanded (not an fp MLx, or no hazard found)
// are recorded on the ring. Expanded ones are not pushed.
358  if (!TII->isFpMLxInstruction(MCID.getOpcode(),
359  MulOpc, AddSubOpc, NegAcc, HasLane) ||
360  !FindMLxHazard(MI))
361  pushStack(MI);
362  else {
363  ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
364  Changed = true;
365  }
366  }
367  }
368 
369  return Changed;
370 }
371 
/// runOnMachineFunction - Pass entry point. Does nothing (returns false) when
/// skipFunction says to skip Fn or when the subtarget's expandMLx() is false.
/// Otherwise it caches the subtarget flags and runs
/// ExpandFPMLxInstructions on every basic block, returning true if any block
/// was modified.
///
/// NOTE(review): listing line 377 is absent from this copy and no assignment
/// to TRI is visible here, although TRI is used by hasRAWHazard and
/// ExpandFPMLxInstruction -- confirm against the upstream file.
372 bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
373  if (skipFunction(Fn.getFunction()))
374  return false;
375 
376  TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
378  MRI = &Fn.getRegInfo();
379  const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
380  if (!STI->expandMLx())
381  return false;
// Swift is also treated as A9-like for the stall-window limits.
382  isLikeA9 = STI->isLikeA9() || STI->isSwift();
383  isSwift = STI->isSwift();
384 
385  bool Modified = false;
386  for (MachineBasicBlock &MBB : Fn)
387  Modified |= ExpandFPMLxInstructions(MBB);
388 
389  return Modified;
390 }
391 
393  return new MLxExpansion();
394 }
FunctionPass * createMLxExpansionPass()
MachineBasicBlock * getMBB() const
This class represents lattice values for constants.
Definition: AllocatorList.h:23
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
bool isCopyLike() const
Return true if the instruction behaves like a copy.
bool isPHI() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:411
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
static cl::opt< unsigned > ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:405
static cl::opt< bool > ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden)
virtual const TargetInstrInfo * getInstrInfo() const
reverse_iterator rend()
reverse_iterator rbegin()
unsigned getKillRegState(bool B)
unsigned getDeadRegState(bool B)
unsigned getDefRegState(bool B)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:819
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
bool isCopy() const
bool isImplicitDef() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, const TargetRegisterInfo &TRI)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
bool isInsertSubreg() const
MachineInstrBuilder MachineInstrBuilder & DefMI
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
bool expandMLx() const
Definition: ARMSubtarget.h:614
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:63
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool isSwift() const
Definition: ARMSubtarget.h:562
bool isPosition() const
Definition: MachineInstr.h:994
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:203
IRTranslator LLVM IR MI
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:639
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
static bool isFpMulInstruction(unsigned Opcode)
bool isLikeA9() const
Definition: ARMSubtarget.h:564