LLVM  4.0.0
PPCMIPeephole.cpp
Go to the documentation of this file.
1 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 //
10 // This pass performs peephole optimizations to clean up ugly code
11 // sequences at the MachineInstruction layer. It runs at the end of
12 // the SSA phases, following VSX swap removal. A pass of dead code
13 // elimination follows this one for quick clean-up of any dead
14 // instructions introduced here. Although we could do this as callbacks
15 // from the generic peephole pass, this would have a couple of bad
16 // effects: it might remove optimization opportunities for VSX swap
17 // removal, and it would miss cleanups made possible following VSX
18 // swap removal.
19 //
20 //===---------------------------------------------------------------------===//
21 
#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "ppc-mi-peepholes"
34 
35 namespace llvm {
37 }
38 
39 namespace {
40 
41 struct PPCMIPeephole : public MachineFunctionPass {
42 
43  static char ID;
44  const PPCInstrInfo *TII;
45  MachineFunction *MF;
47 
48  PPCMIPeephole() : MachineFunctionPass(ID) {
50  }
51 
52 private:
53  // Initialize class variables.
54  void initialize(MachineFunction &MFParm);
55 
56  // Perform peepholes.
57  bool simplifyCode(void);
58 
59  // Find the "true" register represented by SrcReg (following chains
60  // of copies and subreg_to_reg operations).
61  unsigned lookThruCopyLike(unsigned SrcReg);
62 
63 public:
64  // Main entry point for this pass.
65  bool runOnMachineFunction(MachineFunction &MF) override {
66  if (skipFunction(*MF.getFunction()))
67  return false;
68  initialize(MF);
69  return simplifyCode();
70  }
71 };
72 
73 // Initialize class variables.
75  MF = &MFParm;
76  MRI = &MF->getRegInfo();
77  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
78  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
79  DEBUG(MF->dump());
80 }
81 
82 // Perform peephole optimizations.
83 bool PPCMIPeephole::simplifyCode(void) {
84  bool Simplified = false;
85  MachineInstr* ToErase = nullptr;
86 
87  for (MachineBasicBlock &MBB : *MF) {
88  for (MachineInstr &MI : MBB) {
89 
90  // If the previous instruction was marked for elimination,
91  // remove it now.
92  if (ToErase) {
93  ToErase->eraseFromParent();
94  ToErase = nullptr;
95  }
96 
97  // Ignore debug instructions.
98  if (MI.isDebugValue())
99  continue;
100 
101  // Per-opcode peepholes.
102  switch (MI.getOpcode()) {
103 
104  default:
105  break;
106 
107  case PPC::XXPERMDI: {
108  // Perform simplifications of 2x64 vector swaps and splats.
109  // A swap is identified by an immediate value of 2, and a splat
110  // is identified by an immediate value of 0 or 3.
111  int Immed = MI.getOperand(3).getImm();
112 
113  if (Immed != 1) {
114 
115  // For each of these simplifications, we need the two source
116  // regs to match. Unfortunately, MachineCSE ignores COPY and
117  // SUBREG_TO_REG, so for example we can see
118  // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
119  // We have to look through chains of COPY and SUBREG_TO_REG
120  // to find the real source values for comparison.
121  unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
122  unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
123 
124  if (TrueReg1 == TrueReg2
126  MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
127  unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
128 
129  // If this is a splat fed by a splatting load, the splat is
130  // redundant. Replace with a copy. This doesn't happen directly due
131  // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
132  // a load of a double to a vector of 64-bit integers.
133  auto isConversionOfLoadAndSplat = [=]() -> bool {
134  if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
135  return false;
136  unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
138  MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
139  if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
140  return true;
141  }
142  return false;
143  };
144  if (DefMI && (Immed == 0 || Immed == 3)) {
145  if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
146  DEBUG(dbgs()
147  << "Optimizing load-and-splat/splat "
148  "to load-and-splat/copy: ");
149  DEBUG(MI.dump());
150  BuildMI(MBB, &MI, MI.getDebugLoc(),
151  TII->get(PPC::COPY), MI.getOperand(0).getReg())
152  .addOperand(MI.getOperand(1));
153  ToErase = &MI;
154  Simplified = true;
155  }
156  }
157 
158  // If this is a splat or a swap fed by another splat, we
159  // can replace it with a copy.
160  if (DefOpc == PPC::XXPERMDI) {
161  unsigned FeedImmed = DefMI->getOperand(3).getImm();
162  unsigned FeedReg1
163  = lookThruCopyLike(DefMI->getOperand(1).getReg());
164  unsigned FeedReg2
165  = lookThruCopyLike(DefMI->getOperand(2).getReg());
166 
167  if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
168  DEBUG(dbgs()
169  << "Optimizing splat/swap or splat/splat "
170  "to splat/copy: ");
171  DEBUG(MI.dump());
172  BuildMI(MBB, &MI, MI.getDebugLoc(),
173  TII->get(PPC::COPY), MI.getOperand(0).getReg())
174  .addOperand(MI.getOperand(1));
175  ToErase = &MI;
176  Simplified = true;
177  }
178 
179  // If this is a splat fed by a swap, we can simplify modify
180  // the splat to splat the other value from the swap's input
181  // parameter.
182  else if ((Immed == 0 || Immed == 3)
183  && FeedImmed == 2 && FeedReg1 == FeedReg2) {
184  DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
185  DEBUG(MI.dump());
186  MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
187  MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
188  MI.getOperand(3).setImm(3 - Immed);
189  Simplified = true;
190  }
191 
192  // If this is a swap fed by a swap, we can replace it
193  // with a copy from the first swap's input.
194  else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
195  DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
196  DEBUG(MI.dump());
197  BuildMI(MBB, &MI, MI.getDebugLoc(),
198  TII->get(PPC::COPY), MI.getOperand(0).getReg())
199  .addOperand(DefMI->getOperand(1));
200  ToErase = &MI;
201  Simplified = true;
202  }
203  } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
204  (DefMI->getOperand(2).getImm() == 0 ||
205  DefMI->getOperand(2).getImm() == 3)) {
206  // Splat fed by another splat - switch the output of the first
207  // and remove the second.
208  DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
209  ToErase = &MI;
210  Simplified = true;
211  DEBUG(dbgs() << "Removing redundant splat: ");
212  DEBUG(MI.dump());
213  }
214  }
215  }
216  break;
217  }
218  case PPC::VSPLTB:
219  case PPC::VSPLTH:
220  case PPC::XXSPLTW: {
221  unsigned MyOpcode = MI.getOpcode();
222  unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
223  unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
225  break;
226  MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
227  if (!DefMI)
228  break;
229  unsigned DefOpcode = DefMI->getOpcode();
230  auto isConvertOfSplat = [=]() -> bool {
231  if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
232  return false;
233  unsigned ConvReg = DefMI->getOperand(1).getReg();
235  return false;
236  MachineInstr *Splt = MRI->getVRegDef(ConvReg);
237  return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
238  Splt->getOpcode() == PPC::XXSPLTW);
239  };
240  bool AlreadySplat = (MyOpcode == DefOpcode) ||
241  (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
242  (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
243  (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
244  (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
245  (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
246  (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
247  // If the instruction[s] that feed this splat have already splat
248  // the value, this splat is redundant.
249  if (AlreadySplat) {
250  DEBUG(dbgs() << "Changing redundant splat to a copy: ");
251  DEBUG(MI.dump());
252  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
253  MI.getOperand(0).getReg())
254  .addOperand(MI.getOperand(OpNo));
255  ToErase = &MI;
256  Simplified = true;
257  }
258  // Splat fed by a shift. Usually when we align value to splat into
259  // vector element zero.
260  if (DefOpcode == PPC::XXSLDWI) {
261  unsigned ShiftRes = DefMI->getOperand(0).getReg();
262  unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
263  unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
264  unsigned ShiftImm = DefMI->getOperand(3).getImm();
265  unsigned SplatImm = MI.getOperand(2).getImm();
266  if (ShiftOp1 == ShiftOp2) {
267  unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
268  if (MRI->hasOneNonDBGUse(ShiftRes)) {
269  DEBUG(dbgs() << "Removing redundant shift: ");
270  DEBUG(DefMI->dump());
271  ToErase = DefMI;
272  }
273  Simplified = true;
274  DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
275  " to " << NewElem << " in instruction: ");
276  DEBUG(MI.dump());
277  MI.getOperand(1).setReg(ShiftOp1);
278  MI.getOperand(2).setImm(NewElem);
279  }
280  }
281  break;
282  }
283  case PPC::XVCVDPSP: {
284  // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
285  unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
287  break;
288  MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
289 
290  // This can occur when building a vector of single precision or integer
291  // values.
292  if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
293  unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
294  unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
295  if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
297  break;
298  MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
299  MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
300 
301  if (!P1 || !P2)
302  break;
303 
304  // Remove the passed FRSP instruction if it only feeds this MI and
305  // set any uses of that FRSP (in this MI) to the source of the FRSP.
306  auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
307  if (RoundInstr->getOpcode() == PPC::FRSP &&
308  MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
309  Simplified = true;
310  unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
311  unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
312  MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
313  for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
314  if (Use.getOperand(i).isReg() &&
315  Use.getOperand(i).getReg() == FRSPDefines)
316  Use.getOperand(i).setReg(ConvReg1);
317  DEBUG(dbgs() << "Removing redundant FRSP:\n");
318  DEBUG(RoundInstr->dump());
319  DEBUG(dbgs() << "As it feeds instruction:\n");
320  DEBUG(MI.dump());
321  DEBUG(dbgs() << "Through instruction:\n");
322  DEBUG(DefMI->dump());
323  RoundInstr->eraseFromParent();
324  }
325  };
326 
327  // If the input to XVCVDPSP is a vector that was built (even
328  // partially) out of FRSP's, the FRSP(s) can safely be removed
329  // since this instruction performs the same operation.
330  if (P1 != P2) {
331  removeFRSPIfPossible(P1);
332  removeFRSPIfPossible(P2);
333  break;
334  }
335  removeFRSPIfPossible(P1);
336  }
337  break;
338  }
339  }
340  }
341  // If the last instruction was marked for elimination,
342  // remove it now.
343  if (ToErase) {
344  ToErase->eraseFromParent();
345  ToErase = nullptr;
346  }
347  }
348 
349  return Simplified;
350 }
351 
352 // This is used to find the "true" source register for an
353 // XXPERMDI instruction, since MachineCSE does not handle the
354 // "copy-like" operations (Copy and SubregToReg). Returns
355 // the original SrcReg unless it is the target of a copy-like
356 // operation, in which case we chain backwards through all
357 // such operations to the ultimate source register. If a
358 // physical register is encountered, we stop the search.
359 unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
360 
361  while (true) {
362 
363  MachineInstr *MI = MRI->getVRegDef(SrcReg);
364  if (!MI->isCopyLike())
365  return SrcReg;
366 
367  unsigned CopySrcReg;
368  if (MI->isCopy())
369  CopySrcReg = MI->getOperand(1).getReg();
370  else {
371  assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
372  CopySrcReg = MI->getOperand(2).getReg();
373  }
374 
375  if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
376  return CopySrcReg;
377 
378  SrcReg = CopySrcReg;
379  }
380 }
381 
382 } // end anonymous namespace
383 
384 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
385  "PowerPC MI Peephole Optimization", false, false)
387  "PowerPC MI Peephole Optimization", false, false)
388 
389 char PPCMIPeephole::ID = 0;
391 llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
392 
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
size_t i
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
void initializePPCMIPeepholePass(PassRegistry &)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
void dump() const
Definition: Pass.cpp:122
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
#define DEBUG_TYPE
MachineBasicBlock * MBB
bool isCopyLike() const
Return true if the instruction behaves like a copy.
Definition: MachineInstr.h:819
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,"PowerPC MI Peephole Optimization", false, false) INITIALIZE_PASS_END(PPCMIPeephole
int64_t getImm() const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned const MachineRegisterInfo * MRI
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
PowerPC MI Peephole false
bool isCopy() const
Definition: MachineInstr.h:807
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
void dump(const TargetInstrInfo *TII=nullptr) const
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringRef > StandardNames)
initialize - Initialize the set of available library functions based on the specified target triple...
bool isSubregToReg() const
Definition: MachineInstr.h:798
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:52
PowerPC MI Peephole Optimization
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
unsigned getReg() const
getReg - Returns the register number.
FunctionPass * createPPCMIPeepholePass()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define DEBUG(X)
Definition: Debug.h:100
IRTranslator LLVM IR MI
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:40