LLVM 23.0.0git
PPCVSXFMAMutate.cpp
Go to the documentation of this file.
1//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass mutates the form of VSX FMA instructions to avoid unnecessary
10// copies.
11//
12//===----------------------------------------------------------------------===//
13
14#include "PPC.h"
15#include "PPCInstrInfo.h"
16#include "PPCTargetMachine.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
31#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37// Temporarily disable FMA mutation by default, since it doesn't handle
38// cross-basic-block intervals well.
39// See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
40// http://reviews.llvm.org/D17087
42 "disable-ppc-vsx-fma-mutation",
43 cl::desc("Disable VSX FMA instruction mutation"), cl::init(true),
45
46#define DEBUG_TYPE "ppc-vsx-fma-mutate"
47
48namespace llvm { namespace PPC {
50} }
51
52namespace {
53 // PPCVSXFMAMutate pass - Rewrites A-type VSX FMA instructions into their
54 // M-type form when the addend comes from a full copy in the same block and
55 // one product operand is killed, so that the copy can be removed.
56 struct PPCVSXFMAMutate : public MachineFunctionPass {
57 static char ID;
58 PPCVSXFMAMutate() : MachineFunctionPass(ID) {}
59
   // Live-interval analysis for the function being processed. Assigned in
   // runOnMachineFunction; processBlock both queries and updates it.
60 LiveIntervals *LIS;
   // Instruction info of the current PPC subtarget. Assigned in
   // runOnMachineFunction.
61 const PPCInstrInfo *TII;
62
63protected:
   /// Scan the instructions of \p MBB and, for each one that has an
   /// alternate VSX FMA opcode, try to switch it to that opcode so that the
   /// full copy feeding its addend can be erased.
   ///
   /// \returns true if at least one instruction was rewritten.
64 bool processBlock(MachineBasicBlock &MBB) {
65 bool Changed = false;
66
67 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
68 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
   // NOTE(review): the line that opens this loop and declares I and IE
   // (original line 69) is not present in this listing - restore it from
   // the upstream file before building.
70 I != IE; ++I) {
71 MachineInstr &MI = *I;
72
73 // The default (A-type) VSX FMA form kills the addend (it is taken from
74 // the target register, which is then updated to reflect the result of
75 // the FMA). If the instruction, however, kills one of the registers
76 // used for the product, then we can use the M-form instruction (which
77 // will take that value from the to-be-defined register).
78
   // -1 means MI has no alternate FMA opcode, so it is not a candidate.
79 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
80 if (AltOpc == -1)
81 continue;
82
83 // This pass is run after register coalescing, and so we're looking for
84 // a situation like this:
85 // ...
86 // %5 = COPY %9; VSLRC:%5,%9
87 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
88 // implicit %rm; VSLRC:%5,%17,%16
89 // ...
90 // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
91 // implicit %rm; VSLRC:%9,%17,%19
92 // ...
93 // Where we can eliminate the copy by changing from the A-type to the
94 // M-type instruction. Specifically, for this example, this means:
95 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
96 // implicit %rm; VSLRC:%5,%17,%16
97 // is replaced by:
98 // %16<def,tied1> = XSMADDMDP %16<tied0>, %17, %9,
99 // implicit %rm; VSLRC:%16,%17,%9
100 // and we remove: %5 = COPY %9; VSLRC:%5,%9
101
102 SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
103
   // Value of the addend (operand 1) that is live into the FMA.
104 VNInfo *AddendValNo =
105 LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
106
107 // This can be null if the register is undef.
108 if (!AddendValNo)
109 continue;
110
111 MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
112
113 // The addend and this instruction must be in the same block.
114
115 if (!AddendMI || AddendMI->getParent() != MI.getParent())
116 continue;
117
118 // The addend must be a full copy within the same register class.
119
120 if (!AddendMI->isFullCopy())
121 continue;
122
123 Register AddendSrcReg = AddendMI->getOperand(1).getReg();
124 if (AddendSrcReg.isVirtual()) {
125 if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
126 MRI.getRegClass(AddendSrcReg))
127 continue;
128 } else {
129 // If AddendSrcReg is a physical register, make sure the destination
130 // register class contains it.
131 if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
132 ->contains(AddendSrcReg))
133 continue;
134 }
135
136 // In theory, there could be other uses of the addend copy before this
137 // fma. We could deal with this, but that would require additional
138 // logic below and I suspect it will not occur in any relevant
139 // situations. Additionally, check whether the copy source is killed
140 // prior to the fma. In order to replace the addend here with the
141 // source of the copy, it must still be live here. We can't use
142 // interval testing for a physical register, so as long as we're
143 // walking the MIs we may as well test liveness here.
144 //
145 // FIXME: There is a case that occurs in practice, like this:
146 // %9 = COPY %f1; VSSRC:%9
147 // ...
148 // %6 = COPY %9; VSSRC:%6,%9
149 // %7 = COPY %9; VSSRC:%7,%9
150 // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
151 // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
152 // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
153 // which prevents an otherwise-profitable transformation.
154 bool OtherUsers = false, KillsAddendSrc = false;
   // Walk backwards from the instruction just before the FMA up to, but
   // not including, the addend copy.
155 for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
156 J != JE; --J) {
157 if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
158 OtherUsers = true;
159 break;
160 }
161 if (J->modifiesRegister(AddendSrcReg, TRI) ||
162 J->killsRegister(AddendSrcReg, TRI)) {
163 KillsAddendSrc = true;
164 break;
165 }
166 }
167
168 if (OtherUsers || KillsAddendSrc)
169 continue;
170
171
172 // A product operand that is the same register as the FMA result is
173 // never picked as the killed product operand. So for
174 // %5 = A-form-op %5, %11, %5;
175 // the transformation is only attempted when %11 is a kill, and for
176 // %5 = A-form-op %5, %5, %11;
177 // operand 3 is checked as well, so the case where %11 is a kill is not
178 // skipped.
179 Register OldFMAReg = MI.getOperand(0).getReg();
180
181 // Find one of the product operands that is killed by this instruction.
   // 0 is used as the "none found" marker; valid choices are 2 and 3.
182 unsigned KilledProdOp = 0, OtherProdOp = 0;
183 Register Reg2 = MI.getOperand(2).getReg();
184 Register Reg3 = MI.getOperand(3).getReg();
185 if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
186 && Reg2 != OldFMAReg) {
187 KilledProdOp = 2;
188 OtherProdOp = 3;
189 } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill()
190 && Reg3 != OldFMAReg) {
191 KilledProdOp = 3;
192 OtherProdOp = 2;
193 }
194
195 // If there are no usable killed product operands, then this
196 // transformation is likely not profitable.
197 if (!KilledProdOp)
198 continue;
199
200 // If the addend copy is used only by this MI, then the addend source
201 // register is likely not live here. This could be fixed (based on the
202 // legality checks above, the live range for the addend source register
203 // could be extended), but it seems likely that such a trivial copy can
204 // be coalesced away later, and thus is not worth the effort.
205 if (AddendSrcReg.isVirtual() &&
206 !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
207 continue;
208
209 // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
210
   // Capture registers, sub-register indices and kill/undef flags now,
   // before any operand of MI is overwritten below.
211 Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
212 Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
213
214 unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
215 unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
216 unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
217
218 bool AddRegKill = AddendMI->getOperand(1).isKill();
219 bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
220 bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
221
222 bool AddRegUndef = AddendMI->getOperand(1).isUndef();
223 bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
224 bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
225
226 // If there isn't a class that fits, we can't perform the transform.
227 // This is needed for correctness with a mixture of VSX and Altivec
228 // instructions to make sure that a low VSX register is not assigned to
229 // the Altivec instruction.
230 if (!MRI.constrainRegClass(KilledProdReg,
231 MRI.getRegClass(OldFMAReg)))
232 continue;
233
234 assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
235 "Addend copy not tied to old FMA output!");
236
237 LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
238
   // Operands 0 and 1 (result and tied input) become the killed product
   // register; operand 3 becomes the source of the addend copy.
239 MI.getOperand(0).setReg(KilledProdReg);
240 MI.getOperand(1).setReg(KilledProdReg);
241 MI.getOperand(3).setReg(AddendSrcReg);
242
243 MI.getOperand(0).setSubReg(KilledProdSubReg);
244 MI.getOperand(1).setSubReg(KilledProdSubReg);
245 MI.getOperand(3).setSubReg(AddSubReg);
246
247 MI.getOperand(1).setIsKill(KilledProdRegKill);
248 MI.getOperand(3).setIsKill(AddRegKill);
249
250 MI.getOperand(1).setIsUndef(KilledProdRegUndef);
251 MI.getOperand(3).setIsUndef(AddRegUndef);
252
   // Switch the instruction to the alternate opcode found above.
253 MI.setDesc(TII->get(AltOpc));
254
255 // If the addend is also a multiplicand, replace it with the addend
256 // source in both places.
257 if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
258 MI.getOperand(2).setReg(AddendSrcReg);
259 MI.getOperand(2).setSubReg(AddSubReg);
260 MI.getOperand(2).setIsKill(AddRegKill);
261 MI.getOperand(2).setIsUndef(AddRegUndef);
262 } else {
263 MI.getOperand(2).setReg(OtherProdReg);
264 MI.getOperand(2).setSubReg(OtherProdSubReg);
265 MI.getOperand(2).setIsKill(OtherProdRegKill);
266 MI.getOperand(2).setIsUndef(OtherProdRegUndef);
267 }
268
269 LLVM_DEBUG(dbgs() << " -> " << MI);
270
271 // The killed product operand was killed here, so we can reuse it now
272 // for the result of the fma.
273
274 LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
275 VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
   // Redirect every non-debug reference to OldFMAReg (except the one in
   // the addend copy) to the killed product register. The iterator is
   // advanced before the operand is rewritten (presumably because the
   // substitution moves the operand off OldFMAReg's list - confirm).
276 for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
277 UI != UE;) {
278 MachineOperand &UseMO = *UI;
279 MachineInstr *UseMI = UseMO.getParent();
280 ++UI;
281
282 // Don't replace the result register of the copy we're about to erase.
283 if (UseMI == AddendMI)
284 continue;
285
286 UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
287 }
288
289 // Recalculate the live intervals of the killed product operand.
290 LIS->removeInterval(KilledProdReg);
291 LiveInterval &NewFMAInt =
292 LIS->createAndComputeVirtRegInterval(KilledProdReg);
293
294 LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
   // NewFMAInt is otherwise referenced only inside LLVM_DEBUG; the void
   // cast keeps it formally used.
295 (void)NewFMAInt;
296
297 // Extend the live interval of the addend source (it might end at the
298 // copy to be removed, or somewhere in between there and here). This
299 // is necessary only if it is a physical register.
300 if (!AddendSrcReg.isVirtual())
301 for (MCRegUnit Unit : TRI->regunits(AddendSrcReg.asMCReg())) {
302 LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
303 AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
304 FMAIdx.getRegSlot());
305 LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
306 }
307
   // Drop the value number looked up at the FMA's register slot from the
   // old result register's interval.
308 FMAInt.removeValNo(FMAValNo);
309 LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
310
311 // Remove the (now unused) copy.
312
313 LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
314 LIS->RemoveMachineInstrFromMaps(*AddendMI);
315 AddendMI->eraseFromParent();
316
317 Changed = true;
318 }
319
320 return Changed;
321 }
322
323public:
   /// Pass entry point: runs processBlock on every basic block of \p MF.
   ///
   /// Returns false without touching the function when skipFunction() says
   /// to skip it or when the subtarget has no VSX.
   ///
   /// \returns true if any block was changed.
324 bool runOnMachineFunction(MachineFunction &MF) override {
325 if (skipFunction(MF.getFunction()))
326 return false;
327
328 // If we don't have VSX then go ahead and return without doing
329 // anything.
330 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
331 if (!STI.hasVSX())
332 return false;
333
334 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
335
336 TII = STI.getInstrInfo();
337
338 bool Changed = false;
339
   // NOTE(review): original line 340 is not present in this listing. As
   // shown, the return below is unconditional and the loop after it is
   // unreachable; presumably the missing line is a guard on the
   // "disable-ppc-vsx-fma-mutation" option - confirm against upstream.
341 return Changed;
342
343 for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
344 if (processBlock(B))
345 Changed = true;
346
347 return Changed;
348 }
349
   /// Declare the analyses this pass requires and preserves: live
   /// intervals, slot indexes and the machine dominator tree are both
   /// required and preserved; block frequency info is preserved only.
350 void getAnalysisUsage(AnalysisUsage &AU) const override {
351 AU.addRequired<LiveIntervalsWrapperPass>();
352 AU.addPreserved<LiveIntervalsWrapperPass>();
353 AU.addRequired<SlotIndexesWrapperPass>();
354 AU.addPreserved<SlotIndexesWrapperPass>();
355 AU.addRequired<MachineDominatorTreeWrapperPass>();
356 AU.addPreserved<MachineDominatorTreeWrapperPass>();
357 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
   // NOTE(review): original line 358 is not present in this listing;
   // presumably it forwards to the base class getAnalysisUsage - confirm
   // against upstream.
359 }
360 };
361}
362
364 "PowerPC VSX FMA Mutation", false, false)
369 "PowerPC VSX FMA Mutation", false, false)
370
// Reference to the pass's static ID, made available in the llvm namespace.
371char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
372
// Definition of the static ID member declared in PPCVSXFMAMutate; the
// constructor passes it to MachineFunctionPass.
373char PPCVSXFMAMutate::ID = 0;
375 return new PPCVSXFMAMutate();
376}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static cl::opt< bool > DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), cl::Hidden)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
SI Optimize VGPR LiveRange
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition Debug.h:114
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
LLVM_ABI void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
LLVM_ABI std::pair< VNInfo *, bool > extendInBlock(ArrayRef< SlotIndex > Undefs, SlotIndex StartIdx, SlotIndex Kill)
Attempt to extend a value defined after StartIdx to include Use.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineBasicBlock * getParent() const
bool isFullCopy() const
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
unsigned getSubReg() const
LLVM_ABI void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
const PPCInstrInfo * getInstrInfo() const override
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex def
The index of the defining instruction.
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
Define some predicates that are used for node matching.
int getAltVSXFMAOpcode(uint16_t Opcode)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionPass * createPPCVSXFMAMutatePass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
char & PPCVSXFMAMutateID