LLVM 23.0.0git
PPCVSXFMAMutate.cpp
Go to the documentation of this file.
1//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass mutates the form of VSX FMA instructions to avoid unnecessary
10// copies.
11//
12//===----------------------------------------------------------------------===//
13
14#include "PPC.h"
15#include "PPCInstrInfo.h"
16#include "PPCTargetMachine.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
32#include "llvm/Support/Debug.h"
35
36using namespace llvm;
37
38// Temporarily disable FMA mutation by default, since it doesn't handle
39// cross-basic-block intervals well.
40// See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
41// http://reviews.llvm.org/D17087
43 "disable-ppc-vsx-fma-mutation",
44 cl::desc("Disable VSX FMA instruction mutation"), cl::init(true),
46
47#define DEBUG_TYPE "ppc-vsx-fma-mutate"
48
namespace llvm { namespace PPC {
  // Maps a VSX FMA opcode to its alternate-form opcode. Callers in this file
  // treat a result of -1 as "no alternate form exists".
  int getAltVSXFMAOpcode(uint16_t Opcode);
} }
52
53namespace {
54 // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
55 // (Altivec and scalar floating-point registers), we need to transform the
56 // copies into subregister copies with other restrictions.
57 struct PPCVSXFMAMutate : public MachineFunctionPass {
58 static char ID;
59 PPCVSXFMAMutate() : MachineFunctionPass(ID) {}
60
61 LiveIntervals *LIS;
62 const PPCInstrInfo *TII;
63
64protected:
65 bool processBlock(MachineBasicBlock &MBB) {
66 bool Changed = false;
67
68 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
69 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
71 I != IE; ++I) {
72 MachineInstr &MI = *I;
73
74 // The default (A-type) VSX FMA form kills the addend (it is taken from
75 // the target register, which is then updated to reflect the result of
76 // the FMA). If the instruction, however, kills one of the registers
77 // used for the product, then we can use the M-form instruction (which
78 // will take that value from the to-be-defined register).
79
80 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
81 if (AltOpc == -1)
82 continue;
83
84 // This pass is run after register coalescing, and so we're looking for
85 // a situation like this:
86 // ...
87 // %5 = COPY %9; VSLRC:%5,%9
88 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
89 // implicit %rm; VSLRC:%5,%17,%16
90 // ...
91 // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
92 // implicit %rm; VSLRC:%9,%17,%19
93 // ...
94 // Where we can eliminate the copy by changing from the A-type to the
95 // M-type instruction. Specifically, for this example, this means:
96 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
97 // implicit %rm; VSLRC:%5,%17,%16
98 // is replaced by:
99 // %16<def,tied1> = XSMADDMDP %16<tied0>, %18, %9,
100 // implicit %rm; VSLRC:%16,%18,%9
101 // and we remove: %5 = COPY %9; VSLRC:%5,%9
102
103 SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
104
105 VNInfo *AddendValNo =
106 LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
107
108 // This can be null if the register is undef.
109 if (!AddendValNo)
110 continue;
111
112 MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
113
114 // The addend and this instruction must be in the same block.
115
116 if (!AddendMI || AddendMI->getParent() != MI.getParent())
117 continue;
118
119 // The addend must be a full copy within the same register class.
120
121 if (!AddendMI->isFullCopy())
122 continue;
123
124 Register AddendSrcReg = AddendMI->getOperand(1).getReg();
125 if (AddendSrcReg.isVirtual()) {
126 if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
127 MRI.getRegClass(AddendSrcReg))
128 continue;
129 } else {
130 // If AddendSrcReg is a physical register, make sure the destination
131 // register class contains it.
132 if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
133 ->contains(AddendSrcReg))
134 continue;
135 }
136
137 // In theory, there could be other uses of the addend copy before this
138 // fma. We could deal with this, but that would require additional
139 // logic below and I suspect it will not occur in any relevant
140 // situations. Additionally, check whether the copy source is killed
141 // prior to the fma. In order to replace the addend here with the
142 // source of the copy, it must still be live here. We can't use
143 // interval testing for a physical register, so as long as we're
144 // walking the MIs we may as well test liveness here.
145 //
146 // FIXME: There is a case that occurs in practice, like this:
147 // %9 = COPY %f1; VSSRC:%9
148 // ...
149 // %6 = COPY %9; VSSRC:%6,%9
150 // %7 = COPY %9; VSSRC:%7,%9
151 // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
152 // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
153 // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
154 // which prevents an otherwise-profitable transformation.
155 bool OtherUsers = false, KillsAddendSrc = false;
156 for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
157 J != JE; --J) {
158 if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
159 OtherUsers = true;
160 break;
161 }
162 if (J->modifiesRegister(AddendSrcReg, TRI) ||
163 J->killsRegister(AddendSrcReg, TRI)) {
164 KillsAddendSrc = true;
165 break;
166 }
167 }
168
169 if (OtherUsers || KillsAddendSrc)
170 continue;
171
172
173 // The transformation doesn't work well with things like:
174 // %5 = A-form-op %5, %11, %5;
175 // unless %11 is also a kill, so skip when it is not,
176 // and check operand 3 to see it is also a kill to handle the case:
177 // %5 = A-form-op %5, %5, %11;
178 // where %5 and %11 are both kills. This case would be skipped
179 // otherwise.
180 Register OldFMAReg = MI.getOperand(0).getReg();
181
182 // Find one of the product operands that is killed by this instruction.
183 unsigned KilledProdOp = 0, OtherProdOp = 0;
184 Register Reg2 = MI.getOperand(2).getReg();
185 Register Reg3 = MI.getOperand(3).getReg();
186 if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
187 && Reg2 != OldFMAReg) {
188 KilledProdOp = 2;
189 OtherProdOp = 3;
190 } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill()
191 && Reg3 != OldFMAReg) {
192 KilledProdOp = 3;
193 OtherProdOp = 2;
194 }
195
196 // If there are no usable killed product operands, then this
197 // transformation is likely not profitable.
198 if (!KilledProdOp)
199 continue;
200
201 // If the addend copy is used only by this MI, then the addend source
202 // register is likely not live here. This could be fixed (based on the
203 // legality checks above, the live range for the addend source register
204 // could be extended), but it seems likely that such a trivial copy can
205 // be coalesced away later, and thus is not worth the effort.
206 if (AddendSrcReg.isVirtual() &&
207 !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
208 continue;
209
210 // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
211
212 Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
213 Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
214
215 unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
216 unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
217 unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
218
219 bool AddRegKill = AddendMI->getOperand(1).isKill();
220 bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
221 bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
222
223 bool AddRegUndef = AddendMI->getOperand(1).isUndef();
224 bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
225 bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
226
227 // If there isn't a class that fits, we can't perform the transform.
228 // This is needed for correctness with a mixture of VSX and Altivec
229 // instructions to make sure that a low VSX register is not assigned to
230 // the Altivec instruction.
231 if (!MRI.constrainRegClass(KilledProdReg,
232 MRI.getRegClass(OldFMAReg)))
233 continue;
234
235 assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
236 "Addend copy not tied to old FMA output!");
237
238 LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
239
240 MI.getOperand(0).setReg(KilledProdReg);
241 MI.getOperand(1).setReg(KilledProdReg);
242 MI.getOperand(3).setReg(AddendSrcReg);
243
244 MI.getOperand(0).setSubReg(KilledProdSubReg);
245 MI.getOperand(1).setSubReg(KilledProdSubReg);
246 MI.getOperand(3).setSubReg(AddSubReg);
247
248 MI.getOperand(1).setIsKill(KilledProdRegKill);
249 MI.getOperand(3).setIsKill(AddRegKill);
250
251 MI.getOperand(1).setIsUndef(KilledProdRegUndef);
252 MI.getOperand(3).setIsUndef(AddRegUndef);
253
254 MI.setDesc(TII->get(AltOpc));
255
256 // If the addend is also a multiplicand, replace it with the addend
257 // source in both places.
258 if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
259 MI.getOperand(2).setReg(AddendSrcReg);
260 MI.getOperand(2).setSubReg(AddSubReg);
261 MI.getOperand(2).setIsKill(AddRegKill);
262 MI.getOperand(2).setIsUndef(AddRegUndef);
263 } else {
264 MI.getOperand(2).setReg(OtherProdReg);
265 MI.getOperand(2).setSubReg(OtherProdSubReg);
266 MI.getOperand(2).setIsKill(OtherProdRegKill);
267 MI.getOperand(2).setIsUndef(OtherProdRegUndef);
268 }
269
270 LLVM_DEBUG(dbgs() << " -> " << MI);
271
272 // The killed product operand was killed here, so we can reuse it now
273 // for the result of the fma.
274
275 LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
276 VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
277 for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
278 UI != UE;) {
279 MachineOperand &UseMO = *UI;
280 MachineInstr *UseMI = UseMO.getParent();
281 ++UI;
282
283 // Don't replace the result register of the copy we're about to erase.
284 if (UseMI == AddendMI)
285 continue;
286
287 UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
288 }
289
290 // Recalculate the live intervals of the killed product operand.
291 LIS->removeInterval(KilledProdReg);
292 LiveInterval &NewFMAInt =
293 LIS->createAndComputeVirtRegInterval(KilledProdReg);
294
295 LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
296 (void)NewFMAInt;
297
298 // Extend the live interval of the addend source (it might end at the
299 // copy to be removed, or somewhere in between there and here). This
300 // is necessary only if it is a physical register.
301 if (!AddendSrcReg.isVirtual())
302 for (MCRegUnit Unit : TRI->regunits(AddendSrcReg.asMCReg())) {
303 LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
304 AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
305 FMAIdx.getRegSlot());
306 LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
307 }
308
309 FMAInt.removeValNo(FMAValNo);
310 LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
311
312 // Remove the (now unused) copy.
313
314 LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
315 LIS->RemoveMachineInstrFromMaps(*AddendMI);
316 AddendMI->eraseFromParent();
317
318 Changed = true;
319 }
320
321 return Changed;
322 }
323
324public:
325 bool runOnMachineFunction(MachineFunction &MF) override {
326 if (skipFunction(MF.getFunction()))
327 return false;
328
329 // If we don't have VSX then go ahead and return without doing
330 // anything.
331 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
332 if (!STI.hasVSX())
333 return false;
334
335 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
336
337 TII = STI.getInstrInfo();
338
339 bool Changed = false;
340
342 return Changed;
343
344 for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
345 if (processBlock(B))
346 Changed = true;
347
348 return Changed;
349 }
350
351 void getAnalysisUsage(AnalysisUsage &AU) const override {
352 AU.addRequired<LiveIntervalsWrapperPass>();
353 AU.addPreserved<LiveIntervalsWrapperPass>();
354 AU.addRequired<SlotIndexesWrapperPass>();
355 AU.addPreserved<SlotIndexesWrapperPass>();
356 AU.addRequired<MachineDominatorTreeWrapperPass>();
357 AU.addPreserved<MachineDominatorTreeWrapperPass>();
358 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
360 }
361 };
362}
363
365 "PowerPC VSX FMA Mutation", false, false)
370 "PowerPC VSX FMA Mutation", false, false)
371
372char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
373
374char PPCVSXFMAMutate::ID = 0;
376 return new PPCVSXFMAMutate();
377}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static cl::opt< bool > DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), cl::Hidden)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
SI Optimize VGPR LiveRange
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition Debug.h:114
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
LLVM_ABI void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
LLVM_ABI std::pair< VNInfo *, bool > extendInBlock(ArrayRef< SlotIndex > Undefs, SlotIndex StartIdx, SlotIndex Kill)
Attempt to extend a value defined after StartIdx to include Use.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineBasicBlock * getParent() const
bool isFullCopy() const
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
unsigned getSubReg() const
LLVM_ABI void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
const PPCInstrInfo * getInstrInfo() const override
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex def
The index of the defining instruction.
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
Define some predicates that are used for node matching.
int getAltVSXFMAOpcode(uint16_t Opcode)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionPass * createPPCVSXFMAMutatePass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
char & PPCVSXFMAMutateID