LLVM 22.0.0git
PPCVSXFMAMutate.cpp
Go to the documentation of this file.
1//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass mutates the form of VSX FMA instructions to avoid unnecessary
10// copies.
11//
12//===----------------------------------------------------------------------===//
13
14#include "PPC.h"
15#include "PPCInstrInfo.h"
16#include "PPCTargetMachine.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
30#include "llvm/Support/Debug.h"
33
34using namespace llvm;
35
36// Temporarily disable FMA mutation by default, since it doesn't handle
37// cross-basic-block intervals well.
38// See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
39// http://reviews.llvm.org/D17087
41 "disable-ppc-vsx-fma-mutation",
42 cl::desc("Disable VSX FMA instruction mutation"), cl::init(true),
44
45#define DEBUG_TYPE "ppc-vsx-fma-mutate"
46
namespace llvm { namespace PPC {
  // Forward declaration; the definition is not in this file. The caller in
  // this file treats a return value of -1 as "Opcode has no alternate VSX FMA
  // form" and otherwise uses the result as the replacement opcode.
  int getAltVSXFMAOpcode(uint16_t Opcode);
} }
50
51namespace {
52 // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
53 // (Altivec and scalar floating-point registers), we need to transform the
54 // copies into subregister copies with other restrictions.
55 struct PPCVSXFMAMutate : public MachineFunctionPass {
56 static char ID;
57 PPCVSXFMAMutate() : MachineFunctionPass(ID) {}
58
59 LiveIntervals *LIS;
60 const PPCInstrInfo *TII;
61
62protected:
63 bool processBlock(MachineBasicBlock &MBB) {
64 bool Changed = false;
65
66 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
67 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
69 I != IE; ++I) {
70 MachineInstr &MI = *I;
71
72 // The default (A-type) VSX FMA form kills the addend (it is taken from
73 // the target register, which is then updated to reflect the result of
74 // the FMA). If the instruction, however, kills one of the registers
75 // used for the product, then we can use the M-form instruction (which
76 // will take that value from the to-be-defined register).
77
78 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
79 if (AltOpc == -1)
80 continue;
81
82 // This pass is run after register coalescing, and so we're looking for
83 // a situation like this:
84 // ...
85 // %5 = COPY %9; VSLRC:%5,%9
86 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
87 // implicit %rm; VSLRC:%5,%17,%16
88 // ...
89 // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
90 // implicit %rm; VSLRC:%9,%17,%19
91 // ...
92 // Where we can eliminate the copy by changing from the A-type to the
93 // M-type instruction. Specifically, for this example, this means:
94 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
95 // implicit %rm; VSLRC:%5,%17,%16
96 // is replaced by:
97 // %16<def,tied1> = XSMADDMDP %16<tied0>, %18, %9,
98 // implicit %rm; VSLRC:%16,%18,%9
99 // and we remove: %5 = COPY %9; VSLRC:%5,%9
100
101 SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
102
103 VNInfo *AddendValNo =
104 LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
105
106 // This can be null if the register is undef.
107 if (!AddendValNo)
108 continue;
109
110 MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
111
112 // The addend and this instruction must be in the same block.
113
114 if (!AddendMI || AddendMI->getParent() != MI.getParent())
115 continue;
116
117 // The addend must be a full copy within the same register class.
118
119 if (!AddendMI->isFullCopy())
120 continue;
121
122 Register AddendSrcReg = AddendMI->getOperand(1).getReg();
123 if (AddendSrcReg.isVirtual()) {
124 if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
125 MRI.getRegClass(AddendSrcReg))
126 continue;
127 } else {
128 // If AddendSrcReg is a physical register, make sure the destination
129 // register class contains it.
130 if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
131 ->contains(AddendSrcReg))
132 continue;
133 }
134
135 // In theory, there could be other uses of the addend copy before this
136 // fma. We could deal with this, but that would require additional
137 // logic below and I suspect it will not occur in any relevant
138 // situations. Additionally, check whether the copy source is killed
139 // prior to the fma. In order to replace the addend here with the
140 // source of the copy, it must still be live here. We can't use
141 // interval testing for a physical register, so as long as we're
142 // walking the MIs we may as well test liveness here.
143 //
144 // FIXME: There is a case that occurs in practice, like this:
145 // %9 = COPY %f1; VSSRC:%9
146 // ...
147 // %6 = COPY %9; VSSRC:%6,%9
148 // %7 = COPY %9; VSSRC:%7,%9
149 // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
150 // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
151 // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
152 // which prevents an otherwise-profitable transformation.
153 bool OtherUsers = false, KillsAddendSrc = false;
154 for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
155 J != JE; --J) {
156 if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
157 OtherUsers = true;
158 break;
159 }
160 if (J->modifiesRegister(AddendSrcReg, TRI) ||
161 J->killsRegister(AddendSrcReg, TRI)) {
162 KillsAddendSrc = true;
163 break;
164 }
165 }
166
167 if (OtherUsers || KillsAddendSrc)
168 continue;
169
170
171 // The transformation doesn't work well with things like:
172 // %5 = A-form-op %5, %11, %5;
173 // unless %11 is also a kill, so skip when it is not,
174 // and check operand 3 to see it is also a kill to handle the case:
175 // %5 = A-form-op %5, %5, %11;
176 // where %5 and %11 are both kills. This case would be skipped
177 // otherwise.
178 Register OldFMAReg = MI.getOperand(0).getReg();
179
180 // Find one of the product operands that is killed by this instruction.
181 unsigned KilledProdOp = 0, OtherProdOp = 0;
182 Register Reg2 = MI.getOperand(2).getReg();
183 Register Reg3 = MI.getOperand(3).getReg();
184 if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
185 && Reg2 != OldFMAReg) {
186 KilledProdOp = 2;
187 OtherProdOp = 3;
188 } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill()
189 && Reg3 != OldFMAReg) {
190 KilledProdOp = 3;
191 OtherProdOp = 2;
192 }
193
194 // If there are no usable killed product operands, then this
195 // transformation is likely not profitable.
196 if (!KilledProdOp)
197 continue;
198
199 // If the addend copy is used only by this MI, then the addend source
200 // register is likely not live here. This could be fixed (based on the
201 // legality checks above, the live range for the addend source register
202 // could be extended), but it seems likely that such a trivial copy can
203 // be coalesced away later, and thus is not worth the effort.
204 if (AddendSrcReg.isVirtual() &&
205 !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
206 continue;
207
208 // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
209
210 Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
211 Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
212
213 unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
214 unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
215 unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
216
217 bool AddRegKill = AddendMI->getOperand(1).isKill();
218 bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
219 bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
220
221 bool AddRegUndef = AddendMI->getOperand(1).isUndef();
222 bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
223 bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
224
225 // If there isn't a class that fits, we can't perform the transform.
226 // This is needed for correctness with a mixture of VSX and Altivec
227 // instructions to make sure that a low VSX register is not assigned to
228 // the Altivec instruction.
229 if (!MRI.constrainRegClass(KilledProdReg,
230 MRI.getRegClass(OldFMAReg)))
231 continue;
232
233 assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
234 "Addend copy not tied to old FMA output!");
235
236 LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
237
238 MI.getOperand(0).setReg(KilledProdReg);
239 MI.getOperand(1).setReg(KilledProdReg);
240 MI.getOperand(3).setReg(AddendSrcReg);
241
242 MI.getOperand(0).setSubReg(KilledProdSubReg);
243 MI.getOperand(1).setSubReg(KilledProdSubReg);
244 MI.getOperand(3).setSubReg(AddSubReg);
245
246 MI.getOperand(1).setIsKill(KilledProdRegKill);
247 MI.getOperand(3).setIsKill(AddRegKill);
248
249 MI.getOperand(1).setIsUndef(KilledProdRegUndef);
250 MI.getOperand(3).setIsUndef(AddRegUndef);
251
252 MI.setDesc(TII->get(AltOpc));
253
254 // If the addend is also a multiplicand, replace it with the addend
255 // source in both places.
256 if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
257 MI.getOperand(2).setReg(AddendSrcReg);
258 MI.getOperand(2).setSubReg(AddSubReg);
259 MI.getOperand(2).setIsKill(AddRegKill);
260 MI.getOperand(2).setIsUndef(AddRegUndef);
261 } else {
262 MI.getOperand(2).setReg(OtherProdReg);
263 MI.getOperand(2).setSubReg(OtherProdSubReg);
264 MI.getOperand(2).setIsKill(OtherProdRegKill);
265 MI.getOperand(2).setIsUndef(OtherProdRegUndef);
266 }
267
268 LLVM_DEBUG(dbgs() << " -> " << MI);
269
270 // The killed product operand was killed here, so we can reuse it now
271 // for the result of the fma.
272
273 LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
274 VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
275 for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
276 UI != UE;) {
277 MachineOperand &UseMO = *UI;
278 MachineInstr *UseMI = UseMO.getParent();
279 ++UI;
280
281 // Don't replace the result register of the copy we're about to erase.
282 if (UseMI == AddendMI)
283 continue;
284
285 UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
286 }
287
288 // Recalculate the live intervals of the killed product operand.
289 LIS->removeInterval(KilledProdReg);
290 LiveInterval &NewFMAInt =
291 LIS->createAndComputeVirtRegInterval(KilledProdReg);
292
293 LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
294 (void)NewFMAInt;
295
296 // Extend the live interval of the addend source (it might end at the
297 // copy to be removed, or somewhere in between there and here). This
298 // is necessary only if it is a physical register.
299 if (!AddendSrcReg.isVirtual())
300 for (MCRegUnit Unit : TRI->regunits(AddendSrcReg.asMCReg())) {
301 LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
302 AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
303 FMAIdx.getRegSlot());
304 LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
305 }
306
307 FMAInt.removeValNo(FMAValNo);
308 LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
309
310 // Remove the (now unused) copy.
311
312 LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
313 LIS->RemoveMachineInstrFromMaps(*AddendMI);
314 AddendMI->eraseFromParent();
315
316 Changed = true;
317 }
318
319 return Changed;
320 }
321
322public:
323 bool runOnMachineFunction(MachineFunction &MF) override {
324 if (skipFunction(MF.getFunction()))
325 return false;
326
327 // If we don't have VSX then go ahead and return without doing
328 // anything.
329 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
330 if (!STI.hasVSX())
331 return false;
332
333 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
334
335 TII = STI.getInstrInfo();
336
337 bool Changed = false;
338
340 return Changed;
341
342 for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
343 if (processBlock(B))
344 Changed = true;
345
346 return Changed;
347 }
348
349 void getAnalysisUsage(AnalysisUsage &AU) const override {
350 AU.addRequired<LiveIntervalsWrapperPass>();
351 AU.addPreserved<LiveIntervalsWrapperPass>();
352 AU.addRequired<SlotIndexesWrapperPass>();
353 AU.addPreserved<SlotIndexesWrapperPass>();
354 AU.addRequired<MachineDominatorTreeWrapperPass>();
355 AU.addPreserved<MachineDominatorTreeWrapperPass>();
357 }
358 };
359}
360
362 "PowerPC VSX FMA Mutation", false, false)
367 "PowerPC VSX FMA Mutation", false, false)
368
369char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
370
371char PPCVSXFMAMutate::ID = 0;
373 return new PPCVSXFMAMutate();
374}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static cl::opt< bool > DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), cl::Hidden)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
SI Optimize VGPR LiveRange
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition Debug.h:114
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
LLVM_ABI void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
LLVM_ABI std::pair< VNInfo *, bool > extendInBlock(ArrayRef< SlotIndex > Undefs, SlotIndex StartIdx, SlotIndex Kill)
Attempt to extend a value defined after StartIdx to include Use.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineBasicBlock * getParent() const
bool isFullCopy() const
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
unsigned getSubReg() const
LLVM_ABI void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
const PPCInstrInfo * getInstrInfo() const override
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:102
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex def
The index of the defining instruction.
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
Define some predicates that are used for node matching.
int getAltVSXFMAOpcode(uint16_t Opcode)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createPPCVSXFMAMutatePass()
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
unsigned MCRegUnit
Register units are used to compute register aliasing.
Definition MCRegister.h:30
char & PPCVSXFMAMutateID