LLVM 20.0.0git
PPCVSXFMAMutate.cpp
Go to the documentation of this file.
//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass mutates the form of VSX FMA instructions to avoid unnecessary
// copies.
//
//===----------------------------------------------------------------------===//
13
14#include "PPC.h"
15#include "PPCInstrInfo.h"
16#include "PPCTargetMachine.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
31#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37// Temporarily disable FMA mutation by default, since it doesn't handle
38// cross-basic-block intervals well.
39// See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
40// http://reviews.llvm.org/D17087
42 "disable-ppc-vsx-fma-mutation",
43 cl::desc("Disable VSX FMA instruction mutation"), cl::init(true),
45
#define DEBUG_TYPE "ppc-vsx-fma-mutate"

// Forward declaration of the opcode-mapping helper used below. The pass treats
// a return value of -1 as "this opcode has no alternate VSX FMA form".
namespace llvm { namespace PPC {
  int getAltVSXFMAOpcode(uint16_t Opcode);
} }
51
52namespace {
53 // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
54 // (Altivec and scalar floating-point registers), we need to transform the
55 // copies into subregister copies with other restrictions.
56 struct PPCVSXFMAMutate : public MachineFunctionPass {
57 static char ID;
58 PPCVSXFMAMutate() : MachineFunctionPass(ID) {
60 }
61
62 LiveIntervals *LIS;
63 const PPCInstrInfo *TII;
64
65protected:
66 bool processBlock(MachineBasicBlock &MBB) {
67 bool Changed = false;
68
70 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
72 I != IE; ++I) {
73 MachineInstr &MI = *I;
74
75 // The default (A-type) VSX FMA form kills the addend (it is taken from
76 // the target register, which is then updated to reflect the result of
77 // the FMA). If the instruction, however, kills one of the registers
78 // used for the product, then we can use the M-form instruction (which
79 // will take that value from the to-be-defined register).
80
81 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
82 if (AltOpc == -1)
83 continue;
84
85 // This pass is run after register coalescing, and so we're looking for
86 // a situation like this:
87 // ...
88 // %5 = COPY %9; VSLRC:%5,%9
89 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
90 // implicit %rm; VSLRC:%5,%17,%16
91 // ...
92 // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
93 // implicit %rm; VSLRC:%9,%17,%19
94 // ...
95 // Where we can eliminate the copy by changing from the A-type to the
96 // M-type instruction. Specifically, for this example, this means:
97 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
98 // implicit %rm; VSLRC:%5,%17,%16
99 // is replaced by:
100 // %16<def,tied1> = XSMADDMDP %16<tied0>, %18, %9,
101 // implicit %rm; VSLRC:%16,%18,%9
102 // and we remove: %5 = COPY %9; VSLRC:%5,%9
103
104 SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
105
106 VNInfo *AddendValNo =
107 LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
108
109 // This can be null if the register is undef.
110 if (!AddendValNo)
111 continue;
112
113 MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
114
115 // The addend and this instruction must be in the same block.
116
117 if (!AddendMI || AddendMI->getParent() != MI.getParent())
118 continue;
119
120 // The addend must be a full copy within the same register class.
121
122 if (!AddendMI->isFullCopy())
123 continue;
124
125 Register AddendSrcReg = AddendMI->getOperand(1).getReg();
126 if (AddendSrcReg.isVirtual()) {
127 if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
128 MRI.getRegClass(AddendSrcReg))
129 continue;
130 } else {
131 // If AddendSrcReg is a physical register, make sure the destination
132 // register class contains it.
133 if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
134 ->contains(AddendSrcReg))
135 continue;
136 }
137
138 // In theory, there could be other uses of the addend copy before this
139 // fma. We could deal with this, but that would require additional
140 // logic below and I suspect it will not occur in any relevant
141 // situations. Additionally, check whether the copy source is killed
142 // prior to the fma. In order to replace the addend here with the
143 // source of the copy, it must still be live here. We can't use
144 // interval testing for a physical register, so as long as we're
145 // walking the MIs we may as well test liveness here.
146 //
147 // FIXME: There is a case that occurs in practice, like this:
148 // %9 = COPY %f1; VSSRC:%9
149 // ...
150 // %6 = COPY %9; VSSRC:%6,%9
151 // %7 = COPY %9; VSSRC:%7,%9
152 // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
153 // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
154 // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
155 // which prevents an otherwise-profitable transformation.
156 bool OtherUsers = false, KillsAddendSrc = false;
157 for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
158 J != JE; --J) {
159 if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
160 OtherUsers = true;
161 break;
162 }
163 if (J->modifiesRegister(AddendSrcReg, TRI) ||
164 J->killsRegister(AddendSrcReg, TRI)) {
165 KillsAddendSrc = true;
166 break;
167 }
168 }
169
170 if (OtherUsers || KillsAddendSrc)
171 continue;
172
173
174 // The transformation doesn't work well with things like:
175 // %5 = A-form-op %5, %11, %5;
176 // unless %11 is also a kill, so skip when it is not,
177 // and check operand 3 to see it is also a kill to handle the case:
178 // %5 = A-form-op %5, %5, %11;
179 // where %5 and %11 are both kills. This case would be skipped
180 // otherwise.
181 Register OldFMAReg = MI.getOperand(0).getReg();
182
183 // Find one of the product operands that is killed by this instruction.
184 unsigned KilledProdOp = 0, OtherProdOp = 0;
185 Register Reg2 = MI.getOperand(2).getReg();
186 Register Reg3 = MI.getOperand(3).getReg();
187 if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
188 && Reg2 != OldFMAReg) {
189 KilledProdOp = 2;
190 OtherProdOp = 3;
191 } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill()
192 && Reg3 != OldFMAReg) {
193 KilledProdOp = 3;
194 OtherProdOp = 2;
195 }
196
197 // If there are no usable killed product operands, then this
198 // transformation is likely not profitable.
199 if (!KilledProdOp)
200 continue;
201
202 // If the addend copy is used only by this MI, then the addend source
203 // register is likely not live here. This could be fixed (based on the
204 // legality checks above, the live range for the addend source register
205 // could be extended), but it seems likely that such a trivial copy can
206 // be coalesced away later, and thus is not worth the effort.
207 if (AddendSrcReg.isVirtual() &&
208 !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
209 continue;
210
211 // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
212
213 Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
214 Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
215
216 unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
217 unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
218 unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
219
220 bool AddRegKill = AddendMI->getOperand(1).isKill();
221 bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
222 bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
223
224 bool AddRegUndef = AddendMI->getOperand(1).isUndef();
225 bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
226 bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
227
228 // If there isn't a class that fits, we can't perform the transform.
229 // This is needed for correctness with a mixture of VSX and Altivec
230 // instructions to make sure that a low VSX register is not assigned to
231 // the Altivec instruction.
232 if (!MRI.constrainRegClass(KilledProdReg,
233 MRI.getRegClass(OldFMAReg)))
234 continue;
235
236 assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
237 "Addend copy not tied to old FMA output!");
238
239 LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
240
241 MI.getOperand(0).setReg(KilledProdReg);
242 MI.getOperand(1).setReg(KilledProdReg);
243 MI.getOperand(3).setReg(AddendSrcReg);
244
245 MI.getOperand(0).setSubReg(KilledProdSubReg);
246 MI.getOperand(1).setSubReg(KilledProdSubReg);
247 MI.getOperand(3).setSubReg(AddSubReg);
248
249 MI.getOperand(1).setIsKill(KilledProdRegKill);
250 MI.getOperand(3).setIsKill(AddRegKill);
251
252 MI.getOperand(1).setIsUndef(KilledProdRegUndef);
253 MI.getOperand(3).setIsUndef(AddRegUndef);
254
255 MI.setDesc(TII->get(AltOpc));
256
257 // If the addend is also a multiplicand, replace it with the addend
258 // source in both places.
259 if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
260 MI.getOperand(2).setReg(AddendSrcReg);
261 MI.getOperand(2).setSubReg(AddSubReg);
262 MI.getOperand(2).setIsKill(AddRegKill);
263 MI.getOperand(2).setIsUndef(AddRegUndef);
264 } else {
265 MI.getOperand(2).setReg(OtherProdReg);
266 MI.getOperand(2).setSubReg(OtherProdSubReg);
267 MI.getOperand(2).setIsKill(OtherProdRegKill);
268 MI.getOperand(2).setIsUndef(OtherProdRegUndef);
269 }
270
271 LLVM_DEBUG(dbgs() << " -> " << MI);
272
273 // The killed product operand was killed here, so we can reuse it now
274 // for the result of the fma.
275
276 LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
277 VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
278 for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
279 UI != UE;) {
280 MachineOperand &UseMO = *UI;
281 MachineInstr *UseMI = UseMO.getParent();
282 ++UI;
283
284 // Don't replace the result register of the copy we're about to erase.
285 if (UseMI == AddendMI)
286 continue;
287
288 UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
289 }
290
291 // Recalculate the live intervals of the killed product operand.
292 LIS->removeInterval(KilledProdReg);
293 LiveInterval &NewFMAInt =
294 LIS->createAndComputeVirtRegInterval(KilledProdReg);
295
296 LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
297 (void)NewFMAInt;
298
299 // Extend the live interval of the addend source (it might end at the
300 // copy to be removed, or somewhere in between there and here). This
301 // is necessary only if it is a physical register.
302 if (!AddendSrcReg.isVirtual())
303 for (MCRegUnit Unit : TRI->regunits(AddendSrcReg.asMCReg())) {
304 LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
305 AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
306 FMAIdx.getRegSlot());
307 LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
308 }
309
310 FMAInt.removeValNo(FMAValNo);
311 LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
312
313 // Remove the (now unused) copy.
314
315 LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
316 LIS->RemoveMachineInstrFromMaps(*AddendMI);
317 AddendMI->eraseFromParent();
318
319 Changed = true;
320 }
321
322 return Changed;
323 }
324
325public:
326 bool runOnMachineFunction(MachineFunction &MF) override {
327 if (skipFunction(MF.getFunction()))
328 return false;
329
330 // If we don't have VSX then go ahead and return without doing
331 // anything.
332 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
333 if (!STI.hasVSX())
334 return false;
335
336 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
337
338 TII = STI.getInstrInfo();
339
340 bool Changed = false;
341
343 return Changed;
344
346 if (processBlock(B))
347 Changed = true;
348
349 return Changed;
350 }
351
352 void getAnalysisUsage(AnalysisUsage &AU) const override {
360 }
361 };
362}
363
365 "PowerPC VSX FMA Mutation", false, false)
370 "PowerPC VSX FMA Mutation", false, false)
371
372char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
373
374char PPCVSXFMAMutate::ID = 0;
376 return new PPCVSXFMAMutate();
377}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DEBUG(...)
Definition: Debug.h:106
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
PowerPC VSX FMA Mutation
static cl::opt< bool > DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), cl::Hidden)
#define DEBUG_TYPE
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const
Return the first index in the given basic block.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
bool isKill() const
Return true if the live-in value is killed by this instruction.
Definition: LiveInterval.h:112
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542
std::pair< VNInfo *, bool > extendInBlock(ArrayRef< SlotIndex > Undefs, SlotIndex StartIdx, SlotIndex Kill)
Attempt to extend a value defined after StartIdx to include Use.
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
bool isFullCopy() const
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:150
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
int getAltVSXFMAOpcode(uint16_t Opcode)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createPPCVSXFMAMutatePass()
void initializePPCVSXFMAMutatePass(PassRegistry &)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
char & PPCVSXFMAMutateID