LLVM 18.0.0git
PPCVSXFMAMutate.cpp
Go to the documentation of this file.
1//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass mutates the form of VSX FMA instructions to avoid unnecessary
10// copies.
11//
12//===----------------------------------------------------------------------===//
13
15#include "PPC.h"
16#include "PPCInstrBuilder.h"
17#include "PPCInstrInfo.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
33#include "llvm/MC/MCAsmInfo.h"
36#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42// Temporarily disable FMA mutation by default, since it doesn't handle
43// cross-basic-block intervals well.
44// See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
45// http://reviews.llvm.org/D17087
47 "disable-ppc-vsx-fma-mutation",
48 cl::desc("Disable VSX FMA instruction mutation"), cl::init(true),
50
51#define DEBUG_TYPE "ppc-vsx-fma-mutate"
52
53namespace llvm { namespace PPC {
55} }
56
57namespace {
58 // PPCVSXFMAMutate pass - Rewrite VSX FMA instructions from the default
59 // (A-type) form to the alternate (M-type) form when doing so allows the
60 // copy that feeds the addend to be removed.
// Holds the per-function state (LIS, TII) shared by runOnMachineFunction and
// processBlock, which performs the actual instruction rewrite.
61 struct PPCVSXFMAMutate : public MachineFunctionPass {
62 static char ID;
63 PPCVSXFMAMutate() : MachineFunctionPass(ID) {
// NOTE(review): original line 64 (the constructor body) is missing from this
// listing.
65 }
66
// Live-interval analysis; assigned in runOnMachineFunction.
67 LiveIntervals *LIS;
// PPC instruction info; taken from the subtarget in runOnMachineFunction.
68 const PPCInstrInfo *TII;
69
70protected:
// Scans the instructions of MBB for VSX FMAs that have an alternate opcode
// and whose addend is defined by a full copy in the same block. Each match
// that passes the checks below is switched to the alternate opcode with the
// copy source as its addend, the other references to the old result register
// are renamed to the killed product register, the affected live intervals
// are updated, and the copy is erased.
// Returns true if at least one instruction was rewritten.
71 bool processBlock(MachineBasicBlock &MBB) {
72 bool Changed = false;
73
// NOTE(review): original lines 74 and 76 are missing from this listing. They
// presumably declare `MRI` (used below) and open the loop that declares the
// iterators `I` and `IE` - confirm against the real file.
75 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
77 I != IE; ++I) {
78 MachineInstr &MI = *I;
79
80 // The default (A-type) VSX FMA form kills the addend (it is taken from
81 // the target register, which is then updated to reflect the result of
82 // the FMA). If the instruction, however, kills one of the registers
83 // used for the product, then we can use the M-form instruction (which
84 // will take that value from the to-be-defined register).
85
// Opcodes without an alternate form report -1 and are skipped.
86 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
87 if (AltOpc == -1)
88 continue;
89
90 // This pass is run after register coalescing, and so we're looking for
91 // a situation like this:
92 // ...
93 // %5 = COPY %9; VSLRC:%5,%9
94 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
95 // implicit %rm; VSLRC:%5,%17,%16
96 // ...
97 // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
98 // implicit %rm; VSLRC:%9,%17,%19
99 // ...
100 // Where we can eliminate the copy by changing from the A-type to the
101 // M-type instruction. Specifically, for this example, this means:
102 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
103 // implicit %rm; VSLRC:%5,%17,%16
104 // is replaced by:
105 // %16<def,tied1> = XSMADDMDP %16<tied0>, %17, %9,
106 // implicit %rm; VSLRC:%16,%17,%9
107 // and we remove: %5 = COPY %9; VSLRC:%5,%9
108
109 SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
110
// Value of the addend register (operand 1) that is live into the FMA.
111 VNInfo *AddendValNo =
112 LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
113
114 // This can be null if the register is undef.
115 if (!AddendValNo)
116 continue;
117
118 MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
119
120 // The addend and this instruction must be in the same block.
121
122 if (!AddendMI || AddendMI->getParent() != MI.getParent())
123 continue;
124
125 // The addend must be a full copy within the same register class.
126
127 if (!AddendMI->isFullCopy())
128 continue;
129
130 Register AddendSrcReg = AddendMI->getOperand(1).getReg();
131 if (AddendSrcReg.isVirtual()) {
132 if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
133 MRI.getRegClass(AddendSrcReg))
134 continue;
135 } else {
136 // If AddendSrcReg is a physical register, make sure the destination
137 // register class contains it.
138 if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
139 ->contains(AddendSrcReg))
140 continue;
141 }
142
143 // In theory, there could be other uses of the addend copy before this
144 // fma. We could deal with this, but that would require additional
145 // logic below and I suspect it will not occur in any relevant
146 // situations. Additionally, check whether the copy source is killed
147 // prior to the fma. In order to replace the addend here with the
148 // source of the copy, it must still be live here. We can't use
149 // interval testing for a physical register, so as long as we're
150 // walking the MIs we may as well test liveness here.
151 //
152 // FIXME: There is a case that occurs in practice, like this:
153 // %9 = COPY %f1; VSSRC:%9
154 // ...
155 // %6 = COPY %9; VSSRC:%6,%9
156 // %7 = COPY %9; VSSRC:%7,%9
157 // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
158 // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
159 // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
160 // which prevents an otherwise-profitable transformation.
161 bool OtherUsers = false, KillsAddendSrc = false;
// Walk backwards from the instruction just before the FMA up to, but not
// including, the copy itself.
162 for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
163 J != JE; --J) {
164 if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
165 OtherUsers = true;
166 break;
167 }
168 if (J->modifiesRegister(AddendSrcReg, TRI) ||
169 J->killsRegister(AddendSrcReg, TRI)) {
170 KillsAddendSrc = true;
171 break;
172 }
173 }
174
175 if (OtherUsers || KillsAddendSrc)
176 continue;
177
178
179 // The transformation doesn't work well with things like:
180 // %5 = A-form-op %5, %11, %5;
181 // unless %11 is also a kill, so skip when it is not,
182 // and check operand 3 to see it is also a kill to handle the case:
183 // %5 = A-form-op %5, %5, %11;
184 // where %5 and %11 are both kills. This case would be skipped
185 // otherwise.
186 Register OldFMAReg = MI.getOperand(0).getReg();
187
188 // Find one of the product operands that is killed by this instruction.
// KilledProdOp stays 0 when neither product operand qualifies.
189 unsigned KilledProdOp = 0, OtherProdOp = 0;
190 Register Reg2 = MI.getOperand(2).getReg();
191 Register Reg3 = MI.getOperand(3).getReg();
192 if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
193 && Reg2 != OldFMAReg) {
194 KilledProdOp = 2;
195 OtherProdOp = 3;
196 } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill()
197 && Reg3 != OldFMAReg) {
198 KilledProdOp = 3;
199 OtherProdOp = 2;
200 }
201
202 // If there are no usable killed product operands, then this
203 // transformation is likely not profitable.
204 if (!KilledProdOp)
205 continue;
206
207 // If the addend copy is used only by this MI, then the addend source
208 // register is likely not live here. This could be fixed (based on the
209 // legality checks above, the live range for the addend source register
210 // could be extended), but it seems likely that such a trivial copy can
211 // be coalesced away later, and thus is not worth the effort.
212 if (AddendSrcReg.isVirtual() &&
213 !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
214 continue;
215
216 // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
217
// Snapshot the register, subregister, kill and undef state of the operands
// before any of them is overwritten below.
218 Register KilledProdReg = MI.getOperand(KilledProdOp).getReg();
219 Register OtherProdReg = MI.getOperand(OtherProdOp).getReg();
220
221 unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
222 unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
223 unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
224
225 bool AddRegKill = AddendMI->getOperand(1).isKill();
226 bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
227 bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
228
229 bool AddRegUndef = AddendMI->getOperand(1).isUndef();
230 bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
231 bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
232
233 // If there isn't a class that fits, we can't perform the transform.
234 // This is needed for correctness with a mixture of VSX and Altivec
235 // instructions to make sure that a low VSX register is not assigned to
236 // the Altivec instruction.
237 if (!MRI.constrainRegClass(KilledProdReg,
238 MRI.getRegClass(OldFMAReg)))
239 continue;
240
241 assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
242 "Addend copy not tied to old FMA output!");
243
244 LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
245
// Rewrite the operands in place: the tied def/use pair (operands 0 and 1)
// takes the killed product register and operand 3 takes the copy source.
246 MI.getOperand(0).setReg(KilledProdReg);
247 MI.getOperand(1).setReg(KilledProdReg);
248 MI.getOperand(3).setReg(AddendSrcReg);
249
250 MI.getOperand(0).setSubReg(KilledProdSubReg);
251 MI.getOperand(1).setSubReg(KilledProdSubReg);
252 MI.getOperand(3).setSubReg(AddSubReg);
253
254 MI.getOperand(1).setIsKill(KilledProdRegKill);
255 MI.getOperand(3).setIsKill(AddRegKill);
256
257 MI.getOperand(1).setIsUndef(KilledProdRegUndef);
258 MI.getOperand(3).setIsUndef(AddRegUndef);
259
// Switch the instruction to the alternate opcode found above.
260 MI.setDesc(TII->get(AltOpc));
261
262 // If the addend is also a multiplicand, replace it with the addend
263 // source in both places.
264 if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
265 MI.getOperand(2).setReg(AddendSrcReg);
266 MI.getOperand(2).setSubReg(AddSubReg);
267 MI.getOperand(2).setIsKill(AddRegKill);
268 MI.getOperand(2).setIsUndef(AddRegUndef);
269 } else {
270 MI.getOperand(2).setReg(OtherProdReg);
271 MI.getOperand(2).setSubReg(OtherProdSubReg);
272 MI.getOperand(2).setIsKill(OtherProdRegKill);
273 MI.getOperand(2).setIsUndef(OtherProdRegUndef);
274 }
275
276 LLVM_DEBUG(dbgs() << " -> " << MI);
277
278 // The killed product operand was killed here, so we can reuse it now
279 // for the result of the fma.
280
281 LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
282 VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
283 for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
284 UI != UE;) {
285 MachineOperand &UseMO = *UI;
286 MachineInstr *UseMI = UseMO.getParent();
// Advance before UseMO is rewritten below. Presumably the substitution takes
// the operand off OldFMAReg's list, which would otherwise disturb the
// iteration - confirm.
287 ++UI;
288
289 // Don't replace the result register of the copy we're about to erase.
290 if (UseMI == AddendMI)
291 continue;
292
293 UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
294 }
295
296 // Extend the live intervals of the killed product operand to hold the
297 // fma result.
298
299 LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
300 for (auto &AI : FMAInt) {
301 // Don't add the segment that corresponds to the original copy.
302 if (AI.valno == AddendValNo)
303 continue;
304
// Each copied segment gets its own new value number in NewFMAInt.
305 VNInfo *NewFMAValNo =
306 NewFMAInt.getNextValue(AI.start, LIS->getVNInfoAllocator());
307
308 NewFMAInt.addSegment(
309 LiveInterval::Segment(AI.start, AI.end, NewFMAValNo));
310 }
311 LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
312
313 // Extend the live interval of the addend source (it might end at the
314 // copy to be removed, or somewhere in between there and here). This
315 // is necessary only if it is a physical register.
316 if (!AddendSrcReg.isVirtual())
317 for (MCRegUnit Unit : TRI->regunits(AddendSrcReg.asMCReg())) {
318 LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
319 AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
320 FMAIdx.getRegSlot());
321 LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
322 }
323
// Drop from the old result register's interval the value that was live at
// the FMA's register slot.
324 FMAInt.removeValNo(FMAValNo);
325 LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
326
327 // Remove the (now unused) copy.
328
329 LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
330 LIS->RemoveMachineInstrFromMaps(*AddendMI);
331 AddendMI->eraseFromParent();
332
333 Changed = true;
334 }
335
336 return Changed;
337 }
338
339public:
// Pass entry point. Returns false without modifying MF when the function is
// skipped or the subtarget has no VSX; otherwise fetches LIS and TII and
// reports whether processBlock changed anything.
340 bool runOnMachineFunction(MachineFunction &MF) override {
341 if (skipFunction(MF.getFunction()))
342 return false;
343
344 // If we don't have VSX then go ahead and return without doing
345 // anything.
346 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
347 if (!STI.hasVSX())
348 return false;
349
350 LIS = &getAnalysis<LiveIntervals>();
351
352 TII = STI.getInstrInfo();
353
354 bool Changed = false;
355
// NOTE(review): original line 356 is missing from this listing. The early
// return below is presumably guarded by the disable-ppc-vsx-fma-mutation
// option - confirm.
357 return Changed;
358
// NOTE(review): original line 359 is missing from this listing; it
// presumably opens the loop that declares `B` - confirm.
360 if (processBlock(B))
361 Changed = true;
362
363 return Changed;
364 }
365
366 void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): original lines 367-373 (the body of this function) are
// missing from this listing.
374 }
375 };
376}
377
379 "PowerPC VSX FMA Mutation", false, false)
384 "PowerPC VSX FMA Mutation", false, false)
385
// Reference in namespace llvm bound to the pass's static ID member.
386char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
387
// Out-of-class definition of the static ID member declared in
// PPCVSXFMAMutate.
388char PPCVSXFMAMutate::ID = 0;
390 return new PPCVSXFMAMutate();
391}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
PowerPC VSX FMA Mutation
static cl::opt< bool > DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation", cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), cl::Hidden)
#define DEBUG_TYPE
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static constexpr uint32_t Opcode
Definition: aarch32.h:200
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const
Return the first index in the given basic block.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
VNInfo::Allocator & getVNInfoAllocator()
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
bool isKill() const
Return true if the live-in value is killed by this instruction.
Definition: LiveInterval.h:112
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542
std::pair< VNInfo *, bool > extendInBlock(ArrayRef< SlotIndex > Undefs, SlotIndex StartIdx, SlotIndex Kill)
Attempt to extend a value defined after StartIdx to include Use.
VNInfo * getNextValue(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator)
getNextValue - Create a new value number and return it.
Definition: LiveInterval.h:331
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:326
bool isFullCopy() const
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:240
SlotIndexes pass.
Definition: SlotIndexes.h:300
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
int getAltVSXFMAOpcode(uint16_t Opcode)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createPPCVSXFMAMutatePass()
void initializePPCVSXFMAMutatePass(PassRegistry &)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
char & PPCVSXFMAMutateID
This represents a simple continuous liveness interval for a value.
Definition: LiveInterval.h:162