LLVM 17.0.0git
MVEVPTBlockPass.cpp
Go to the documentation of this file.
1//===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARM.h"
11#include "ARMSubtarget.h"
13#include "Thumb2InstrInfo.h"
14#include "llvm/ADT/SmallSet.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
25#include "llvm/IR/DebugLoc.h"
26#include "llvm/MC/MCInstrDesc.h"
28#include "llvm/Support/Debug.h"
29#include <cassert>
30#include <new>
31
32using namespace llvm;
33
34#define DEBUG_TYPE "arm-mve-vpt"
35
36namespace {
37class MVEVPTBlock : public MachineFunctionPass {
38public:
39 static char ID;
40 const Thumb2InstrInfo *TII;
42
43 MVEVPTBlock() : MachineFunctionPass(ID) {}
44
45 bool runOnMachineFunction(MachineFunction &Fn) override;
46
49 MachineFunctionProperties::Property::NoVRegs);
50 }
51
52 StringRef getPassName() const override {
53 return "MVE VPT block insertion pass";
54 }
55
56private:
57 bool InsertVPTBlocks(MachineBasicBlock &MBB);
58};
59
60char MVEVPTBlock::ID = 0;
61
62} // end anonymous namespace
63
64INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
65
68 unsigned &NewOpcode) {
69 // Search backwards to the instruction that defines VPR. This may or not
70 // be a VCMP, we check that after this loop. If we find another instruction
71 // that reads cpsr, we return nullptr.
73 while (CmpMI != MI->getParent()->begin()) {
74 --CmpMI;
75 if (CmpMI->modifiesRegister(ARM::VPR, TRI))
76 break;
77 if (CmpMI->readsRegister(ARM::VPR, TRI))
78 break;
79 }
80
81 if (CmpMI == MI)
82 return nullptr;
83 NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
84 if (NewOpcode == 0)
85 return nullptr;
86
87 // Search forward from CmpMI to MI, checking if either register was def'd
88 if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
89 MI, TRI))
90 return nullptr;
91 if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
92 MI, TRI))
93 return nullptr;
94 return &*CmpMI;
95}
96
97// Advances Iter past a block of predicated instructions.
98// Returns true if it successfully skipped the whole block of predicated
99// instructions. Returns false when it stopped early (due to MaxSteps), or if
100// Iter didn't point to a predicated instruction.
103 unsigned MaxSteps,
104 unsigned &NumInstrsSteppedOver) {
106 Register PredReg;
107 NumInstrsSteppedOver = 0;
108
109 while (Iter != EndIter) {
110 if (Iter->isDebugInstr()) {
111 // Skip debug instructions
112 ++Iter;
113 continue;
114 }
115
116 NextPred = getVPTInstrPredicate(*Iter, PredReg);
117 assert(NextPred != ARMVCC::Else &&
118 "VPT block pass does not expect Else preds");
119 if (NextPred == ARMVCC::None || MaxSteps == 0)
120 break;
121 --MaxSteps;
122 ++Iter;
123 ++NumInstrsSteppedOver;
124 };
125
126 return NumInstrsSteppedOver != 0 &&
127 (NextPred == ARMVCC::None || Iter == EndIter);
128}
129
130// Returns true if at least one instruction in the range [Iter, End) defines
131// or kills VPR.
134 for (; Iter != End; ++Iter)
135 if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR))
136 return true;
137 return false;
138}
139
140// Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize.
142 switch (BlockSize) {
143 case 1:
144 return ARM::PredBlockMask::T;
145 case 2:
146 return ARM::PredBlockMask::TT;
147 case 3:
148 return ARM::PredBlockMask::TTT;
149 case 4:
150 return ARM::PredBlockMask::TTTT;
151 default:
152 llvm_unreachable("Invalid BlockSize!");
153 }
154}
155
156// Given an iterator (Iter) that points at an instruction with a "Then"
157// predicate, tries to create the largest block of continuous predicated
158// instructions possible, and returns the VPT Block Mask of that block.
159//
160// This will try to perform some minor optimization in order to maximize the
161// size of the block.
165 SmallVectorImpl<MachineInstr *> &DeadInstructions) {
166 MachineBasicBlock::instr_iterator BlockBeg = Iter;
167 (void)BlockBeg;
169 "Expected a Predicated Instruction");
170
171 LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
172
173 unsigned BlockSize;
174 StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize);
175
177 std::next(BlockBeg);
178 AddedInstIter != Iter; ++AddedInstIter) {
179 if (AddedInstIter->isDebugInstr())
180 continue;
181 dbgs() << " adding: ";
182 AddedInstIter->dump();
183 });
184
185 // Generate the initial BlockMask
187
188 // Remove VPNOTs while there's still room in the block, so we can make the
189 // largest block possible.
190 ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
191 while (BlockSize < 4 && Iter != EndIter &&
192 Iter->getOpcode() == ARM::MVE_VPNOT) {
193
194 // Try to skip all of the predicated instructions after the VPNOT, stopping
195 // after (4 - BlockSize). If we can't skip them all, stop.
196 unsigned ElseInstCnt = 0;
197 MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
198 if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
199 ElseInstCnt))
200 break;
201
202 // Check if this VPNOT can be removed or not: It can only be removed if at
203 // least one of the predicated instruction that follows it kills or sets
204 // VPR.
205 if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
206 break;
207
208 LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump());
209
210 // Record the new size of the block
211 BlockSize += ElseInstCnt;
212 assert(BlockSize <= 4 && "Block is too large!");
213
214 // Record the VPNot to remove it later.
215 DeadInstructions.push_back(&*Iter);
216 ++Iter;
217
218 // Replace the predicates of the instructions we're adding.
219 // Note that we are using "Iter" to iterate over the block so we can update
220 // it at the same time.
221 for (; Iter != VPNOTBlockEndIter; ++Iter) {
222 if (Iter->isDebugInstr())
223 continue;
224
225 // Find the register in which the predicate is
226 int OpIdx = findFirstVPTPredOperandIdx(*Iter);
227 assert(OpIdx != -1);
228
229 // Change the predicate and update the mask
230 Iter->getOperand(OpIdx).setImm(CurrentPredicate);
231 BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate);
232
233 LLVM_DEBUG(dbgs() << " adding : "; Iter->dump());
234 }
235
236 CurrentPredicate =
237 (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
238 }
239 return BlockMask;
240}
241
242bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
243 bool Modified = false;
244 MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
245 MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
246
247 SmallVector<MachineInstr *, 4> DeadInstructions;
248
249 while (MBIter != EndIter) {
250 MachineInstr *MI = &*MBIter;
251 Register PredReg;
252 DebugLoc DL = MI->getDebugLoc();
253
254 ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
255
256 // The idea of the predicate is that None, Then and Else are for use when
257 // handling assembly language: they correspond to the three possible
258 // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
259 // from assembly source or disassembled from object code, you expect to
260 // see a mixture whenever there's a long VPT block. But in code
261 // generation, we hope we'll never generate an Else as input to this pass.
262 assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
263
264 if (Pred == ARMVCC::None) {
265 ++MBIter;
266 continue;
267 }
268
269 ARM::PredBlockMask BlockMask =
270 CreateVPTBlock(MBIter, EndIter, DeadInstructions);
271
272 // Search back for a VCMP that can be folded to create a VPT, or else
273 // create a VPST directly
274 MachineInstrBuilder MIBuilder;
275 unsigned NewOpcode;
276 LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n");
277 if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) {
278 LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump());
279 MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode));
280 MIBuilder.addImm((uint64_t)BlockMask);
281 MIBuilder.add(VCMP->getOperand(1));
282 MIBuilder.add(VCMP->getOperand(2));
283 MIBuilder.add(VCMP->getOperand(3));
284
285 // We need to remove any kill flags between the original VCMP and the new
286 // insertion point.
287 for (MachineInstr &MII :
288 make_range(VCMP->getIterator(), MI->getIterator())) {
289 MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI);
290 MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI);
291 }
292
293 VCMP->eraseFromParent();
294 } else {
295 MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST));
296 MIBuilder.addImm((uint64_t)BlockMask);
297 }
298
299 // Erase all dead instructions (VPNOT's). Do that now so that they do not
300 // mess with the bundle creation.
301 for (MachineInstr *DeadMI : DeadInstructions)
302 DeadMI->eraseFromParent();
303 DeadInstructions.clear();
304
306 Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);
307
308 Modified = true;
309 }
310
311 return Modified;
312}
313
314bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
315 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
316
317 if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
318 return false;
319
320 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
321 TRI = STI.getRegisterInfo();
322
323 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
324 << "********** Function: " << Fn.getName() << '\n');
325
326 bool Modified = false;
327 for (MachineBasicBlock &MBB : Fn)
328 Modified |= InsertVPTBlocks(MBB);
329
330 LLVM_DEBUG(dbgs() << "**************************************\n");
331 return Modified;
332}
333
334/// createMVEVPTBlock - Returns an instance of the MVE VPT block
335/// insertion pass.
336FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize)
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, unsigned MaxSteps, unsigned &NumInstrsSteppedOver)
static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, MachineBasicBlock::iterator End)
static MachineInstr * findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, const TargetRegisterInfo *TRI, unsigned &NewOpcode)
#define DEBUG_TYPE
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const int BlockSize
Definition: TarWriter.cpp:33
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:262
bool isThumb2() const
Definition: ARMSubtarget.h:421
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:274
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:68
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Iterator for intrusive lists based on ilist_node.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
PredBlockMask
Mask values for IT and VPT Blocks, to be used by MCOperands.
Definition: ARMBaseInfo.h:105
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defd between From and To.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
ARM::PredBlockMask expandPredBlockMask(ARM::PredBlockMask BlockMask, ARMVCC::VPTCodes Kind)
Definition: ARMBaseInfo.cpp:18
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
FunctionPass * createMVEVPTBlockPass()
createMVEVPTBlock - Returns an instance of the MVE VPT block insertion pass.