LLVM 22.0.0git
MVEVPTBlockPass.cpp
Go to the documentation of this file.
1//===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARM.h"
11#include "ARMSubtarget.h"
12#include "Thumb2InstrInfo.h"
14#include "llvm/ADT/Statistic.h"
15#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/DebugLoc.h"
24#include "llvm/Support/Debug.h"
25#include <cassert>
26#include <new>
27
28using namespace llvm;
29
30#define DEBUG_TYPE "arm-mve-vpt"
31
32namespace {
// Machine-function pass that inserts MVE VPT blocks. runOnMachineFunction is
// the entry point; it dispatches to InsertVPTBlocks for each basic block.
33class MVEVPTBlock : public MachineFunctionPass {
34public:
 // Pass identifier; handed to the MachineFunctionPass constructor below.
35 static char ID;
 // Thumb2 instruction info, assigned from the subtarget at the start of
 // runOnMachineFunction. Not initialised by the constructor.
36 const Thumb2InstrInfo *TII;
 // NOTE(review): runOnMachineFunction also assigns a member named TRI, but no
 // declaration for it is visible in this listing (source line 37 is absent) --
 // confirm against the real source.
38
39 MVEVPTBlock() : MachineFunctionPass(ID) {}
40
 // Returns true if the function was modified.
41 bool runOnMachineFunction(MachineFunction &Fn) override;
42
 // Declares the NoVRegs property as required for this pass.
 // NOTE(review): presumably this means the pass must run after register
 // allocation -- confirm.
43 MachineFunctionProperties getRequiredProperties() const override {
44 return MachineFunctionProperties().setNoVRegs();
45 }
46
 // Human-readable pass name.
47 StringRef getPassName() const override {
48 return "MVE VPT block insertion pass";
49 }
50
51private:
 // Processes a single basic block; returns true if at least one VPT block was
 // created in it.
52 bool InsertVPTBlocks(MachineBasicBlock &MBB);
53};
54
// Out-of-class definition of the pass identifier.
55char MVEVPTBlock::ID = 0;
56
57} // end anonymous namespace
58
// Registers the pass under the DEBUG_TYPE argument string ("arm-mve-vpt").
// Per the macro's parameter list (passName, arg, name, cfg, analysis), the two
// trailing `false` values are the `cfg` and `analysis` flags.
59INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
60
63 unsigned &NewOpcode) {
64 // Search backwards to the instruction that defines VPR. This may or may not
65 // be a VCMP, we check that after this loop. If we find another instruction
66 // that reads VPR, we return nullptr.
68 while (CmpMI != MI->getParent()->begin()) {
69 --CmpMI;
70 if (CmpMI->modifiesRegister(ARM::VPR, TRI))
71 break;
72 if (CmpMI->readsRegister(ARM::VPR, TRI))
73 break;
74 }
75
76 if (CmpMI == MI)
77 return nullptr;
78 NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
79 if (NewOpcode == 0)
80 return nullptr;
81
82 // Search forward from CmpMI to MI, checking if either register was def'd
83 if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
84 MI, TRI))
85 return nullptr;
86 if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
87 MI, TRI))
88 return nullptr;
89 return &*CmpMI;
90}
91
92// Advances Iter past a block of predicated instructions.
93// Returns true if it successfully skipped the whole block of predicated
94// instructions. Returns false when it stopped early (due to MaxSteps), or if
95// Iter didn't point to a predicated instruction.
98 unsigned MaxSteps,
99 unsigned &NumInstrsSteppedOver) {
101 Register PredReg;
102 NumInstrsSteppedOver = 0;
103
104 while (Iter != EndIter) {
105 if (Iter->isDebugInstr()) {
106 // Skip debug instructions
107 ++Iter;
108 continue;
109 }
110
111 NextPred = getVPTInstrPredicate(*Iter, PredReg);
112 assert(NextPred != ARMVCC::Else &&
113 "VPT block pass does not expect Else preds");
114 if (NextPred == ARMVCC::None || MaxSteps == 0)
115 break;
116 --MaxSteps;
117 ++Iter;
118 ++NumInstrsSteppedOver;
119 };
120
121 return NumInstrsSteppedOver != 0 &&
122 (NextPred == ARMVCC::None || Iter == EndIter);
123}
124
125// Returns true if at least one instruction in the range [Iter, End) defines
126// or kills VPR.
129 for (; Iter != End; ++Iter)
130 if (Iter->definesRegister(ARM::VPR, /*TRI=*/nullptr) ||
131 Iter->killsRegister(ARM::VPR, /*TRI=*/nullptr))
132 return true;
133 return false;
134}
135
136// Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize.
138 switch (BlockSize) {
139 case 1:
141 case 2:
143 case 3:
145 case 4:
147 default:
148 llvm_unreachable("Invalid BlockSize!");
149 }
150}
151
152// Given an iterator (Iter) that points at an instruction with a "Then"
153// predicate, tries to create the largest block of continuous predicated
154// instructions possible, and returns the VPT Block Mask of that block.
155//
156// This will try to perform some minor optimization in order to maximize the
157// size of the block.
161 SmallVectorImpl<MachineInstr *> &DeadInstructions) {
162 MachineBasicBlock::instr_iterator BlockBeg = Iter;
163 (void)BlockBeg;
165 "Expected a Predicated Instruction");
166
167 LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
168
169 unsigned BlockSize;
170 StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize);
171
173 std::next(BlockBeg);
174 AddedInstIter != Iter; ++AddedInstIter) {
175 if (AddedInstIter->isDebugInstr())
176 continue;
177 dbgs() << " adding: ";
178 AddedInstIter->dump();
179 });
180
181 // Generate the initial BlockMask
183
184 // Remove VPNOTs while there's still room in the block, so we can make the
185 // largest block possible.
186 ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
187 while (BlockSize < 4 && Iter != EndIter &&
188 Iter->getOpcode() == ARM::MVE_VPNOT) {
189
190 // Try to skip all of the predicated instructions after the VPNOT, stopping
191 // after (4 - BlockSize). If we can't skip them all, stop.
192 unsigned ElseInstCnt = 0;
193 MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
194 if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
195 ElseInstCnt))
196 break;
197
198 // Check if this VPNOT can be removed or not: It can only be removed if at
199 // least one of the predicated instructions that follow it kills or sets
200 // VPR.
201 if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
202 break;
203
204 LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump());
205
206 // Record the new size of the block
207 BlockSize += ElseInstCnt;
208 assert(BlockSize <= 4 && "Block is too large!");
209
210 // Record the VPNot to remove it later.
211 DeadInstructions.push_back(&*Iter);
212 ++Iter;
213
214 // Replace the predicates of the instructions we're adding.
215 // Note that we are using "Iter" to iterate over the block so we can update
216 // it at the same time.
217 for (; Iter != VPNOTBlockEndIter; ++Iter) {
218 if (Iter->isDebugInstr())
219 continue;
220
221 // Find the register in which the predicate is
223 assert(OpIdx != -1);
224
225 // Change the predicate and update the mask
226 Iter->getOperand(OpIdx).setImm(CurrentPredicate);
227 BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate);
228
229 LLVM_DEBUG(dbgs() << " adding : "; Iter->dump());
230 }
231
232 CurrentPredicate =
233 (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
234 }
235 return BlockMask;
236}
237
// Walks every instruction of Block. Each time an instruction with a non-None
// VPT predicate is found, a VPT block is built starting at that instruction
// (CreateVPTBlock), and either a VPT instruction (when an earlier VCMP can be
// folded in) or a plain VPST carrying the block mask is inserted in front of
// it. Instructions recorded as dead while building the block are erased.
// Returns true if at least one block was created.
238bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
239 bool Modified = false;
240 MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
241 MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
242
 // Filled by CreateVPTBlock (VPNOTs to remove); emptied again after each block.
243 SmallVector<MachineInstr *, 4> DeadInstructions;
244
245 while (MBIter != EndIter) {
 // MI stays pointing at the current instruction even after MBIter has been
 // advanced below; it is the insertion point for the VPT/VPST.
246 MachineInstr *MI = &*MBIter;
247 Register PredReg;
248 DebugLoc DL = MI->getDebugLoc();
249
250 ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
251
252 // The idea of the predicate is that None, Then and Else are for use when
253 // handling assembly language: they correspond to the three possible
254 // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
255 // from assembly source or disassembled from object code, you expect to
256 // see a mixture whenever there's a long VPT block. But in code
257 // generation, we hope we'll never generate an Else as input to this pass.
258 assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
259
 // Unpredicated instruction: nothing to do, move on.
260 if (Pred == ARMVCC::None) {
261 ++MBIter;
262 continue;
263 }
264
 // CreateVPTBlock takes the iterator by reference and leaves MBIter just past
 // the last instruction it put in the block, so the loop resumes from there.
265 ARM::PredBlockMask BlockMask =
266 CreateVPTBlock(MBIter, EndIter, DeadInstructions);
267
268 // Search back for a VCMP that can be folded to create a VPT, or else
269 // create a VPST directly
270 MachineInstrBuilder MIBuilder;
 // Written by findVCMPToFoldIntoVPST; only read when that call returns a
 // non-null VCMP.
271 unsigned NewOpcode;
272 LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n");
273 if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) {
274 LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump());
 // The new instruction gets the block mask followed by copies of the VCMP's
 // operands 1, 2 and 3.
 // NOTE(review): presumably the two compared operands and the condition
 // code -- confirm against the VCMP/VPT operand layout.
275 MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode));
276 MIBuilder.addImm((uint64_t)BlockMask);
277 MIBuilder.add(VCMP->getOperand(1));
278 MIBuilder.add(VCMP->getOperand(2));
279 MIBuilder.add(VCMP->getOperand(3));
280
281 // We need to remove any kill flags between the original VCMP and the new
282 // insertion point.
 // The range is [VCMP, MI): the VCMP itself is included, MI is not.
283 for (MachineInstr &MII :
284 make_range(VCMP->getIterator(), MI->getIterator())) {
285 MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI);
286 MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI);
287 }
288
 // The comparison now lives in the new instruction; drop the original.
289 VCMP->eraseFromParent();
290 } else {
 // No foldable VCMP: emit a VPST whose only added operand is the mask.
291 MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST));
292 MIBuilder.addImm((uint64_t)BlockMask);
293 }
294
295 // Erase all dead instructions (VPNOTs). Do that now so that they do not
296 // mess with the bundle creation.
297 for (MachineInstr *DeadMI : DeadInstructions)
298 DeadMI->eraseFromParent();
299 DeadInstructions.clear();
300
 // NOTE(review): the line that opens this call (source line 301) is missing
 // from this listing. The argument list matches
 // finalizeBundle(MBB, FirstMI, LastMI) from the cross-reference section, i.e.
 // bundling from the new VPT/VPST up to MBIter -- confirm.
302 Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);
303
304 Modified = true;
305 }
306
307 return Modified;
308}
309
// Pass entry point. Returns false without touching the function unless its
// subtarget is Thumb2 with MVE integer ops. Otherwise caches the instruction
// and register info on the pass object and runs InsertVPTBlocks on every basic
// block. Returns true if any block was modified.
310bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
311 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
312
313 if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
314 return false;
315
 // getInstrInfo() is declared to return the ARMBaseInstrInfo base type, hence
 // the downcast.
 // NOTE(review): presumably valid because of the isThumb2() check above --
 // confirm.
316 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
317 TRI = STI.getRegisterInfo();
318
319 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
320 << "********** Function: " << Fn.getName() << '\n');
321
 // Accumulate with |= so every block is processed even after one reports a
 // change.
322 bool Modified = false;
323 for (MachineBasicBlock &MBB : Fn)
324 Modified |= InsertVPTBlocks(MBB);
325
326 LLVM_DEBUG(dbgs() << "**************************************\n");
327 return Modified;
328}
329
330/// createMVEVPTBlockPass - Returns a newly allocated instance of the MVE VPT
331/// block insertion pass.
332FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize)
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, unsigned MaxSteps, unsigned &NumInstrsSteppedOver)
static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, MachineBasicBlock::iterator End)
static MachineInstr * findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, const TargetRegisterInfo *TRI, unsigned &NewOpcode)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
MachineInstr unsigned OpIdx
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define LLVM_DEBUG(...)
Definition Debug.h:114
static const int BlockSize
Definition TarWriter.cpp:33
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb2() const
const ARMBaseRegisterInfo * getRegisterInfo() const override
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
PredBlockMask
Mask values for IT and VPT Blocks, to be used by MCOperands.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defd between From and To.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
ARM::PredBlockMask expandPredBlockMask(ARM::PredBlockMask BlockMask, ARMVCC::VPTCodes Kind)
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
FunctionPass * createMVEVPTBlockPass()
createMVEVPTBlock - Returns an instance of the MVE VPT block insertion pass.