LLVM 20.0.0git
MVEVPTBlockPass.cpp
Go to the documentation of this file.
1//===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARM.h"
11#include "ARMSubtarget.h"
12#include "Thumb2InstrInfo.h"
14#include "llvm/ADT/Statistic.h"
15#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/DebugLoc.h"
24#include "llvm/Support/Debug.h"
25#include <cassert>
26#include <new>
27
28using namespace llvm;
29
30#define DEBUG_TYPE "arm-mve-vpt"
31
32namespace {
/// Machine-function pass that inserts MVE VPT blocks (see getPassName()).
33class MVEVPTBlock : public MachineFunctionPass {
34public:
// Pass identification; handed to the MachineFunctionPass constructor below.
35 static char ID;
// Thumb2 instruction info; assigned from the subtarget in
// runOnMachineFunction and used to look up the opcodes being built.
36 const Thumb2InstrInfo *TII;
// NOTE(review): runOnMachineFunction also assigns a member named TRI; its
// declaration (listing line 37) is not visible in this listing.
38
39 MVEVPTBlock() : MachineFunctionPass(ID) {}
40
// Pass entry point; returns true if the function was modified.
41 bool runOnMachineFunction(MachineFunction &Fn) override;
42
// NOTE(review): the opening lines of this member (listing lines 43-44) are
// missing from this listing. Judging by the cross-reference list at the end
// of the file it is presumably the getRequiredProperties() override, setting
// the NoVRegs property -- confirm against the real source.
45 MachineFunctionProperties::Property::NoVRegs);
46 }
47
// Human-readable name of the pass.
48 StringRef getPassName() const override {
49 return "MVE VPT block insertion pass";
50 }
51
52private:
// Inserts VPT blocks into a single basic block; returns true if the block
// was modified.
53 bool InsertVPTBlocks(MachineBasicBlock &MBB);
54};
55
// Storage for the pass ID that the MVEVPTBlock constructor passes to
// MachineFunctionPass.
56char MVEVPTBlock::ID = 0;
57
58} // end anonymous namespace
59
// Pass initialization boilerplate. Arguments, in order: pass class, argument
// string (DEBUG_TYPE, i.e. "arm-mve-vpt"), descriptive name, cfg flag and
// analysis flag (both false here).
60INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
61
// findVCMPToFoldIntoVPST: searches backwards from MI for an instruction whose
// opcode has a VPT equivalent. On success returns that instruction and stores
// the VPT opcode in NewOpcode; otherwise returns nullptr.
// NOTE(review): the first lines of the signature (listing lines 62-63) are
// missing from this listing; the cross-reference list at the end of the file
// gives it as
//   static MachineInstr *findVCMPToFoldIntoVPST(
//       MachineBasicBlock::iterator MI, const TargetRegisterInfo *TRI,
//       unsigned &NewOpcode)
64 unsigned &NewOpcode) {
65 // Search backwards for the nearest instruction that modifies VPR. This may
66 // or may not be a VCMP; we check that after this loop. The search also stops
67 // at the first instruction that reads VPR.
// NOTE(review): the declaration of CmpMI (listing line 68) is missing from
// this listing; presumably it is an iterator initialised to MI -- confirm.
69 while (CmpMI != MI->getParent()->begin()) {
70 --CmpMI;
71 if (CmpMI->modifiesRegister(ARM::VPR, TRI))
72 break;
73 if (CmpMI->readsRegister(ARM::VPR, TRI))
74 break;
75 }
76
// CmpMI still equal to MI means the loop above never stepped backwards, so
// there is no earlier instruction to fold.
77 if (CmpMI == MI)
78 return nullptr;
// A zero NewOpcode is treated as "this instruction cannot be folded".
79 NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
80 if (NewOpcode == 0)
81 return nullptr;
82
83 // Search forward from CmpMI to MI, checking if either register was def'd
// (the registers in operands 1 and 2 of the candidate). If one of them is
// redefined in between, give up.
84 if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
85 MI, TRI))
86 return nullptr;
87 if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
88 MI, TRI))
89 return nullptr;
90 return &*CmpMI;
91}
92
93// Advances Iter past a block of predicated instructions.
94// Returns true if it successfully skipped the whole block of predicated
95// instructions. Returns false when it stopped early (due to MaxSteps), or if
96// Iter didn't point to a predicated instruction.
//
// NumInstrsSteppedOver receives the number of predicated instructions that
// were stepped over; debug instructions are skipped without being counted and
// without consuming MaxSteps.
// NOTE(review): the first lines of the signature (listing lines 97-98) are
// missing from this listing; the cross-reference list at the end of the file
// gives it as
//   static bool StepOverPredicatedInstrs(
//       MachineBasicBlock::instr_iterator &Iter,
//       MachineBasicBlock::instr_iterator EndIter, unsigned MaxSteps,
//       unsigned &NumInstrsSteppedOver)
99 unsigned MaxSteps,
100 unsigned &NumInstrsSteppedOver) {
// NOTE(review): the declaration of NextPred (listing line 101) is missing
// from this listing.
102 Register PredReg;
103 NumInstrsSteppedOver = 0;
104
105 while (Iter != EndIter) {
106 if (Iter->isDebugInstr()) {
107 // Skip debug instructions
108 ++Iter;
109 continue;
110 }
111
112 NextPred = getVPTInstrPredicate(*Iter, PredReg);
113 assert(NextPred != ARMVCC::Else &&
114 "VPT block pass does not expect Else preds");
// Stop at the first unpredicated instruction, or once the step budget is
// exhausted.
115 if (NextPred == ARMVCC::None || MaxSteps == 0)
116 break;
117 --MaxSteps;
118 ++Iter;
119 ++NumInstrsSteppedOver;
120 };
121
// Success requires at least one step, and that we stopped on an unpredicated
// instruction or at EndIter rather than only because MaxSteps ran out.
122 return NumInstrsSteppedOver != 0 &&
123 (NextPred == ARMVCC::None || Iter == EndIter);
124}
125
126// Returns true if at least one instruction in the range [Iter, End) defines
127// or kills VPR.
// Both queries are made with a null TargetRegisterInfo.
// NOTE(review): the signature (listing lines 128-129) is missing from this
// listing; the cross-reference list at the end of the file gives it as
//   static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter,
//                                           MachineBasicBlock::iterator End)
130 for (; Iter != End; ++Iter)
131 if (Iter->definesRegister(ARM::VPR, /*TRI=*/nullptr) ||
132 Iter->killsRegister(ARM::VPR, /*TRI=*/nullptr))
133 return true;
134 return false;
135}
136
137// Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize.
// BlockSize must be in the range 1..4; any other value hits
// llvm_unreachable.
// NOTE(review): the signature (listing line 138) is missing from this
// listing; the cross-reference list at the end of the file gives it as
//   static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize)
139 switch (BlockSize) {
140 case 1:
141 return ARM::PredBlockMask::T;
142 case 2:
143 return ARM::PredBlockMask::TT;
144 case 3:
145 return ARM::PredBlockMask::TTT;
146 case 4:
147 return ARM::PredBlockMask::TTTT;
148 default:
149 llvm_unreachable("Invalid BlockSize!");
150 }
151}
152
153// Given an iterator (Iter) that points at an instruction with a "Then"
154// predicate, tries to create the largest block of contiguous predicated
155// instructions possible, and returns the VPT Block Mask of that block.
156//
157// This will try to perform some minor optimization in order to maximize the
158// size of the block.
//
// On return Iter has been advanced past the instructions placed in the block.
// VPNOTs that were absorbed into the block are appended to DeadInstructions;
// they are not erased here.
// NOTE(review): the first lines of the signature (listing lines 159-161) are
// missing from this listing; the cross-reference list at the end of the file
// gives it as
//   static ARM::PredBlockMask CreateVPTBlock(
//       MachineBasicBlock::instr_iterator &Iter,
//       MachineBasicBlock::instr_iterator EndIter,
//       SmallVectorImpl<MachineInstr *> &DeadInstructions)
162 SmallVectorImpl<MachineInstr *> &DeadInstructions) {
// BlockBeg is used by the debug output below; the void cast presumably
// silences unused-variable warnings when that output is compiled out.
163 MachineBasicBlock::instr_iterator BlockBeg = Iter;
164 (void)BlockBeg;
// NOTE(review): the first line of this assert (listing line 165) is missing
// from this listing.
166 "Expected a Predicated Instruction");
167
168 LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump());
169
// Step over up to 4 predicated instructions; BlockSize receives how many were
// taken. The boolean result is deliberately ignored here.
170 unsigned BlockSize;
171 StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize);
172
// NOTE(review): the opening line of this debug-only loop (listing line 173)
// is missing from this listing.
174 std::next(BlockBeg);
175 AddedInstIter != Iter; ++AddedInstIter) {
176 if (AddedInstIter->isDebugInstr())
177 continue;
178 dbgs() << " adding: ";
179 AddedInstIter->dump();
180 });
181
182 // Generate the initial BlockMask
// NOTE(review): the statement that declares BlockMask (listing line 183) is
// missing from this listing; presumably it calls GetInitialBlockMask with
// BlockSize -- confirm.
184
185 // Remove VPNOTs while there's still room in the block, so we can make the
186 // largest block possible.
// The instructions after the first removed VPNOT get the Else predicate; the
// predicate then alternates for each further VPNOT that is removed.
187 ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else;
188 while (BlockSize < 4 && Iter != EndIter &&
189 Iter->getOpcode() == ARM::MVE_VPNOT) {
190
191 // Try to skip all of the predicated instructions after the VPNOT, stopping
192 // after (4 - BlockSize). If we can't skip them all, stop.
193 unsigned ElseInstCnt = 0;
194 MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter);
195 if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize),
196 ElseInstCnt))
197 break;
198
199 // Check if this VPNOT can be removed or not: It can only be removed if at
200 // least one of the predicated instructions that follow it kills or sets
201 // VPR.
// NOTE(review): the range checked below starts at Iter, i.e. at the VPNOT
// itself, not at the instruction after it -- confirm this is intended.
202 if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter))
203 break;
204
205 LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump());
206
207 // Record the new size of the block
208 BlockSize += ElseInstCnt;
209 assert(BlockSize <= 4 && "Block is too large!");
210
211 // Record the VPNot to remove it later.
212 DeadInstructions.push_back(&*Iter);
213 ++Iter;
214
215 // Replace the predicates of the instructions we're adding.
216 // Note that we are using "Iter" to iterate over the block so we can update
217 // it at the same time.
218 for (; Iter != VPNOTBlockEndIter; ++Iter) {
219 if (Iter->isDebugInstr())
220 continue;
221
222 // Find the operand that holds the VPT predicate code
223 int OpIdx = findFirstVPTPredOperandIdx(*Iter);
224 assert(OpIdx != -1);
225
226 // Change the predicate and update the mask
227 Iter->getOperand(OpIdx).setImm(CurrentPredicate);
228 BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate);
229
230 LLVM_DEBUG(dbgs() << " adding : "; Iter->dump());
231 }
232
// Flip the predicate for the instructions after the next VPNOT, if any.
233 CurrentPredicate =
234 (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then);
235 }
236 return BlockMask;
237}
238
// Walks Block and, for every run of VPT-predicated instructions, inserts a
// VPT (when a preceding compare can be folded in) or a VPST in front of the
// run. Returns true if at least one block was created.
239bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
240 bool Modified = false;
241 MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
242 MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
243
// VPNOTs absorbed by CreateVPTBlock; erased once the VPT/VPST has been built.
244 SmallVector<MachineInstr *, 4> DeadInstructions;
245
246 while (MBIter != EndIter) {
// MI is captured before CreateVPTBlock advances MBIter, so it keeps pointing
// at the first instruction of the block; the new VPT/VPST is inserted before
// it and reuses its debug location.
247 MachineInstr *MI = &*MBIter;
248 Register PredReg;
249 DebugLoc DL = MI->getDebugLoc();
250
251 ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
252
253 // The idea of the predicate is that None, Then and Else are for use when
254 // handling assembly language: they correspond to the three possible
255 // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
256 // from assembly source or disassembled from object code, you expect to
257 // see a mixture whenever there's a long VPT block. But in code
258 // generation, we hope we'll never generate an Else as input to this pass.
259 assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
260
// Unpredicated instructions do not start a block; move on.
261 if (Pred == ARMVCC::None) {
262 ++MBIter;
263 continue;
264 }
265
// Advances MBIter past the instructions that end up in the block.
266 ARM::PredBlockMask BlockMask =
267 CreateVPTBlock(MBIter, EndIter, DeadInstructions);
268
269 // Search back for a VCMP that can be folded to create a VPT, or else
270 // create a VPST directly
271 MachineInstrBuilder MIBuilder;
272 unsigned NewOpcode;
273 LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n");
274 if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) {
275 LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump());
// The new instruction takes the block mask followed by operands 1-3 of the
// compare being folded.
276 MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode));
277 MIBuilder.addImm((uint64_t)BlockMask);
278 MIBuilder.add(VCMP->getOperand(1));
279 MIBuilder.add(VCMP->getOperand(2));
280 MIBuilder.add(VCMP->getOperand(3));
281
282 // We need to remove any kill flags between the original VCMP and the new
283 // insertion point.
284 for (MachineInstr &MII :
285 make_range(VCMP->getIterator(), MI->getIterator())) {
286 MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI);
287 MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI);
288 }
289
290 VCMP->eraseFromParent();
291 } else {
// Nothing to fold: emit a plain VPST carrying only the block mask.
292 MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST));
293 MIBuilder.addImm((uint64_t)BlockMask);
294 }
295
296 // Erase all dead instructions (VPNOT's). Do that now so that they do not
297 // mess with the bundle creation.
298 for (MachineInstr *DeadMI : DeadInstructions)
299 DeadMI->eraseFromParent();
300 DeadInstructions.clear();
301
// NOTE(review): the line that opens this call (listing line 302) is missing
// from this listing. Given the arguments and the finalizeBundle entry in the
// cross-reference list at the end of the file, it is presumably a
// finalizeBundle call covering the range from the new VPT/VPST up to MBIter
// -- confirm.
303 Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);
304
305 Modified = true;
306 }
307
308 return Modified;
309}
310
311bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
312 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
313
314 if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
315 return false;
316
317 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
318 TRI = STI.getRegisterInfo();
319
320 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
321 << "********** Function: " << Fn.getName() << '\n');
322
323 bool Modified = false;
324 for (MachineBasicBlock &MBB : Fn)
325 Modified |= InsertVPTBlocks(MBB);
326
327 LLVM_DEBUG(dbgs() << "**************************************\n");
328 return Modified;
329}
330
331/// createMVEVPTBlock - Returns an instance of the MVE VPT block
332/// insertion pass.
333FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define LLVM_DEBUG(...)
Definition: Debug.h:106
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize)
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, unsigned MaxSteps, unsigned &NumInstrsSteppedOver)
static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, MachineBasicBlock::iterator End)
static MachineInstr * findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, const TargetRegisterInfo *TRI, unsigned &NewOpcode)
#define DEBUG_TYPE
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
static const int BlockSize
Definition: TarWriter.cpp:33
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:238
bool isThumb2() const
Definition: ARMSubtarget.h:404
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:250
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
Instructions::iterator instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
PredBlockMask
Mask values for IT and VPT Blocks, to be used by MCOperands.
Definition: ARMBaseInfo.h:105
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI)
Return true if Reg is defd between From and To.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
ARM::PredBlockMask expandPredBlockMask(ARM::PredBlockMask BlockMask, ARMVCC::VPTCodes Kind)
Definition: ARMBaseInfo.cpp:16
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
FunctionPass * createMVEVPTBlockPass()
createMVEVPTBlock - Returns an instance of the MVE VPT block insertion pass.