MVETPAndVPTOptimisationsPass.cpp
1//===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass does a few optimisations related to Tail predicated loops
10/// and MVE VPT blocks before register allocation is performed. For VPT blocks
11/// the goal is to maximize the sizes of the blocks that will be created by the
12/// MVE VPT Block Insertion pass (which runs after register allocation). For
13/// tail predicated loops we transform the loop into something that will
14/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15///
16//===----------------------------------------------------------------------===//
17
18#include "ARM.h"
19#include "ARMSubtarget.h"
20#include "MVETailPredUtils.h"
21#include "Thumb2InstrInfo.h"
30#include "llvm/Support/Debug.h"
31#include <cassert>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "arm-mve-vpt-opts"
36
37static cl::opt<bool>
38MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
39 cl::desc("Enable merging Loop End and Dec instructions."),
40 cl::init(true));
41
42static cl::opt<bool>
43SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
44 cl::desc("Enable setting lr as a predicate in tail predication regions."),
45 cl::init(true));
46
47namespace {
48class MVETPAndVPTOptimisations : public MachineFunctionPass {
49public:
50 static char ID;
51 const Thumb2InstrInfo *TII;
52 MachineRegisterInfo *MRI;
53
54 MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
55
56 bool runOnMachineFunction(MachineFunction &Fn) override;
57
58 void getAnalysisUsage(AnalysisUsage &AU) const override {
59 AU.addRequired<MachineLoopInfoWrapperPass>();
60 AU.addPreserved<MachineLoopInfoWrapperPass>();
61 AU.addRequired<MachineDominatorTreeWrapperPass>();
62 AU.addPreserved<MachineDominatorTreeWrapperPass>();
63 MachineFunctionPass::getAnalysisUsage(AU);
64 }
65
66 StringRef getPassName() const override {
67 return "ARM MVE TailPred and VPT Optimisation Pass";
68 }
69
70private:
71 bool LowerWhileLoopStart(MachineLoop *ML);
72 bool MergeLoopEnd(MachineLoop *ML);
73 bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
74 MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
75 MachineInstr &Instr,
76 MachineOperand &User,
77 Register Target);
78 bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
79 bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
80 bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
81 bool ConvertVPSEL(MachineBasicBlock &MBB);
82 bool HintDoLoopStartReg(MachineBasicBlock &MBB);
83 MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
84 MachineInstr *LoopStart);
85};
86
87char MVETPAndVPTOptimisations::ID = 0;
88
89} // end anonymous namespace
90
91INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
92 "ARM MVE TailPred and VPT Optimisations pass", false,
93 false)
94INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
95INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
96INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
97 "ARM MVE TailPred and VPT Optimisations pass", false, false)
98
99static MachineInstr *LookThroughCOPY(MachineInstr *MI,
100 MachineRegisterInfo *MRI) {
101 while (MI && MI->getOpcode() == TargetOpcode::COPY &&
102 MI->getOperand(1).getReg().isVirtual())
103 MI = MRI->getVRegDef(MI->getOperand(1).getReg());
104 return MI;
105}
106
107// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
108// corresponding PHI that make up a low overhead loop. Only handles 'do' loops
109// at the moment, returning a t2DoLoopStart in LoopStart.
110static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
111 MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
112 MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
113 MachineBasicBlock *Header = ML->getHeader();
114 MachineBasicBlock *Latch = ML->getLoopLatch();
115 if (!Header || !Latch) {
116 LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
117 return false;
118 }
119
120 // Find the loop end from the terminators.
121 LoopEnd = nullptr;
122 for (auto &T : Latch->terminators()) {
123 if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
124 LoopEnd = &T;
125 break;
126 }
127 if (T.getOpcode() == ARM::t2LoopEndDec &&
128 T.getOperand(2).getMBB() == Header) {
129 LoopEnd = &T;
130 break;
131 }
132 }
133 if (!LoopEnd) {
134 LLVM_DEBUG(dbgs() << " no LoopEnd\n");
135 return false;
136 }
137 LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
138
139 // Find the dec from the use of the end. There may be copies between
140 // instructions. We expect the loop to look like:
141 // $vs = t2DoLoopStart ...
142 // loop:
143 // $vp = phi [ $vs ], [ $vd ]
144 // ...
145 // $vd = t2LoopDec $vp
146 // ...
147 // t2LoopEnd $vd, loop
148 if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
149 LoopDec = LoopEnd;
150 else {
151 LoopDec =
152 LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
153 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
154 LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
155 return false;
156 }
157 }
158 LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
159
160 LoopPhi =
161 LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
162 if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
163 LoopPhi->getNumOperands() != 5 ||
164 (LoopPhi->getOperand(2).getMBB() != Latch &&
165 LoopPhi->getOperand(4).getMBB() != Latch)) {
166 LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
167 return false;
168 }
169 LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
170
171 Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
172 ? LoopPhi->getOperand(3).getReg()
173 : LoopPhi->getOperand(1).getReg();
174 LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
175 if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
176 LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
177 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
178 LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
179 return false;
180 }
181 LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
182
183 return true;
184}
185
186static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
187 MachineBasicBlock *MBB = MI->getParent();
188 assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
189 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
190
191 // Subs
192 MachineInstrBuilder MIB =
193 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
194 MIB.add(MI->getOperand(0));
195 MIB.add(MI->getOperand(1));
196 MIB.addImm(0);
197 MIB.addImm(ARMCC::AL);
198 MIB.addReg(ARM::NoRegister);
199 MIB.addReg(ARM::CPSR, RegState::Define);
200
201 // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
202 for (MachineInstr &I : MBB->terminators()) {
203 if (I.getOpcode() == ARM::t2WhileLoopStart) {
204 MachineInstrBuilder MIB =
205 BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
206 MIB.add(MI->getOperand(1)); // branch target
207 MIB.addImm(ARMCC::EQ);
208 MIB.addReg(ARM::CPSR);
209 I.eraseFromParent();
210 break;
211 }
212 }
213
214 MI->eraseFromParent();
215}
216
217// The Hardware Loop insertion and ISel Lowering produce the pseudos for the
218// start of a while loop:
219// %a:gprlr = t2WhileLoopSetup %Cnt
220// t2WhileLoopStart %a, %BB
221// We want to convert those to a single instruction which, like t2LoopEndDec and
222// t2DoLoopStartTP is both a terminator and produces a value:
223// %a:gprlr = t2WhileLoopStartLR %Cnt, %BB
224//
225// Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
226// t2WhileLoopStart are not valid past regalloc.
227bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
228 LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
229 << ML->getHeader()->getName() << "\n");
230
231 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
232 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
233 return false;
234
235 if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
236 return false;
237
238 Register LR = LoopStart->getOperand(0).getReg();
239 auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
240 return MI.getOpcode() == ARM::t2WhileLoopStart;
241 });
242 if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
243 RevertWhileLoopSetup(LoopStart, TII);
244 RevertLoopDec(LoopStart, TII);
245 RevertLoopEnd(LoopStart, TII);
246 return true;
247 }
248
249 MachineInstrBuilder MI =
250 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
251 TII->get(ARM::t2WhileLoopStartLR), LR)
252 .add(LoopStart->getOperand(1))
253 .add(WLSIt->getOperand(1));
254 (void)MI;
255 LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
256
257 WLSIt->eraseFromParent();
258 LoopStart->eraseFromParent();
259 return true;
260}
261
262// Return true if this instruction is invalid in a low overhead loop, usually
263// because it clobbers LR.
264static bool IsInvalidTPInstruction(MachineInstr &MI) {
265 return MI.isCall() || isLoopStart(MI);
266}
267
268// Starting from PreHeader, search for invalid instructions back until the
269// LoopStart block is reached. If invalid instructions are found, the loop start
270 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop.
271 // Returns the new DLS LoopStart if it was updated.
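// For illustration only (register names and operands invented, not taken from
// actual pass output): a loop entered via
//   %lr = t2WhileLoopStartLR %cnt, %exitbb
// whose predecessors contain a call (which clobbers LR) is reverted to a
// compare-and-branch plus a t2DoLoopStart placed at the end of the preheader:
//   t2CMPri %cnt, 0 ; t2Bcc %exitbb (eq)
//   %lr = t2DoLoopStart %cnt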
272MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
273 MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
274 SmallVector<MachineBasicBlock *> Worklist;
275 SmallPtrSet<MachineBasicBlock *, 4> Visited;
276 Worklist.push_back(PreHeader);
277 Visited.insert(LoopStart->getParent());
278
279 while (!Worklist.empty()) {
280 MachineBasicBlock *MBB = Worklist.pop_back_val();
281 if (Visited.count(MBB))
282 continue;
283
284 for (MachineInstr &MI : *MBB) {
285 if (!IsInvalidTPInstruction(MI))
286 continue;
287
288 LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
289
290 // Create a t2DoLoopStart at the end of the preheader.
291 MachineInstrBuilder MIB =
292 BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
293 LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
294 MIB.add(LoopStart->getOperand(0));
295 MIB.add(LoopStart->getOperand(1));
296
297 // Make sure to remove the kill flags, to prevent them from being invalid.
298 LoopStart->getOperand(1).setIsKill(false);
299
300 // Revert the t2WhileLoopStartLR to a CMP and Br.
301 RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
302 return MIB;
303 }
304
305 Visited.insert(MBB);
306 for (auto *Pred : MBB->predecessors())
307 Worklist.push_back(Pred);
308 }
309 return LoopStart;
310}
311
312// This function converts loops with t2LoopEnd and t2LoopDec instructions into
313// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
314// will be valid to be used for the low overhead loop, which means nothing else
315// is using LR (especially calls) and there are no superfluous copies in the
316// loop. The t2LoopEndDec is a branching terminator that produces a value (the
317// decrement) around the loop edge, which means we need to be careful that they
318// will be valid to allocate without any spilling.
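// As a rough sketch (operand details simplified, not a literal MIR dump), the
// merge below turns
//   %dec = t2LoopDec %phi, 1
//   t2LoopEnd %dec, %loopbb
// into the single terminator
//   %dec = t2LoopEndDec %phi, %loopbb
// with any intermediate COPYs of the counter removed.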
319bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
320 if (!MergeEndDec)
321 return false;
322
323 LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
324 << "\n");
325
326 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
327 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
328 return false;
329
330 // Check if there is an illegal instruction (a call) in the low overhead loop
331 // and if so revert it now before we get any further. While loops also need to
332 // check the preheaders, but can be reverted to a DLS loop if needed.
333 auto *PreHeader = ML->getLoopPreheader();
334 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
335 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
336
337 for (MachineBasicBlock *MBB : ML->blocks()) {
338 for (MachineInstr &MI : *MBB) {
339 if (IsInvalidTPInstruction(MI)) {
340 LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
341 if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
342 RevertDoLoopStart(LoopStart, TII);
343 else
344 RevertWhileLoopStartLR(LoopStart, TII);
345 RevertLoopDec(LoopDec, TII);
346 RevertLoopEnd(LoopEnd, TII);
347 return true;
348 }
349 }
350 }
351
352 // Remove any copies from the loop, to ensure the phi that remains is both
353 // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
354 // that cannot spill, we need to be careful what remains in the loop.
355 Register PhiReg = LoopPhi->getOperand(0).getReg();
356 Register DecReg = LoopDec->getOperand(0).getReg();
357 Register StartReg = LoopStart->getOperand(0).getReg();
358 // Ensure the uses are expected, and collect any copies we want to remove.
359 SmallVector<MachineInstr *, 4> Copies;
360 auto CheckUsers = [&Copies](Register BaseReg,
361 ArrayRef<MachineInstr *> ExpectedUsers,
362 MachineRegisterInfo *MRI) {
363 SmallVector<Register, 4> Worklist;
364 Worklist.push_back(BaseReg);
365 while (!Worklist.empty()) {
366 Register Reg = Worklist.pop_back_val();
367 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
368 if (llvm::is_contained(ExpectedUsers, &MI))
369 continue;
370 if (MI.getOpcode() != TargetOpcode::COPY ||
371 !MI.getOperand(0).getReg().isVirtual()) {
372 LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
373 return false;
374 }
375 Worklist.push_back(MI.getOperand(0).getReg());
376 Copies.push_back(&MI);
377 }
378 }
379 return true;
380 };
381 if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
382 !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
383 !CheckUsers(StartReg, {LoopPhi}, MRI)) {
384 // Don't leave a t2WhileLoopStartLR without the t2LoopEndDec.
385 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
386 RevertWhileLoopStartLR(LoopStart, TII);
387 RevertLoopDec(LoopDec, TII);
388 RevertLoopEnd(LoopEnd, TII);
389 return true;
390 }
391 return false;
392 }
393
394 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
395 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
396 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
397
398 if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
399 LoopPhi->getOperand(3).setReg(StartReg);
400 LoopPhi->getOperand(1).setReg(DecReg);
401 } else {
402 LoopPhi->getOperand(1).setReg(StartReg);
403 LoopPhi->getOperand(3).setReg(DecReg);
404 }
405
406 SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
407 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
408 if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
409 // If the LoopEnd falls through, need to insert a t2B to the fall-through
410 // block so that the non-analyzable t2LoopEndDec doesn't fall through.
411 MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator();
412 BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
413 .addMBB(&*MBBI)
414 .add(predOps(ARMCC::AL));
415 }
416
417 // Replace the loop dec and loop end as a single instruction.
418 MachineInstrBuilder MI =
419 BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
420 TII->get(ARM::t2LoopEndDec), DecReg)
421 .addReg(PhiReg)
422 .add(LoopEnd->getOperand(1));
423 (void)MI;
424 LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
425
426 LoopDec->eraseFromParent();
427 LoopEnd->eraseFromParent();
428 for (auto *MI : Copies)
429 MI->eraseFromParent();
430 return true;
431}
432
433// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
434// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
435// instruction, making the backend ARMLowOverheadLoops pass's job of finding the
436// VCTP operand much simpler.
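// Roughly (register names invented for illustration), a do-loop start such as
//   %lr = t2DoLoopStart %iter.count
// becomes
//   %lr = t2DoLoopStartTP %iter.count, %element.count
// where %element.count is the value consumed by the loop's VCTP, so that
// ARMLowOverheadLoops no longer has to trace it through PHIs and COPYs.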
437bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
438 MachineDominatorTree *DT) {
439 LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
440 << ML->getHeader()->getName() << "\n");
441
442 // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
443 // in the loop.
444 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
445 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
446 return false;
447 if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
448 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
449 return false;
450
451 SmallVector<MachineInstr *, 4> VCTPs;
452 SmallVector<MachineInstr *, 4> MVEInstrs;
453 for (MachineBasicBlock *BB : ML->blocks()) {
454 for (MachineInstr &MI : *BB)
455 if (isVCTP(&MI))
456 VCTPs.push_back(&MI);
457 else if (findFirstVPTPredOperandIdx(MI) != -1)
458 MVEInstrs.push_back(&MI);
459 }
460
461 if (VCTPs.empty()) {
462 LLVM_DEBUG(dbgs() << " no VCTPs\n");
463 return false;
464 }
465
466 // Check all VCTPs are the same.
467 MachineInstr *FirstVCTP = *VCTPs.begin();
468 for (MachineInstr *VCTP : VCTPs) {
469 LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
470 if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
471 VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
472 LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
473 return false;
474 }
475 }
476
477 // Check that the register being used can be set up before the loop. We expect
478 // this to be:
479 // $vx = ...
480 // loop:
481 // $vp = PHI [ $vx ], [ $vd ]
482 // ..
483 // $vpr = VCTP $vp
484 // ..
485 // $vd = t2SUBri $vp, #n
486 // ..
487 Register CountReg = FirstVCTP->getOperand(1).getReg();
488 if (!CountReg.isVirtual()) {
489 LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
490 return false;
491 }
492 MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
493 if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
494 Phi->getNumOperands() != 5 ||
495 (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
496 Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
497 LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
498 return false;
499 }
500 CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
501 ? Phi->getOperand(3).getReg()
502 : Phi->getOperand(1).getReg();
503
504 // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
505 // the preheader and add the new CountReg to it. We attempt to place it late
506 // in the preheader, but may need to move that earlier based on uses.
507 MachineBasicBlock *MBB = LoopStart->getParent();
508 MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
509 for (MachineInstr &Use :
510 MRI->use_instructions(LoopStart->getOperand(0).getReg()))
511 if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
512 !DT->dominates(ML->getHeader(), Use.getParent())) {
513 LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
514 return false;
515 }
516
517 unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
518 ? ARM::t2DoLoopStartTP
519 : ARM::t2WhileLoopStartTP;
520 MachineInstrBuilder MI =
521 BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
522 .add(LoopStart->getOperand(0))
523 .add(LoopStart->getOperand(1))
524 .addReg(CountReg);
525 if (NewOpc == ARM::t2WhileLoopStartTP)
526 MI.add(LoopStart->getOperand(2));
527 LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
528 << *MI.getInstr());
529 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
530 LoopStart->eraseFromParent();
531
532 if (SetLRPredicate) {
533 // Each predicated instruction in the loop needs to use LR, the value defined
534 // by the Phi, as its predicate register.
535 Register LR = LoopPhi->getOperand(0).getReg();
536 for (MachineInstr *MI : MVEInstrs) {
537 int Idx = llvm::findFirstVPTPredOperandIdx(*MI);
538 MI->getOperand(Idx + 2).setReg(LR);
539 }
540 }
541
542 return true;
543}
544
545// Returns true if Opcode is any VCMP Opcode.
546static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
547
548// Returns true if a VCMP with this Opcode can have its operands swapped.
549// There are 2 kinds of VCMP that can't have their operands swapped: Float VCMPs,
550// and VCMPr instructions (since the r is always on the right).
551static bool CanHaveSwappedOperands(unsigned Opcode) {
552 switch (Opcode) {
553 default:
554 return true;
555 case ARM::MVE_VCMPf32:
556 case ARM::MVE_VCMPf16:
557 case ARM::MVE_VCMPf32r:
558 case ARM::MVE_VCMPf16r:
559 case ARM::MVE_VCMPi8r:
560 case ARM::MVE_VCMPi16r:
561 case ARM::MVE_VCMPi32r:
562 case ARM::MVE_VCMPu8r:
563 case ARM::MVE_VCMPu16r:
564 case ARM::MVE_VCMPu32r:
565 case ARM::MVE_VCMPs8r:
566 case ARM::MVE_VCMPs16r:
567 case ARM::MVE_VCMPs32r:
568 return false;
569 }
570}
571
572// Returns the CondCode of a VCMP Instruction.
573static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
574 assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
575 return ARMCC::CondCodes(Instr.getOperand(3).getImm());
576}
577
578// Returns true if Cond is equivalent to a VPNOT instruction on the result of
579// Prev. Cond and Prev must be VCMPs.
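// For example (condition codes written symbolically rather than as immediates):
//   %p = MVE_VCMPs32 %a, %b, gt
//   %q = MVE_VCMPs32 %a, %b, le   ; opposite condition, same operands
//   %q = MVE_VCMPs32 %b, %a, ge   ; swapped operands and swapped condition
// In either case %q is equivalent to VPNOT %p.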
580static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
581 assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
582
583 // Opcodes must match.
584 if (Cond.getOpcode() != Prev.getOpcode())
585 return false;
586
587 MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
588 MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
589
590 // If the VCMP has the opposite condition with the same operands, we can
591 // replace it with a VPNOT
592 ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
593 ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
594 if (ExpectedCode == GetCondCode(Prev))
595 if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
596 return true;
597 // Check again with operands swapped if possible
598 if (!CanHaveSwappedOperands(Cond.getOpcode()))
599 return false;
600 ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
601 return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
602 CondOP2.isIdenticalTo(PrevOP1);
603}
604
605// Returns true if Instr writes to VCCR.
606static bool IsWritingToVCCR(MachineInstr &Instr) {
607 if (Instr.getNumOperands() == 0)
608 return false;
609 MachineOperand &Dst = Instr.getOperand(0);
610 if (!Dst.isReg())
611 return false;
612 Register DstReg = Dst.getReg();
613 if (!DstReg.isVirtual())
614 return false;
615 MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
616 const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
617 return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
618}
619
620// Transforms
621// <Instr that uses %A ('User' Operand)>
622// Into
623// %K = VPNOT %Target
624// <Instr that uses %K ('User' Operand)>
625// And returns the newly inserted VPNOT.
626// This optimization is done in the hopes of preventing spills/reloads of VPR by
627// reducing the number of VCCR values with overlapping lifetimes.
628MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
629 MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
630 Register Target) {
631 Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
632
633 MachineInstrBuilder MIBuilder =
634 BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
635 .addDef(NewResult)
636 .addReg(Target);
637 addUnpredicatedMveVpredNOp(MIBuilder);
638
639 // Make the user use NewResult instead, and clear its kill flag.
640 User.setReg(NewResult);
641 User.setIsKill(false);
642
643 LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
644 MIBuilder.getInstr()->dump());
645
646 return *MIBuilder.getInstr();
647}
648
649// Moves a VPNOT before its first user if an instruction that uses Reg is found
650// in-between the VPNOT and its user.
651// Returns true if there is at least one user of the VPNOT in the block.
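// A small sketch of the situation being handled (instructions abbreviated):
//   %n = MVE_VPNOT %p
//   <instr reading %p>      ; use of Reg in between
//   <instr reading %n>      ; first user of the VPNOT
// The VPNOT is spliced down to just before its first user, so that no use of
// Reg remains between the VPNOT and that user.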
652static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
653 MachineBasicBlock::iterator Iter,
654 Register Reg) {
655 assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
656 assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
657 "The VPNOT cannot be predicated");
658
659 MachineInstr &VPNOT = *Iter;
660 Register VPNOTResult = VPNOT.getOperand(0).getReg();
661 Register VPNOTOperand = VPNOT.getOperand(1).getReg();
662
663 // Whether the VPNOT will need to be moved, and whether we found a user of the
664 // VPNOT.
665 bool MustMove = false, HasUser = false;
666 MachineOperand *VPNOTOperandKiller = nullptr;
667 for (; Iter != MBB.end(); ++Iter) {
668 if (MachineOperand *MO =
669 Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr,
670 /*isKill*/ true)) {
671 // If we find the operand that kills the VPNOTOperand's result, save it.
672 VPNOTOperandKiller = MO;
673 }
674
675 if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) {
676 MustMove = true;
677 continue;
678 }
679
680 if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1)
681 continue;
682
683 HasUser = true;
684 if (!MustMove)
685 break;
686
687 // Move the VPNOT right before Iter
688 LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
689 Iter->dump());
690 MBB.splice(Iter, &MBB, VPNOT.getIterator());
691 // If we move the instr, and its operand was killed earlier, remove the kill
692 // flag.
693 if (VPNOTOperandKiller)
694 VPNOTOperandKiller->setIsKill(false);
695
696 break;
697 }
698 return HasUser;
699}
700
701// This optimisation attempts to reduce the number of overlapping lifetimes of
702// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
703// this replaces
704// %A:vccr = (something)
705// %B:vccr = VPNOT %A
706// %Foo = (some op that uses %B)
707// %Bar = (some op that uses %A)
708// With
709// %A:vccr = (something)
710// %B:vccr = VPNOT %A
711// %Foo = (some op that uses %B)
712// %TMP2:vccr = VPNOT %B
713// %Bar = (some op that uses %A)
714bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
715 MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
716 SmallVector<MachineInstr *, 4> DeadInstructions;
717 bool Modified = false;
718
719 while (Iter != End) {
720 Register VCCRValue, OppositeVCCRValue;
721 // The first loop looks for 2 unpredicated instructions:
722 // %A:vccr = (instr) ; A is stored in VCCRValue
723 // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
724 for (; Iter != End; ++Iter) {
725 // We're only interested in unpredicated instructions that write to VCCR.
726 if (!IsWritingToVCCR(*Iter) ||
727 getVPTInstrPredicate(*Iter) != ARMVCC::None)
728 continue;
729 Register Dst = Iter->getOperand(0).getReg();
730
731 // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
732 // found what we were looking for.
733 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
734 Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) {
735 // Move the VPNOT closer to its first user if needed, and ignore if it
736 // has no users.
737 if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
738 continue;
739
740 OppositeVCCRValue = Dst;
741 ++Iter;
742 break;
743 }
744
745 // Else, just set VCCRValue.
746 VCCRValue = Dst;
747 }
748
749 // If the first inner loop didn't find anything, stop here.
750 if (Iter == End)
751 break;
752
753 assert(VCCRValue && OppositeVCCRValue &&
754 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
755 "stopped before the end of the block!");
756 assert(VCCRValue != OppositeVCCRValue &&
757 "VCCRValue should not be equal to OppositeVCCRValue!");
758
759 // LastVPNOTResult always contains the same value as OppositeVCCRValue.
760 Register LastVPNOTResult = OppositeVCCRValue;
761
762 // This second loop tries to optimize the remaining instructions.
763 for (; Iter != End; ++Iter) {
764 bool IsInteresting = false;
765
766 if (MachineOperand *MO =
767 Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) {
768 IsInteresting = true;
769
770 // - If the instruction is a VPNOT, it can be removed, and we can just
771 // replace its uses with LastVPNOTResult.
772 // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
773 if (Iter->getOpcode() == ARM::MVE_VPNOT) {
774 Register Result = Iter->getOperand(0).getReg();
775
776 MRI->replaceRegWith(Result, LastVPNOTResult);
777 DeadInstructions.push_back(&*Iter);
778 Modified = true;
779
779
780 LLVM_DEBUG(dbgs()
781 << "Replacing all uses of '" << printReg(Result)
782 << "' with '" << printReg(LastVPNOTResult) << "'\n");
783 } else {
784 MachineInstr &VPNOT =
785 ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
786 Modified = true;
787
788 LastVPNOTResult = VPNOT.getOperand(0).getReg();
789 std::swap(VCCRValue, OppositeVCCRValue);
790
791 LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
792 << "' with '" << printReg(LastVPNOTResult)
793 << "' in instr: " << *Iter);
794 }
795 } else {
796 // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
797 // instead as they contain the same value.
798 if (MachineOperand *MO = Iter->findRegisterUseOperand(
799 OppositeVCCRValue, /*TRI=*/nullptr)) {
800 IsInteresting = true;
801
802 // This is pointless if LastVPNOTResult == OppositeVCCRValue.
803 if (LastVPNOTResult != OppositeVCCRValue) {
804 LLVM_DEBUG(dbgs() << "Replacing usage of '"
805 << printReg(OppositeVCCRValue) << "' with '"
806 << printReg(LastVPNOTResult) << " for instr: ";
807 Iter->dump());
808 MO->setReg(LastVPNOTResult);
809 Modified = true;
810 }
811
812 MO->setIsKill(false);
813 }
814
815 // If this is an unpredicated VPNOT on
816 // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
817 if (Iter->getOpcode() == ARM::MVE_VPNOT &&
818 getVPTInstrPredicate(*Iter) == ARMVCC::None) {
819 Register VPNOTOperand = Iter->getOperand(1).getReg();
820 if (VPNOTOperand == LastVPNOTResult ||
821 VPNOTOperand == OppositeVCCRValue) {
822 IsInteresting = true;
823
824 std::swap(VCCRValue, OppositeVCCRValue);
825 LastVPNOTResult = Iter->getOperand(0).getReg();
826 }
827 }
828 }
829
830 // If this instruction was not interesting, and it writes to VCCR, stop.
831 if (!IsInteresting && IsWritingToVCCR(*Iter))
832 break;
833 }
834 }
835
836 for (MachineInstr *DeadInstruction : DeadInstructions)
837 DeadInstruction->eraseFromParent();
838
839 return Modified;
840}
841
842// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
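// For example (condition codes symbolic, unrelated instructions elided), within
// a single block
//   %p = MVE_VCMPs32 %a, %b, gt
//   %q = MVE_VCMPs32 %a, %b, le
// the second compare is rewritten as
//   %q = MVE_VPNOT %p
// provided nothing writing to VCCR appears between the two.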
843bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
844 SmallVector<MachineInstr *, 4> DeadInstructions;
845
846 // The last VCMP that we have seen and that couldn't be replaced.
847 // This is reset when an instruction that writes to VCCR/VPR is found, or when
848 // a VCMP is replaced with a VPNOT.
849 // We'll only replace VCMPs with VPNOTs when this is not null, and when the
850 // current VCMP is the opposite of PrevVCMP.
851 MachineInstr *PrevVCMP = nullptr;
852 // If we find an instruction that kills the result of PrevVCMP, we save the
853 // operand here to remove the kill flag in case we need to use PrevVCMP's
854 // result.
855 MachineOperand *PrevVCMPResultKiller = nullptr;
856
857 for (MachineInstr &Instr : MBB.instrs()) {
858 if (PrevVCMP) {
859 if (MachineOperand *MO =
860 Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(),
861 /*TRI=*/nullptr, /*isKill*/ true)) {
862 // If we come across the instr that kills PrevVCMP's result, record it
863 // so we can remove the kill flag later if we need to.
864 PrevVCMPResultKiller = MO;
865 }
866 }
867
868 // Ignore predicated instructions.
869 if (getVPTInstrPredicate(Instr) != ARMVCC::None)
870 continue;
871
872 // Only look at VCMPs
873 if (!IsVCMP(Instr.getOpcode())) {
874 // If the instruction writes to VCCR, forget the previous VCMP.
875 if (IsWritingToVCCR(Instr))
876 PrevVCMP = nullptr;
877 continue;
878 }
879
880 if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
881 PrevVCMP = &Instr;
882 continue;
883 }
884
885 // The register containing the result of the VCMP that we're going to
886 // replace.
887 Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
888
889 // Build a VPNOT to replace the VCMP, reusing its operands.
890 MachineInstrBuilder MIBuilder =
891 BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
892 .add(Instr.getOperand(0))
893 .addReg(PrevVCMPResultReg);
894 addUnpredicatedMveVpredNOp(MIBuilder);
895 LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
896 MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
897 Instr.dump());
898
899 // If we found an instruction that uses, and kills PrevVCMP's result,
900 // remove the kill flag.
901 if (PrevVCMPResultKiller)
902 PrevVCMPResultKiller->setIsKill(false);
903
904 // Finally, mark the old VCMP for removal and reset
905 // PrevVCMP/PrevVCMPResultKiller.
906 DeadInstructions.push_back(&Instr);
907 PrevVCMP = nullptr;
908 PrevVCMPResultKiller = nullptr;
909 }
910
911 for (MachineInstr *DeadInstruction : DeadInstructions)
912 DeadInstruction->eraseFromParent();
913
914 return !DeadInstructions.empty();
915}
916
917bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
918 MachineDominatorTree *DT) {
919 // Scan through the block, looking for instructions that use constant moves
920 // into VPR that are the negative of one another. These are expected to be
921 // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
922 // mask is kept, and VPNOT's of it are added or reused as we scan through
923 // the function.
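// A sketch of the pattern being matched (virtual register numbers invented):
//   %m:rgpr = t2MOVi16 21845    ; 0x5555
//   %p:vccr = COPY %m
//   <MVE instruction predicated on %p>
//   %n:rgpr = t2MOVi16 43690    ; 0xaaaa == ~0x5555 & 0xffff
//   %q:vccr = COPY %n
//   <MVE instruction predicated on %q>
// The second t2MOVi16/COPY pair can be replaced by %q = MVE_VPNOT %p.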
924 unsigned LastVPTImm = 0;
925 Register LastVPTReg = 0;
926 SmallSet<MachineInstr *, 4> DeadInstructions;
927
928 for (MachineInstr &Instr : MBB.instrs()) {
929 // Look for predicated MVE instructions.
930 int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
931 if (PIdx == -1)
932 continue;
933 Register VPR = Instr.getOperand(PIdx + 1).getReg();
934 if (!VPR.isVirtual())
935 continue;
936
937 // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
938 MachineInstr *Copy = MRI->getVRegDef(VPR);
939 if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
940 !Copy->getOperand(1).getReg().isVirtual() ||
941 MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
942 LastVPTReg = 0;
943 continue;
944 }
945 Register GPR = Copy->getOperand(1).getReg();
946
947 // Find the Immediate used by the copy.
948 auto getImm = [&](Register GPR) -> unsigned {
949 MachineInstr *Def = MRI->getVRegDef(GPR);
950 if (Def && (Def->getOpcode() == ARM::t2MOVi ||
951 Def->getOpcode() == ARM::t2MOVi16))
952 return Def->getOperand(1).getImm();
953 return -1U;
954 };
955 unsigned Imm = getImm(GPR);
956 if (Imm == -1U) {
957 LastVPTReg = 0;
958 continue;
959 }
960
961 unsigned NotImm = ~Imm & 0xffff;
962 if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
963 MRI->clearKillFlags(LastVPTReg);
964 Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
965 if (MRI->use_empty(VPR)) {
966 DeadInstructions.insert(Copy);
967 if (MRI->hasOneUse(GPR))
968 DeadInstructions.insert(MRI->getVRegDef(GPR));
969 }
970 LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
971 VPR = LastVPTReg;
972 } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
973 // We have found the not of a previous constant. Create a VPNot of the
974 // earlier predicate reg and use it instead of the copy.
975 Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
976 auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
977 TII->get(ARM::MVE_VPNOT), NewVPR)
978 .addReg(LastVPTReg);
979 addUnpredicatedMveVpredNOp(VPNot);
980
981 // Use the new register and check if the def is now dead.
982 Instr.getOperand(PIdx + 1).setReg(NewVPR);
983 if (MRI->use_empty(VPR)) {
984 DeadInstructions.insert(Copy);
985 if (MRI->hasOneUse(GPR))
986 DeadInstructions.insert(MRI->getVRegDef(GPR));
987 }
988 LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
989 << Instr);
990 VPR = NewVPR;
991 }
992
993 LastVPTImm = Imm;
994 LastVPTReg = VPR;
995 }
996
997 for (MachineInstr *DI : DeadInstructions)
998 DI->eraseFromParent();
999
1000 return !DeadInstructions.empty();
1001}
1002
1003// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
1004// somewhat blunt approximation to allow tail predication with vpsel
1005// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
1006// different semantics under tail predication. Until that is modelled we just
1007// convert to a VMOVT (via a predicated VORR) instead.
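// Roughly (predicate operands abbreviated), in a block containing a VCTP
//   %d = MVE_VPSEL %a, %b, <mask>
// is rewritten below as a VORR of %a with itself, predicated Then on the same
// mask and with %b supplying the inactive lanes, i.e. a VMOVT of %a over %b.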
1008bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
1009 bool HasVCTP = false;
1010 SmallVector<MachineInstr *, 4> DeadInstructions;
1011
1012 for (MachineInstr &MI : MBB.instrs()) {
1013 if (isVCTP(&MI)) {
1014 HasVCTP = true;
1015 continue;
1016 }
1017
1018 if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1019 continue;
1020
1021 MachineInstrBuilder MIBuilder =
1022 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1023 .add(MI.getOperand(0))
1024 .add(MI.getOperand(1))
1025 .add(MI.getOperand(1))
1026 .addImm(ARMVCC::Then)
1027 .add(MI.getOperand(4))
1028 .add(MI.getOperand(5))
1029 .add(MI.getOperand(2));
1030 // Silence unused variable warning in release builds.
1031 (void)MIBuilder;
1032 LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1033 dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
1034 DeadInstructions.push_back(&MI);
1035 }
1036
1037 for (MachineInstr *DeadInstruction : DeadInstructions)
1038 DeadInstruction->eraseFromParent();
1039
1040 return !DeadInstructions.empty();
1041}
1042
1043// Add a register allocation hint for t2DoLoopStart to hint it towards LR, as
1044// the instruction may be removable as a noop.
1045bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1046 bool Changed = false;
1047 for (MachineInstr &MI : MBB.instrs()) {
1048 if (MI.getOpcode() != ARM::t2DoLoopStart)
1049 continue;
1050 Register R = MI.getOperand(1).getReg();
1051 MachineFunction *MF = MI.getParent()->getParent();
1052 MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1053 Changed = true;
1054 }
1055 return Changed;
1056}
1057
1058bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1059 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
1060
1061 if (!STI.isThumb2() || !STI.hasLOB())
1062 return false;
1063
1064 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1065 MRI = &Fn.getRegInfo();
1066 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1067 MachineDominatorTree *DT =
1068 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1069
1070 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1071 << "********** Function: " << Fn.getName() << '\n');
1072
1073 bool Modified = false;
1074 for (MachineLoop *ML : MLI->getLoopsInPreorder()) {
1075 Modified |= LowerWhileLoopStart(ML);
1076 Modified |= MergeLoopEnd(ML);
1077 Modified |= ConvertTailPredLoop(ML, DT);
1078 }
1079
1080 for (MachineBasicBlock &MBB : Fn) {
1081 Modified |= HintDoLoopStartReg(MBB);
1082 Modified |= ReplaceConstByVPNOTs(MBB, DT);
1083 Modified |= ReplaceVCMPsByVPNOTs(MBB);
1084 Modified |= ReduceOldVCCRValueUses(MBB);
1085 Modified |= ConvertVPSEL(MBB);
1086 }
1087
1088 LLVM_DEBUG(dbgs() << "**************************************\n");
1089 return Modified;
1090}
1091
1092/// createMVETPAndVPTOptimisationsPass
1093FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
1094 return new MVETPAndVPTOptimisations();
1095}