LLVM 19.0.0git
PPCCTRLoops.cpp
Go to the documentation of this file.
1//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass generates machine instructions for the CTR loops related pseudos:
10// 1: MTCTRloop/DecreaseCTRloop
11// 2: MTCTR8loop/DecreaseCTR8loop
12//
13// If a CTR loop can be generated:
14// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16// its user branch instruction can be deleted.
17//
18// If a CTR loop can not be generated due to clobber of CTR:
19// 1: MTCTRloop/MTCTR8loop can be deleted.
20// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21// a "cmplwi/cmpldi".
22//
23// This pass runs just before register allocation, because we don't want
24// register allocator to allocate register for DecreaseCTRloop if a CTR can be
25// generated or if a CTR loop can not be generated, we don't have any condition
26// register for the new added "cmplwi/cmpldi".
27//
28//===----------------------------------------------------------------------===//
29
30#include "PPC.h"
31#include "PPCInstrInfo.h"
32#include "PPCSubtarget.h"
33#include "llvm/ADT/Statistic.h"
43#include "llvm/Pass.h"
44#include "llvm/PassRegistry.h"
46#include "llvm/Support/Debug.h"
48#include <cassert>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-ctrloops"
53
54STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56
57namespace {
58class PPCCTRLoops : public MachineFunctionPass {
59public:
60 static char ID;
61
62 PPCCTRLoops() : MachineFunctionPass(ID) {
64 }
65
66 void getAnalysisUsage(AnalysisUsage &AU) const override {
69 }
70
71 bool runOnMachineFunction(MachineFunction &MF) override;
72
73private:
74 const PPCInstrInfo *TII = nullptr;
75 MachineRegisterInfo *MRI = nullptr;
76
77 bool processLoop(MachineLoop *ML);
78 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80 MachineInstr *Dec);
81 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82};
83} // namespace
84
85char PPCCTRLoops::ID = 0;
86
87INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88 false, false)
90INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
92
93FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94
95bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96 bool Changed = false;
97
98 auto &MLI = getAnalysis<MachineLoopInfo>();
99 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100 MRI = &MF.getRegInfo();
101
102 for (auto *ML : MLI) {
103 if (ML->isOutermost())
104 Changed |= processLoop(ML);
105 }
106
107#ifndef NDEBUG
108 for (const MachineBasicBlock &BB : MF) {
109 for (const MachineInstr &I : BB)
110 assert((I.getOpcode() != PPC::DecreaseCTRloop &&
111 I.getOpcode() != PPC::DecreaseCTR8loop) &&
112 "CTR loop pseudo is not expanded!");
113 }
114#endif
115
116 return Changed;
117}
118
119bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
120 if (!CheckReads) {
121 // If we are only checking for defs, that is we are going to find
122 // definitions before MTCTRloop, for this case:
123 // CTR defination inside the callee of a call instruction will not impact
124 // the defination of MTCTRloop, so we can use definesRegister() for the
125 // check, no need to check the regmask.
126 return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
127 }
128
129 if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
130 return true;
131
132 if (MI->getDesc().isCall())
133 return true;
134
135 // We define the CTR in the loop preheader, so if there is any CTR reader in
136 // the loop, we also can not use CTR loop form.
137 if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
138 return true;
139
140 return false;
141}
142
143bool PPCCTRLoops::processLoop(MachineLoop *ML) {
144 bool Changed = false;
145
146 // Align with HardwareLoop pass, process inner loops first.
147 for (MachineLoop *I : *ML)
148 Changed |= processLoop(I);
149
150 // If any inner loop is changed, outter loop must be without hardware loop
151 // intrinsics.
152 if (Changed)
153 return true;
154
155 auto IsLoopStart = [](MachineInstr &MI) {
156 return MI.getOpcode() == PPC::MTCTRloop ||
157 MI.getOpcode() == PPC::MTCTR8loop;
158 };
159
160 auto SearchForStart =
161 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
162 for (auto &MI : *MBB) {
163 if (IsLoopStart(MI))
164 return &MI;
165 }
166 return nullptr;
167 };
168
169 MachineInstr *Start = nullptr;
170 MachineInstr *Dec = nullptr;
171 bool InvalidCTRLoop = false;
172
173 MachineBasicBlock *Preheader = ML->getLoopPreheader();
174 // If there is no preheader for this loop, there must be no MTCTRloop
175 // either.
176 if (!Preheader)
177 return false;
178
179 Start = SearchForStart(Preheader);
180 // This is not a CTR loop candidate.
181 if (!Start)
182 return false;
183
184 // If CTR is live to the preheader, we can not redefine the CTR register.
185 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
186 InvalidCTRLoop = true;
187
188 // Make sure there is also no CTR clobber in the block preheader between the
189 // begin and MTCTR.
191 std::next(Start->getReverseIterator());
192 I != Preheader->instr_rend(); ++I)
193 // Only check the definitions of CTR. If there is non-dead definition for
194 // the CTR, we conservatively don't generate a CTR loop.
195 if (isCTRClobber(&*I, /* CheckReads */ false)) {
196 InvalidCTRLoop = true;
197 break;
198 }
199
200 // Make sure there is also no CTR clobber/user in the block preheader between
201 // MTCTR and the end.
202 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
203 I != Preheader->instr_end(); ++I)
204 if (isCTRClobber(&*I, /* CheckReads */ true)) {
205 InvalidCTRLoop = true;
206 break;
207 }
208
209 // Find the CTR loop components and decide whether or not to fall back to a
210 // normal loop.
211 for (auto *MBB : reverse(ML->getBlocks())) {
212 for (auto &MI : *MBB) {
213 if (MI.getOpcode() == PPC::DecreaseCTRloop ||
214 MI.getOpcode() == PPC::DecreaseCTR8loop)
215 Dec = &MI;
216 else if (!InvalidCTRLoop)
217 // If any instruction clobber CTR, then we can not generate a CTR loop.
218 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
219 }
220 if (Dec && InvalidCTRLoop)
221 break;
222 }
223
224 assert(Dec && "CTR loop is not complete!");
225
226 if (InvalidCTRLoop) {
227 expandNormalLoops(ML, Start, Dec);
228 ++NumNormalLoops;
229 }
230 else {
231 expandCTRLoops(ML, Start, Dec);
232 ++NumCTRLoops;
233 }
234 return true;
235}
236
237void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
238 MachineInstr *Dec) {
239 bool Is64Bit =
240 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
241
242 MachineBasicBlock *Preheader = Start->getParent();
243 MachineBasicBlock *Exiting = Dec->getParent();
244 assert((Preheader && Exiting) &&
245 "Preheader and exiting should exist for CTR loop!");
246
247 assert(Dec->getOperand(1).getImm() == 1 &&
248 "Loop decrement stride must be 1");
249
250 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
251 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
252
253 Register PHIDef =
254 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
255 : &PPC::GPRC_and_GPRC_NOR0RegClass);
256
257 Start->getParent()->getParent()->getProperties().reset(
258 MachineFunctionProperties::Property::NoPHIs);
259
260 // Generate "PHI" in the header block.
261 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
262 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
263 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
264
265 Register ADDIDef =
266 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
267 : &PPC::GPRC_and_GPRC_NOR0RegClass);
268 // Generate "addi -1" in the exiting block.
269 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
270 .addReg(PHIDef)
271 .addImm(-1);
272
273 // Add other inputs for the PHI node.
274 if (ML->isLoopLatch(Exiting)) {
275 // There must be only two predecessors for the loop header, one is the
276 // Preheader and the other one is loop latch Exiting. In hardware loop
277 // insertion pass, the block containing DecreaseCTRloop must dominate all
278 // loop latches. So there must be only one latch.
279 assert(ML->getHeader()->pred_size() == 2 &&
280 "Loop header predecessor is not right!");
281 PHIMIB.addReg(ADDIDef).addMBB(Exiting);
282 } else {
283 // If the block containing DecreaseCTRloop is not a loop latch, we can use
284 // ADDIDef as the value for all other blocks for the PHI. In hardware loop
285 // insertion pass, the block containing DecreaseCTRloop must dominate all
286 // loop latches.
287 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
288 if (ML->contains(P)) {
289 assert(ML->isLoopLatch(P) &&
290 "Loop's header in-loop predecessor is not loop latch!");
291 PHIMIB.addReg(ADDIDef).addMBB(P);
292 } else
293 assert(P == Preheader &&
294 "CTR loop should not be generated for irreducible loop!");
295 }
296 }
297
298 // Generate the compare in the exiting block.
299 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
300 auto CMPMIB =
301 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
302 .addReg(ADDIDef)
303 .addImm(0);
304
305 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
306 Dec->getOperand(0).getReg())
307 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
308
309 // Remove the pseudo instructions.
310 Start->eraseFromParent();
311 Dec->eraseFromParent();
312}
313
314void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
315 MachineInstr *Dec) {
316 bool Is64Bit =
317 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
318
319 MachineBasicBlock *Preheader = Start->getParent();
320 MachineBasicBlock *Exiting = Dec->getParent();
321
322 (void)Preheader;
323 assert((Preheader && Exiting) &&
324 "Preheader and exiting should exist for CTR loop!");
325
326 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
327
328 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
329 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
330 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
331 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
332 "There should be only one user for loop decrement pseudo!");
333
334 unsigned Opcode = 0;
335 switch (BrInstr->getOpcode()) {
336 case PPC::BC:
337 Opcode = BDNZOpcode;
338 (void) ML;
339 assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
340 "Invalid ctr loop!");
341 break;
342 case PPC::BCn:
343 Opcode = BDZOpcode;
344 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
345 "Invalid ctr loop!");
346 break;
347 default:
348 llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
349 }
350
351 // Generate "bdnz/bdz" in the exiting block just before the terminator.
352 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
353 .addMBB(BrInstr->getOperand(1).getMBB());
354
355 // Remove the pseudo instructions.
356 BrInstr->eraseFromParent();
357 Dec->eraseFromParent();
358}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
loops
Definition: LoopInfo.cpp:1177
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
PowerPC CTR loops generation
Definition: PPCCTRLoops.cpp:90
#define DEBUG_TYPE
Definition: PPCCTRLoops.cpp:52
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
instr_iterator instr_end()
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:327
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:473
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
int64_t getImm() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createPPCCTRLoopsPass()
Definition: PPCCTRLoops.cpp:93
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void initializePPCCTRLoopsPass(PassRegistry &)