LLVM 20.0.0git
PPCCTRLoops.cpp
Go to the documentation of this file.
1//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass generates machine instructions for the CTR loops related pseudos:
10// 1: MTCTRloop/DecreaseCTRloop
11// 2: MTCTR8loop/DecreaseCTR8loop
12//
13// If a CTR loop can be generated:
14// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16// its user branch instruction can be deleted.
17//
18// If a CTR loop can not be generated due to clobber of CTR:
19// 1: MTCTRloop/MTCTR8loop can be deleted.
20// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21// a "cmplwi/cmpldi".
22//
23// This pass runs just before register allocation, because we don't want
24// register allocator to allocate register for DecreaseCTRloop if a CTR can be
25// generated or if a CTR loop can not be generated, we don't have any condition
26// register for the new added "cmplwi/cmpldi".
27//
28//===----------------------------------------------------------------------===//
29
30#include "PPC.h"
31#include "PPCInstrInfo.h"
32#include "PPCSubtarget.h"
33#include "llvm/ADT/Statistic.h"
43#include "llvm/Pass.h"
44#include "llvm/PassRegistry.h"
46#include <cassert>
47
48using namespace llvm;
49
50#define DEBUG_TYPE "ppc-ctrloops"
51
52STATISTIC(NumCTRLoops, "Number of CTR loops generated");
53STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
54
55namespace {
56class PPCCTRLoops : public MachineFunctionPass {
57public:
58 static char ID;
59
60 PPCCTRLoops() : MachineFunctionPass(ID) {
62 }
63
64 void getAnalysisUsage(AnalysisUsage &AU) const override {
67 }
68
69 bool runOnMachineFunction(MachineFunction &MF) override;
70
71private:
72 const PPCInstrInfo *TII = nullptr;
73 MachineRegisterInfo *MRI = nullptr;
74
75 bool processLoop(MachineLoop *ML);
76 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
77 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
78 MachineInstr *Dec);
79 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
80};
81} // namespace
82
83char PPCCTRLoops::ID = 0;
84
85INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
86 false, false)
88INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
90
91FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
92
93bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
94 bool Changed = false;
95
96 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
97 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
98 MRI = &MF.getRegInfo();
99
100 for (auto *ML : MLI) {
101 if (ML->isOutermost())
102 Changed |= processLoop(ML);
103 }
104
105#ifndef NDEBUG
106 for (const MachineBasicBlock &BB : MF) {
107 for (const MachineInstr &I : BB)
108 assert((I.getOpcode() != PPC::DecreaseCTRloop &&
109 I.getOpcode() != PPC::DecreaseCTR8loop) &&
110 "CTR loop pseudo is not expanded!");
111 }
112#endif
113
114 return Changed;
115}
116
117bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
118 if (!CheckReads) {
119 // If we are only checking for defs, that is we are going to find
120 // definitions before MTCTRloop, for this case:
121 // CTR defination inside the callee of a call instruction will not impact
122 // the defination of MTCTRloop, so we can use definesRegister() for the
123 // check, no need to check the regmask.
124 return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
125 MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr);
126 }
127
128 if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) ||
129 MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr))
130 return true;
131
132 if (MI->getDesc().isCall())
133 return true;
134
135 // We define the CTR in the loop preheader, so if there is any CTR reader in
136 // the loop, we also can not use CTR loop form.
137 if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) ||
138 MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr))
139 return true;
140
141 return false;
142}
143
144bool PPCCTRLoops::processLoop(MachineLoop *ML) {
145 bool Changed = false;
146
147 // Align with HardwareLoop pass, process inner loops first.
148 for (MachineLoop *I : *ML)
149 Changed |= processLoop(I);
150
151 // If any inner loop is changed, outter loop must be without hardware loop
152 // intrinsics.
153 if (Changed)
154 return true;
155
156 auto IsLoopStart = [](MachineInstr &MI) {
157 return MI.getOpcode() == PPC::MTCTRloop ||
158 MI.getOpcode() == PPC::MTCTR8loop;
159 };
160
161 auto SearchForStart =
162 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
163 for (auto &MI : *MBB) {
164 if (IsLoopStart(MI))
165 return &MI;
166 }
167 return nullptr;
168 };
169
170 MachineInstr *Start = nullptr;
171 MachineInstr *Dec = nullptr;
172 bool InvalidCTRLoop = false;
173
174 MachineBasicBlock *Preheader = ML->getLoopPreheader();
175 // If there is no preheader for this loop, there must be no MTCTRloop
176 // either.
177 if (!Preheader)
178 return false;
179
180 Start = SearchForStart(Preheader);
181 // This is not a CTR loop candidate.
182 if (!Start)
183 return false;
184
185 // If CTR is live to the preheader, we can not redefine the CTR register.
186 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
187 InvalidCTRLoop = true;
188
189 // Make sure there is also no CTR clobber in the block preheader between the
190 // begin and MTCTR.
192 std::next(Start->getReverseIterator());
193 I != Preheader->instr_rend(); ++I)
194 // Only check the definitions of CTR. If there is non-dead definition for
195 // the CTR, we conservatively don't generate a CTR loop.
196 if (isCTRClobber(&*I, /* CheckReads */ false)) {
197 InvalidCTRLoop = true;
198 break;
199 }
200
201 // Make sure there is also no CTR clobber/user in the block preheader between
202 // MTCTR and the end.
203 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
204 I != Preheader->instr_end(); ++I)
205 if (isCTRClobber(&*I, /* CheckReads */ true)) {
206 InvalidCTRLoop = true;
207 break;
208 }
209
210 // Find the CTR loop components and decide whether or not to fall back to a
211 // normal loop.
212 for (auto *MBB : reverse(ML->getBlocks())) {
213 for (auto &MI : *MBB) {
214 if (MI.getOpcode() == PPC::DecreaseCTRloop ||
215 MI.getOpcode() == PPC::DecreaseCTR8loop)
216 Dec = &MI;
217 else if (!InvalidCTRLoop)
218 // If any instruction clobber CTR, then we can not generate a CTR loop.
219 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
220 }
221 if (Dec && InvalidCTRLoop)
222 break;
223 }
224
225 assert(Dec && "CTR loop is not complete!");
226
227 if (InvalidCTRLoop) {
228 expandNormalLoops(ML, Start, Dec);
229 ++NumNormalLoops;
230 }
231 else {
232 expandCTRLoops(ML, Start, Dec);
233 ++NumCTRLoops;
234 }
235 return true;
236}
237
238void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
239 MachineInstr *Dec) {
240 bool Is64Bit =
241 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
242
243 MachineBasicBlock *Preheader = Start->getParent();
244 MachineBasicBlock *Exiting = Dec->getParent();
245 assert((Preheader && Exiting) &&
246 "Preheader and exiting should exist for CTR loop!");
247
248 assert(Dec->getOperand(1).getImm() == 1 &&
249 "Loop decrement stride must be 1");
250
251 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
252 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
253
254 Register PHIDef =
255 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
256 : &PPC::GPRC_and_GPRC_NOR0RegClass);
257
258 Start->getParent()->getParent()->getProperties().reset(
259 MachineFunctionProperties::Property::NoPHIs);
260
261 // Generate "PHI" in the header block.
262 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
263 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
264 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
265
266 Register ADDIDef =
267 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
268 : &PPC::GPRC_and_GPRC_NOR0RegClass);
269 // Generate "addi -1" in the exiting block.
270 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
271 .addReg(PHIDef)
272 .addImm(-1);
273
274 // Add other inputs for the PHI node.
275 if (ML->isLoopLatch(Exiting)) {
276 // There must be only two predecessors for the loop header, one is the
277 // Preheader and the other one is loop latch Exiting. In hardware loop
278 // insertion pass, the block containing DecreaseCTRloop must dominate all
279 // loop latches. So there must be only one latch.
280 assert(ML->getHeader()->pred_size() == 2 &&
281 "Loop header predecessor is not right!");
282 PHIMIB.addReg(ADDIDef).addMBB(Exiting);
283 } else {
284 // If the block containing DecreaseCTRloop is not a loop latch, we can use
285 // ADDIDef as the value for all other blocks for the PHI. In hardware loop
286 // insertion pass, the block containing DecreaseCTRloop must dominate all
287 // loop latches.
288 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
289 if (ML->contains(P)) {
290 assert(ML->isLoopLatch(P) &&
291 "Loop's header in-loop predecessor is not loop latch!");
292 PHIMIB.addReg(ADDIDef).addMBB(P);
293 } else
294 assert(P == Preheader &&
295 "CTR loop should not be generated for irreducible loop!");
296 }
297 }
298
299 // Generate the compare in the exiting block.
300 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
301 auto CMPMIB =
302 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
303 .addReg(ADDIDef)
304 .addImm(0);
305
306 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
307 Dec->getOperand(0).getReg())
308 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
309
310 // Remove the pseudo instructions.
311 Start->eraseFromParent();
312 Dec->eraseFromParent();
313}
314
315void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
316 MachineInstr *Dec) {
317 bool Is64Bit =
318 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
319
320 MachineBasicBlock *Preheader = Start->getParent();
321 MachineBasicBlock *Exiting = Dec->getParent();
322
323 (void)Preheader;
324 assert((Preheader && Exiting) &&
325 "Preheader and exiting should exist for CTR loop!");
326
327 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
328
329 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
330 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
331 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
332 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
333 "There should be only one user for loop decrement pseudo!");
334
335 unsigned Opcode = 0;
336 switch (BrInstr->getOpcode()) {
337 case PPC::BC:
338 Opcode = BDNZOpcode;
339 (void) ML;
340 assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
341 "Invalid ctr loop!");
342 break;
343 case PPC::BCn:
344 Opcode = BDZOpcode;
345 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
346 "Invalid ctr loop!");
347 break;
348 default:
349 llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
350 }
351
352 // Generate "bdnz/bdz" in the exiting block just before the terminator.
353 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
354 .addMBB(BrInstr->getOperand(1).getMBB());
355
356 // Remove the pseudo instructions.
357 BrInstr->eraseFromParent();
358 Dec->eraseFromParent();
359}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
loops
Definition: LoopInfo.cpp:1209
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
PowerPC CTR loops generation
Definition: PPCCTRLoops.cpp:88
#define DEBUG_TYPE
Definition: PPCCTRLoops.cpp:50
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
instr_iterator instr_end()
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
int64_t getImm() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createPPCCTRLoopsPass()
Definition: PPCCTRLoops.cpp:91
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
void initializePPCCTRLoopsPass(PassRegistry &)