LLVM  17.0.0git
PPCCTRLoops.cpp
Go to the documentation of this file.
1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRloop/DecreaseCTRloop
11 // 2: MTCTR8loop/DecreaseCTR8loop
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16 // its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRloop/MTCTR8loop can be deleted.
20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21 // a "cmplwi/cmpldi".
22 //
// This pass runs just before register allocation for two reasons. If a CTR
// loop can be generated, we don't want the register allocator to allocate a
// register for DecreaseCTRloop. If a CTR loop can not be generated, we would
// not have any condition register available for the newly added
// "cmplwi/cmpldi" once registers have already been allocated.
27 //
28 //===----------------------------------------------------------------------===//
29 
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
49 
50 using namespace llvm;
51 
// Tag for this pass; it is also handed to the INITIALIZE_PASS_BEGIN /
// INITIALIZE_PASS_END macros in this file as the pass argument.
52 #define DEBUG_TYPE "ppc-ctrloops"
53 
// Incremented by processLoop() each time a loop is expanded by
// expandCTRLoops().
54 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
// Incremented by processLoop() each time a loop falls back to
// expandNormalLoops().
55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56 
57 namespace {
58 class PPCCTRLoops : public MachineFunctionPass {
59 public:
60  static char ID;
61 
62  PPCCTRLoops() : MachineFunctionPass(ID) {
64  }
65 
66  void getAnalysisUsage(AnalysisUsage &AU) const override {
69  }
70 
71  bool runOnMachineFunction(MachineFunction &MF) override;
72 
73 private:
74  const PPCInstrInfo *TII = nullptr;
75  MachineRegisterInfo *MRI = nullptr;
76 
77  bool processLoop(MachineLoop *ML);
78  bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79  void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80  MachineInstr *Dec);
81  void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82 };
83 } // namespace
84 
85 char PPCCTRLoops::ID = 0;
86 
87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88  false, false)
90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
92 
93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94 
95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96  bool Changed = false;
97 
98  auto &MLI = getAnalysis<MachineLoopInfo>();
99  TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100  MRI = &MF.getRegInfo();
101 
102  for (auto *ML : MLI) {
103  if (ML->isOutermost())
104  Changed |= processLoop(ML);
105  }
106 
107 #ifndef NDEBUG
108  for (const MachineBasicBlock &BB : MF) {
109  for (const MachineInstr &I : BB)
110  assert((I.getOpcode() != PPC::DecreaseCTRloop &&
111  I.getOpcode() != PPC::DecreaseCTR8loop) &&
112  "CTR loop pseudo is not expanded!");
113  }
114 #endif
115 
116  return Changed;
117 }
118 
119 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
120  if (!CheckReads) {
121  // If we are only checking for defs, that is we are going to find
122  // definitions before MTCTRloop, for this case:
123  // CTR defination inside the callee of a call instruction will not impact
124  // the defination of MTCTRloop, so we can use definesRegister() for the
125  // check, no need to check the regmask.
126  return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
127  }
128 
129  if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
130  return true;
131 
132  if (MI->getDesc().isCall())
133  return true;
134 
135  // We define the CTR in the loop preheader, so if there is any CTR reader in
136  // the loop, we also can not use CTR loop form.
137  if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
138  return true;
139 
140  return false;
141 }
142 
143 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
144  bool Changed = false;
145 
146  // Align with HardwareLoop pass, process inner loops first.
147  for (MachineLoop *I : *ML)
148  Changed |= processLoop(I);
149 
150  // If any inner loop is changed, outter loop must be without hardware loop
151  // intrinsics.
152  if (Changed)
153  return true;
154 
155  auto IsLoopStart = [](MachineInstr &MI) {
156  return MI.getOpcode() == PPC::MTCTRloop ||
157  MI.getOpcode() == PPC::MTCTR8loop;
158  };
159 
160  auto SearchForStart =
161  [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
162  for (auto &MI : *MBB) {
163  if (IsLoopStart(MI))
164  return &MI;
165  }
166  return nullptr;
167  };
168 
169  MachineInstr *Start = nullptr;
170  MachineInstr *Dec = nullptr;
171  bool InvalidCTRLoop = false;
172 
173  MachineBasicBlock *Preheader = ML->getLoopPreheader();
174  // If there is no preheader for this loop, there must be no MTCTRloop
175  // either.
176  if (!Preheader)
177  return false;
178 
179  Start = SearchForStart(Preheader);
180  // This is not a CTR loop candidate.
181  if (!Start)
182  return false;
183 
184  // If CTR is live to the preheader, we can not redefine the CTR register.
185  if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
186  InvalidCTRLoop = true;
187 
188  // Make sure there is also no CTR clobber in the block preheader between the
189  // begin and MTCTR.
191  std::next(Start->getReverseIterator());
192  I != Preheader->instr_rend(); ++I)
193  // Only check the definitions of CTR. If there is non-dead definition for
194  // the CTR, we conservatively don't generate a CTR loop.
195  if (isCTRClobber(&*I, /* CheckReads */ false)) {
196  InvalidCTRLoop = true;
197  break;
198  }
199 
200  // Make sure there is also no CTR clobber/user in the block preheader between
201  // MTCTR and the end.
202  for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
203  I != Preheader->instr_end(); ++I)
204  if (isCTRClobber(&*I, /* CheckReads */ true)) {
205  InvalidCTRLoop = true;
206  break;
207  }
208 
209  // Find the CTR loop components and decide whether or not to fall back to a
210  // normal loop.
211  for (auto *MBB : reverse(ML->getBlocks())) {
212  for (auto &MI : *MBB) {
213  if (MI.getOpcode() == PPC::DecreaseCTRloop ||
214  MI.getOpcode() == PPC::DecreaseCTR8loop)
215  Dec = &MI;
216  else if (!InvalidCTRLoop)
217  // If any instruction clobber CTR, then we can not generate a CTR loop.
218  InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
219  }
220  if (Dec && InvalidCTRLoop)
221  break;
222  }
223 
224  assert(Dec && "CTR loop is not complete!");
225 
226  if (InvalidCTRLoop) {
227  expandNormalLoops(ML, Start, Dec);
228  ++NumNormalLoops;
229  }
230  else {
231  expandCTRLoops(ML, Start, Dec);
232  ++NumCTRLoops;
233  }
234  return true;
235 }
236 
237 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
238  MachineInstr *Dec) {
239  bool Is64Bit =
240  Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
241 
242  MachineBasicBlock *Preheader = Start->getParent();
243  MachineBasicBlock *Exiting = Dec->getParent();
244  assert((Preheader && Exiting) &&
245  "Preheader and exiting should exist for CTR loop!");
246 
247  assert(Dec->getOperand(1).getImm() == 1 &&
248  "Loop decrement stride must be 1");
249 
250  unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
251  unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
252 
253  Register PHIDef =
254  MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
255  : &PPC::GPRC_and_GPRC_NOR0RegClass);
256 
257  Start->getParent()->getParent()->getProperties().reset(
259 
260  // Generate "PHI" in the header block.
261  auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
262  DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
263  PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
264 
265  Register ADDIDef =
266  MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
267  : &PPC::GPRC_and_GPRC_NOR0RegClass);
268  // Generate "addi -1" in the exiting block.
269  BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
270  .addReg(PHIDef)
271  .addImm(-1);
272 
273  // Add other inputs for the PHI node.
274  if (ML->isLoopLatch(Exiting)) {
275  // There must be only two predecessors for the loop header, one is the
276  // Preheader and the other one is loop latch Exiting. In hardware loop
277  // insertion pass, the block containing DecreaseCTRloop must dominate all
278  // loop latches. So there must be only one latch.
279  assert(ML->getHeader()->pred_size() == 2 &&
280  "Loop header predecessor is not right!");
281  PHIMIB.addReg(ADDIDef).addMBB(Exiting);
282  } else {
283  // If the block containing DecreaseCTRloop is not a loop latch, we can use
284  // ADDIDef as the value for all other blocks for the PHI. In hardware loop
285  // insertion pass, the block containing DecreaseCTRloop must dominate all
286  // loop latches.
287  for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
288  if (ML->contains(P)) {
289  assert(ML->isLoopLatch(P) &&
290  "Loop's header in-loop predecessor is not loop latch!");
291  PHIMIB.addReg(ADDIDef).addMBB(P);
292  } else
293  assert(P == Preheader &&
294  "CTR loop should not be generated for irreducible loop!");
295  }
296  }
297 
298  // Generate the compare in the exiting block.
299  Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
300  auto CMPMIB =
301  BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
302  .addReg(ADDIDef)
303  .addImm(0);
304 
305  BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
306  Dec->getOperand(0).getReg())
307  .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
308 
309  // Remove the pseudo instructions.
310  Start->eraseFromParent();
311  Dec->eraseFromParent();
312 }
313 
314 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
315  MachineInstr *Dec) {
316  bool Is64Bit =
317  Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
318 
319  MachineBasicBlock *Preheader = Start->getParent();
320  MachineBasicBlock *Exiting = Dec->getParent();
321 
322  (void)Preheader;
323  assert((Preheader && Exiting) &&
324  "Preheader and exiting should exist for CTR loop!");
325 
326  assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
327 
328  unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
329  unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
330  auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
331  assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
332  "There should be only one user for loop decrement pseudo!");
333 
334  unsigned Opcode = 0;
335  switch (BrInstr->getOpcode()) {
336  case PPC::BC:
337  Opcode = BDNZOpcode;
338  (void) ML;
339  assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
340  "Invalid ctr loop!");
341  break;
342  case PPC::BCn:
343  Opcode = BDZOpcode;
344  assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
345  "Invalid ctr loop!");
346  break;
347  default:
348  llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
349  }
350 
351  // Generate "bdnz/bdz" in the exiting block just before the terminator.
352  BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
353  .addMBB(BrInstr->getOperand(1).getMBB());
354 
355  // Remove the pseudo instructions.
356  BrInstr->eraseFromParent();
357  Dec->eraseFromParent();
358 }
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
MachineInstr.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
generation
PowerPC CTR loops generation
Definition: PPCCTRLoops.cpp:90
llvm::MachineBasicBlock::isLiveIn
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Definition: MachineBasicBlock.cpp:601
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:824
llvm::PPCISD::BDNZ
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
Definition: PPCISelLowering.h:298
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:157
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:95
Statistic.h
ErrorHandling.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::PPCInstrInfo
Definition: PPCInstrInfo.h:212
MachineBasicBlock.h
PassRegistry.h
loops
loops
Definition: LoopInfo.cpp:1177
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
llvm::PPCISD::BDZ
@ BDZ
Definition: PPCISelLowering.h:299
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
MachineRegisterInfo.h
PPCSubtarget.h
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:682
MachineLoopInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::PPCSubtarget
Definition: PPCSubtarget.h:71
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:553
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
PPC.h
llvm::createPPCCTRLoopsPass
FunctionPass * createPPCCTRLoopsPass()
Definition: PPCCTRLoops.cpp:93
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", false, false) INITIALIZE_PASS_END(PPCCTRLoops
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:672
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::MachineBasicBlock::instr_rend
reverse_instr_iterator instr_rend()
Definition: MachineBasicBlock.h:299
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:499
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
PPCInstrInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
InlinePriorityMode::ML
@ ML
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:366
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:295
llvm::MachineFunction
Definition: MachineFunction.h:258
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineFunctionProperties::Property::NoPHIs
@ NoPHIs
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
CodeGen.h
llvm::initializePPCCTRLoopsPass
void initializePPCCTRLoopsPass(PassRegistry &)
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::MachineRegisterInfo::hasOneUse
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
Definition: MachineRegisterInfo.h:532
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:484
MachineOperand.h
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
Register.h
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
MachineFunction.h
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:705
InitializePasses.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: PPCCTRLoops.cpp:52
Debug.h