Go to the documentation of this file.
52 #define DEBUG_TYPE "ppc-ctrloops"
54 STATISTIC(NumCTRLoops,
"Number of CTR loops generated");
55 STATISTIC(NumNormalLoops,
"Number of normal compare + branch loops generated");
98 auto &MLI = getAnalysis<MachineLoopInfo>();
102 for (
auto *ML : MLI) {
103 if (
ML->isOutermost())
104 Changed |= processLoop(ML);
110 assert((
I.getOpcode() != PPC::DecreaseCTRloop &&
111 I.getOpcode() != PPC::DecreaseCTR8loop) &&
112 "CTR loop pseudo is not expanded!");
119 bool PPCCTRLoops::isCTRClobber(
MachineInstr *
MI,
bool CheckReads)
const {
126 return MI->definesRegister(PPC::CTR) ||
MI->definesRegister(PPC::CTR8);
129 if (
MI->modifiesRegister(PPC::CTR) ||
MI->modifiesRegister(PPC::CTR8))
132 if (
MI->getDesc().isCall())
137 if (
MI->readsRegister(PPC::CTR) ||
MI->readsRegister(PPC::CTR8))
144 bool Changed =
false;
148 Changed |= processLoop(
I);
156 return MI.getOpcode() == PPC::MTCTRloop ||
157 MI.getOpcode() == PPC::MTCTR8loop;
160 auto SearchForStart =
162 for (
auto &
MI : *
MBB) {
171 bool InvalidCTRLoop =
false;
179 Start = SearchForStart(Preheader);
186 InvalidCTRLoop =
true;
191 std::next(Start->getReverseIterator());
195 if (isCTRClobber(&*
I,
false)) {
196 InvalidCTRLoop =
true;
204 if (isCTRClobber(&*
I,
true)) {
205 InvalidCTRLoop =
true;
212 for (
auto &
MI : *
MBB) {
213 if (
MI.getOpcode() == PPC::DecreaseCTRloop ||
214 MI.getOpcode() == PPC::DecreaseCTR8loop)
216 else if (!InvalidCTRLoop)
218 InvalidCTRLoop |= isCTRClobber(&
MI,
true);
220 if (Dec && InvalidCTRLoop)
224 assert(Dec &&
"CTR loop is not complete!");
226 if (InvalidCTRLoop) {
227 expandNormalLoops(ML, Start, Dec);
231 expandCTRLoops(ML, Start, Dec);
240 Start->getParent()->getParent()->getSubtarget<
PPCSubtarget>().isPPC64();
244 assert((Preheader && Exiting) &&
245 "Preheader and exiting should exist for CTR loop!");
248 "Loop decrement stride must be 1");
250 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
251 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
255 : &PPC::GPRC_and_GPRC_NOR0RegClass);
257 Start->getParent()->getParent()->getProperties().reset(
261 auto PHIMIB =
BuildMI(*
ML->getHeader(),
ML->getHeader()->getFirstNonPHI(),
263 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
267 : &PPC::GPRC_and_GPRC_NOR0RegClass);
274 if (
ML->isLoopLatch(Exiting)) {
279 assert(
ML->getHeader()->pred_size() == 2 &&
280 "Loop header predecessor is not right!");
281 PHIMIB.addReg(ADDIDef).addMBB(Exiting);
288 if (
ML->contains(
P)) {
290 "Loop's header in-loop predecessor is not loop latch!");
291 PHIMIB.addReg(ADDIDef).addMBB(
P);
294 "CTR loop should not be generated for irreducible loop!");
307 .
addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
310 Start->eraseFromParent();
317 Start->getParent()->getParent()->getSubtarget<
PPCSubtarget>().isPPC64();
323 assert((Preheader && Exiting) &&
324 "Preheader and exiting should exist for CTR loop!");
328 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 :
PPC::BDNZ;
329 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 :
PPC::BDZ;
332 "There should be only one user for loop decrement pseudo!");
335 switch (BrInstr->getOpcode()) {
339 assert(
ML->contains(BrInstr->getOperand(1).getMBB()) &&
340 "Invalid ctr loop!");
344 assert(!
ML->contains(BrInstr->getOperand(1).getMBB()) &&
345 "Invalid ctr loop!");
352 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(),
TII->get(Opcode))
353 .
addMBB(BrInstr->getOperand(1).getMBB());
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This is an optimization pass for GlobalISel generic memory operations.
PowerPC CTR loops generation
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
virtual const TargetInstrInfo * getInstrInfo() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application-load time.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
const HexagonInstrInfo * TII
STATISTIC(NumFunctions, "Total number of functions")
FunctionPass * createPPCCTRLoopsPass()
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", false, false) INITIALIZE_PASS_END(PPCCTRLoops
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
reverse_instr_iterator instr_rend()
Representation of each machine instruction.
use_instr_iterator use_instr_begin(Register RegNo) const
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
instr_iterator instr_end()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const MachineBasicBlock * getParent() const
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
Iterator for intrusive lists based on ilist_node.
void initializePPCCTRLoopsPass(PassRegistry &)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
auto reverse(ContainerTy &&C)
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.