LLVM 19.0.0git
PPCMIPeephole.cpp
Go to the documentation of this file.
1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
29
32#include "PPC.h"
33#include "PPCInstrBuilder.h"
34#include "PPCInstrInfo.h"
36#include "PPCTargetMachine.h"
37#include "llvm/ADT/Statistic.h"
47#include "llvm/Support/Debug.h"
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-mi-peepholes"
53
// Statistics (reported with -stats) counting how often each peephole fired.
54STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
55STATISTIC(MultiTOCSaves,
56 "Number of functions with multiple TOC saves that must be kept");
57STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
58STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
59STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
60STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
61STATISTIC(NumConvertedToImmediateForm,
62 "Number of instructions converted to their immediate form");
63STATISTIC(NumFunctionsEnteredInMIPeephole,
64 "Number of functions entered in PPC MI Peepholes");
65STATISTIC(NumFixedPointIterations,
66 "Number of fixed-point iterations converting reg-reg instructions "
67 "to reg-imm ones");
68STATISTIC(NumRotatesCollapsed,
69 "Number of pairs of rotate left, clear left/right collapsed");
70STATISTIC(NumEXTSWAndSLDICombined,
71 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
72STATISTIC(NumLoadImmZeroFoldedAndRemoved,
73 "Number of LI(8) reg, 0 that are folded to r0 and removed");
74
// Hidden command-line switches controlling the individual peepholes below.
75static cl::opt<bool>
76FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
77 cl::desc("Iterate to a fixed point when attempting to "
78 "convert reg-reg instructions to reg-imm"));
79
80static cl::opt<bool>
81ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
82 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
83
84static cl::opt<bool>
85 EnableSExtElimination("ppc-eliminate-signext",
86 cl::desc("enable elimination of sign-extensions"),
87 cl::init(true), cl::Hidden);
88
89static cl::opt<bool>
90 EnableZExtElimination("ppc-eliminate-zeroext",
91 cl::desc("enable elimination of zero-extensions"),
92 cl::init(true), cl::Hidden);
93
// Note: unlike the peepholes above, conditional-trap optimization is
// disabled by default.
94static cl::opt<bool>
95 EnableTrapOptimization("ppc-opt-conditional-trap",
96 cl::desc("enable optimization of conditional traps"),
97 cl::init(false), cl::Hidden);
98
// Debug counters for bisecting which transformations run on a given MI.
// NOTE(review): the opening "DEBUG_COUNTER(" line of this first invocation
// is not visible in this excerpt — presumably it declares
// PeepholeXToICounter; confirm against the full file.
100 PeepholeXToICounter, "ppc-xtoi-peephole",
101 "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");
102
103DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
104 "Controls whether PPC per opcode peephole is performed on a MI");
106namespace {
107
// PowerPC MI-level peephole pass. Runs late in the SSA machine pipeline
// (after VSX swap removal) with a DCE pass expected to follow; see the file
// header for the kill-flag / LiveVariables maintenance contract.
108struct PPCMIPeephole : public MachineFunctionPass {
109
110 static char ID;
111 const PPCInstrInfo *TII;
112 MachineFunction *MF;
// NOTE(review): member declarations between MF and LV (presumably the
// MRI/MDT/MPDT/MBFI pointers used throughout this file) are not visible in
// this excerpt.
114 LiveVariables *LV;
115
116 PPCMIPeephole() : MachineFunctionPass(ID) {
// NOTE(review): the constructor body line (presumably the pass
// initialization call) is not visible in this excerpt.
118 }
119
120private:
// Entry-block frequency cached by initialize(); compared against in
// UpdateTOCSaves() to decide whether the TOC save belongs in the prologue.
124 BlockFrequency EntryFreq;
// Registers whose live ranges were changed by a transformation and whose
// kill flags therefore need recomputation (see addRegToUpdateWithLine).
125 SmallSet<Register, 16> RegsToUpdate;
126
127 // Initialize class variables.
128 void initialize(MachineFunction &MFParm);
129
130 // Perform peepholes.
131 bool simplifyCode();
132
133 // Perform peepholes.
134 bool eliminateRedundantCompare();
135 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
136 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
137 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
138 MachineInstr *&ToErase);
139 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
// NOTE(review): the closing parameter line of this declaration is not
// visible in this excerpt.
141
142 // A number of transformations will eliminate the definition of a register
143 // as all of its uses will be removed. However, this leaves a register
144 // without a definition for LiveVariables. Such transformations should
145 // use this function to provide a dummy definition of the register that
146 // will simply be removed by DCE.
147 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
148 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
149 }
// Queue Reg for kill-flag recomputation; Line is the caller's source line,
// used only for debug output (see the addRegToUpdate macro).
150 void addRegToUpdateWithLine(Register Reg, int Line);
151 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
// NOTE(review): intermediate parameter line(s) of this declaration are not
// visible in this excerpt.
153 Register Dst);
154
155public:
156
157 void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): the addRequired<>/addPreserved<> body of this method is not
// visible in this excerpt.
167 }
168
169 // Main entry point for this pass.
170 bool runOnMachineFunction(MachineFunction &MF) override {
171 initialize(MF);
172 // At this point, TOC pointer should not be used in a function that uses
173 // PC-Relative addressing.
174 assert((MF.getRegInfo().use_empty(PPC::X2) ||
// NOTE(review): the second operand of this assert's || is not visible in
// this excerpt (presumably a PC-relative-addressing subtarget check).
176 "TOC pointer used in a function using PC-Relative addressing!");
177 if (skipFunction(MF.getFunction()))
178 return false;
179 bool Changed = simplifyCode();
// In asserts builds, verify the function immediately so liveness/kill-flag
// violations introduced by a peephole are caught at the source.
180#ifndef NDEBUG
181 if (Changed)
182 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
183 "-mllvm -disable-ppc-peephole");
184#endif
185 return Changed;
186 }
187};
188
// Convenience wrapper that records the call site's line number for the
// debug message below.
189#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
// Queue Reg so its kill flags are recomputed after the peepholes complete.
190void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
// NOTE(review): the guard condition preceding this early return is not
// visible in this excerpt — a bare "return;" here implies a missing "if"
// line; confirm against the full file.
192 return;
193 if (RegsToUpdate.insert(Reg).second)
194 LLVM_DEBUG(dbgs() << "Adding register: " << Register::virtReg2Index(Reg)
195 << " on line " << Line
196 << " for re-computation of kill flags\n");
197}
198
199// Initialize class variables.
200void PPCMIPeephole::initialize(MachineFunction &MFParm) {
201 MF = &MFParm;
202 MRI = &MF->getRegInfo();
203 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
204 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
205 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
206 LV = &getAnalysis<LiveVariables>();
207 EntryFreq = MBFI->getEntryFreq();
208 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
209 RegsToUpdate.clear();
210 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
211 LLVM_DEBUG(MF->dump());
212}
213
// Returns the defining instruction of Op's virtual register, or nullptr if
// Op is not a register operand or not a virtual register.
// NOTE(review): the second line of this signature (presumably the
// "MachineRegisterInfo *MRI" parameter used in the body) is not visible in
// this excerpt.
214static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
216 assert(Op && "Invalid Operand!");
217 if (!Op->isReg())
218 return nullptr;
219
220 Register Reg = Op->getReg();
221 if (!Reg.isVirtual())
222 return nullptr;
223
// In SSA form a virtual register has a unique defining instruction.
224 return MRI->getVRegDef(Reg);
225}
226
227// This function returns number of known zero bits in output of MI
228// starting from the most significant bit.
229static unsigned getKnownLeadingZeroCount(const unsigned Reg,
230 const PPCInstrInfo *TII,
231 const MachineRegisterInfo *MRI) {
232 MachineInstr *MI = MRI->getVRegDef(Reg);
233 unsigned Opcode = MI->getOpcode();
234 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
235 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
236 return MI->getOperand(3).getImm();
237
238 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
239 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
240 return MI->getOperand(3).getImm();
241
242 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
243 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
244 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
245 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
246 return 32 + MI->getOperand(3).getImm();
247
248 if (Opcode == PPC::ANDI_rec) {
249 uint16_t Imm = MI->getOperand(2).getImm();
250 return 48 + llvm::countl_zero(Imm);
251 }
252
253 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
254 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
255 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
256 // The result ranges from 0 to 32.
257 return 58;
258
259 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
260 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
261 // The result ranges from 0 to 64.
262 return 57;
263
264 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
265 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
266 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
267 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
268 return 48;
269
270 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
271 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
272 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
273 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
274 return 56;
275
276 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
277 Opcode == PPC::AND8_rec)
278 return std::max(
279 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
280 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
281
282 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
283 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
284 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
285 Opcode == PPC::XOR8_rec)
286 return std::min(
287 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
288 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
289
290 if (TII->isZeroExtended(Reg, MRI))
291 return 32;
292
293 return 0;
294}
295
296// This function maintains a map for the pairs <TOC Save Instr, Keep>
297// Each time a new TOC save is encountered, it checks if any of the existing
298// ones are dominated by the new one. If so, it marks the existing one as
299// redundant by setting it's entry in the map as false. It then adds the new
300// instruction to the map with either true or false depending on if any
301// existing instructions dominated the new one.
302void PPCMIPeephole::UpdateTOCSaves(
303 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
304 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
305 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
306 // here only support it under ELFv2.
307 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
// NOTE(review): the line declaring FI (used below; presumably the function's
// PPCFunctionInfo) is not visible in this excerpt.
309
310 MachineBasicBlock *Entry = &MF->front();
311 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
312
313 // If the block in which the TOC save resides is in a block that
314 // post-dominates Entry, or a block that is hotter than entry (keep in mind
315 // that early MachineLICM has already run so the TOC save won't be hoisted)
316 // we can just do the save in the prologue.
317 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
318 FI->setMustSaveTOC(true);
319
320 // If we are saving the TOC in the prologue, all the TOC saves can be
321 // removed from the code.
322 if (FI->mustSaveTOC()) {
323 for (auto &TOCSave : TOCSaves)
324 TOCSave.second = false;
325 // Add new instruction to map.
326 TOCSaves[MI] = false;
327 return;
328 }
329 }
330
// Otherwise keep MI only if no existing TOC save already dominates it, and
// mark any saves it dominates as redundant (second == false).
331 bool Keep = true;
332 for (auto &I : TOCSaves) {
333 MachineInstr *CurrInst = I.first;
334 // If new instruction dominates an existing one, mark existing one as
335 // redundant.
336 if (I.second && MDT->dominates(MI, CurrInst))
337 I.second = false;
338 // Check if the new instruction is redundant.
339 if (MDT->dominates(CurrInst, MI)) {
340 Keep = false;
341 break;
342 }
343 }
344 // Add new instruction to map.
345 TOCSaves[MI] = Keep;
346}
347
348// This function returns a list of all PHI nodes in the tree starting from
349// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
350// The list initially only contains the root PHI. When we visit a PHI node, we
351// add it to the list. We continue to look for other PHI node operands while
352// there are nodes to visit in the list. The function returns false if the
353// optimization cannot be applied on this tree.
354static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
355 MachineInstr *RootPHI,
// NOTE(review): the final line of this signature (presumably the PHIs output
// vector filled below) is not visible in this excerpt.
357 PHIs.push_back(RootPHI);
358 unsigned VisitedIndex = 0;
// BFS over the PHI web; PHIs doubles as the worklist and the visited list.
359 while (VisitedIndex < PHIs.size()) {
360 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
// PHI operands come in (register, predecessor-MBB) pairs starting at
// operand 1, hence the stride of 2.
361 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
362 PHIOp != NumOps; PHIOp += 2) {
363 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
364 if (!RegOp.isVirtual())
365 return false;
366 MachineInstr *Instr = MRI->getVRegDef(RegOp);
367 // While collecting the PHI nodes, we check if they can be converted (i.e.
368 // all the operands are either copies, implicit defs or PHI nodes).
369 unsigned Opcode = Instr->getOpcode();
370 if (Opcode == PPC::COPY) {
// Only copies from primed accumulators (ACCRC) are convertible.
371 Register Reg = Instr->getOperand(1).getReg();
372 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
373 return false;
374 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
375 return false;
376 // If we detect a cycle in the PHI nodes, we exit. It would be
377 // possible to change cycles as well, but that would add a lot
378 // of complexity for a case that is unlikely to occur with MMA
379 // code.
380 if (Opcode != PPC::PHI)
381 continue;
382 if (llvm::is_contained(PHIs, Instr))
383 return false;
384 PHIs.push_back(Instr);
385 }
386 VisitedIndex++;
387 }
388 return true;
389}
390
391// This function changes the unprimed accumulator PHI nodes in the PHIs list to
392// primed accumulator PHI nodes. The list is traversed in reverse order to
393// change all the PHI operands of a PHI node before changing the node itself.
394// We keep a map to associate each changed PHI node to its non-changed form.
395void PPCMIPeephole::convertUnprimedAccPHIs(
// NOTE(review): the parameter lines of this definition (TII, MRI, the PHIs
// list and Dst per the in-class declaration) and any preceding local
// declarations (e.g. the ChangedPHIMap used below) are not visible in this
// excerpt.
399 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
// NOTE(review): a declaration line (presumably the PHIOps vector of
// (operand, MBB-operand) pairs populated below) is not visible in this
// excerpt.
401 // We check if the current PHI node can be changed by looking at its
402 // operands. If all the operands are either copies from primed
403 // accumulators, implicit definitions or other unprimed accumulator
404 // PHI nodes, we change it.
405 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
406 PHIOp += 2) {
407 Register RegOp = PHI->getOperand(PHIOp).getReg();
408 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
409 unsigned Opcode = PHIInput->getOpcode();
410 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
411 Opcode == PPC::PHI) &&
412 "Unexpected instruction");
413 if (Opcode == PPC::COPY) {
414 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
415 &PPC::ACCRCRegClass &&
416 "Unexpected register class");
417 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
418 } else if (Opcode == PPC::IMPLICIT_DEF) {
419 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
420 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
421 TII->get(PPC::IMPLICIT_DEF), AccReg);
422 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
423 PHI->getOperand(PHIOp + 1)});
424 } else if (Opcode == PPC::PHI) {
425 // We found a PHI operand. At this point we know this operand
426 // has already been changed so we get its associated changed form
427 // from the map.
428 assert(ChangedPHIMap.count(PHIInput) == 1 &&
429 "This PHI node should have already been changed.");
430 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
// NOTE(review): the opening line of this statement (presumably
// "PHIOps.push_back({MachineOperand::CreateReg(") is not visible in this
// excerpt.
432 PrimedAccPHI->getOperand(0).getReg(), false),
433 PHI->getOperand(PHIOp + 1)});
434 }
435 }
436 Register AccReg = Dst;
437 // If the PHI node we are changing is the root node, the register it defines
438 // will be the destination register of the original copy (of the PHI def).
439 // For all other PHI's in the list, we need to create another primed
440 // accumulator virtual register as the PHI will no longer define the
441 // unprimed accumulator.
442 if (PHI != PHIs[0])
443 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
// NOTE(review): the opening line creating NewPHI (presumably
// "MachineInstrBuilder NewPHI = BuildMI(") is not visible in this excerpt.
445 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
446 for (auto RegMBB : PHIOps) {
447 NewPHI.add(RegMBB.first).add(RegMBB.second);
448 if (MRI->isSSA())
449 addRegToUpdate(RegMBB.first.getReg());
450 }
451 // The liveness of old PHI and new PHI have to be updated.
452 addRegToUpdate(PHI->getOperand(0).getReg());
453 addRegToUpdate(AccReg);
454 ChangedPHIMap[PHI] = NewPHI.getInstr();
455 LLVM_DEBUG(dbgs() << "Converting PHI: ");
456 LLVM_DEBUG(PHI->dump());
457 LLVM_DEBUG(dbgs() << "To: ");
458 LLVM_DEBUG(NewPHI.getInstr()->dump());
459 }
460}
461
462// Perform peephole optimizations.
463bool PPCMIPeephole::simplifyCode() {
464 bool Simplified = false;
465 bool TrapOpt = false;
466 MachineInstr* ToErase = nullptr;
467 std::map<MachineInstr *, bool> TOCSaves;
468 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
469 NumFunctionsEnteredInMIPeephole++;
470 if (ConvertRegReg) {
471 // Fixed-point conversion of reg/reg instructions fed by load-immediate
472 // into reg/imm instructions. FIXME: This is expensive, control it with
473 // an option.
474 bool SomethingChanged = false;
475 do {
476 NumFixedPointIterations++;
477 SomethingChanged = false;
478 for (MachineBasicBlock &MBB : *MF) {
479 for (MachineInstr &MI : MBB) {
480 if (MI.isDebugInstr())
481 continue;
482
483 if (!DebugCounter::shouldExecute(PeepholeXToICounter))
484 continue;
485
486 SmallSet<Register, 4> RRToRIRegsToUpdate;
487 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
488 continue;
489 for (Register R : RRToRIRegsToUpdate)
491 // The updated instruction may now have new register operands.
492 // Conservatively add them to recompute the flags as well.
493 for (const MachineOperand &MO : MI.operands())
494 if (MO.isReg())
495 addRegToUpdate(MO.getReg());
496 // We don't erase anything in case the def has other uses. Let DCE
497 // remove it if it can be removed.
498 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
499 LLVM_DEBUG(MI.dump());
500 NumConvertedToImmediateForm++;
501 SomethingChanged = true;
502 Simplified = true;
503 continue;
504 }
505 }
506 } while (SomethingChanged && FixedPointRegToImm);
507 }
508
509 // Since we are deleting this instruction, we need to run LiveVariables
510 // on any of its definitions that are marked as needing an update since
511 // we can't run LiveVariables on a deleted register. This only needs
512 // to be done for defs since uses will have their own defining
513 // instructions so we won't be running LiveVariables on a deleted reg.
514 auto recomputeLVForDyingInstr = [&]() {
515 if (RegsToUpdate.empty())
516 return;
517 for (MachineOperand &MO : ToErase->operands()) {
518 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
519 continue;
520 Register RegToUpdate = MO.getReg();
521 RegsToUpdate.erase(RegToUpdate);
522 // If some transformation has introduced an additional definition of
523 // this register (breaking SSA), we can safely convert this def to
524 // a def of an invalid register as the instruction is going away.
525 if (!MRI->getUniqueVRegDef(RegToUpdate))
526 MO.setReg(PPC::NoRegister);
527 LV->recomputeForSingleDefVirtReg(RegToUpdate);
528 }
529 };
530
531 for (MachineBasicBlock &MBB : *MF) {
532 for (MachineInstr &MI : MBB) {
533
534 // If the previous instruction was marked for elimination,
535 // remove it now.
536 if (ToErase) {
537 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
538 LLVM_DEBUG(ToErase->dump());
539 recomputeLVForDyingInstr();
540 ToErase->eraseFromParent();
541 ToErase = nullptr;
542 }
543 // If a conditional trap instruction got optimized to an
544 // unconditional trap, eliminate all the instructions after
545 // the trap.
546 if (EnableTrapOptimization && TrapOpt) {
547 ToErase = &MI;
548 continue;
549 }
550
551 // Ignore debug instructions.
552 if (MI.isDebugInstr())
553 continue;
554
555 if (!DebugCounter::shouldExecute(PeepholePerOpCounter))
556 continue;
557
558 // Per-opcode peepholes.
559 switch (MI.getOpcode()) {
560
561 default:
562 break;
563 case PPC::COPY: {
564 Register Src = MI.getOperand(1).getReg();
565 Register Dst = MI.getOperand(0).getReg();
566 if (!Src.isVirtual() || !Dst.isVirtual())
567 break;
568 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
569 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
570 break;
571
572 // We are copying an unprimed accumulator to a primed accumulator.
573 // If the input to the copy is a PHI that is fed only by (i) copies in
574 // the other direction (ii) implicitly defined unprimed accumulators or
575 // (iii) other PHI nodes satisfying (i) and (ii), we can change
576 // the PHI to a PHI on primed accumulators (as long as we also change
577 // its operands). To detect and change such copies, we first get a list
578 // of all the PHI nodes starting from the root PHI node in BFS order.
579 // We then visit all these PHI nodes to check if they can be changed to
580 // primed accumulator PHI nodes and if so, we change them.
581 MachineInstr *RootPHI = MRI->getVRegDef(Src);
582 if (RootPHI->getOpcode() != PPC::PHI)
583 break;
584
586 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
587 break;
588
589 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
590
591 ToErase = &MI;
592 break;
593 }
594 case PPC::LI:
595 case PPC::LI8: {
596 // If we are materializing a zero, look for any use operands for which
597 // zero means immediate zero. All such operands can be replaced with
598 // PPC::ZERO.
599 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
600 break;
601 Register MIDestReg = MI.getOperand(0).getReg();
602 bool Folded = false;
603 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
604 Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
605 if (MRI->use_nodbg_empty(MIDestReg)) {
606 ++NumLoadImmZeroFoldedAndRemoved;
607 ToErase = &MI;
608 }
609 if (Folded)
610 addRegToUpdate(MIDestReg);
611 Simplified |= Folded;
612 break;
613 }
614 case PPC::STW:
615 case PPC::STD: {
616 MachineFrameInfo &MFI = MF->getFrameInfo();
617 if (MFI.hasVarSizedObjects() ||
618 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
619 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
620 break;
621 // When encountering a TOC save instruction, call UpdateTOCSaves
622 // to add it to the TOCSaves map and mark any existing TOC saves
623 // it dominates as redundant.
624 if (TII->isTOCSaveMI(MI))
625 UpdateTOCSaves(TOCSaves, &MI);
626 break;
627 }
628 case PPC::XXPERMDI: {
629 // Perform simplifications of 2x64 vector swaps and splats.
630 // A swap is identified by an immediate value of 2, and a splat
631 // is identified by an immediate value of 0 or 3.
632 int Immed = MI.getOperand(3).getImm();
633
634 if (Immed == 1)
635 break;
636
637 // For each of these simplifications, we need the two source
638 // regs to match. Unfortunately, MachineCSE ignores COPY and
639 // SUBREG_TO_REG, so for example we can see
640 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
641 // We have to look through chains of COPY and SUBREG_TO_REG
642 // to find the real source values for comparison.
643 Register TrueReg1 =
644 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
645 Register TrueReg2 =
646 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
647
648 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
649 break;
650
651 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
652
653 if (!DefMI)
654 break;
655
656 unsigned DefOpc = DefMI->getOpcode();
657
658 // If this is a splat fed by a splatting load, the splat is
659 // redundant. Replace with a copy. This doesn't happen directly due
660 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
661 // a load of a double to a vector of 64-bit integers.
662 auto isConversionOfLoadAndSplat = [=]() -> bool {
663 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
664 return false;
665 Register FeedReg1 =
666 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
667 if (FeedReg1.isVirtual()) {
668 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
669 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
670 return true;
671 }
672 return false;
673 };
674 if ((Immed == 0 || Immed == 3) &&
675 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
676 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
677 "to load-and-splat/copy: ");
678 LLVM_DEBUG(MI.dump());
679 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
680 MI.getOperand(0).getReg())
681 .add(MI.getOperand(1));
682 addRegToUpdate(MI.getOperand(1).getReg());
683 ToErase = &MI;
684 Simplified = true;
685 }
686
687 // If this is a splat or a swap fed by another splat, we
688 // can replace it with a copy.
689 if (DefOpc == PPC::XXPERMDI) {
690 Register DefReg1 = DefMI->getOperand(1).getReg();
691 Register DefReg2 = DefMI->getOperand(2).getReg();
692 unsigned DefImmed = DefMI->getOperand(3).getImm();
693
694 // If the two inputs are not the same register, check to see if
695 // they originate from the same virtual register after only
696 // copy-like instructions.
697 if (DefReg1 != DefReg2) {
698 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
699 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
700
701 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
702 break;
703 }
704
705 if (DefImmed == 0 || DefImmed == 3) {
706 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
707 "to splat/copy: ");
708 LLVM_DEBUG(MI.dump());
709 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
710 MI.getOperand(0).getReg())
711 .add(MI.getOperand(1));
712 addRegToUpdate(MI.getOperand(1).getReg());
713 ToErase = &MI;
714 Simplified = true;
715 }
716
717 // If this is a splat fed by a swap, we can simplify modify
718 // the splat to splat the other value from the swap's input
719 // parameter.
720 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
721 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
722 LLVM_DEBUG(MI.dump());
723 addRegToUpdate(MI.getOperand(1).getReg());
724 addRegToUpdate(MI.getOperand(2).getReg());
725 MI.getOperand(1).setReg(DefReg1);
726 MI.getOperand(2).setReg(DefReg2);
727 MI.getOperand(3).setImm(3 - Immed);
728 addRegToUpdate(DefReg1);
729 addRegToUpdate(DefReg2);
730 Simplified = true;
731 }
732
733 // If this is a swap fed by a swap, we can replace it
734 // with a copy from the first swap's input.
735 else if (Immed == 2 && DefImmed == 2) {
736 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
737 LLVM_DEBUG(MI.dump());
738 addRegToUpdate(MI.getOperand(1).getReg());
739 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
740 MI.getOperand(0).getReg())
741 .add(DefMI->getOperand(1));
744 ToErase = &MI;
745 Simplified = true;
746 }
747 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
748 DefOpc == PPC::XXPERMDIs &&
749 (DefMI->getOperand(2).getImm() == 0 ||
750 DefMI->getOperand(2).getImm() == 3)) {
751 ToErase = &MI;
752 Simplified = true;
753 // Swap of a splat, convert to copy.
754 if (Immed == 2) {
755 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
756 LLVM_DEBUG(MI.dump());
757 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
758 MI.getOperand(0).getReg())
759 .add(MI.getOperand(1));
760 addRegToUpdate(MI.getOperand(1).getReg());
761 break;
762 }
763 // Splat fed by another splat - switch the output of the first
764 // and remove the second.
765 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
766 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
767 LLVM_DEBUG(MI.dump());
768 } else if (Immed == 2 &&
769 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
770 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
771 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
772 DefOpc == PPC::VSPLTISW)) {
773 // Swap of various vector splats, convert to copy.
774 ToErase = &MI;
775 Simplified = true;
776 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
777 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
778 LLVM_DEBUG(MI.dump());
779 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
780 MI.getOperand(0).getReg())
781 .add(MI.getOperand(1));
782 addRegToUpdate(MI.getOperand(1).getReg());
783 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
784 TII->isLoadFromConstantPool(DefMI)) {
785 const Constant *C = TII->getConstantFromConstantPool(DefMI);
786 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
787 ToErase = &MI;
788 Simplified = true;
790 << "Optimizing swap(splat pattern from constant-pool) "
791 "=> copy(splat pattern from constant-pool): ");
792 LLVM_DEBUG(MI.dump());
793 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
794 MI.getOperand(0).getReg())
795 .add(MI.getOperand(1));
796 addRegToUpdate(MI.getOperand(1).getReg());
797 }
798 }
799 break;
800 }
801 case PPC::VSPLTB:
802 case PPC::VSPLTH:
803 case PPC::XXSPLTW: {
804 unsigned MyOpcode = MI.getOpcode();
805 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
806 Register TrueReg =
807 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
808 if (!TrueReg.isVirtual())
809 break;
810 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
811 if (!DefMI)
812 break;
813 unsigned DefOpcode = DefMI->getOpcode();
814 auto isConvertOfSplat = [=]() -> bool {
815 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
816 return false;
817 Register ConvReg = DefMI->getOperand(1).getReg();
818 if (!ConvReg.isVirtual())
819 return false;
820 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
821 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
822 Splt->getOpcode() == PPC::XXSPLTW);
823 };
824 bool AlreadySplat = (MyOpcode == DefOpcode) ||
825 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
826 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
827 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
828 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
829 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
830 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
831 // If the instruction[s] that feed this splat have already splat
832 // the value, this splat is redundant.
833 if (AlreadySplat) {
834 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
835 LLVM_DEBUG(MI.dump());
836 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
837 MI.getOperand(0).getReg())
838 .add(MI.getOperand(OpNo));
839 addRegToUpdate(MI.getOperand(OpNo).getReg());
840 ToErase = &MI;
841 Simplified = true;
842 }
843 // Splat fed by a shift. Usually when we align value to splat into
844 // vector element zero.
845 if (DefOpcode == PPC::XXSLDWI) {
846 Register ShiftRes = DefMI->getOperand(0).getReg();
847 Register ShiftOp1 = DefMI->getOperand(1).getReg();
848 Register ShiftOp2 = DefMI->getOperand(2).getReg();
849 unsigned ShiftImm = DefMI->getOperand(3).getImm();
850 unsigned SplatImm =
851 MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
852 if (ShiftOp1 == ShiftOp2) {
853 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
854 if (MRI->hasOneNonDBGUse(ShiftRes)) {
855 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
857 ToErase = DefMI;
858 }
859 Simplified = true;
860 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
861 << " to " << NewElem << " in instruction: ");
862 LLVM_DEBUG(MI.dump());
863 addRegToUpdate(MI.getOperand(OpNo).getReg());
864 addRegToUpdate(ShiftOp1);
865 MI.getOperand(OpNo).setReg(ShiftOp1);
866 MI.getOperand(2).setImm(NewElem);
867 }
868 }
869 break;
870 }
871 case PPC::XVCVDPSP: {
872 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
873 Register TrueReg =
874 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
875 if (!TrueReg.isVirtual())
876 break;
877 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
878
879 // This can occur when building a vector of single precision or integer
880 // values.
881 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
882 Register DefsReg1 =
883 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
884 Register DefsReg2 =
885 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
886 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
887 break;
888 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
889 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
890
891 if (!P1 || !P2)
892 break;
893
894 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
895 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
896 // the FRSP/XSRSP.
897 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
898 unsigned Opc = RoundInstr->getOpcode();
899 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
900 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
901 Simplified = true;
902 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
903 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
904 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
905 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
906 if (Use.getOperand(i).isReg() &&
907 Use.getOperand(i).getReg() == FRSPDefines)
908 Use.getOperand(i).setReg(ConvReg1);
909 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
910 LLVM_DEBUG(RoundInstr->dump());
911 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
912 LLVM_DEBUG(MI.dump());
913 LLVM_DEBUG(dbgs() << "Through instruction:\n");
915 addRegToUpdate(ConvReg1);
916 addRegToUpdate(FRSPDefines);
917 ToErase = RoundInstr;
918 }
919 };
920
921 // If the input to XVCVDPSP is a vector that was built (even
922 // partially) out of FRSP's, the FRSP(s) can safely be removed
923 // since this instruction performs the same operation.
924 if (P1 != P2) {
925 removeFRSPIfPossible(P1);
926 removeFRSPIfPossible(P2);
927 break;
928 }
929 removeFRSPIfPossible(P1);
930 }
931 break;
932 }
933 case PPC::EXTSH:
934 case PPC::EXTSH8:
935 case PPC::EXTSH8_32_64: {
936 if (!EnableSExtElimination) break;
937 Register NarrowReg = MI.getOperand(1).getReg();
938 if (!NarrowReg.isVirtual())
939 break;
940
941 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
942 unsigned SrcOpcode = SrcMI->getOpcode();
943 // If we've used a zero-extending load that we will sign-extend,
944 // just do a sign-extending load.
945 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
946 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
947 break;
948 // Determine the new opcode. We need to make sure that if the original
949 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
950 // Likewise if the source is X-Form the new opcode should also be
951 // X-Form.
952 unsigned Opc = PPC::LHA;
953 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
954 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
955 MI.getOpcode() == PPC::EXTSH8_32_64;
956
957 if (SourceIsXForm && MIIs64Bit)
958 Opc = PPC::LHAX8;
959 else if (SourceIsXForm && !MIIs64Bit)
960 Opc = PPC::LHAX;
961 else if (MIIs64Bit)
962 Opc = PPC::LHA8;
963
964 addRegToUpdate(NarrowReg);
965 addRegToUpdate(MI.getOperand(0).getReg());
966
967 // We are removing a definition of NarrowReg which will cause
968 // problems in AliveBlocks. Add an implicit def that will be
969 // removed so that AliveBlocks are updated correctly.
970 addDummyDef(MBB, &MI, NarrowReg);
971 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
972 LLVM_DEBUG(SrcMI->dump());
973 LLVM_DEBUG(dbgs() << "and sign-extension\n");
974 LLVM_DEBUG(MI.dump());
975 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
976 SrcMI->setDesc(TII->get(Opc));
977 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
978 ToErase = &MI;
979 Simplified = true;
980 NumEliminatedSExt++;
981 }
982 break;
983 }
984 case PPC::EXTSW:
985 case PPC::EXTSW_32:
986 case PPC::EXTSW_32_64: {
987 if (!EnableSExtElimination) break;
988 Register NarrowReg = MI.getOperand(1).getReg();
989 if (!NarrowReg.isVirtual())
990 break;
991
992 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
993 unsigned SrcOpcode = SrcMI->getOpcode();
994 // If we've used a zero-extending load that we will sign-extend,
995 // just do a sign-extending load.
996 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
997 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
998 break;
999
1000 // The transformation from a zero-extending load to a sign-extending
1001 // load is only legal when the displacement is a multiple of 4.
1002 // If the displacement is not at least 4 byte aligned, don't perform
1003 // the transformation.
1004 bool IsWordAligned = false;
1005 if (SrcMI->getOperand(1).isGlobal()) {
1006 const GlobalObject *GO =
1007 dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
1008 if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
1009 (SrcMI->getOperand(1).getOffset() % 4 == 0))
1010 IsWordAligned = true;
1011 } else if (SrcMI->getOperand(1).isImm()) {
1012 int64_t Value = SrcMI->getOperand(1).getImm();
1013 if (Value % 4 == 0)
1014 IsWordAligned = true;
1015 }
1016
1017 // Determine the new opcode. We need to make sure that if the original
1018 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1019 // Likewise if the source is X-Form the new opcode should also be
1020 // X-Form.
1021 unsigned Opc = PPC::LWA_32;
1022 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1023 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1024 MI.getOpcode() == PPC::EXTSW_32_64;
1025
1026 if (SourceIsXForm && MIIs64Bit)
1027 Opc = PPC::LWAX;
1028 else if (SourceIsXForm && !MIIs64Bit)
1029 Opc = PPC::LWAX_32;
1030 else if (MIIs64Bit)
1031 Opc = PPC::LWA;
1032
1033 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1034 break;
1035
1036 addRegToUpdate(NarrowReg);
1037 addRegToUpdate(MI.getOperand(0).getReg());
1038
1039 // We are removing a definition of NarrowReg which will cause
1040 // problems in AliveBlocks. Add an implicit def that will be
1041 // removed so that AliveBlocks are updated correctly.
1042 addDummyDef(MBB, &MI, NarrowReg);
1043 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1044 LLVM_DEBUG(SrcMI->dump());
1045 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1046 LLVM_DEBUG(MI.dump());
1047 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1048 SrcMI->setDesc(TII->get(Opc));
1049 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1050 ToErase = &MI;
1051 Simplified = true;
1052 NumEliminatedSExt++;
1053 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1054 TII->isSignExtended(NarrowReg, MRI)) {
1055 // We can eliminate EXTSW if the input is known to be already
1056 // sign-extended.
1057 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1058 Register TmpReg =
1059 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1060 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1061 TmpReg);
1062 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1063 MI.getOperand(0).getReg())
1064 .addReg(TmpReg)
1065 .addReg(NarrowReg)
1066 .addImm(PPC::sub_32);
1067 ToErase = &MI;
1068 Simplified = true;
1069 NumEliminatedSExt++;
1070 }
1071 break;
1072 }
1073 case PPC::RLDICL: {
1074 // We can eliminate RLDICL (e.g. for zero-extension)
1075 // if all bits to clear are already zero in the input.
1076 // This code assume following code sequence for zero-extension.
1077 // %6 = COPY %5:sub_32; (optional)
1078 // %8 = IMPLICIT_DEF;
1079 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1080 if (!EnableZExtElimination) break;
1081
1082 if (MI.getOperand(2).getImm() != 0)
1083 break;
1084
1085 Register SrcReg = MI.getOperand(1).getReg();
1086 if (!SrcReg.isVirtual())
1087 break;
1088
1089 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1090 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1091 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1092 break;
1093
1094 MachineInstr *ImpDefMI, *SubRegMI;
1095 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1096 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1097 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1098
1099 SrcMI = SubRegMI;
1100 if (SubRegMI->getOpcode() == PPC::COPY) {
1101 Register CopyReg = SubRegMI->getOperand(1).getReg();
1102 if (CopyReg.isVirtual())
1103 SrcMI = MRI->getVRegDef(CopyReg);
1104 }
1105 if (!SrcMI->getOperand(0).isReg())
1106 break;
1107
1108 unsigned KnownZeroCount =
1109 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1110 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1111 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1112 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1113 MI.getOperand(0).getReg())
1114 .addReg(SrcReg);
1115 addRegToUpdate(SrcReg);
1116 ToErase = &MI;
1117 Simplified = true;
1118 NumEliminatedZExt++;
1119 }
1120 break;
1121 }
1122
1123 // TODO: Any instruction that has an immediate form fed only by a PHI
1124 // whose operands are all load immediate can be folded away. We currently
1125 // do this for ADD instructions, but should expand it to arithmetic and
1126 // binary instructions with immediate forms in the future.
1127 case PPC::ADD4:
1128 case PPC::ADD8: {
1129 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1130 assert(PhiOp && "Invalid Operand!");
1131 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1132
1133 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1134 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1135 };
1136
1137 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1138 MachineOperand *PhiOp) {
1139 assert(PhiOp && "Invalid Operand!");
1140 assert(DominatorOp && "Invalid Operand!");
1141 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1142 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1143
1144 // Note: the vregs only show up at odd indices position of PHI Node,
1145 // the even indices position save the BB info.
1146 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1147 MachineInstr *LiMI =
1148 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1149 if (!LiMI ||
1150 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1151 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1152 !MDT->dominates(DefDomMI, LiMI))
1153 return false;
1154 }
1155
1156 return true;
1157 };
1158
1159 MachineOperand Op1 = MI.getOperand(1);
1160 MachineOperand Op2 = MI.getOperand(2);
1161 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1162 std::swap(Op1, Op2);
1163 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1164 break; // We don't have an ADD fed by LI's that can be transformed
1165
1166 // Now we know that Op1 is the PHI node and Op2 is the dominator
1167 Register DominatorReg = Op2.getReg();
1168
1169 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1170 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1171 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1172 MRI->setRegClass(DominatorReg, TRC);
1173
1174 // replace LIs with ADDIs
1175 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1176 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1177 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1178 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1179 LLVM_DEBUG(LiMI->dump());
1180
1181 // There could be repeated registers in the PHI, e.g: %1 =
1182 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1183 // already replaced the def instruction, skip.
1184 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1185 continue;
1186
1187 assert((LiMI->getOpcode() == PPC::LI ||
1188 LiMI->getOpcode() == PPC::LI8) &&
1189 "Invalid Opcode!");
1190 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1191 LiMI->removeOperand(1); // remove the imm of LI
1192 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1193 : PPC::ADDI8));
1194 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1195 .addReg(DominatorReg)
1196 .addImm(LiImm); // restore the imm of LI
1197 LLVM_DEBUG(LiMI->dump());
1198 }
1199
1200 // Replace ADD with COPY
1201 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1202 LLVM_DEBUG(MI.dump());
1203 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1204 MI.getOperand(0).getReg())
1205 .add(Op1);
1206 addRegToUpdate(Op1.getReg());
1207 addRegToUpdate(Op2.getReg());
1208 ToErase = &MI;
1209 Simplified = true;
1210 NumOptADDLIs++;
1211 break;
1212 }
1213 case PPC::RLDICR: {
1214 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1215 combineSEXTAndSHL(MI, ToErase);
1216 break;
1217 }
1218 case PPC::ANDI_rec:
1219 case PPC::ANDI8_rec:
1220 case PPC::ANDIS_rec:
1221 case PPC::ANDIS8_rec: {
1222 Register TrueReg =
1223 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1224 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1225 break;
1226
1227 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1228 if (!SrcMI)
1229 break;
1230
1231 unsigned SrcOpCode = SrcMI->getOpcode();
1232 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1233 break;
1234
1235 Register SrcReg, DstReg;
1236 SrcReg = SrcMI->getOperand(1).getReg();
1237 DstReg = MI.getOperand(1).getReg();
1238 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1239 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1240 if (DstRC != SrcRC)
1241 break;
1242
1243 uint64_t AndImm = MI.getOperand(2).getImm();
1244 if (MI.getOpcode() == PPC::ANDIS_rec ||
1245 MI.getOpcode() == PPC::ANDIS8_rec)
1246 AndImm <<= 16;
1247 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1248 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1249 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1250
1251 // We can transfer `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
1252 // if all bits to AND are already zero in the input.
1253 bool PatternResultZero =
1254 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1255 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1256
1257 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1258 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1259 bool PatternRemoveRotate =
1260 SrcMI->getOperand(2).getImm() == 0 &&
1261 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1262 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1263
1264 if (!PatternResultZero && !PatternRemoveRotate)
1265 break;
1266
1267 LLVM_DEBUG(dbgs() << "Combining pair: ");
1268 LLVM_DEBUG(SrcMI->dump());
1269 LLVM_DEBUG(MI.dump());
1270 if (PatternResultZero)
1271 MI.getOperand(2).setImm(0);
1272 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1273 LLVM_DEBUG(dbgs() << "To: ");
1274 LLVM_DEBUG(MI.dump());
1275 addRegToUpdate(MI.getOperand(1).getReg());
1276 addRegToUpdate(SrcMI->getOperand(0).getReg());
1277 Simplified = true;
1278 break;
1279 }
1280 case PPC::RLWINM:
1281 case PPC::RLWINM_rec:
1282 case PPC::RLWINM8:
1283 case PPC::RLWINM8_rec: {
1284 // We might replace operand 1 of the instruction which will
1285 // require we recompute kill flags for it.
1286 Register OrigOp1Reg = MI.getOperand(1).isReg()
1287 ? MI.getOperand(1).getReg()
1288 : PPC::NoRegister;
1289 Simplified = TII->combineRLWINM(MI, &ToErase);
1290 if (Simplified) {
1291 addRegToUpdate(OrigOp1Reg);
1292 if (MI.getOperand(1).isReg())
1293 addRegToUpdate(MI.getOperand(1).getReg());
1294 ++NumRotatesCollapsed;
1295 }
1296 break;
1297 }
1298 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1299 // always trap, we will delete the node if it will never trap.
1300 case PPC::TDI:
1301 case PPC::TWI:
1302 case PPC::TD:
1303 case PPC::TW: {
1304 if (!EnableTrapOptimization) break;
1305 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1306 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1307 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1308 // We can only do the optimization if we can get immediates
1309 // from both operands
1310 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1311 LiMI1->getOpcode() == PPC::LI8)))
1312 break;
1313 if (!IsOperand2Immediate &&
1314 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1315 LiMI2->getOpcode() == PPC::LI8)))
1316 break;
1317
1318 auto ImmOperand0 = MI.getOperand(0).getImm();
1319 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1320 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1321 : LiMI2->getOperand(1).getImm();
1322
1323 // We will replace the MI with an unconditional trap if it will always
1324 // trap.
1325 if ((ImmOperand0 == 31) ||
1326 ((ImmOperand0 & 0x10) &&
1327 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1328 ((ImmOperand0 & 0x8) &&
1329 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1330 ((ImmOperand0 & 0x2) &&
1331 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1332 ((ImmOperand0 & 0x1) &&
1333 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1334 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1335 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1336 TrapOpt = true;
1337 }
1338 // We will delete the MI if it will never trap.
1339 ToErase = &MI;
1340 Simplified = true;
1341 break;
1342 }
1343 }
1344 }
1345
1346 // If the last instruction was marked for elimination,
1347 // remove it now.
1348 if (ToErase) {
1349 recomputeLVForDyingInstr();
1350 ToErase->eraseFromParent();
1351 ToErase = nullptr;
1352 }
1353 // Reset TrapOpt to false at the end of the basic block.
1355 TrapOpt = false;
1356 }
1357
1358 // Eliminate all the TOC save instructions which are redundant.
1359 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1360 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1361 if (FI->mustSaveTOC())
1362 NumTOCSavesInPrologue++;
1363
1364 // We try to eliminate redundant compare instruction.
1365 Simplified |= eliminateRedundantCompare();
1366
1367 // If we have made any modifications and added any registers to the set of
1368 // registers for which we need to update the kill flags, do so by recomputing
1369 // LiveVariables for those registers.
1370 for (Register Reg : RegsToUpdate) {
1371 if (!MRI->reg_empty(Reg))
1373 }
1374 return Simplified;
1375}
1376
1377// helper functions for eliminateRedundantCompare
1378static bool isEqOrNe(MachineInstr *BI) {
1380 unsigned PredCond = PPC::getPredicateCondition(Pred);
1381 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1382}
1383
1384static bool isSupportedCmpOp(unsigned opCode) {
1385 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1386 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1387 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1388 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1389}
1390
1391static bool is64bitCmpOp(unsigned opCode) {
1392 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1393 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1394}
1395
1396static bool isSignedCmpOp(unsigned opCode) {
1397 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1398 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1399}
1400
1401static unsigned getSignedCmpOpCode(unsigned opCode) {
1402 if (opCode == PPC::CMPLD) return PPC::CMPD;
1403 if (opCode == PPC::CMPLW) return PPC::CMPW;
1404 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1405 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1406 return opCode;
1407}
1408
1409// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1410// (LT x) to (LE x-1)
1411static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1412 uint64_t Imm = CMPI->getOperand(2).getImm();
1413 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1414 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1415 return 0;
1416
1418 unsigned PredCond = PPC::getPredicateCondition(Pred);
1419 unsigned PredHint = PPC::getPredicateHint(Pred);
1420 if (PredCond == PPC::PRED_GE)
1421 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1422 if (PredCond == PPC::PRED_LT)
1423 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1424
1425 return 0;
1426}
1427
1428// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1429// (LE x) to (LT x+1)
1430static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1431 uint64_t Imm = CMPI->getOperand(2).getImm();
1432 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1433 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1434 return 0;
1435
1437 unsigned PredCond = PPC::getPredicateCondition(Pred);
1438 unsigned PredHint = PPC::getPredicateHint(Pred);
1439 if (PredCond == PPC::PRED_GT)
1440 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1441 if (PredCond == PPC::PRED_LE)
1442 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1443
1444 return 0;
1445}
1446
1447// This takes a Phi node and returns a register value for the specified BB.
1448static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1450 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1451 MachineOperand &MO = Phi->getOperand(I);
1452 if (MO.getMBB() == MBB)
1453 return Phi->getOperand(I-1).getReg();
1454 }
1455 llvm_unreachable("invalid src basic block for this Phi node\n");
1456 return 0;
1457}
1458
1459// This function tracks the source of the register through register copy.
1460// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1461// assuming that the control comes from BB1 into BB2.
1462static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1464 unsigned SrcReg = Reg;
1465 while (true) {
1466 unsigned NextReg = SrcReg;
1467 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1468 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1469 NextReg = getIncomingRegForBlock(Inst, BB1);
1470 // We track through PHI only once to avoid infinite loop.
1471 BB1 = nullptr;
1472 }
1473 else if (Inst->isFullCopy())
1474 NextReg = Inst->getOperand(1).getReg();
1475 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1476 break;
1477 SrcReg = NextReg;
1478 }
1479 return SrcReg;
1480}
1481
1482static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1483 MachineBasicBlock *&PredMBB,
1484 MachineBasicBlock *&MBBtoMoveCmp,
1486
1487 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1488 auto BII = BB.getFirstInstrTerminator();
1489 // We optimize BBs ending with a conditional branch.
1490 // We check only for BCC here, not BCCLR, because BCCLR
1491 // will be formed only later in the pipeline.
1492 if (BB.succ_size() == 2 &&
1493 BII != BB.instr_end() &&
1494 (*BII).getOpcode() == PPC::BCC &&
1495 (*BII).getOperand(1).isReg()) {
1496 // We optimize only if the condition code is used only by one BCC.
1497 Register CndReg = (*BII).getOperand(1).getReg();
1498 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1499 return false;
1500
1501 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1502 // We assume compare and branch are in the same BB for ease of analysis.
1503 if (CMPI->getParent() != &BB)
1504 return false;
1505
1506 // We skip this BB if a physical register is used in comparison.
1507 for (MachineOperand &MO : CMPI->operands())
1508 if (MO.isReg() && !MO.getReg().isVirtual())
1509 return false;
1510
1511 return true;
1512 }
1513 return false;
1514 };
1515
1516 // If this BB has more than one successor, we can create a new BB and
1517 // move the compare instruction in the new BB.
1518 // So far, we do not move compare instruction to a BB having multiple
1519 // successors to avoid potentially increasing code size.
1520 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1521 return BB.succ_size() == 1;
1522 };
1523
1524 if (!isEligibleBB(MBB))
1525 return false;
1526
1527 unsigned NumPredBBs = MBB.pred_size();
1528 if (NumPredBBs == 1) {
1529 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1530 if (isEligibleBB(*TmpMBB)) {
1531 PredMBB = TmpMBB;
1532 MBBtoMoveCmp = nullptr;
1533 return true;
1534 }
1535 }
1536 else if (NumPredBBs == 2) {
1537 // We check for partially redundant case.
1538 // So far, we support cases with only two predecessors
1539 // to avoid increasing the number of instructions.
1541 MachineBasicBlock *Pred1MBB = *PI;
1542 MachineBasicBlock *Pred2MBB = *(PI+1);
1543
1544 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1545 // We assume Pred1MBB is the BB containing the compare to be merged and
1546 // Pred2MBB is the BB to which we will append a compare instruction.
1547 // Proceed as is if Pred1MBB is different from MBB.
1548 }
1549 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1550 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1551 std::swap(Pred1MBB, Pred2MBB);
1552 }
1553 else return false;
1554
1555 if (Pred1MBB == &MBB)
1556 return false;
1557
1558 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1559 // We cannot move the compare instruction if operands are not available
1560 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1562 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1563 for (int I = 1; I <= 2; I++)
1564 if (CMPI->getOperand(I).isReg()) {
1565 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1566 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1567 return false;
1568 }
1569
1570 PredMBB = Pred1MBB;
1571 MBBtoMoveCmp = Pred2MBB;
1572 return true;
1573 }
1574
1575 return false;
1576}
1577
1578// This function will iterate over the input map containing a pair of TOC save
1579// instruction and a flag. The flag will be set to false if the TOC save is
1580// proven redundant. This function will erase from the basic block all the TOC
1581// saves marked as redundant.
1582bool PPCMIPeephole::eliminateRedundantTOCSaves(
1583 std::map<MachineInstr *, bool> &TOCSaves) {
1584 bool Simplified = false;
1585 int NumKept = 0;
1586 for (auto TOCSave : TOCSaves) {
1587 if (!TOCSave.second) {
1588 TOCSave.first->eraseFromParent();
1589 RemoveTOCSave++;
1590 Simplified = true;
1591 } else {
1592 NumKept++;
1593 }
1594 }
1595
1596 if (NumKept > 1)
1597 MultiTOCSaves++;
1598
1599 return Simplified;
1600}
1601
1602// If multiple conditional branches are executed based on the (essentially)
1603// same comparison, we merge compare instructions into one and make multiple
1604// conditional branches on this comparison.
1605// For example,
1606// if (a == 0) { ... }
1607// else if (a < 0) { ... }
1608// can be executed by one compare and two conditional branches instead of
1609// two pairs of a compare and a conditional branch.
1610//
1611// This method merges two compare instructions in two MBBs and modifies the
1612// compare and conditional branch instructions if needed.
1613// For the above example, the input for this pass looks like:
1614// cmplwi r3, 0
1615// beq 0, .LBB0_3
1616// cmpwi r3, -1
1617// bgt 0, .LBB0_4
1618// So, before merging two compares, we need to modify these instructions as
1619// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1620// beq 0, .LBB0_3
// cmpwi r3, 0     ; greater than -1 means greater or equal to 0
1622// bge 0, .LBB0_4
1623
1624bool PPCMIPeephole::eliminateRedundantCompare() {
1625 bool Simplified = false;
1626
1627 for (MachineBasicBlock &MBB2 : *MF) {
1628 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1629
1630 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1631 // as an optimization target if
1632 // - both MBBs end with a conditional branch,
1633 // - MBB1 is the only predecessor of MBB2, and
1634 // - compare does not take a physical register as a operand in both MBBs.
1635 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1636 //
1637 // As partially redundant case, we additionally handle if MBB2 has one
1638 // additional predecessor, which has only one successor (MBB2).
1639 // In this case, we move the compare instruction originally in MBB2 into
// MBBtoMoveCmp. This partially redundant case typically appears when
1641 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1642 //
1643 // Overview of CFG of related basic blocks
1644 // Fully redundant case Partially redundant case
1645 // -------- ---------------- --------
1646 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1647 // -------- ---------------- --------
1648 // | \ (w/ 1 succ) \ | \
1649 // | \ \ | \
1650 // | \ |
1651 // -------- --------
1652 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1653 // -------- and 2 succ) -------- and 2 succ)
1654 // | \ | \
1655 // | \ | \
1656 //
1657 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1658 continue;
1659
1660 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1661 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1662
1663 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1664 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1665 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1666
1667 // We cannot optimize an unsupported compare opcode or
1668 // a mix of 32-bit and 64-bit comparisons
1669 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1670 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1671 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1672 continue;
1673
1674 unsigned NewOpCode = 0;
1675 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1676 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1677 bool SwapOperands = false;
1678
1679 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1680 // Typically, unsigned comparison is used for equality check, but
1681 // we replace it with a signed comparison if the comparison
1682 // to be merged is a signed comparison.
1683 // In other cases of opcode mismatch, we cannot optimize this.
1684
1685 // We cannot change opcode when comparing against an immediate
1686 // if the most significant bit of the immediate is one
1687 // due to the difference in sign extension.
1688 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1689 if (!I->getOperand(2).isImm())
1690 return false;
1691 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1692 return Imm < 0;
1693 };
1694
1695 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1696 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1697 NewOpCode = CMPI1->getOpcode();
1698 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1699 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1700 NewOpCode = CMPI2->getOpcode();
1701 else continue;
1702 }
1703
1704 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1705 // In case of comparisons between two registers, these two registers
1706 // must be same to merge two comparisons.
1707 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1708 nullptr, nullptr, MRI);
1709 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1710 nullptr, nullptr, MRI);
1711 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1712 MBB1, &MBB2, MRI);
1713 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1714 MBB1, &MBB2, MRI);
1715
1716 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1717 // Same pair of registers in the same order; ready to merge as is.
1718 }
1719 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1720 // Same pair of registers in different order.
1721 // We reverse the predicate to merge compare instructions.
1723 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1724 // In case of partial redundancy, we need to swap operands
1725 // in another compare instruction.
1726 SwapOperands = true;
1727 }
1728 else continue;
1729 }
1730 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1731 // In case of comparisons between a register and an immediate,
1732 // the operand register must be same for two compare instructions.
1733 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1734 nullptr, nullptr, MRI);
1735 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1736 MBB1, &MBB2, MRI);
1737 if (Cmp1Operand1 != Cmp2Operand1)
1738 continue;
1739
1740 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1741 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1742
1743 // If immediate are not same, we try to adjust by changing predicate;
1744 // e.g. GT imm means GE (imm+1).
1745 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1746 int Diff = Imm1 - Imm2;
1747 if (Diff < -2 || Diff > 2)
1748 continue;
1749
1750 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1751 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1752 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1753 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1754 if (Diff == 2) {
1755 if (PredToInc2 && PredToDec1) {
1756 NewPredicate2 = PredToInc2;
1757 NewPredicate1 = PredToDec1;
1758 NewImm2++;
1759 NewImm1--;
1760 }
1761 }
1762 else if (Diff == 1) {
1763 if (PredToInc2) {
1764 NewImm2++;
1765 NewPredicate2 = PredToInc2;
1766 }
1767 else if (PredToDec1) {
1768 NewImm1--;
1769 NewPredicate1 = PredToDec1;
1770 }
1771 }
1772 else if (Diff == -1) {
1773 if (PredToDec2) {
1774 NewImm2--;
1775 NewPredicate2 = PredToDec2;
1776 }
1777 else if (PredToInc1) {
1778 NewImm1++;
1779 NewPredicate1 = PredToInc1;
1780 }
1781 }
1782 else if (Diff == -2) {
1783 if (PredToDec2 && PredToInc1) {
1784 NewPredicate2 = PredToDec2;
1785 NewPredicate1 = PredToInc1;
1786 NewImm2--;
1787 NewImm1++;
1788 }
1789 }
1790 }
1791
1792 // We cannot merge two compares if the immediates are not same.
1793 if (NewImm2 != NewImm1)
1794 continue;
1795 }
1796
1797 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1798 LLVM_DEBUG(CMPI1->dump());
1799 LLVM_DEBUG(BI1->dump());
1800 LLVM_DEBUG(CMPI2->dump());
1801 LLVM_DEBUG(BI2->dump());
1802 for (const MachineOperand &MO : CMPI1->operands())
1803 if (MO.isReg())
1804 addRegToUpdate(MO.getReg());
1805 for (const MachineOperand &MO : CMPI2->operands())
1806 if (MO.isReg())
1807 addRegToUpdate(MO.getReg());
1808
1809 // We adjust opcode, predicates and immediate as we determined above.
1810 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1811 CMPI1->setDesc(TII->get(NewOpCode));
1812 }
1813 if (NewPredicate1) {
1814 BI1->getOperand(0).setImm(NewPredicate1);
1815 }
1816 if (NewPredicate2) {
1817 BI2->getOperand(0).setImm(NewPredicate2);
1818 }
1819 if (NewImm1 != Imm1) {
1820 CMPI1->getOperand(2).setImm(NewImm1);
1821 }
1822
1823 if (IsPartiallyRedundant) {
1824 // We touch up the compare instruction in MBB2 and move it to
1825 // a previous BB to handle partially redundant case.
1826 if (SwapOperands) {
1827 Register Op1 = CMPI2->getOperand(1).getReg();
1828 Register Op2 = CMPI2->getOperand(2).getReg();
1829 CMPI2->getOperand(1).setReg(Op2);
1830 CMPI2->getOperand(2).setReg(Op1);
1831 }
1832 if (NewImm2 != Imm2)
1833 CMPI2->getOperand(2).setImm(NewImm2);
1834
1835 for (int I = 1; I <= 2; I++) {
1836 if (CMPI2->getOperand(I).isReg()) {
1837 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1838 if (Inst->getParent() != &MBB2)
1839 continue;
1840
1841 assert(Inst->getOpcode() == PPC::PHI &&
1842 "We cannot support if an operand comes from this BB.");
1843 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1844 CMPI2->getOperand(I).setReg(SrcReg);
1845 addRegToUpdate(SrcReg);
1846 }
1847 }
1848 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1849 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1850
1851 DebugLoc DL = CMPI2->getDebugLoc();
1852 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1853 BuildMI(MBB2, MBB2.begin(), DL,
1854 TII->get(PPC::PHI), NewVReg)
1855 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1856 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1857 BI2->getOperand(1).setReg(NewVReg);
1858 addRegToUpdate(NewVReg);
1859 }
1860 else {
1861 // We finally eliminate compare instruction in MBB2.
1862 // We do not need to treat CMPI2 specially here in terms of re-computing
1863 // live variables even though it is being deleted because:
1864 // - It defines a register that has a single use (already checked in
1865 // eligibleForCompareElimination())
1866 // - The only user (BI2) is no longer using it so the register is dead (no
1867 // def, no uses)
1868 // - We do not attempt to recompute live variables for dead registers
1869 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1870 CMPI2->eraseFromParent();
1871 }
1872
1873 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1874 LLVM_DEBUG(CMPI1->dump());
1875 LLVM_DEBUG(BI1->dump());
1876 LLVM_DEBUG(BI2->dump());
1877 if (IsPartiallyRedundant) {
1878 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1879 << printMBBReference(*MBBtoMoveCmp)
1880 << " to handle partial redundancy.\n");
1881 LLVM_DEBUG(CMPI2->dump());
1882 }
1883 Simplified = true;
1884 }
1885
1886 return Simplified;
1887}
1888
1889// We miss the opportunity to emit an RLDIC when lowering jump tables
1890// since ISEL sees only a single basic block. When selecting, the clear
1891// and shift left will be in different blocks.
1892bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1893 MachineInstr *&ToErase) {
1894 if (MI.getOpcode() != PPC::RLDICR)
1895 return false;
1896
1897 Register SrcReg = MI.getOperand(1).getReg();
1898 if (!SrcReg.isVirtual())
1899 return false;
1900
1901 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1902 if (SrcMI->getOpcode() != PPC::RLDICL)
1903 return false;
1904
1905 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1906 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1907 MachineOperand MOpSHMI = MI.getOperand(2);
1908 MachineOperand MOpMEMI = MI.getOperand(3);
1909 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1910 MOpMEMI.isImm()))
1911 return false;
1912
1913 uint64_t SHSrc = MOpSHSrc.getImm();
1914 uint64_t MBSrc = MOpMBSrc.getImm();
1915 uint64_t SHMI = MOpSHMI.getImm();
1916 uint64_t MEMI = MOpMEMI.getImm();
1917 uint64_t NewSH = SHSrc + SHMI;
1918 uint64_t NewMB = MBSrc - SHMI;
1919 if (NewMB > 63 || NewSH > 63)
1920 return false;
1921
1922 // The bits cleared with RLDICL are [0, MBSrc).
1923 // The bits cleared with RLDICR are (MEMI, 63].
1924 // After the sequence, the bits cleared are:
1925 // [0, MBSrc-SHMI) and (MEMI, 63).
1926 //
1927 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1928 if ((63 - NewSH) != MEMI)
1929 return false;
1930
1931 LLVM_DEBUG(dbgs() << "Converting pair: ");
1932 LLVM_DEBUG(SrcMI->dump());
1933 LLVM_DEBUG(MI.dump());
1934
1935 MI.setDesc(TII->get(PPC::RLDIC));
1936 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1937 MI.getOperand(2).setImm(NewSH);
1938 MI.getOperand(3).setImm(NewMB);
1939 addRegToUpdate(MI.getOperand(1).getReg());
1940 addRegToUpdate(SrcMI->getOperand(0).getReg());
1941
1942 LLVM_DEBUG(dbgs() << "To: ");
1943 LLVM_DEBUG(MI.dump());
1944 NumRotatesCollapsed++;
1945 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1946 if (MRI->use_nodbg_empty(SrcReg)) {
1947 assert(!SrcMI->hasImplicitDef() &&
1948 "Not expecting an implicit def with this instr.");
1949 ToErase = SrcMI;
1950 }
1951 return true;
1952}
1953
1954// For case in LLVM IR
1955// entry:
1956// %iconv = sext i32 %index to i64
1957// br i1 undef label %true, label %false
1958// true:
1959// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1960// ...
1961// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1962// different BBs when conducting instruction selection. We can do a peephole
1963// optimization to combine these two instructions into extswsli after
1964// instruction selection.
1965bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1966 MachineInstr *&ToErase) {
1967 if (MI.getOpcode() != PPC::RLDICR)
1968 return false;
1969
1970 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1971 return false;
1972
1973 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1974
1975 MachineOperand MOpSHMI = MI.getOperand(2);
1976 MachineOperand MOpMEMI = MI.getOperand(3);
1977 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1978 return false;
1979
1980 uint64_t SHMI = MOpSHMI.getImm();
1981 uint64_t MEMI = MOpMEMI.getImm();
1982 if (SHMI + MEMI != 63)
1983 return false;
1984
1985 Register SrcReg = MI.getOperand(1).getReg();
1986 if (!SrcReg.isVirtual())
1987 return false;
1988
1989 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1990 if (SrcMI->getOpcode() != PPC::EXTSW &&
1991 SrcMI->getOpcode() != PPC::EXTSW_32_64)
1992 return false;
1993
1994 // If the register defined by extsw has more than one use, combination is not
1995 // needed.
1996 if (!MRI->hasOneNonDBGUse(SrcReg))
1997 return false;
1998
1999 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2000 assert(SrcMI->getOperand(1).isReg() &&
2001 "EXTSW's second operand should be a register");
2002 if (!SrcMI->getOperand(1).getReg().isVirtual())
2003 return false;
2004
2005 LLVM_DEBUG(dbgs() << "Combining pair: ");
2006 LLVM_DEBUG(SrcMI->dump());
2007 LLVM_DEBUG(MI.dump());
2008
2009 MachineInstr *NewInstr =
2010 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
2011 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
2012 : TII->get(PPC::EXTSWSLI_32_64),
2013 MI.getOperand(0).getReg())
2014 .add(SrcMI->getOperand(1))
2015 .add(MOpSHMI);
2016 (void)NewInstr;
2017
2018 LLVM_DEBUG(dbgs() << "TO: ");
2019 LLVM_DEBUG(NewInstr->dump());
2020 ++NumEXTSWAndSLDICombined;
2021 ToErase = &MI;
2022 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2023 // can recompute its kill flags. We run DCE immediately after this pass
2024 // to clean up dead instructions such as this.
2025 addRegToUpdate(NewInstr->getOperand(1).getReg());
2026 addRegToUpdate(SrcMI->getOperand(0).getReg());
2027 return true;
2028}
2029
2030} // end default namespace
2031
2033 "PowerPC MI Peephole Optimization", false, false)
2039 "PowerPC MI Peephole Optimization", false, false)
2040
2041char PPCMIPeephole::ID = 0;
2043llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define addRegToUpdate(R)
PowerPC MI Peephole Optimization
static cl::opt< bool > EnableZExtElimination("ppc-eliminate-zeroext", cl::desc("enable elimination of zero-extensions"), cl::init(true), cl::Hidden)
static cl::opt< bool > FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), cl::desc("Iterate to a fixed point when attempting to " "convert reg-reg instructions to reg-imm"))
static cl::opt< bool > EnableTrapOptimization("ppc-opt-conditional-trap", cl::desc("enable optimization of conditional traps"), cl::init(false), cl::Hidden)
static cl::opt< bool > ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), cl::desc("Convert eligible reg+reg instructions to reg+imm"))
#define DEBUG_TYPE
static cl::opt< bool > EnableSExtElimination("ppc-eliminate-signext", cl::desc("enable elimination of sign-extensions"), cl::init(true), cl::Hidden)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
unsigned pred_size() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:572
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:643
bool isFullCopy() const
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool isUsingPCRelativeCalls() const
bool isELFv2ABI() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void dump() const
Definition: Pass.cpp:136
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
bool erase(const T &V)
Definition: SmallSet.h:207
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:811
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializePPCMIPeepholePass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
@ Keep
No function return thunk.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createPPCMIPeepholePass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860