LLVM 19.0.0git
PPCMIPeephole.cpp
Go to the documentation of this file.
1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
29
32#include "PPC.h"
33#include "PPCInstrBuilder.h"
34#include "PPCInstrInfo.h"
36#include "PPCTargetMachine.h"
37#include "llvm/ADT/Statistic.h"
47#include "llvm/Support/Debug.h"
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-mi-peepholes"
53
54STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
55STATISTIC(MultiTOCSaves,
56 "Number of functions with multiple TOC saves that must be kept");
57STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
58STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
59STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
60STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
61STATISTIC(NumConvertedToImmediateForm,
62 "Number of instructions converted to their immediate form");
63STATISTIC(NumFunctionsEnteredInMIPeephole,
64 "Number of functions entered in PPC MI Peepholes");
65STATISTIC(NumFixedPointIterations,
66 "Number of fixed-point iterations converting reg-reg instructions "
67 "to reg-imm ones");
68STATISTIC(NumRotatesCollapsed,
69 "Number of pairs of rotate left, clear left/right collapsed");
70STATISTIC(NumEXTSWAndSLDICombined,
71 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
72STATISTIC(NumLoadImmZeroFoldedAndRemoved,
73 "Number of LI(8) reg, 0 that are folded to r0 and removed");
74
75static cl::opt<bool>
76FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
77 cl::desc("Iterate to a fixed point when attempting to "
78 "convert reg-reg instructions to reg-imm"));
79
80static cl::opt<bool>
81ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
82 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
83
84static cl::opt<bool>
85 EnableSExtElimination("ppc-eliminate-signext",
86 cl::desc("enable elimination of sign-extensions"),
87 cl::init(true), cl::Hidden);
88
89static cl::opt<bool>
90 EnableZExtElimination("ppc-eliminate-zeroext",
91 cl::desc("enable elimination of zero-extensions"),
92 cl::init(true), cl::Hidden);
93
94static cl::opt<bool>
95 EnableTrapOptimization("ppc-opt-conditional-trap",
96 cl::desc("enable optimization of conditional traps"),
97 cl::init(false), cl::Hidden);
98
100 PeepholeXToICounter, "ppc-xtoi-peephole",
101 "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");
102
103DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
104 "Controls whether PPC per opcode peephole is performed on a MI");
105
106namespace {
107
108struct PPCMIPeephole : public MachineFunctionPass {
109
110 static char ID;
111 const PPCInstrInfo *TII;
112 MachineFunction *MF;
114 LiveVariables *LV;
115
116 PPCMIPeephole() : MachineFunctionPass(ID) {
118 }
119
120private:
124 BlockFrequency EntryFreq;
125 SmallSet<Register, 16> RegsToUpdate;
126
127 // Initialize class variables.
128 void initialize(MachineFunction &MFParm);
129
130 // Perform peepholes.
131 bool simplifyCode();
132
133 // Perform peepholes.
134 bool eliminateRedundantCompare();
135 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
136 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
137 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
138 MachineInstr *&ToErase);
139 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
141
142 // A number of transformations will eliminate the definition of a register
143 // as all of its uses will be removed. However, this leaves a register
144 // without a definition for LiveVariables. Such transformations should
145 // use this function to provide a dummy definition of the register that
146 // will simply be removed by DCE.
147 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
148 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
149 }
150 void addRegToUpdateWithLine(Register Reg, int Line);
151 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
153 Register Dst);
154
155public:
156
157 void getAnalysisUsage(AnalysisUsage &AU) const override {
167 }
168
169 // Main entry point for this pass.
170 bool runOnMachineFunction(MachineFunction &MF) override {
171 initialize(MF);
172 // At this point, TOC pointer should not be used in a function that uses
173 // PC-Relative addressing.
174 assert((MF.getRegInfo().use_empty(PPC::X2) ||
176 "TOC pointer used in a function using PC-Relative addressing!");
177 if (skipFunction(MF.getFunction()))
178 return false;
179 bool Changed = simplifyCode();
180#ifndef NDEBUG
181 if (Changed)
182 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
183 "-mllvm -disable-ppc-peephole");
184#endif
185 return Changed;
186 }
187};
188
189#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
190void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
192 return;
193 if (RegsToUpdate.insert(Reg).second)
194 LLVM_DEBUG(dbgs() << "Adding register: " << Register::virtReg2Index(Reg)
195 << " on line " << Line
196 << " for re-computation of kill flags\n");
197}
198
199// Initialize class variables.
200void PPCMIPeephole::initialize(MachineFunction &MFParm) {
201 MF = &MFParm;
202 MRI = &MF->getRegInfo();
203 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
204 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
205 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
206 LV = &getAnalysis<LiveVariables>();
207 EntryFreq = MBFI->getEntryFreq();
208 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
209 RegsToUpdate.clear();
210 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
211 LLVM_DEBUG(MF->dump());
212}
213
214static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
216 assert(Op && "Invalid Operand!");
217 if (!Op->isReg())
218 return nullptr;
219
220 Register Reg = Op->getReg();
221 if (!Reg.isVirtual())
222 return nullptr;
223
224 return MRI->getVRegDef(Reg);
225}
226
227// This function returns number of known zero bits in output of MI
228// starting from the most significant bit.
229static unsigned getKnownLeadingZeroCount(const unsigned Reg,
230 const PPCInstrInfo *TII,
231 const MachineRegisterInfo *MRI) {
232 MachineInstr *MI = MRI->getVRegDef(Reg);
233 unsigned Opcode = MI->getOpcode();
234 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
235 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
236 return MI->getOperand(3).getImm();
237
238 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
239 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
240 return MI->getOperand(3).getImm();
241
242 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
243 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
244 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
245 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
246 return 32 + MI->getOperand(3).getImm();
247
248 if (Opcode == PPC::ANDI_rec) {
249 uint16_t Imm = MI->getOperand(2).getImm();
250 return 48 + llvm::countl_zero(Imm);
251 }
252
253 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
254 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
255 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
256 // The result ranges from 0 to 32.
257 return 58;
258
259 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
260 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
261 // The result ranges from 0 to 64.
262 return 57;
263
264 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
265 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
266 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
267 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
268 return 48;
269
270 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
271 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
272 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
273 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
274 return 56;
275
276 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
277 Opcode == PPC::AND8_rec)
278 return std::max(
279 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
280 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
281
282 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
283 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
284 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
285 Opcode == PPC::XOR8_rec)
286 return std::min(
287 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
288 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
289
290 if (TII->isZeroExtended(Reg, MRI))
291 return 32;
292
293 return 0;
294}
295
296// This function maintains a map for the pairs <TOC Save Instr, Keep>
297// Each time a new TOC save is encountered, it checks if any of the existing
298// ones are dominated by the new one. If so, it marks the existing one as
299// redundant by setting it's entry in the map as false. It then adds the new
300// instruction to the map with either true or false depending on if any
301// existing instructions dominated the new one.
302void PPCMIPeephole::UpdateTOCSaves(
303 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
304 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
305 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
306 // here only support it under ELFv2.
307 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
309
310 MachineBasicBlock *Entry = &MF->front();
311 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
312
313 // If the block in which the TOC save resides is in a block that
314 // post-dominates Entry, or a block that is hotter than entry (keep in mind
315 // that early MachineLICM has already run so the TOC save won't be hoisted)
316 // we can just do the save in the prologue.
317 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
318 FI->setMustSaveTOC(true);
319
320 // If we are saving the TOC in the prologue, all the TOC saves can be
321 // removed from the code.
322 if (FI->mustSaveTOC()) {
323 for (auto &TOCSave : TOCSaves)
324 TOCSave.second = false;
325 // Add new instruction to map.
326 TOCSaves[MI] = false;
327 return;
328 }
329 }
330
331 bool Keep = true;
332 for (auto &I : TOCSaves) {
333 MachineInstr *CurrInst = I.first;
334 // If new instruction dominates an existing one, mark existing one as
335 // redundant.
336 if (I.second && MDT->dominates(MI, CurrInst))
337 I.second = false;
338 // Check if the new instruction is redundant.
339 if (MDT->dominates(CurrInst, MI)) {
340 Keep = false;
341 break;
342 }
343 }
344 // Add new instruction to map.
345 TOCSaves[MI] = Keep;
346}
347
348// This function returns a list of all PHI nodes in the tree starting from
349// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
350// The list initially only contains the root PHI. When we visit a PHI node, we
351// add it to the list. We continue to look for other PHI node operands while
352// there are nodes to visit in the list. The function returns false if the
353// optimization cannot be applied on this tree.
354static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
355 MachineInstr *RootPHI,
357 PHIs.push_back(RootPHI);
358 unsigned VisitedIndex = 0;
359 while (VisitedIndex < PHIs.size()) {
360 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
361 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
362 PHIOp != NumOps; PHIOp += 2) {
363 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
364 if (!RegOp.isVirtual())
365 return false;
366 MachineInstr *Instr = MRI->getVRegDef(RegOp);
367 // While collecting the PHI nodes, we check if they can be converted (i.e.
368 // all the operands are either copies, implicit defs or PHI nodes).
369 unsigned Opcode = Instr->getOpcode();
370 if (Opcode == PPC::COPY) {
371 Register Reg = Instr->getOperand(1).getReg();
372 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
373 return false;
374 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
375 return false;
376 // If we detect a cycle in the PHI nodes, we exit. It would be
377 // possible to change cycles as well, but that would add a lot
378 // of complexity for a case that is unlikely to occur with MMA
379 // code.
380 if (Opcode != PPC::PHI)
381 continue;
382 if (llvm::is_contained(PHIs, Instr))
383 return false;
384 PHIs.push_back(Instr);
385 }
386 VisitedIndex++;
387 }
388 return true;
389}
390
391// This function changes the unprimed accumulator PHI nodes in the PHIs list to
392// primed accumulator PHI nodes. The list is traversed in reverse order to
393// change all the PHI operands of a PHI node before changing the node itself.
394// We keep a map to associate each changed PHI node to its non-changed form.
395void PPCMIPeephole::convertUnprimedAccPHIs(
399 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
401 // We check if the current PHI node can be changed by looking at its
402 // operands. If all the operands are either copies from primed
403 // accumulators, implicit definitions or other unprimed accumulator
404 // PHI nodes, we change it.
405 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
406 PHIOp += 2) {
407 Register RegOp = PHI->getOperand(PHIOp).getReg();
408 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
409 unsigned Opcode = PHIInput->getOpcode();
410 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
411 Opcode == PPC::PHI) &&
412 "Unexpected instruction");
413 if (Opcode == PPC::COPY) {
414 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
415 &PPC::ACCRCRegClass &&
416 "Unexpected register class");
417 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
418 } else if (Opcode == PPC::IMPLICIT_DEF) {
419 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
420 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
421 TII->get(PPC::IMPLICIT_DEF), AccReg);
422 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
423 PHI->getOperand(PHIOp + 1)});
424 } else if (Opcode == PPC::PHI) {
425 // We found a PHI operand. At this point we know this operand
426 // has already been changed so we get its associated changed form
427 // from the map.
428 assert(ChangedPHIMap.count(PHIInput) == 1 &&
429 "This PHI node should have already been changed.");
430 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
432 PrimedAccPHI->getOperand(0).getReg(), false),
433 PHI->getOperand(PHIOp + 1)});
434 }
435 }
436 Register AccReg = Dst;
437 // If the PHI node we are changing is the root node, the register it defines
438 // will be the destination register of the original copy (of the PHI def).
439 // For all other PHI's in the list, we need to create another primed
440 // accumulator virtual register as the PHI will no longer define the
441 // unprimed accumulator.
442 if (PHI != PHIs[0])
443 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
445 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
446 for (auto RegMBB : PHIOps) {
447 NewPHI.add(RegMBB.first).add(RegMBB.second);
448 if (MRI->isSSA())
449 addRegToUpdate(RegMBB.first.getReg());
450 }
451 ChangedPHIMap[PHI] = NewPHI.getInstr();
452 LLVM_DEBUG(dbgs() << "Converting PHI: ");
453 LLVM_DEBUG(PHI->dump());
454 LLVM_DEBUG(dbgs() << "To: ");
455 LLVM_DEBUG(NewPHI.getInstr()->dump());
456 }
457}
458
459// Perform peephole optimizations.
460bool PPCMIPeephole::simplifyCode() {
461 bool Simplified = false;
462 bool TrapOpt = false;
463 MachineInstr* ToErase = nullptr;
464 std::map<MachineInstr *, bool> TOCSaves;
465 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
466 NumFunctionsEnteredInMIPeephole++;
467 if (ConvertRegReg) {
468 // Fixed-point conversion of reg/reg instructions fed by load-immediate
469 // into reg/imm instructions. FIXME: This is expensive, control it with
470 // an option.
471 bool SomethingChanged = false;
472 do {
473 NumFixedPointIterations++;
474 SomethingChanged = false;
475 for (MachineBasicBlock &MBB : *MF) {
476 for (MachineInstr &MI : MBB) {
477 if (MI.isDebugInstr())
478 continue;
479
480 if (!DebugCounter::shouldExecute(PeepholeXToICounter))
481 continue;
482
483 SmallSet<Register, 4> RRToRIRegsToUpdate;
484 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
485 continue;
486 for (Register R : RRToRIRegsToUpdate)
488 // The updated instruction may now have new register operands.
489 // Conservatively add them to recompute the flags as well.
490 for (const MachineOperand &MO : MI.operands())
491 if (MO.isReg())
492 addRegToUpdate(MO.getReg());
493 // We don't erase anything in case the def has other uses. Let DCE
494 // remove it if it can be removed.
495 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
496 LLVM_DEBUG(MI.dump());
497 NumConvertedToImmediateForm++;
498 SomethingChanged = true;
499 Simplified = true;
500 continue;
501 }
502 }
503 } while (SomethingChanged && FixedPointRegToImm);
504 }
505
506 // Since we are deleting this instruction, we need to run LiveVariables
507 // on any of its definitions that are marked as needing an update since
508 // we can't run LiveVariables on a deleted register. This only needs
509 // to be done for defs since uses will have their own defining
510 // instructions so we won't be running LiveVariables on a deleted reg.
511 auto recomputeLVForDyingInstr = [&]() {
512 if (RegsToUpdate.empty())
513 return;
514 for (MachineOperand &MO : ToErase->operands()) {
515 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
516 continue;
517 Register RegToUpdate = MO.getReg();
518 RegsToUpdate.erase(RegToUpdate);
519 // If some transformation has introduced an additional definition of
520 // this register (breaking SSA), we can safely convert this def to
521 // a def of an invalid register as the instruction is going away.
522 if (!MRI->getUniqueVRegDef(RegToUpdate))
523 MO.setReg(PPC::NoRegister);
524 LV->recomputeForSingleDefVirtReg(RegToUpdate);
525 }
526 };
527
528 for (MachineBasicBlock &MBB : *MF) {
529 for (MachineInstr &MI : MBB) {
530
531 // If the previous instruction was marked for elimination,
532 // remove it now.
533 if (ToErase) {
534 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
535 LLVM_DEBUG(ToErase->dump());
536 recomputeLVForDyingInstr();
537 ToErase->eraseFromParent();
538 ToErase = nullptr;
539 }
540 // If a conditional trap instruction got optimized to an
541 // unconditional trap, eliminate all the instructions after
542 // the trap.
543 if (EnableTrapOptimization && TrapOpt) {
544 ToErase = &MI;
545 continue;
546 }
547
548 // Ignore debug instructions.
549 if (MI.isDebugInstr())
550 continue;
551
552 if (!DebugCounter::shouldExecute(PeepholePerOpCounter))
553 continue;
554
555 // Per-opcode peepholes.
556 switch (MI.getOpcode()) {
557
558 default:
559 break;
560 case PPC::COPY: {
561 Register Src = MI.getOperand(1).getReg();
562 Register Dst = MI.getOperand(0).getReg();
563 if (!Src.isVirtual() || !Dst.isVirtual())
564 break;
565 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
566 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
567 break;
568
569 // We are copying an unprimed accumulator to a primed accumulator.
570 // If the input to the copy is a PHI that is fed only by (i) copies in
571 // the other direction (ii) implicitly defined unprimed accumulators or
572 // (iii) other PHI nodes satisfying (i) and (ii), we can change
573 // the PHI to a PHI on primed accumulators (as long as we also change
574 // its operands). To detect and change such copies, we first get a list
575 // of all the PHI nodes starting from the root PHI node in BFS order.
576 // We then visit all these PHI nodes to check if they can be changed to
577 // primed accumulator PHI nodes and if so, we change them.
578 MachineInstr *RootPHI = MRI->getVRegDef(Src);
579 if (RootPHI->getOpcode() != PPC::PHI)
580 break;
581
583 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
584 break;
585
586 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
587
588 ToErase = &MI;
589 break;
590 }
591 case PPC::LI:
592 case PPC::LI8: {
593 // If we are materializing a zero, look for any use operands for which
594 // zero means immediate zero. All such operands can be replaced with
595 // PPC::ZERO.
596 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
597 break;
598 Register MIDestReg = MI.getOperand(0).getReg();
599 bool Folded = false;
600 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
601 Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
602 if (MRI->use_nodbg_empty(MIDestReg)) {
603 ++NumLoadImmZeroFoldedAndRemoved;
604 ToErase = &MI;
605 }
606 if (Folded)
607 addRegToUpdate(MIDestReg);
608 Simplified |= Folded;
609 break;
610 }
611 case PPC::STW:
612 case PPC::STD: {
613 MachineFrameInfo &MFI = MF->getFrameInfo();
614 if (MFI.hasVarSizedObjects() ||
615 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
616 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
617 break;
618 // When encountering a TOC save instruction, call UpdateTOCSaves
619 // to add it to the TOCSaves map and mark any existing TOC saves
620 // it dominates as redundant.
621 if (TII->isTOCSaveMI(MI))
622 UpdateTOCSaves(TOCSaves, &MI);
623 break;
624 }
625 case PPC::XXPERMDI: {
626 // Perform simplifications of 2x64 vector swaps and splats.
627 // A swap is identified by an immediate value of 2, and a splat
628 // is identified by an immediate value of 0 or 3.
629 int Immed = MI.getOperand(3).getImm();
630
631 if (Immed == 1)
632 break;
633
634 // For each of these simplifications, we need the two source
635 // regs to match. Unfortunately, MachineCSE ignores COPY and
636 // SUBREG_TO_REG, so for example we can see
637 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
638 // We have to look through chains of COPY and SUBREG_TO_REG
639 // to find the real source values for comparison.
640 Register TrueReg1 =
641 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
642 Register TrueReg2 =
643 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
644
645 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
646 break;
647
648 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
649
650 if (!DefMI)
651 break;
652
653 unsigned DefOpc = DefMI->getOpcode();
654
655 // If this is a splat fed by a splatting load, the splat is
656 // redundant. Replace with a copy. This doesn't happen directly due
657 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
658 // a load of a double to a vector of 64-bit integers.
659 auto isConversionOfLoadAndSplat = [=]() -> bool {
660 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
661 return false;
662 Register FeedReg1 =
663 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
664 if (FeedReg1.isVirtual()) {
665 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
666 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
667 return true;
668 }
669 return false;
670 };
671 if ((Immed == 0 || Immed == 3) &&
672 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
673 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
674 "to load-and-splat/copy: ");
675 LLVM_DEBUG(MI.dump());
676 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
677 MI.getOperand(0).getReg())
678 .add(MI.getOperand(1));
679 addRegToUpdate(MI.getOperand(1).getReg());
680 ToErase = &MI;
681 Simplified = true;
682 }
683
684 // If this is a splat or a swap fed by another splat, we
685 // can replace it with a copy.
686 if (DefOpc == PPC::XXPERMDI) {
687 Register DefReg1 = DefMI->getOperand(1).getReg();
688 Register DefReg2 = DefMI->getOperand(2).getReg();
689 unsigned DefImmed = DefMI->getOperand(3).getImm();
690
691 // If the two inputs are not the same register, check to see if
692 // they originate from the same virtual register after only
693 // copy-like instructions.
694 if (DefReg1 != DefReg2) {
695 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
696 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
697
698 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
699 break;
700 }
701
702 if (DefImmed == 0 || DefImmed == 3) {
703 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
704 "to splat/copy: ");
705 LLVM_DEBUG(MI.dump());
706 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
707 MI.getOperand(0).getReg())
708 .add(MI.getOperand(1));
709 addRegToUpdate(MI.getOperand(1).getReg());
710 ToErase = &MI;
711 Simplified = true;
712 }
713
714 // If this is a splat fed by a swap, we can simplify modify
715 // the splat to splat the other value from the swap's input
716 // parameter.
717 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
718 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
719 LLVM_DEBUG(MI.dump());
720 addRegToUpdate(MI.getOperand(1).getReg());
721 addRegToUpdate(MI.getOperand(2).getReg());
722 MI.getOperand(1).setReg(DefReg1);
723 MI.getOperand(2).setReg(DefReg2);
724 MI.getOperand(3).setImm(3 - Immed);
725 addRegToUpdate(DefReg1);
726 addRegToUpdate(DefReg2);
727 Simplified = true;
728 }
729
730 // If this is a swap fed by a swap, we can replace it
731 // with a copy from the first swap's input.
732 else if (Immed == 2 && DefImmed == 2) {
733 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
734 LLVM_DEBUG(MI.dump());
735 addRegToUpdate(MI.getOperand(1).getReg());
736 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
737 MI.getOperand(0).getReg())
738 .add(DefMI->getOperand(1));
741 ToErase = &MI;
742 Simplified = true;
743 }
744 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
745 DefOpc == PPC::XXPERMDIs &&
746 (DefMI->getOperand(2).getImm() == 0 ||
747 DefMI->getOperand(2).getImm() == 3)) {
748 ToErase = &MI;
749 Simplified = true;
750 // Swap of a splat, convert to copy.
751 if (Immed == 2) {
752 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
753 LLVM_DEBUG(MI.dump());
754 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
755 MI.getOperand(0).getReg())
756 .add(MI.getOperand(1));
757 addRegToUpdate(MI.getOperand(1).getReg());
758 break;
759 }
760 // Splat fed by another splat - switch the output of the first
761 // and remove the second.
762 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
763 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
764 LLVM_DEBUG(MI.dump());
765 } else if (Immed == 2 &&
766 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
767 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
768 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
769 DefOpc == PPC::VSPLTISW)) {
770 // Swap of various vector splats, convert to copy.
771 ToErase = &MI;
772 Simplified = true;
773 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
774 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
775 LLVM_DEBUG(MI.dump());
776 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
777 MI.getOperand(0).getReg())
778 .add(MI.getOperand(1));
779 addRegToUpdate(MI.getOperand(1).getReg());
780 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
781 TII->isLoadFromConstantPool(DefMI)) {
782 const Constant *C = TII->getConstantFromConstantPool(DefMI);
783 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
784 ToErase = &MI;
785 Simplified = true;
787 << "Optimizing swap(splat pattern from constant-pool) "
788 "=> copy(splat pattern from constant-pool): ");
789 LLVM_DEBUG(MI.dump());
790 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
791 MI.getOperand(0).getReg())
792 .add(MI.getOperand(1));
793 addRegToUpdate(MI.getOperand(1).getReg());
794 }
795 }
796 break;
797 }
798 case PPC::VSPLTB:
799 case PPC::VSPLTH:
800 case PPC::XXSPLTW: {
801 unsigned MyOpcode = MI.getOpcode();
802 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
803 Register TrueReg =
804 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
805 if (!TrueReg.isVirtual())
806 break;
807 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
808 if (!DefMI)
809 break;
810 unsigned DefOpcode = DefMI->getOpcode();
811 auto isConvertOfSplat = [=]() -> bool {
812 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
813 return false;
814 Register ConvReg = DefMI->getOperand(1).getReg();
815 if (!ConvReg.isVirtual())
816 return false;
817 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
818 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
819 Splt->getOpcode() == PPC::XXSPLTW);
820 };
821 bool AlreadySplat = (MyOpcode == DefOpcode) ||
822 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
823 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
824 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
825 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
826 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
827 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
828 // If the instruction[s] that feed this splat have already splat
829 // the value, this splat is redundant.
830 if (AlreadySplat) {
831 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
832 LLVM_DEBUG(MI.dump());
833 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
834 MI.getOperand(0).getReg())
835 .add(MI.getOperand(OpNo));
836 addRegToUpdate(MI.getOperand(OpNo).getReg());
837 ToErase = &MI;
838 Simplified = true;
839 }
840 // Splat fed by a shift. Usually when we align value to splat into
841 // vector element zero.
842 if (DefOpcode == PPC::XXSLDWI) {
843 Register ShiftRes = DefMI->getOperand(0).getReg();
844 Register ShiftOp1 = DefMI->getOperand(1).getReg();
845 Register ShiftOp2 = DefMI->getOperand(2).getReg();
846 unsigned ShiftImm = DefMI->getOperand(3).getImm();
847 unsigned SplatImm =
848 MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
849 if (ShiftOp1 == ShiftOp2) {
850 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
851 if (MRI->hasOneNonDBGUse(ShiftRes)) {
852 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
854 ToErase = DefMI;
855 }
856 Simplified = true;
857 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
858 << " to " << NewElem << " in instruction: ");
859 LLVM_DEBUG(MI.dump());
860 addRegToUpdate(MI.getOperand(OpNo).getReg());
861 addRegToUpdate(ShiftOp1);
862 MI.getOperand(OpNo).setReg(ShiftOp1);
863 MI.getOperand(2).setImm(NewElem);
864 }
865 }
866 break;
867 }
868 case PPC::XVCVDPSP: {
869 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
870 Register TrueReg =
871 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
872 if (!TrueReg.isVirtual())
873 break;
874 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
875
876 // This can occur when building a vector of single precision or integer
877 // values.
878 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
879 Register DefsReg1 =
880 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
881 Register DefsReg2 =
882 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
883 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
884 break;
885 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
886 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
887
888 if (!P1 || !P2)
889 break;
890
891 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
892 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
893 // the FRSP/XSRSP.
894 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
895 unsigned Opc = RoundInstr->getOpcode();
896 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
897 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
898 Simplified = true;
899 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
900 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
901 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
902 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
903 if (Use.getOperand(i).isReg() &&
904 Use.getOperand(i).getReg() == FRSPDefines)
905 Use.getOperand(i).setReg(ConvReg1);
906 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
907 LLVM_DEBUG(RoundInstr->dump());
908 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
909 LLVM_DEBUG(MI.dump());
910 LLVM_DEBUG(dbgs() << "Through instruction:\n");
912 addRegToUpdate(ConvReg1);
913 addRegToUpdate(FRSPDefines);
914 ToErase = RoundInstr;
915 }
916 };
917
918 // If the input to XVCVDPSP is a vector that was built (even
919 // partially) out of FRSP's, the FRSP(s) can safely be removed
920 // since this instruction performs the same operation.
921 if (P1 != P2) {
922 removeFRSPIfPossible(P1);
923 removeFRSPIfPossible(P2);
924 break;
925 }
926 removeFRSPIfPossible(P1);
927 }
928 break;
929 }
930 case PPC::EXTSH:
931 case PPC::EXTSH8:
932 case PPC::EXTSH8_32_64: {
933 if (!EnableSExtElimination) break;
934 Register NarrowReg = MI.getOperand(1).getReg();
935 if (!NarrowReg.isVirtual())
936 break;
937
938 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
939 unsigned SrcOpcode = SrcMI->getOpcode();
940 // If we've used a zero-extending load that we will sign-extend,
941 // just do a sign-extending load.
942 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
943 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
944 break;
945 // Determine the new opcode. We need to make sure that if the original
946 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
947 // Likewise if the source is X-Form the new opcode should also be
948 // X-Form.
949 unsigned Opc = PPC::LHA;
950 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
951 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
952 MI.getOpcode() == PPC::EXTSH8_32_64;
953
954 if (SourceIsXForm && MIIs64Bit)
955 Opc = PPC::LHAX8;
956 else if (SourceIsXForm && !MIIs64Bit)
957 Opc = PPC::LHAX;
958 else if (MIIs64Bit)
959 Opc = PPC::LHA8;
960
961 addRegToUpdate(NarrowReg);
962 addRegToUpdate(MI.getOperand(0).getReg());
963
964 // We are removing a definition of NarrowReg which will cause
965 // problems in AliveBlocks. Add an implicit def that will be
966 // removed so that AliveBlocks are updated correctly.
967 addDummyDef(MBB, &MI, NarrowReg);
968 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
969 LLVM_DEBUG(SrcMI->dump());
970 LLVM_DEBUG(dbgs() << "and sign-extension\n");
971 LLVM_DEBUG(MI.dump());
972 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
973 SrcMI->setDesc(TII->get(Opc));
974 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
975 ToErase = &MI;
976 Simplified = true;
977 NumEliminatedSExt++;
978 }
979 break;
980 }
981 case PPC::EXTSW:
982 case PPC::EXTSW_32:
983 case PPC::EXTSW_32_64: {
984 if (!EnableSExtElimination) break;
985 Register NarrowReg = MI.getOperand(1).getReg();
986 if (!NarrowReg.isVirtual())
987 break;
988
989 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
990 unsigned SrcOpcode = SrcMI->getOpcode();
991 // If we've used a zero-extending load that we will sign-extend,
992 // just do a sign-extending load.
993 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
994 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
995 break;
996
997 // The transformation from a zero-extending load to a sign-extending
998 // load is only legal when the displacement is a multiple of 4.
999 // If the displacement is not at least 4 byte aligned, don't perform
1000 // the transformation.
1001 bool IsWordAligned = false;
1002 if (SrcMI->getOperand(1).isGlobal()) {
1003 const GlobalObject *GO =
1004 dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
1005 if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
1006 (SrcMI->getOperand(1).getOffset() % 4 == 0))
1007 IsWordAligned = true;
1008 } else if (SrcMI->getOperand(1).isImm()) {
1009 int64_t Value = SrcMI->getOperand(1).getImm();
1010 if (Value % 4 == 0)
1011 IsWordAligned = true;
1012 }
1013
1014 // Determine the new opcode. We need to make sure that if the original
1015 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1016 // Likewise if the source is X-Form the new opcode should also be
1017 // X-Form.
1018 unsigned Opc = PPC::LWA_32;
1019 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1020 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1021 MI.getOpcode() == PPC::EXTSW_32_64;
1022
1023 if (SourceIsXForm && MIIs64Bit)
1024 Opc = PPC::LWAX;
1025 else if (SourceIsXForm && !MIIs64Bit)
1026 Opc = PPC::LWAX_32;
1027 else if (MIIs64Bit)
1028 Opc = PPC::LWA;
1029
1030 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1031 break;
1032
1033 addRegToUpdate(NarrowReg);
1034 addRegToUpdate(MI.getOperand(0).getReg());
1035
1036 // We are removing a definition of NarrowReg which will cause
1037 // problems in AliveBlocks. Add an implicit def that will be
1038 // removed so that AliveBlocks are updated correctly.
1039 addDummyDef(MBB, &MI, NarrowReg);
1040 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1041 LLVM_DEBUG(SrcMI->dump());
1042 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1043 LLVM_DEBUG(MI.dump());
1044 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1045 SrcMI->setDesc(TII->get(Opc));
1046 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1047 ToErase = &MI;
1048 Simplified = true;
1049 NumEliminatedSExt++;
1050 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1051 TII->isSignExtended(NarrowReg, MRI)) {
1052 // We can eliminate EXTSW if the input is known to be already
1053 // sign-extended.
1054 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1055 Register TmpReg =
1056 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1057 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1058 TmpReg);
1059 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1060 MI.getOperand(0).getReg())
1061 .addReg(TmpReg)
1062 .addReg(NarrowReg)
1063 .addImm(PPC::sub_32);
1064 ToErase = &MI;
1065 Simplified = true;
1066 NumEliminatedSExt++;
1067 }
1068 break;
1069 }
1070 case PPC::RLDICL: {
1071 // We can eliminate RLDICL (e.g. for zero-extension)
1072 // if all bits to clear are already zero in the input.
1073 // This code assume following code sequence for zero-extension.
1074 // %6 = COPY %5:sub_32; (optional)
1075 // %8 = IMPLICIT_DEF;
1076 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1077 if (!EnableZExtElimination) break;
1078
1079 if (MI.getOperand(2).getImm() != 0)
1080 break;
1081
1082 Register SrcReg = MI.getOperand(1).getReg();
1083 if (!SrcReg.isVirtual())
1084 break;
1085
1086 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1087 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1088 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1089 break;
1090
1091 MachineInstr *ImpDefMI, *SubRegMI;
1092 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1093 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1094 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1095
1096 SrcMI = SubRegMI;
1097 if (SubRegMI->getOpcode() == PPC::COPY) {
1098 Register CopyReg = SubRegMI->getOperand(1).getReg();
1099 if (CopyReg.isVirtual())
1100 SrcMI = MRI->getVRegDef(CopyReg);
1101 }
1102 if (!SrcMI->getOperand(0).isReg())
1103 break;
1104
1105 unsigned KnownZeroCount =
1106 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1107 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1108 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1109 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1110 MI.getOperand(0).getReg())
1111 .addReg(SrcReg);
1112 addRegToUpdate(SrcReg);
1113 ToErase = &MI;
1114 Simplified = true;
1115 NumEliminatedZExt++;
1116 }
1117 break;
1118 }
1119
1120 // TODO: Any instruction that has an immediate form fed only by a PHI
1121 // whose operands are all load immediate can be folded away. We currently
1122 // do this for ADD instructions, but should expand it to arithmetic and
1123 // binary instructions with immediate forms in the future.
1124 case PPC::ADD4:
1125 case PPC::ADD8: {
1126 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1127 assert(PhiOp && "Invalid Operand!");
1128 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1129
1130 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1131 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1132 };
1133
1134 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1135 MachineOperand *PhiOp) {
1136 assert(PhiOp && "Invalid Operand!");
1137 assert(DominatorOp && "Invalid Operand!");
1138 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1139 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1140
1141 // Note: the vregs only show up at odd indices position of PHI Node,
1142 // the even indices position save the BB info.
1143 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1144 MachineInstr *LiMI =
1145 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1146 if (!LiMI ||
1147 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1148 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1149 !MDT->dominates(DefDomMI, LiMI))
1150 return false;
1151 }
1152
1153 return true;
1154 };
1155
1156 MachineOperand Op1 = MI.getOperand(1);
1157 MachineOperand Op2 = MI.getOperand(2);
1158 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1159 std::swap(Op1, Op2);
1160 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1161 break; // We don't have an ADD fed by LI's that can be transformed
1162
1163 // Now we know that Op1 is the PHI node and Op2 is the dominator
1164 Register DominatorReg = Op2.getReg();
1165
1166 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1167 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1168 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1169 MRI->setRegClass(DominatorReg, TRC);
1170
1171 // replace LIs with ADDIs
1172 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1173 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1174 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1175 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1176 LLVM_DEBUG(LiMI->dump());
1177
1178 // There could be repeated registers in the PHI, e.g: %1 =
1179 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1180 // already replaced the def instruction, skip.
1181 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1182 continue;
1183
1184 assert((LiMI->getOpcode() == PPC::LI ||
1185 LiMI->getOpcode() == PPC::LI8) &&
1186 "Invalid Opcode!");
1187 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1188 LiMI->removeOperand(1); // remove the imm of LI
1189 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1190 : PPC::ADDI8));
1191 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1192 .addReg(DominatorReg)
1193 .addImm(LiImm); // restore the imm of LI
1194 LLVM_DEBUG(LiMI->dump());
1195 }
1196
1197 // Replace ADD with COPY
1198 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1199 LLVM_DEBUG(MI.dump());
1200 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1201 MI.getOperand(0).getReg())
1202 .add(Op1);
1203 addRegToUpdate(Op1.getReg());
1204 addRegToUpdate(Op2.getReg());
1205 ToErase = &MI;
1206 Simplified = true;
1207 NumOptADDLIs++;
1208 break;
1209 }
1210 case PPC::RLDICR: {
1211 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1212 combineSEXTAndSHL(MI, ToErase);
1213 break;
1214 }
1215 case PPC::ANDI_rec:
1216 case PPC::ANDI8_rec:
1217 case PPC::ANDIS_rec:
1218 case PPC::ANDIS8_rec: {
1219 Register TrueReg =
1220 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1221 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1222 break;
1223
1224 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1225 if (!SrcMI)
1226 break;
1227
1228 unsigned SrcOpCode = SrcMI->getOpcode();
1229 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1230 break;
1231
1232 Register SrcReg, DstReg;
1233 SrcReg = SrcMI->getOperand(1).getReg();
1234 DstReg = MI.getOperand(1).getReg();
1235 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1236 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1237 if (DstRC != SrcRC)
1238 break;
1239
1240 uint64_t AndImm = MI.getOperand(2).getImm();
1241 if (MI.getOpcode() == PPC::ANDIS_rec ||
1242 MI.getOpcode() == PPC::ANDIS8_rec)
1243 AndImm <<= 16;
1244 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1245 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1246 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1247
1248 // We can transfer `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
1249 // if all bits to AND are already zero in the input.
1250 bool PatternResultZero =
1251 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1252 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1253
1254 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1255 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1256 bool PatternRemoveRotate =
1257 SrcMI->getOperand(2).getImm() == 0 &&
1258 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1259 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1260
1261 if (!PatternResultZero && !PatternRemoveRotate)
1262 break;
1263
1264 LLVM_DEBUG(dbgs() << "Combining pair: ");
1265 LLVM_DEBUG(SrcMI->dump());
1266 LLVM_DEBUG(MI.dump());
1267 if (PatternResultZero)
1268 MI.getOperand(2).setImm(0);
1269 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1270 LLVM_DEBUG(dbgs() << "To: ");
1271 LLVM_DEBUG(MI.dump());
1272 addRegToUpdate(MI.getOperand(1).getReg());
1273 addRegToUpdate(SrcMI->getOperand(0).getReg());
1274 Simplified = true;
1275 break;
1276 }
1277 case PPC::RLWINM:
1278 case PPC::RLWINM_rec:
1279 case PPC::RLWINM8:
1280 case PPC::RLWINM8_rec: {
1281 // We might replace operand 1 of the instruction which will
1282 // require we recompute kill flags for it.
1283 Register OrigOp1Reg = MI.getOperand(1).isReg()
1284 ? MI.getOperand(1).getReg()
1285 : PPC::NoRegister;
1286 Simplified = TII->combineRLWINM(MI, &ToErase);
1287 if (Simplified) {
1288 addRegToUpdate(OrigOp1Reg);
1289 if (MI.getOperand(1).isReg())
1290 addRegToUpdate(MI.getOperand(1).getReg());
1291 ++NumRotatesCollapsed;
1292 }
1293 break;
1294 }
1295 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1296 // always trap, we will delete the node if it will never trap.
1297 case PPC::TDI:
1298 case PPC::TWI:
1299 case PPC::TD:
1300 case PPC::TW: {
1301 if (!EnableTrapOptimization) break;
1302 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1303 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1304 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1305 // We can only do the optimization if we can get immediates
1306 // from both operands
1307 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1308 LiMI1->getOpcode() == PPC::LI8)))
1309 break;
1310 if (!IsOperand2Immediate &&
1311 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1312 LiMI2->getOpcode() == PPC::LI8)))
1313 break;
1314
1315 auto ImmOperand0 = MI.getOperand(0).getImm();
1316 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1317 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1318 : LiMI2->getOperand(1).getImm();
1319
1320 // We will replace the MI with an unconditional trap if it will always
1321 // trap.
1322 if ((ImmOperand0 == 31) ||
1323 ((ImmOperand0 & 0x10) &&
1324 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1325 ((ImmOperand0 & 0x8) &&
1326 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1327 ((ImmOperand0 & 0x2) &&
1328 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1329 ((ImmOperand0 & 0x1) &&
1330 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1331 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1332 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1333 TrapOpt = true;
1334 }
1335 // We will delete the MI if it will never trap.
1336 ToErase = &MI;
1337 Simplified = true;
1338 break;
1339 }
1340 }
1341 }
1342
1343 // If the last instruction was marked for elimination,
1344 // remove it now.
1345 if (ToErase) {
1346 recomputeLVForDyingInstr();
1347 ToErase->eraseFromParent();
1348 ToErase = nullptr;
1349 }
1350 // Reset TrapOpt to false at the end of the basic block.
1352 TrapOpt = false;
1353 }
1354
1355 // Eliminate all the TOC save instructions which are redundant.
1356 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1357 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1358 if (FI->mustSaveTOC())
1359 NumTOCSavesInPrologue++;
1360
1361 // We try to eliminate redundant compare instruction.
1362 Simplified |= eliminateRedundantCompare();
1363
1364 // If we have made any modifications and added any registers to the set of
1365 // registers for which we need to update the kill flags, do so by recomputing
1366 // LiveVariables for those registers.
1367 for (Register Reg : RegsToUpdate) {
1368 if (!MRI->reg_empty(Reg))
1370 }
1371 return Simplified;
1372}
1373
1374// helper functions for eliminateRedundantCompare
1375static bool isEqOrNe(MachineInstr *BI) {
1377 unsigned PredCond = PPC::getPredicateCondition(Pred);
1378 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1379}
1380
1381static bool isSupportedCmpOp(unsigned opCode) {
1382 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1383 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1384 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1385 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1386}
1387
1388static bool is64bitCmpOp(unsigned opCode) {
1389 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1390 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1391}
1392
1393static bool isSignedCmpOp(unsigned opCode) {
1394 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1395 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1396}
1397
1398static unsigned getSignedCmpOpCode(unsigned opCode) {
1399 if (opCode == PPC::CMPLD) return PPC::CMPD;
1400 if (opCode == PPC::CMPLW) return PPC::CMPW;
1401 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1402 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1403 return opCode;
1404}
1405
1406// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1407// (LT x) to (LE x-1)
1408static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1409 uint64_t Imm = CMPI->getOperand(2).getImm();
1410 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1411 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1412 return 0;
1413
1415 unsigned PredCond = PPC::getPredicateCondition(Pred);
1416 unsigned PredHint = PPC::getPredicateHint(Pred);
1417 if (PredCond == PPC::PRED_GE)
1418 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1419 if (PredCond == PPC::PRED_LT)
1420 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1421
1422 return 0;
1423}
1424
1425// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1426// (LE x) to (LT x+1)
1427static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1428 uint64_t Imm = CMPI->getOperand(2).getImm();
1429 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1430 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1431 return 0;
1432
1434 unsigned PredCond = PPC::getPredicateCondition(Pred);
1435 unsigned PredHint = PPC::getPredicateHint(Pred);
1436 if (PredCond == PPC::PRED_GT)
1437 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1438 if (PredCond == PPC::PRED_LE)
1439 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1440
1441 return 0;
1442}
1443
1444// This takes a Phi node and returns a register value for the specified BB.
1445static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1447 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1448 MachineOperand &MO = Phi->getOperand(I);
1449 if (MO.getMBB() == MBB)
1450 return Phi->getOperand(I-1).getReg();
1451 }
1452 llvm_unreachable("invalid src basic block for this Phi node\n");
1453 return 0;
1454}
1455
1456// This function tracks the source of the register through register copy.
1457// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1458// assuming that the control comes from BB1 into BB2.
1459static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1461 unsigned SrcReg = Reg;
1462 while (true) {
1463 unsigned NextReg = SrcReg;
1464 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1465 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1466 NextReg = getIncomingRegForBlock(Inst, BB1);
1467 // We track through PHI only once to avoid infinite loop.
1468 BB1 = nullptr;
1469 }
1470 else if (Inst->isFullCopy())
1471 NextReg = Inst->getOperand(1).getReg();
1472 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1473 break;
1474 SrcReg = NextReg;
1475 }
1476 return SrcReg;
1477}
1478
1479static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1480 MachineBasicBlock *&PredMBB,
1481 MachineBasicBlock *&MBBtoMoveCmp,
1483
1484 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1485 auto BII = BB.getFirstInstrTerminator();
1486 // We optimize BBs ending with a conditional branch.
1487 // We check only for BCC here, not BCCLR, because BCCLR
1488 // will be formed only later in the pipeline.
1489 if (BB.succ_size() == 2 &&
1490 BII != BB.instr_end() &&
1491 (*BII).getOpcode() == PPC::BCC &&
1492 (*BII).getOperand(1).isReg()) {
1493 // We optimize only if the condition code is used only by one BCC.
1494 Register CndReg = (*BII).getOperand(1).getReg();
1495 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1496 return false;
1497
1498 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1499 // We assume compare and branch are in the same BB for ease of analysis.
1500 if (CMPI->getParent() != &BB)
1501 return false;
1502
1503 // We skip this BB if a physical register is used in comparison.
1504 for (MachineOperand &MO : CMPI->operands())
1505 if (MO.isReg() && !MO.getReg().isVirtual())
1506 return false;
1507
1508 return true;
1509 }
1510 return false;
1511 };
1512
1513 // If this BB has more than one successor, we can create a new BB and
1514 // move the compare instruction in the new BB.
1515 // So far, we do not move compare instruction to a BB having multiple
1516 // successors to avoid potentially increasing code size.
1517 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1518 return BB.succ_size() == 1;
1519 };
1520
1521 if (!isEligibleBB(MBB))
1522 return false;
1523
1524 unsigned NumPredBBs = MBB.pred_size();
1525 if (NumPredBBs == 1) {
1526 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1527 if (isEligibleBB(*TmpMBB)) {
1528 PredMBB = TmpMBB;
1529 MBBtoMoveCmp = nullptr;
1530 return true;
1531 }
1532 }
1533 else if (NumPredBBs == 2) {
1534 // We check for partially redundant case.
1535 // So far, we support cases with only two predecessors
1536 // to avoid increasing the number of instructions.
1538 MachineBasicBlock *Pred1MBB = *PI;
1539 MachineBasicBlock *Pred2MBB = *(PI+1);
1540
1541 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1542 // We assume Pred1MBB is the BB containing the compare to be merged and
1543 // Pred2MBB is the BB to which we will append a compare instruction.
1544 // Proceed as is if Pred1MBB is different from MBB.
1545 }
1546 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1547 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1548 std::swap(Pred1MBB, Pred2MBB);
1549 }
1550 else return false;
1551
1552 if (Pred1MBB == &MBB)
1553 return false;
1554
1555 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1556 // We cannot move the compare instruction if operands are not available
1557 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1559 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1560 for (int I = 1; I <= 2; I++)
1561 if (CMPI->getOperand(I).isReg()) {
1562 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1563 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1564 return false;
1565 }
1566
1567 PredMBB = Pred1MBB;
1568 MBBtoMoveCmp = Pred2MBB;
1569 return true;
1570 }
1571
1572 return false;
1573}
1574
1575// This function will iterate over the input map containing a pair of TOC save
1576// instruction and a flag. The flag will be set to false if the TOC save is
1577// proven redundant. This function will erase from the basic block all the TOC
1578// saves marked as redundant.
1579bool PPCMIPeephole::eliminateRedundantTOCSaves(
1580 std::map<MachineInstr *, bool> &TOCSaves) {
1581 bool Simplified = false;
1582 int NumKept = 0;
1583 for (auto TOCSave : TOCSaves) {
1584 if (!TOCSave.second) {
1585 TOCSave.first->eraseFromParent();
1586 RemoveTOCSave++;
1587 Simplified = true;
1588 } else {
1589 NumKept++;
1590 }
1591 }
1592
1593 if (NumKept > 1)
1594 MultiTOCSaves++;
1595
1596 return Simplified;
1597}
1598
1599// If multiple conditional branches are executed based on the (essentially)
1600// same comparison, we merge compare instructions into one and make multiple
1601// conditional branches on this comparison.
1602// For example,
1603// if (a == 0) { ... }
1604// else if (a < 0) { ... }
1605// can be executed by one compare and two conditional branches instead of
1606// two pairs of a compare and a conditional branch.
1607//
1608// This method merges two compare instructions in two MBBs and modifies the
1609// compare and conditional branch instructions if needed.
1610// For the above example, the input for this pass looks like:
1611// cmplwi r3, 0
1612// beq 0, .LBB0_3
1613// cmpwi r3, -1
1614// bgt 0, .LBB0_4
1615// So, before merging two compares, we need to modify these instructions as
1616// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1617// beq 0, .LBB0_3
1618// cmpwi r3, 0 ; greather than -1 means greater or equal to 0
1619// bge 0, .LBB0_4
1620
1621bool PPCMIPeephole::eliminateRedundantCompare() {
1622 bool Simplified = false;
1623
1624 for (MachineBasicBlock &MBB2 : *MF) {
1625 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1626
1627 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1628 // as an optimization target if
1629 // - both MBBs end with a conditional branch,
1630 // - MBB1 is the only predecessor of MBB2, and
1631 // - compare does not take a physical register as a operand in both MBBs.
1632 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1633 //
1634 // As partially redundant case, we additionally handle if MBB2 has one
1635 // additional predecessor, which has only one successor (MBB2).
1636 // In this case, we move the compare instruction originally in MBB2 into
1637 // MBBtoMoveCmp. This partially redundant case is typically appear by
1638 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1639 //
1640 // Overview of CFG of related basic blocks
1641 // Fully redundant case Partially redundant case
1642 // -------- ---------------- --------
1643 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1644 // -------- ---------------- --------
1645 // | \ (w/ 1 succ) \ | \
1646 // | \ \ | \
1647 // | \ |
1648 // -------- --------
1649 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1650 // -------- and 2 succ) -------- and 2 succ)
1651 // | \ | \
1652 // | \ | \
1653 //
1654 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1655 continue;
1656
1657 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1658 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1659
1660 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1661 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1662 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1663
1664 // We cannot optimize an unsupported compare opcode or
1665 // a mix of 32-bit and 64-bit comparisons
1666 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1667 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1668 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1669 continue;
1670
1671 unsigned NewOpCode = 0;
1672 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1673 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1674 bool SwapOperands = false;
1675
1676 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1677 // Typically, unsigned comparison is used for equality check, but
1678 // we replace it with a signed comparison if the comparison
1679 // to be merged is a signed comparison.
1680 // In other cases of opcode mismatch, we cannot optimize this.
1681
1682 // We cannot change opcode when comparing against an immediate
1683 // if the most significant bit of the immediate is one
1684 // due to the difference in sign extension.
1685 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1686 if (!I->getOperand(2).isImm())
1687 return false;
1688 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1689 return Imm < 0;
1690 };
1691
1692 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1693 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1694 NewOpCode = CMPI1->getOpcode();
1695 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1696 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1697 NewOpCode = CMPI2->getOpcode();
1698 else continue;
1699 }
1700
1701 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1702 // In case of comparisons between two registers, these two registers
1703 // must be same to merge two comparisons.
1704 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1705 nullptr, nullptr, MRI);
1706 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1707 nullptr, nullptr, MRI);
1708 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1709 MBB1, &MBB2, MRI);
1710 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1711 MBB1, &MBB2, MRI);
1712
1713 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1714 // Same pair of registers in the same order; ready to merge as is.
1715 }
1716 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1717 // Same pair of registers in different order.
1718 // We reverse the predicate to merge compare instructions.
1720 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1721 // In case of partial redundancy, we need to swap operands
1722 // in another compare instruction.
1723 SwapOperands = true;
1724 }
1725 else continue;
1726 }
1727 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1728 // In case of comparisons between a register and an immediate,
1729 // the operand register must be same for two compare instructions.
1730 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1731 nullptr, nullptr, MRI);
1732 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1733 MBB1, &MBB2, MRI);
1734 if (Cmp1Operand1 != Cmp2Operand1)
1735 continue;
1736
1737 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1738 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1739
1740 // If immediate are not same, we try to adjust by changing predicate;
1741 // e.g. GT imm means GE (imm+1).
1742 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1743 int Diff = Imm1 - Imm2;
1744 if (Diff < -2 || Diff > 2)
1745 continue;
1746
1747 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1748 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1749 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1750 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1751 if (Diff == 2) {
1752 if (PredToInc2 && PredToDec1) {
1753 NewPredicate2 = PredToInc2;
1754 NewPredicate1 = PredToDec1;
1755 NewImm2++;
1756 NewImm1--;
1757 }
1758 }
1759 else if (Diff == 1) {
1760 if (PredToInc2) {
1761 NewImm2++;
1762 NewPredicate2 = PredToInc2;
1763 }
1764 else if (PredToDec1) {
1765 NewImm1--;
1766 NewPredicate1 = PredToDec1;
1767 }
1768 }
1769 else if (Diff == -1) {
1770 if (PredToDec2) {
1771 NewImm2--;
1772 NewPredicate2 = PredToDec2;
1773 }
1774 else if (PredToInc1) {
1775 NewImm1++;
1776 NewPredicate1 = PredToInc1;
1777 }
1778 }
1779 else if (Diff == -2) {
1780 if (PredToDec2 && PredToInc1) {
1781 NewPredicate2 = PredToDec2;
1782 NewPredicate1 = PredToInc1;
1783 NewImm2--;
1784 NewImm1++;
1785 }
1786 }
1787 }
1788
1789 // We cannot merge two compares if the immediates are not same.
1790 if (NewImm2 != NewImm1)
1791 continue;
1792 }
1793
1794 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1795 LLVM_DEBUG(CMPI1->dump());
1796 LLVM_DEBUG(BI1->dump());
1797 LLVM_DEBUG(CMPI2->dump());
1798 LLVM_DEBUG(BI2->dump());
1799 for (const MachineOperand &MO : CMPI1->operands())
1800 if (MO.isReg())
1801 addRegToUpdate(MO.getReg());
1802 for (const MachineOperand &MO : CMPI2->operands())
1803 if (MO.isReg())
1804 addRegToUpdate(MO.getReg());
1805
1806 // We adjust opcode, predicates and immediate as we determined above.
1807 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1808 CMPI1->setDesc(TII->get(NewOpCode));
1809 }
1810 if (NewPredicate1) {
1811 BI1->getOperand(0).setImm(NewPredicate1);
1812 }
1813 if (NewPredicate2) {
1814 BI2->getOperand(0).setImm(NewPredicate2);
1815 }
1816 if (NewImm1 != Imm1) {
1817 CMPI1->getOperand(2).setImm(NewImm1);
1818 }
1819
1820 if (IsPartiallyRedundant) {
1821 // We touch up the compare instruction in MBB2 and move it to
1822 // a previous BB to handle partially redundant case.
1823 if (SwapOperands) {
1824 Register Op1 = CMPI2->getOperand(1).getReg();
1825 Register Op2 = CMPI2->getOperand(2).getReg();
1826 CMPI2->getOperand(1).setReg(Op2);
1827 CMPI2->getOperand(2).setReg(Op1);
1828 }
1829 if (NewImm2 != Imm2)
1830 CMPI2->getOperand(2).setImm(NewImm2);
1831
1832 for (int I = 1; I <= 2; I++) {
1833 if (CMPI2->getOperand(I).isReg()) {
1834 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1835 if (Inst->getParent() != &MBB2)
1836 continue;
1837
1838 assert(Inst->getOpcode() == PPC::PHI &&
1839 "We cannot support if an operand comes from this BB.");
1840 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1841 CMPI2->getOperand(I).setReg(SrcReg);
1842 addRegToUpdate(SrcReg);
1843 }
1844 }
1845 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1846 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1847
1848 DebugLoc DL = CMPI2->getDebugLoc();
1849 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1850 BuildMI(MBB2, MBB2.begin(), DL,
1851 TII->get(PPC::PHI), NewVReg)
1852 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1853 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1854 BI2->getOperand(1).setReg(NewVReg);
1855 addRegToUpdate(NewVReg);
1856 }
1857 else {
1858 // We finally eliminate compare instruction in MBB2.
1859 // We do not need to treat CMPI2 specially here in terms of re-computing
1860 // live variables even though it is being deleted because:
1861 // - It defines a register that has a single use (already checked in
1862 // eligibleForCompareElimination())
1863 // - The only user (BI2) is no longer using it so the register is dead (no
1864 // def, no uses)
1865 // - We do not attempt to recompute live variables for dead registers
1866 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1867 CMPI2->eraseFromParent();
1868 }
1869
1870 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1871 LLVM_DEBUG(CMPI1->dump());
1872 LLVM_DEBUG(BI1->dump());
1873 LLVM_DEBUG(BI2->dump());
1874 if (IsPartiallyRedundant) {
1875 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1876 << printMBBReference(*MBBtoMoveCmp)
1877 << " to handle partial redundancy.\n");
1878 LLVM_DEBUG(CMPI2->dump());
1879 }
1880 Simplified = true;
1881 }
1882
1883 return Simplified;
1884}
1885
1886// We miss the opportunity to emit an RLDIC when lowering jump tables
1887// since ISEL sees only a single basic block. When selecting, the clear
1888// and shift left will be in different blocks.
1889bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1890 MachineInstr *&ToErase) {
1891 if (MI.getOpcode() != PPC::RLDICR)
1892 return false;
1893
1894 Register SrcReg = MI.getOperand(1).getReg();
1895 if (!SrcReg.isVirtual())
1896 return false;
1897
1898 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1899 if (SrcMI->getOpcode() != PPC::RLDICL)
1900 return false;
1901
1902 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1903 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1904 MachineOperand MOpSHMI = MI.getOperand(2);
1905 MachineOperand MOpMEMI = MI.getOperand(3);
1906 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1907 MOpMEMI.isImm()))
1908 return false;
1909
1910 uint64_t SHSrc = MOpSHSrc.getImm();
1911 uint64_t MBSrc = MOpMBSrc.getImm();
1912 uint64_t SHMI = MOpSHMI.getImm();
1913 uint64_t MEMI = MOpMEMI.getImm();
1914 uint64_t NewSH = SHSrc + SHMI;
1915 uint64_t NewMB = MBSrc - SHMI;
1916 if (NewMB > 63 || NewSH > 63)
1917 return false;
1918
1919 // The bits cleared with RLDICL are [0, MBSrc).
1920 // The bits cleared with RLDICR are (MEMI, 63].
1921 // After the sequence, the bits cleared are:
1922 // [0, MBSrc-SHMI) and (MEMI, 63).
1923 //
1924 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1925 if ((63 - NewSH) != MEMI)
1926 return false;
1927
1928 LLVM_DEBUG(dbgs() << "Converting pair: ");
1929 LLVM_DEBUG(SrcMI->dump());
1930 LLVM_DEBUG(MI.dump());
1931
1932 MI.setDesc(TII->get(PPC::RLDIC));
1933 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1934 MI.getOperand(2).setImm(NewSH);
1935 MI.getOperand(3).setImm(NewMB);
1936 addRegToUpdate(MI.getOperand(1).getReg());
1937 addRegToUpdate(SrcMI->getOperand(0).getReg());
1938
1939 LLVM_DEBUG(dbgs() << "To: ");
1940 LLVM_DEBUG(MI.dump());
1941 NumRotatesCollapsed++;
1942 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1943 if (MRI->use_nodbg_empty(SrcReg)) {
1944 assert(!SrcMI->hasImplicitDef() &&
1945 "Not expecting an implicit def with this instr.");
1946 ToErase = SrcMI;
1947 }
1948 return true;
1949}
1950
1951// For case in LLVM IR
1952// entry:
1953// %iconv = sext i32 %index to i64
1954// br i1 undef label %true, label %false
1955// true:
1956// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1957// ...
1958// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1959// different BBs when conducting instruction selection. We can do a peephole
1960// optimization to combine these two instructions into extswsli after
1961// instruction selection.
1962bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1963 MachineInstr *&ToErase) {
1964 if (MI.getOpcode() != PPC::RLDICR)
1965 return false;
1966
1967 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1968 return false;
1969
1970 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1971
1972 MachineOperand MOpSHMI = MI.getOperand(2);
1973 MachineOperand MOpMEMI = MI.getOperand(3);
1974 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1975 return false;
1976
1977 uint64_t SHMI = MOpSHMI.getImm();
1978 uint64_t MEMI = MOpMEMI.getImm();
1979 if (SHMI + MEMI != 63)
1980 return false;
1981
1982 Register SrcReg = MI.getOperand(1).getReg();
1983 if (!SrcReg.isVirtual())
1984 return false;
1985
1986 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1987 if (SrcMI->getOpcode() != PPC::EXTSW &&
1988 SrcMI->getOpcode() != PPC::EXTSW_32_64)
1989 return false;
1990
1991 // If the register defined by extsw has more than one use, combination is not
1992 // needed.
1993 if (!MRI->hasOneNonDBGUse(SrcReg))
1994 return false;
1995
1996 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
1997 assert(SrcMI->getOperand(1).isReg() &&
1998 "EXTSW's second operand should be a register");
1999 if (!SrcMI->getOperand(1).getReg().isVirtual())
2000 return false;
2001
2002 LLVM_DEBUG(dbgs() << "Combining pair: ");
2003 LLVM_DEBUG(SrcMI->dump());
2004 LLVM_DEBUG(MI.dump());
2005
2006 MachineInstr *NewInstr =
2007 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
2008 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
2009 : TII->get(PPC::EXTSWSLI_32_64),
2010 MI.getOperand(0).getReg())
2011 .add(SrcMI->getOperand(1))
2012 .add(MOpSHMI);
2013 (void)NewInstr;
2014
2015 LLVM_DEBUG(dbgs() << "TO: ");
2016 LLVM_DEBUG(NewInstr->dump());
2017 ++NumEXTSWAndSLDICombined;
2018 ToErase = &MI;
2019 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2020 // can recompute its kill flags. We run DCE immediately after this pass
2021 // to clean up dead instructions such as this.
2022 addRegToUpdate(NewInstr->getOperand(1).getReg());
2023 addRegToUpdate(SrcMI->getOperand(0).getReg());
2024 return true;
2025}
2026
2027} // end default namespace
2028
2030 "PowerPC MI Peephole Optimization", false, false)
2036 "PowerPC MI Peephole Optimization", false, false)
2037
2038char PPCMIPeephole::ID = 0;
2040llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define addRegToUpdate(R)
PowerPC MI Peephole Optimization
static cl::opt< bool > EnableZExtElimination("ppc-eliminate-zeroext", cl::desc("enable elimination of zero-extensions"), cl::init(true), cl::Hidden)
static cl::opt< bool > FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), cl::desc("Iterate to a fixed point when attempting to " "convert reg-reg instructions to reg-imm"))
static cl::opt< bool > EnableTrapOptimization("ppc-opt-conditional-trap", cl::desc("enable optimization of conditional traps"), cl::init(false), cl::Hidden)
static cl::opt< bool > ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), cl::desc("Convert eligible reg+reg instructions to reg+imm"))
#define DEBUG_TYPE
static cl::opt< bool > EnableSExtElimination("ppc-eliminate-signext", cl::desc("enable elimination of sign-extensions"), cl::init(true), cl::Hidden)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
unsigned pred_size() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:566
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:343
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:569
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:640
bool isFullCopy() const
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:682
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:495
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:576
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool isUsingPCRelativeCalls() const
bool isELFv2ABI() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void dump() const
Definition: Pass.cpp:136
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
bool erase(const T &V)
Definition: SmallSet.h:207
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializePPCMIPeepholePass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
@ Keep
No function return thunk.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createPPCMIPeepholePass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860