1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
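// For example, the EXTSH/EXTSW peepholes below, which fold a zero-extending
// load followed by a sign-extension into a single sign-extending load, call
// addRegToUpdate for both the narrow and the wide register and provide a
// dummy definition of the narrow register via addDummyDef, since its
// original definition is rewritten away.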
28//===---------------------------------------------------------------------===//
29
30#include "MCTargetDesc/PPCMCTargetDesc.h"
31#include "MCTargetDesc/PPCPredicates.h"
32#include "PPC.h"
33#include "PPCInstrInfo.h"
34#include "PPCMachineFunctionInfo.h"
35#include "PPCTargetMachine.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/LiveVariables.h"
38#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
39#include "llvm/CodeGen/MachineDominators.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunctionPass.h"
42#include "llvm/CodeGen/MachineInstrBuilder.h"
43#include "llvm/CodeGen/MachinePostDominators.h"
44#include "llvm/CodeGen/MachineRegisterInfo.h"
45#include "llvm/InitializePasses.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/DebugCounter.h"
48
49using namespace llvm;
50
51#define DEBUG_TYPE "ppc-mi-peepholes"
52
53STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
54STATISTIC(MultiTOCSaves,
55 "Number of functions with multiple TOC saves that must be kept");
56STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
57STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
58STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
59STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
60STATISTIC(NumConvertedToImmediateForm,
61 "Number of instructions converted to their immediate form");
62STATISTIC(NumFunctionsEnteredInMIPeephole,
63 "Number of functions entered in PPC MI Peepholes");
64STATISTIC(NumFixedPointIterations,
65 "Number of fixed-point iterations converting reg-reg instructions "
66 "to reg-imm ones");
67STATISTIC(NumRotatesCollapsed,
68 "Number of pairs of rotate left, clear left/right collapsed");
69STATISTIC(NumEXTSWAndSLDICombined,
70 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
71STATISTIC(NumLoadImmZeroFoldedAndRemoved,
72 "Number of LI(8) reg, 0 that are folded to r0 and removed");
73
74static cl::opt<bool>
75FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
76 cl::desc("Iterate to a fixed point when attempting to "
77 "convert reg-reg instructions to reg-imm"));
78
79static cl::opt<bool>
80ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
81 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
82
83static cl::opt<bool>
84 EnableSExtElimination("ppc-eliminate-signext",
85 cl::desc("enable elimination of sign-extensions"),
86 cl::init(true), cl::Hidden);
87
88static cl::opt<bool>
89 EnableZExtElimination("ppc-eliminate-zeroext",
90 cl::desc("enable elimination of zero-extensions"),
91 cl::init(true), cl::Hidden);
92
93static cl::opt<bool>
94 EnableTrapOptimization("ppc-opt-conditional-trap",
95 cl::desc("enable optimization of conditional traps"),
96 cl::init(false), cl::Hidden);
97
97
98DEBUG_COUNTER(
99 PeepholeXToICounter, "ppc-xtoi-peephole",
100 "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");
101
102DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
103 "Controls whether PPC per opcode peephole is performed on a MI");
104
105namespace {
106
107struct PPCMIPeephole : public MachineFunctionPass {
108
109 static char ID;
110 const PPCInstrInfo *TII;
111 MachineFunction *MF;
112 MachineRegisterInfo *MRI;
113 LiveVariables *LV;
114
115 PPCMIPeephole() : MachineFunctionPass(ID) {
116 initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
117 }
118
119private:
120 MachineDominatorTree *MDT;
121 MachinePostDominatorTree *MPDT;
122 MachineBlockFrequencyInfo *MBFI;
123 BlockFrequency EntryFreq;
124 SmallSet<Register, 16> RegsToUpdate;
125
126 // Initialize class variables.
127 void initialize(MachineFunction &MFParm);
128
129 // Perform peepholes.
130 bool simplifyCode();
131
132 // Perform peepholes.
133 bool eliminateRedundantCompare();
134 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
135 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
136 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
137 MachineInstr *&ToErase);
138 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
139 MachineInstr *MI);
140
141 // A number of transformations will eliminate the definition of a register
142 // as all of its uses will be removed. However, this leaves a register
143 // without a definition for LiveVariables. Such transformations should
144 // use this function to provide a dummy definition of the register that
145 // will simply be removed by DCE.
146 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
147 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
148 }
149 void addRegToUpdateWithLine(Register Reg, int Line);
150 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
151 SmallVectorImpl<MachineInstr *> &PHIs,
152 Register Dst);
153
154public:
155
156 void getAnalysisUsage(AnalysisUsage &AU) const override {
157 AU.addRequired<LiveVariablesWrapperPass>();
158 AU.addRequired<MachineDominatorTreeWrapperPass>();
159 AU.addRequired<MachinePostDominatorTreeWrapperPass>();
160 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
161 AU.addPreserved<LiveVariablesWrapperPass>();
162 AU.addPreserved<MachineDominatorTreeWrapperPass>();
163 AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
164 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
165 MachineFunctionPass::getAnalysisUsage(AU);
166 }
167
168 // Main entry point for this pass.
169 bool runOnMachineFunction(MachineFunction &MF) override {
170 initialize(MF);
171 // At this point, TOC pointer should not be used in a function that uses
172 // PC-Relative addressing.
173 assert((MF.getRegInfo().use_empty(PPC::X2) ||
175 "TOC pointer used in a function using PC-Relative addressing!");
176 if (skipFunction(MF.getFunction()))
177 return false;
178 bool Changed = simplifyCode();
179#ifndef NDEBUG
180 if (Changed)
181 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
182 "-mllvm -disable-ppc-peephole");
183#endif
184 return Changed;
185 }
186};
187
188#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
189void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
190 if (!Register::isVirtualRegister(Reg))
191 return;
192 if (RegsToUpdate.insert(Reg).second)
193 LLVM_DEBUG(dbgs() << "Adding register: " << Register::virtReg2Index(Reg)
194 << " on line " << Line
195 << " for re-computation of kill flags\n");
196}
197
198// Initialize class variables.
199void PPCMIPeephole::initialize(MachineFunction &MFParm) {
200 MF = &MFParm;
201 MRI = &MF->getRegInfo();
202 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
203 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
204 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
205 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
206 EntryFreq = MBFI->getEntryFreq();
207 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
208 RegsToUpdate.clear();
209 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
210 LLVM_DEBUG(MF->dump());
211}
212
213static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
214 MachineRegisterInfo *MRI) {
215 assert(Op && "Invalid Operand!");
216 if (!Op->isReg())
217 return nullptr;
218
219 Register Reg = Op->getReg();
220 if (!Reg.isVirtual())
221 return nullptr;
222
223 return MRI->getVRegDef(Reg);
224}
225
226// This function returns the number of known zero bits in the output of the
227// instruction defining Reg, counting from the most significant bit.
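// For example, RLDICL (rotate left doubleword immediate then clear left) with
// a mask-begin of 48 clears the 48 high-order bits, so at least 48 leading
// zeroes are known; likewise a byte load such as LBZ guarantees 56 of them.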
228static unsigned getKnownLeadingZeroCount(const unsigned Reg,
229 const PPCInstrInfo *TII,
230 const MachineRegisterInfo *MRI) {
231 MachineInstr *MI = MRI->getVRegDef(Reg);
232 unsigned Opcode = MI->getOpcode();
233 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
234 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
235 return MI->getOperand(3).getImm();
236
237 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
238 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
239 return MI->getOperand(3).getImm();
240
241 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
242 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
243 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
244 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
245 return 32 + MI->getOperand(3).getImm();
246
247 if (Opcode == PPC::ANDI_rec) {
248 uint16_t Imm = MI->getOperand(2).getImm();
249 return 48 + llvm::countl_zero(Imm);
250 }
251
252 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
253 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
254 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
255 // The result ranges from 0 to 32.
256 return 58;
257
258 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
259 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
260 // The result ranges from 0 to 64.
261 return 57;
262
263 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
264 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
265 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
266 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
267 return 48;
268
269 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
270 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
271 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
272 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
273 return 56;
274
275 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
276 Opcode == PPC::AND8_rec)
277 return std::max(
278 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
279 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
280
281 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
282 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
283 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
284 Opcode == PPC::XOR8_rec)
285 return std::min(
286 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
287 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
288
289 if (TII->isZeroExtended(Reg, MRI))
290 return 32;
291
292 return 0;
293}
294
295// This function maintains a map of pairs <TOC Save Instr, Keep>.
296// Each time a new TOC save is encountered, it checks whether any of the
297// existing ones are dominated by the new one. If so, it marks the existing
298// one as redundant by setting its entry in the map to false. It then adds
299// the new instruction to the map with either true or false depending on
300// whether any existing instruction dominates the new one.
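// For example, a TOC save in the entry block marks every later save it
// dominates as redundant; all entries left as false are erased afterwards by
// eliminateRedundantTOCSaves().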
301void PPCMIPeephole::UpdateTOCSaves(
302 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
303 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
304 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
305 // here only support it under ELFv2.
306 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
307 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
308
309 MachineBasicBlock *Entry = &MF->front();
310 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
311
312 // If the block in which the TOC save resides post-dominates Entry, or is
313 // hotter than the entry block (keep in mind that early MachineLICM has
314 // already run, so the TOC save won't be hoisted), we can just do the save
315 // in the prologue.
316 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
317 FI->setMustSaveTOC(true);
318
319 // If we are saving the TOC in the prologue, all the TOC saves can be
320 // removed from the code.
321 if (FI->mustSaveTOC()) {
322 for (auto &TOCSave : TOCSaves)
323 TOCSave.second = false;
324 // Add new instruction to map.
325 TOCSaves[MI] = false;
326 return;
327 }
328 }
329
330 bool Keep = true;
331 for (auto &I : TOCSaves) {
332 MachineInstr *CurrInst = I.first;
333 // If new instruction dominates an existing one, mark existing one as
334 // redundant.
335 if (I.second && MDT->dominates(MI, CurrInst))
336 I.second = false;
337 // Check if the new instruction is redundant.
338 if (MDT->dominates(CurrInst, MI)) {
339 Keep = false;
340 break;
341 }
342 }
343 // Add new instruction to map.
344 TOCSaves[MI] = Keep;
345}
346
347// This function returns a list of all PHI nodes in the tree starting from
348// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
349// The list initially only contains the root PHI. When we visit a PHI node, we
350// add it to the list. We continue to look for other PHI node operands while
351// there are nodes to visit in the list. The function returns false if the
352// optimization cannot be applied on this tree.
353static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
354 MachineInstr *RootPHI,
355 SmallVectorImpl<MachineInstr *> &PHIs) {
356 PHIs.push_back(RootPHI);
357 unsigned VisitedIndex = 0;
358 while (VisitedIndex < PHIs.size()) {
359 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
360 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
361 PHIOp != NumOps; PHIOp += 2) {
362 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
363 if (!RegOp.isVirtual())
364 return false;
365 MachineInstr *Instr = MRI->getVRegDef(RegOp);
366 // While collecting the PHI nodes, we check if they can be converted (i.e.
367 // all the operands are either copies, implicit defs or PHI nodes).
368 unsigned Opcode = Instr->getOpcode();
369 if (Opcode == PPC::COPY) {
370 Register Reg = Instr->getOperand(1).getReg();
371 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
372 return false;
373 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
374 return false;
375 // If we detect a cycle in the PHI nodes, we exit. It would be
376 // possible to change cycles as well, but that would add a lot
377 // of complexity for a case that is unlikely to occur with MMA
378 // code.
379 if (Opcode != PPC::PHI)
380 continue;
381 if (llvm::is_contained(PHIs, Instr))
382 return false;
383 PHIs.push_back(Instr);
384 }
385 VisitedIndex++;
386 }
387 return true;
388}
389
390// This function changes the unprimed accumulator PHI nodes in the PHIs list to
391// primed accumulator PHI nodes. The list is traversed in reverse order to
392// change all the PHI operands of a PHI node before changing the node itself.
393// We keep a map to associate each changed PHI node to its non-changed form.
394void PPCMIPeephole::convertUnprimedAccPHIs(
395 const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
396 SmallVectorImpl<MachineInstr *> &PHIs, Register Dst) {
397 DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
398 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
399 SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
400 // We check if the current PHI node can be changed by looking at its
401 // operands. If all the operands are either copies from primed
402 // accumulators, implicit definitions or other unprimed accumulator
403 // PHI nodes, we change it.
404 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
405 PHIOp += 2) {
406 Register RegOp = PHI->getOperand(PHIOp).getReg();
407 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
408 unsigned Opcode = PHIInput->getOpcode();
409 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
410 Opcode == PPC::PHI) &&
411 "Unexpected instruction");
412 if (Opcode == PPC::COPY) {
413 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
414 &PPC::ACCRCRegClass &&
415 "Unexpected register class");
416 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
417 } else if (Opcode == PPC::IMPLICIT_DEF) {
418 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
419 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
420 TII->get(PPC::IMPLICIT_DEF), AccReg);
421 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
422 PHI->getOperand(PHIOp + 1)});
423 } else if (Opcode == PPC::PHI) {
424 // We found a PHI operand. At this point we know this operand
425 // has already been changed so we get its associated changed form
426 // from the map.
427 assert(ChangedPHIMap.count(PHIInput) == 1 &&
428 "This PHI node should have already been changed.");
429 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
430 PHIOps.push_back({MachineOperand::CreateReg(
431 PrimedAccPHI->getOperand(0).getReg(), false),
432 PHI->getOperand(PHIOp + 1)});
433 }
434 }
435 Register AccReg = Dst;
436 // If the PHI node we are changing is the root node, the register it defines
437 // will be the destination register of the original copy (of the PHI def).
438 // For all other PHI's in the list, we need to create another primed
439 // accumulator virtual register as the PHI will no longer define the
440 // unprimed accumulator.
441 if (PHI != PHIs[0])
442 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
443 MachineInstrBuilder NewPHI = BuildMI(
444 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
445 for (auto RegMBB : PHIOps) {
446 NewPHI.add(RegMBB.first).add(RegMBB.second);
447 if (MRI->isSSA())
448 addRegToUpdate(RegMBB.first.getReg());
449 }
450 // The liveness of old PHI and new PHI have to be updated.
451 addRegToUpdate(PHI->getOperand(0).getReg());
452 addRegToUpdate(AccReg);
453 ChangedPHIMap[PHI] = NewPHI.getInstr();
454 LLVM_DEBUG(dbgs() << "Converting PHI: ");
455 LLVM_DEBUG(PHI->dump());
456 LLVM_DEBUG(dbgs() << "To: ");
457 LLVM_DEBUG(NewPHI.getInstr()->dump());
458 }
459}
460
461// Perform peephole optimizations.
462bool PPCMIPeephole::simplifyCode() {
463 bool Simplified = false;
464 bool TrapOpt = false;
465 MachineInstr* ToErase = nullptr;
466 std::map<MachineInstr *, bool> TOCSaves;
467 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
468 NumFunctionsEnteredInMIPeephole++;
469 if (ConvertRegReg) {
470 // Fixed-point conversion of reg/reg instructions fed by load-immediate
471 // into reg/imm instructions. FIXME: This is expensive, control it with
472 // an option.
473 bool SomethingChanged = false;
474 do {
475 NumFixedPointIterations++;
476 SomethingChanged = false;
477 for (MachineBasicBlock &MBB : *MF) {
478 for (MachineInstr &MI : MBB) {
479 if (MI.isDebugInstr())
480 continue;
481
482 if (!DebugCounter::shouldExecute(PeepholeXToICounter))
483 continue;
484
485 SmallSet<Register, 4> RRToRIRegsToUpdate;
486 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
487 continue;
488 for (Register R : RRToRIRegsToUpdate)
489 addRegToUpdate(R);
490 // The updated instruction may now have new register operands.
491 // Conservatively add them to recompute the flags as well.
492 for (const MachineOperand &MO : MI.operands())
493 if (MO.isReg())
494 addRegToUpdate(MO.getReg());
495 // We don't erase anything in case the def has other uses. Let DCE
496 // remove it if it can be removed.
497 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
498 LLVM_DEBUG(MI.dump());
499 NumConvertedToImmediateForm++;
500 SomethingChanged = true;
501 Simplified = true;
502 }
503 }
504 } while (SomethingChanged && FixedPointRegToImm);
505 }
506
507 // Since we are deleting this instruction, we need to run LiveVariables
508 // on any of its definitions that are marked as needing an update since
509 // we can't run LiveVariables on a deleted register. This only needs
510 // to be done for defs since uses will have their own defining
511 // instructions so we won't be running LiveVariables on a deleted reg.
512 auto recomputeLVForDyingInstr = [&]() {
513 if (RegsToUpdate.empty())
514 return;
515 for (MachineOperand &MO : ToErase->operands()) {
516 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
517 continue;
518 Register RegToUpdate = MO.getReg();
519 RegsToUpdate.erase(RegToUpdate);
520 // If some transformation has introduced an additional definition of
521 // this register (breaking SSA), we can safely convert this def to
522 // a def of an invalid register as the instruction is going away.
523 if (!MRI->getUniqueVRegDef(RegToUpdate))
524 MO.setReg(PPC::NoRegister);
525 LV->recomputeForSingleDefVirtReg(RegToUpdate);
526 }
527 };
528
529 for (MachineBasicBlock &MBB : *MF) {
530 for (MachineInstr &MI : MBB) {
531
532 // If the previous instruction was marked for elimination,
533 // remove it now.
534 if (ToErase) {
535 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
536 LLVM_DEBUG(ToErase->dump());
537 recomputeLVForDyingInstr();
538 ToErase->eraseFromParent();
539 ToErase = nullptr;
540 }
541 // If a conditional trap instruction got optimized to an
542 // unconditional trap, eliminate all the instructions after
543 // the trap.
544 if (EnableTrapOptimization && TrapOpt) {
545 ToErase = &MI;
546 continue;
547 }
548
549 // Ignore debug instructions.
550 if (MI.isDebugInstr())
551 continue;
552
553 if (!DebugCounter::shouldExecute(PeepholePerOpCounter))
554 continue;
555
556 // Per-opcode peepholes.
557 switch (MI.getOpcode()) {
558
559 default:
560 break;
561 case PPC::COPY: {
562 Register Src = MI.getOperand(1).getReg();
563 Register Dst = MI.getOperand(0).getReg();
564 if (!Src.isVirtual() || !Dst.isVirtual())
565 break;
566 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
567 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
568 break;
569
570 // We are copying an unprimed accumulator to a primed accumulator.
571 // If the input to the copy is a PHI that is fed only by (i) copies in
572 // the other direction (ii) implicitly defined unprimed accumulators or
573 // (iii) other PHI nodes satisfying (i) and (ii), we can change
574 // the PHI to a PHI on primed accumulators (as long as we also change
575 // its operands). To detect and change such copies, we first get a list
576 // of all the PHI nodes starting from the root PHI node in BFS order.
577 // We then visit all these PHI nodes to check if they can be changed to
578 // primed accumulator PHI nodes and if so, we change them.
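 // Roughly, for a chain such as
 //   %p:accrc  = COPY %u:uaccrc
 //   %u:uaccrc = PHI %a:uaccrc, %bb.1, %b:uaccrc, %bb.2
 //   %a:uaccrc = COPY %x:accrc
 // the PHI is rebuilt over primed accumulators so that it defines %p
 // directly, and this COPY is then erased (register numbers illustrative).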
579 MachineInstr *RootPHI = MRI->getVRegDef(Src);
580 if (RootPHI->getOpcode() != PPC::PHI)
581 break;
582
583 SmallVector<MachineInstr *, 4> PHIs;
584 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
585 break;
586
587 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
588
589 ToErase = &MI;
590 break;
591 }
592 case PPC::LI:
593 case PPC::LI8: {
594 // If we are materializing a zero, look for any use operands for which
595 // zero means immediate zero. All such operands can be replaced with
596 // PPC::ZERO.
597 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
598 break;
599 Register MIDestReg = MI.getOperand(0).getReg();
600 bool Folded = false;
601 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
602 Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
603 if (MRI->use_nodbg_empty(MIDestReg)) {
604 ++NumLoadImmZeroFoldedAndRemoved;
605 ToErase = &MI;
606 }
607 if (Folded)
608 addRegToUpdate(MIDestReg);
609 Simplified |= Folded;
610 break;
611 }
612 case PPC::STW:
613 case PPC::STD: {
614 MachineFrameInfo &MFI = MF->getFrameInfo();
615 if (MFI.hasVarSizedObjects() ||
616 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
617 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
618 break;
619 // When encountering a TOC save instruction, call UpdateTOCSaves
620 // to add it to the TOCSaves map and mark any existing TOC saves
621 // it dominates as redundant.
622 if (TII->isTOCSaveMI(MI))
623 UpdateTOCSaves(TOCSaves, &MI);
624 break;
625 }
626 case PPC::XXPERMDI: {
627 // Perform simplifications of 2x64 vector swaps and splats.
628 // A swap is identified by an immediate value of 2, and a splat
629 // is identified by an immediate value of 0 or 3.
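 // (XXPERMDI t, a, b, imm selects doubleword (imm >> 1) of a for the
 // result's doubleword 0 and doubleword (imm & 1) of b for doubleword 1,
 // which is what gives the encodings above when both sources are equal.)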
630 int Immed = MI.getOperand(3).getImm();
631
632 if (Immed == 1)
633 break;
634
635 // For each of these simplifications, we need the two source
636 // regs to match. Unfortunately, MachineCSE ignores COPY and
637 // SUBREG_TO_REG, so for example we can see
638 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
639 // We have to look through chains of COPY and SUBREG_TO_REG
640 // to find the real source values for comparison.
641 Register TrueReg1 =
642 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
643 Register TrueReg2 =
644 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
645
646 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
647 break;
648
649 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
650
651 if (!DefMI)
652 break;
653
654 unsigned DefOpc = DefMI->getOpcode();
655
656 // If this is a splat fed by a splatting load, the splat is
657 // redundant. Replace with a copy. This doesn't happen directly due
658 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
659 // a load of a double to a vector of 64-bit integers.
660 auto isConversionOfLoadAndSplat = [=]() -> bool {
661 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
662 return false;
663 Register FeedReg1 =
664 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
665 if (FeedReg1.isVirtual()) {
666 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
667 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
668 return true;
669 }
670 return false;
671 };
672 if ((Immed == 0 || Immed == 3) &&
673 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
674 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
675 "to load-and-splat/copy: ");
676 LLVM_DEBUG(MI.dump());
677 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
678 MI.getOperand(0).getReg())
679 .add(MI.getOperand(1));
680 addRegToUpdate(MI.getOperand(1).getReg());
681 ToErase = &MI;
682 Simplified = true;
683 }
684
685 // If this is a splat or a swap fed by another splat, we
686 // can replace it with a copy.
687 if (DefOpc == PPC::XXPERMDI) {
688 Register DefReg1 = DefMI->getOperand(1).getReg();
689 Register DefReg2 = DefMI->getOperand(2).getReg();
690 unsigned DefImmed = DefMI->getOperand(3).getImm();
691
692 // If the two inputs are not the same register, check to see if
693 // they originate from the same virtual register after only
694 // copy-like instructions.
695 if (DefReg1 != DefReg2) {
696 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
697 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
698
699 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
700 break;
701 }
702
703 if (DefImmed == 0 || DefImmed == 3) {
704 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
705 "to splat/copy: ");
706 LLVM_DEBUG(MI.dump());
707 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
708 MI.getOperand(0).getReg())
709 .add(MI.getOperand(1));
710 addRegToUpdate(MI.getOperand(1).getReg());
711 ToErase = &MI;
712 Simplified = true;
713 }
714
715 // If this is a splat fed by a swap, we can simply modify
716 // the splat to splat the other value from the swap's input
717 // parameter.
718 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
719 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
720 LLVM_DEBUG(MI.dump());
721 addRegToUpdate(MI.getOperand(1).getReg());
722 addRegToUpdate(MI.getOperand(2).getReg());
723 MI.getOperand(1).setReg(DefReg1);
724 MI.getOperand(2).setReg(DefReg2);
725 MI.getOperand(3).setImm(3 - Immed);
726 addRegToUpdate(DefReg1);
727 addRegToUpdate(DefReg2);
728 Simplified = true;
729 }
730
731 // If this is a swap fed by a swap, we can replace it
732 // with a copy from the first swap's input.
733 else if (Immed == 2 && DefImmed == 2) {
734 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
735 LLVM_DEBUG(MI.dump());
736 addRegToUpdate(MI.getOperand(1).getReg());
737 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
738 MI.getOperand(0).getReg())
739 .add(DefMI->getOperand(1));
740 addRegToUpdate(DefMI->getOperand(0).getReg());
741 addRegToUpdate(DefMI->getOperand(1).getReg());
742 ToErase = &MI;
743 Simplified = true;
744 }
745 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
746 DefOpc == PPC::XXPERMDIs &&
747 (DefMI->getOperand(2).getImm() == 0 ||
748 DefMI->getOperand(2).getImm() == 3)) {
749 ToErase = &MI;
750 Simplified = true;
751 // Swap of a splat, convert to copy.
752 if (Immed == 2) {
753 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
754 LLVM_DEBUG(MI.dump());
755 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
756 MI.getOperand(0).getReg())
757 .add(MI.getOperand(1));
758 addRegToUpdate(MI.getOperand(1).getReg());
759 break;
760 }
761 // Splat fed by another splat - switch the output of the first
762 // and remove the second.
763 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
764 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
765 LLVM_DEBUG(MI.dump());
766 } else if (Immed == 2 &&
767 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
768 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
769 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
770 DefOpc == PPC::VSPLTISW)) {
771 // Swap of various vector splats, convert to copy.
772 ToErase = &MI;
773 Simplified = true;
774 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
775 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
776 LLVM_DEBUG(MI.dump());
777 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
778 MI.getOperand(0).getReg())
779 .add(MI.getOperand(1));
780 addRegToUpdate(MI.getOperand(1).getReg());
781 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
782 TII->isLoadFromConstantPool(DefMI)) {
783 const Constant *C = TII->getConstantFromConstantPool(DefMI);
784 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
785 ToErase = &MI;
786 Simplified = true;
787 LLVM_DEBUG(dbgs()
788 << "Optimizing swap(splat pattern from constant-pool) "
789 "=> copy(splat pattern from constant-pool): ");
790 LLVM_DEBUG(MI.dump());
791 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
792 MI.getOperand(0).getReg())
793 .add(MI.getOperand(1));
794 addRegToUpdate(MI.getOperand(1).getReg());
795 }
796 }
797 break;
798 }
799 case PPC::VSPLTB:
800 case PPC::VSPLTH:
801 case PPC::XXSPLTW: {
802 unsigned MyOpcode = MI.getOpcode();
803 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
804 Register TrueReg =
805 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
806 if (!TrueReg.isVirtual())
807 break;
808 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
809 if (!DefMI)
810 break;
811 unsigned DefOpcode = DefMI->getOpcode();
812 auto isConvertOfSplat = [=]() -> bool {
813 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
814 return false;
815 Register ConvReg = DefMI->getOperand(1).getReg();
816 if (!ConvReg.isVirtual())
817 return false;
818 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
819 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
820 Splt->getOpcode() == PPC::XXSPLTW);
821 };
822 bool AlreadySplat = (MyOpcode == DefOpcode) ||
823 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
824 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
825 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
826 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
827 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
828 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
829 // If the instruction[s] that feed this splat have already splat
830 // the value, this splat is redundant.
831 if (AlreadySplat) {
832 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
833 LLVM_DEBUG(MI.dump());
834 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
835 MI.getOperand(0).getReg())
836 .add(MI.getOperand(OpNo));
837 addRegToUpdate(MI.getOperand(OpNo).getReg());
838 ToErase = &MI;
839 Simplified = true;
840 }
841 // Splat fed by a shift. This usually happens when the value to be
842 // splatted has been shifted into vector element zero.
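 // For example, XXSLDWI %t, %s, %s, 1 followed by XXSPLTW %d, %t, 0 splats
 // word 1 of %s, so the splat below is redirected to read %s with the
 // adjusted element index and the single-use shift can be erased
 // (register numbers illustrative).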
843 if (DefOpcode == PPC::XXSLDWI) {
844 Register ShiftRes = DefMI->getOperand(0).getReg();
845 Register ShiftOp1 = DefMI->getOperand(1).getReg();
846 Register ShiftOp2 = DefMI->getOperand(2).getReg();
847 unsigned ShiftImm = DefMI->getOperand(3).getImm();
848 unsigned SplatImm =
849 MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
850 if (ShiftOp1 == ShiftOp2) {
851 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
852 if (MRI->hasOneNonDBGUse(ShiftRes)) {
853 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
854 LLVM_DEBUG(DefMI->dump());
855 ToErase = DefMI;
856 }
857 Simplified = true;
858 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
859 << " to " << NewElem << " in instruction: ");
860 LLVM_DEBUG(MI.dump());
861 addRegToUpdate(MI.getOperand(OpNo).getReg());
862 addRegToUpdate(ShiftOp1);
863 MI.getOperand(OpNo).setReg(ShiftOp1);
864 MI.getOperand(2).setImm(NewElem);
865 }
866 }
867 break;
868 }
869 case PPC::XVCVDPSP: {
870 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
871 Register TrueReg =
872 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
873 if (!TrueReg.isVirtual())
874 break;
875 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
876
877 // This can occur when building a vector of single precision or integer
878 // values.
879 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
880 Register DefsReg1 =
881 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
882 Register DefsReg2 =
883 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
884 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
885 break;
886 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
887 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
888
889 if (!P1 || !P2)
890 break;
891
892 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
893 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
894 // the FRSP/XSRSP.
895 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
896 unsigned Opc = RoundInstr->getOpcode();
897 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
898 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
899 Simplified = true;
900 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
901 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
902 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
903 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
904 if (Use.getOperand(i).isReg() &&
905 Use.getOperand(i).getReg() == FRSPDefines)
906 Use.getOperand(i).setReg(ConvReg1);
907 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
908 LLVM_DEBUG(RoundInstr->dump());
909 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
910 LLVM_DEBUG(MI.dump());
911 LLVM_DEBUG(dbgs() << "Through instruction:\n");
912 LLVM_DEBUG(Use.dump());
913 addRegToUpdate(ConvReg1);
914 addRegToUpdate(FRSPDefines);
915 ToErase = RoundInstr;
916 }
917 };
918
919 // If the input to XVCVDPSP is a vector that was built (even
920 // partially) out of FRSP's, the FRSP(s) can safely be removed
921 // since this instruction performs the same operation.
922 if (P1 != P2) {
923 removeFRSPIfPossible(P1);
924 removeFRSPIfPossible(P2);
925 break;
926 }
927 removeFRSPIfPossible(P1);
928 }
929 break;
930 }
931 case PPC::EXTSH:
932 case PPC::EXTSH8:
933 case PPC::EXTSH8_32_64: {
934 if (!EnableSExtElimination) break;
935 Register NarrowReg = MI.getOperand(1).getReg();
936 if (!NarrowReg.isVirtual())
937 break;
938
939 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
940 unsigned SrcOpcode = SrcMI->getOpcode();
941 // If we've used a zero-extending load that we will sign-extend,
942 // just do a sign-extending load.
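 // e.g. %2 = LHZ ...; %3 = EXTSH %2 becomes a single %3 = LHA ... (or the
 // LHA8/LHAX/LHAX8 variant chosen below; register numbers illustrative).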
943 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
944 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
945 break;
946 // Determine the new opcode. We need to make sure that if the original
947 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
948 // Likewise if the source is X-Form the new opcode should also be
949 // X-Form.
950 unsigned Opc = PPC::LHA;
951 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
952 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
953 MI.getOpcode() == PPC::EXTSH8_32_64;
954
955 if (SourceIsXForm && MIIs64Bit)
956 Opc = PPC::LHAX8;
957 else if (SourceIsXForm && !MIIs64Bit)
958 Opc = PPC::LHAX;
959 else if (MIIs64Bit)
960 Opc = PPC::LHA8;
961
962 addRegToUpdate(NarrowReg);
963 addRegToUpdate(MI.getOperand(0).getReg());
964
965 // We are removing a definition of NarrowReg which will cause
966 // problems in AliveBlocks. Add an implicit def that will be
967 // removed so that AliveBlocks are updated correctly.
968 addDummyDef(MBB, &MI, NarrowReg);
969 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
970 LLVM_DEBUG(SrcMI->dump());
971 LLVM_DEBUG(dbgs() << "and sign-extension\n");
972 LLVM_DEBUG(MI.dump());
973 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
974 SrcMI->setDesc(TII->get(Opc));
975 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
976 ToErase = &MI;
977 Simplified = true;
978 NumEliminatedSExt++;
979 }
980 break;
981 }
982 case PPC::EXTSW:
983 case PPC::EXTSW_32:
984 case PPC::EXTSW_32_64: {
985 if (!EnableSExtElimination) break;
986 Register NarrowReg = MI.getOperand(1).getReg();
987 if (!NarrowReg.isVirtual())
988 break;
989
990 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
991 unsigned SrcOpcode = SrcMI->getOpcode();
992 // If we've used a zero-extending load that we will sign-extend,
993 // just do a sign-extending load.
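 // e.g. %2 = LWZ ...; %3 = EXTSW %2 becomes %3 = LWA ..., which is a DS-form
 // load and is why the 4-byte displacement alignment is checked below.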
994 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
995 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
996 break;
997
998 // The transformation from a zero-extending load to a sign-extending
999 // load is only legal when the displacement is a multiple of 4.
1000 // If the displacement is not at least 4 byte aligned, don't perform
1001 // the transformation.
1002 bool IsWordAligned = false;
1003 if (SrcMI->getOperand(1).isGlobal()) {
1004 const GlobalObject *GO =
1005 dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
1006 if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
1007 (SrcMI->getOperand(1).getOffset() % 4 == 0))
1008 IsWordAligned = true;
1009 } else if (SrcMI->getOperand(1).isImm()) {
1010 int64_t Value = SrcMI->getOperand(1).getImm();
1011 if (Value % 4 == 0)
1012 IsWordAligned = true;
1013 }
1014
1015 // Determine the new opcode. We need to make sure that if the original
1016 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1017 // Likewise if the source is X-Form the new opcode should also be
1018 // X-Form.
1019 unsigned Opc = PPC::LWA_32;
1020 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1021 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1022 MI.getOpcode() == PPC::EXTSW_32_64;
1023
1024 if (SourceIsXForm && MIIs64Bit)
1025 Opc = PPC::LWAX;
1026 else if (SourceIsXForm && !MIIs64Bit)
1027 Opc = PPC::LWAX_32;
1028 else if (MIIs64Bit)
1029 Opc = PPC::LWA;
1030
1031 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1032 break;
1033
1034 addRegToUpdate(NarrowReg);
1035 addRegToUpdate(MI.getOperand(0).getReg());
1036
1037 // We are removing a definition of NarrowReg which will cause
1038 // problems in AliveBlocks. Add an implicit def that will be
1039 // removed so that AliveBlocks are updated correctly.
1040 addDummyDef(MBB, &MI, NarrowReg);
1041 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1042 LLVM_DEBUG(SrcMI->dump());
1043 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1044 LLVM_DEBUG(MI.dump());
1045 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1046 SrcMI->setDesc(TII->get(Opc));
1047 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1048 ToErase = &MI;
1049 Simplified = true;
1050 NumEliminatedSExt++;
1051 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1052 TII->isSignExtended(NarrowReg, MRI)) {
1053 // We can eliminate EXTSW if the input is known to be already
1054 // sign-extended. However, we are not sure whether a spill will occur
1055 // during register allocation. If there is no promotion, it will use
1056 // 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
1057 // since the register class is 32-bits. Consequently, the high 32-bit
1058 // information will be lost. Therefore, all these instructions in the
1059 // chain used to deduce sign extension to eliminate the 'extsw' will
1060 // need to be promoted to 64-bit pseudo instructions when the 'extsw'
1061 // is eliminated.
1062 TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
1063
1064 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1065 Register TmpReg =
1066 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1067 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1068 TmpReg);
1069 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1070 MI.getOperand(0).getReg())
1071 .addReg(TmpReg)
1072 .addReg(NarrowReg)
1073 .addImm(PPC::sub_32);
1074 ToErase = &MI;
1075 Simplified = true;
1076 NumEliminatedSExt++;
1077 }
1078 break;
1079 }
1080 case PPC::RLDICL: {
1081 // We can eliminate RLDICL (e.g. for zero-extension)
1082 // if all bits to clear are already zero in the input.
1083 // This code assumes the following code sequence for zero-extension.
1084 // %6 = COPY %5:sub_32; (optional)
1085 // %8 = IMPLICIT_DEF;
1086 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1087 if (!EnableZExtElimination) break;
1088
1089 if (MI.getOperand(2).getImm() != 0)
1090 break;
1091
1092 Register SrcReg = MI.getOperand(1).getReg();
1093 if (!SrcReg.isVirtual())
1094 break;
1095
1096 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1097 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1098 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1099 break;
1100
1101 MachineInstr *ImpDefMI, *SubRegMI;
1102 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1103 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1104 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1105
1106 SrcMI = SubRegMI;
1107 if (SubRegMI->getOpcode() == PPC::COPY) {
1108 Register CopyReg = SubRegMI->getOperand(1).getReg();
1109 if (CopyReg.isVirtual())
1110 SrcMI = MRI->getVRegDef(CopyReg);
1111 }
1112 if (!SrcMI->getOperand(0).isReg())
1113 break;
1114
1115 unsigned KnownZeroCount =
1116 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1117 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1118 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1119 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1120 MI.getOperand(0).getReg())
1121 .addReg(SrcReg);
1122 addRegToUpdate(SrcReg);
1123 ToErase = &MI;
1124 Simplified = true;
1125 NumEliminatedZExt++;
1126 }
1127 break;
1128 }
1129
1130 // TODO: Any instruction that has an immediate form fed only by a PHI
1131 // whose operands are all load immediate can be folded away. We currently
1132 // do this for ADD instructions, but should expand it to arithmetic and
1133 // binary instructions with immediate forms in the future.
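 // Sketch of the ADD case handled here: a PHI whose incoming values are all
 // single-use LI/LI8 instructions, added to a dominating register, has those
 // LIs rewritten as ADDI/ADDI8 off that register and the ADD itself replaced
 // by a COPY of the PHI result.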
1134 case PPC::ADD4:
1135 case PPC::ADD8: {
1136 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1137 assert(PhiOp && "Invalid Operand!");
1138 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1139
1140 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1141 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1142 };
1143
1144 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1145 MachineOperand *PhiOp) {
1146 assert(PhiOp && "Invalid Operand!");
1147 assert(DominatorOp && "Invalid Operand!");
1148 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1149 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1150
1151 // Note: the vregs only show up at the odd operand indices of the PHI
1152 // node; the even indices hold the predecessor MBB info.
1153 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1154 MachineInstr *LiMI =
1155 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1156 if (!LiMI ||
1157 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1158 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1159 !MDT->dominates(DefDomMI, LiMI))
1160 return false;
1161 }
1162
1163 return true;
1164 };
1165
1166 MachineOperand Op1 = MI.getOperand(1);
1167 MachineOperand Op2 = MI.getOperand(2);
1168 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1169 std::swap(Op1, Op2);
1170 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1171 break; // We don't have an ADD fed by LI's that can be transformed
1172
1173 // Now we know that Op1 is the PHI node and Op2 is the dominator
1174 Register DominatorReg = Op2.getReg();
1175
1176 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1177 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1178 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1179 MRI->setRegClass(DominatorReg, TRC);
1180
1181 // replace LIs with ADDIs
1182 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1183 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1184 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1185 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1186 LLVM_DEBUG(LiMI->dump());
1187
1188 // There could be repeated registers in the PHI, e.g: %1 =
1189 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1190 // already replaced the def instruction, skip.
1191 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1192 continue;
1193
1194 assert((LiMI->getOpcode() == PPC::LI ||
1195 LiMI->getOpcode() == PPC::LI8) &&
1196 "Invalid Opcode!");
1197 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1198 LiMI->removeOperand(1); // remove the imm of LI
1199 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1200 : PPC::ADDI8));
1201 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1202 .addReg(DominatorReg)
1203 .addImm(LiImm); // restore the imm of LI
1204 LLVM_DEBUG(LiMI->dump());
1205 }
1206
1207 // Replace ADD with COPY
1208 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1209 LLVM_DEBUG(MI.dump());
1210 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1211 MI.getOperand(0).getReg())
1212 .add(Op1);
1213 addRegToUpdate(Op1.getReg());
1214 addRegToUpdate(Op2.getReg());
1215 ToErase = &MI;
1216 Simplified = true;
1217 NumOptADDLIs++;
1218 break;
1219 }
1220 case PPC::RLDICR: {
1221 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1222 combineSEXTAndSHL(MI, ToErase);
1223 break;
1224 }
1225 case PPC::ANDI_rec:
1226 case PPC::ANDI8_rec:
1227 case PPC::ANDIS_rec:
1228 case PPC::ANDIS8_rec: {
1229 Register TrueReg =
1230 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1231 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1232 break;
1233
1234 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1235 if (!SrcMI)
1236 break;
1237
1238 unsigned SrcOpCode = SrcMI->getOpcode();
1239 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1240 break;
1241
1242 Register SrcReg, DstReg;
1243 SrcReg = SrcMI->getOperand(1).getReg();
1244 DstReg = MI.getOperand(1).getReg();
1245 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1246 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1247 if (DstRC != SrcRC)
1248 break;
1249
1250 uint64_t AndImm = MI.getOperand(2).getImm();
1251 if (MI.getOpcode() == PPC::ANDIS_rec ||
1252 MI.getOpcode() == PPC::ANDIS8_rec)
1253 AndImm <<= 16;
1254 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1255 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1256 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1257
1258 // We can transform `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` into `ANDI_rec 0`
1259 // if all bits to AND are already zero in the input.
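 // e.g. %2 = RLDICL %1, 0, 56 keeps only the low 8 bits, so a subsequent
 // ANDI_rec %2, 0xFF00 always yields zero and is rewritten below as
 // ANDI_rec %1, 0 (register numbers illustrative).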
1260 bool PatternResultZero =
1261 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1262 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1263
1264 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1265 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1266 bool PatternRemoveRotate =
1267 SrcMI->getOperand(2).getImm() == 0 &&
1268 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1269 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1270
1271 if (!PatternResultZero && !PatternRemoveRotate)
1272 break;
1273
1274 LLVM_DEBUG(dbgs() << "Combining pair: ");
1275 LLVM_DEBUG(SrcMI->dump());
1276 LLVM_DEBUG(MI.dump());
1277 if (PatternResultZero)
1278 MI.getOperand(2).setImm(0);
1279 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1280 LLVM_DEBUG(dbgs() << "To: ");
1281 LLVM_DEBUG(MI.dump());
1282 addRegToUpdate(MI.getOperand(1).getReg());
1283 addRegToUpdate(SrcMI->getOperand(0).getReg());
1284 Simplified = true;
1285 break;
1286 }
1287 case PPC::RLWINM:
1288 case PPC::RLWINM_rec:
1289 case PPC::RLWINM8:
1290 case PPC::RLWINM8_rec: {
1291 // We might replace operand 1 of the instruction which will
1292 // require we recompute kill flags for it.
1293 Register OrigOp1Reg = MI.getOperand(1).isReg()
1294 ? MI.getOperand(1).getReg()
1295 : PPC::NoRegister;
1296 Simplified = TII->combineRLWINM(MI, &ToErase);
1297 if (Simplified) {
1298 addRegToUpdate(OrigOp1Reg);
1299 if (MI.getOperand(1).isReg())
1300 addRegToUpdate(MI.getOperand(1).getReg());
1301 if (ToErase && ToErase->getOperand(1).isReg())
1302 for (auto UseReg : ToErase->explicit_uses())
1303 if (UseReg.isReg())
1304 addRegToUpdate(UseReg.getReg());
1305 ++NumRotatesCollapsed;
1306 }
1307 break;
1308 }
1309 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1310 // always trap; we will delete the instruction if it will never trap.
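 // (TO-bit encoding: 16 = signed lt, 8 = signed gt, 4 = eq, 2 = unsigned lt,
 // 1 = unsigned gt; a TO field of 31 covers every outcome, which is why it
 // is treated as always trapping below.)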
1311 case PPC::TDI:
1312 case PPC::TWI:
1313 case PPC::TD:
1314 case PPC::TW: {
1315 if (!EnableTrapOptimization) break;
1316 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1317 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1318 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1319 // We can only do the optimization if we can get immediates
1320 // from both operands
1321 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1322 LiMI1->getOpcode() == PPC::LI8)))
1323 break;
1324 if (!IsOperand2Immediate &&
1325 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1326 LiMI2->getOpcode() == PPC::LI8)))
1327 break;
1328
1329 auto ImmOperand0 = MI.getOperand(0).getImm();
1330 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1331 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1332 : LiMI2->getOperand(1).getImm();
1333
1334 // We will replace the MI with an unconditional trap if it will always
1335 // trap.
1336 if ((ImmOperand0 == 31) ||
1337 ((ImmOperand0 & 0x10) &&
1338 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1339 ((ImmOperand0 & 0x8) &&
1340 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1341 ((ImmOperand0 & 0x2) &&
1342 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1343 ((ImmOperand0 & 0x1) &&
1344 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1345 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1346 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1347 TrapOpt = true;
1348 }
1349 // We will delete the MI if it will never trap.
1350 ToErase = &MI;
1351 Simplified = true;
1352 break;
1353 }
1354 }
1355 }
1356
1357 // If the last instruction was marked for elimination,
1358 // remove it now.
1359 if (ToErase) {
1360 recomputeLVForDyingInstr();
1361 ToErase->eraseFromParent();
1362 ToErase = nullptr;
1363 }
1364 // Reset TrapOpt to false at the end of the basic block.
1365 if (EnableTrapOptimization)
1366 TrapOpt = false;
1367 }
1368
1369 // Eliminate all the TOC save instructions which are redundant.
1370 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1371 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1372 if (FI->mustSaveTOC())
1373 NumTOCSavesInPrologue++;
1374
1375 // We try to eliminate redundant compare instruction.
1376 Simplified |= eliminateRedundantCompare();
1377
1378 // If we have made any modifications and added any registers to the set of
1379 // registers for which we need to update the kill flags, do so by recomputing
1380 // LiveVariables for those registers.
1381 for (Register Reg : RegsToUpdate) {
1382 if (!MRI->reg_empty(Reg))
1383 LV->recomputeForSingleDefVirtReg(Reg);
1384 }
1385 return Simplified;
1386}
1387
1388// helper functions for eliminateRedundantCompare
1389static bool isEqOrNe(MachineInstr *BI) {
1390 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1391 unsigned PredCond = PPC::getPredicateCondition(Pred);
1392 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1393}
1394
1395static bool isSupportedCmpOp(unsigned opCode) {
1396 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1397 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1398 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1399 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1400}
1401
1402static bool is64bitCmpOp(unsigned opCode) {
1403 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1404 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1405}
1406
1407static bool isSignedCmpOp(unsigned opCode) {
1408 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1409 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1410}
1411
1412static unsigned getSignedCmpOpCode(unsigned opCode) {
1413 if (opCode == PPC::CMPLD) return PPC::CMPD;
1414 if (opCode == PPC::CMPLW) return PPC::CMPW;
1415 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1416 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1417 return opCode;
1418}
1419
1420// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1421// (LT x) to (LE x-1)
1422static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1423 uint64_t Imm = CMPI->getOperand(2).getImm();
1424 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1425 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1426 return 0;
1427
1427
1428 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1429 unsigned PredCond = PPC::getPredicateCondition(Pred);
1430 unsigned PredHint = PPC::getPredicateHint(Pred);
1431 if (PredCond == PPC::PRED_GE)
1432 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1433 if (PredCond == PPC::PRED_LT)
1434 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1435
1436 return 0;
1437}
1438
1439// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1440// (LE x) to (LT x+1)
1441static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1442 uint64_t Imm = CMPI->getOperand(2).getImm();
1443 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1444 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1445 return 0;
1446
1447 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1448 unsigned PredCond = PPC::getPredicateCondition(Pred);
1449 unsigned PredHint = PPC::getPredicateHint(Pred);
1450 if (PredCond == PPC::PRED_GT)
1451 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1452 if (PredCond == PPC::PRED_LE)
1453 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1454
1455 return 0;
1456}
1457
1458// This takes a Phi node and returns a register value for the specified BB.
1459static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1460 MachineBasicBlock *MBB) {
1461 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1462 MachineOperand &MO = Phi->getOperand(I);
1463 if (MO.getMBB() == MBB)
1464 return Phi->getOperand(I-1).getReg();
1465 }
1466 llvm_unreachable("invalid src basic block for this Phi node\n");
1467 return 0;
1468}
1469
1470// This function tracks the source of the register through register copies.
1471// If BB1 and BB2 are non-NULL, we also track the PHI instruction in BB2,
1472// assuming that control flows from BB1 into BB2.
1473static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1474 MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
1475 unsigned SrcReg = Reg;
1476 while (true) {
1477 unsigned NextReg = SrcReg;
1478 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1479 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1480 NextReg = getIncomingRegForBlock(Inst, BB1);
1481 // We track through PHI only once to avoid infinite loop.
1482 BB1 = nullptr;
1483 }
1484 else if (Inst->isFullCopy())
1485 NextReg = Inst->getOperand(1).getReg();
1486 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1487 break;
1488 SrcReg = NextReg;
1489 }
1490 return SrcReg;
1491}
1492
1493static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1494 MachineBasicBlock *&PredMBB,
1495 MachineBasicBlock *&MBBtoMoveCmp,
1496 MachineRegisterInfo *MRI) {
1497
1498 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1499 auto BII = BB.getFirstInstrTerminator();
1500 // We optimize BBs ending with a conditional branch.
1501 // We check only for BCC here, not BCCLR, because BCCLR
1502 // will be formed only later in the pipeline.
1503 if (BB.succ_size() == 2 &&
1504 BII != BB.instr_end() &&
1505 (*BII).getOpcode() == PPC::BCC &&
1506 (*BII).getOperand(1).isReg()) {
1507 // We optimize only if the condition code is used only by one BCC.
1508 Register CndReg = (*BII).getOperand(1).getReg();
1509 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1510 return false;
1511
1512 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1513 // We assume compare and branch are in the same BB for ease of analysis.
1514 if (CMPI->getParent() != &BB)
1515 return false;
1516
1517 // We skip this BB if a physical register is used in comparison.
1518 for (MachineOperand &MO : CMPI->operands())
1519 if (MO.isReg() && !MO.getReg().isVirtual())
1520 return false;
1521
1522 return true;
1523 }
1524 return false;
1525 };
1526
1527 // If this BB has more than one successor, we would have to create a new
1528 // BB and move the compare instruction into that new BB.
1529 // So far, we do not move a compare instruction to a BB having multiple
1530 // successors, to avoid potentially increasing code size.
1531 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1532 return BB.succ_size() == 1;
1533 };
1534
1535 if (!isEligibleBB(MBB))
1536 return false;
1537
1538 unsigned NumPredBBs = MBB.pred_size();
1539 if (NumPredBBs == 1) {
1540 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1541 if (isEligibleBB(*TmpMBB)) {
1542 PredMBB = TmpMBB;
1543 MBBtoMoveCmp = nullptr;
1544 return true;
1545 }
1546 }
1547 else if (NumPredBBs == 2) {
1548 // We check for partially redundant case.
1549 // So far, we support cases with only two predecessors
1550 // to avoid increasing the number of instructions.
1551 MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
1552 MachineBasicBlock *Pred1MBB = *PI;
1553 MachineBasicBlock *Pred2MBB = *(PI+1);
1554
1555 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1556 // We assume Pred1MBB is the BB containing the compare to be merged and
1557 // Pred2MBB is the BB to which we will append a compare instruction.
1558 // Proceed as is if Pred1MBB is different from MBB.
1559 }
1560 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1561 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1562 std::swap(Pred1MBB, Pred2MBB);
1563 }
1564 else return false;
1565
1566 if (Pred1MBB == &MBB)
1567 return false;
1568
1569 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1570 // We cannot move the compare instruction if operands are not available
1571 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1572 MachineInstr *BI = &*MBB.getFirstInstrTerminator();
1573 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1574 for (int I = 1; I <= 2; I++)
1575 if (CMPI->getOperand(I).isReg()) {
1576 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1577 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1578 return false;
1579 }
1580
1581 PredMBB = Pred1MBB;
1582 MBBtoMoveCmp = Pred2MBB;
1583 return true;
1584 }
1585
1586 return false;
1587}
1588
1589// This function will iterate over the input map containing pairs of a TOC save
1590// instruction and a flag. The flag will be set to false if the TOC save is
1591// proven redundant. This function will erase from the basic block all the TOC
1592// saves marked as redundant.
1593bool PPCMIPeephole::eliminateRedundantTOCSaves(
1594 std::map<MachineInstr *, bool> &TOCSaves) {
1595 bool Simplified = false;
1596 int NumKept = 0;
1597 for (auto TOCSave : TOCSaves) {
1598 if (!TOCSave.second) {
1599 TOCSave.first->eraseFromParent();
1600 RemoveTOCSave++;
1601 Simplified = true;
1602 } else {
1603 NumKept++;
1604 }
1605 }
1606
1607 if (NumKept > 1)
1608 MultiTOCSaves++;
1609
1610 return Simplified;
1611}
1612
1613// If multiple conditional branches are executed based on the (essentially)
1614// same comparison, we merge compare instructions into one and make multiple
1615// conditional branches on this comparison.
1616// For example,
1617// if (a == 0) { ... }
1618// else if (a < 0) { ... }
1619// can be executed by one compare and two conditional branches instead of
1620// two pairs of a compare and a conditional branch.
1621//
1622// This method merges two compare instructions in two MBBs and modifies the
1623// compare and conditional branch instructions if needed.
1624// For the above example, the input for this pass looks like:
1625// cmplwi r3, 0
1626// beq 0, .LBB0_3
1627// cmpwi r3, -1
1628// bgt 0, .LBB0_4
1629// So, before merging two compares, we need to modify these instructions as
1630// cmpwi r3, 0 ; cmplwi and cmpwi yield the same result for beq
1631// beq 0, .LBB0_3
1632// cmpwi r3, 0 ; greater than -1 means greater than or equal to 0
1633// bge 0, .LBB0_4
1634
1635bool PPCMIPeephole::eliminateRedundantCompare() {
1636 bool Simplified = false;
1637
1638 for (MachineBasicBlock &MBB2 : *MF) {
1639 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1640
1641 // For the fully redundant case, we select two basic blocks MBB1 and MBB2
1642 // as an optimization target if
1643 // - both MBBs end with a conditional branch,
1644 // - MBB1 is the only predecessor of MBB2, and
1645 // - the compare does not take a physical register as an operand in either MBB.
1646 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp to nullptr.
1647 //
1648 // For the partially redundant case, we additionally handle MBB2 having one
1649 // additional predecessor, which has only one successor (MBB2).
1650 // In this case, we move the compare instruction originally in MBB2 into
1651 // MBBtoMoveCmp. This partially redundant case typically appears when
1652 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1653 //
1654 // Overview of CFG of related basic blocks
1655 //      Fully redundant case              Partially redundant case
1656 //     --------                   ----------------    --------
1657 //     | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |    | MBB1 | (w/ 2 succ)
1658 //     --------                   ----------------    --------
1659 //      |    \             (w/ 1 succ) \               |    \
1660 //      |     \                         \              |     \
1661 //      |      \                              |
1662 //     --------                           --------
1663 //     | MBB2 | (w/ 1 pred                | MBB2 | (w/ 2 pred
1664 //     --------  and 2 succ)              --------  and 2 succ)
1665 //      |    \                             |    \
1666 //      |     \                            |     \
1667 //
1668 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1669 continue;
1670
1671 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1672 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1673
1674 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1675 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1676 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1677
1678 // We cannot optimize an unsupported compare opcode or
1679 // a mix of 32-bit and 64-bit comparisons
1680 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1681 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1682 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1683 continue;
1684
1685 unsigned NewOpCode = 0;
1686 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1687 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1688 bool SwapOperands = false;
1689
1690 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1691 // Typically, unsigned comparison is used for equality check, but
1692 // we replace it with a signed comparison if the comparison
1693 // to be merged is a signed comparison.
1694 // In other cases of opcode mismatch, we cannot optimize this.
1695
1696 // We cannot change opcode when comparing against an immediate
1697 // if the most significant bit of the immediate is one
1698 // due to the difference in sign extension.
1699 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1700 if (!I->getOperand(2).isImm())
1701 return false;
1702 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1703 return Imm < 0;
1704 };
1705
1706 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1707 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1708 NewOpCode = CMPI1->getOpcode();
1709 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1710 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1711 NewOpCode = CMPI2->getOpcode();
1712 else continue;
1713 }
1714
1715 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1716 // In case of comparisons between two registers, these two registers
1717 // must be the same to merge the two comparisons.
1718 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1719 nullptr, nullptr, MRI);
1720 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1721 nullptr, nullptr, MRI);
1722 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1723 MBB1, &MBB2, MRI);
1724 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1725 MBB1, &MBB2, MRI);
1726
1727 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1728 // Same pair of registers in the same order; ready to merge as is.
1729 }
1730 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1731 // Same pair of registers in different order.
1732 // We reverse the predicate to merge compare instructions.
1733 PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
1734 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1735 // In case of partial redundancy, we need to swap operands
1736 // in another compare instruction.
1737 SwapOperands = true;
1738 }
1739 else continue;
1740 }
1741 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1742 // In case of comparisons between a register and an immediate,
1743 // the operand register must be the same for the two compare instructions.
1744 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1745 nullptr, nullptr, MRI);
1746 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1747 MBB1, &MBB2, MRI);
1748 if (Cmp1Operand1 != Cmp2Operand1)
1749 continue;
1750
1751 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1752 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1753
1754 // If the immediates are not the same, we try to adjust by changing the predicate;
1755 // e.g. GT imm means GE (imm+1).
1756 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1757 int Diff = Imm1 - Imm2;
1758 if (Diff < -2 || Diff > 2)
1759 continue;
1760
1761 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1762 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1763 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1764 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1765 if (Diff == 2) {
1766 if (PredToInc2 && PredToDec1) {
1767 NewPredicate2 = PredToInc2;
1768 NewPredicate1 = PredToDec1;
1769 NewImm2++;
1770 NewImm1--;
1771 }
1772 }
1773 else if (Diff == 1) {
1774 if (PredToInc2) {
1775 NewImm2++;
1776 NewPredicate2 = PredToInc2;
1777 }
1778 else if (PredToDec1) {
1779 NewImm1--;
1780 NewPredicate1 = PredToDec1;
1781 }
1782 }
1783 else if (Diff == -1) {
1784 if (PredToDec2) {
1785 NewImm2--;
1786 NewPredicate2 = PredToDec2;
1787 }
1788 else if (PredToInc1) {
1789 NewImm1++;
1790 NewPredicate1 = PredToInc1;
1791 }
1792 }
1793 else if (Diff == -2) {
1794 if (PredToDec2 && PredToInc1) {
1795 NewPredicate2 = PredToDec2;
1796 NewPredicate1 = PredToInc1;
1797 NewImm2--;
1798 NewImm1++;
1799 }
1800 }
1801 }
1802
1803 // We cannot merge the two compares if the immediates are not the same.
1804 if (NewImm2 != NewImm1)
1805 continue;
1806 }
1807
1808 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1809 LLVM_DEBUG(CMPI1->dump());
1810 LLVM_DEBUG(BI1->dump());
1811 LLVM_DEBUG(CMPI2->dump());
1812 LLVM_DEBUG(BI2->dump());
1813 for (const MachineOperand &MO : CMPI1->operands())
1814 if (MO.isReg())
1815 addRegToUpdate(MO.getReg());
1816 for (const MachineOperand &MO : CMPI2->operands())
1817 if (MO.isReg())
1818 addRegToUpdate(MO.getReg());
1819
1820 // We adjust opcode, predicates and immediate as we determined above.
1821 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1822 CMPI1->setDesc(TII->get(NewOpCode));
1823 }
1824 if (NewPredicate1) {
1825 BI1->getOperand(0).setImm(NewPredicate1);
1826 }
1827 if (NewPredicate2) {
1828 BI2->getOperand(0).setImm(NewPredicate2);
1829 }
1830 if (NewImm1 != Imm1) {
1831 CMPI1->getOperand(2).setImm(NewImm1);
1832 }
1833
1834 if (IsPartiallyRedundant) {
1835 // We touch up the compare instruction in MBB2 and move it to
1836 // a previous BB to handle the partially redundant case.
1837 if (SwapOperands) {
1838 Register Op1 = CMPI2->getOperand(1).getReg();
1839 Register Op2 = CMPI2->getOperand(2).getReg();
1840 CMPI2->getOperand(1).setReg(Op2);
1841 CMPI2->getOperand(2).setReg(Op1);
1842 }
1843 if (NewImm2 != Imm2)
1844 CMPI2->getOperand(2).setImm(NewImm2);
1845
1846 for (int I = 1; I <= 2; I++) {
1847 if (CMPI2->getOperand(I).isReg()) {
1848 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1849 if (Inst->getParent() != &MBB2)
1850 continue;
1851
1852 assert(Inst->getOpcode() == PPC::PHI &&
1853 "We cannot support if an operand comes from this BB.");
1854 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1855 CMPI2->getOperand(I).setReg(SrcReg);
1856 addRegToUpdate(SrcReg);
1857 }
1858 }
1859 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1860 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1861
1862 DebugLoc DL = CMPI2->getDebugLoc();
1863 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1864 BuildMI(MBB2, MBB2.begin(), DL,
1865 TII->get(PPC::PHI), NewVReg)
1866 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1867 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1868 BI2->getOperand(1).setReg(NewVReg);
1869 addRegToUpdate(NewVReg);
1870 }
1871 else {
1872 // We finally eliminate compare instruction in MBB2.
1873 // We do not need to treat CMPI2 specially here in terms of re-computing
1874 // live variables even though it is being deleted because:
1875 // - It defines a register that has a single use (already checked in
1876 // eligibleForCompareElimination())
1877 // - The only user (BI2) is no longer using it so the register is dead (no
1878 // def, no uses)
1879 // - We do not attempt to recompute live variables for dead registers
1880 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1881 CMPI2->eraseFromParent();
1882 }
1883
1884 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1885 LLVM_DEBUG(CMPI1->dump());
1886 LLVM_DEBUG(BI1->dump());
1887 LLVM_DEBUG(BI2->dump());
1888 if (IsPartiallyRedundant) {
1889 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1890 << printMBBReference(*MBBtoMoveCmp)
1891 << " to handle partial redundancy.\n");
1892 LLVM_DEBUG(CMPI2->dump());
1893 }
1894 Simplified = true;
1895 }
1896
1897 return Simplified;
1898}
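// [Editorial note, not part of the original source: a hedged sketch of the
// partially redundant case handled above; block and register names are
// invented. Before the transformation:
//   MBB1 (loop latch):                MBBtoMoveCmp (preheader, 1 succ):
//     %c1 = CMPLWI %r, 10               (no compare)
//     BCC lt, %c1, ...
//   MBB2 (loop header):
//     %c2 = CMPLWI %r, 10
//     BCC lt, %c2, ...
// Afterwards the compare in MBB2 has been spliced into the preheader and
// MBB2 branches on a PHI of the two condition registers:
//   MBBtoMoveCmp:                     MBB1:
//     %c2 = CMPLWI %r, 10               %c1 = CMPLWI %r, 10
//   MBB2:                               BCC lt, %c1, ...
//     %cr = PHI %c1, %MBB1, %c2, %MBBtoMoveCmp
//     BCC lt, %cr, ...
// so MBB2 no longer recomputes a compare its predecessors already provide.]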
1899
1900// We miss the opportunity to emit an RLDIC when lowering jump tables
1901// since ISEL sees only a single basic block. When selecting, the clear
1902// and shift left will be in different blocks.
1903bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1904 MachineInstr *&ToErase) {
1905 if (MI.getOpcode() != PPC::RLDICR)
1906 return false;
1907
1908 Register SrcReg = MI.getOperand(1).getReg();
1909 if (!SrcReg.isVirtual())
1910 return false;
1911
1912 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1913 if (SrcMI->getOpcode() != PPC::RLDICL)
1914 return false;
1915
1916 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1917 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1918 MachineOperand MOpSHMI = MI.getOperand(2);
1919 MachineOperand MOpMEMI = MI.getOperand(3);
1920 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1921 MOpMEMI.isImm()))
1922 return false;
1923
1924 uint64_t SHSrc = MOpSHSrc.getImm();
1925 uint64_t MBSrc = MOpMBSrc.getImm();
1926 uint64_t SHMI = MOpSHMI.getImm();
1927 uint64_t MEMI = MOpMEMI.getImm();
1928 uint64_t NewSH = SHSrc + SHMI;
1929 uint64_t NewMB = MBSrc - SHMI;
1930 if (NewMB > 63 || NewSH > 63)
1931 return false;
1932
1933 // The bits cleared with RLDICL are [0, MBSrc).
1934 // The bits cleared with RLDICR are (MEMI, 63].
1935 // After the sequence, the bits cleared are:
1936 // [0, MBSrc-SHMI) and (MEMI, 63].
1937 //
1938 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1939 if ((63 - NewSH) != MEMI)
1940 return false;
1941
1942 LLVM_DEBUG(dbgs() << "Converting pair: ");
1943 LLVM_DEBUG(SrcMI->dump());
1944 LLVM_DEBUG(MI.dump());
1945
1946 MI.setDesc(TII->get(PPC::RLDIC));
1947 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1948 MI.getOperand(2).setImm(NewSH);
1949 MI.getOperand(3).setImm(NewMB);
1950 addRegToUpdate(MI.getOperand(1).getReg());
1951 addRegToUpdate(SrcMI->getOperand(0).getReg());
1952
1953 LLVM_DEBUG(dbgs() << "To: ");
1954 LLVM_DEBUG(MI.dump());
1955 NumRotatesCollapsed++;
1956 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1957 if (MRI->use_nodbg_empty(SrcReg)) {
1958 assert(!SrcMI->hasImplicitDef() &&
1959 "Not expecting an implicit def with this instr.");
1960 ToErase = SrcMI;
1961 }
1962 return true;
1963}
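// [Editorial note, not part of the original source: a hedged worked example
// of the combine above, typical of jump-table lowering.
//   %1 = RLDICL %0, 0, 32    ; clrldi 32: clear bits [0, 32)
//   %2 = RLDICR %1, 2, 61    ; sldi 2: shift left by 2, clear bits (61, 63]
// gives SHSrc=0, MBSrc=32, SHMI=2, MEMI=61, so NewSH=2 and NewMB=30, and
// 63-NewSH == MEMI holds; the pair is rewritten as
//   %2 = RLDIC %0, 2, 30     ; rotate left by 2, keep bits [30, 61]
// and the RLDICL is deleted if its result has no remaining non-debug uses.]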
1964
1965// For a case like the following in LLVM IR:
1966// entry:
1967// %iconv = sext i32 %index to i64
1968// br i1 undef, label %true, label %false
1969// true:
1970// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1971// ...
1972// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1973// different BBs when conducting instruction selection. We can do a peephole
1974// optimization to combine these two instructions into extswsli after
1975// instruction selection.
1976bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1977 MachineInstr *&ToErase) {
1978 if (MI.getOpcode() != PPC::RLDICR)
1979 return false;
1980
1981 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1982 return false;
1983
1984 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1985
1986 MachineOperand MOpSHMI = MI.getOperand(2);
1987 MachineOperand MOpMEMI = MI.getOperand(3);
1988 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1989 return false;
1990
1991 uint64_t SHMI = MOpSHMI.getImm();
1992 uint64_t MEMI = MOpMEMI.getImm();
1993 if (SHMI + MEMI != 63)
1994 return false;
1995
1996 Register SrcReg = MI.getOperand(1).getReg();
1997 if (!SrcReg.isVirtual())
1998 return false;
1999
2000 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
2001 if (SrcMI->getOpcode() != PPC::EXTSW &&
2002 SrcMI->getOpcode() != PPC::EXTSW_32_64)
2003 return false;
2004
2005 // If the register defined by extsw has more than one use, combination is not
2006 // needed.
2007 if (!MRI->hasOneNonDBGUse(SrcReg))
2008 return false;
2009
2010 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2011 assert(SrcMI->getOperand(1).isReg() &&
2012 "EXTSW's second operand should be a register");
2013 if (!SrcMI->getOperand(1).getReg().isVirtual())
2014 return false;
2015
2016 LLVM_DEBUG(dbgs() << "Combining pair: ");
2017 LLVM_DEBUG(SrcMI->dump());
2018 LLVM_DEBUG(MI.dump());
2019
2020 MachineInstr *NewInstr =
2021 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
2022 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
2023 : TII->get(PPC::EXTSWSLI_32_64),
2024 MI.getOperand(0).getReg())
2025 .add(SrcMI->getOperand(1))
2026 .add(MOpSHMI);
2027 (void)NewInstr;
2028
2029 LLVM_DEBUG(dbgs() << "TO: ");
2030 LLVM_DEBUG(NewInstr->dump());
2031 ++NumEXTSWAndSLDICombined;
2032 ToErase = &MI;
2033 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2034 // can recompute its kill flags. We run DCE immediately after this pass
2035 // to clean up dead instructions such as this.
2036 addRegToUpdate(NewInstr->getOperand(1).getReg());
2037 addRegToUpdate(SrcMI->getOperand(0).getReg());
2038 return true;
2039}
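// [Editorial note, not part of the original source: a hedged before/after
// sketch of the combine above; virtual register names are invented.
//   %1 = EXTSW %0            ; sign-extend word to doubleword
//   %2 = RLDICR %1, 3, 60    ; sldi 3, since SH + ME == 63
// becomes, when %1 has no other non-debug use,
//   %2 = EXTSWSLI %0, 3      ; sign-extend and shift left in one instruction
// (or EXTSWSLI_32_64 when the source was EXTSW_32_64).]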
2040
2041} // end default namespace
2042
2044 "PowerPC MI Peephole Optimization", false, false)
2050 "PowerPC MI Peephole Optimization", false, false)
2051
2052char PPCMIPeephole::ID = 0;
2053FunctionPass *
2054llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }