LLVM 22.0.0git
PPCMIPeephole.cpp
Go to the documentation of this file.
1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
29
32#include "PPC.h"
33#include "PPCInstrInfo.h"
35#include "PPCTargetMachine.h"
36#include "llvm/ADT/Statistic.h"
46#include "llvm/Support/Debug.h"
48
49using namespace llvm;
50
51#define DEBUG_TYPE "ppc-mi-peepholes"
52
53STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
54STATISTIC(MultiTOCSaves,
55 "Number of functions with multiple TOC saves that must be kept");
56STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
57STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
58STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
59STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
60STATISTIC(NumConvertedToImmediateForm,
61 "Number of instructions converted to their immediate form");
62STATISTIC(NumFunctionsEnteredInMIPeephole,
63 "Number of functions entered in PPC MI Peepholes");
64STATISTIC(NumFixedPointIterations,
65 "Number of fixed-point iterations converting reg-reg instructions "
66 "to reg-imm ones");
67STATISTIC(NumRotatesCollapsed,
68 "Number of pairs of rotate left, clear left/right collapsed");
69STATISTIC(NumEXTSWAndSLDICombined,
70 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
71STATISTIC(NumLoadImmZeroFoldedAndRemoved,
72 "Number of LI(8) reg, 0 that are folded to r0 and removed");
73
74static cl::opt<bool>
75FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
76 cl::desc("Iterate to a fixed point when attempting to "
77 "convert reg-reg instructions to reg-imm"));
78
79static cl::opt<bool>
80ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
81 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
82
83static cl::opt<bool>
84 EnableSExtElimination("ppc-eliminate-signext",
85 cl::desc("enable elimination of sign-extensions"),
86 cl::init(true), cl::Hidden);
87
88static cl::opt<bool>
89 EnableZExtElimination("ppc-eliminate-zeroext",
90 cl::desc("enable elimination of zero-extensions"),
91 cl::init(true), cl::Hidden);
92
93static cl::opt<bool>
94 EnableTrapOptimization("ppc-opt-conditional-trap",
95 cl::desc("enable optimization of conditional traps"),
96 cl::init(false), cl::Hidden);
97
99 PeepholeXToICounter, "ppc-xtoi-peephole",
100 "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");
101
102DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
103 "Controls whether PPC per opcode peephole is performed on a MI");
104
105namespace {
106
107struct PPCMIPeephole : public MachineFunctionPass {
108
109 static char ID;
110 const PPCInstrInfo *TII;
111 MachineFunction *MF;
113 LiveVariables *LV;
114
115 PPCMIPeephole() : MachineFunctionPass(ID) {}
116
117private:
118 MachineDominatorTree *MDT;
119 MachinePostDominatorTree *MPDT;
120 MachineBlockFrequencyInfo *MBFI;
121 BlockFrequency EntryFreq;
122 SmallSet<Register, 16> RegsToUpdate;
123
124 // Initialize class variables.
125 void initialize(MachineFunction &MFParm);
126
127 // Perform peepholes.
128 bool simplifyCode();
129
130 // Perform peepholes.
131 bool eliminateRedundantCompare();
132 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
133 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
134 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
135 MachineInstr *&ToErase);
136 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
137 MachineInstr *MI);
138
139 // A number of transformations will eliminate the definition of a register
140 // as all of its uses will be removed. However, this leaves a register
141 // without a definition for LiveVariables. Such transformations should
142 // use this function to provide a dummy definition of the register that
143 // will simply be removed by DCE.
144 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
145 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
146 }
147 void addRegToUpdateWithLine(Register Reg, int Line);
148 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
149 SmallVectorImpl<MachineInstr *> &PHIs,
150 Register Dst);
151
152public:
153
154 void getAnalysisUsage(AnalysisUsage &AU) const override {
155 AU.addRequired<LiveVariablesWrapperPass>();
156 AU.addRequired<MachineDominatorTreeWrapperPass>();
157 AU.addRequired<MachinePostDominatorTreeWrapperPass>();
158 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
159 AU.addPreserved<LiveVariablesWrapperPass>();
160 AU.addPreserved<MachineDominatorTreeWrapperPass>();
161 AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
162 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
164 }
165
166 // Main entry point for this pass.
167 bool runOnMachineFunction(MachineFunction &MF) override {
168 initialize(MF);
169 // At this point, TOC pointer should not be used in a function that uses
170 // PC-Relative addressing.
171 assert((MF.getRegInfo().use_empty(PPC::X2) ||
172 !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
173 "TOC pointer used in a function using PC-Relative addressing!");
174 if (skipFunction(MF.getFunction()))
175 return false;
176 bool Changed = simplifyCode();
177#ifndef NDEBUG
178 if (Changed)
179 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
180 "-mllvm -disable-ppc-peephole");
181#endif
182 return Changed;
183 }
184};
185
186#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
187void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
188 if (!Reg.isVirtual())
189 return;
190 if (RegsToUpdate.insert(Reg).second)
191 LLVM_DEBUG(dbgs() << "Adding register: " << printReg(Reg) << " on line "
192 << Line << " for re-computation of kill flags\n");
193}
194
195// Initialize class variables.
196void PPCMIPeephole::initialize(MachineFunction &MFParm) {
197 MF = &MFParm;
198 MRI = &MF->getRegInfo();
199 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
200 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
201 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
202 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
203 EntryFreq = MBFI->getEntryFreq();
204 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
205 RegsToUpdate.clear();
206 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
207 LLVM_DEBUG(MF->dump());
208}
209
210static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
211 MachineRegisterInfo *MRI) {
212 assert(Op && "Invalid Operand!");
213 if (!Op->isReg())
214 return nullptr;
215
216 Register Reg = Op->getReg();
217 if (!Reg.isVirtual())
218 return nullptr;
219
220 return MRI->getVRegDef(Reg);
221}
222
223// This function returns number of known zero bits in output of MI
224// starting from the most significant bit.
225static unsigned getKnownLeadingZeroCount(const unsigned Reg,
226 const PPCInstrInfo *TII,
227 const MachineRegisterInfo *MRI) {
228 MachineInstr *MI = MRI->getVRegDef(Reg);
229 unsigned Opcode = MI->getOpcode();
230 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
231 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
232 return MI->getOperand(3).getImm();
233
234 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
235 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
236 return MI->getOperand(3).getImm();
237
238 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
239 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
240 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
241 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
242 return 32 + MI->getOperand(3).getImm();
243
244 if (Opcode == PPC::ANDI_rec) {
245 uint16_t Imm = MI->getOperand(2).getImm();
246 return 48 + llvm::countl_zero(Imm);
247 }
248
249 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
250 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
251 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
252 // The result ranges from 0 to 32.
253 return 58;
254
255 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
256 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
257 // The result ranges from 0 to 64.
258 return 57;
259
260 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
261 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
262 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
263 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
264 return 48;
265
266 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
267 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
268 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
269 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
270 return 56;
271
272 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
273 Opcode == PPC::AND8_rec)
274 return std::max(
275 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
276 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
277
278 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
279 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
280 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
281 Opcode == PPC::XOR8_rec)
282 return std::min(
283 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
284 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
285
286 if (TII->isZeroExtended(Reg, MRI))
287 return 32;
288
289 return 0;
290}
291
292// This function maintains a map for the pairs <TOC Save Instr, Keep>
293// Each time a new TOC save is encountered, it checks if any of the existing
294// ones are dominated by the new one. If so, it marks the existing one as
295// redundant by setting it's entry in the map as false. It then adds the new
296// instruction to the map with either true or false depending on if any
297// existing instructions dominated the new one.
298void PPCMIPeephole::UpdateTOCSaves(
299 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
300 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
301 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
302 // here only support it under ELFv2.
303 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
304 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
305
306 MachineBasicBlock *Entry = &MF->front();
307 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
308
309 // If the block in which the TOC save resides is in a block that
310 // post-dominates Entry, or a block that is hotter than entry (keep in mind
311 // that early MachineLICM has already run so the TOC save won't be hoisted)
312 // we can just do the save in the prologue.
313 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
314 FI->setMustSaveTOC(true);
315
316 // If we are saving the TOC in the prologue, all the TOC saves can be
317 // removed from the code.
318 if (FI->mustSaveTOC()) {
319 for (auto &TOCSave : TOCSaves)
320 TOCSave.second = false;
321 // Add new instruction to map.
322 TOCSaves[MI] = false;
323 return;
324 }
325 }
326
327 bool Keep = true;
328 for (auto &I : TOCSaves) {
329 MachineInstr *CurrInst = I.first;
330 // If new instruction dominates an existing one, mark existing one as
331 // redundant.
332 if (I.second && MDT->dominates(MI, CurrInst))
333 I.second = false;
334 // Check if the new instruction is redundant.
335 if (MDT->dominates(CurrInst, MI)) {
336 Keep = false;
337 break;
338 }
339 }
340 // Add new instruction to map.
341 TOCSaves[MI] = Keep;
342}
343
344// This function returns a list of all PHI nodes in the tree starting from
345// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
346// The list initially only contains the root PHI. When we visit a PHI node, we
347// add it to the list. We continue to look for other PHI node operands while
348// there are nodes to visit in the list. The function returns false if the
349// optimization cannot be applied on this tree.
350static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
351 MachineInstr *RootPHI,
352 SmallVectorImpl<MachineInstr *> &PHIs) {
353 PHIs.push_back(RootPHI);
354 unsigned VisitedIndex = 0;
355 while (VisitedIndex < PHIs.size()) {
356 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
357 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
358 PHIOp != NumOps; PHIOp += 2) {
359 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
360 if (!RegOp.isVirtual())
361 return false;
362 MachineInstr *Instr = MRI->getVRegDef(RegOp);
363 // While collecting the PHI nodes, we check if they can be converted (i.e.
364 // all the operands are either copies, implicit defs or PHI nodes).
365 unsigned Opcode = Instr->getOpcode();
366 if (Opcode == PPC::COPY) {
367 Register Reg = Instr->getOperand(1).getReg();
368 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
369 return false;
370 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
371 return false;
372 // If we detect a cycle in the PHI nodes, we exit. It would be
373 // possible to change cycles as well, but that would add a lot
374 // of complexity for a case that is unlikely to occur with MMA
375 // code.
376 if (Opcode != PPC::PHI)
377 continue;
378 if (llvm::is_contained(PHIs, Instr))
379 return false;
380 PHIs.push_back(Instr);
381 }
382 VisitedIndex++;
383 }
384 return true;
385}
386
387// This function changes the unprimed accumulator PHI nodes in the PHIs list to
388// primed accumulator PHI nodes. The list is traversed in reverse order to
389// change all the PHI operands of a PHI node before changing the node itself.
390// We keep a map to associate each changed PHI node to its non-changed form.
391void PPCMIPeephole::convertUnprimedAccPHIs(
392 const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
393 SmallVectorImpl<MachineInstr *> &PHIs, Register Dst) {
394 DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
395 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
397 // We check if the current PHI node can be changed by looking at its
398 // operands. If all the operands are either copies from primed
399 // accumulators, implicit definitions or other unprimed accumulator
400 // PHI nodes, we change it.
401 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
402 PHIOp += 2) {
403 Register RegOp = PHI->getOperand(PHIOp).getReg();
404 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
405 unsigned Opcode = PHIInput->getOpcode();
406 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
407 Opcode == PPC::PHI) &&
408 "Unexpected instruction");
409 if (Opcode == PPC::COPY) {
410 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
411 &PPC::ACCRCRegClass &&
412 "Unexpected register class");
413 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
414 } else if (Opcode == PPC::IMPLICIT_DEF) {
415 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
416 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
417 TII->get(PPC::IMPLICIT_DEF), AccReg);
418 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
419 PHI->getOperand(PHIOp + 1)});
420 } else if (Opcode == PPC::PHI) {
421 // We found a PHI operand. At this point we know this operand
422 // has already been changed so we get its associated changed form
423 // from the map.
424 assert(ChangedPHIMap.count(PHIInput) == 1 &&
425 "This PHI node should have already been changed.");
426 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
428 PrimedAccPHI->getOperand(0).getReg(), false),
429 PHI->getOperand(PHIOp + 1)});
430 }
431 }
432 Register AccReg = Dst;
433 // If the PHI node we are changing is the root node, the register it defines
434 // will be the destination register of the original copy (of the PHI def).
435 // For all other PHI's in the list, we need to create another primed
436 // accumulator virtual register as the PHI will no longer define the
437 // unprimed accumulator.
438 if (PHI != PHIs[0])
439 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
440 MachineInstrBuilder NewPHI = BuildMI(
441 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
442 for (auto RegMBB : PHIOps) {
443 NewPHI.add(RegMBB.first).add(RegMBB.second);
444 if (MRI->isSSA())
445 addRegToUpdate(RegMBB.first.getReg());
446 }
447 // The liveness of old PHI and new PHI have to be updated.
448 addRegToUpdate(PHI->getOperand(0).getReg());
449 addRegToUpdate(AccReg);
450 ChangedPHIMap[PHI] = NewPHI.getInstr();
451 LLVM_DEBUG(dbgs() << "Converting PHI: ");
452 LLVM_DEBUG(PHI->dump());
453 LLVM_DEBUG(dbgs() << "To: ");
454 LLVM_DEBUG(NewPHI.getInstr()->dump());
455 }
456}
457
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode() {
  bool Simplified = false;
  // Set once a conditional trap is turned into an unconditional one; the
  // remaining instructions after the trap are then eliminated.
  bool TrapOpt = false;
  // Instruction whose deletion is deferred until the next loop iteration.
  MachineInstr* ToErase = nullptr;
  // Candidate TOC-save instructions mapped to whether each must be kept.
  std::map<MachineInstr *, bool> TOCSaves;
  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
  NumFunctionsEnteredInMIPeephole++;
466 if (ConvertRegReg) {
467 // Fixed-point conversion of reg/reg instructions fed by load-immediate
468 // into reg/imm instructions. FIXME: This is expensive, control it with
469 // an option.
470 bool SomethingChanged = false;
471 do {
472 NumFixedPointIterations++;
473 SomethingChanged = false;
474 for (MachineBasicBlock &MBB : *MF) {
475 for (MachineInstr &MI : MBB) {
476 if (MI.isDebugInstr())
477 continue;
478
479 if (!DebugCounter::shouldExecute(PeepholeXToICounter))
480 continue;
481
482 SmallSet<Register, 4> RRToRIRegsToUpdate;
483 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
484 continue;
485 for (Register R : RRToRIRegsToUpdate)
487 // The updated instruction may now have new register operands.
488 // Conservatively add them to recompute the flags as well.
489 for (const MachineOperand &MO : MI.operands())
490 if (MO.isReg())
491 addRegToUpdate(MO.getReg());
492 // We don't erase anything in case the def has other uses. Let DCE
493 // remove it if it can be removed.
494 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
495 LLVM_DEBUG(MI.dump());
496 NumConvertedToImmediateForm++;
497 SomethingChanged = true;
498 Simplified = true;
499 }
500 }
501 } while (SomethingChanged && FixedPointRegToImm);
502 }
503
  // Since we are deleting this instruction, we need to run LiveVariables
  // on any of its definitions that are marked as needing an update since
  // we can't run LiveVariables on a deleted register. This only needs
  // to be done for defs since uses will have their own defining
  // instructions so we won't be running LiveVariables on a deleted reg.
  // NOTE: Callers invoke this only while ToErase is non-null, immediately
  // before erasing ToErase from its parent block.
  auto recomputeLVForDyingInstr = [&]() {
    if (RegsToUpdate.empty())
      return;
    for (MachineOperand &MO : ToErase->operands()) {
      if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
        continue;
      Register RegToUpdate = MO.getReg();
      // Remove from the work list so the register is processed only once.
      RegsToUpdate.erase(RegToUpdate);
      // If some transformation has introduced an additional definition of
      // this register (breaking SSA), we can safely convert this def to
      // a def of an invalid register as the instruction is going away.
      if (!MRI->getUniqueVRegDef(RegToUpdate))
        MO.setReg(PPC::NoRegister);
      LV->recomputeForSingleDefVirtReg(RegToUpdate);
    }
  };
525
  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

      // If the previous instruction was marked for elimination,
      // remove it now. Deletion is deferred by one instruction so the
      // iterator is never positioned on the instruction being erased.
      if (ToErase) {
        LLVM_DEBUG(dbgs() << "Deleting instruction: ");
        LLVM_DEBUG(ToErase->dump());
        recomputeLVForDyingInstr();
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }
      // If a conditional trap instruction got optimized to an
      // unconditional trap, eliminate all the instructions after
      // the trap.
      if (EnableTrapOptimization && TrapOpt) {
        ToErase = &MI;
        continue;
      }

      // Ignore debug instructions.
      if (MI.isDebugInstr())
        continue;

      // Debug-counter gate for bisecting the per-opcode peepholes.
      if (!DebugCounter::shouldExecute(PeepholePerOpCounter))
        continue;

      // Per-opcode peepholes.
      switch (MI.getOpcode()) {

      default:
        break;
      case PPC::COPY: {
        Register Src = MI.getOperand(1).getReg();
        Register Dst = MI.getOperand(0).getReg();
        if (!Src.isVirtual() || !Dst.isVirtual())
          break;
        // Only copies from an unprimed to a primed accumulator are of
        // interest here.
        if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
            MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
          break;

        // We are copying an unprimed accumulator to a primed accumulator.
        // If the input to the copy is a PHI that is fed only by (i) copies in
        // the other direction (ii) implicitly defined unprimed accumulators or
        // (iii) other PHI nodes satisfying (i) and (ii), we can change
        // the PHI to a PHI on primed accumulators (as long as we also change
        // its operands). To detect and change such copies, we first get a list
        // of all the PHI nodes starting from the root PHI node in BFS order.
        // We then visit all these PHI nodes to check if they can be changed to
        // primed accumulator PHI nodes and if so, we change them.
        MachineInstr *RootPHI = MRI->getVRegDef(Src);
        if (RootPHI->getOpcode() != PPC::PHI)
          break;

        SmallVector<MachineInstr *, 4> PHIs;
        if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
          break;

        convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);

        // The copy is now redundant; its deletion is deferred to the next
        // loop iteration.
        ToErase = &MI;
        break;
      }
      case PPC::LI:
      case PPC::LI8: {
        // If we are materializing a zero, look for any use operands for which
        // zero means immediate zero. All such operands can be replaced with
        // PPC::ZERO.
        if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
          break;
        Register MIDestReg = MI.getOperand(0).getReg();
        bool Folded = false;
        for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
          Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
        // If every non-debug use was folded away, the LI itself is dead.
        if (MRI->use_nodbg_empty(MIDestReg)) {
          ++NumLoadImmZeroFoldedAndRemoved;
          ToErase = &MI;
        }
        if (Folded)
          addRegToUpdate(MIDestReg);
        Simplified |= Folded;
        break;
      }
      case PPC::STW:
      case PPC::STD: {
        MachineFrameInfo &MFI = MF->getFrameInfo();
        // TOC-save tracking is only done for ELFv2/AIX frames without
        // variable-sized objects.
        if (MFI.hasVarSizedObjects() ||
            (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
             !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
          break;
        // When encountering a TOC save instruction, call UpdateTOCSaves
        // to add it to the TOCSaves map and mark any existing TOC saves
        // it dominates as redundant.
        if (TII->isTOCSaveMI(MI))
          UpdateTOCSaves(TOCSaves, &MI);
        break;
      }
623 case PPC::XXPERMDI: {
624 // Perform simplifications of 2x64 vector swaps and splats.
625 // A swap is identified by an immediate value of 2, and a splat
626 // is identified by an immediate value of 0 or 3.
627 int Immed = MI.getOperand(3).getImm();
628
629 if (Immed == 1)
630 break;
631
632 // For each of these simplifications, we need the two source
633 // regs to match. Unfortunately, MachineCSE ignores COPY and
634 // SUBREG_TO_REG, so for example we can see
635 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
636 // We have to look through chains of COPY and SUBREG_TO_REG
637 // to find the real source values for comparison.
638 Register TrueReg1 =
639 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
640 Register TrueReg2 =
641 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
642
643 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
644 break;
645
646 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
647
648 if (!DefMI)
649 break;
650
651 unsigned DefOpc = DefMI->getOpcode();
652
653 // If this is a splat fed by a splatting load, the splat is
654 // redundant. Replace with a copy. This doesn't happen directly due
655 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
656 // a load of a double to a vector of 64-bit integers.
657 auto isConversionOfLoadAndSplat = [=]() -> bool {
658 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
659 return false;
660 Register FeedReg1 =
661 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
662 if (FeedReg1.isVirtual()) {
663 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
664 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
665 return true;
666 }
667 return false;
668 };
669 if ((Immed == 0 || Immed == 3) &&
670 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
671 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
672 "to load-and-splat/copy: ");
673 LLVM_DEBUG(MI.dump());
674 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
675 MI.getOperand(0).getReg())
676 .add(MI.getOperand(1));
677 addRegToUpdate(MI.getOperand(1).getReg());
678 ToErase = &MI;
679 Simplified = true;
680 }
681
682 // If this is a splat or a swap fed by another splat, we
683 // can replace it with a copy.
684 if (DefOpc == PPC::XXPERMDI) {
685 Register DefReg1 = DefMI->getOperand(1).getReg();
686 Register DefReg2 = DefMI->getOperand(2).getReg();
687 unsigned DefImmed = DefMI->getOperand(3).getImm();
688
689 // If the two inputs are not the same register, check to see if
690 // they originate from the same virtual register after only
691 // copy-like instructions.
692 if (DefReg1 != DefReg2) {
693 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
694 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
695
696 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
697 break;
698 }
699
700 if (DefImmed == 0 || DefImmed == 3) {
701 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
702 "to splat/copy: ");
703 LLVM_DEBUG(MI.dump());
704 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
705 MI.getOperand(0).getReg())
706 .add(MI.getOperand(1));
707 addRegToUpdate(MI.getOperand(1).getReg());
708 ToErase = &MI;
709 Simplified = true;
710 }
711
712 // If this is a splat fed by a swap, we can simplify modify
713 // the splat to splat the other value from the swap's input
714 // parameter.
715 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
716 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
717 LLVM_DEBUG(MI.dump());
718 addRegToUpdate(MI.getOperand(1).getReg());
719 addRegToUpdate(MI.getOperand(2).getReg());
720 MI.getOperand(1).setReg(DefReg1);
721 MI.getOperand(2).setReg(DefReg2);
722 MI.getOperand(3).setImm(3 - Immed);
723 addRegToUpdate(DefReg1);
724 addRegToUpdate(DefReg2);
725 Simplified = true;
726 }
727
728 // If this is a swap fed by a swap, we can replace it
729 // with a copy from the first swap's input.
730 else if (Immed == 2 && DefImmed == 2) {
731 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
732 LLVM_DEBUG(MI.dump());
733 addRegToUpdate(MI.getOperand(1).getReg());
734 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
735 MI.getOperand(0).getReg())
736 .add(DefMI->getOperand(1));
739 ToErase = &MI;
740 Simplified = true;
741 }
742 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
743 DefOpc == PPC::XXPERMDIs &&
744 (DefMI->getOperand(2).getImm() == 0 ||
745 DefMI->getOperand(2).getImm() == 3)) {
746 ToErase = &MI;
747 Simplified = true;
748 // Swap of a splat, convert to copy.
749 if (Immed == 2) {
750 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
751 LLVM_DEBUG(MI.dump());
752 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
753 MI.getOperand(0).getReg())
754 .add(MI.getOperand(1));
755 addRegToUpdate(MI.getOperand(1).getReg());
756 break;
757 }
758 // Splat fed by another splat - switch the output of the first
759 // and remove the second.
760 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
761 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
762 LLVM_DEBUG(MI.dump());
763 } else if (Immed == 2 &&
764 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
765 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
766 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
767 DefOpc == PPC::VSPLTISW)) {
768 // Swap of various vector splats, convert to copy.
769 ToErase = &MI;
770 Simplified = true;
771 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
772 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
773 LLVM_DEBUG(MI.dump());
774 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
775 MI.getOperand(0).getReg())
776 .add(MI.getOperand(1));
777 addRegToUpdate(MI.getOperand(1).getReg());
778 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
779 TII->isLoadFromConstantPool(DefMI)) {
780 const Constant *C = TII->getConstantFromConstantPool(DefMI);
781 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
782 ToErase = &MI;
783 Simplified = true;
785 << "Optimizing swap(splat pattern from constant-pool) "
786 "=> copy(splat pattern from constant-pool): ");
787 LLVM_DEBUG(MI.dump());
788 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
789 MI.getOperand(0).getReg())
790 .add(MI.getOperand(1));
791 addRegToUpdate(MI.getOperand(1).getReg());
792 }
793 }
794 break;
795 }
796 case PPC::VSPLTB:
797 case PPC::VSPLTH:
798 case PPC::XXSPLTW: {
799 unsigned MyOpcode = MI.getOpcode();
800 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
801 Register TrueReg =
802 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
803 if (!TrueReg.isVirtual())
804 break;
805 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
806 if (!DefMI)
807 break;
808 unsigned DefOpcode = DefMI->getOpcode();
809 auto isConvertOfSplat = [=]() -> bool {
810 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
811 return false;
812 Register ConvReg = DefMI->getOperand(1).getReg();
813 if (!ConvReg.isVirtual())
814 return false;
815 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
816 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
817 Splt->getOpcode() == PPC::XXSPLTW);
818 };
819 bool AlreadySplat = (MyOpcode == DefOpcode) ||
820 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
821 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
822 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
823 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
824 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
825 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
826 // If the instruction[s] that feed this splat have already splat
827 // the value, this splat is redundant.
828 if (AlreadySplat) {
829 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
830 LLVM_DEBUG(MI.dump());
831 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
832 MI.getOperand(0).getReg())
833 .add(MI.getOperand(OpNo));
834 addRegToUpdate(MI.getOperand(OpNo).getReg());
835 ToErase = &MI;
836 Simplified = true;
837 }
838 // Splat fed by a shift. Usually when we align value to splat into
839 // vector element zero.
840 if (DefOpcode == PPC::XXSLDWI) {
841 Register ShiftRes = DefMI->getOperand(0).getReg();
842 Register ShiftOp1 = DefMI->getOperand(1).getReg();
843 Register ShiftOp2 = DefMI->getOperand(2).getReg();
844 unsigned ShiftImm = DefMI->getOperand(3).getImm();
845 unsigned SplatImm =
846 MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
847 if (ShiftOp1 == ShiftOp2) {
848 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
849 if (MRI->hasOneNonDBGUse(ShiftRes)) {
850 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
852 ToErase = DefMI;
853 }
854 Simplified = true;
855 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
856 << " to " << NewElem << " in instruction: ");
857 LLVM_DEBUG(MI.dump());
858 addRegToUpdate(MI.getOperand(OpNo).getReg());
859 addRegToUpdate(ShiftOp1);
860 MI.getOperand(OpNo).setReg(ShiftOp1);
861 MI.getOperand(2).setImm(NewElem);
862 }
863 }
864 break;
865 }
866 case PPC::XVCVDPSP: {
867 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
868 Register TrueReg =
869 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
870 if (!TrueReg.isVirtual())
871 break;
872 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
873
874 // This can occur when building a vector of single precision or integer
875 // values.
876 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
877 Register DefsReg1 =
878 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
879 Register DefsReg2 =
880 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
881 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
882 break;
883 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
884 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
885
886 if (!P1 || !P2)
887 break;
888
889 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
890 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
891 // the FRSP/XSRSP.
892 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
893 unsigned Opc = RoundInstr->getOpcode();
894 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
895 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
896 Simplified = true;
897 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
898 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
899 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
900 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
901 if (Use.getOperand(i).isReg() &&
902 Use.getOperand(i).getReg() == FRSPDefines)
903 Use.getOperand(i).setReg(ConvReg1);
904 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
905 LLVM_DEBUG(RoundInstr->dump());
906 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
907 LLVM_DEBUG(MI.dump());
908 LLVM_DEBUG(dbgs() << "Through instruction:\n");
910 addRegToUpdate(ConvReg1);
911 addRegToUpdate(FRSPDefines);
912 ToErase = RoundInstr;
913 }
914 };
915
916 // If the input to XVCVDPSP is a vector that was built (even
917 // partially) out of FRSP's, the FRSP(s) can safely be removed
918 // since this instruction performs the same operation.
919 if (P1 != P2) {
920 removeFRSPIfPossible(P1);
921 removeFRSPIfPossible(P2);
922 break;
923 }
924 removeFRSPIfPossible(P1);
925 }
926 break;
927 }
928 case PPC::EXTSH:
929 case PPC::EXTSH8:
930 case PPC::EXTSH8_32_64: {
931 if (!EnableSExtElimination) break;
932 Register NarrowReg = MI.getOperand(1).getReg();
933 if (!NarrowReg.isVirtual())
934 break;
935
936 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
937 unsigned SrcOpcode = SrcMI->getOpcode();
938 // If we've used a zero-extending load that we will sign-extend,
939 // just do a sign-extending load.
940 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
941 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
942 break;
943 // Determine the new opcode. We need to make sure that if the original
944 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
945 // Likewise if the source is X-Form the new opcode should also be
946 // X-Form.
947 unsigned Opc = PPC::LHA;
948 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
949 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
950 MI.getOpcode() == PPC::EXTSH8_32_64;
951
952 if (SourceIsXForm && MIIs64Bit)
953 Opc = PPC::LHAX8;
954 else if (SourceIsXForm && !MIIs64Bit)
955 Opc = PPC::LHAX;
956 else if (MIIs64Bit)
957 Opc = PPC::LHA8;
958
959 addRegToUpdate(NarrowReg);
960 addRegToUpdate(MI.getOperand(0).getReg());
961
962 // We are removing a definition of NarrowReg which will cause
963 // problems in AliveBlocks. Add an implicit def that will be
964 // removed so that AliveBlocks are updated correctly.
965 addDummyDef(MBB, &MI, NarrowReg);
966 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
967 LLVM_DEBUG(SrcMI->dump());
968 LLVM_DEBUG(dbgs() << "and sign-extension\n");
969 LLVM_DEBUG(MI.dump());
970 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
971 SrcMI->setDesc(TII->get(Opc));
972 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
973 ToErase = &MI;
974 Simplified = true;
975 NumEliminatedSExt++;
976 }
977 break;
978 }
979 case PPC::EXTSW:
980 case PPC::EXTSW_32:
981 case PPC::EXTSW_32_64: {
982 if (!EnableSExtElimination) break;
983 Register NarrowReg = MI.getOperand(1).getReg();
984 if (!NarrowReg.isVirtual())
985 break;
986
987 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
988 unsigned SrcOpcode = SrcMI->getOpcode();
989 // If we've used a zero-extending load that we will sign-extend,
990 // just do a sign-extending load.
991 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
992 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
993 break;
994
995 // The transformation from a zero-extending load to a sign-extending
996 // load is only legal when the displacement is a multiple of 4.
997 // If the displacement is not at least 4 byte aligned, don't perform
998 // the transformation.
999 bool IsWordAligned = false;
1000 if (SrcMI->getOperand(1).isGlobal()) {
1001 const GlobalVariable *GV =
1003 if (GV && GV->getAlign() && *GV->getAlign() >= 4 &&
1004 (SrcMI->getOperand(1).getOffset() % 4 == 0))
1005 IsWordAligned = true;
1006 } else if (SrcMI->getOperand(1).isImm()) {
1007 int64_t Value = SrcMI->getOperand(1).getImm();
1008 if (Value % 4 == 0)
1009 IsWordAligned = true;
1010 }
1011
1012 // Determine the new opcode. We need to make sure that if the original
1013 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1014 // Likewise if the source is X-Form the new opcode should also be
1015 // X-Form.
1016 unsigned Opc = PPC::LWA_32;
1017 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1018 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1019 MI.getOpcode() == PPC::EXTSW_32_64;
1020
1021 if (SourceIsXForm && MIIs64Bit)
1022 Opc = PPC::LWAX;
1023 else if (SourceIsXForm && !MIIs64Bit)
1024 Opc = PPC::LWAX_32;
1025 else if (MIIs64Bit)
1026 Opc = PPC::LWA;
1027
1028 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1029 break;
1030
1031 addRegToUpdate(NarrowReg);
1032 addRegToUpdate(MI.getOperand(0).getReg());
1033
1034 // We are removing a definition of NarrowReg which will cause
1035 // problems in AliveBlocks. Add an implicit def that will be
1036 // removed so that AliveBlocks are updated correctly.
1037 addDummyDef(MBB, &MI, NarrowReg);
1038 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1039 LLVM_DEBUG(SrcMI->dump());
1040 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1041 LLVM_DEBUG(MI.dump());
1042 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1043 SrcMI->setDesc(TII->get(Opc));
1044 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1045 ToErase = &MI;
1046 Simplified = true;
1047 NumEliminatedSExt++;
1048 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1049 TII->isSignExtended(NarrowReg, MRI)) {
1050 // We can eliminate EXTSW if the input is known to be already
1051 // sign-extended. However, we are not sure whether a spill will occur
1052 // during register allocation. If there is no promotion, it will use
1053 // 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
1054 // since the register class is 32-bits. Consequently, the high 32-bit
1055 // information will be lost. Therefore, all these instructions in the
1056 // chain used to deduce sign extension to eliminate the 'extsw' will
1057 // need to be promoted to 64-bit pseudo instructions when the 'extsw'
1058 // is eliminated.
1059 TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
1060
1061 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1062 Register TmpReg =
1063 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1064 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1065 TmpReg);
1066 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1067 MI.getOperand(0).getReg())
1068 .addReg(TmpReg)
1069 .addReg(NarrowReg)
1070 .addImm(PPC::sub_32);
1071 ToErase = &MI;
1072 Simplified = true;
1073 NumEliminatedSExt++;
1074 }
1075 break;
1076 }
1077 case PPC::RLDICL: {
1078 // We can eliminate RLDICL (e.g. for zero-extension)
1079 // if all bits to clear are already zero in the input.
1080 // This code assume following code sequence for zero-extension.
1081 // %6 = COPY %5:sub_32; (optional)
1082 // %8 = IMPLICIT_DEF;
1083 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1084 if (!EnableZExtElimination) break;
1085
1086 if (MI.getOperand(2).getImm() != 0)
1087 break;
1088
1089 Register SrcReg = MI.getOperand(1).getReg();
1090 if (!SrcReg.isVirtual())
1091 break;
1092
1093 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1094 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1095 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1096 break;
1097
1098 MachineInstr *ImpDefMI, *SubRegMI;
1099 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1100 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1101 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1102
1103 SrcMI = SubRegMI;
1104 if (SubRegMI->getOpcode() == PPC::COPY) {
1105 Register CopyReg = SubRegMI->getOperand(1).getReg();
1106 if (CopyReg.isVirtual())
1107 SrcMI = MRI->getVRegDef(CopyReg);
1108 }
1109 if (!SrcMI->getOperand(0).isReg())
1110 break;
1111
1112 unsigned KnownZeroCount =
1113 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1114 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1115 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1116 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1117 MI.getOperand(0).getReg())
1118 .addReg(SrcReg);
1119 addRegToUpdate(SrcReg);
1120 ToErase = &MI;
1121 Simplified = true;
1122 NumEliminatedZExt++;
1123 }
1124 break;
1125 }
1126
1127 // TODO: Any instruction that has an immediate form fed only by a PHI
1128 // whose operands are all load immediate can be folded away. We currently
1129 // do this for ADD instructions, but should expand it to arithmetic and
1130 // binary instructions with immediate forms in the future.
1131 case PPC::ADD4:
1132 case PPC::ADD8: {
1133 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1134 assert(PhiOp && "Invalid Operand!");
1135 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1136
1137 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1138 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1139 };
1140
1141 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1142 MachineOperand *PhiOp) {
1143 assert(PhiOp && "Invalid Operand!");
1144 assert(DominatorOp && "Invalid Operand!");
1145 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1146 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1147
1148 // Note: the vregs only show up at odd indices position of PHI Node,
1149 // the even indices position save the BB info.
1150 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1151 MachineInstr *LiMI =
1152 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1153 if (!LiMI ||
1154 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1155 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1156 !MDT->dominates(DefDomMI, LiMI))
1157 return false;
1158 }
1159
1160 return true;
1161 };
1162
1163 MachineOperand Op1 = MI.getOperand(1);
1164 MachineOperand Op2 = MI.getOperand(2);
1165 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1166 std::swap(Op1, Op2);
1167 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1168 break; // We don't have an ADD fed by LI's that can be transformed
1169
1170 // Now we know that Op1 is the PHI node and Op2 is the dominator
1171 Register DominatorReg = Op2.getReg();
1172
1173 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1174 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1175 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1176 MRI->setRegClass(DominatorReg, TRC);
1177
1178 // replace LIs with ADDIs
1179 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1180 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1181 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1182 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1183 LLVM_DEBUG(LiMI->dump());
1184
1185 // There could be repeated registers in the PHI, e.g: %1 =
1186 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1187 // already replaced the def instruction, skip.
1188 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1189 continue;
1190
1191 assert((LiMI->getOpcode() == PPC::LI ||
1192 LiMI->getOpcode() == PPC::LI8) &&
1193 "Invalid Opcode!");
1194 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1195 LiMI->removeOperand(1); // remove the imm of LI
1196 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1197 : PPC::ADDI8));
1198 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1199 .addReg(DominatorReg)
1200 .addImm(LiImm); // restore the imm of LI
1201 LLVM_DEBUG(LiMI->dump());
1202 }
1203
1204 // Replace ADD with COPY
1205 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1206 LLVM_DEBUG(MI.dump());
1207 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1208 MI.getOperand(0).getReg())
1209 .add(Op1);
1210 addRegToUpdate(Op1.getReg());
1211 addRegToUpdate(Op2.getReg());
1212 ToErase = &MI;
1213 Simplified = true;
1214 NumOptADDLIs++;
1215 break;
1216 }
1217 case PPC::RLDICR: {
1218 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1219 combineSEXTAndSHL(MI, ToErase);
1220 break;
1221 }
1222 case PPC::ANDI_rec:
1223 case PPC::ANDI8_rec:
1224 case PPC::ANDIS_rec:
1225 case PPC::ANDIS8_rec: {
1226 Register TrueReg =
1227 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1228 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1229 break;
1230
1231 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1232 if (!SrcMI)
1233 break;
1234
1235 unsigned SrcOpCode = SrcMI->getOpcode();
1236 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1237 break;
1238
1239 Register SrcReg, DstReg;
1240 SrcReg = SrcMI->getOperand(1).getReg();
1241 DstReg = MI.getOperand(1).getReg();
1242 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1243 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1244 if (DstRC != SrcRC)
1245 break;
1246
1247 uint64_t AndImm = MI.getOperand(2).getImm();
1248 if (MI.getOpcode() == PPC::ANDIS_rec ||
1249 MI.getOpcode() == PPC::ANDIS8_rec)
1250 AndImm <<= 16;
1251 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1252 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1253 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1254
1255 // We can transfer `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
1256 // if all bits to AND are already zero in the input.
1257 bool PatternResultZero =
1258 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1259 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1260
1261 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1262 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1263 bool PatternRemoveRotate =
1264 SrcMI->getOperand(2).getImm() == 0 &&
1265 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1266 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1267
1268 if (!PatternResultZero && !PatternRemoveRotate)
1269 break;
1270
1271 LLVM_DEBUG(dbgs() << "Combining pair: ");
1272 LLVM_DEBUG(SrcMI->dump());
1273 LLVM_DEBUG(MI.dump());
1274 if (PatternResultZero)
1275 MI.getOperand(2).setImm(0);
1276 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1277 LLVM_DEBUG(dbgs() << "To: ");
1278 LLVM_DEBUG(MI.dump());
1279 addRegToUpdate(MI.getOperand(1).getReg());
1280 addRegToUpdate(SrcMI->getOperand(0).getReg());
1281 Simplified = true;
1282 break;
1283 }
1284 case PPC::RLWINM:
1285 case PPC::RLWINM_rec:
1286 case PPC::RLWINM8:
1287 case PPC::RLWINM8_rec: {
1288 // We might replace operand 1 of the instruction which will
1289 // require we recompute kill flags for it.
1290 Register OrigOp1Reg = MI.getOperand(1).isReg()
1291 ? MI.getOperand(1).getReg()
1292 : PPC::NoRegister;
1293 Simplified = TII->combineRLWINM(MI, &ToErase);
1294 if (Simplified) {
1295 addRegToUpdate(OrigOp1Reg);
1296 if (MI.getOperand(1).isReg())
1297 addRegToUpdate(MI.getOperand(1).getReg());
1298 if (ToErase && ToErase->getOperand(1).isReg())
1299 for (auto UseReg : ToErase->explicit_uses())
1300 if (UseReg.isReg())
1301 addRegToUpdate(UseReg.getReg());
1302 ++NumRotatesCollapsed;
1303 }
1304 break;
1305 }
1306 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1307 // always trap, we will delete the node if it will never trap.
1308 case PPC::TDI:
1309 case PPC::TWI:
1310 case PPC::TD:
1311 case PPC::TW: {
1312 if (!EnableTrapOptimization) break;
1313 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1314 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1315 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1316 // We can only do the optimization if we can get immediates
1317 // from both operands
1318 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1319 LiMI1->getOpcode() == PPC::LI8)))
1320 break;
1321 if (!IsOperand2Immediate &&
1322 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1323 LiMI2->getOpcode() == PPC::LI8)))
1324 break;
1325
1326 auto ImmOperand0 = MI.getOperand(0).getImm();
1327 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1328 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1329 : LiMI2->getOperand(1).getImm();
1330
1331 // We will replace the MI with an unconditional trap if it will always
1332 // trap.
1333 if ((ImmOperand0 == 31) ||
1334 ((ImmOperand0 & 0x10) &&
1335 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1336 ((ImmOperand0 & 0x8) &&
1337 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1338 ((ImmOperand0 & 0x2) &&
1339 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1340 ((ImmOperand0 & 0x1) &&
1341 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1342 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1343 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1344 TrapOpt = true;
1345 }
1346 // We will delete the MI if it will never trap.
1347 ToErase = &MI;
1348 Simplified = true;
1349 break;
1350 }
1351 }
1352 }
1353
1354 // If the last instruction was marked for elimination,
1355 // remove it now.
1356 if (ToErase) {
1357 recomputeLVForDyingInstr();
1358 ToErase->eraseFromParent();
1359 ToErase = nullptr;
1360 }
1361 // Reset TrapOpt to false at the end of the basic block.
1363 TrapOpt = false;
1364 }
1365
1366 // Eliminate all the TOC save instructions which are redundant.
1367 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1368 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1369 if (FI->mustSaveTOC())
1370 NumTOCSavesInPrologue++;
1371
1372 // We try to eliminate redundant compare instruction.
1373 Simplified |= eliminateRedundantCompare();
1374
1375 // If we have made any modifications and added any registers to the set of
1376 // registers for which we need to update the kill flags, do so by recomputing
1377 // LiveVariables for those registers.
1378 for (Register Reg : RegsToUpdate) {
1379 if (!MRI->reg_empty(Reg))
1381 }
1382 return Simplified;
1383}
1384
1385// helper functions for eliminateRedundantCompare
1386static bool isEqOrNe(MachineInstr *BI) {
1388 unsigned PredCond = PPC::getPredicateCondition(Pred);
1389 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1390}
1391
1392static bool isSupportedCmpOp(unsigned opCode) {
1393 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1394 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1395 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1396 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1397}
1398
1399static bool is64bitCmpOp(unsigned opCode) {
1400 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1401 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1402}
1403
1404static bool isSignedCmpOp(unsigned opCode) {
1405 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1406 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1407}
1408
1409static unsigned getSignedCmpOpCode(unsigned opCode) {
1410 if (opCode == PPC::CMPLD) return PPC::CMPD;
1411 if (opCode == PPC::CMPLW) return PPC::CMPW;
1412 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1413 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1414 return opCode;
1415}
1416
1417// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1418// (LT x) to (LE x-1)
1419static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1420 uint64_t Imm = CMPI->getOperand(2).getImm();
1421 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1422 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1423 return 0;
1424
1426 unsigned PredCond = PPC::getPredicateCondition(Pred);
1427 unsigned PredHint = PPC::getPredicateHint(Pred);
1428 if (PredCond == PPC::PRED_GE)
1429 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1430 if (PredCond == PPC::PRED_LT)
1431 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1432
1433 return 0;
1434}
1435
1436// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1437// (LE x) to (LT x+1)
1438static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1439 uint64_t Imm = CMPI->getOperand(2).getImm();
1440 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1441 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1442 return 0;
1443
1445 unsigned PredCond = PPC::getPredicateCondition(Pred);
1446 unsigned PredHint = PPC::getPredicateHint(Pred);
1447 if (PredCond == PPC::PRED_GT)
1448 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1449 if (PredCond == PPC::PRED_LE)
1450 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1451
1452 return 0;
1453}
1454
1455// This takes a Phi node and returns a register value for the specified BB.
1456static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1457 MachineBasicBlock *MBB) {
1458 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1459 MachineOperand &MO = Phi->getOperand(I);
1460 if (MO.getMBB() == MBB)
1461 return Phi->getOperand(I-1).getReg();
1462 }
1463 llvm_unreachable("invalid src basic block for this Phi node\n");
1464 return 0;
1465}
1466
1467// This function tracks the source of the register through register copy.
1468// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1469// assuming that the control comes from BB1 into BB2.
1470static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1471 MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
1472 unsigned SrcReg = Reg;
1473 while (true) {
1474 unsigned NextReg = SrcReg;
1475 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1476 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1477 NextReg = getIncomingRegForBlock(Inst, BB1);
1478 // We track through PHI only once to avoid infinite loop.
1479 BB1 = nullptr;
1480 }
1481 else if (Inst->isFullCopy())
1482 NextReg = Inst->getOperand(1).getReg();
1483 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1484 break;
1485 SrcReg = NextReg;
1486 }
1487 return SrcReg;
1488}
1489
1490static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1491 MachineBasicBlock *&PredMBB,
1492 MachineBasicBlock *&MBBtoMoveCmp,
1493 MachineRegisterInfo *MRI) {
1494
1495 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1496 auto BII = BB.getFirstInstrTerminator();
1497 // We optimize BBs ending with a conditional branch.
1498 // We check only for BCC here, not BCCLR, because BCCLR
1499 // will be formed only later in the pipeline.
1500 if (BB.succ_size() == 2 &&
1501 BII != BB.instr_end() &&
1502 (*BII).getOpcode() == PPC::BCC &&
1503 (*BII).getOperand(1).isReg()) {
1504 // We optimize only if the condition code is used only by one BCC.
1505 Register CndReg = (*BII).getOperand(1).getReg();
1506 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1507 return false;
1508
1509 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1510 // We assume compare and branch are in the same BB for ease of analysis.
1511 if (CMPI->getParent() != &BB)
1512 return false;
1513
1514 // We skip this BB if a physical register is used in comparison.
1515 for (MachineOperand &MO : CMPI->operands())
1516 if (MO.isReg() && !MO.getReg().isVirtual())
1517 return false;
1518
1519 return true;
1520 }
1521 return false;
1522 };
1523
1524 // If this BB has more than one successor, we can create a new BB and
1525 // move the compare instruction in the new BB.
1526 // So far, we do not move compare instruction to a BB having multiple
1527 // successors to avoid potentially increasing code size.
1528 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1529 return BB.succ_size() == 1;
1530 };
1531
1532 if (!isEligibleBB(MBB))
1533 return false;
1534
1535 unsigned NumPredBBs = MBB.pred_size();
1536 if (NumPredBBs == 1) {
1537 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1538 if (isEligibleBB(*TmpMBB)) {
1539 PredMBB = TmpMBB;
1540 MBBtoMoveCmp = nullptr;
1541 return true;
1542 }
1543 }
1544 else if (NumPredBBs == 2) {
1545 // We check for partially redundant case.
1546 // So far, we support cases with only two predecessors
1547 // to avoid increasing the number of instructions.
1549 MachineBasicBlock *Pred1MBB = *PI;
1550 MachineBasicBlock *Pred2MBB = *(PI+1);
1551
1552 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1553 // We assume Pred1MBB is the BB containing the compare to be merged and
1554 // Pred2MBB is the BB to which we will append a compare instruction.
1555 // Proceed as is if Pred1MBB is different from MBB.
1556 }
1557 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1558 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1559 std::swap(Pred1MBB, Pred2MBB);
1560 }
1561 else return false;
1562
1563 if (Pred1MBB == &MBB)
1564 return false;
1565
1566 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1567 // We cannot move the compare instruction if operands are not available
1568 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1569 MachineInstr *BI = &*MBB.getFirstInstrTerminator();
1570 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1571 for (int I = 1; I <= 2; I++)
1572 if (CMPI->getOperand(I).isReg()) {
1573 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1574 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1575 return false;
1576 }
1577
1578 PredMBB = Pred1MBB;
1579 MBBtoMoveCmp = Pred2MBB;
1580 return true;
1581 }
1582
1583 return false;
1584}
1585
1586// This function will iterate over the input map containing a pair of TOC save
1587// instruction and a flag. The flag will be set to false if the TOC save is
1588// proven redundant. This function will erase from the basic block all the TOC
1589// saves marked as redundant.
1590bool PPCMIPeephole::eliminateRedundantTOCSaves(
1591 std::map<MachineInstr *, bool> &TOCSaves) {
1592 bool Simplified = false;
1593 int NumKept = 0;
1594 for (auto TOCSave : TOCSaves) {
1595 if (!TOCSave.second) {
1596 TOCSave.first->eraseFromParent();
1597 RemoveTOCSave++;
1598 Simplified = true;
1599 } else {
1600 NumKept++;
1601 }
1602 }
1603
1604 if (NumKept > 1)
1605 MultiTOCSaves++;
1606
1607 return Simplified;
1608}
1609
1610// If multiple conditional branches are executed based on the (essentially)
1611// same comparison, we merge compare instructions into one and make multiple
1612// conditional branches on this comparison.
1613// For example,
1614// if (a == 0) { ... }
1615// else if (a < 0) { ... }
1616// can be executed by one compare and two conditional branches instead of
1617// two pairs of a compare and a conditional branch.
1618//
1619// This method merges two compare instructions in two MBBs and modifies the
1620// compare and conditional branch instructions if needed.
1621// For the above example, the input for this pass looks like:
1622// cmplwi r3, 0
1623// beq 0, .LBB0_3
1624// cmpwi r3, -1
1625// bgt 0, .LBB0_4
1626// So, before merging two compares, we need to modify these instructions as
1627// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1628// beq 0, .LBB0_3
1629// cmpwi r3, 0 ; greater than -1 means greater or equal to 0
1630// bge 0, .LBB0_4
1631
1632bool PPCMIPeephole::eliminateRedundantCompare() {
1633 bool Simplified = false;
1634
1635 for (MachineBasicBlock &MBB2 : *MF) {
1636 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1637
1638 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1639 // as an optimization target if
1640 // - both MBBs end with a conditional branch,
1641 // - MBB1 is the only predecessor of MBB2, and
1642 // - compare does not take a physical register as a operand in both MBBs.
1643 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1644 //
1645 // As partially redundant case, we additionally handle if MBB2 has one
1646 // additional predecessor, which has only one successor (MBB2).
1647 // In this case, we move the compare instruction originally in MBB2 into
1648 // MBBtoMoveCmp. This partially redundant case typically appears when
1649 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1650 //
1651 // Overview of CFG of related basic blocks
1652 // Fully redundant case Partially redundant case
1653 // -------- ---------------- --------
1654 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1655 // -------- ---------------- --------
1656 // | \ (w/ 1 succ) \ | \
1657 // | \ \ | \
1658 // | \ |
1659 // -------- --------
1660 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1661 // -------- and 2 succ) -------- and 2 succ)
1662 // | \ | \
1663 // | \ | \
1664 //
1665 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1666 continue;
1667
1668 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1669 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1670
1671 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1672 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1673 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1674
1675 // We cannot optimize an unsupported compare opcode or
1676 // a mix of 32-bit and 64-bit comparisons
1677 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1678 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1679 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1680 continue;
1681
1682 unsigned NewOpCode = 0;
1683 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1684 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1685 bool SwapOperands = false;
1686
1687 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1688 // Typically, unsigned comparison is used for equality check, but
1689 // we replace it with a signed comparison if the comparison
1690 // to be merged is a signed comparison.
1691 // In other cases of opcode mismatch, we cannot optimize this.
1692
1693 // We cannot change opcode when comparing against an immediate
1694 // if the most significant bit of the immediate is one
1695 // due to the difference in sign extension.
1696 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1697 if (!I->getOperand(2).isImm())
1698 return false;
1699 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1700 return Imm < 0;
1701 };
1702
1703 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1704 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1705 NewOpCode = CMPI1->getOpcode();
1706 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1707 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1708 NewOpCode = CMPI2->getOpcode();
1709 else continue;
1710 }
1711
1712 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1713 // In case of comparisons between two registers, these two registers
1714 // must be same to merge two comparisons.
1715 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1716 nullptr, nullptr, MRI);
1717 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1718 nullptr, nullptr, MRI);
1719 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1720 MBB1, &MBB2, MRI);
1721 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1722 MBB1, &MBB2, MRI);
1723
1724 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1725 // Same pair of registers in the same order; ready to merge as is.
1726 }
1727 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1728 // Same pair of registers in different order.
1729 // We reverse the predicate to merge compare instructions.
1731 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1732 // In case of partial redundancy, we need to swap operands
1733 // in another compare instruction.
1734 SwapOperands = true;
1735 }
1736 else continue;
1737 }
1738 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1739 // In case of comparisons between a register and an immediate,
1740 // the operand register must be same for two compare instructions.
1741 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1742 nullptr, nullptr, MRI);
1743 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1744 MBB1, &MBB2, MRI);
1745 if (Cmp1Operand1 != Cmp2Operand1)
1746 continue;
1747
1748 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1749 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1750
1751 // If immediate are not same, we try to adjust by changing predicate;
1752 // e.g. GT imm means GE (imm+1).
1753 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1754 int Diff = Imm1 - Imm2;
1755 if (Diff < -2 || Diff > 2)
1756 continue;
1757
1758 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1759 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1760 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1761 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1762 if (Diff == 2) {
1763 if (PredToInc2 && PredToDec1) {
1764 NewPredicate2 = PredToInc2;
1765 NewPredicate1 = PredToDec1;
1766 NewImm2++;
1767 NewImm1--;
1768 }
1769 }
1770 else if (Diff == 1) {
1771 if (PredToInc2) {
1772 NewImm2++;
1773 NewPredicate2 = PredToInc2;
1774 }
1775 else if (PredToDec1) {
1776 NewImm1--;
1777 NewPredicate1 = PredToDec1;
1778 }
1779 }
1780 else if (Diff == -1) {
1781 if (PredToDec2) {
1782 NewImm2--;
1783 NewPredicate2 = PredToDec2;
1784 }
1785 else if (PredToInc1) {
1786 NewImm1++;
1787 NewPredicate1 = PredToInc1;
1788 }
1789 }
1790 else if (Diff == -2) {
1791 if (PredToDec2 && PredToInc1) {
1792 NewPredicate2 = PredToDec2;
1793 NewPredicate1 = PredToInc1;
1794 NewImm2--;
1795 NewImm1++;
1796 }
1797 }
1798 }
1799
1800 // We cannot merge two compares if the immediates are not same.
1801 if (NewImm2 != NewImm1)
1802 continue;
1803 }
1804
1805 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1806 LLVM_DEBUG(CMPI1->dump());
1807 LLVM_DEBUG(BI1->dump());
1808 LLVM_DEBUG(CMPI2->dump());
1809 LLVM_DEBUG(BI2->dump());
1810 for (const MachineOperand &MO : CMPI1->operands())
1811 if (MO.isReg())
1812 addRegToUpdate(MO.getReg());
1813 for (const MachineOperand &MO : CMPI2->operands())
1814 if (MO.isReg())
1815 addRegToUpdate(MO.getReg());
1816
1817 // We adjust opcode, predicates and immediate as we determined above.
1818 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1819 CMPI1->setDesc(TII->get(NewOpCode));
1820 }
1821 if (NewPredicate1) {
1822 BI1->getOperand(0).setImm(NewPredicate1);
1823 }
1824 if (NewPredicate2) {
1825 BI2->getOperand(0).setImm(NewPredicate2);
1826 }
1827 if (NewImm1 != Imm1) {
1828 CMPI1->getOperand(2).setImm(NewImm1);
1829 }
1830
1831 if (IsPartiallyRedundant) {
1832 // We touch up the compare instruction in MBB2 and move it to
1833 // a previous BB to handle partially redundant case.
1834 if (SwapOperands) {
1835 Register Op1 = CMPI2->getOperand(1).getReg();
1836 Register Op2 = CMPI2->getOperand(2).getReg();
1837 CMPI2->getOperand(1).setReg(Op2);
1838 CMPI2->getOperand(2).setReg(Op1);
1839 }
1840 if (NewImm2 != Imm2)
1841 CMPI2->getOperand(2).setImm(NewImm2);
1842
1843 for (int I = 1; I <= 2; I++) {
1844 if (CMPI2->getOperand(I).isReg()) {
1845 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1846 if (Inst->getParent() != &MBB2)
1847 continue;
1848
1849 assert(Inst->getOpcode() == PPC::PHI &&
1850 "We cannot support if an operand comes from this BB.");
1851 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1852 CMPI2->getOperand(I).setReg(SrcReg);
1853 addRegToUpdate(SrcReg);
1854 }
1855 }
1856 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1857 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1858
1859 DebugLoc DL = CMPI2->getDebugLoc();
1860 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1861 BuildMI(MBB2, MBB2.begin(), DL,
1862 TII->get(PPC::PHI), NewVReg)
1863 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1864 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1865 BI2->getOperand(1).setReg(NewVReg);
1866 addRegToUpdate(NewVReg);
1867 }
1868 else {
1869 // We finally eliminate compare instruction in MBB2.
1870 // We do not need to treat CMPI2 specially here in terms of re-computing
1871 // live variables even though it is being deleted because:
1872 // - It defines a register that has a single use (already checked in
1873 // eligibleForCompareElimination())
1874 // - The only user (BI2) is no longer using it so the register is dead (no
1875 // def, no uses)
1876 // - We do not attempt to recompute live variables for dead registers
1877 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1878 CMPI2->eraseFromParent();
1879 }
1880
1881 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1882 LLVM_DEBUG(CMPI1->dump());
1883 LLVM_DEBUG(BI1->dump());
1884 LLVM_DEBUG(BI2->dump());
1885 if (IsPartiallyRedundant) {
1886 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1887 << printMBBReference(*MBBtoMoveCmp)
1888 << " to handle partial redundancy.\n");
1889 LLVM_DEBUG(CMPI2->dump());
1890 }
1891 Simplified = true;
1892 }
1893
1894 return Simplified;
1895}
1896
1897// We miss the opportunity to emit an RLDIC when lowering jump tables
1898// since ISEL sees only a single basic block. When selecting, the clear
1899// and shift left will be in different blocks.
1900bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1901 MachineInstr *&ToErase) {
1902 if (MI.getOpcode() != PPC::RLDICR)
1903 return false;
1904
1905 Register SrcReg = MI.getOperand(1).getReg();
1906 if (!SrcReg.isVirtual())
1907 return false;
1908
1909 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1910 if (SrcMI->getOpcode() != PPC::RLDICL)
1911 return false;
1912
1913 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1914 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1915 MachineOperand MOpSHMI = MI.getOperand(2);
1916 MachineOperand MOpMEMI = MI.getOperand(3);
1917 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1918 MOpMEMI.isImm()))
1919 return false;
1920
1921 uint64_t SHSrc = MOpSHSrc.getImm();
1922 uint64_t MBSrc = MOpMBSrc.getImm();
1923 uint64_t SHMI = MOpSHMI.getImm();
1924 uint64_t MEMI = MOpMEMI.getImm();
1925 uint64_t NewSH = SHSrc + SHMI;
1926 uint64_t NewMB = MBSrc - SHMI;
1927 if (NewMB > 63 || NewSH > 63)
1928 return false;
1929
1930 // The bits cleared with RLDICL are [0, MBSrc).
1931 // The bits cleared with RLDICR are (MEMI, 63].
1932 // After the sequence, the bits cleared are:
1933 // [0, MBSrc-SHMI) and (MEMI, 63).
1934 //
1935 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1936 if ((63 - NewSH) != MEMI)
1937 return false;
1938
1939 LLVM_DEBUG(dbgs() << "Converting pair: ");
1940 LLVM_DEBUG(SrcMI->dump());
1941 LLVM_DEBUG(MI.dump());
1942
1943 MI.setDesc(TII->get(PPC::RLDIC));
1944 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1945 MI.getOperand(2).setImm(NewSH);
1946 MI.getOperand(3).setImm(NewMB);
1947 addRegToUpdate(MI.getOperand(1).getReg());
1948 addRegToUpdate(SrcMI->getOperand(0).getReg());
1949
1950 LLVM_DEBUG(dbgs() << "To: ");
1951 LLVM_DEBUG(MI.dump());
1952 NumRotatesCollapsed++;
1953 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1954 if (MRI->use_nodbg_empty(SrcReg)) {
1955 assert(!SrcMI->hasImplicitDef() &&
1956 "Not expecting an implicit def with this instr.");
1957 ToErase = SrcMI;
1958 }
1959 return true;
1960}
1961
1962// For case in LLVM IR
1963// entry:
1964// %iconv = sext i32 %index to i64
1965// br i1 undef label %true, label %false
1966// true:
1967// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1968// ...
1969// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1970// different BBs when conducting instruction selection. We can do a peephole
1971// optimization to combine these two instructions into extswsli after
1972// instruction selection.
1973bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1974 MachineInstr *&ToErase) {
1975 if (MI.getOpcode() != PPC::RLDICR)
1976 return false;
1977
1978 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1979 return false;
1980
1981 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1982
1983 MachineOperand MOpSHMI = MI.getOperand(2);
1984 MachineOperand MOpMEMI = MI.getOperand(3);
1985 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1986 return false;
1987
1988 uint64_t SHMI = MOpSHMI.getImm();
1989 uint64_t MEMI = MOpMEMI.getImm();
1990 if (SHMI + MEMI != 63)
1991 return false;
1992
1993 Register SrcReg = MI.getOperand(1).getReg();
1994 if (!SrcReg.isVirtual())
1995 return false;
1996
1997 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1998 if (SrcMI->getOpcode() != PPC::EXTSW &&
1999 SrcMI->getOpcode() != PPC::EXTSW_32_64)
2000 return false;
2001
2002 // If the register defined by extsw has more than one use, combination is not
2003 // needed.
2004 if (!MRI->hasOneNonDBGUse(SrcReg))
2005 return false;
2006
2007 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2008 assert(SrcMI->getOperand(1).isReg() &&
2009 "EXTSW's second operand should be a register");
2010 if (!SrcMI->getOperand(1).getReg().isVirtual())
2011 return false;
2012
2013 LLVM_DEBUG(dbgs() << "Combining pair: ");
2014 LLVM_DEBUG(SrcMI->dump());
2015 LLVM_DEBUG(MI.dump());
2016
2017 MachineInstr *NewInstr =
2018 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
2019 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
2020 : TII->get(PPC::EXTSWSLI_32_64),
2021 MI.getOperand(0).getReg())
2022 .add(SrcMI->getOperand(1))
2023 .add(MOpSHMI);
2024 (void)NewInstr;
2025
2026 LLVM_DEBUG(dbgs() << "TO: ");
2027 LLVM_DEBUG(NewInstr->dump());
2028 ++NumEXTSWAndSLDICombined;
2029 ToErase = &MI;
2030 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2031 // can recompute its kill flags. We run DCE immediately after this pass
2032 // to clean up dead instructions such as this.
2033 addRegToUpdate(NewInstr->getOperand(1).getReg());
2034 addRegToUpdate(SrcMI->getOperand(0).getReg());
2035 return true;
2036}
2037
2038} // end default namespace
2039
2041 "PowerPC MI Peephole Optimization", false, false)
2047 "PowerPC MI Peephole Optimization", false, false)
2048
2049char PPCMIPeephole::ID = 0;
2051llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
#define DEBUG_TYPE
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define addRegToUpdate(R)
static cl::opt< bool > EnableZExtElimination("ppc-eliminate-zeroext", cl::desc("enable elimination of zero-extensions"), cl::init(true), cl::Hidden)
static cl::opt< bool > FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), cl::desc("Iterate to a fixed point when attempting to " "convert reg-reg instructions to reg-imm"))
static cl::opt< bool > EnableTrapOptimization("ppc-opt-conditional-trap", cl::desc("enable optimization of conditional traps"), cl::init(false), cl::Hidden)
static cl::opt< bool > ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), cl::desc("Convert eligible reg+reg instructions to reg+imm"))
static cl::opt< bool > EnableSExtElimination("ppc-eliminate-signext", cl::desc("enable elimination of sign-extensions"), cl::init(true), cl::Hidden)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static bool shouldExecute(unsigned CounterName)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:187
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:161
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
LLVM_ABI BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
Analysis pass which computes a MachineDominatorTree.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
bool isFullCopy() const
mop_range operands()
mop_range explicit_uses()
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void dump() const
Definition Pass.cpp:146
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:197
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
void push_back(const T &Elt)
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
@ Keep
No function return thunk.
Definition CodeGen.h:156
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createPPCMIPeepholePass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853