LLVM 19.0.0git
PPCMIPeephole.cpp
Go to the documentation of this file.
1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
29
32#include "PPC.h"
33#include "PPCInstrBuilder.h"
34#include "PPCInstrInfo.h"
36#include "PPCTargetMachine.h"
37#include "llvm/ADT/Statistic.h"
47#include "llvm/Support/Debug.h"
48
49using namespace llvm;
50
51#define DEBUG_TYPE "ppc-mi-peepholes"
52
53STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
54STATISTIC(MultiTOCSaves,
55 "Number of functions with multiple TOC saves that must be kept");
56STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
57STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
58STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
59STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
60STATISTIC(NumConvertedToImmediateForm,
61 "Number of instructions converted to their immediate form");
62STATISTIC(NumFunctionsEnteredInMIPeephole,
63 "Number of functions entered in PPC MI Peepholes");
64STATISTIC(NumFixedPointIterations,
65 "Number of fixed-point iterations converting reg-reg instructions "
66 "to reg-imm ones");
67STATISTIC(NumRotatesCollapsed,
68 "Number of pairs of rotate left, clear left/right collapsed");
69STATISTIC(NumEXTSWAndSLDICombined,
70 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
71STATISTIC(NumLoadImmZeroFoldedAndRemoved,
72 "Number of LI(8) reg, 0 that are folded to r0 and removed");
73
74static cl::opt<bool>
75FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
76 cl::desc("Iterate to a fixed point when attempting to "
77 "convert reg-reg instructions to reg-imm"));
78
79static cl::opt<bool>
80ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
81 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
82
83static cl::opt<bool>
84 EnableSExtElimination("ppc-eliminate-signext",
85 cl::desc("enable elimination of sign-extensions"),
86 cl::init(true), cl::Hidden);
87
88static cl::opt<bool>
89 EnableZExtElimination("ppc-eliminate-zeroext",
90 cl::desc("enable elimination of zero-extensions"),
91 cl::init(true), cl::Hidden);
92
93static cl::opt<bool>
94 EnableTrapOptimization("ppc-opt-conditional-trap",
95 cl::desc("enable optimization of conditional traps"),
96 cl::init(false), cl::Hidden);
97
98namespace {
99
100struct PPCMIPeephole : public MachineFunctionPass {
101
102 static char ID;
103 const PPCInstrInfo *TII;
104 MachineFunction *MF;
106 LiveVariables *LV;
107
108 PPCMIPeephole() : MachineFunctionPass(ID) {
110 }
111
112private:
116 BlockFrequency EntryFreq;
117 SmallSet<Register, 16> RegsToUpdate;
118
119 // Initialize class variables.
120 void initialize(MachineFunction &MFParm);
121
122 // Perform peepholes.
123 bool simplifyCode();
124
125 // Perform peepholes.
126 bool eliminateRedundantCompare();
127 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
128 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
129 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
130 MachineInstr *&ToErase);
131 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
133
134 // A number of transformations will eliminate the definition of a register
135 // as all of its uses will be removed. However, this leaves a register
136 // without a definition for LiveVariables. Such transformations should
137 // use this function to provide a dummy definition of the register that
138 // will simply be removed by DCE.
139 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
140 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
141 }
142 void addRegToUpdateWithLine(Register Reg, int Line);
143 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
145 Register Dst);
146
147public:
148
149 void getAnalysisUsage(AnalysisUsage &AU) const override {
159 }
160
161 // Main entry point for this pass.
162 bool runOnMachineFunction(MachineFunction &MF) override {
163 initialize(MF);
164 // At this point, TOC pointer should not be used in a function that uses
165 // PC-Relative addressing.
166 assert((MF.getRegInfo().use_empty(PPC::X2) ||
168 "TOC pointer used in a function using PC-Relative addressing!");
169 if (skipFunction(MF.getFunction()))
170 return false;
171 bool Changed = simplifyCode();
172#ifndef NDEBUG
173 if (Changed)
174 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
175 "-mllvm -disable-ppc-peephole");
176#endif
177 return Changed;
178 }
179};
180
181#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
182void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
184 return;
185 if (RegsToUpdate.insert(Reg).second)
186 LLVM_DEBUG(dbgs() << "Adding register: " << Register::virtReg2Index(Reg)
187 << " on line " << Line
188 << " for re-computation of kill flags\n");
189}
190
191// Initialize class variables.
192void PPCMIPeephole::initialize(MachineFunction &MFParm) {
193 MF = &MFParm;
194 MRI = &MF->getRegInfo();
195 MDT = &getAnalysis<MachineDominatorTree>();
196 MPDT = &getAnalysis<MachinePostDominatorTree>();
197 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
198 LV = &getAnalysis<LiveVariables>();
199 EntryFreq = MBFI->getEntryFreq();
200 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
201 RegsToUpdate.clear();
202 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
203 LLVM_DEBUG(MF->dump());
204}
205
206static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
208 assert(Op && "Invalid Operand!");
209 if (!Op->isReg())
210 return nullptr;
211
212 Register Reg = Op->getReg();
213 if (!Reg.isVirtual())
214 return nullptr;
215
216 return MRI->getVRegDef(Reg);
217}
218
219// This function returns number of known zero bits in output of MI
220// starting from the most significant bit.
221static unsigned getKnownLeadingZeroCount(const unsigned Reg,
222 const PPCInstrInfo *TII,
223 const MachineRegisterInfo *MRI) {
224 MachineInstr *MI = MRI->getVRegDef(Reg);
225 unsigned Opcode = MI->getOpcode();
226 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
227 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
228 return MI->getOperand(3).getImm();
229
230 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
231 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
232 return MI->getOperand(3).getImm();
233
234 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
235 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
236 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
237 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
238 return 32 + MI->getOperand(3).getImm();
239
240 if (Opcode == PPC::ANDI_rec) {
241 uint16_t Imm = MI->getOperand(2).getImm();
242 return 48 + llvm::countl_zero(Imm);
243 }
244
245 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
246 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
247 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
248 // The result ranges from 0 to 32.
249 return 58;
250
251 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
252 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
253 // The result ranges from 0 to 64.
254 return 57;
255
256 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
257 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
258 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
259 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
260 return 48;
261
262 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
263 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
264 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
265 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
266 return 56;
267
268 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
269 Opcode == PPC::AND8_rec)
270 return std::max(
271 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
272 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
273
274 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
275 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
276 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
277 Opcode == PPC::XOR8_rec)
278 return std::min(
279 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
280 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
281
282 if (TII->isZeroExtended(Reg, MRI))
283 return 32;
284
285 return 0;
286}
287
288// This function maintains a map for the pairs <TOC Save Instr, Keep>
289// Each time a new TOC save is encountered, it checks if any of the existing
290// ones are dominated by the new one. If so, it marks the existing one as
291// redundant by setting it's entry in the map as false. It then adds the new
292// instruction to the map with either true or false depending on if any
293// existing instructions dominated the new one.
294void PPCMIPeephole::UpdateTOCSaves(
295 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
296 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
297 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
298 // here only support it under ELFv2.
299 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
301
302 MachineBasicBlock *Entry = &MF->front();
303 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
304
305 // If the block in which the TOC save resides is in a block that
306 // post-dominates Entry, or a block that is hotter than entry (keep in mind
307 // that early MachineLICM has already run so the TOC save won't be hoisted)
308 // we can just do the save in the prologue.
309 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
310 FI->setMustSaveTOC(true);
311
312 // If we are saving the TOC in the prologue, all the TOC saves can be
313 // removed from the code.
314 if (FI->mustSaveTOC()) {
315 for (auto &TOCSave : TOCSaves)
316 TOCSave.second = false;
317 // Add new instruction to map.
318 TOCSaves[MI] = false;
319 return;
320 }
321 }
322
323 bool Keep = true;
324 for (auto &I : TOCSaves) {
325 MachineInstr *CurrInst = I.first;
326 // If new instruction dominates an existing one, mark existing one as
327 // redundant.
328 if (I.second && MDT->dominates(MI, CurrInst))
329 I.second = false;
330 // Check if the new instruction is redundant.
331 if (MDT->dominates(CurrInst, MI)) {
332 Keep = false;
333 break;
334 }
335 }
336 // Add new instruction to map.
337 TOCSaves[MI] = Keep;
338}
339
340// This function returns a list of all PHI nodes in the tree starting from
341// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
342// The list initially only contains the root PHI. When we visit a PHI node, we
343// add it to the list. We continue to look for other PHI node operands while
344// there are nodes to visit in the list. The function returns false if the
345// optimization cannot be applied on this tree.
346static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
347 MachineInstr *RootPHI,
349 PHIs.push_back(RootPHI);
350 unsigned VisitedIndex = 0;
351 while (VisitedIndex < PHIs.size()) {
352 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
353 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
354 PHIOp != NumOps; PHIOp += 2) {
355 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
356 if (!RegOp.isVirtual())
357 return false;
358 MachineInstr *Instr = MRI->getVRegDef(RegOp);
359 // While collecting the PHI nodes, we check if they can be converted (i.e.
360 // all the operands are either copies, implicit defs or PHI nodes).
361 unsigned Opcode = Instr->getOpcode();
362 if (Opcode == PPC::COPY) {
363 Register Reg = Instr->getOperand(1).getReg();
364 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
365 return false;
366 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
367 return false;
368 // If we detect a cycle in the PHI nodes, we exit. It would be
369 // possible to change cycles as well, but that would add a lot
370 // of complexity for a case that is unlikely to occur with MMA
371 // code.
372 if (Opcode != PPC::PHI)
373 continue;
374 if (llvm::is_contained(PHIs, Instr))
375 return false;
376 PHIs.push_back(Instr);
377 }
378 VisitedIndex++;
379 }
380 return true;
381}
382
383// This function changes the unprimed accumulator PHI nodes in the PHIs list to
384// primed accumulator PHI nodes. The list is traversed in reverse order to
385// change all the PHI operands of a PHI node before changing the node itself.
386// We keep a map to associate each changed PHI node to its non-changed form.
387void PPCMIPeephole::convertUnprimedAccPHIs(
391 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
393 // We check if the current PHI node can be changed by looking at its
394 // operands. If all the operands are either copies from primed
395 // accumulators, implicit definitions or other unprimed accumulator
396 // PHI nodes, we change it.
397 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
398 PHIOp += 2) {
399 Register RegOp = PHI->getOperand(PHIOp).getReg();
400 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
401 unsigned Opcode = PHIInput->getOpcode();
402 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
403 Opcode == PPC::PHI) &&
404 "Unexpected instruction");
405 if (Opcode == PPC::COPY) {
406 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
407 &PPC::ACCRCRegClass &&
408 "Unexpected register class");
409 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
410 } else if (Opcode == PPC::IMPLICIT_DEF) {
411 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
412 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
413 TII->get(PPC::IMPLICIT_DEF), AccReg);
414 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
415 PHI->getOperand(PHIOp + 1)});
416 } else if (Opcode == PPC::PHI) {
417 // We found a PHI operand. At this point we know this operand
418 // has already been changed so we get its associated changed form
419 // from the map.
420 assert(ChangedPHIMap.count(PHIInput) == 1 &&
421 "This PHI node should have already been changed.");
422 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
424 PrimedAccPHI->getOperand(0).getReg(), false),
425 PHI->getOperand(PHIOp + 1)});
426 }
427 }
428 Register AccReg = Dst;
429 // If the PHI node we are changing is the root node, the register it defines
430 // will be the destination register of the original copy (of the PHI def).
431 // For all other PHI's in the list, we need to create another primed
432 // accumulator virtual register as the PHI will no longer define the
433 // unprimed accumulator.
434 if (PHI != PHIs[0])
435 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
437 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
438 for (auto RegMBB : PHIOps) {
439 NewPHI.add(RegMBB.first).add(RegMBB.second);
440 if (MRI->isSSA())
441 addRegToUpdate(RegMBB.first.getReg());
442 }
443 ChangedPHIMap[PHI] = NewPHI.getInstr();
444 LLVM_DEBUG(dbgs() << "Converting PHI: ");
445 LLVM_DEBUG(PHI->dump());
446 LLVM_DEBUG(dbgs() << "To: ");
447 LLVM_DEBUG(NewPHI.getInstr()->dump());
448 }
449}
450
451// Perform peephole optimizations.
452bool PPCMIPeephole::simplifyCode() {
453 bool Simplified = false;
454 bool TrapOpt = false;
455 MachineInstr* ToErase = nullptr;
456 std::map<MachineInstr *, bool> TOCSaves;
457 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
458 NumFunctionsEnteredInMIPeephole++;
459 if (ConvertRegReg) {
460 // Fixed-point conversion of reg/reg instructions fed by load-immediate
461 // into reg/imm instructions. FIXME: This is expensive, control it with
462 // an option.
463 bool SomethingChanged = false;
464 do {
465 NumFixedPointIterations++;
466 SomethingChanged = false;
467 for (MachineBasicBlock &MBB : *MF) {
468 for (MachineInstr &MI : MBB) {
469 if (MI.isDebugInstr())
470 continue;
471
472 SmallSet<Register, 4> RRToRIRegsToUpdate;
473 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
474 continue;
475 for (Register R : RRToRIRegsToUpdate)
477 // The updated instruction may now have new register operands.
478 // Conservatively add them to recompute the flags as well.
479 for (const MachineOperand &MO : MI.operands())
480 if (MO.isReg())
481 addRegToUpdate(MO.getReg());
482 // We don't erase anything in case the def has other uses. Let DCE
483 // remove it if it can be removed.
484 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
485 LLVM_DEBUG(MI.dump());
486 NumConvertedToImmediateForm++;
487 SomethingChanged = true;
488 Simplified = true;
489 continue;
490 }
491 }
492 } while (SomethingChanged && FixedPointRegToImm);
493 }
494
495 // Since we are deleting this instruction, we need to run LiveVariables
496 // on any of its definitions that are marked as needing an update since
497 // we can't run LiveVariables on a deleted register. This only needs
498 // to be done for defs since uses will have their own defining
499 // instructions so we won't be running LiveVariables on a deleted reg.
500 auto recomputeLVForDyingInstr = [&]() {
501 if (RegsToUpdate.empty())
502 return;
503 for (MachineOperand &MO : ToErase->operands()) {
504 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
505 continue;
506 Register RegToUpdate = MO.getReg();
507 RegsToUpdate.erase(RegToUpdate);
508 // If some transformation has introduced an additional definition of
509 // this register (breaking SSA), we can safely convert this def to
510 // a def of an invalid register as the instruction is going away.
511 if (!MRI->getUniqueVRegDef(RegToUpdate))
512 MO.setReg(PPC::NoRegister);
513 LV->recomputeForSingleDefVirtReg(RegToUpdate);
514 }
515 };
516
517 for (MachineBasicBlock &MBB : *MF) {
518 for (MachineInstr &MI : MBB) {
519
520 // If the previous instruction was marked for elimination,
521 // remove it now.
522 if (ToErase) {
523 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
524 LLVM_DEBUG(ToErase->dump());
525 recomputeLVForDyingInstr();
526 ToErase->eraseFromParent();
527 ToErase = nullptr;
528 }
529 // If a conditional trap instruction got optimized to an
530 // unconditional trap, eliminate all the instructions after
531 // the trap.
532 if (EnableTrapOptimization && TrapOpt) {
533 ToErase = &MI;
534 continue;
535 }
536
537 // Ignore debug instructions.
538 if (MI.isDebugInstr())
539 continue;
540
541 // Per-opcode peepholes.
542 switch (MI.getOpcode()) {
543
544 default:
545 break;
546 case PPC::COPY: {
547 Register Src = MI.getOperand(1).getReg();
548 Register Dst = MI.getOperand(0).getReg();
549 if (!Src.isVirtual() || !Dst.isVirtual())
550 break;
551 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
552 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
553 break;
554
555 // We are copying an unprimed accumulator to a primed accumulator.
556 // If the input to the copy is a PHI that is fed only by (i) copies in
557 // the other direction (ii) implicitly defined unprimed accumulators or
558 // (iii) other PHI nodes satisfying (i) and (ii), we can change
559 // the PHI to a PHI on primed accumulators (as long as we also change
560 // its operands). To detect and change such copies, we first get a list
561 // of all the PHI nodes starting from the root PHI node in BFS order.
562 // We then visit all these PHI nodes to check if they can be changed to
563 // primed accumulator PHI nodes and if so, we change them.
564 MachineInstr *RootPHI = MRI->getVRegDef(Src);
565 if (RootPHI->getOpcode() != PPC::PHI)
566 break;
567
569 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
570 break;
571
572 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
573
574 ToErase = &MI;
575 break;
576 }
577 case PPC::LI:
578 case PPC::LI8: {
579 // If we are materializing a zero, look for any use operands for which
580 // zero means immediate zero. All such operands can be replaced with
581 // PPC::ZERO.
582 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
583 break;
584 Register MIDestReg = MI.getOperand(0).getReg();
585 bool Folded = false;
586 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
587 Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
588 if (MRI->use_nodbg_empty(MIDestReg)) {
589 ++NumLoadImmZeroFoldedAndRemoved;
590 ToErase = &MI;
591 }
592 if (Folded)
593 addRegToUpdate(MIDestReg);
594 Simplified |= Folded;
595 break;
596 }
597 case PPC::STW:
598 case PPC::STD: {
599 MachineFrameInfo &MFI = MF->getFrameInfo();
600 if (MFI.hasVarSizedObjects() ||
601 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
602 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
603 break;
604 // When encountering a TOC save instruction, call UpdateTOCSaves
605 // to add it to the TOCSaves map and mark any existing TOC saves
606 // it dominates as redundant.
607 if (TII->isTOCSaveMI(MI))
608 UpdateTOCSaves(TOCSaves, &MI);
609 break;
610 }
611 case PPC::XXPERMDI: {
612 // Perform simplifications of 2x64 vector swaps and splats.
613 // A swap is identified by an immediate value of 2, and a splat
614 // is identified by an immediate value of 0 or 3.
615 int Immed = MI.getOperand(3).getImm();
616
617 if (Immed == 1)
618 break;
619
620 // For each of these simplifications, we need the two source
621 // regs to match. Unfortunately, MachineCSE ignores COPY and
622 // SUBREG_TO_REG, so for example we can see
623 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
624 // We have to look through chains of COPY and SUBREG_TO_REG
625 // to find the real source values for comparison.
626 Register TrueReg1 =
627 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
628 Register TrueReg2 =
629 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
630
631 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
632 break;
633
634 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
635
636 if (!DefMI)
637 break;
638
639 unsigned DefOpc = DefMI->getOpcode();
640
641 // If this is a splat fed by a splatting load, the splat is
642 // redundant. Replace with a copy. This doesn't happen directly due
643 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
644 // a load of a double to a vector of 64-bit integers.
645 auto isConversionOfLoadAndSplat = [=]() -> bool {
646 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
647 return false;
648 Register FeedReg1 =
649 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
650 if (FeedReg1.isVirtual()) {
651 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
652 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
653 return true;
654 }
655 return false;
656 };
657 if ((Immed == 0 || Immed == 3) &&
658 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
659 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
660 "to load-and-splat/copy: ");
661 LLVM_DEBUG(MI.dump());
662 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
663 MI.getOperand(0).getReg())
664 .add(MI.getOperand(1));
665 addRegToUpdate(MI.getOperand(1).getReg());
666 ToErase = &MI;
667 Simplified = true;
668 }
669
670 // If this is a splat or a swap fed by another splat, we
671 // can replace it with a copy.
672 if (DefOpc == PPC::XXPERMDI) {
673 Register DefReg1 = DefMI->getOperand(1).getReg();
674 Register DefReg2 = DefMI->getOperand(2).getReg();
675 unsigned DefImmed = DefMI->getOperand(3).getImm();
676
677 // If the two inputs are not the same register, check to see if
678 // they originate from the same virtual register after only
679 // copy-like instructions.
680 if (DefReg1 != DefReg2) {
681 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
682 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
683
684 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
685 break;
686 }
687
688 if (DefImmed == 0 || DefImmed == 3) {
689 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
690 "to splat/copy: ");
691 LLVM_DEBUG(MI.dump());
692 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
693 MI.getOperand(0).getReg())
694 .add(MI.getOperand(1));
695 addRegToUpdate(MI.getOperand(1).getReg());
696 ToErase = &MI;
697 Simplified = true;
698 }
699
700 // If this is a splat fed by a swap, we can simplify modify
701 // the splat to splat the other value from the swap's input
702 // parameter.
703 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
704 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
705 LLVM_DEBUG(MI.dump());
706 addRegToUpdate(MI.getOperand(1).getReg());
707 addRegToUpdate(MI.getOperand(2).getReg());
708 MI.getOperand(1).setReg(DefReg1);
709 MI.getOperand(2).setReg(DefReg2);
710 MI.getOperand(3).setImm(3 - Immed);
711 addRegToUpdate(DefReg1);
712 addRegToUpdate(DefReg2);
713 Simplified = true;
714 }
715
716 // If this is a swap fed by a swap, we can replace it
717 // with a copy from the first swap's input.
718 else if (Immed == 2 && DefImmed == 2) {
719 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
720 LLVM_DEBUG(MI.dump());
721 addRegToUpdate(MI.getOperand(1).getReg());
722 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
723 MI.getOperand(0).getReg())
724 .add(DefMI->getOperand(1));
727 ToErase = &MI;
728 Simplified = true;
729 }
730 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
731 DefOpc == PPC::XXPERMDIs &&
732 (DefMI->getOperand(2).getImm() == 0 ||
733 DefMI->getOperand(2).getImm() == 3)) {
734 ToErase = &MI;
735 Simplified = true;
736 // Swap of a splat, convert to copy.
737 if (Immed == 2) {
738 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
739 LLVM_DEBUG(MI.dump());
740 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
741 MI.getOperand(0).getReg())
742 .add(MI.getOperand(1));
743 addRegToUpdate(MI.getOperand(1).getReg());
744 break;
745 }
746 // Splat fed by another splat - switch the output of the first
747 // and remove the second.
748 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
749 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
750 LLVM_DEBUG(MI.dump());
751 } else if (Immed == 2 &&
752 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
753 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
754 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
755 DefOpc == PPC::VSPLTISW)) {
756 // Swap of various vector splats, convert to copy.
757 ToErase = &MI;
758 Simplified = true;
759 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
760 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
761 LLVM_DEBUG(MI.dump());
762 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
763 MI.getOperand(0).getReg())
764 .add(MI.getOperand(1));
765 addRegToUpdate(MI.getOperand(1).getReg());
766 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
767 TII->isLoadFromConstantPool(DefMI)) {
768 const Constant *C = TII->getConstantFromConstantPool(DefMI);
769 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
770 ToErase = &MI;
771 Simplified = true;
773 << "Optimizing swap(splat pattern from constant-pool) "
774 "=> copy(splat pattern from constant-pool): ");
775 LLVM_DEBUG(MI.dump());
776 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
777 MI.getOperand(0).getReg())
778 .add(MI.getOperand(1));
779 addRegToUpdate(MI.getOperand(1).getReg());
780 }
781 }
782 break;
783 }
784 case PPC::VSPLTB:
785 case PPC::VSPLTH:
786 case PPC::XXSPLTW: {
787 unsigned MyOpcode = MI.getOpcode();
788 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
789 Register TrueReg =
790 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
791 if (!TrueReg.isVirtual())
792 break;
793 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
794 if (!DefMI)
795 break;
796 unsigned DefOpcode = DefMI->getOpcode();
797 auto isConvertOfSplat = [=]() -> bool {
798 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
799 return false;
800 Register ConvReg = DefMI->getOperand(1).getReg();
801 if (!ConvReg.isVirtual())
802 return false;
803 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
804 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
805 Splt->getOpcode() == PPC::XXSPLTW);
806 };
807 bool AlreadySplat = (MyOpcode == DefOpcode) ||
808 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
809 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
810 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
811 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
812 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
813 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
814 // If the instruction[s] that feed this splat have already splat
815 // the value, this splat is redundant.
816 if (AlreadySplat) {
817 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
818 LLVM_DEBUG(MI.dump());
819 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
820 MI.getOperand(0).getReg())
821 .add(MI.getOperand(OpNo));
822 addRegToUpdate(MI.getOperand(OpNo).getReg());
823 ToErase = &MI;
824 Simplified = true;
825 }
826 // Splat fed by a shift. Usually when we align value to splat into
827 // vector element zero.
828 if (DefOpcode == PPC::XXSLDWI) {
829 Register ShiftRes = DefMI->getOperand(0).getReg();
830 Register ShiftOp1 = DefMI->getOperand(1).getReg();
831 Register ShiftOp2 = DefMI->getOperand(2).getReg();
832 unsigned ShiftImm = DefMI->getOperand(3).getImm();
833 unsigned SplatImm =
834 MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
835 if (ShiftOp1 == ShiftOp2) {
836 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
837 if (MRI->hasOneNonDBGUse(ShiftRes)) {
838 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
840 ToErase = DefMI;
841 }
842 Simplified = true;
843 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
844 << " to " << NewElem << " in instruction: ");
845 LLVM_DEBUG(MI.dump());
846 addRegToUpdate(MI.getOperand(OpNo).getReg());
847 addRegToUpdate(ShiftOp1);
848 MI.getOperand(OpNo).setReg(ShiftOp1);
849 MI.getOperand(2).setImm(NewElem);
850 }
851 }
852 break;
853 }
854 case PPC::XVCVDPSP: {
855 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
856 Register TrueReg =
857 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
858 if (!TrueReg.isVirtual())
859 break;
860 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
861
862 // This can occur when building a vector of single precision or integer
863 // values.
864 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
865 Register DefsReg1 =
866 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
867 Register DefsReg2 =
868 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
869 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
870 break;
871 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
872 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
873
874 if (!P1 || !P2)
875 break;
876
877 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
878 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
879 // the FRSP/XSRSP.
880 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
881 unsigned Opc = RoundInstr->getOpcode();
882 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
883 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
884 Simplified = true;
885 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
886 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
887 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
888 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
889 if (Use.getOperand(i).isReg() &&
890 Use.getOperand(i).getReg() == FRSPDefines)
891 Use.getOperand(i).setReg(ConvReg1);
892 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
893 LLVM_DEBUG(RoundInstr->dump());
894 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
895 LLVM_DEBUG(MI.dump());
896 LLVM_DEBUG(dbgs() << "Through instruction:\n");
898 addRegToUpdate(ConvReg1);
899 addRegToUpdate(FRSPDefines);
900 ToErase = RoundInstr;
901 }
902 };
903
904 // If the input to XVCVDPSP is a vector that was built (even
905 // partially) out of FRSP's, the FRSP(s) can safely be removed
906 // since this instruction performs the same operation.
907 if (P1 != P2) {
908 removeFRSPIfPossible(P1);
909 removeFRSPIfPossible(P2);
910 break;
911 }
912 removeFRSPIfPossible(P1);
913 }
914 break;
915 }
916 case PPC::EXTSH:
917 case PPC::EXTSH8:
918 case PPC::EXTSH8_32_64: {
919 if (!EnableSExtElimination) break;
920 Register NarrowReg = MI.getOperand(1).getReg();
921 if (!NarrowReg.isVirtual())
922 break;
923
924 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
925 unsigned SrcOpcode = SrcMI->getOpcode();
926 // If we've used a zero-extending load that we will sign-extend,
927 // just do a sign-extending load.
928 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
929 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
930 break;
931 // Determine the new opcode. We need to make sure that if the original
932 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
933 // Likewise if the source is X-Form the new opcode should also be
934 // X-Form.
935 unsigned Opc = PPC::LHA;
936 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
937 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
938 MI.getOpcode() == PPC::EXTSH8_32_64;
939
940 if (SourceIsXForm && MIIs64Bit)
941 Opc = PPC::LHAX8;
942 else if (SourceIsXForm && !MIIs64Bit)
943 Opc = PPC::LHAX;
944 else if (MIIs64Bit)
945 Opc = PPC::LHA8;
946
947 addRegToUpdate(NarrowReg);
948 addRegToUpdate(MI.getOperand(0).getReg());
949
950 // We are removing a definition of NarrowReg which will cause
951 // problems in AliveBlocks. Add an implicit def that will be
952 // removed so that AliveBlocks are updated correctly.
953 addDummyDef(MBB, &MI, NarrowReg);
954 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
955 LLVM_DEBUG(SrcMI->dump());
956 LLVM_DEBUG(dbgs() << "and sign-extension\n");
957 LLVM_DEBUG(MI.dump());
958 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
959 SrcMI->setDesc(TII->get(Opc));
960 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
961 ToErase = &MI;
962 Simplified = true;
963 NumEliminatedSExt++;
964 }
965 break;
966 }
967 case PPC::EXTSW:
968 case PPC::EXTSW_32:
969 case PPC::EXTSW_32_64: {
970 if (!EnableSExtElimination) break;
971 Register NarrowReg = MI.getOperand(1).getReg();
972 if (!NarrowReg.isVirtual())
973 break;
974
975 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
976 unsigned SrcOpcode = SrcMI->getOpcode();
977 // If we've used a zero-extending load that we will sign-extend,
978 // just do a sign-extending load.
979 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
980 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
981 break;
982
983 // The transformation from a zero-extending load to a sign-extending
984 // load is only legal when the displacement is a multiple of 4.
985 // If the displacement is not at least 4 byte aligned, don't perform
986 // the transformation.
987 bool IsWordAligned = false;
988 if (SrcMI->getOperand(1).isGlobal()) {
989 const GlobalObject *GO =
990 dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
991 if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
992 (SrcMI->getOperand(1).getOffset() % 4 == 0))
993 IsWordAligned = true;
994 } else if (SrcMI->getOperand(1).isImm()) {
995 int64_t Value = SrcMI->getOperand(1).getImm();
996 if (Value % 4 == 0)
997 IsWordAligned = true;
998 }
999
1000 // Determine the new opcode. We need to make sure that if the original
1001 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1002 // Likewise if the source is X-Form the new opcode should also be
1003 // X-Form.
1004 unsigned Opc = PPC::LWA_32;
1005 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1006 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1007 MI.getOpcode() == PPC::EXTSW_32_64;
1008
1009 if (SourceIsXForm && MIIs64Bit)
1010 Opc = PPC::LWAX;
1011 else if (SourceIsXForm && !MIIs64Bit)
1012 Opc = PPC::LWAX_32;
1013 else if (MIIs64Bit)
1014 Opc = PPC::LWA;
1015
1016 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1017 break;
1018
1019 addRegToUpdate(NarrowReg);
1020 addRegToUpdate(MI.getOperand(0).getReg());
1021
1022 // We are removing a definition of NarrowReg which will cause
1023 // problems in AliveBlocks. Add an implicit def that will be
1024 // removed so that AliveBlocks are updated correctly.
1025 addDummyDef(MBB, &MI, NarrowReg);
1026 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1027 LLVM_DEBUG(SrcMI->dump());
1028 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1029 LLVM_DEBUG(MI.dump());
1030 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1031 SrcMI->setDesc(TII->get(Opc));
1032 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1033 ToErase = &MI;
1034 Simplified = true;
1035 NumEliminatedSExt++;
1036 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1037 TII->isSignExtended(NarrowReg, MRI)) {
1038 // We can eliminate EXTSW if the input is known to be already
1039 // sign-extended.
1040 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1041 Register TmpReg =
1042 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1043 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1044 TmpReg);
1045 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1046 MI.getOperand(0).getReg())
1047 .addReg(TmpReg)
1048 .addReg(NarrowReg)
1049 .addImm(PPC::sub_32);
1050 ToErase = &MI;
1051 Simplified = true;
1052 NumEliminatedSExt++;
1053 }
1054 break;
1055 }
1056 case PPC::RLDICL: {
1057 // We can eliminate RLDICL (e.g. for zero-extension)
1058 // if all bits to clear are already zero in the input.
1059 // This code assume following code sequence for zero-extension.
1060 // %6 = COPY %5:sub_32; (optional)
1061 // %8 = IMPLICIT_DEF;
1062 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1063 if (!EnableZExtElimination) break;
1064
1065 if (MI.getOperand(2).getImm() != 0)
1066 break;
1067
1068 Register SrcReg = MI.getOperand(1).getReg();
1069 if (!SrcReg.isVirtual())
1070 break;
1071
1072 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1073 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1074 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1075 break;
1076
1077 MachineInstr *ImpDefMI, *SubRegMI;
1078 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1079 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1080 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1081
1082 SrcMI = SubRegMI;
1083 if (SubRegMI->getOpcode() == PPC::COPY) {
1084 Register CopyReg = SubRegMI->getOperand(1).getReg();
1085 if (CopyReg.isVirtual())
1086 SrcMI = MRI->getVRegDef(CopyReg);
1087 }
1088 if (!SrcMI->getOperand(0).isReg())
1089 break;
1090
1091 unsigned KnownZeroCount =
1092 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1093 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1094 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1095 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1096 MI.getOperand(0).getReg())
1097 .addReg(SrcReg);
1098 addRegToUpdate(SrcReg);
1099 ToErase = &MI;
1100 Simplified = true;
1101 NumEliminatedZExt++;
1102 }
1103 break;
1104 }
1105
1106 // TODO: Any instruction that has an immediate form fed only by a PHI
1107 // whose operands are all load immediate can be folded away. We currently
1108 // do this for ADD instructions, but should expand it to arithmetic and
1109 // binary instructions with immediate forms in the future.
1110 case PPC::ADD4:
1111 case PPC::ADD8: {
1112 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1113 assert(PhiOp && "Invalid Operand!");
1114 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1115
1116 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1117 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1118 };
1119
1120 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1121 MachineOperand *PhiOp) {
1122 assert(PhiOp && "Invalid Operand!");
1123 assert(DominatorOp && "Invalid Operand!");
1124 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1125 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1126
1127 // Note: the vregs only show up at odd indices position of PHI Node,
1128 // the even indices position save the BB info.
1129 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1130 MachineInstr *LiMI =
1131 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1132 if (!LiMI ||
1133 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1134 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1135 !MDT->dominates(DefDomMI, LiMI))
1136 return false;
1137 }
1138
1139 return true;
1140 };
1141
1142 MachineOperand Op1 = MI.getOperand(1);
1143 MachineOperand Op2 = MI.getOperand(2);
1144 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1145 std::swap(Op1, Op2);
1146 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1147 break; // We don't have an ADD fed by LI's that can be transformed
1148
1149 // Now we know that Op1 is the PHI node and Op2 is the dominator
1150 Register DominatorReg = Op2.getReg();
1151
1152 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1153 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1154 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1155 MRI->setRegClass(DominatorReg, TRC);
1156
1157 // replace LIs with ADDIs
1158 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1159 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1160 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1161 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1162 LLVM_DEBUG(LiMI->dump());
1163
1164 // There could be repeated registers in the PHI, e.g: %1 =
1165 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1166 // already replaced the def instruction, skip.
1167 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1168 continue;
1169
1170 assert((LiMI->getOpcode() == PPC::LI ||
1171 LiMI->getOpcode() == PPC::LI8) &&
1172 "Invalid Opcode!");
1173 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1174 LiMI->removeOperand(1); // remove the imm of LI
1175 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1176 : PPC::ADDI8));
1177 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1178 .addReg(DominatorReg)
1179 .addImm(LiImm); // restore the imm of LI
1180 LLVM_DEBUG(LiMI->dump());
1181 }
1182
1183 // Replace ADD with COPY
1184 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1185 LLVM_DEBUG(MI.dump());
1186 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1187 MI.getOperand(0).getReg())
1188 .add(Op1);
1189 addRegToUpdate(Op1.getReg());
1190 addRegToUpdate(Op2.getReg());
1191 ToErase = &MI;
1192 Simplified = true;
1193 NumOptADDLIs++;
1194 break;
1195 }
1196 case PPC::RLDICR: {
1197 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1198 combineSEXTAndSHL(MI, ToErase);
1199 break;
1200 }
1201 case PPC::ANDI_rec:
1202 case PPC::ANDI8_rec:
1203 case PPC::ANDIS_rec:
1204 case PPC::ANDIS8_rec: {
1205 Register TrueReg =
1206 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1207 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1208 break;
1209
1210 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1211 if (!SrcMI)
1212 break;
1213
1214 unsigned SrcOpCode = SrcMI->getOpcode();
1215 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1216 break;
1217
1218 Register SrcReg, DstReg;
1219 SrcReg = SrcMI->getOperand(1).getReg();
1220 DstReg = MI.getOperand(1).getReg();
1221 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1222 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1223 if (DstRC != SrcRC)
1224 break;
1225
1226 uint64_t AndImm = MI.getOperand(2).getImm();
1227 if (MI.getOpcode() == PPC::ANDIS_rec ||
1228 MI.getOpcode() == PPC::ANDIS8_rec)
1229 AndImm <<= 16;
1230 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1231 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1232 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1233
1234 // We can transfer `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
1235 // if all bits to AND are already zero in the input.
1236 bool PatternResultZero =
1237 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1238 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1239
1240 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1241 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1242 bool PatternRemoveRotate =
1243 SrcMI->getOperand(2).getImm() == 0 &&
1244 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1245 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1246
1247 if (!PatternResultZero && !PatternRemoveRotate)
1248 break;
1249
1250 LLVM_DEBUG(dbgs() << "Combining pair: ");
1251 LLVM_DEBUG(SrcMI->dump());
1252 LLVM_DEBUG(MI.dump());
1253 if (PatternResultZero)
1254 MI.getOperand(2).setImm(0);
1255 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1256 LLVM_DEBUG(dbgs() << "To: ");
1257 LLVM_DEBUG(MI.dump());
1258 addRegToUpdate(MI.getOperand(1).getReg());
1259 addRegToUpdate(SrcMI->getOperand(0).getReg());
1260 Simplified = true;
1261 break;
1262 }
1263 case PPC::RLWINM:
1264 case PPC::RLWINM_rec:
1265 case PPC::RLWINM8:
1266 case PPC::RLWINM8_rec: {
1267 // We might replace operand 1 of the instruction which will
1268 // require we recompute kill flags for it.
1269 Register OrigOp1Reg = MI.getOperand(1).isReg()
1270 ? MI.getOperand(1).getReg()
1271 : PPC::NoRegister;
1272 Simplified = TII->combineRLWINM(MI, &ToErase);
1273 if (Simplified) {
1274 addRegToUpdate(OrigOp1Reg);
1275 if (MI.getOperand(1).isReg())
1276 addRegToUpdate(MI.getOperand(1).getReg());
1277 ++NumRotatesCollapsed;
1278 }
1279 break;
1280 }
1281 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1282 // always trap, we will delete the node if it will never trap.
1283 case PPC::TDI:
1284 case PPC::TWI:
1285 case PPC::TD:
1286 case PPC::TW: {
1287 if (!EnableTrapOptimization) break;
1288 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1289 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1290 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1291 // We can only do the optimization if we can get immediates
1292 // from both operands
1293 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1294 LiMI1->getOpcode() == PPC::LI8)))
1295 break;
1296 if (!IsOperand2Immediate &&
1297 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1298 LiMI2->getOpcode() == PPC::LI8)))
1299 break;
1300
1301 auto ImmOperand0 = MI.getOperand(0).getImm();
1302 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1303 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1304 : LiMI2->getOperand(1).getImm();
1305
1306 // We will replace the MI with an unconditional trap if it will always
1307 // trap.
1308 if ((ImmOperand0 == 31) ||
1309 ((ImmOperand0 & 0x10) &&
1310 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1311 ((ImmOperand0 & 0x8) &&
1312 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1313 ((ImmOperand0 & 0x2) &&
1314 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1315 ((ImmOperand0 & 0x1) &&
1316 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1317 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1318 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1319 TrapOpt = true;
1320 }
1321 // We will delete the MI if it will never trap.
1322 ToErase = &MI;
1323 Simplified = true;
1324 break;
1325 }
1326 }
1327 }
1328
1329 // If the last instruction was marked for elimination,
1330 // remove it now.
1331 if (ToErase) {
1332 recomputeLVForDyingInstr();
1333 ToErase->eraseFromParent();
1334 ToErase = nullptr;
1335 }
1336 // Reset TrapOpt to false at the end of the basic block.
1338 TrapOpt = false;
1339 }
1340
1341 // Eliminate all the TOC save instructions which are redundant.
1342 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1343 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1344 if (FI->mustSaveTOC())
1345 NumTOCSavesInPrologue++;
1346
1347 // We try to eliminate redundant compare instruction.
1348 Simplified |= eliminateRedundantCompare();
1349
1350 // If we have made any modifications and added any registers to the set of
1351 // registers for which we need to update the kill flags, do so by recomputing
1352 // LiveVariables for those registers.
1353 for (Register Reg : RegsToUpdate) {
1354 if (!MRI->reg_empty(Reg))
1356 }
1357 return Simplified;
1358}
1359
1360// helper functions for eliminateRedundantCompare
1361static bool isEqOrNe(MachineInstr *BI) {
1363 unsigned PredCond = PPC::getPredicateCondition(Pred);
1364 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1365}
1366
1367static bool isSupportedCmpOp(unsigned opCode) {
1368 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1369 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1370 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1371 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1372}
1373
1374static bool is64bitCmpOp(unsigned opCode) {
1375 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1376 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1377}
1378
1379static bool isSignedCmpOp(unsigned opCode) {
1380 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1381 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1382}
1383
1384static unsigned getSignedCmpOpCode(unsigned opCode) {
1385 if (opCode == PPC::CMPLD) return PPC::CMPD;
1386 if (opCode == PPC::CMPLW) return PPC::CMPW;
1387 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1388 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1389 return opCode;
1390}
1391
1392// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1393// (LT x) to (LE x-1)
1394static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1395 uint64_t Imm = CMPI->getOperand(2).getImm();
1396 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1397 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1398 return 0;
1399
1401 unsigned PredCond = PPC::getPredicateCondition(Pred);
1402 unsigned PredHint = PPC::getPredicateHint(Pred);
1403 if (PredCond == PPC::PRED_GE)
1404 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1405 if (PredCond == PPC::PRED_LT)
1406 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1407
1408 return 0;
1409}
1410
1411// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1412// (LE x) to (LT x+1)
1413static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1414 uint64_t Imm = CMPI->getOperand(2).getImm();
1415 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1416 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1417 return 0;
1418
1420 unsigned PredCond = PPC::getPredicateCondition(Pred);
1421 unsigned PredHint = PPC::getPredicateHint(Pred);
1422 if (PredCond == PPC::PRED_GT)
1423 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1424 if (PredCond == PPC::PRED_LE)
1425 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1426
1427 return 0;
1428}
1429
1430// This takes a Phi node and returns a register value for the specified BB.
1431static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1433 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1434 MachineOperand &MO = Phi->getOperand(I);
1435 if (MO.getMBB() == MBB)
1436 return Phi->getOperand(I-1).getReg();
1437 }
1438 llvm_unreachable("invalid src basic block for this Phi node\n");
1439 return 0;
1440}
1441
1442// This function tracks the source of the register through register copy.
1443// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1444// assuming that the control comes from BB1 into BB2.
1445static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1447 unsigned SrcReg = Reg;
1448 while (true) {
1449 unsigned NextReg = SrcReg;
1450 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1451 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1452 NextReg = getIncomingRegForBlock(Inst, BB1);
1453 // We track through PHI only once to avoid infinite loop.
1454 BB1 = nullptr;
1455 }
1456 else if (Inst->isFullCopy())
1457 NextReg = Inst->getOperand(1).getReg();
1458 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1459 break;
1460 SrcReg = NextReg;
1461 }
1462 return SrcReg;
1463}
1464
1465static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1466 MachineBasicBlock *&PredMBB,
1467 MachineBasicBlock *&MBBtoMoveCmp,
1469
1470 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1471 auto BII = BB.getFirstInstrTerminator();
1472 // We optimize BBs ending with a conditional branch.
1473 // We check only for BCC here, not BCCLR, because BCCLR
1474 // will be formed only later in the pipeline.
1475 if (BB.succ_size() == 2 &&
1476 BII != BB.instr_end() &&
1477 (*BII).getOpcode() == PPC::BCC &&
1478 (*BII).getOperand(1).isReg()) {
1479 // We optimize only if the condition code is used only by one BCC.
1480 Register CndReg = (*BII).getOperand(1).getReg();
1481 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1482 return false;
1483
1484 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1485 // We assume compare and branch are in the same BB for ease of analysis.
1486 if (CMPI->getParent() != &BB)
1487 return false;
1488
1489 // We skip this BB if a physical register is used in comparison.
1490 for (MachineOperand &MO : CMPI->operands())
1491 if (MO.isReg() && !MO.getReg().isVirtual())
1492 return false;
1493
1494 return true;
1495 }
1496 return false;
1497 };
1498
1499 // If this BB has more than one successor, we can create a new BB and
1500 // move the compare instruction in the new BB.
1501 // So far, we do not move compare instruction to a BB having multiple
1502 // successors to avoid potentially increasing code size.
1503 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1504 return BB.succ_size() == 1;
1505 };
1506
1507 if (!isEligibleBB(MBB))
1508 return false;
1509
1510 unsigned NumPredBBs = MBB.pred_size();
1511 if (NumPredBBs == 1) {
1512 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1513 if (isEligibleBB(*TmpMBB)) {
1514 PredMBB = TmpMBB;
1515 MBBtoMoveCmp = nullptr;
1516 return true;
1517 }
1518 }
1519 else if (NumPredBBs == 2) {
1520 // We check for partially redundant case.
1521 // So far, we support cases with only two predecessors
1522 // to avoid increasing the number of instructions.
1524 MachineBasicBlock *Pred1MBB = *PI;
1525 MachineBasicBlock *Pred2MBB = *(PI+1);
1526
1527 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1528 // We assume Pred1MBB is the BB containing the compare to be merged and
1529 // Pred2MBB is the BB to which we will append a compare instruction.
1530 // Proceed as is if Pred1MBB is different from MBB.
1531 }
1532 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1533 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1534 std::swap(Pred1MBB, Pred2MBB);
1535 }
1536 else return false;
1537
1538 if (Pred1MBB == &MBB)
1539 return false;
1540
1541 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1542 // We cannot move the compare instruction if operands are not available
1543 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1545 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1546 for (int I = 1; I <= 2; I++)
1547 if (CMPI->getOperand(I).isReg()) {
1548 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1549 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1550 return false;
1551 }
1552
1553 PredMBB = Pred1MBB;
1554 MBBtoMoveCmp = Pred2MBB;
1555 return true;
1556 }
1557
1558 return false;
1559}
1560
1561// This function will iterate over the input map containing a pair of TOC save
1562// instruction and a flag. The flag will be set to false if the TOC save is
1563// proven redundant. This function will erase from the basic block all the TOC
1564// saves marked as redundant.
1565bool PPCMIPeephole::eliminateRedundantTOCSaves(
1566 std::map<MachineInstr *, bool> &TOCSaves) {
1567 bool Simplified = false;
1568 int NumKept = 0;
1569 for (auto TOCSave : TOCSaves) {
1570 if (!TOCSave.second) {
1571 TOCSave.first->eraseFromParent();
1572 RemoveTOCSave++;
1573 Simplified = true;
1574 } else {
1575 NumKept++;
1576 }
1577 }
1578
1579 if (NumKept > 1)
1580 MultiTOCSaves++;
1581
1582 return Simplified;
1583}
1584
1585// If multiple conditional branches are executed based on the (essentially)
1586// same comparison, we merge compare instructions into one and make multiple
1587// conditional branches on this comparison.
1588// For example,
1589// if (a == 0) { ... }
1590// else if (a < 0) { ... }
1591// can be executed by one compare and two conditional branches instead of
1592// two pairs of a compare and a conditional branch.
1593//
1594// This method merges two compare instructions in two MBBs and modifies the
1595// compare and conditional branch instructions if needed.
1596// For the above example, the input for this pass looks like:
1597// cmplwi r3, 0
1598// beq 0, .LBB0_3
1599// cmpwi r3, -1
1600// bgt 0, .LBB0_4
1601// So, before merging two compares, we need to modify these instructions as
1602// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1603// beq 0, .LBB0_3
1604// cmpwi r3, 0 ; greather than -1 means greater or equal to 0
1605// bge 0, .LBB0_4
1606
1607bool PPCMIPeephole::eliminateRedundantCompare() {
1608 bool Simplified = false;
1609
1610 for (MachineBasicBlock &MBB2 : *MF) {
1611 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1612
1613 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1614 // as an optimization target if
1615 // - both MBBs end with a conditional branch,
1616 // - MBB1 is the only predecessor of MBB2, and
1617 // - compare does not take a physical register as a operand in both MBBs.
1618 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1619 //
1620 // As partially redundant case, we additionally handle if MBB2 has one
1621 // additional predecessor, which has only one successor (MBB2).
1622 // In this case, we move the compare instruction originally in MBB2 into
1623 // MBBtoMoveCmp. This partially redundant case is typically appear by
1624 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1625 //
1626 // Overview of CFG of related basic blocks
1627 // Fully redundant case Partially redundant case
1628 // -------- ---------------- --------
1629 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1630 // -------- ---------------- --------
1631 // | \ (w/ 1 succ) \ | \
1632 // | \ \ | \
1633 // | \ |
1634 // -------- --------
1635 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1636 // -------- and 2 succ) -------- and 2 succ)
1637 // | \ | \
1638 // | \ | \
1639 //
1640 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1641 continue;
1642
1643 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1644 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1645
1646 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1647 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1648 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1649
1650 // We cannot optimize an unsupported compare opcode or
1651 // a mix of 32-bit and 64-bit comparisons
1652 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1653 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1654 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1655 continue;
1656
1657 unsigned NewOpCode = 0;
1658 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1659 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1660 bool SwapOperands = false;
1661
1662 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1663 // Typically, unsigned comparison is used for equality check, but
1664 // we replace it with a signed comparison if the comparison
1665 // to be merged is a signed comparison.
1666 // In other cases of opcode mismatch, we cannot optimize this.
1667
1668 // We cannot change opcode when comparing against an immediate
1669 // if the most significant bit of the immediate is one
1670 // due to the difference in sign extension.
1671 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1672 if (!I->getOperand(2).isImm())
1673 return false;
1674 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1675 return Imm < 0;
1676 };
1677
1678 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1679 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1680 NewOpCode = CMPI1->getOpcode();
1681 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1682 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1683 NewOpCode = CMPI2->getOpcode();
1684 else continue;
1685 }
1686
1687 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1688 // In case of comparisons between two registers, these two registers
1689 // must be same to merge two comparisons.
1690 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1691 nullptr, nullptr, MRI);
1692 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1693 nullptr, nullptr, MRI);
1694 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1695 MBB1, &MBB2, MRI);
1696 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1697 MBB1, &MBB2, MRI);
1698
1699 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1700 // Same pair of registers in the same order; ready to merge as is.
1701 }
1702 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1703 // Same pair of registers in different order.
1704 // We reverse the predicate to merge compare instructions.
1706 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1707 // In case of partial redundancy, we need to swap operands
1708 // in another compare instruction.
1709 SwapOperands = true;
1710 }
1711 else continue;
1712 }
1713 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1714 // In case of comparisons between a register and an immediate,
1715 // the operand register must be same for two compare instructions.
1716 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1717 nullptr, nullptr, MRI);
1718 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1719 MBB1, &MBB2, MRI);
1720 if (Cmp1Operand1 != Cmp2Operand1)
1721 continue;
1722
1723 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1724 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1725
1726 // If immediate are not same, we try to adjust by changing predicate;
1727 // e.g. GT imm means GE (imm+1).
1728 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1729 int Diff = Imm1 - Imm2;
1730 if (Diff < -2 || Diff > 2)
1731 continue;
1732
1733 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1734 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1735 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1736 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1737 if (Diff == 2) {
1738 if (PredToInc2 && PredToDec1) {
1739 NewPredicate2 = PredToInc2;
1740 NewPredicate1 = PredToDec1;
1741 NewImm2++;
1742 NewImm1--;
1743 }
1744 }
1745 else if (Diff == 1) {
1746 if (PredToInc2) {
1747 NewImm2++;
1748 NewPredicate2 = PredToInc2;
1749 }
1750 else if (PredToDec1) {
1751 NewImm1--;
1752 NewPredicate1 = PredToDec1;
1753 }
1754 }
1755 else if (Diff == -1) {
1756 if (PredToDec2) {
1757 NewImm2--;
1758 NewPredicate2 = PredToDec2;
1759 }
1760 else if (PredToInc1) {
1761 NewImm1++;
1762 NewPredicate1 = PredToInc1;
1763 }
1764 }
1765 else if (Diff == -2) {
1766 if (PredToDec2 && PredToInc1) {
1767 NewPredicate2 = PredToDec2;
1768 NewPredicate1 = PredToInc1;
1769 NewImm2--;
1770 NewImm1++;
1771 }
1772 }
1773 }
1774
1775 // We cannot merge two compares if the immediates are not same.
1776 if (NewImm2 != NewImm1)
1777 continue;
1778 }
1779
1780 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1781 LLVM_DEBUG(CMPI1->dump());
1782 LLVM_DEBUG(BI1->dump());
1783 LLVM_DEBUG(CMPI2->dump());
1784 LLVM_DEBUG(BI2->dump());
1785 for (const MachineOperand &MO : CMPI1->operands())
1786 if (MO.isReg())
1787 addRegToUpdate(MO.getReg());
1788 for (const MachineOperand &MO : CMPI2->operands())
1789 if (MO.isReg())
1790 addRegToUpdate(MO.getReg());
1791
1792 // We adjust opcode, predicates and immediate as we determined above.
1793 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1794 CMPI1->setDesc(TII->get(NewOpCode));
1795 }
1796 if (NewPredicate1) {
1797 BI1->getOperand(0).setImm(NewPredicate1);
1798 }
1799 if (NewPredicate2) {
1800 BI2->getOperand(0).setImm(NewPredicate2);
1801 }
1802 if (NewImm1 != Imm1) {
1803 CMPI1->getOperand(2).setImm(NewImm1);
1804 }
1805
1806 if (IsPartiallyRedundant) {
1807 // We touch up the compare instruction in MBB2 and move it to
1808 // a previous BB to handle partially redundant case.
1809 if (SwapOperands) {
1810 Register Op1 = CMPI2->getOperand(1).getReg();
1811 Register Op2 = CMPI2->getOperand(2).getReg();
1812 CMPI2->getOperand(1).setReg(Op2);
1813 CMPI2->getOperand(2).setReg(Op1);
1814 }
1815 if (NewImm2 != Imm2)
1816 CMPI2->getOperand(2).setImm(NewImm2);
1817
1818 for (int I = 1; I <= 2; I++) {
1819 if (CMPI2->getOperand(I).isReg()) {
1820 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1821 if (Inst->getParent() != &MBB2)
1822 continue;
1823
1824 assert(Inst->getOpcode() == PPC::PHI &&
1825 "We cannot support if an operand comes from this BB.");
1826 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1827 CMPI2->getOperand(I).setReg(SrcReg);
1828 addRegToUpdate(SrcReg);
1829 }
1830 }
1831 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1832 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1833
1834 DebugLoc DL = CMPI2->getDebugLoc();
1835 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1836 BuildMI(MBB2, MBB2.begin(), DL,
1837 TII->get(PPC::PHI), NewVReg)
1838 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1839 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1840 BI2->getOperand(1).setReg(NewVReg);
1841 addRegToUpdate(NewVReg);
1842 }
1843 else {
1844 // We finally eliminate compare instruction in MBB2.
1845 // We do not need to treat CMPI2 specially here in terms of re-computing
1846 // live variables even though it is being deleted because:
1847 // - It defines a register that has a single use (already checked in
1848 // eligibleForCompareElimination())
1849 // - The only user (BI2) is no longer using it so the register is dead (no
1850 // def, no uses)
1851 // - We do not attempt to recompute live variables for dead registers
1852 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1853 CMPI2->eraseFromParent();
1854 }
1855
1856 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1857 LLVM_DEBUG(CMPI1->dump());
1858 LLVM_DEBUG(BI1->dump());
1859 LLVM_DEBUG(BI2->dump());
1860 if (IsPartiallyRedundant) {
1861 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1862 << printMBBReference(*MBBtoMoveCmp)
1863 << " to handle partial redundancy.\n");
1864 LLVM_DEBUG(CMPI2->dump());
1865 }
1866 Simplified = true;
1867 }
1868
1869 return Simplified;
1870}
1871
1872// We miss the opportunity to emit an RLDIC when lowering jump tables
1873// since ISEL sees only a single basic block. When selecting, the clear
1874// and shift left will be in different blocks.
1875bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1876 MachineInstr *&ToErase) {
1877 if (MI.getOpcode() != PPC::RLDICR)
1878 return false;
1879
1880 Register SrcReg = MI.getOperand(1).getReg();
1881 if (!SrcReg.isVirtual())
1882 return false;
1883
1884 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1885 if (SrcMI->getOpcode() != PPC::RLDICL)
1886 return false;
1887
1888 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1889 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1890 MachineOperand MOpSHMI = MI.getOperand(2);
1891 MachineOperand MOpMEMI = MI.getOperand(3);
1892 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1893 MOpMEMI.isImm()))
1894 return false;
1895
1896 uint64_t SHSrc = MOpSHSrc.getImm();
1897 uint64_t MBSrc = MOpMBSrc.getImm();
1898 uint64_t SHMI = MOpSHMI.getImm();
1899 uint64_t MEMI = MOpMEMI.getImm();
1900 uint64_t NewSH = SHSrc + SHMI;
1901 uint64_t NewMB = MBSrc - SHMI;
1902 if (NewMB > 63 || NewSH > 63)
1903 return false;
1904
1905 // The bits cleared with RLDICL are [0, MBSrc).
1906 // The bits cleared with RLDICR are (MEMI, 63].
1907 // After the sequence, the bits cleared are:
1908 // [0, MBSrc-SHMI) and (MEMI, 63).
1909 //
1910 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1911 if ((63 - NewSH) != MEMI)
1912 return false;
1913
1914 LLVM_DEBUG(dbgs() << "Converting pair: ");
1915 LLVM_DEBUG(SrcMI->dump());
1916 LLVM_DEBUG(MI.dump());
1917
1918 MI.setDesc(TII->get(PPC::RLDIC));
1919 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1920 MI.getOperand(2).setImm(NewSH);
1921 MI.getOperand(3).setImm(NewMB);
1922 addRegToUpdate(MI.getOperand(1).getReg());
1923 addRegToUpdate(SrcMI->getOperand(0).getReg());
1924
1925 LLVM_DEBUG(dbgs() << "To: ");
1926 LLVM_DEBUG(MI.dump());
1927 NumRotatesCollapsed++;
1928 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1929 if (MRI->use_nodbg_empty(SrcReg)) {
1930 assert(!SrcMI->hasImplicitDef() &&
1931 "Not expecting an implicit def with this instr.");
1932 ToErase = SrcMI;
1933 }
1934 return true;
1935}
1936
1937// For case in LLVM IR
1938// entry:
1939// %iconv = sext i32 %index to i64
1940// br i1 undef label %true, label %false
1941// true:
1942// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1943// ...
1944// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1945// different BBs when conducting instruction selection. We can do a peephole
1946// optimization to combine these two instructions into extswsli after
1947// instruction selection.
1948bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1949 MachineInstr *&ToErase) {
1950 if (MI.getOpcode() != PPC::RLDICR)
1951 return false;
1952
1953 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1954 return false;
1955
1956 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1957
1958 MachineOperand MOpSHMI = MI.getOperand(2);
1959 MachineOperand MOpMEMI = MI.getOperand(3);
1960 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1961 return false;
1962
1963 uint64_t SHMI = MOpSHMI.getImm();
1964 uint64_t MEMI = MOpMEMI.getImm();
1965 if (SHMI + MEMI != 63)
1966 return false;
1967
1968 Register SrcReg = MI.getOperand(1).getReg();
1969 if (!SrcReg.isVirtual())
1970 return false;
1971
1972 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1973 if (SrcMI->getOpcode() != PPC::EXTSW &&
1974 SrcMI->getOpcode() != PPC::EXTSW_32_64)
1975 return false;
1976
1977 // If the register defined by extsw has more than one use, combination is not
1978 // needed.
1979 if (!MRI->hasOneNonDBGUse(SrcReg))
1980 return false;
1981
1982 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
1983 assert(SrcMI->getOperand(1).isReg() &&
1984 "EXTSW's second operand should be a register");
1985 if (!SrcMI->getOperand(1).getReg().isVirtual())
1986 return false;
1987
1988 LLVM_DEBUG(dbgs() << "Combining pair: ");
1989 LLVM_DEBUG(SrcMI->dump());
1990 LLVM_DEBUG(MI.dump());
1991
1992 MachineInstr *NewInstr =
1993 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
1994 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
1995 : TII->get(PPC::EXTSWSLI_32_64),
1996 MI.getOperand(0).getReg())
1997 .add(SrcMI->getOperand(1))
1998 .add(MOpSHMI);
1999 (void)NewInstr;
2000
2001 LLVM_DEBUG(dbgs() << "TO: ");
2002 LLVM_DEBUG(NewInstr->dump());
2003 ++NumEXTSWAndSLDICombined;
2004 ToErase = &MI;
2005 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2006 // can recompute its kill flags. We run DCE immediately after this pass
2007 // to clean up dead instructions such as this.
2008 addRegToUpdate(NewInstr->getOperand(1).getReg());
2009 addRegToUpdate(SrcMI->getOperand(0).getReg());
2010 return true;
2011}
2012
2013} // end default namespace
2014
2016 "PowerPC MI Peephole Optimization", false, false)
2022 "PowerPC MI Peephole Optimization", false, false)
2023
2024char PPCMIPeephole::ID = 0;
2026llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define addRegToUpdate(R)
PowerPC MI Peephole Optimization
static cl::opt< bool > EnableZExtElimination("ppc-eliminate-zeroext", cl::desc("enable elimination of zero-extensions"), cl::init(true), cl::Hidden)
static cl::opt< bool > FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), cl::desc("Iterate to a fixed point when attempting to " "convert reg-reg instructions to reg-imm"))
static cl::opt< bool > EnableTrapOptimization("ppc-opt-conditional-trap", cl::desc("enable optimization of conditional traps"), cl::init(false), cl::Hidden)
static cl::opt< bool > ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), cl::desc("Convert eligible reg+reg instructions to reg+imm"))
#define DEBUG_TYPE
static cl::opt< bool > EnableSExtElimination("ppc-eliminate-signext", cl::desc("enable elimination of sign-extensions"), cl::init(true), cl::Hidden)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
unsigned pred_size() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:544
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:327
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:547
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:618
bool isFullCopy() const
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:660
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:473
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool isUsingPCRelativeCalls() const
bool isELFv2ABI() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void dump() const
Definition: Pass.cpp:136
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
bool erase(const T &V)
Definition: SmallSet.h:207
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializePPCMIPeepholePass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
@ Keep
No function return thunk.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createPPCMIPeepholePass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860