1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
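// For illustration (a sketch; the vreg numbers are made up): a transformation
// that folds
//   %5 = RLWINM killed %4, 0, 16, 31
// into a COPY changes the live range of %4, so it should call
// addRegToUpdate(RegsToUpdate, %4); if it also removes the only definition
// of %5, it should first provide addDummyDef(<MBB>, %5) so that LiveVariables
// still sees a definition.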
28//===---------------------------------------------------------------------===//
29
32#include "PPC.h"
33#include "PPCInstrInfo.h"
35#include "PPCTargetMachine.h"
36#include "llvm/ADT/Statistic.h"
47#include "llvm/Support/Debug.h"
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-mi-peepholes"
53
54STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
55STATISTIC(MultiTOCSaves,
56 "Number of functions with multiple TOC saves that must be kept");
57STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
58STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
59STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
60STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
61STATISTIC(NumConvertedToImmediateForm,
62 "Number of instructions converted to their immediate form");
63STATISTIC(NumFunctionsEnteredInMIPeephole,
64 "Number of functions entered in PPC MI Peepholes");
65STATISTIC(NumFixedPointIterations,
66 "Number of fixed-point iterations converting reg-reg instructions "
67 "to reg-imm ones");
68STATISTIC(NumRotatesCollapsed,
69 "Number of pairs of rotate left, clear left/right collapsed");
70STATISTIC(NumEXTSWAndSLDICombined,
71 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
72STATISTIC(NumLoadImmZeroFoldedAndRemoved,
73 "Number of LI(8) reg, 0 that are folded to r0 and removed");
74
75static cl::opt<bool>
76FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
77 cl::desc("Iterate to a fixed point when attempting to "
78 "convert reg-reg instructions to reg-imm"));
79
80static cl::opt<bool>
81ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
82 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
83
84static cl::opt<bool>
85 EnableSExtElimination("ppc-eliminate-signext",
86 cl::desc("enable elimination of sign-extensions"),
87 cl::init(true), cl::Hidden);
88
89static cl::opt<bool>
90 EnableZExtElimination("ppc-eliminate-zeroext",
91 cl::desc("enable elimination of zero-extensions"),
92 cl::init(true), cl::Hidden);
93
94static cl::opt<bool>
95 EnableTrapOptimization("ppc-opt-conditional-trap",
96 cl::desc("enable optimization of conditional traps"),
97 cl::init(false), cl::Hidden);
98
99DEBUG_COUNTER(
100 PeepholeXToICounter, "ppc-xtoi-peephole",
101 "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");
102
103DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
104 "Controls whether PPC per opcode peephole is performed on a MI");
105
106namespace {
107
108struct PPCMIPeephole : public MachineFunctionPass {
109
110 static char ID;
111 const PPCInstrInfo *TII;
112 MachineFunction *MF;
113 MachineRegisterInfo *MRI;
114 LiveVariables *LV;
115
116 PPCMIPeephole() : MachineFunctionPass(ID) {}
117
118private:
119 MachineDominatorTree *MDT;
120 MachinePostDominatorTree *MPDT;
121 MachineBlockFrequencyInfo *MBFI;
122 BlockFrequency EntryFreq;
123 SmallSet<Register, 16> RegsToUpdate;
124
125 // Initialize class variables.
126 void initialize(MachineFunction &MFParm);
127
128 // Perform peepholes.
129 bool simplifyCode();
130
131 // Perform peepholes.
132 bool eliminateRedundantCompare();
133 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
134 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
135 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
136 MachineInstr *&ToErase);
137 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
138 MachineInstr *MI);
139
140 // A number of transformations will eliminate the definition of a register
141 // as all of its uses will be removed. However, this leaves a register
142 // without a definition for LiveVariables. Such transformations should
143 // use this function to provide a dummy definition of the register that
144 // will simply be removed by DCE.
145 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
146 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
147 }
148 void addRegToUpdateWithLine(Register Reg, int Line);
149 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
150 SmallVectorImpl<MachineInstr *> &PHIs,
151 Register Dst);
152
153public:
154
155 void getAnalysisUsage(AnalysisUsage &AU) const override {
156 AU.addRequired<LiveVariablesWrapperPass>();
157 AU.addRequired<MachineDominatorTreeWrapperPass>();
158 AU.addRequired<MachinePostDominatorTreeWrapperPass>();
159 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
160 AU.addPreserved<LiveVariablesWrapperPass>();
161 AU.addPreserved<MachineDominatorTreeWrapperPass>();
162 AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
163 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
164 MachineFunctionPass::getAnalysisUsage(AU);
165 }
166
167 // Main entry point for this pass.
168 bool runOnMachineFunction(MachineFunction &MF) override {
169 initialize(MF);
170 // At this point, TOC pointer should not be used in a function that uses
171 // PC-Relative addressing.
172 assert((MF.getRegInfo().use_empty(PPC::X2) ||
173 !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
174 "TOC pointer used in a function using PC-Relative addressing!");
175 if (skipFunction(MF.getFunction()))
176 return false;
177 bool Changed = simplifyCode();
178#ifndef NDEBUG
179 if (Changed)
180 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
181 "-mllvm -disable-ppc-peephole");
182#endif
183 return Changed;
184 }
185};
186
187#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
188void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
189 if (!Reg.isVirtual())
190 return;
191 if (RegsToUpdate.insert(Reg).second)
192 LLVM_DEBUG(dbgs() << "Adding register: " << printReg(Reg) << " on line "
193 << Line << " for re-computation of kill flags\n");
194}
195
196// Initialize class variables.
197void PPCMIPeephole::initialize(MachineFunction &MFParm) {
198 MF = &MFParm;
199 MRI = &MF->getRegInfo();
200 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
201 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
202 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
203 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
204 EntryFreq = MBFI->getEntryFreq();
205 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
206 RegsToUpdate.clear();
207 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
208 LLVM_DEBUG(MF->dump());
209}
210
211static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
212 MachineRegisterInfo *MRI) {
213 assert(Op && "Invalid Operand!");
214 if (!Op->isReg())
215 return nullptr;
216
217 Register Reg = Op->getReg();
218 if (!Reg.isVirtual())
219 return nullptr;
220
221 return MRI->getVRegDef(Reg);
222}
223
224// This function returns the number of known zero bits in the output of the
225// instruction defining Reg, counting from the most significant bit.
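// For example, for `%3 = ANDI_rec %2, 240` the 16-bit immediate 240 (0x00F0)
// has 8 leading zero bits, so the result is known to have 48 + 8 = 56 leading
// zeroes; likewise an LBZ result fits in 8 bits and also yields 56.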
226static unsigned getKnownLeadingZeroCount(const unsigned Reg,
227 const PPCInstrInfo *TII,
228 const MachineRegisterInfo *MRI) {
229 MachineInstr *MI = MRI->getVRegDef(Reg);
230 unsigned Opcode = MI->getOpcode();
231 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
232 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
233 return MI->getOperand(3).getImm();
234
235 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
236 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
237 return MI->getOperand(3).getImm();
238
239 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
240 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
241 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
242 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
243 return 32 + MI->getOperand(3).getImm();
244
245 if (Opcode == PPC::ANDI_rec) {
246 uint16_t Imm = MI->getOperand(2).getImm();
247 return 48 + llvm::countl_zero(Imm);
248 }
249
250 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
251 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
252 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
253 // The result ranges from 0 to 32.
254 return 58;
255
256 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
257 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
258 // The result ranges from 0 to 64.
259 return 57;
260
261 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
262 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
263 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
264 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
265 return 48;
266
267 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
268 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
269 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
270 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
271 return 56;
272
273 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
274 Opcode == PPC::AND8_rec)
275 return std::max(
276 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
277 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
278
279 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
280 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
281 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
282 Opcode == PPC::XOR8_rec)
283 return std::min(
284 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
285 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
286
287 if (TII->isZeroExtended(Reg, MRI))
288 return 32;
289
290 return 0;
291}
292
293// This function maintains a map of pairs <TOC Save Instr, Keep>.
294// Each time a new TOC save is encountered, it checks whether any of the
295// existing saves are dominated by the new one. If so, it marks those
296// existing entries as redundant by setting their map values to false.
297// It then adds the new instruction to the map, with Keep set to false if
298// some existing save already dominates the new one, and true otherwise.
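// For example, if TOC save A in the entry block dominates a later TOC save B
// inside a loop, then processing B inserts it with Keep == false; had B been
// seen first, processing A would flip B's existing entry to false instead.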
299void PPCMIPeephole::UpdateTOCSaves(
300 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
301 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
302 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
303 // here only support it under ELFv2.
304 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
305 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
306
307 MachineBasicBlock *Entry = &MF->front();
308 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
309
310 // If the block in which the TOC save resides post-dominates Entry, or is
311 // hotter than the entry block (keep in mind that early MachineLICM has
312 // already run, so the TOC save won't be hoisted), we can just do the
313 // save in the prologue.
314 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
315 FI->setMustSaveTOC(true);
316
317 // If we are saving the TOC in the prologue, all the TOC saves can be
318 // removed from the code.
319 if (FI->mustSaveTOC()) {
320 for (auto &TOCSave : TOCSaves)
321 TOCSave.second = false;
322 // Add new instruction to map.
323 TOCSaves[MI] = false;
324 return;
325 }
326 }
327
328 bool Keep = true;
329 for (auto &I : TOCSaves) {
330 MachineInstr *CurrInst = I.first;
331 // If new instruction dominates an existing one, mark existing one as
332 // redundant.
333 if (I.second && MDT->dominates(MI, CurrInst))
334 I.second = false;
335 // Check if the new instruction is redundant.
336 if (MDT->dominates(CurrInst, MI)) {
337 Keep = false;
338 break;
339 }
340 }
341 // Add new instruction to map.
342 TOCSaves[MI] = Keep;
343}
344
345// This function collects a list of all the PHI nodes in the tree rooted at
346// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
347// The list initially only contains the root PHI. When we visit a PHI node, we
348// add it to the list. We continue to look for other PHI node operands while
349// there are nodes to visit in the list. The function returns false if the
350// optimization cannot be applied on this tree.
351static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
352 MachineInstr *RootPHI,
353 SmallVectorImpl<MachineInstr *> &PHIs) {
354 PHIs.push_back(RootPHI);
355 unsigned VisitedIndex = 0;
356 while (VisitedIndex < PHIs.size()) {
357 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
358 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
359 PHIOp != NumOps; PHIOp += 2) {
360 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
361 if (!RegOp.isVirtual())
362 return false;
363 MachineInstr *Instr = MRI->getVRegDef(RegOp);
364 // While collecting the PHI nodes, we check if they can be converted (i.e.
365 // all the operands are either copies, implicit defs or PHI nodes).
366 unsigned Opcode = Instr->getOpcode();
367 if (Opcode == PPC::COPY) {
368 Register Reg = Instr->getOperand(1).getReg();
369 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
370 return false;
371 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
372 return false;
373 // If we detect a cycle in the PHI nodes, we exit. It would be
374 // possible to change cycles as well, but that would add a lot
375 // of complexity for a case that is unlikely to occur with MMA
376 // code.
377 if (Opcode != PPC::PHI)
378 continue;
379 if (llvm::is_contained(PHIs, Instr))
380 return false;
381 PHIs.push_back(Instr);
382 }
383 VisitedIndex++;
384 }
385 return true;
386}
387
388// This function changes the unprimed accumulator PHI nodes in the PHIs list to
389// primed accumulator PHI nodes. The list is traversed in reverse order to
390// change all the PHI operands of a PHI node before changing the node itself.
391// We keep a map to associate each changed PHI node to its non-changed form.
392void PPCMIPeephole::convertUnprimedAccPHIs(
393 const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
394 SmallVectorImpl<MachineInstr *> &PHIs, Register Dst) {
395 DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
396 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
397 SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
398 // We check if the current PHI node can be changed by looking at its
399 // operands. If all the operands are either copies from primed
400 // accumulators, implicit definitions or other unprimed accumulator
401 // PHI nodes, we change it.
402 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
403 PHIOp += 2) {
404 Register RegOp = PHI->getOperand(PHIOp).getReg();
405 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
406 unsigned Opcode = PHIInput->getOpcode();
407 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
408 Opcode == PPC::PHI) &&
409 "Unexpected instruction");
410 if (Opcode == PPC::COPY) {
411 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
412 &PPC::ACCRCRegClass &&
413 "Unexpected register class");
414 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
415 } else if (Opcode == PPC::IMPLICIT_DEF) {
416 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
417 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
418 TII->get(PPC::IMPLICIT_DEF), AccReg);
419 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
420 PHI->getOperand(PHIOp + 1)});
421 } else if (Opcode == PPC::PHI) {
422 // We found a PHI operand. At this point we know this operand
423 // has already been changed so we get its associated changed form
424 // from the map.
425 assert(ChangedPHIMap.count(PHIInput) == 1 &&
426 "This PHI node should have already been changed.");
427 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
428 PHIOps.push_back({MachineOperand::CreateReg(
429 PrimedAccPHI->getOperand(0).getReg(), false),
430 PHI->getOperand(PHIOp + 1)});
431 }
432 }
433 Register AccReg = Dst;
434 // If the PHI node we are changing is the root node, the register it defines
435 // will be the destination register of the original copy (of the PHI def).
436 // For all other PHI's in the list, we need to create another primed
437 // accumulator virtual register as the PHI will no longer define the
438 // unprimed accumulator.
439 if (PHI != PHIs[0])
440 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
441 MachineInstrBuilder NewPHI = BuildMI(
442 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
443 for (auto RegMBB : PHIOps) {
444 NewPHI.add(RegMBB.first).add(RegMBB.second);
445 if (MRI->isSSA())
446 addRegToUpdate(RegMBB.first.getReg());
447 }
448 // The liveness of old PHI and new PHI have to be updated.
449 addRegToUpdate(PHI->getOperand(0).getReg());
450 addRegToUpdate(AccReg);
451 ChangedPHIMap[PHI] = NewPHI.getInstr();
452 LLVM_DEBUG(dbgs() << "Converting PHI: ");
453 LLVM_DEBUG(PHI->dump());
454 LLVM_DEBUG(dbgs() << "To: ");
455 LLVM_DEBUG(NewPHI.getInstr()->dump());
456 }
457}
458
459// Perform peephole optimizations.
460bool PPCMIPeephole::simplifyCode() {
461 bool Simplified = false;
462 bool TrapOpt = false;
463 MachineInstr* ToErase = nullptr;
464 std::map<MachineInstr *, bool> TOCSaves;
465 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
466 NumFunctionsEnteredInMIPeephole++;
467 if (ConvertRegReg) {
468 // Fixed-point conversion of reg/reg instructions fed by load-immediate
469 // into reg/imm instructions. FIXME: This is expensive, control it with
470 // an option.
471 bool SomethingChanged = false;
472 do {
473 NumFixedPointIterations++;
474 SomethingChanged = false;
475 for (MachineBasicBlock &MBB : *MF) {
476 for (MachineInstr &MI : MBB) {
477 if (MI.isDebugInstr())
478 continue;
479
480 if (!DebugCounter::shouldExecute(PeepholeXToICounter))
481 continue;
482
483 SmallSet<Register, 4> RRToRIRegsToUpdate;
484 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
485 continue;
486 for (Register R : RRToRIRegsToUpdate)
487 addRegToUpdate(R);
488 // The updated instruction may now have new register operands.
489 // Conservatively add them to recompute the flags as well.
490 for (const MachineOperand &MO : MI.operands())
491 if (MO.isReg())
492 addRegToUpdate(MO.getReg());
493 // We don't erase anything in case the def has other uses. Let DCE
494 // remove it if it can be removed.
495 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
496 LLVM_DEBUG(MI.dump());
497 NumConvertedToImmediateForm++;
498 SomethingChanged = true;
499 Simplified = true;
500 }
501 }
502 } while (SomethingChanged && FixedPointRegToImm);
503 }
504
505 // Since we are deleting this instruction, we need to run LiveVariables
506 // on any of its definitions that are marked as needing an update since
507 // we can't run LiveVariables on a deleted register. This only needs
508 // to be done for defs since uses will have their own defining
509 // instructions so we won't be running LiveVariables on a deleted reg.
510 auto recomputeLVForDyingInstr = [&]() {
511 if (RegsToUpdate.empty())
512 return;
513 for (MachineOperand &MO : ToErase->operands()) {
514 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
515 continue;
516 Register RegToUpdate = MO.getReg();
517 RegsToUpdate.erase(RegToUpdate);
518 // If some transformation has introduced an additional definition of
519 // this register (breaking SSA), we can safely convert this def to
520 // a def of an invalid register as the instruction is going away.
521 if (!MRI->getUniqueVRegDef(RegToUpdate))
522 MO.setReg(PPC::NoRegister);
523 LV->recomputeForSingleDefVirtReg(RegToUpdate);
524 }
525 };
526
527 for (MachineBasicBlock &MBB : *MF) {
528 for (MachineInstr &MI : MBB) {
529
530 // If the previous instruction was marked for elimination,
531 // remove it now.
532 if (ToErase) {
533 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
534 LLVM_DEBUG(ToErase->dump());
535 recomputeLVForDyingInstr();
536 ToErase->eraseFromParent();
537 ToErase = nullptr;
538 }
539 // If a conditional trap instruction got optimized to an
540 // unconditional trap, eliminate all the instructions after
541 // the trap.
542 if (EnableTrapOptimization && TrapOpt) {
543 ToErase = &MI;
544 continue;
545 }
546
547 // Ignore debug instructions.
548 if (MI.isDebugInstr())
549 continue;
550
551 if (!DebugCounter::shouldExecute(PeepholePerOpCounter))
552 continue;
553
554 // Per-opcode peepholes.
555 switch (MI.getOpcode()) {
556
557 default:
558 break;
559 case PPC::COPY: {
560 Register Src = MI.getOperand(1).getReg();
561 Register Dst = MI.getOperand(0).getReg();
562 if (!Src.isVirtual() || !Dst.isVirtual())
563 break;
564 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
565 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
566 break;
567
568 // We are copying an unprimed accumulator to a primed accumulator.
569 // If the input to the copy is a PHI that is fed only by (i) copies in
570 // the other direction (ii) implicitly defined unprimed accumulators or
571 // (iii) other PHI nodes satisfying (i) and (ii), we can change
572 // the PHI to a PHI on primed accumulators (as long as we also change
573 // its operands). To detect and change such copies, we first get a list
574 // of all the PHI nodes starting from the root PHI node in BFS order.
575 // We then visit all these PHI nodes to check if they can be changed to
576 // primed accumulator PHI nodes and if so, we change them.
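// For illustration (a sketch; vreg numbers and classes are made up):
//   %2:uaccrc = PHI %3:uaccrc, %bb.1, %4:uaccrc, %bb.2
//   %5:accrc = COPY %2:uaccrc
// where %3 is a COPY from an accrc vreg and %4 is an IMPLICIT_DEF can be
// rewritten as a PHI over accrc vregs that defines %5 directly, making this
// COPY dead.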
577 MachineInstr *RootPHI = MRI->getVRegDef(Src);
578 if (RootPHI->getOpcode() != PPC::PHI)
579 break;
580
581 SmallVector<MachineInstr *, 4> PHIs;
582 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
583 break;
584
585 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
586
587 ToErase = &MI;
588 break;
589 }
590 case PPC::LI:
591 case PPC::LI8: {
592 // If we are materializing a zero, look for any use operands for which
593 // zero means immediate zero. All such operands can be replaced with
594 // PPC::ZERO.
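// For example (illustrative), if %5 = LI 0 feeds an operand position where
// register r0 is read as the literal value zero (such as the base-address
// operand of a D-form memory access or of an ADDI), that operand can be
// rewritten to use PPC::ZERO, and the LI becomes dead once all such uses are
// folded.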
595 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
596 break;
597 Register MIDestReg = MI.getOperand(0).getReg();
598 bool Folded = false;
599 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
600 Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
601 if (MRI->use_nodbg_empty(MIDestReg)) {
602 ++NumLoadImmZeroFoldedAndRemoved;
603 ToErase = &MI;
604 }
605 if (Folded)
606 addRegToUpdate(MIDestReg);
607 Simplified |= Folded;
608 break;
609 }
610 case PPC::STW:
611 case PPC::STD: {
612 MachineFrameInfo &MFI = MF->getFrameInfo();
613 if (MFI.hasVarSizedObjects() ||
614 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
615 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
616 break;
617 // When encountering a TOC save instruction, call UpdateTOCSaves
618 // to add it to the TOCSaves map and mark any existing TOC saves
619 // it dominates as redundant.
620 if (TII->isTOCSaveMI(MI))
621 UpdateTOCSaves(TOCSaves, &MI);
622 break;
623 }
624 case PPC::XXPERMDI: {
625 // Perform simplifications of 2x64 vector swaps and splats.
626 // A swap is identified by an immediate value of 2, and a splat
627 // is identified by an immediate value of 0 or 3.
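// The two-bit immediate selects one doubleword from each source
// (imm == 2*DH + DL), so with identical sources 0 splats doubleword 0,
// 3 splats doubleword 1, 2 swaps the two doublewords, and 1 is the identity.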
628 int Immed = MI.getOperand(3).getImm();
629
630 if (Immed == 1)
631 break;
632
633 // For each of these simplifications, we need the two source
634 // regs to match. Unfortunately, MachineCSE ignores COPY and
635 // SUBREG_TO_REG, so for example we can see
636 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
637 // We have to look through chains of COPY and SUBREG_TO_REG
638 // to find the real source values for comparison.
639 Register TrueReg1 =
640 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
641 Register TrueReg2 =
642 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
643
644 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
645 break;
646
647 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
648
649 if (!DefMI)
650 break;
651
652 unsigned DefOpc = DefMI->getOpcode();
653
654 // If this is a splat fed by a splatting load, the splat is
655 // redundant. Replace with a copy. This doesn't happen directly due
656 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
657 // a load of a double to a vector of 64-bit integers.
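// For example (illustrative), if the single source was produced by
//   %3 = LXVDSX ...   (a load-and-splat)
// then an XXPERMDI of %3 with itself and immediate 0 or 3 produces the same
// splat and can become a COPY of %3. The lambda below recognizes the variant
// where the LXVDSX result first passes through XVCVDPSXDS/XVCVDPUXDS.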
658 auto isConversionOfLoadAndSplat = [=]() -> bool {
659 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
660 return false;
661 Register FeedReg1 =
662 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
663 if (FeedReg1.isVirtual()) {
664 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
665 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
666 return true;
667 }
668 return false;
669 };
670 if ((Immed == 0 || Immed == 3) &&
671 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
672 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
673 "to load-and-splat/copy: ");
674 LLVM_DEBUG(MI.dump());
675 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
676 MI.getOperand(0).getReg())
677 .add(MI.getOperand(1));
678 addRegToUpdate(MI.getOperand(1).getReg());
679 ToErase = &MI;
680 Simplified = true;
681 }
682
683 // If this is a splat or a swap fed by another splat, we
684 // can replace it with a copy.
685 if (DefOpc == PPC::XXPERMDI) {
686 Register DefReg1 = DefMI->getOperand(1).getReg();
687 Register DefReg2 = DefMI->getOperand(2).getReg();
688 unsigned DefImmed = DefMI->getOperand(3).getImm();
689
690 // If the two inputs are not the same register, check to see if
691 // they originate from the same virtual register after only
692 // copy-like instructions.
693 if (DefReg1 != DefReg2) {
694 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
695 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
696
697 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
698 break;
699 }
700
701 if (DefImmed == 0 || DefImmed == 3) {
702 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
703 "to splat/copy: ");
704 LLVM_DEBUG(MI.dump());
705 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
706 MI.getOperand(0).getReg())
707 .add(MI.getOperand(1));
708 addRegToUpdate(MI.getOperand(1).getReg());
709 ToErase = &MI;
710 Simplified = true;
711 }
712
713 // If this is a splat fed by a swap, we can modify the splat to
714 // splat the other doubleword directly from the swap's input,
715 // bypassing the swap.
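// For example, a splat of doubleword 0 (Immed == 0) of a swap
// (DefImmed == 2) selects what was doubleword 1 of the swap's input, so it
// is equivalent to a splat with immediate 3 - 0 == 3 (i.e. doubleword 1)
// taken directly from that input.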
716 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
717 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
718 LLVM_DEBUG(MI.dump());
719 addRegToUpdate(MI.getOperand(1).getReg());
720 addRegToUpdate(MI.getOperand(2).getReg());
721 MI.getOperand(1).setReg(DefReg1);
722 MI.getOperand(2).setReg(DefReg2);
723 MI.getOperand(3).setImm(3 - Immed);
724 addRegToUpdate(DefReg1);
725 addRegToUpdate(DefReg2);
726 Simplified = true;
727 }
728
729 // If this is a swap fed by a swap, we can replace it
730 // with a copy from the first swap's input.
731 else if (Immed == 2 && DefImmed == 2) {
732 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
733 LLVM_DEBUG(MI.dump());
734 addRegToUpdate(MI.getOperand(1).getReg());
735 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
736 MI.getOperand(0).getReg())
737 .add(DefMI->getOperand(1));
738 addRegToUpdate(DefMI->getOperand(0).getReg());
739 addRegToUpdate(DefMI->getOperand(1).getReg());
740 ToErase = &MI;
741 Simplified = true;
742 }
743 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
744 DefOpc == PPC::XXPERMDIs &&
745 (DefMI->getOperand(2).getImm() == 0 ||
746 DefMI->getOperand(2).getImm() == 3)) {
747 ToErase = &MI;
748 Simplified = true;
749 // Swap of a splat, convert to copy.
750 if (Immed == 2) {
751 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
752 LLVM_DEBUG(MI.dump());
753 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
754 MI.getOperand(0).getReg())
755 .add(MI.getOperand(1));
756 addRegToUpdate(MI.getOperand(1).getReg());
757 break;
758 }
759 // Splat fed by another splat - switch the output of the first
760 // and remove the second.
761 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
762 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
763 LLVM_DEBUG(MI.dump());
764 } else if (Immed == 2 &&
765 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
766 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
767 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
768 DefOpc == PPC::VSPLTISW)) {
769 // Swap of various vector splats, convert to copy.
770 ToErase = &MI;
771 Simplified = true;
772 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
773 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
774 LLVM_DEBUG(MI.dump());
775 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
776 MI.getOperand(0).getReg())
777 .add(MI.getOperand(1));
778 addRegToUpdate(MI.getOperand(1).getReg());
779 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
780 TII->isLoadFromConstantPool(DefMI)) {
781 const Constant *C = TII->getConstantFromConstantPool(DefMI);
782 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
783 ToErase = &MI;
784 Simplified = true;
786 << "Optimizing swap(splat pattern from constant-pool) "
787 "=> copy(splat pattern from constant-pool): ");
788 LLVM_DEBUG(MI.dump());
789 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
790 MI.getOperand(0).getReg())
791 .add(MI.getOperand(1));
792 addRegToUpdate(MI.getOperand(1).getReg());
793 }
794 }
795 break;
796 }
797 case PPC::VSPLTB:
798 case PPC::VSPLTH:
799 case PPC::XXSPLTW: {
800 unsigned MyOpcode = MI.getOpcode();
801 // The operand number of the source register in the splat instruction.
802 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
803 Register TrueReg =
804 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
805 if (!TrueReg.isVirtual())
806 break;
807 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
808 if (!DefMI)
809 break;
810 unsigned DefOpcode = DefMI->getOpcode();
811 auto isConvertOfSplat = [=]() -> bool {
812 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
813 return false;
814 Register ConvReg = DefMI->getOperand(1).getReg();
815 if (!ConvReg.isVirtual())
816 return false;
817 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
818 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
819 Splt->getOpcode() == PPC::XXSPLTW);
820 };
821 bool AlreadySplat = (MyOpcode == DefOpcode) ||
822 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
823 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
824 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
825 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
826 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
827 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
828
829 // If the instruction[s] that feed this splat have already splat
830 // the value, this splat is redundant.
831 if (AlreadySplat) {
832 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
833 LLVM_DEBUG(MI.dump());
834 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
835 MI.getOperand(0).getReg())
836 .add(MI.getOperand(OpNo));
837 addRegToUpdate(MI.getOperand(OpNo).getReg());
838 ToErase = &MI;
839 Simplified = true;
840 }
841
842 // Splat fed by a shift. This usually occurs when the value to be
843 // splatted is first aligned into vector element zero.
844 if (DefOpcode == PPC::XXSLDWI) {
845 Register ShiftOp1 = DefMI->getOperand(1).getReg();
846
847 if (ShiftOp1 == DefMI->getOperand(2).getReg()) {
848 // For example, we can erase the XXSLDWI in the following sequence:
849 // %2:vrrc = XXSLDWI killed %1:vrrc, %1:vrrc, 1
850 // %6:vrrc = VSPLTB 15, killed %2:vrrc
851 // %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
852 //
853 // --->
854 //
855 // %6:vrrc = VSPLTB 3, killed %1:vrrc
856 // %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
857
858 if (MRI->hasOneNonDBGUse(DefMI->getOperand(0).getReg())) {
859 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
860 LLVM_DEBUG(DefMI->dump());
861 ToErase = DefMI;
862 }
863 Simplified = true;
864 unsigned ShiftImm = DefMI->getOperand(3).getImm();
865 // The operand number of the splat Imm in the instruction.
866 unsigned SplatImmNo = MyOpcode == PPC::XXSPLTW ? 2 : 1;
867 unsigned SplatImm = MI.getOperand(SplatImmNo).getImm();
868
869 // Calculate the new splat-element immediate. We need to convert the
870 // element index into the proper unit (byte for VSPLTB, halfword for
871 // VSPLTH, word for VSPLTW) because PPC::XXSLDWI interprets its
872 // ShiftImm in 32-bit word units.
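// For example, in the sequence above a VSPLTB of byte 15 fed by an XXSLDWI
// with ShiftImm 1 becomes a VSPLTB of byte (15 + 1 * 4) & 0xF == 3.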
873 auto CalculateNewElementIdx = [&](unsigned Opcode) {
874 if (Opcode == PPC::VSPLTB)
875 return (SplatImm + ShiftImm * 4) & 0xF;
876 else if (Opcode == PPC::VSPLTH)
877 return (SplatImm + ShiftImm * 2) & 0x7;
878 else
879 return (SplatImm + ShiftImm) & 0x3;
880 };
881
882 unsigned NewElem = CalculateNewElementIdx(MyOpcode);
883
884 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
885 << " to " << NewElem << " in instruction: ");
886 LLVM_DEBUG(MI.dump());
887 addRegToUpdate(MI.getOperand(OpNo).getReg());
888 addRegToUpdate(ShiftOp1);
889 MI.getOperand(OpNo).setReg(ShiftOp1);
890 MI.getOperand(SplatImmNo).setImm(NewElem);
891 }
892 }
893 break;
894 }
895 case PPC::XVCVDPSP: {
896 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
897 Register TrueReg =
898 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
899 if (!TrueReg.isVirtual())
900 break;
901 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
902
903 // This can occur when building a vector of single precision or integer
904 // values.
905 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
906 Register DefsReg1 =
907 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
908 Register DefsReg2 =
909 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
910 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
911 break;
912 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
913 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
914
915 if (!P1 || !P2)
916 break;
917
918 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
919 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
920 // the FRSP/XSRSP.
921 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
922 unsigned Opc = RoundInstr->getOpcode();
923 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
924 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
925 Simplified = true;
926 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
927 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
928 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
929 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
930 if (Use.getOperand(i).isReg() &&
931 Use.getOperand(i).getReg() == FRSPDefines)
932 Use.getOperand(i).setReg(ConvReg1);
933 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
934 LLVM_DEBUG(RoundInstr->dump());
935 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
936 LLVM_DEBUG(MI.dump());
937 LLVM_DEBUG(dbgs() << "Through instruction:\n");
938 LLVM_DEBUG(Use.dump());
939 addRegToUpdate(ConvReg1);
940 addRegToUpdate(FRSPDefines);
941 ToErase = RoundInstr;
942 }
943 };
944
945 // If the input to XVCVDPSP is a vector that was built (even
946 // partially) out of FRSP's, the FRSP(s) can safely be removed
947 // since this instruction performs the same operation.
948 if (P1 != P2) {
949 removeFRSPIfPossible(P1);
950 removeFRSPIfPossible(P2);
951 break;
952 }
953 removeFRSPIfPossible(P1);
954 }
955 break;
956 }
957 case PPC::EXTSH:
958 case PPC::EXTSH8:
959 case PPC::EXTSH8_32_64: {
960 if (!EnableSExtElimination) break;
961 Register NarrowReg = MI.getOperand(1).getReg();
962 if (!NarrowReg.isVirtual())
963 break;
964
965 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
966 unsigned SrcOpcode = SrcMI->getOpcode();
967 // If we've used a zero-extending load that we will sign-extend,
968 // just do a sign-extending load.
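// For example (illustrative; vreg numbers are made up):
//   %4:gprc = LHZ 0, %ptr
//   %5:gprc = EXTSH killed %4
// becomes the single instruction
//   %5:gprc = LHA 0, %ptr
// provided %4 has no other non-debug uses.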
969 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
970 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
971 break;
972 // Determine the new opcode. We need to make sure that if the original
973 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
974 // Likewise if the source is X-Form the new opcode should also be
975 // X-Form.
976 unsigned Opc = PPC::LHA;
977 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
978 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
979 MI.getOpcode() == PPC::EXTSH8_32_64;
980
981 if (SourceIsXForm && MIIs64Bit)
982 Opc = PPC::LHAX8;
983 else if (SourceIsXForm && !MIIs64Bit)
984 Opc = PPC::LHAX;
985 else if (MIIs64Bit)
986 Opc = PPC::LHA8;
987
988 addRegToUpdate(NarrowReg);
989 addRegToUpdate(MI.getOperand(0).getReg());
990
991 // We are removing a definition of NarrowReg which will cause
992 // problems in AliveBlocks. Add an implicit def that will be
993 // removed so that AliveBlocks are updated correctly.
994 addDummyDef(MBB, &MI, NarrowReg);
995 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
996 LLVM_DEBUG(SrcMI->dump());
997 LLVM_DEBUG(dbgs() << "and sign-extension\n");
998 LLVM_DEBUG(MI.dump());
999 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1000 SrcMI->setDesc(TII->get(Opc));
1001 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1002 ToErase = &MI;
1003 Simplified = true;
1004 NumEliminatedSExt++;
1005 }
1006 break;
1007 }
1008 case PPC::EXTSW:
1009 case PPC::EXTSW_32:
1010 case PPC::EXTSW_32_64: {
1011 if (!EnableSExtElimination) break;
1012 Register NarrowReg = MI.getOperand(1).getReg();
1013 if (!NarrowReg.isVirtual())
1014 break;
1015
1016 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
1017 unsigned SrcOpcode = SrcMI->getOpcode();
1018 // If we've used a zero-extending load that we will sign-extend,
1019 // just do a sign-extending load.
1020 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
1021 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
1022 break;
1023
1024 // The transformation from a zero-extending load to a sign-extending
1025 // load is only legal when the displacement is a multiple of 4, because
1026 // LWA is a DS-form instruction whose displacement is implicitly scaled
1027 // by 4. If we cannot prove that alignment, don't perform the transformation.
1028 bool IsWordAligned = false;
1029 if (SrcMI->getOperand(1).isGlobal()) {
1030 const GlobalVariable *GV =
1031 dyn_cast<GlobalVariable>(SrcMI->getOperand(1).getGlobal());
1032 if (GV && GV->getAlign() && *GV->getAlign() >= 4 &&
1033 (SrcMI->getOperand(1).getOffset() % 4 == 0))
1034 IsWordAligned = true;
1035 } else if (SrcMI->getOperand(1).isImm()) {
1036 int64_t Value = SrcMI->getOperand(1).getImm();
1037 if (Value % 4 == 0)
1038 IsWordAligned = true;
1039 }
1040
1041 // Determine the new opcode. We need to make sure that if the original
1042 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1043 // Likewise if the source is X-Form the new opcode should also be
1044 // X-Form.
1045 unsigned Opc = PPC::LWA_32;
1046 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1047 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1048 MI.getOpcode() == PPC::EXTSW_32_64;
1049
1050 if (SourceIsXForm && MIIs64Bit)
1051 Opc = PPC::LWAX;
1052 else if (SourceIsXForm && !MIIs64Bit)
1053 Opc = PPC::LWAX_32;
1054 else if (MIIs64Bit)
1055 Opc = PPC::LWA;
1056
1057 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1058 break;
1059
1060 addRegToUpdate(NarrowReg);
1061 addRegToUpdate(MI.getOperand(0).getReg());
1062
1063 // We are removing a definition of NarrowReg which will cause
1064 // problems in AliveBlocks. Add an implicit def that will be
1065 // removed so that AliveBlocks are updated correctly.
1066 addDummyDef(MBB, &MI, NarrowReg);
1067 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1068 LLVM_DEBUG(SrcMI->dump());
1069 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1070 LLVM_DEBUG(MI.dump());
1071 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1072 SrcMI->setDesc(TII->get(Opc));
1073 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1074 ToErase = &MI;
1075 Simplified = true;
1076 NumEliminatedSExt++;
1077 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1078 TII->isSignExtended(NarrowReg, MRI)) {
1079 // We can eliminate EXTSW if the input is known to be already
1080 // sign-extended. However, we are not sure whether a spill will occur
1081 // during register allocation. If there is no promotion, it will use
1082 // 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
1083 // since the register class is 32-bits. Consequently, the high 32-bit
1084 // information will be lost. Therefore, all these instructions in the
1085 // chain used to deduce sign extension to eliminate the 'extsw' will
1086 // need to be promoted to 64-bit pseudo instructions when the 'extsw'
1087 // is eliminated.
1088 TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
1089
1090 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1091 Register TmpReg =
1092 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1093 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1094 TmpReg);
1095 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1096 MI.getOperand(0).getReg())
1097 .addReg(TmpReg)
1098 .addReg(NarrowReg)
1099 .addImm(PPC::sub_32);
1100 ToErase = &MI;
1101 Simplified = true;
1102 NumEliminatedSExt++;
1103 }
1104 break;
1105 }
1106 case PPC::RLDICL: {
1107 // We can eliminate RLDICL (e.g. for zero-extension)
1108 // if all bits to clear are already zero in the input.
1109 // This code assumes the following code sequence for zero-extension:
1110 // %6 = COPY %5:sub_32; (optional)
1111 // %8 = IMPLICIT_DEF;
1112 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1113 if (!EnableZExtElimination) break;
1114
1115 if (MI.getOperand(2).getImm() != 0)
1116 break;
1117
1118 Register SrcReg = MI.getOperand(1).getReg();
1119 if (!SrcReg.isVirtual())
1120 break;
1121
1122 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1123 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1124 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1125 break;
1126
1127 MachineInstr *ImpDefMI, *SubRegMI;
1128 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1129 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1130 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1131
1132 SrcMI = SubRegMI;
1133 if (SubRegMI->getOpcode() == PPC::COPY) {
1134 Register CopyReg = SubRegMI->getOperand(1).getReg();
1135 if (CopyReg.isVirtual())
1136 SrcMI = MRI->getVRegDef(CopyReg);
1137 }
1138 if (!SrcMI->getOperand(0).isReg())
1139 break;
1140
1141 unsigned KnownZeroCount =
1142 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1143 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1144 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1145 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1146 MI.getOperand(0).getReg())
1147 .addReg(SrcReg);
1148 addRegToUpdate(SrcReg);
1149 ToErase = &MI;
1150 Simplified = true;
1151 NumEliminatedZExt++;
1152 }
1153 break;
1154 }
1155
1156 // TODO: Any instruction that has an immediate form fed only by a PHI
1157 // whose operands are all load immediate can be folded away. We currently
1158 // do this for ADD instructions, but should expand it to arithmetic and
1159 // binary instructions with immediate forms in the future.
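// For example (illustrative; vreg numbers are made up):
//   %2 = LI 10 (in %bb.1), %3 = LI 20 (in %bb.2)
//   %4 = PHI %2, %bb.1, %3, %bb.2
//   %5 = ADD4 %4, %6
// can be rewritten so the LIs become ADDIs of %6 and the ADD becomes a COPY
// of the PHI result.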
1160 case PPC::ADD4:
1161 case PPC::ADD8: {
1162 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1163 assert(PhiOp && "Invalid Operand!");
1164 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1165
1166 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1167 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1168 };
1169
1170 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1171 MachineOperand *PhiOp) {
1172 assert(PhiOp && "Invalid Operand!");
1173 assert(DominatorOp && "Invalid Operand!");
1174 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1175 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1176
1177 // Note: the vregs only show up at the odd operand indices of the PHI
1178 // node; the even indices hold the incoming basic block info.
1179 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1180 MachineInstr *LiMI =
1181 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1182 if (!LiMI ||
1183 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1184 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1185 !MDT->dominates(DefDomMI, LiMI))
1186 return false;
1187 }
1188
1189 return true;
1190 };
1191
1192 MachineOperand Op1 = MI.getOperand(1);
1193 MachineOperand Op2 = MI.getOperand(2);
1194 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1195 std::swap(Op1, Op2);
1196 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1197 break; // We don't have an ADD fed by LI's that can be transformed
1198
1199 // Now we know that Op1 is the PHI node and Op2 is the dominator
1200 Register DominatorReg = Op2.getReg();
1201
1202 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1203 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1204 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1205 MRI->setRegClass(DominatorReg, TRC);
1206
1207 // replace LIs with ADDIs
1208 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1209 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1210 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1211 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1212 LLVM_DEBUG(LiMI->dump());
1213
1214 // There could be repeated registers in the PHI, e.g: %1 =
1215 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1216 // already replaced the def instruction, skip.
1217 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1218 continue;
1219
1220 assert((LiMI->getOpcode() == PPC::LI ||
1221 LiMI->getOpcode() == PPC::LI8) &&
1222 "Invalid Opcode!");
1223 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1224 LiMI->removeOperand(1); // remove the imm of LI
1225 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1226 : PPC::ADDI8));
1227 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1228 .addReg(DominatorReg)
1229 .addImm(LiImm); // restore the imm of LI
1230 LLVM_DEBUG(LiMI->dump());
1231 }
1232
1233 // Replace ADD with COPY
1234 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1235 LLVM_DEBUG(MI.dump());
1236 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1237 MI.getOperand(0).getReg())
1238 .add(Op1);
1239 addRegToUpdate(Op1.getReg());
1240 addRegToUpdate(Op2.getReg());
1241 ToErase = &MI;
1242 Simplified = true;
1243 NumOptADDLIs++;
1244 break;
1245 }
1246 case PPC::RLDICR: {
1247 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1248 combineSEXTAndSHL(MI, ToErase);
1249 break;
1250 }
1251 case PPC::ANDI_rec:
1252 case PPC::ANDI8_rec:
1253 case PPC::ANDIS_rec:
1254 case PPC::ANDIS8_rec: {
1255 Register TrueReg =
1256 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1257 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1258 break;
1259
1260 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1261 if (!SrcMI)
1262 break;
1263
1264 unsigned SrcOpCode = SrcMI->getOpcode();
1265 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1266 break;
1267
1268 Register SrcReg, DstReg;
1269 SrcReg = SrcMI->getOperand(1).getReg();
1270 DstReg = MI.getOperand(1).getReg();
1271 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1272 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1273 if (DstRC != SrcRC)
1274 break;
1275
1276 uint64_t AndImm = MI.getOperand(2).getImm();
1277 if (MI.getOpcode() == PPC::ANDIS_rec ||
1278 MI.getOpcode() == PPC::ANDIS8_rec)
1279 AndImm <<= 16;
1280 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1281 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1282 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1283
1284 // We can transform `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` into `ANDI_rec 0`
1285 // if all of the bits being ANDed are already zero in the input.
1286 bool PatternResultZero =
1287 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1288 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1289
1290 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1291 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1292 bool PatternRemoveRotate =
1293 SrcMI->getOperand(2).getImm() == 0 &&
1294 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1295 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
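// Worked example (illustrative): if SrcMI is `RLDICL killed %x, 0, 48`,
// its result has the high 48 bits cleared; an ANDIS_rec of that result
// with immediate 1 tests only bit 16 (counting from the least significant
// bit), which lies within the cleared bits (RZeroAndImm 16 + ImmSrc 48 > 63),
// so the AND result is always zero and the pair collapses to an ANDI_rec
// with immediate 0.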
1296
1297 if (!PatternResultZero && !PatternRemoveRotate)
1298 break;
1299
1300 LLVM_DEBUG(dbgs() << "Combining pair: ");
1301 LLVM_DEBUG(SrcMI->dump());
1302 LLVM_DEBUG(MI.dump());
1303 if (PatternResultZero)
1304 MI.getOperand(2).setImm(0);
1305 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1306 LLVM_DEBUG(dbgs() << "To: ");
1307 LLVM_DEBUG(MI.dump());
1308 addRegToUpdate(MI.getOperand(1).getReg());
1309 addRegToUpdate(SrcMI->getOperand(0).getReg());
1310 Simplified = true;
1311 break;
1312 }
1313 case PPC::RLWINM:
1314 case PPC::RLWINM_rec:
1315 case PPC::RLWINM8:
1316 case PPC::RLWINM8_rec: {
1317 // We might replace operand 1 of the instruction which will
1318 // require we recompute kill flags for it.
1319 Register OrigOp1Reg = MI.getOperand(1).isReg()
1320 ? MI.getOperand(1).getReg()
1321 : PPC::NoRegister;
1322 Simplified = TII->combineRLWINM(MI, &ToErase);
1323 if (Simplified) {
1324 addRegToUpdate(OrigOp1Reg);
1325 if (MI.getOperand(1).isReg())
1326 addRegToUpdate(MI.getOperand(1).getReg());
1327 if (ToErase && ToErase->getOperand(1).isReg())
1328 for (auto UseReg : ToErase->explicit_uses())
1329 if (UseReg.isReg())
1330 addRegToUpdate(UseReg.getReg());
1331 ++NumRotatesCollapsed;
1332 }
1333 break;
1334 }
1335 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1336 // always trap; we will delete it if it will never trap.
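// The TO field (operand 0) encodes the conditions tested below:
// 0x10 = signed less-than, 0x8 = signed greater-than, 0x4 = equal,
// 0x2 = unsigned less-than, 0x1 = unsigned greater-than; a value of 31
// tests all conditions and therefore always traps.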
1337 case PPC::TDI:
1338 case PPC::TWI:
1339 case PPC::TD:
1340 case PPC::TW: {
1341 if (!EnableTrapOptimization) break;
1342 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1343 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1344 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1345 // We can only do the optimization if we can get immediates
1346 // from both operands
1347 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1348 LiMI1->getOpcode() == PPC::LI8)))
1349 break;
1350 if (!IsOperand2Immediate &&
1351 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1352 LiMI2->getOpcode() == PPC::LI8)))
1353 break;
1354
1355 auto ImmOperand0 = MI.getOperand(0).getImm();
1356 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1357 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1358 : LiMI2->getOperand(1).getImm();
1359
1360 // We will replace the MI with an unconditional trap if it will always
1361 // trap.
1362 if ((ImmOperand0 == 31) ||
1363 ((ImmOperand0 & 0x10) &&
1364 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1365 ((ImmOperand0 & 0x8) &&
1366 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1367 ((ImmOperand0 & 0x2) &&
1368 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1369 ((ImmOperand0 & 0x1) &&
1370 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1371 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1372 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1373 TrapOpt = true;
1374 }
1375 // We will delete the MI if it will never trap.
1376 ToErase = &MI;
1377 Simplified = true;
1378 break;
1379 }
1380 }
1381 }
1382
1383 // If the last instruction was marked for elimination,
1384 // remove it now.
1385 if (ToErase) {
1386 recomputeLVForDyingInstr();
1387 ToErase->eraseFromParent();
1388 ToErase = nullptr;
1389 }
1390 // Reset TrapOpt to false at the end of the basic block.
1391 if (EnableTrapOptimization)
1392 TrapOpt = false;
1393 }
1394
1395 // Eliminate all the TOC save instructions which are redundant.
1396 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1397 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1398 if (FI->mustSaveTOC())
1399 NumTOCSavesInPrologue++;
1400
1401 // We try to eliminate redundant compare instruction.
1402 Simplified |= eliminateRedundantCompare();
1403
1404 // If we have made any modifications and added any registers to the set of
1405 // registers for which we need to update the kill flags, do so by recomputing
1406 // LiveVariables for those registers.
1407 for (Register Reg : RegsToUpdate) {
1408 if (!MRI->reg_empty(Reg))
1409 LV->recomputeForSingleDefVirtReg(Reg);
1410 }
1411 return Simplified;
1412}
1413
1414// helper functions for eliminateRedundantCompare
1415static bool isEqOrNe(MachineInstr *BI) {
1416 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1417 unsigned PredCond = PPC::getPredicateCondition(Pred);
1418 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1419}
1420
1421static bool isSupportedCmpOp(unsigned opCode) {
1422 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1423 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1424 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1425 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1426}
1427
1428static bool is64bitCmpOp(unsigned opCode) {
1429 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1430 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1431}
1432
1433static bool isSignedCmpOp(unsigned opCode) {
1434 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1435 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1436}
1437
1438static unsigned getSignedCmpOpCode(unsigned opCode) {
1439 if (opCode == PPC::CMPLD) return PPC::CMPD;
1440 if (opCode == PPC::CMPLW) return PPC::CMPW;
1441 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1442 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1443 return opCode;
1444}
1445
1446// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1447// (LT x) to (LE x-1)
1448static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1449 uint64_t Imm = CMPI->getOperand(2).getImm();
1450 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1451 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1452 return 0;
1453
1454 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1455 unsigned PredCond = PPC::getPredicateCondition(Pred);
1456 unsigned PredHint = PPC::getPredicateHint(Pred);
1457 if (PredCond == PPC::PRED_GE)
1458 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1459 if (PredCond == PPC::PRED_LT)
1460 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1461
1462 return 0;
1463}
1464
1465// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1466// (LE x) to (LT x+1)
1467static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1468 uint64_t Imm = CMPI->getOperand(2).getImm();
1469 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1470 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1471 return 0;
1472
1473 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1474 unsigned PredCond = PPC::getPredicateCondition(Pred);
1475 unsigned PredHint = PPC::getPredicateHint(Pred);
1476 if (PredCond == PPC::PRED_GT)
1477 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1478 if (PredCond == PPC::PRED_LE)
1479 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1480
1481 return 0;
1482}
1483
1484// This takes a Phi node and returns a register value for the specified BB.
1485static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1486 MachineBasicBlock *MBB) {
1487 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1488 MachineOperand &MO = Phi->getOperand(I);
1489 if (MO.getMBB() == MBB)
1490 return Phi->getOperand(I-1).getReg();
1491 }
1492 llvm_unreachable("invalid src basic block for this Phi node\n");
1493 return 0;
1494}
1495
1496// This function tracks the source of the register through register copies.
1497// If BB1 and BB2 are non-NULL, we also track through a PHI instruction in
1498// BB2, assuming that control flows from BB1 into BB2.
1499static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1500 MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
1501 unsigned SrcReg = Reg;
1502 while (true) {
1503 unsigned NextReg = SrcReg;
1504 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1505 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1506 NextReg = getIncomingRegForBlock(Inst, BB1);
1507 // We track through PHI only once to avoid infinite loop.
1508 BB1 = nullptr;
1509 }
1510 else if (Inst->isFullCopy())
1511 NextReg = Inst->getOperand(1).getReg();
1512 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1513 break;
1514 SrcReg = NextReg;
1515 }
1516 return SrcReg;
1517}
1518
1519static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1520 MachineBasicBlock *&PredMBB,
1521 MachineBasicBlock *&MBBtoMoveCmp,
1522 MachineRegisterInfo *MRI) {
1523
1524 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1525 auto BII = BB.getFirstInstrTerminator();
1526 // We optimize BBs ending with a conditional branch.
1527 // We check only for BCC here, not BCCLR, because BCCLR
1528 // will be formed only later in the pipeline.
1529 if (BB.succ_size() == 2 &&
1530 BII != BB.instr_end() &&
1531 (*BII).getOpcode() == PPC::BCC &&
1532 (*BII).getOperand(1).isReg()) {
1533 // We optimize only if the condition code is used only by one BCC.
1534 Register CndReg = (*BII).getOperand(1).getReg();
1535 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1536 return false;
1537
1538 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1539 // We assume compare and branch are in the same BB for ease of analysis.
1540 if (CMPI->getParent() != &BB)
1541 return false;
1542
1543 // We skip this BB if a physical register is used in comparison.
1544 for (MachineOperand &MO : CMPI->operands())
1545 if (MO.isReg() && !MO.getReg().isVirtual())
1546 return false;
1547
1548 return true;
1549 }
1550 return false;
1551 };
1552
1553 // If this BB has more than one successor, we could create a new BB and
1554 // move the compare instruction into the new BB.
1555 // So far, we do not move a compare instruction to a BB having multiple
1556 // successors, to avoid potentially increasing code size.
1557 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1558 return BB.succ_size() == 1;
1559 };
1560
1561 if (!isEligibleBB(MBB))
1562 return false;
1563
1564 unsigned NumPredBBs = MBB.pred_size();
1565 if (NumPredBBs == 1) {
1566 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1567 if (isEligibleBB(*TmpMBB)) {
1568 PredMBB = TmpMBB;
1569 MBBtoMoveCmp = nullptr;
1570 return true;
1571 }
1572 }
1573 else if (NumPredBBs == 2) {
1574 // We check for the partially redundant case.
1575 // So far, we support cases with only two predecessors
1576 // to avoid increasing the number of instructions.
1577 MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
1578 MachineBasicBlock *Pred1MBB = *PI;
1579 MachineBasicBlock *Pred2MBB = *(PI+1);
1580
1581 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1582 // We assume Pred1MBB is the BB containing the compare to be merged and
1583 // Pred2MBB is the BB to which we will append a compare instruction.
1584 // Proceed as is if Pred1MBB is different from MBB.
1585 }
1586 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1587 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1588 std::swap(Pred1MBB, Pred2MBB);
1589 }
1590 else return false;
1591
1592 if (Pred1MBB == &MBB)
1593 return false;
1594
1595 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1596 // We cannot move the compare instruction if operands are not available
1597 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1598 MachineInstr *BI = &*MBB.getFirstInstrTerminator();
1599 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1600 for (int I = 1; I <= 2; I++)
1601 if (CMPI->getOperand(I).isReg()) {
1602 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1603 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1604 return false;
1605 }
1606
1607 PredMBB = Pred1MBB;
1608 MBBtoMoveCmp = Pred2MBB;
1609 return true;
1610 }
1611
1612 return false;
1613}
1614
1615 // This function iterates over the input map, which pairs each TOC save
1616 // instruction with a flag. The flag is false if the TOC save has been
1617 // proven redundant. All TOC saves marked as redundant are erased from
1618 // their basic blocks.
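// For example (illustrative): given TOCSaves = { {Save1, true}, {Save2, false} },
// Save2 is erased (and counted in RemoveTOCSave) while Save1 is kept.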
1619bool PPCMIPeephole::eliminateRedundantTOCSaves(
1620 std::map<MachineInstr *, bool> &TOCSaves) {
1621 bool Simplified = false;
1622 int NumKept = 0;
1623 for (auto TOCSave : TOCSaves) {
1624 if (!TOCSave.second) {
1625 TOCSave.first->eraseFromParent();
1626 RemoveTOCSave++;
1627 Simplified = true;
1628 } else {
1629 NumKept++;
1630 }
1631 }
1632
1633 if (NumKept > 1)
1634 MultiTOCSaves++;
1635
1636 return Simplified;
1637}
1638
1639// If multiple conditional branches are executed based on the (essentially)
1640// same comparison, we merge compare instructions into one and make multiple
1641// conditional branches on this comparison.
1642// For example,
1643// if (a == 0) { ... }
1644// else if (a < 0) { ... }
1645// can be executed by one compare and two conditional branches instead of
1646// two pairs of a compare and a conditional branch.
1647//
1648// This method merges two compare instructions in two MBBs and modifies the
1649// compare and conditional branch instructions if needed.
1650// For the above example, the input for this pass looks like:
1651// cmplwi r3, 0
1652// beq 0, .LBB0_3
1653// cmpwi r3, -1
1654// bgt 0, .LBB0_4
1655// So, before merging two compares, we need to modify these instructions as
1656 // cmpwi r3, 0 ; cmplwi and cmpwi yield the same result for beq
1657 // beq 0, .LBB0_3
1658 // cmpwi r3, 0 ; greater than -1 means greater than or equal to 0
1659// bge 0, .LBB0_4
1660
1661bool PPCMIPeephole::eliminateRedundantCompare() {
1662 bool Simplified = false;
1663
1664 for (MachineBasicBlock &MBB2 : *MF) {
1665 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1666
1667 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1668 // as an optimization target if
1669 // - both MBBs end with a conditional branch,
1670 // - MBB1 is the only predecessor of MBB2, and
1671 // - the compares do not take a physical register as an operand in either MBB.
1672 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1673 //
1674 // For the partially redundant case, we additionally handle MBB2 having one
1675 // additional predecessor, which has only one successor (MBB2).
1676 // In this case, we move the compare instruction originally in MBB2 into
1677 // MBBtoMoveCmp. This partially redundant case typically appears when
1678 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1679 //
1680 // Overview of CFG of related basic blocks
1681 // Fully redundant case Partially redundant case
1682 // -------- ---------------- --------
1683 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1684 // -------- ---------------- --------
1685 // | \ (w/ 1 succ) \ | \
1686 // | \ \ | \
1687 // | \ |
1688 // -------- --------
1689 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1690 // -------- and 2 succ) -------- and 2 succ)
1691 // | \ | \
1692 // | \ | \
1693 //
1694 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1695 continue;
1696
1697 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1698 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1699
1700 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1701 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1702 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1703
1704 // We cannot optimize an unsupported compare opcode or
1705 // a mix of 32-bit and 64-bit comparisons
1706 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1707 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1708 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1709 continue;
1710
1711 unsigned NewOpCode = 0;
1712 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1713 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1714 bool SwapOperands = false;
1715
1716 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1717 // Typically, an unsigned comparison is used for an equality check, but
1718 // we replace it with a signed comparison if the comparison
1719 // to be merged is a signed comparison.
1720 // In other cases of opcode mismatch, we cannot optimize this.
1721
1722 // We cannot change opcode when comparing against an immediate
1723 // if the most significant bit of the immediate is one
1724 // due to the difference in sign extension.
1725 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1726 if (!I->getOperand(2).isImm())
1727 return false;
1728 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1729 return Imm < 0;
1730 };
1731
1732 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1733 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1734 NewOpCode = CMPI1->getOpcode();
1735 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1736 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1737 NewOpCode = CMPI2->getOpcode();
1738 else continue;
1739 }
1740
1741 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1742 // In the case of comparisons between two registers, these two registers
1743 // must be the same to merge the two comparisons.
1744 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1745 nullptr, nullptr, MRI);
1746 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1747 nullptr, nullptr, MRI);
1748 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1749 MBB1, &MBB2, MRI);
1750 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1751 MBB1, &MBB2, MRI);
1752
1753 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1754 // Same pair of registers in the same order; ready to merge as is.
1755 }
1756 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1757 // Same pair of registers in different order.
1758 // We reverse the predicate to merge compare instructions.
1759 PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
1760 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1761 // In case of partial redundancy, we need to swap operands
1762 // in another compare instruction.
1763 SwapOperands = true;
1764 }
1765 else continue;
1766 }
1767 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1768 // In the case of comparisons between a register and an immediate,
1769 // the operand register must be the same for the two compare instructions.
1770 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1771 nullptr, nullptr, MRI);
1772 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1773 MBB1, &MBB2, MRI);
1774 if (Cmp1Operand1 != Cmp2Operand1)
1775 continue;
1776
1777 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1778 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1779
1780 // If the immediates are not the same, we try to adjust by changing the
1781 // predicate; e.g. GT imm means GE (imm+1).
1782 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1783 int Diff = Imm1 - Imm2;
1784 if (Diff < -2 || Diff > 2)
1785 continue;
1786
1787 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1788 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1789 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1790 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1791 if (Diff == 2) {
1792 if (PredToInc2 && PredToDec1) {
1793 NewPredicate2 = PredToInc2;
1794 NewPredicate1 = PredToDec1;
1795 NewImm2++;
1796 NewImm1--;
1797 }
1798 }
1799 else if (Diff == 1) {
1800 if (PredToInc2) {
1801 NewImm2++;
1802 NewPredicate2 = PredToInc2;
1803 }
1804 else if (PredToDec1) {
1805 NewImm1--;
1806 NewPredicate1 = PredToDec1;
1807 }
1808 }
1809 else if (Diff == -1) {
1810 if (PredToDec2) {
1811 NewImm2--;
1812 NewPredicate2 = PredToDec2;
1813 }
1814 else if (PredToInc1) {
1815 NewImm1++;
1816 NewPredicate1 = PredToInc1;
1817 }
1818 }
1819 else if (Diff == -2) {
1820 if (PredToDec2 && PredToInc1) {
1821 NewPredicate2 = PredToDec2;
1822 NewPredicate1 = PredToInc1;
1823 NewImm2--;
1824 NewImm1++;
1825 }
1826 }
1827 }
1828
1829 // We cannot merge two compares if the immediates are not the same.
1830 if (NewImm2 != NewImm1)
1831 continue;
1832 }
1833
1834 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1835 LLVM_DEBUG(CMPI1->dump());
1836 LLVM_DEBUG(BI1->dump());
1837 LLVM_DEBUG(CMPI2->dump());
1838 LLVM_DEBUG(BI2->dump());
1839 for (const MachineOperand &MO : CMPI1->operands())
1840 if (MO.isReg())
1841 addRegToUpdate(MO.getReg());
1842 for (const MachineOperand &MO : CMPI2->operands())
1843 if (MO.isReg())
1844 addRegToUpdate(MO.getReg());
1845
1846 // We adjust opcode, predicates and immediate as we determined above.
1847 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1848 CMPI1->setDesc(TII->get(NewOpCode));
1849 }
1850 if (NewPredicate1) {
1851 BI1->getOperand(0).setImm(NewPredicate1);
1852 }
1853 if (NewPredicate2) {
1854 BI2->getOperand(0).setImm(NewPredicate2);
1855 }
1856 if (NewImm1 != Imm1) {
1857 CMPI1->getOperand(2).setImm(NewImm1);
1858 }
1859
1860 if (IsPartiallyRedundant) {
1861 // We touch up the compare instruction in MBB2 and move it to
1862 // a previous BB to handle the partially redundant case.
1863 if (SwapOperands) {
1864 Register Op1 = CMPI2->getOperand(1).getReg();
1865 Register Op2 = CMPI2->getOperand(2).getReg();
1866 CMPI2->getOperand(1).setReg(Op2);
1867 CMPI2->getOperand(2).setReg(Op1);
1868 }
1869 if (NewImm2 != Imm2)
1870 CMPI2->getOperand(2).setImm(NewImm2);
1871
1872 for (int I = 1; I <= 2; I++) {
1873 if (CMPI2->getOperand(I).isReg()) {
1874 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1875 if (Inst->getParent() != &MBB2)
1876 continue;
1877
1878 assert(Inst->getOpcode() == PPC::PHI &&
1879 "We cannot support if an operand comes from this BB.");
1880 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1881 CMPI2->getOperand(I).setReg(SrcReg);
1882 addRegToUpdate(SrcReg);
1883 }
1884 }
1885 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1886 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1887
1888 DebugLoc DL = CMPI2->getDebugLoc();
1889 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1890 BuildMI(MBB2, MBB2.begin(), DL,
1891 TII->get(PPC::PHI), NewVReg)
1892 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1893 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1894 BI2->getOperand(1).setReg(NewVReg);
1895 addRegToUpdate(NewVReg);
1896 }
1897 else {
1898 // We finally eliminate compare instruction in MBB2.
1899 // We do not need to treat CMPI2 specially here in terms of re-computing
1900 // live variables even though it is being deleted because:
1901 // - It defines a register that has a single use (already checked in
1902 // eligibleForCompareElimination())
1903 // - The only user (BI2) is no longer using it so the register is dead (no
1904 // def, no uses)
1905 // - We do not attempt to recompute live variables for dead registers
1906 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1907 CMPI2->eraseFromParent();
1908 }
1909
1910 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1911 LLVM_DEBUG(CMPI1->dump());
1912 LLVM_DEBUG(BI1->dump());
1913 LLVM_DEBUG(BI2->dump());
1914 if (IsPartiallyRedundant) {
1915 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1916 << printMBBReference(*MBBtoMoveCmp)
1917 << " to handle partial redundancy.\n");
1918 LLVM_DEBUG(CMPI2->dump());
1919 }
1920 Simplified = true;
1921 }
1922
1923 return Simplified;
1924}
1925
1926// We miss the opportunity to emit an RLDIC when lowering jump tables
1927// since ISEL sees only a single basic block. When selecting, the clear
1928// and shift left will be in different blocks.
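// For example (illustrative operands):
//   %1 = RLDICL %0, 0, 32     ; clear bits [0, 32)
//   %2 = RLDICR %1, 2, 61     ; shift left by 2, clear bits (61, 63]
// collapses into
//   %2 = RLDIC  %0, 2, 30     ; NewSH = 0 + 2, NewMB = 32 - 2
// because 63 - NewSH equals the ME of the RLDICR (61).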
1929bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1930 MachineInstr *&ToErase) {
1931 if (MI.getOpcode() != PPC::RLDICR)
1932 return false;
1933
1934 Register SrcReg = MI.getOperand(1).getReg();
1935 if (!SrcReg.isVirtual())
1936 return false;
1937
1938 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1939 if (SrcMI->getOpcode() != PPC::RLDICL)
1940 return false;
1941
1942 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1943 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1944 MachineOperand MOpSHMI = MI.getOperand(2);
1945 MachineOperand MOpMEMI = MI.getOperand(3);
1946 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1947 MOpMEMI.isImm()))
1948 return false;
1949
1950 uint64_t SHSrc = MOpSHSrc.getImm();
1951 uint64_t MBSrc = MOpMBSrc.getImm();
1952 uint64_t SHMI = MOpSHMI.getImm();
1953 uint64_t MEMI = MOpMEMI.getImm();
1954 uint64_t NewSH = SHSrc + SHMI;
1955 uint64_t NewMB = MBSrc - SHMI;
1956 if (NewMB > 63 || NewSH > 63)
1957 return false;
1958
1959 // The bits cleared with RLDICL are [0, MBSrc).
1960 // The bits cleared with RLDICR are (MEMI, 63].
1961 // After the sequence, the bits cleared are:
1962 // [0, MBSrc-SHMI) and (MEMI, 63].
1963 //
1964 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1965 if ((63 - NewSH) != MEMI)
1966 return false;
1967
1968 LLVM_DEBUG(dbgs() << "Converting pair: ");
1969 LLVM_DEBUG(SrcMI->dump());
1970 LLVM_DEBUG(MI.dump());
1971
1972 MI.setDesc(TII->get(PPC::RLDIC));
1973 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1974 MI.getOperand(2).setImm(NewSH);
1975 MI.getOperand(3).setImm(NewMB);
1976 addRegToUpdate(MI.getOperand(1).getReg());
1977 addRegToUpdate(SrcMI->getOperand(0).getReg());
1978
1979 LLVM_DEBUG(dbgs() << "To: ");
1980 LLVM_DEBUG(MI.dump());
1981 NumRotatesCollapsed++;
1982 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1983 if (MRI->use_nodbg_empty(SrcReg)) {
1984 assert(!SrcMI->hasImplicitDef() &&
1985 "Not expecting an implicit def with this instr.");
1986 ToErase = SrcMI;
1987 }
1988 return true;
1989}
1990
1991// For case in LLVM IR
1992// entry:
1993// %iconv = sext i32 %index to i64
1994 // br i1 undef, label %true, label %false
1995// true:
1996// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1997// ...
1998// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1999// different BBs when conducting instruction selection. We can do a peephole
2000// optimization to combine these two instructions into extswsli after
2001// instruction selection.
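// For example (illustrative operands):
//   %1 = EXTSW %0
//   %2 = RLDICR %1, 3, 60     ; SH + ME == 63, i.e. a plain shift left by 3
// combines into
//   %2 = EXTSWSLI %0, 3
// provided %1 has no other non-debug use and the target is ISA 3.0.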
2002bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
2003 MachineInstr *&ToErase) {
2004 if (MI.getOpcode() != PPC::RLDICR)
2005 return false;
2006
2007 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
2008 return false;
2009
2010 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
2011
2012 MachineOperand MOpSHMI = MI.getOperand(2);
2013 MachineOperand MOpMEMI = MI.getOperand(3);
2014 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
2015 return false;
2016
2017 uint64_t SHMI = MOpSHMI.getImm();
2018 uint64_t MEMI = MOpMEMI.getImm();
2019 if (SHMI + MEMI != 63)
2020 return false;
2021
2022 Register SrcReg = MI.getOperand(1).getReg();
2023 if (!SrcReg.isVirtual())
2024 return false;
2025
2026 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
2027 if (SrcMI->getOpcode() != PPC::EXTSW &&
2028 SrcMI->getOpcode() != PPC::EXTSW_32_64)
2029 return false;
2030
2031 // If the register defined by extsw has more than one use, combination is not
2032 // needed.
2033 if (!MRI->hasOneNonDBGUse(SrcReg))
2034 return false;
2035
2036 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2037 assert(SrcMI->getOperand(1).isReg() &&
2038 "EXTSW's second operand should be a register");
2039 if (!SrcMI->getOperand(1).getReg().isVirtual())
2040 return false;
2041
2042 LLVM_DEBUG(dbgs() << "Combining pair: ");
2043 LLVM_DEBUG(SrcMI->dump());
2044 LLVM_DEBUG(MI.dump());
2045
2046 MachineInstr *NewInstr =
2047 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
2048 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
2049 : TII->get(PPC::EXTSWSLI_32_64),
2050 MI.getOperand(0).getReg())
2051 .add(SrcMI->getOperand(1))
2052 .add(MOpSHMI);
2053 (void)NewInstr;
2054
2055 LLVM_DEBUG(dbgs() << "TO: ");
2056 LLVM_DEBUG(NewInstr->dump());
2057 ++NumEXTSWAndSLDICombined;
2058 ToErase = &MI;
2059 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2060 // can recompute its kill flags. We run DCE immediately after this pass
2061 // to clean up dead instructions such as this.
2062 addRegToUpdate(NewInstr->getOperand(1).getReg());
2063 addRegToUpdate(SrcMI->getOperand(0).getReg());
2064 return true;
2065}
2066
2067 } // end anonymous namespace
2068 
2069 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
2070 "PowerPC MI Peephole Optimization", false, false)
2071 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
2072 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
2073 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
2074 INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
2075 INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
2076 "PowerPC MI Peephole Optimization", false, false)
2077
2078char PPCMIPeephole::ID = 0;
2079 FunctionPass *
2080 llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }