LLVM 17.0.0git
PPCMIPeephole.cpp
Go to the documentation of this file.
1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19//===---------------------------------------------------------------------===//
20
23#include "PPC.h"
24#include "PPCInstrBuilder.h"
25#include "PPCInstrInfo.h"
27#include "PPCTargetMachine.h"
28#include "llvm/ADT/Statistic.h"
37#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "ppc-mi-peepholes"
42
// Counters for the transformations performed by this pass (LLVM STATISTIC
// machinery; reported when statistics output is enabled).  Each counter is
// incremented by the corresponding peephole when it fires.
43STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
44STATISTIC(MultiTOCSaves,
45 "Number of functions with multiple TOC saves that must be kept");
46STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
47STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
48STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
49STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
50STATISTIC(NumConvertedToImmediateForm,
51 "Number of instructions converted to their immediate form");
52STATISTIC(NumFunctionsEnteredInMIPeephole,
53 "Number of functions entered in PPC MI Peepholes");
54STATISTIC(NumFixedPointIterations,
55 "Number of fixed-point iterations converting reg-reg instructions "
56 "to reg-imm ones");
57STATISTIC(NumRotatesCollapsed,
58 "Number of pairs of rotate left, clear left/right collapsed");
59STATISTIC(NumEXTSWAndSLDICombined,
60 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
61STATISTIC(NumLoadImmZeroFoldedAndRemoved,
62 "Number of LI(8) reg, 0 that are folded to r0 and removed");
63
64static cl::opt<bool>
65FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
66 cl::desc("Iterate to a fixed point when attempting to "
67 "convert reg-reg instructions to reg-imm"));
68
69static cl::opt<bool>
70ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
71 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
72
73static cl::opt<bool>
74 EnableSExtElimination("ppc-eliminate-signext",
75 cl::desc("enable elimination of sign-extensions"),
76 cl::init(true), cl::Hidden);
77
78static cl::opt<bool>
79 EnableZExtElimination("ppc-eliminate-zeroext",
80 cl::desc("enable elimination of zero-extensions"),
81 cl::init(true), cl::Hidden);
82
83static cl::opt<bool>
84 EnableTrapOptimization("ppc-opt-conditional-trap",
85 cl::desc("enable optimization of conditional traps"),
86 cl::init(false), cl::Hidden);
87
88namespace {
89
// MachineFunctionPass wrapper for the PowerPC MI-level peepholes.  The
// actual rewriting happens in simplifyCode(); runOnMachineFunction() only
// caches analyses via initialize() and honors skipFunction().
90struct PPCMIPeephole : public MachineFunctionPass {
91
 // Pass identification; the address of ID is the pass's unique identity.
92 static char ID;
 // Target instruction info, cached from the subtarget in initialize().
93 const PPCInstrInfo *TII;
 // NOTE(review): the analysis-pointer members assigned by initialize()
 // below (MF, MRI, MDT, MPDT, MBFI) appear to be elided in this excerpt.
96
97 PPCMIPeephole() : MachineFunctionPass(ID) {
99 }
100
101private:
 // Block frequency of the function's entry block, cached from MBFI; used by
 // UpdateTOCSaves() to decide whether a TOC save belongs in the prologue.
105 uint64_t EntryFreq;
106
107 // Cache the function, register info, analyses and target info above.
108 void initialize(MachineFunction &MFParm);
109
110 // Perform peepholes; the main work loop of the pass.
111 bool simplifyCode();
112
113 // Individual peephole transformations.  Each returns true on change.
114 bool eliminateRedundantCompare();
115 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
116 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
117 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
 // Record a newly seen TOC save and mark dominated saves redundant.
118 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
120
121public:
122
123 void getAnalysisUsage(AnalysisUsage &AU) const override {
131 }
132
133 // Main entry point for this pass.
134 bool runOnMachineFunction(MachineFunction &MF) override {
135 initialize(MF);
136 // At this point, TOC pointer should not be used in a function that uses
137 // PC-Relative addressing.
138 assert((MF.getRegInfo().use_empty(PPC::X2) ||
140 "TOC pointer used in a function using PC-Relative addressing!");
141 if (skipFunction(MF.getFunction()))
142 return false;
143 return simplifyCode();
144 }
145};
146
147// Initialize class variables.
148void PPCMIPeephole::initialize(MachineFunction &MFParm) {
149 MF = &MFParm;
150 MRI = &MF->getRegInfo();
151 MDT = &getAnalysis<MachineDominatorTree>();
152 MPDT = &getAnalysis<MachinePostDominatorTree>();
153 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
154 EntryFreq = MBFI->getEntryFreq();
155 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
156 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
157 LLVM_DEBUG(MF->dump());
158}
159
160static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
162 assert(Op && "Invalid Operand!");
163 if (!Op->isReg())
164 return nullptr;
165
166 Register Reg = Op->getReg();
167 if (!Reg.isVirtual())
168 return nullptr;
169
170 return MRI->getVRegDef(Reg);
171}
172
173// This function returns number of known zero bits in output of MI
174// starting from the most significant bit.
175static unsigned getKnownLeadingZeroCount(const unsigned Reg,
176 const PPCInstrInfo *TII,
177 const MachineRegisterInfo *MRI) {
178 MachineInstr *MI = MRI->getVRegDef(Reg);
179 unsigned Opcode = MI->getOpcode();
180 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
181 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
182 return MI->getOperand(3).getImm();
183
184 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
185 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
186 return MI->getOperand(3).getImm();
187
188 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
189 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
190 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
191 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
192 return 32 + MI->getOperand(3).getImm();
193
194 if (Opcode == PPC::ANDI_rec) {
195 uint16_t Imm = MI->getOperand(2).getImm();
196 return 48 + llvm::countl_zero(Imm);
197 }
198
199 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
200 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
201 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
202 // The result ranges from 0 to 32.
203 return 58;
204
205 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
206 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
207 // The result ranges from 0 to 64.
208 return 57;
209
210 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
211 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
212 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
213 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
214 return 48;
215
216 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
217 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
218 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
219 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
220 return 56;
221
222 if (TII->isZeroExtended(Reg, MRI))
223 return 32;
224
225 return 0;
226}
227
228// This function maintains a map for the pairs <TOC Save Instr, Keep>
229// Each time a new TOC save is encountered, it checks if any of the existing
230// ones are dominated by the new one. If so, it marks the existing one as
231// redundant by setting it's entry in the map as false. It then adds the new
232// instruction to the map with either true or false depending on if any
233// existing instructions dominated the new one.
234void PPCMIPeephole::UpdateTOCSaves(
235 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
236 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
237 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
238 // here only support it under ELFv2.
239 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
241
242 MachineBasicBlock *Entry = &MF->front();
243 uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
244
245 // If the block in which the TOC save resides is in a block that
246 // post-dominates Entry, or a block that is hotter than entry (keep in mind
247 // that early MachineLICM has already run so the TOC save won't be hoisted)
248 // we can just do the save in the prologue.
249 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
250 FI->setMustSaveTOC(true);
251
252 // If we are saving the TOC in the prologue, all the TOC saves can be
253 // removed from the code.
254 if (FI->mustSaveTOC()) {
255 for (auto &TOCSave : TOCSaves)
256 TOCSave.second = false;
257 // Add new instruction to map.
258 TOCSaves[MI] = false;
259 return;
260 }
261 }
262
263 bool Keep = true;
264 for (auto &I : TOCSaves) {
265 MachineInstr *CurrInst = I.first;
266 // If new instruction dominates an existing one, mark existing one as
267 // redundant.
268 if (I.second && MDT->dominates(MI, CurrInst))
269 I.second = false;
270 // Check if the new instruction is redundant.
271 if (MDT->dominates(CurrInst, MI)) {
272 Keep = false;
273 break;
274 }
275 }
276 // Add new instruction to map.
277 TOCSaves[MI] = Keep;
278}
279
280// This function returns a list of all PHI nodes in the tree starting from
281// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
282// The list initially only contains the root PHI. When we visit a PHI node, we
283// add it to the list. We continue to look for other PHI node operands while
284// there are nodes to visit in the list. The function returns false if the
285// optimization cannot be applied on this tree.
286static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
287 MachineInstr *RootPHI,
289 PHIs.push_back(RootPHI);
290 unsigned VisitedIndex = 0;
291 while (VisitedIndex < PHIs.size()) {
292 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
293 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
294 PHIOp != NumOps; PHIOp += 2) {
295 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
296 if (!RegOp.isVirtual())
297 return false;
298 MachineInstr *Instr = MRI->getVRegDef(RegOp);
299 // While collecting the PHI nodes, we check if they can be converted (i.e.
300 // all the operands are either copies, implicit defs or PHI nodes).
301 unsigned Opcode = Instr->getOpcode();
302 if (Opcode == PPC::COPY) {
303 Register Reg = Instr->getOperand(1).getReg();
304 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
305 return false;
306 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
307 return false;
308 // If we detect a cycle in the PHI nodes, we exit. It would be
309 // possible to change cycles as well, but that would add a lot
310 // of complexity for a case that is unlikely to occur with MMA
311 // code.
312 if (Opcode != PPC::PHI)
313 continue;
314 if (llvm::is_contained(PHIs, Instr))
315 return false;
316 PHIs.push_back(Instr);
317 }
318 VisitedIndex++;
319 }
320 return true;
321}
322
323// This function changes the unprimed accumulator PHI nodes in the PHIs list to
324// primed accumulator PHI nodes. The list is traversed in reverse order to
325// change all the PHI operands of a PHI node before changing the node itself.
326// We keep a map to associate each changed PHI node to its non-changed form.
327static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
330 Register Dst) {
332 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
334 // We check if the current PHI node can be changed by looking at its
335 // operands. If all the operands are either copies from primed
336 // accumulators, implicit definitions or other unprimed accumulator
337 // PHI nodes, we change it.
338 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
339 PHIOp += 2) {
340 Register RegOp = PHI->getOperand(PHIOp).getReg();
341 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
342 unsigned Opcode = PHIInput->getOpcode();
343 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
344 Opcode == PPC::PHI) &&
345 "Unexpected instruction");
346 if (Opcode == PPC::COPY) {
347 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
348 &PPC::ACCRCRegClass &&
349 "Unexpected register class");
350 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
351 } else if (Opcode == PPC::IMPLICIT_DEF) {
352 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
353 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
354 TII->get(PPC::IMPLICIT_DEF), AccReg);
355 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
356 PHI->getOperand(PHIOp + 1)});
357 } else if (Opcode == PPC::PHI) {
358 // We found a PHI operand. At this point we know this operand
359 // has already been changed so we get its associated changed form
360 // from the map.
361 assert(ChangedPHIMap.count(PHIInput) == 1 &&
362 "This PHI node should have already been changed.");
363 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
365 PrimedAccPHI->getOperand(0).getReg(), false),
366 PHI->getOperand(PHIOp + 1)});
367 }
368 }
369 Register AccReg = Dst;
370 // If the PHI node we are changing is the root node, the register it defines
371 // will be the destination register of the original copy (of the PHI def).
372 // For all other PHI's in the list, we need to create another primed
373 // accumulator virtual register as the PHI will no longer define the
374 // unprimed accumulator.
375 if (PHI != PHIs[0])
376 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
378 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
379 for (auto RegMBB : PHIOps)
380 NewPHI.add(RegMBB.first).add(RegMBB.second);
381 ChangedPHIMap[PHI] = NewPHI.getInstr();
382 LLVM_DEBUG(dbgs() << "Converting PHI: ");
383 LLVM_DEBUG(PHI->dump());
384 LLVM_DEBUG(dbgs() << "To: ");
385 LLVM_DEBUG(NewPHI.getInstr()->dump());
386 }
387}
388
389// Perform peephole optimizations.
390bool PPCMIPeephole::simplifyCode() {
391 bool Simplified = false;
392 bool TrapOpt = false;
393 MachineInstr* ToErase = nullptr;
394 std::map<MachineInstr *, bool> TOCSaves;
395 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
396 NumFunctionsEnteredInMIPeephole++;
397 if (ConvertRegReg) {
398 // Fixed-point conversion of reg/reg instructions fed by load-immediate
399 // into reg/imm instructions. FIXME: This is expensive, control it with
400 // an option.
401 bool SomethingChanged = false;
402 do {
403 NumFixedPointIterations++;
404 SomethingChanged = false;
405 for (MachineBasicBlock &MBB : *MF) {
406 for (MachineInstr &MI : MBB) {
407 if (MI.isDebugInstr())
408 continue;
409
410 if (TII->convertToImmediateForm(MI)) {
411 // We don't erase anything in case the def has other uses. Let DCE
412 // remove it if it can be removed.
413 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
414 LLVM_DEBUG(MI.dump());
415 NumConvertedToImmediateForm++;
416 SomethingChanged = true;
417 Simplified = true;
418 continue;
419 }
420 }
421 }
422 } while (SomethingChanged && FixedPointRegToImm);
423 }
424
425 for (MachineBasicBlock &MBB : *MF) {
426 for (MachineInstr &MI : MBB) {
427
428 // If the previous instruction was marked for elimination,
429 // remove it now.
430 if (ToErase) {
431 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
432 LLVM_DEBUG(ToErase->dump());
433 ToErase->eraseFromParent();
434 ToErase = nullptr;
435 }
436 // If a conditional trap instruction got optimized to an
437 // unconditional trap, eliminate all the instructions after
438 // the trap.
439 if (EnableTrapOptimization && TrapOpt) {
440 ToErase = &MI;
441 continue;
442 }
443
444 // Ignore debug instructions.
445 if (MI.isDebugInstr())
446 continue;
447
448 // Per-opcode peepholes.
449 switch (MI.getOpcode()) {
450
451 default:
452 break;
453 case PPC::COPY: {
454 Register Src = MI.getOperand(1).getReg();
455 Register Dst = MI.getOperand(0).getReg();
456 if (!Src.isVirtual() || !Dst.isVirtual())
457 break;
458 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
459 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
460 break;
461
462 // We are copying an unprimed accumulator to a primed accumulator.
463 // If the input to the copy is a PHI that is fed only by (i) copies in
464 // the other direction (ii) implicitly defined unprimed accumulators or
465 // (iii) other PHI nodes satisfying (i) and (ii), we can change
466 // the PHI to a PHI on primed accumulators (as long as we also change
467 // its operands). To detect and change such copies, we first get a list
468 // of all the PHI nodes starting from the root PHI node in BFS order.
469 // We then visit all these PHI nodes to check if they can be changed to
470 // primed accumulator PHI nodes and if so, we change them.
471 MachineInstr *RootPHI = MRI->getVRegDef(Src);
472 if (RootPHI->getOpcode() != PPC::PHI)
473 break;
474
476 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
477 break;
478
479 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
480
481 ToErase = &MI;
482 break;
483 }
484 case PPC::LI:
485 case PPC::LI8: {
486 // If we are materializing a zero, look for any use operands for which
487 // zero means immediate zero. All such operands can be replaced with
488 // PPC::ZERO.
489 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
490 break;
491 Register MIDestReg = MI.getOperand(0).getReg();
492 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
493 Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
494 if (MRI->use_nodbg_empty(MIDestReg)) {
495 ++NumLoadImmZeroFoldedAndRemoved;
496 ToErase = &MI;
497 }
498 break;
499 }
500 case PPC::STW:
501 case PPC::STD: {
502 MachineFrameInfo &MFI = MF->getFrameInfo();
503 if (MFI.hasVarSizedObjects() ||
504 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
505 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
506 break;
507 // When encountering a TOC save instruction, call UpdateTOCSaves
508 // to add it to the TOCSaves map and mark any existing TOC saves
509 // it dominates as redundant.
510 if (TII->isTOCSaveMI(MI))
511 UpdateTOCSaves(TOCSaves, &MI);
512 break;
513 }
514 case PPC::XXPERMDI: {
515 // Perform simplifications of 2x64 vector swaps and splats.
516 // A swap is identified by an immediate value of 2, and a splat
517 // is identified by an immediate value of 0 or 3.
518 int Immed = MI.getOperand(3).getImm();
519
520 if (Immed == 1)
521 break;
522
523 // For each of these simplifications, we need the two source
524 // regs to match. Unfortunately, MachineCSE ignores COPY and
525 // SUBREG_TO_REG, so for example we can see
526 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
527 // We have to look through chains of COPY and SUBREG_TO_REG
528 // to find the real source values for comparison.
529 Register TrueReg1 =
530 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
531 Register TrueReg2 =
532 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
533
534 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
535 break;
536
537 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
538
539 if (!DefMI)
540 break;
541
542 unsigned DefOpc = DefMI->getOpcode();
543
544 // If this is a splat fed by a splatting load, the splat is
545 // redundant. Replace with a copy. This doesn't happen directly due
546 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
547 // a load of a double to a vector of 64-bit integers.
548 auto isConversionOfLoadAndSplat = [=]() -> bool {
549 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
550 return false;
551 Register FeedReg1 =
552 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
553 if (FeedReg1.isVirtual()) {
554 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
555 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
556 return true;
557 }
558 return false;
559 };
560 if ((Immed == 0 || Immed == 3) &&
561 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
562 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
563 "to load-and-splat/copy: ");
564 LLVM_DEBUG(MI.dump());
565 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
566 MI.getOperand(0).getReg())
567 .add(MI.getOperand(1));
568 ToErase = &MI;
569 Simplified = true;
570 }
571
572 // If this is a splat or a swap fed by another splat, we
573 // can replace it with a copy.
574 if (DefOpc == PPC::XXPERMDI) {
575 Register DefReg1 = DefMI->getOperand(1).getReg();
576 Register DefReg2 = DefMI->getOperand(2).getReg();
577 unsigned DefImmed = DefMI->getOperand(3).getImm();
578
579 // If the two inputs are not the same register, check to see if
580 // they originate from the same virtual register after only
581 // copy-like instructions.
582 if (DefReg1 != DefReg2) {
583 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
584 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
585
586 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
587 break;
588 }
589
590 if (DefImmed == 0 || DefImmed == 3) {
591 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
592 "to splat/copy: ");
593 LLVM_DEBUG(MI.dump());
594 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
595 MI.getOperand(0).getReg())
596 .add(MI.getOperand(1));
597 ToErase = &MI;
598 Simplified = true;
599 }
600
601 // If this is a splat fed by a swap, we can simplify modify
602 // the splat to splat the other value from the swap's input
603 // parameter.
604 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
605 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
606 LLVM_DEBUG(MI.dump());
607 MI.getOperand(1).setReg(DefReg1);
608 MI.getOperand(2).setReg(DefReg2);
609 MI.getOperand(3).setImm(3 - Immed);
610 Simplified = true;
611 }
612
613 // If this is a swap fed by a swap, we can replace it
614 // with a copy from the first swap's input.
615 else if (Immed == 2 && DefImmed == 2) {
616 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
617 LLVM_DEBUG(MI.dump());
618 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
619 MI.getOperand(0).getReg())
620 .add(DefMI->getOperand(1));
621 ToErase = &MI;
622 Simplified = true;
623 }
624 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
625 DefOpc == PPC::XXPERMDIs &&
626 (DefMI->getOperand(2).getImm() == 0 ||
627 DefMI->getOperand(2).getImm() == 3)) {
628 ToErase = &MI;
629 Simplified = true;
630 // Swap of a splat, convert to copy.
631 if (Immed == 2) {
632 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
633 LLVM_DEBUG(MI.dump());
634 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
635 MI.getOperand(0).getReg())
636 .add(MI.getOperand(1));
637 break;
638 }
639 // Splat fed by another splat - switch the output of the first
640 // and remove the second.
641 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
642 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
643 LLVM_DEBUG(MI.dump());
644 } else if (Immed == 2 &&
645 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
646 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW)) {
647 // Swap of various vector splats, convert to copy.
648 ToErase = &MI;
649 Simplified = true;
650 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt[b|h|w]|xxspltw) => "
651 "copy(vsplt[b|h|w]|xxspltw): ");
652 LLVM_DEBUG(MI.dump());
653 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
654 MI.getOperand(0).getReg())
655 .add(MI.getOperand(1));
656 }
657 break;
658 }
659 case PPC::VSPLTB:
660 case PPC::VSPLTH:
661 case PPC::XXSPLTW: {
662 unsigned MyOpcode = MI.getOpcode();
663 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
664 Register TrueReg =
665 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
666 if (!TrueReg.isVirtual())
667 break;
668 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
669 if (!DefMI)
670 break;
671 unsigned DefOpcode = DefMI->getOpcode();
672 auto isConvertOfSplat = [=]() -> bool {
673 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
674 return false;
675 Register ConvReg = DefMI->getOperand(1).getReg();
676 if (!ConvReg.isVirtual())
677 return false;
678 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
679 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
680 Splt->getOpcode() == PPC::XXSPLTW);
681 };
682 bool AlreadySplat = (MyOpcode == DefOpcode) ||
683 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
684 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
685 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
686 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
687 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
688 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
689 // If the instruction[s] that feed this splat have already splat
690 // the value, this splat is redundant.
691 if (AlreadySplat) {
692 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
693 LLVM_DEBUG(MI.dump());
694 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
695 MI.getOperand(0).getReg())
696 .add(MI.getOperand(OpNo));
697 ToErase = &MI;
698 Simplified = true;
699 }
700 // Splat fed by a shift. Usually when we align value to splat into
701 // vector element zero.
702 if (DefOpcode == PPC::XXSLDWI) {
703 Register ShiftRes = DefMI->getOperand(0).getReg();
704 Register ShiftOp1 = DefMI->getOperand(1).getReg();
705 Register ShiftOp2 = DefMI->getOperand(2).getReg();
706 unsigned ShiftImm = DefMI->getOperand(3).getImm();
707 unsigned SplatImm =
708 MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
709 if (ShiftOp1 == ShiftOp2) {
710 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
711 if (MRI->hasOneNonDBGUse(ShiftRes)) {
712 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
714 ToErase = DefMI;
715 }
716 Simplified = true;
717 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
718 << " to " << NewElem << " in instruction: ");
719 LLVM_DEBUG(MI.dump());
720 MI.getOperand(1).setReg(ShiftOp1);
721 MI.getOperand(2).setImm(NewElem);
722 }
723 }
724 break;
725 }
726 case PPC::XVCVDPSP: {
727 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
728 Register TrueReg =
729 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
730 if (!TrueReg.isVirtual())
731 break;
732 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
733
734 // This can occur when building a vector of single precision or integer
735 // values.
736 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
737 Register DefsReg1 =
738 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
739 Register DefsReg2 =
740 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
741 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
742 break;
743 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
744 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
745
746 if (!P1 || !P2)
747 break;
748
749 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
750 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
751 // the FRSP/XSRSP.
752 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
753 unsigned Opc = RoundInstr->getOpcode();
754 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
755 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
756 Simplified = true;
757 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
758 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
759 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
760 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
761 if (Use.getOperand(i).isReg() &&
762 Use.getOperand(i).getReg() == FRSPDefines)
763 Use.getOperand(i).setReg(ConvReg1);
764 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
765 LLVM_DEBUG(RoundInstr->dump());
766 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
767 LLVM_DEBUG(MI.dump());
768 LLVM_DEBUG(dbgs() << "Through instruction:\n");
770 RoundInstr->eraseFromParent();
771 }
772 };
773
774 // If the input to XVCVDPSP is a vector that was built (even
775 // partially) out of FRSP's, the FRSP(s) can safely be removed
776 // since this instruction performs the same operation.
777 if (P1 != P2) {
778 removeFRSPIfPossible(P1);
779 removeFRSPIfPossible(P2);
780 break;
781 }
782 removeFRSPIfPossible(P1);
783 }
784 break;
785 }
786 case PPC::EXTSH:
787 case PPC::EXTSH8:
788 case PPC::EXTSH8_32_64: {
789 if (!EnableSExtElimination) break;
790 Register NarrowReg = MI.getOperand(1).getReg();
791 if (!NarrowReg.isVirtual())
792 break;
793
794 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
795 unsigned SrcOpcode = SrcMI->getOpcode();
796 // If we've used a zero-extending load that we will sign-extend,
797 // just do a sign-extending load.
798 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
799 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
800 break;
801 // Determine the new opcode. We need to make sure that if the original
802 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
803 // Likewise if the source is X-Form the new opcode should also be
804 // X-Form.
805 unsigned Opc = PPC::LHA;
806 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
807 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
808 MI.getOpcode() == PPC::EXTSH8_32_64;
809
810 if (SourceIsXForm && MIIs64Bit)
811 Opc = PPC::LHAX8;
812 else if (SourceIsXForm && !MIIs64Bit)
813 Opc = PPC::LHAX;
814 else if (MIIs64Bit)
815 Opc = PPC::LHA8;
816
817 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
818 LLVM_DEBUG(SrcMI->dump());
819 LLVM_DEBUG(dbgs() << "and sign-extension\n");
820 LLVM_DEBUG(MI.dump());
821 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
822 SrcMI->setDesc(TII->get(Opc));
823 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
824 ToErase = &MI;
825 Simplified = true;
826 NumEliminatedSExt++;
827 }
828 break;
829 }
830 case PPC::EXTSW:
831 case PPC::EXTSW_32:
832 case PPC::EXTSW_32_64: {
833 if (!EnableSExtElimination) break;
834 Register NarrowReg = MI.getOperand(1).getReg();
835 if (!NarrowReg.isVirtual())
836 break;
837
838 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
839 unsigned SrcOpcode = SrcMI->getOpcode();
840 // If we've used a zero-extending load that we will sign-extend,
841 // just do a sign-extending load.
842 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
843 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
844 break;
845
846 // The transformation from a zero-extending load to a sign-extending
847 // load is only legal when the displacement is a multiple of 4.
848 // If the displacement is not at least 4 byte aligned, don't perform
849 // the transformation.
850 bool IsWordAligned = false;
851 if (SrcMI->getOperand(1).isGlobal()) {
852 const GlobalObject *GO =
853 dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
854 if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
855 (SrcMI->getOperand(1).getOffset() % 4 == 0))
856 IsWordAligned = true;
857 } else if (SrcMI->getOperand(1).isImm()) {
858 int64_t Value = SrcMI->getOperand(1).getImm();
859 if (Value % 4 == 0)
860 IsWordAligned = true;
861 }
862
863 // Determine the new opcode. We need to make sure that if the original
864 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
865 // Likewise if the source is X-Form the new opcode should also be
866 // X-Form.
867 unsigned Opc = PPC::LWA_32;
868 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
869 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
870 MI.getOpcode() == PPC::EXTSW_32_64;
871
872 if (SourceIsXForm && MIIs64Bit)
873 Opc = PPC::LWAX;
874 else if (SourceIsXForm && !MIIs64Bit)
875 Opc = PPC::LWAX_32;
876 else if (MIIs64Bit)
877 Opc = PPC::LWA;
878
879 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
880 break;
881
882 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
883 LLVM_DEBUG(SrcMI->dump());
884 LLVM_DEBUG(dbgs() << "and sign-extension\n");
885 LLVM_DEBUG(MI.dump());
886 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
887 SrcMI->setDesc(TII->get(Opc));
888 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
889 ToErase = &MI;
890 Simplified = true;
891 NumEliminatedSExt++;
892 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
893 TII->isSignExtended(NarrowReg, MRI)) {
894 // We can eliminate EXTSW if the input is known to be already
895 // sign-extended.
896 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
897 Register TmpReg =
898 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
899 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
900 TmpReg);
901 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
902 MI.getOperand(0).getReg())
903 .addReg(TmpReg)
904 .addReg(NarrowReg)
905 .addImm(PPC::sub_32);
906 ToErase = &MI;
907 Simplified = true;
908 NumEliminatedSExt++;
909 }
910 break;
911 }
912 case PPC::RLDICL: {
913 // We can eliminate RLDICL (e.g. for zero-extension)
914 // if all bits to clear are already zero in the input.
915 // This code assume following code sequence for zero-extension.
916 // %6 = COPY %5:sub_32; (optional)
917 // %8 = IMPLICIT_DEF;
918 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
919 if (!EnableZExtElimination) break;
920
921 if (MI.getOperand(2).getImm() != 0)
922 break;
923
924 Register SrcReg = MI.getOperand(1).getReg();
925 if (!SrcReg.isVirtual())
926 break;
927
928 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
929 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
930 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
931 break;
932
933 MachineInstr *ImpDefMI, *SubRegMI;
934 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
935 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
936 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
937
938 SrcMI = SubRegMI;
939 if (SubRegMI->getOpcode() == PPC::COPY) {
940 Register CopyReg = SubRegMI->getOperand(1).getReg();
941 if (CopyReg.isVirtual())
942 SrcMI = MRI->getVRegDef(CopyReg);
943 }
944 if (!SrcMI->getOperand(0).isReg())
945 break;
946
947 unsigned KnownZeroCount =
948 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
949 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
950 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
951 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
952 MI.getOperand(0).getReg())
953 .addReg(SrcReg);
954 ToErase = &MI;
955 Simplified = true;
956 NumEliminatedZExt++;
957 }
958 break;
959 }
960
961 // TODO: Any instruction that has an immediate form fed only by a PHI
962 // whose operands are all load immediate can be folded away. We currently
963 // do this for ADD instructions, but should expand it to arithmetic and
964 // binary instructions with immediate forms in the future.
965 case PPC::ADD4:
966 case PPC::ADD8: {
967 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
968 assert(PhiOp && "Invalid Operand!");
969 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
970
971 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
972 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
973 };
974
975 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
976 MachineOperand *PhiOp) {
977 assert(PhiOp && "Invalid Operand!");
978 assert(DominatorOp && "Invalid Operand!");
979 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
980 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
981
982 // Note: the vregs only show up at odd indices position of PHI Node,
983 // the even indices position save the BB info.
984 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
985 MachineInstr *LiMI =
986 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
987 if (!LiMI ||
988 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
989 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
990 !MDT->dominates(DefDomMI, LiMI))
991 return false;
992 }
993
994 return true;
995 };
996
997 MachineOperand Op1 = MI.getOperand(1);
998 MachineOperand Op2 = MI.getOperand(2);
999 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1000 std::swap(Op1, Op2);
1001 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1002 break; // We don't have an ADD fed by LI's that can be transformed
1003
1004 // Now we know that Op1 is the PHI node and Op2 is the dominator
1005 Register DominatorReg = Op2.getReg();
1006
1007 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1008 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1009 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1010 MRI->setRegClass(DominatorReg, TRC);
1011
1012 // replace LIs with ADDIs
1013 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1014 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1015 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1016 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1017 LLVM_DEBUG(LiMI->dump());
1018
1019 // There could be repeated registers in the PHI, e.g: %1 =
1020 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1021 // already replaced the def instruction, skip.
1022 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1023 continue;
1024
1025 assert((LiMI->getOpcode() == PPC::LI ||
1026 LiMI->getOpcode() == PPC::LI8) &&
1027 "Invalid Opcode!");
1028 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1029 LiMI->removeOperand(1); // remove the imm of LI
1030 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1031 : PPC::ADDI8));
1032 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1033 .addReg(DominatorReg)
1034 .addImm(LiImm); // restore the imm of LI
1035 LLVM_DEBUG(LiMI->dump());
1036 }
1037
1038 // Replace ADD with COPY
1039 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1040 LLVM_DEBUG(MI.dump());
1041 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1042 MI.getOperand(0).getReg())
1043 .add(Op1);
1044 ToErase = &MI;
1045 Simplified = true;
1046 NumOptADDLIs++;
1047 break;
1048 }
1049 case PPC::RLDICR: {
1050 Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
1051 combineSEXTAndSHL(MI, ToErase);
1052 break;
1053 }
1054 case PPC::RLWINM:
1055 case PPC::RLWINM_rec:
1056 case PPC::RLWINM8:
1057 case PPC::RLWINM8_rec: {
1058 Simplified = TII->combineRLWINM(MI, &ToErase);
1059 if (Simplified)
1060 ++NumRotatesCollapsed;
1061 break;
1062 }
1063 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1064 // always trap, we will delete the node if it will never trap.
1065 case PPC::TDI:
1066 case PPC::TWI:
1067 case PPC::TD:
1068 case PPC::TW: {
1069 if (!EnableTrapOptimization) break;
1070 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1071 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1072 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1073 // We can only do the optimization if we can get immediates
1074 // from both operands
1075 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1076 LiMI1->getOpcode() == PPC::LI8)))
1077 break;
1078 if (!IsOperand2Immediate &&
1079 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1080 LiMI2->getOpcode() == PPC::LI8)))
1081 break;
1082
1083 auto ImmOperand0 = MI.getOperand(0).getImm();
1084 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1085 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1086 : LiMI2->getOperand(1).getImm();
1087
1088 // We will replace the MI with an unconditional trap if it will always
1089 // trap.
1090 if ((ImmOperand0 == 31) ||
1091 ((ImmOperand0 & 0x10) &&
1092 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1093 ((ImmOperand0 & 0x8) &&
1094 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1095 ((ImmOperand0 & 0x2) &&
1096 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1097 ((ImmOperand0 & 0x1) &&
1098 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1099 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1100 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1101 TrapOpt = true;
1102 }
1103 // We will delete the MI if it will never trap.
1104 ToErase = &MI;
1105 Simplified = true;
1106 break;
1107 }
1108 }
1109 }
1110
1111 // If the last instruction was marked for elimination,
1112 // remove it now.
1113 if (ToErase) {
1114 ToErase->eraseFromParent();
1115 ToErase = nullptr;
1116 }
1117 // Reset TrapOpt to false at the end of the basic block.
1119 TrapOpt = false;
1120 }
1121
1122 // Eliminate all the TOC save instructions which are redundant.
1123 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1124 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1125 if (FI->mustSaveTOC())
1126 NumTOCSavesInPrologue++;
1127
1128 // We try to eliminate redundant compare instruction.
1129 Simplified |= eliminateRedundantCompare();
1130
1131 return Simplified;
1132}
1133
1134// helper functions for eliminateRedundantCompare
1135static bool isEqOrNe(MachineInstr *BI) {
1137 unsigned PredCond = PPC::getPredicateCondition(Pred);
1138 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1139}
1140
1141static bool isSupportedCmpOp(unsigned opCode) {
1142 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1143 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1144 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1145 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1146}
1147
1148static bool is64bitCmpOp(unsigned opCode) {
1149 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1150 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1151}
1152
1153static bool isSignedCmpOp(unsigned opCode) {
1154 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1155 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1156}
1157
1158static unsigned getSignedCmpOpCode(unsigned opCode) {
1159 if (opCode == PPC::CMPLD) return PPC::CMPD;
1160 if (opCode == PPC::CMPLW) return PPC::CMPW;
1161 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1162 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1163 return opCode;
1164}
1165
1166// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1167// (LT x) to (LE x-1)
1168static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1169 uint64_t Imm = CMPI->getOperand(2).getImm();
1170 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1171 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1172 return 0;
1173
1175 unsigned PredCond = PPC::getPredicateCondition(Pred);
1176 unsigned PredHint = PPC::getPredicateHint(Pred);
1177 if (PredCond == PPC::PRED_GE)
1178 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1179 if (PredCond == PPC::PRED_LT)
1180 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1181
1182 return 0;
1183}
1184
1185// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1186// (LE x) to (LT x+1)
1187static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1188 uint64_t Imm = CMPI->getOperand(2).getImm();
1189 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1190 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1191 return 0;
1192
1194 unsigned PredCond = PPC::getPredicateCondition(Pred);
1195 unsigned PredHint = PPC::getPredicateHint(Pred);
1196 if (PredCond == PPC::PRED_GT)
1197 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1198 if (PredCond == PPC::PRED_LE)
1199 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1200
1201 return 0;
1202}
1203
1204// This takes a Phi node and returns a register value for the specified BB.
1205static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1207 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1208 MachineOperand &MO = Phi->getOperand(I);
1209 if (MO.getMBB() == MBB)
1210 return Phi->getOperand(I-1).getReg();
1211 }
1212 llvm_unreachable("invalid src basic block for this Phi node\n");
1213 return 0;
1214}
1215
1216// This function tracks the source of the register through register copy.
1217// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1218// assuming that the control comes from BB1 into BB2.
1219static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1221 unsigned SrcReg = Reg;
1222 while (true) {
1223 unsigned NextReg = SrcReg;
1224 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1225 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1226 NextReg = getIncomingRegForBlock(Inst, BB1);
1227 // We track through PHI only once to avoid infinite loop.
1228 BB1 = nullptr;
1229 }
1230 else if (Inst->isFullCopy())
1231 NextReg = Inst->getOperand(1).getReg();
1232 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1233 break;
1234 SrcReg = NextReg;
1235 }
1236 return SrcReg;
1237}
1238
1239static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1240 MachineBasicBlock *&PredMBB,
1241 MachineBasicBlock *&MBBtoMoveCmp,
1243
1244 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1245 auto BII = BB.getFirstInstrTerminator();
1246 // We optimize BBs ending with a conditional branch.
1247 // We check only for BCC here, not BCCLR, because BCCLR
1248 // will be formed only later in the pipeline.
1249 if (BB.succ_size() == 2 &&
1250 BII != BB.instr_end() &&
1251 (*BII).getOpcode() == PPC::BCC &&
1252 (*BII).getOperand(1).isReg()) {
1253 // We optimize only if the condition code is used only by one BCC.
1254 Register CndReg = (*BII).getOperand(1).getReg();
1255 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1256 return false;
1257
1258 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1259 // We assume compare and branch are in the same BB for ease of analysis.
1260 if (CMPI->getParent() != &BB)
1261 return false;
1262
1263 // We skip this BB if a physical register is used in comparison.
1264 for (MachineOperand &MO : CMPI->operands())
1265 if (MO.isReg() && !MO.getReg().isVirtual())
1266 return false;
1267
1268 return true;
1269 }
1270 return false;
1271 };
1272
1273 // If this BB has more than one successor, we can create a new BB and
1274 // move the compare instruction in the new BB.
1275 // So far, we do not move compare instruction to a BB having multiple
1276 // successors to avoid potentially increasing code size.
1277 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1278 return BB.succ_size() == 1;
1279 };
1280
1281 if (!isEligibleBB(MBB))
1282 return false;
1283
1284 unsigned NumPredBBs = MBB.pred_size();
1285 if (NumPredBBs == 1) {
1286 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1287 if (isEligibleBB(*TmpMBB)) {
1288 PredMBB = TmpMBB;
1289 MBBtoMoveCmp = nullptr;
1290 return true;
1291 }
1292 }
1293 else if (NumPredBBs == 2) {
1294 // We check for partially redundant case.
1295 // So far, we support cases with only two predecessors
1296 // to avoid increasing the number of instructions.
1298 MachineBasicBlock *Pred1MBB = *PI;
1299 MachineBasicBlock *Pred2MBB = *(PI+1);
1300
1301 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1302 // We assume Pred1MBB is the BB containing the compare to be merged and
1303 // Pred2MBB is the BB to which we will append a compare instruction.
1304 // Hence we can proceed as is.
1305 }
1306 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1307 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1308 std::swap(Pred1MBB, Pred2MBB);
1309 }
1310 else return false;
1311
1312 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1313 // We cannot move the compare instruction if operands are not available
1314 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1316 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1317 for (int I = 1; I <= 2; I++)
1318 if (CMPI->getOperand(I).isReg()) {
1319 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1320 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1321 return false;
1322 }
1323
1324 PredMBB = Pred1MBB;
1325 MBBtoMoveCmp = Pred2MBB;
1326 return true;
1327 }
1328
1329 return false;
1330}
1331
1332// This function will iterate over the input map containing a pair of TOC save
1333// instruction and a flag. The flag will be set to false if the TOC save is
1334// proven redundant. This function will erase from the basic block all the TOC
1335// saves marked as redundant.
1336bool PPCMIPeephole::eliminateRedundantTOCSaves(
1337 std::map<MachineInstr *, bool> &TOCSaves) {
1338 bool Simplified = false;
1339 int NumKept = 0;
1340 for (auto TOCSave : TOCSaves) {
1341 if (!TOCSave.second) {
1342 TOCSave.first->eraseFromParent();
1343 RemoveTOCSave++;
1344 Simplified = true;
1345 } else {
1346 NumKept++;
1347 }
1348 }
1349
1350 if (NumKept > 1)
1351 MultiTOCSaves++;
1352
1353 return Simplified;
1354}
1355
1356// If multiple conditional branches are executed based on the (essentially)
1357// same comparison, we merge compare instructions into one and make multiple
1358// conditional branches on this comparison.
1359// For example,
1360// if (a == 0) { ... }
1361// else if (a < 0) { ... }
1362// can be executed by one compare and two conditional branches instead of
1363// two pairs of a compare and a conditional branch.
1364//
1365// This method merges two compare instructions in two MBBs and modifies the
1366// compare and conditional branch instructions if needed.
1367// For the above example, the input for this pass looks like:
1368// cmplwi r3, 0
1369// beq 0, .LBB0_3
1370// cmpwi r3, -1
1371// bgt 0, .LBB0_4
1372// So, before merging two compares, we need to modify these instructions as
1373// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1374// beq 0, .LBB0_3
1375// cmpwi r3, 0 ; greather than -1 means greater or equal to 0
1376// bge 0, .LBB0_4
1377
1378bool PPCMIPeephole::eliminateRedundantCompare() {
1379 bool Simplified = false;
1380
1381 for (MachineBasicBlock &MBB2 : *MF) {
1382 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1383
1384 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1385 // as an optimization target if
1386 // - both MBBs end with a conditional branch,
1387 // - MBB1 is the only predecessor of MBB2, and
1388 // - compare does not take a physical register as a operand in both MBBs.
1389 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1390 //
1391 // As partially redundant case, we additionally handle if MBB2 has one
1392 // additional predecessor, which has only one successor (MBB2).
1393 // In this case, we move the compare instruction originally in MBB2 into
1394 // MBBtoMoveCmp. This partially redundant case is typically appear by
1395 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1396 //
1397 // Overview of CFG of related basic blocks
1398 // Fully redundant case Partially redundant case
1399 // -------- ---------------- --------
1400 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1401 // -------- ---------------- --------
1402 // | \ (w/ 1 succ) \ | \
1403 // | \ \ | \
1404 // | \ |
1405 // -------- --------
1406 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1407 // -------- and 2 succ) -------- and 2 succ)
1408 // | \ | \
1409 // | \ | \
1410 //
1411 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1412 continue;
1413
1414 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1415 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1416
1417 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1418 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1419 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1420
1421 // We cannot optimize an unsupported compare opcode or
1422 // a mix of 32-bit and 64-bit comparisons
1423 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1424 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1425 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1426 continue;
1427
1428 unsigned NewOpCode = 0;
1429 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1430 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1431 bool SwapOperands = false;
1432
1433 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1434 // Typically, unsigned comparison is used for equality check, but
1435 // we replace it with a signed comparison if the comparison
1436 // to be merged is a signed comparison.
1437 // In other cases of opcode mismatch, we cannot optimize this.
1438
1439 // We cannot change opcode when comparing against an immediate
1440 // if the most significant bit of the immediate is one
1441 // due to the difference in sign extension.
1442 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1443 if (!I->getOperand(2).isImm())
1444 return false;
1445 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1446 return Imm < 0;
1447 };
1448
1449 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1450 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1451 NewOpCode = CMPI1->getOpcode();
1452 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1453 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1454 NewOpCode = CMPI2->getOpcode();
1455 else continue;
1456 }
1457
1458 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1459 // In case of comparisons between two registers, these two registers
1460 // must be same to merge two comparisons.
1461 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1462 nullptr, nullptr, MRI);
1463 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1464 nullptr, nullptr, MRI);
1465 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1466 MBB1, &MBB2, MRI);
1467 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1468 MBB1, &MBB2, MRI);
1469
1470 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1471 // Same pair of registers in the same order; ready to merge as is.
1472 }
1473 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1474 // Same pair of registers in different order.
1475 // We reverse the predicate to merge compare instructions.
1477 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1478 // In case of partial redundancy, we need to swap operands
1479 // in another compare instruction.
1480 SwapOperands = true;
1481 }
1482 else continue;
1483 }
1484 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1485 // In case of comparisons between a register and an immediate,
1486 // the operand register must be same for two compare instructions.
1487 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1488 nullptr, nullptr, MRI);
1489 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1490 MBB1, &MBB2, MRI);
1491 if (Cmp1Operand1 != Cmp2Operand1)
1492 continue;
1493
1494 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1495 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1496
1497 // If immediate are not same, we try to adjust by changing predicate;
1498 // e.g. GT imm means GE (imm+1).
1499 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1500 int Diff = Imm1 - Imm2;
1501 if (Diff < -2 || Diff > 2)
1502 continue;
1503
1504 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1505 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1506 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1507 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1508 if (Diff == 2) {
1509 if (PredToInc2 && PredToDec1) {
1510 NewPredicate2 = PredToInc2;
1511 NewPredicate1 = PredToDec1;
1512 NewImm2++;
1513 NewImm1--;
1514 }
1515 }
1516 else if (Diff == 1) {
1517 if (PredToInc2) {
1518 NewImm2++;
1519 NewPredicate2 = PredToInc2;
1520 }
1521 else if (PredToDec1) {
1522 NewImm1--;
1523 NewPredicate1 = PredToDec1;
1524 }
1525 }
1526 else if (Diff == -1) {
1527 if (PredToDec2) {
1528 NewImm2--;
1529 NewPredicate2 = PredToDec2;
1530 }
1531 else if (PredToInc1) {
1532 NewImm1++;
1533 NewPredicate1 = PredToInc1;
1534 }
1535 }
1536 else if (Diff == -2) {
1537 if (PredToDec2 && PredToInc1) {
1538 NewPredicate2 = PredToDec2;
1539 NewPredicate1 = PredToInc1;
1540 NewImm2--;
1541 NewImm1++;
1542 }
1543 }
1544 }
1545
1546 // We cannot merge two compares if the immediates are not same.
1547 if (NewImm2 != NewImm1)
1548 continue;
1549 }
1550
1551 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1552 LLVM_DEBUG(CMPI1->dump());
1553 LLVM_DEBUG(BI1->dump());
1554 LLVM_DEBUG(CMPI2->dump());
1555 LLVM_DEBUG(BI2->dump());
1556
1557 // We adjust opcode, predicates and immediate as we determined above.
1558 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1559 CMPI1->setDesc(TII->get(NewOpCode));
1560 }
1561 if (NewPredicate1) {
1562 BI1->getOperand(0).setImm(NewPredicate1);
1563 }
1564 if (NewPredicate2) {
1565 BI2->getOperand(0).setImm(NewPredicate2);
1566 }
1567 if (NewImm1 != Imm1) {
1568 CMPI1->getOperand(2).setImm(NewImm1);
1569 }
1570
1571 if (IsPartiallyRedundant) {
1572 // We touch up the compare instruction in MBB2 and move it to
1573 // a previous BB to handle partially redundant case.
1574 if (SwapOperands) {
1575 Register Op1 = CMPI2->getOperand(1).getReg();
1576 Register Op2 = CMPI2->getOperand(2).getReg();
1577 CMPI2->getOperand(1).setReg(Op2);
1578 CMPI2->getOperand(2).setReg(Op1);
1579 }
1580 if (NewImm2 != Imm2)
1581 CMPI2->getOperand(2).setImm(NewImm2);
1582
1583 for (int I = 1; I <= 2; I++) {
1584 if (CMPI2->getOperand(I).isReg()) {
1585 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1586 if (Inst->getParent() != &MBB2)
1587 continue;
1588
1589 assert(Inst->getOpcode() == PPC::PHI &&
1590 "We cannot support if an operand comes from this BB.");
1591 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1592 CMPI2->getOperand(I).setReg(SrcReg);
1593 }
1594 }
1595 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1596 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1597
1598 DebugLoc DL = CMPI2->getDebugLoc();
1599 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1600 BuildMI(MBB2, MBB2.begin(), DL,
1601 TII->get(PPC::PHI), NewVReg)
1602 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1603 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1604 BI2->getOperand(1).setReg(NewVReg);
1605 }
1606 else {
1607 // We finally eliminate compare instruction in MBB2.
1608 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1609 CMPI2->eraseFromParent();
1610 }
1611 BI2->getOperand(1).setIsKill(true);
1612 BI1->getOperand(1).setIsKill(false);
1613
1614 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1615 LLVM_DEBUG(CMPI1->dump());
1616 LLVM_DEBUG(BI1->dump());
1617 LLVM_DEBUG(BI2->dump());
1618 if (IsPartiallyRedundant) {
1619 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1620 << printMBBReference(*MBBtoMoveCmp)
1621 << " to handle partial redundancy.\n");
1622 LLVM_DEBUG(CMPI2->dump());
1623 }
1624
1625 Simplified = true;
1626 }
1627
1628 return Simplified;
1629}
1630
1631// We miss the opportunity to emit an RLDIC when lowering jump tables
1632// since ISEL sees only a single basic block. When selecting, the clear
1633// and shift left will be in different blocks.
1634bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
1635 if (MI.getOpcode() != PPC::RLDICR)
1636 return false;
1637
1638 Register SrcReg = MI.getOperand(1).getReg();
1639 if (!SrcReg.isVirtual())
1640 return false;
1641
1642 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1643 if (SrcMI->getOpcode() != PPC::RLDICL)
1644 return false;
1645
1646 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1647 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1648 MachineOperand MOpSHMI = MI.getOperand(2);
1649 MachineOperand MOpMEMI = MI.getOperand(3);
1650 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1651 MOpMEMI.isImm()))
1652 return false;
1653
1654 uint64_t SHSrc = MOpSHSrc.getImm();
1655 uint64_t MBSrc = MOpMBSrc.getImm();
1656 uint64_t SHMI = MOpSHMI.getImm();
1657 uint64_t MEMI = MOpMEMI.getImm();
1658 uint64_t NewSH = SHSrc + SHMI;
1659 uint64_t NewMB = MBSrc - SHMI;
1660 if (NewMB > 63 || NewSH > 63)
1661 return false;
1662
1663 // The bits cleared with RLDICL are [0, MBSrc).
1664 // The bits cleared with RLDICR are (MEMI, 63].
1665 // After the sequence, the bits cleared are:
1666 // [0, MBSrc-SHMI) and (MEMI, 63).
1667 //
1668 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1669 if ((63 - NewSH) != MEMI)
1670 return false;
1671
1672 LLVM_DEBUG(dbgs() << "Converting pair: ");
1673 LLVM_DEBUG(SrcMI->dump());
1674 LLVM_DEBUG(MI.dump());
1675
1676 MI.setDesc(TII->get(PPC::RLDIC));
1677 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1678 MI.getOperand(2).setImm(NewSH);
1679 MI.getOperand(3).setImm(NewMB);
1680 MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill());
1681 SrcMI->getOperand(1).setIsKill(false);
1682
1683 LLVM_DEBUG(dbgs() << "To: ");
1684 LLVM_DEBUG(MI.dump());
1685 NumRotatesCollapsed++;
1686 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1687 if (MRI->use_nodbg_empty(SrcReg)) {
1688 assert(!SrcMI->hasImplicitDef() &&
1689 "Not expecting an implicit def with this instr.");
1690 SrcMI->eraseFromParent();
1691 }
1692 return true;
1693}
1694
1695// For case in LLVM IR
1696// entry:
1697// %iconv = sext i32 %index to i64
1698// br i1 undef label %true, label %false
1699// true:
1700// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1701// ...
1702// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1703// different BBs when conducting instruction selection. We can do a peephole
1704// optimization to combine these two instructions into extswsli after
1705// instruction selection.
1706bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1707 MachineInstr *&ToErase) {
1708 if (MI.getOpcode() != PPC::RLDICR)
1709 return false;
1710
1711 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1712 return false;
1713
1714 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1715
1716 MachineOperand MOpSHMI = MI.getOperand(2);
1717 MachineOperand MOpMEMI = MI.getOperand(3);
1718 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1719 return false;
1720
1721 uint64_t SHMI = MOpSHMI.getImm();
1722 uint64_t MEMI = MOpMEMI.getImm();
1723 if (SHMI + MEMI != 63)
1724 return false;
1725
1726 Register SrcReg = MI.getOperand(1).getReg();
1727 if (!SrcReg.isVirtual())
1728 return false;
1729
1730 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1731 if (SrcMI->getOpcode() != PPC::EXTSW &&
1732 SrcMI->getOpcode() != PPC::EXTSW_32_64)
1733 return false;
1734
1735 // If the register defined by extsw has more than one use, combination is not
1736 // needed.
1737 if (!MRI->hasOneNonDBGUse(SrcReg))
1738 return false;
1739
1740 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
1741 assert(SrcMI->getOperand(1).isReg() &&
1742 "EXTSW's second operand should be a register");
1743 if (!SrcMI->getOperand(1).getReg().isVirtual())
1744 return false;
1745
1746 LLVM_DEBUG(dbgs() << "Combining pair: ");
1747 LLVM_DEBUG(SrcMI->dump());
1748 LLVM_DEBUG(MI.dump());
1749
1750 MachineInstr *NewInstr =
1751 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
1752 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
1753 : TII->get(PPC::EXTSWSLI_32_64),
1754 MI.getOperand(0).getReg())
1755 .add(SrcMI->getOperand(1))
1756 .add(MOpSHMI);
1757 (void)NewInstr;
1758
1759 LLVM_DEBUG(dbgs() << "TO: ");
1760 LLVM_DEBUG(NewInstr->dump());
1761 ++NumEXTSWAndSLDICombined;
1762 ToErase = &MI;
1763 // SrcMI, which is extsw, is of no use now, erase it.
1764 SrcMI->eraseFromParent();
1765 return true;
1766}
1767
1768} // end default namespace
1769
1771 "PowerPC MI Peephole Optimization", false, false)
1776 "PowerPC MI Peephole Optimization", false, false)
1777
1778char PPCMIPeephole::ID = 0;
1780llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
PowerPC MI Peephole Optimization
static cl::opt< bool > EnableZExtElimination("ppc-eliminate-zeroext", cl::desc("enable elimination of zero-extensions"), cl::init(true), cl::Hidden)
static cl::opt< bool > FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), cl::desc("Iterate to a fixed point when attempting to " "convert reg-reg instructions to reg-imm"))
static cl::opt< bool > EnableTrapOptimization("ppc-opt-conditional-trap", cl::desc("enable optimization of conditional traps"), cl::init(false), cl::Hidden)
static cl::opt< bool > ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), cl::desc("Convert eligible reg+reg instructions to reg+imm"))
#define DEBUG_TYPE
static cl::opt< bool > EnableSExtElimination("ppc-eliminate-signext", cl::desc("enable elimination of sign-extensions"), cl::init(true), cl::Hidden)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:174
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
unsigned pred_size() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
uint64_t getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:599
bool isFullCopy() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool isUsingPCRelativeCalls() const
bool isELFv2ABI() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void dump() const
Definition: Pass.cpp:136
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:245
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:484
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializePPCMIPeepholePass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1869
@ Keep
No function return thunk.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createPPCMIPeepholePass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853