// Extracted from the LLVM 17.0.0git doxygen rendering of PPCMIPeephole.cpp.
//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code
// sequences at the MachineInstruction layer. It runs at the end of
// the SSA phases, following VSX swap removal. A pass of dead code
// elimination follows this one for quick clean-up of any dead
// instructions introduced here. Although we could do this as callbacks
// from the generic peephole pass, this would have a couple of bad
// effects: it might remove optimization opportunities for VSX swap
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
//===---------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
38
using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

// Pass-wide counters, reported when LLVM is run with -stats. Each one is
// bumped by the corresponding peephole in this file.
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");
STATISTIC(NumRotatesCollapsed,
          "Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
STATISTIC(NumLoadImmZeroFoldedAndRemoved,
          "Number of LI(8) reg, 0 that are folded to r0 and removed");
63
// Iterate the reg-reg -> reg-imm conversion to a fixed point instead of
// doing a single pass (on by default).
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

// Master switch for the reg+reg -> reg+imm conversion (on by default).
static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

// Enable the peepholes that remove redundant sign-extensions (on by default).
static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(true), cl::Hidden);

// Enable the peepholes that remove redundant zero-extensions (on by default).
static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(true), cl::Hidden);

// Enable folding of conditional traps into unconditional ones (off by
// default).
static cl::opt<bool>
    EnableTrapOptimization("ppc-opt-conditional-trap",
                           cl::desc("enable optimization of conditional traps"),
                           cl::init(false), cl::Hidden);
88namespace {
89
90struct PPCMIPeephole : public MachineFunctionPass {
91
92 static char ID;
93 const PPCInstrInfo *TII;
96
97 PPCMIPeephole() : MachineFunctionPass(ID) {
99 }
100
101private:
105 uint64_t EntryFreq;
106
107 // Initialize class variables.
108 void initialize(MachineFunction &MFParm);
109
110 // Perform peepholes.
111 bool simplifyCode();
112
113 // Perform peepholes.
114 bool eliminateRedundantCompare();
115 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
116 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
117 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
118 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
120
121public:
122
123 void getAnalysisUsage(AnalysisUsage &AU) const override {
131 }
132
133 // Main entry point for this pass.
134 bool runOnMachineFunction(MachineFunction &MF) override {
135 initialize(MF);
136 // At this point, TOC pointer should not be used in a function that uses
137 // PC-Relative addressing.
138 assert((MF.getRegInfo().use_empty(PPC::X2) ||
140 "TOC pointer used in a function using PC-Relative addressing!");
141 if (skipFunction(MF.getFunction()))
142 return false;
143 return simplifyCode();
144 }
145};
146
147// Initialize class variables.
148void PPCMIPeephole::initialize(MachineFunction &MFParm) {
149 MF = &MFParm;
150 MRI = &MF->getRegInfo();
151 MDT = &getAnalysis<MachineDominatorTree>();
152 MPDT = &getAnalysis<MachinePostDominatorTree>();
153 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
154 EntryFreq = MBFI->getEntryFreq();
155 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
156 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
157 LLVM_DEBUG(MF->dump());
158}
159
160static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
162 assert(Op && "Invalid Operand!");
163 if (!Op->isReg())
164 return nullptr;
165
166 Register Reg = Op->getReg();
167 if (!Reg.isVirtual())
168 return nullptr;
169
170 return MRI->getVRegDef(Reg);
171}
172
173// This function returns number of known zero bits in output of MI
174// starting from the most significant bit.
175static unsigned getKnownLeadingZeroCount(const unsigned Reg,
176 const PPCInstrInfo *TII,
177 const MachineRegisterInfo *MRI) {
178 MachineInstr *MI = MRI->getVRegDef(Reg);
179 unsigned Opcode = MI->getOpcode();
180 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
181 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
182 return MI->getOperand(3).getImm();
183
184 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
185 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
186 return MI->getOperand(3).getImm();
187
188 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
189 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
190 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
191 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
192 return 32 + MI->getOperand(3).getImm();
193
194 if (Opcode == PPC::ANDI_rec) {
195 uint16_t Imm = MI->getOperand(2).getImm();
196 return 48 + llvm::countl_zero(Imm);
197 }
198
199 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
200 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
201 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
202 // The result ranges from 0 to 32.
203 return 58;
204
205 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
206 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
207 // The result ranges from 0 to 64.
208 return 57;
209
210 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
211 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
212 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
213 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
214 return 48;
215
216 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
217 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
218 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
219 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
220 return 56;
221
222 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
223 Opcode == PPC::AND8_rec)
224 return std::max(
225 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
226 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
227
228 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
229 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
230 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
231 Opcode == PPC::XOR8_rec)
232 return std::min(
233 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
234 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
235
236 if (TII->isZeroExtended(Reg, MRI))
237 return 32;
238
239 return 0;
240}
241
242// This function maintains a map for the pairs <TOC Save Instr, Keep>
243// Each time a new TOC save is encountered, it checks if any of the existing
244// ones are dominated by the new one. If so, it marks the existing one as
245// redundant by setting it's entry in the map as false. It then adds the new
246// instruction to the map with either true or false depending on if any
247// existing instructions dominated the new one.
248void PPCMIPeephole::UpdateTOCSaves(
249 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
250 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
251 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
252 // here only support it under ELFv2.
253 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
255
256 MachineBasicBlock *Entry = &MF->front();
257 uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
258
259 // If the block in which the TOC save resides is in a block that
260 // post-dominates Entry, or a block that is hotter than entry (keep in mind
261 // that early MachineLICM has already run so the TOC save won't be hoisted)
262 // we can just do the save in the prologue.
263 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
264 FI->setMustSaveTOC(true);
265
266 // If we are saving the TOC in the prologue, all the TOC saves can be
267 // removed from the code.
268 if (FI->mustSaveTOC()) {
269 for (auto &TOCSave : TOCSaves)
270 TOCSave.second = false;
271 // Add new instruction to map.
272 TOCSaves[MI] = false;
273 return;
274 }
275 }
276
277 bool Keep = true;
278 for (auto &I : TOCSaves) {
279 MachineInstr *CurrInst = I.first;
280 // If new instruction dominates an existing one, mark existing one as
281 // redundant.
282 if (I.second && MDT->dominates(MI, CurrInst))
283 I.second = false;
284 // Check if the new instruction is redundant.
285 if (MDT->dominates(CurrInst, MI)) {
286 Keep = false;
287 break;
288 }
289 }
290 // Add new instruction to map.
291 TOCSaves[MI] = Keep;
292}
293
294// This function returns a list of all PHI nodes in the tree starting from
295// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
296// The list initially only contains the root PHI. When we visit a PHI node, we
297// add it to the list. We continue to look for other PHI node operands while
298// there are nodes to visit in the list. The function returns false if the
299// optimization cannot be applied on this tree.
300static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
301 MachineInstr *RootPHI,
303 PHIs.push_back(RootPHI);
304 unsigned VisitedIndex = 0;
305 while (VisitedIndex < PHIs.size()) {
306 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
307 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
308 PHIOp != NumOps; PHIOp += 2) {
309 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
310 if (!RegOp.isVirtual())
311 return false;
312 MachineInstr *Instr = MRI->getVRegDef(RegOp);
313 // While collecting the PHI nodes, we check if they can be converted (i.e.
314 // all the operands are either copies, implicit defs or PHI nodes).
315 unsigned Opcode = Instr->getOpcode();
316 if (Opcode == PPC::COPY) {
317 Register Reg = Instr->getOperand(1).getReg();
318 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
319 return false;
320 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
321 return false;
322 // If we detect a cycle in the PHI nodes, we exit. It would be
323 // possible to change cycles as well, but that would add a lot
324 // of complexity for a case that is unlikely to occur with MMA
325 // code.
326 if (Opcode != PPC::PHI)
327 continue;
328 if (llvm::is_contained(PHIs, Instr))
329 return false;
330 PHIs.push_back(Instr);
331 }
332 VisitedIndex++;
333 }
334 return true;
335}
336
337// This function changes the unprimed accumulator PHI nodes in the PHIs list to
338// primed accumulator PHI nodes. The list is traversed in reverse order to
339// change all the PHI operands of a PHI node before changing the node itself.
340// We keep a map to associate each changed PHI node to its non-changed form.
341static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
344 Register Dst) {
346 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
348 // We check if the current PHI node can be changed by looking at its
349 // operands. If all the operands are either copies from primed
350 // accumulators, implicit definitions or other unprimed accumulator
351 // PHI nodes, we change it.
352 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
353 PHIOp += 2) {
354 Register RegOp = PHI->getOperand(PHIOp).getReg();
355 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
356 unsigned Opcode = PHIInput->getOpcode();
357 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
358 Opcode == PPC::PHI) &&
359 "Unexpected instruction");
360 if (Opcode == PPC::COPY) {
361 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
362 &PPC::ACCRCRegClass &&
363 "Unexpected register class");
364 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
365 } else if (Opcode == PPC::IMPLICIT_DEF) {
366 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
367 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
368 TII->get(PPC::IMPLICIT_DEF), AccReg);
369 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
370 PHI->getOperand(PHIOp + 1)});
371 } else if (Opcode == PPC::PHI) {
372 // We found a PHI operand. At this point we know this operand
373 // has already been changed so we get its associated changed form
374 // from the map.
375 assert(ChangedPHIMap.count(PHIInput) == 1 &&
376 "This PHI node should have already been changed.");
377 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
379 PrimedAccPHI->getOperand(0).getReg(), false),
380 PHI->getOperand(PHIOp + 1)});
381 }
382 }
383 Register AccReg = Dst;
384 // If the PHI node we are changing is the root node, the register it defines
385 // will be the destination register of the original copy (of the PHI def).
386 // For all other PHI's in the list, we need to create another primed
387 // accumulator virtual register as the PHI will no longer define the
388 // unprimed accumulator.
389 if (PHI != PHIs[0])
390 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
392 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
393 for (auto RegMBB : PHIOps)
394 NewPHI.add(RegMBB.first).add(RegMBB.second);
395 ChangedPHIMap[PHI] = NewPHI.getInstr();
396 LLVM_DEBUG(dbgs() << "Converting PHI: ");
397 LLVM_DEBUG(PHI->dump());
398 LLVM_DEBUG(dbgs() << "To: ");
399 LLVM_DEBUG(NewPHI.getInstr()->dump());
400 }
401}
402
403// Perform peephole optimizations.
404bool PPCMIPeephole::simplifyCode() {
405 bool Simplified = false;
406 bool TrapOpt = false;
407 MachineInstr* ToErase = nullptr;
408 std::map<MachineInstr *, bool> TOCSaves;
409 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
410 NumFunctionsEnteredInMIPeephole++;
411 if (ConvertRegReg) {
412 // Fixed-point conversion of reg/reg instructions fed by load-immediate
413 // into reg/imm instructions. FIXME: This is expensive, control it with
414 // an option.
415 bool SomethingChanged = false;
416 do {
417 NumFixedPointIterations++;
418 SomethingChanged = false;
419 for (MachineBasicBlock &MBB : *MF) {
420 for (MachineInstr &MI : MBB) {
421 if (MI.isDebugInstr())
422 continue;
423
424 if (TII->convertToImmediateForm(MI)) {
425 // We don't erase anything in case the def has other uses. Let DCE
426 // remove it if it can be removed.
427 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
428 LLVM_DEBUG(MI.dump());
429 NumConvertedToImmediateForm++;
430 SomethingChanged = true;
431 Simplified = true;
432 continue;
433 }
434 }
435 }
436 } while (SomethingChanged && FixedPointRegToImm);
437 }
438
439 for (MachineBasicBlock &MBB : *MF) {
440 for (MachineInstr &MI : MBB) {
441
442 // If the previous instruction was marked for elimination,
443 // remove it now.
444 if (ToErase) {
445 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
446 LLVM_DEBUG(ToErase->dump());
447 ToErase->eraseFromParent();
448 ToErase = nullptr;
449 }
450 // If a conditional trap instruction got optimized to an
451 // unconditional trap, eliminate all the instructions after
452 // the trap.
453 if (EnableTrapOptimization && TrapOpt) {
454 ToErase = &MI;
455 continue;
456 }
457
458 // Ignore debug instructions.
459 if (MI.isDebugInstr())
460 continue;
461
462 // Per-opcode peepholes.
463 switch (MI.getOpcode()) {
464
465 default:
466 break;
467 case PPC::COPY: {
468 Register Src = MI.getOperand(1).getReg();
469 Register Dst = MI.getOperand(0).getReg();
470 if (!Src.isVirtual() || !Dst.isVirtual())
471 break;
472 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
473 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
474 break;
475
476 // We are copying an unprimed accumulator to a primed accumulator.
477 // If the input to the copy is a PHI that is fed only by (i) copies in
478 // the other direction (ii) implicitly defined unprimed accumulators or
479 // (iii) other PHI nodes satisfying (i) and (ii), we can change
480 // the PHI to a PHI on primed accumulators (as long as we also change
481 // its operands). To detect and change such copies, we first get a list
482 // of all the PHI nodes starting from the root PHI node in BFS order.
483 // We then visit all these PHI nodes to check if they can be changed to
484 // primed accumulator PHI nodes and if so, we change them.
485 MachineInstr *RootPHI = MRI->getVRegDef(Src);
486 if (RootPHI->getOpcode() != PPC::PHI)
487 break;
488
490 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
491 break;
492
493 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
494
495 ToErase = &MI;
496 break;
497 }
498 case PPC::LI:
499 case PPC::LI8: {
500 // If we are materializing a zero, look for any use operands for which
501 // zero means immediate zero. All such operands can be replaced with
502 // PPC::ZERO.
503 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
504 break;
505 Register MIDestReg = MI.getOperand(0).getReg();
506 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
507 Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
508 if (MRI->use_nodbg_empty(MIDestReg)) {
509 ++NumLoadImmZeroFoldedAndRemoved;
510 ToErase = &MI;
511 }
512 break;
513 }
514 case PPC::STW:
515 case PPC::STD: {
516 MachineFrameInfo &MFI = MF->getFrameInfo();
517 if (MFI.hasVarSizedObjects() ||
518 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
519 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
520 break;
521 // When encountering a TOC save instruction, call UpdateTOCSaves
522 // to add it to the TOCSaves map and mark any existing TOC saves
523 // it dominates as redundant.
524 if (TII->isTOCSaveMI(MI))
525 UpdateTOCSaves(TOCSaves, &MI);
526 break;
527 }
528 case PPC::XXPERMDI: {
529 // Perform simplifications of 2x64 vector swaps and splats.
530 // A swap is identified by an immediate value of 2, and a splat
531 // is identified by an immediate value of 0 or 3.
532 int Immed = MI.getOperand(3).getImm();
533
534 if (Immed == 1)
535 break;
536
537 // For each of these simplifications, we need the two source
538 // regs to match. Unfortunately, MachineCSE ignores COPY and
539 // SUBREG_TO_REG, so for example we can see
540 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
541 // We have to look through chains of COPY and SUBREG_TO_REG
542 // to find the real source values for comparison.
543 Register TrueReg1 =
544 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
545 Register TrueReg2 =
546 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
547
548 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
549 break;
550
551 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
552
553 if (!DefMI)
554 break;
555
556 unsigned DefOpc = DefMI->getOpcode();
557
558 // If this is a splat fed by a splatting load, the splat is
559 // redundant. Replace with a copy. This doesn't happen directly due
560 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
561 // a load of a double to a vector of 64-bit integers.
562 auto isConversionOfLoadAndSplat = [=]() -> bool {
563 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
564 return false;
565 Register FeedReg1 =
566 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
567 if (FeedReg1.isVirtual()) {
568 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
569 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
570 return true;
571 }
572 return false;
573 };
574 if ((Immed == 0 || Immed == 3) &&
575 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
576 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
577 "to load-and-splat/copy: ");
578 LLVM_DEBUG(MI.dump());
579 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
580 MI.getOperand(0).getReg())
581 .add(MI.getOperand(1));
582 ToErase = &MI;
583 Simplified = true;
584 }
585
586 // If this is a splat or a swap fed by another splat, we
587 // can replace it with a copy.
588 if (DefOpc == PPC::XXPERMDI) {
589 Register DefReg1 = DefMI->getOperand(1).getReg();
590 Register DefReg2 = DefMI->getOperand(2).getReg();
591 unsigned DefImmed = DefMI->getOperand(3).getImm();
592
593 // If the two inputs are not the same register, check to see if
594 // they originate from the same virtual register after only
595 // copy-like instructions.
596 if (DefReg1 != DefReg2) {
597 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
598 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
599
600 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
601 break;
602 }
603
604 if (DefImmed == 0 || DefImmed == 3) {
605 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
606 "to splat/copy: ");
607 LLVM_DEBUG(MI.dump());
608 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
609 MI.getOperand(0).getReg())
610 .add(MI.getOperand(1));
611 ToErase = &MI;
612 Simplified = true;
613 }
614
615 // If this is a splat fed by a swap, we can simplify modify
616 // the splat to splat the other value from the swap's input
617 // parameter.
618 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
619 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
620 LLVM_DEBUG(MI.dump());
621 MI.getOperand(1).setReg(DefReg1);
622 MI.getOperand(2).setReg(DefReg2);
623 MI.getOperand(3).setImm(3 - Immed);
624 Simplified = true;
625 }
626
627 // If this is a swap fed by a swap, we can replace it
628 // with a copy from the first swap's input.
629 else if (Immed == 2 && DefImmed == 2) {
630 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
631 LLVM_DEBUG(MI.dump());
632 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
633 MI.getOperand(0).getReg())
634 .add(DefMI->getOperand(1));
635 ToErase = &MI;
636 Simplified = true;
637 }
638 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
639 DefOpc == PPC::XXPERMDIs &&
640 (DefMI->getOperand(2).getImm() == 0 ||
641 DefMI->getOperand(2).getImm() == 3)) {
642 ToErase = &MI;
643 Simplified = true;
644 // Swap of a splat, convert to copy.
645 if (Immed == 2) {
646 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
647 LLVM_DEBUG(MI.dump());
648 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
649 MI.getOperand(0).getReg())
650 .add(MI.getOperand(1));
651 break;
652 }
653 // Splat fed by another splat - switch the output of the first
654 // and remove the second.
655 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
656 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
657 LLVM_DEBUG(MI.dump());
658 } else if (Immed == 2 &&
659 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
660 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW)) {
661 // Swap of various vector splats, convert to copy.
662 ToErase = &MI;
663 Simplified = true;
664 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt[b|h|w]|xxspltw) => "
665 "copy(vsplt[b|h|w]|xxspltw): ");
666 LLVM_DEBUG(MI.dump());
667 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
668 MI.getOperand(0).getReg())
669 .add(MI.getOperand(1));
670 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
671 TII->isLoadFromConstantPool(DefMI)) {
672 const Constant *C = TII->getConstantFromConstantPool(DefMI);
673 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
674 ToErase = &MI;
675 Simplified = true;
677 << "Optimizing swap(splat pattern from constant-pool) "
678 "=> copy(splat pattern from constant-pool): ");
679 LLVM_DEBUG(MI.dump());
680 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
681 MI.getOperand(0).getReg())
682 .add(MI.getOperand(1));
683 }
684 }
685 break;
686 }
687 case PPC::VSPLTB:
688 case PPC::VSPLTH:
689 case PPC::XXSPLTW: {
690 unsigned MyOpcode = MI.getOpcode();
691 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
692 Register TrueReg =
693 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
694 if (!TrueReg.isVirtual())
695 break;
696 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
697 if (!DefMI)
698 break;
699 unsigned DefOpcode = DefMI->getOpcode();
700 auto isConvertOfSplat = [=]() -> bool {
701 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
702 return false;
703 Register ConvReg = DefMI->getOperand(1).getReg();
704 if (!ConvReg.isVirtual())
705 return false;
706 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
707 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
708 Splt->getOpcode() == PPC::XXSPLTW);
709 };
710 bool AlreadySplat = (MyOpcode == DefOpcode) ||
711 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
712 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
713 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
714 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
715 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
716 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
717 // If the instruction[s] that feed this splat have already splat
718 // the value, this splat is redundant.
719 if (AlreadySplat) {
720 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
721 LLVM_DEBUG(MI.dump());
722 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
723 MI.getOperand(0).getReg())
724 .add(MI.getOperand(OpNo));
725 ToErase = &MI;
726 Simplified = true;
727 }
728 // Splat fed by a shift. Usually when we align value to splat into
729 // vector element zero.
730 if (DefOpcode == PPC::XXSLDWI) {
731 Register ShiftRes = DefMI->getOperand(0).getReg();
732 Register ShiftOp1 = DefMI->getOperand(1).getReg();
733 Register ShiftOp2 = DefMI->getOperand(2).getReg();
734 unsigned ShiftImm = DefMI->getOperand(3).getImm();
735 unsigned SplatImm =
736 MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
737 if (ShiftOp1 == ShiftOp2) {
738 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
739 if (MRI->hasOneNonDBGUse(ShiftRes)) {
740 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
742 ToErase = DefMI;
743 }
744 Simplified = true;
745 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
746 << " to " << NewElem << " in instruction: ");
747 LLVM_DEBUG(MI.dump());
748 MI.getOperand(1).setReg(ShiftOp1);
749 MI.getOperand(2).setImm(NewElem);
750 }
751 }
752 break;
753 }
754 case PPC::XVCVDPSP: {
755 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
756 Register TrueReg =
757 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
758 if (!TrueReg.isVirtual())
759 break;
760 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
761
762 // This can occur when building a vector of single precision or integer
763 // values.
764 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
765 Register DefsReg1 =
766 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
767 Register DefsReg2 =
768 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
769 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
770 break;
771 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
772 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
773
774 if (!P1 || !P2)
775 break;
776
777 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
778 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
779 // the FRSP/XSRSP.
780 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
781 unsigned Opc = RoundInstr->getOpcode();
782 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
783 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
784 Simplified = true;
785 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
786 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
787 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
788 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
789 if (Use.getOperand(i).isReg() &&
790 Use.getOperand(i).getReg() == FRSPDefines)
791 Use.getOperand(i).setReg(ConvReg1);
792 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
793 LLVM_DEBUG(RoundInstr->dump());
794 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
795 LLVM_DEBUG(MI.dump());
796 LLVM_DEBUG(dbgs() << "Through instruction:\n");
798 RoundInstr->eraseFromParent();
799 }
800 };
801
802 // If the input to XVCVDPSP is a vector that was built (even
803 // partially) out of FRSP's, the FRSP(s) can safely be removed
804 // since this instruction performs the same operation.
805 if (P1 != P2) {
806 removeFRSPIfPossible(P1);
807 removeFRSPIfPossible(P2);
808 break;
809 }
810 removeFRSPIfPossible(P1);
811 }
812 break;
813 }
814 case PPC::EXTSH:
815 case PPC::EXTSH8:
816 case PPC::EXTSH8_32_64: {
817 if (!EnableSExtElimination) break;
818 Register NarrowReg = MI.getOperand(1).getReg();
819 if (!NarrowReg.isVirtual())
820 break;
821
822 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
823 unsigned SrcOpcode = SrcMI->getOpcode();
824 // If we've used a zero-extending load that we will sign-extend,
825 // just do a sign-extending load.
826 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
827 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
828 break;
829 // Determine the new opcode. We need to make sure that if the original
830 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
831 // Likewise if the source is X-Form the new opcode should also be
832 // X-Form.
833 unsigned Opc = PPC::LHA;
834 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
835 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
836 MI.getOpcode() == PPC::EXTSH8_32_64;
837
838 if (SourceIsXForm && MIIs64Bit)
839 Opc = PPC::LHAX8;
840 else if (SourceIsXForm && !MIIs64Bit)
841 Opc = PPC::LHAX;
842 else if (MIIs64Bit)
843 Opc = PPC::LHA8;
844
845 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
846 LLVM_DEBUG(SrcMI->dump());
847 LLVM_DEBUG(dbgs() << "and sign-extension\n");
848 LLVM_DEBUG(MI.dump());
849 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
850 SrcMI->setDesc(TII->get(Opc));
851 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
852 ToErase = &MI;
853 Simplified = true;
854 NumEliminatedSExt++;
855 }
856 break;
857 }
858 case PPC::EXTSW:
859 case PPC::EXTSW_32:
860 case PPC::EXTSW_32_64: {
861 if (!EnableSExtElimination) break;
862 Register NarrowReg = MI.getOperand(1).getReg();
863 if (!NarrowReg.isVirtual())
864 break;
865
866 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
867 unsigned SrcOpcode = SrcMI->getOpcode();
868 // If we've used a zero-extending load that we will sign-extend,
869 // just do a sign-extending load.
870 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
871 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
872 break;
873
874 // The transformation from a zero-extending load to a sign-extending
875 // load is only legal when the displacement is a multiple of 4.
876 // If the displacement is not at least 4 byte aligned, don't perform
877 // the transformation.
878 bool IsWordAligned = false;
879 if (SrcMI->getOperand(1).isGlobal()) {
880 const GlobalObject *GO =
881 dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
882 if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
883 (SrcMI->getOperand(1).getOffset() % 4 == 0))
884 IsWordAligned = true;
885 } else if (SrcMI->getOperand(1).isImm()) {
886 int64_t Value = SrcMI->getOperand(1).getImm();
887 if (Value % 4 == 0)
888 IsWordAligned = true;
889 }
890
891 // Determine the new opcode. We need to make sure that if the original
892 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
893 // Likewise if the source is X-Form the new opcode should also be
894 // X-Form.
895 unsigned Opc = PPC::LWA_32;
896 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
897 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
898 MI.getOpcode() == PPC::EXTSW_32_64;
899
900 if (SourceIsXForm && MIIs64Bit)
901 Opc = PPC::LWAX;
902 else if (SourceIsXForm && !MIIs64Bit)
903 Opc = PPC::LWAX_32;
904 else if (MIIs64Bit)
905 Opc = PPC::LWA;
906
907 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
908 break;
909
910 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
911 LLVM_DEBUG(SrcMI->dump());
912 LLVM_DEBUG(dbgs() << "and sign-extension\n");
913 LLVM_DEBUG(MI.dump());
914 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
915 SrcMI->setDesc(TII->get(Opc));
916 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
917 ToErase = &MI;
918 Simplified = true;
919 NumEliminatedSExt++;
920 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
921 TII->isSignExtended(NarrowReg, MRI)) {
922 // We can eliminate EXTSW if the input is known to be already
923 // sign-extended.
924 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
925 Register TmpReg =
926 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
927 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
928 TmpReg);
929 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
930 MI.getOperand(0).getReg())
931 .addReg(TmpReg)
932 .addReg(NarrowReg)
933 .addImm(PPC::sub_32);
934 ToErase = &MI;
935 Simplified = true;
936 NumEliminatedSExt++;
937 }
938 break;
939 }
940 case PPC::RLDICL: {
941 // We can eliminate RLDICL (e.g. for zero-extension)
942 // if all bits to clear are already zero in the input.
943 // This code assume following code sequence for zero-extension.
944 // %6 = COPY %5:sub_32; (optional)
945 // %8 = IMPLICIT_DEF;
946 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
947 if (!EnableZExtElimination) break;
948
949 if (MI.getOperand(2).getImm() != 0)
950 break;
951
952 Register SrcReg = MI.getOperand(1).getReg();
953 if (!SrcReg.isVirtual())
954 break;
955
956 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
957 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
958 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
959 break;
960
961 MachineInstr *ImpDefMI, *SubRegMI;
962 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
963 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
964 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
965
966 SrcMI = SubRegMI;
967 if (SubRegMI->getOpcode() == PPC::COPY) {
968 Register CopyReg = SubRegMI->getOperand(1).getReg();
969 if (CopyReg.isVirtual())
970 SrcMI = MRI->getVRegDef(CopyReg);
971 }
972 if (!SrcMI->getOperand(0).isReg())
973 break;
974
975 unsigned KnownZeroCount =
976 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
977 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
978 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
979 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
980 MI.getOperand(0).getReg())
981 .addReg(SrcReg);
982 ToErase = &MI;
983 Simplified = true;
984 NumEliminatedZExt++;
985 }
986 break;
987 }
988
989 // TODO: Any instruction that has an immediate form fed only by a PHI
990 // whose operands are all load immediate can be folded away. We currently
991 // do this for ADD instructions, but should expand it to arithmetic and
992 // binary instructions with immediate forms in the future.
993 case PPC::ADD4:
994 case PPC::ADD8: {
995 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
996 assert(PhiOp && "Invalid Operand!");
997 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
998
999 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1000 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1001 };
1002
1003 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1004 MachineOperand *PhiOp) {
1005 assert(PhiOp && "Invalid Operand!");
1006 assert(DominatorOp && "Invalid Operand!");
1007 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1008 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1009
1010 // Note: the vregs only show up at odd indices position of PHI Node,
1011 // the even indices position save the BB info.
1012 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1013 MachineInstr *LiMI =
1014 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1015 if (!LiMI ||
1016 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1017 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1018 !MDT->dominates(DefDomMI, LiMI))
1019 return false;
1020 }
1021
1022 return true;
1023 };
1024
1025 MachineOperand Op1 = MI.getOperand(1);
1026 MachineOperand Op2 = MI.getOperand(2);
1027 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1028 std::swap(Op1, Op2);
1029 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1030 break; // We don't have an ADD fed by LI's that can be transformed
1031
1032 // Now we know that Op1 is the PHI node and Op2 is the dominator
1033 Register DominatorReg = Op2.getReg();
1034
1035 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1036 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1037 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1038 MRI->setRegClass(DominatorReg, TRC);
1039
1040 // replace LIs with ADDIs
1041 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1042 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1043 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1044 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1045 LLVM_DEBUG(LiMI->dump());
1046
1047 // There could be repeated registers in the PHI, e.g: %1 =
1048 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1049 // already replaced the def instruction, skip.
1050 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1051 continue;
1052
1053 assert((LiMI->getOpcode() == PPC::LI ||
1054 LiMI->getOpcode() == PPC::LI8) &&
1055 "Invalid Opcode!");
1056 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1057 LiMI->removeOperand(1); // remove the imm of LI
1058 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1059 : PPC::ADDI8));
1060 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1061 .addReg(DominatorReg)
1062 .addImm(LiImm); // restore the imm of LI
1063 LLVM_DEBUG(LiMI->dump());
1064 }
1065
1066 // Replace ADD with COPY
1067 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1068 LLVM_DEBUG(MI.dump());
1069 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1070 MI.getOperand(0).getReg())
1071 .add(Op1);
1072 ToErase = &MI;
1073 Simplified = true;
1074 NumOptADDLIs++;
1075 break;
1076 }
1077 case PPC::RLDICR: {
1078 Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
1079 combineSEXTAndSHL(MI, ToErase);
1080 break;
1081 }
1082 case PPC::RLWINM:
1083 case PPC::RLWINM_rec:
1084 case PPC::RLWINM8:
1085 case PPC::RLWINM8_rec: {
1086 Simplified = TII->combineRLWINM(MI, &ToErase);
1087 if (Simplified)
1088 ++NumRotatesCollapsed;
1089 break;
1090 }
1091 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1092 // always trap, we will delete the node if it will never trap.
1093 case PPC::TDI:
1094 case PPC::TWI:
1095 case PPC::TD:
1096 case PPC::TW: {
1097 if (!EnableTrapOptimization) break;
1098 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1099 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1100 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1101 // We can only do the optimization if we can get immediates
1102 // from both operands
1103 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1104 LiMI1->getOpcode() == PPC::LI8)))
1105 break;
1106 if (!IsOperand2Immediate &&
1107 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1108 LiMI2->getOpcode() == PPC::LI8)))
1109 break;
1110
1111 auto ImmOperand0 = MI.getOperand(0).getImm();
1112 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1113 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1114 : LiMI2->getOperand(1).getImm();
1115
1116 // We will replace the MI with an unconditional trap if it will always
1117 // trap.
1118 if ((ImmOperand0 == 31) ||
1119 ((ImmOperand0 & 0x10) &&
1120 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1121 ((ImmOperand0 & 0x8) &&
1122 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1123 ((ImmOperand0 & 0x2) &&
1124 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1125 ((ImmOperand0 & 0x1) &&
1126 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1127 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1128 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1129 TrapOpt = true;
1130 }
1131 // We will delete the MI if it will never trap.
1132 ToErase = &MI;
1133 Simplified = true;
1134 break;
1135 }
1136 }
1137 }
1138
1139 // If the last instruction was marked for elimination,
1140 // remove it now.
1141 if (ToErase) {
1142 ToErase->eraseFromParent();
1143 ToErase = nullptr;
1144 }
1145 // Reset TrapOpt to false at the end of the basic block.
1147 TrapOpt = false;
1148 }
1149
1150 // Eliminate all the TOC save instructions which are redundant.
1151 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1152 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1153 if (FI->mustSaveTOC())
1154 NumTOCSavesInPrologue++;
1155
1156 // We try to eliminate redundant compare instruction.
1157 Simplified |= eliminateRedundantCompare();
1158
1159 return Simplified;
1160}
1161
1162// helper functions for eliminateRedundantCompare
1163static bool isEqOrNe(MachineInstr *BI) {
1165 unsigned PredCond = PPC::getPredicateCondition(Pred);
1166 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1167}
1168
1169static bool isSupportedCmpOp(unsigned opCode) {
1170 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1171 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1172 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1173 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1174}
1175
1176static bool is64bitCmpOp(unsigned opCode) {
1177 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1178 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1179}
1180
1181static bool isSignedCmpOp(unsigned opCode) {
1182 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1183 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1184}
1185
1186static unsigned getSignedCmpOpCode(unsigned opCode) {
1187 if (opCode == PPC::CMPLD) return PPC::CMPD;
1188 if (opCode == PPC::CMPLW) return PPC::CMPW;
1189 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1190 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1191 return opCode;
1192}
1193
1194// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1195// (LT x) to (LE x-1)
1196static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1197 uint64_t Imm = CMPI->getOperand(2).getImm();
1198 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1199 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1200 return 0;
1201
1203 unsigned PredCond = PPC::getPredicateCondition(Pred);
1204 unsigned PredHint = PPC::getPredicateHint(Pred);
1205 if (PredCond == PPC::PRED_GE)
1206 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1207 if (PredCond == PPC::PRED_LT)
1208 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1209
1210 return 0;
1211}
1212
1213// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1214// (LE x) to (LT x+1)
1215static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1216 uint64_t Imm = CMPI->getOperand(2).getImm();
1217 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1218 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1219 return 0;
1220
1222 unsigned PredCond = PPC::getPredicateCondition(Pred);
1223 unsigned PredHint = PPC::getPredicateHint(Pred);
1224 if (PredCond == PPC::PRED_GT)
1225 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1226 if (PredCond == PPC::PRED_LE)
1227 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1228
1229 return 0;
1230}
1231
1232// This takes a Phi node and returns a register value for the specified BB.
1233static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1235 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1236 MachineOperand &MO = Phi->getOperand(I);
1237 if (MO.getMBB() == MBB)
1238 return Phi->getOperand(I-1).getReg();
1239 }
1240 llvm_unreachable("invalid src basic block for this Phi node\n");
1241 return 0;
1242}
1243
1244// This function tracks the source of the register through register copy.
1245// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1246// assuming that the control comes from BB1 into BB2.
1247static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1249 unsigned SrcReg = Reg;
1250 while (true) {
1251 unsigned NextReg = SrcReg;
1252 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1253 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1254 NextReg = getIncomingRegForBlock(Inst, BB1);
1255 // We track through PHI only once to avoid infinite loop.
1256 BB1 = nullptr;
1257 }
1258 else if (Inst->isFullCopy())
1259 NextReg = Inst->getOperand(1).getReg();
1260 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1261 break;
1262 SrcReg = NextReg;
1263 }
1264 return SrcReg;
1265}
1266
1267static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1268 MachineBasicBlock *&PredMBB,
1269 MachineBasicBlock *&MBBtoMoveCmp,
1271
1272 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1273 auto BII = BB.getFirstInstrTerminator();
1274 // We optimize BBs ending with a conditional branch.
1275 // We check only for BCC here, not BCCLR, because BCCLR
1276 // will be formed only later in the pipeline.
1277 if (BB.succ_size() == 2 &&
1278 BII != BB.instr_end() &&
1279 (*BII).getOpcode() == PPC::BCC &&
1280 (*BII).getOperand(1).isReg()) {
1281 // We optimize only if the condition code is used only by one BCC.
1282 Register CndReg = (*BII).getOperand(1).getReg();
1283 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1284 return false;
1285
1286 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1287 // We assume compare and branch are in the same BB for ease of analysis.
1288 if (CMPI->getParent() != &BB)
1289 return false;
1290
1291 // We skip this BB if a physical register is used in comparison.
1292 for (MachineOperand &MO : CMPI->operands())
1293 if (MO.isReg() && !MO.getReg().isVirtual())
1294 return false;
1295
1296 return true;
1297 }
1298 return false;
1299 };
1300
1301 // If this BB has more than one successor, we can create a new BB and
1302 // move the compare instruction in the new BB.
1303 // So far, we do not move compare instruction to a BB having multiple
1304 // successors to avoid potentially increasing code size.
1305 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1306 return BB.succ_size() == 1;
1307 };
1308
1309 if (!isEligibleBB(MBB))
1310 return false;
1311
1312 unsigned NumPredBBs = MBB.pred_size();
1313 if (NumPredBBs == 1) {
1314 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1315 if (isEligibleBB(*TmpMBB)) {
1316 PredMBB = TmpMBB;
1317 MBBtoMoveCmp = nullptr;
1318 return true;
1319 }
1320 }
1321 else if (NumPredBBs == 2) {
1322 // We check for partially redundant case.
1323 // So far, we support cases with only two predecessors
1324 // to avoid increasing the number of instructions.
1326 MachineBasicBlock *Pred1MBB = *PI;
1327 MachineBasicBlock *Pred2MBB = *(PI+1);
1328
1329 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1330 // We assume Pred1MBB is the BB containing the compare to be merged and
1331 // Pred2MBB is the BB to which we will append a compare instruction.
1332 // Proceed as is if Pred1MBB is different from MBB.
1333 }
1334 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1335 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1336 std::swap(Pred1MBB, Pred2MBB);
1337 }
1338 else return false;
1339
1340 if (Pred1MBB == &MBB)
1341 return false;
1342
1343 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1344 // We cannot move the compare instruction if operands are not available
1345 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1347 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1348 for (int I = 1; I <= 2; I++)
1349 if (CMPI->getOperand(I).isReg()) {
1350 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1351 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1352 return false;
1353 }
1354
1355 PredMBB = Pred1MBB;
1356 MBBtoMoveCmp = Pred2MBB;
1357 return true;
1358 }
1359
1360 return false;
1361}
1362
1363// This function will iterate over the input map containing a pair of TOC save
1364// instruction and a flag. The flag will be set to false if the TOC save is
1365// proven redundant. This function will erase from the basic block all the TOC
1366// saves marked as redundant.
1367bool PPCMIPeephole::eliminateRedundantTOCSaves(
1368 std::map<MachineInstr *, bool> &TOCSaves) {
1369 bool Simplified = false;
1370 int NumKept = 0;
1371 for (auto TOCSave : TOCSaves) {
1372 if (!TOCSave.second) {
1373 TOCSave.first->eraseFromParent();
1374 RemoveTOCSave++;
1375 Simplified = true;
1376 } else {
1377 NumKept++;
1378 }
1379 }
1380
1381 if (NumKept > 1)
1382 MultiTOCSaves++;
1383
1384 return Simplified;
1385}
1386
1387// If multiple conditional branches are executed based on the (essentially)
1388// same comparison, we merge compare instructions into one and make multiple
1389// conditional branches on this comparison.
1390// For example,
1391// if (a == 0) { ... }
1392// else if (a < 0) { ... }
1393// can be executed by one compare and two conditional branches instead of
1394// two pairs of a compare and a conditional branch.
1395//
1396// This method merges two compare instructions in two MBBs and modifies the
1397// compare and conditional branch instructions if needed.
1398// For the above example, the input for this pass looks like:
1399// cmplwi r3, 0
1400// beq 0, .LBB0_3
1401// cmpwi r3, -1
1402// bgt 0, .LBB0_4
1403// So, before merging two compares, we need to modify these instructions as
1404// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1405// beq 0, .LBB0_3
// cmpwi r3, 0 ; greater than -1 means greater than or equal to 0
1407// bge 0, .LBB0_4
1408
1409bool PPCMIPeephole::eliminateRedundantCompare() {
1410 bool Simplified = false;
1411
1412 for (MachineBasicBlock &MBB2 : *MF) {
1413 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1414
1415 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1416 // as an optimization target if
1417 // - both MBBs end with a conditional branch,
1418 // - MBB1 is the only predecessor of MBB2, and
1419 // - compare does not take a physical register as a operand in both MBBs.
1420 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1421 //
1422 // As partially redundant case, we additionally handle if MBB2 has one
1423 // additional predecessor, which has only one successor (MBB2).
1424 // In this case, we move the compare instruction originally in MBB2 into
1425 // MBBtoMoveCmp. This partially redundant case is typically appear by
1426 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1427 //
1428 // Overview of CFG of related basic blocks
1429 // Fully redundant case Partially redundant case
1430 // -------- ---------------- --------
1431 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1432 // -------- ---------------- --------
1433 // | \ (w/ 1 succ) \ | \
1434 // | \ \ | \
1435 // | \ |
1436 // -------- --------
1437 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1438 // -------- and 2 succ) -------- and 2 succ)
1439 // | \ | \
1440 // | \ | \
1441 //
1442 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1443 continue;
1444
1445 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1446 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1447
1448 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1449 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1450 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1451
1452 // We cannot optimize an unsupported compare opcode or
1453 // a mix of 32-bit and 64-bit comparisons
1454 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1455 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1456 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1457 continue;
1458
1459 unsigned NewOpCode = 0;
1460 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1461 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1462 bool SwapOperands = false;
1463
1464 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1465 // Typically, unsigned comparison is used for equality check, but
1466 // we replace it with a signed comparison if the comparison
1467 // to be merged is a signed comparison.
1468 // In other cases of opcode mismatch, we cannot optimize this.
1469
1470 // We cannot change opcode when comparing against an immediate
1471 // if the most significant bit of the immediate is one
1472 // due to the difference in sign extension.
1473 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1474 if (!I->getOperand(2).isImm())
1475 return false;
1476 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1477 return Imm < 0;
1478 };
1479
1480 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1481 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1482 NewOpCode = CMPI1->getOpcode();
1483 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1484 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1485 NewOpCode = CMPI2->getOpcode();
1486 else continue;
1487 }
1488
1489 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1490 // In case of comparisons between two registers, these two registers
1491 // must be same to merge two comparisons.
1492 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1493 nullptr, nullptr, MRI);
1494 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1495 nullptr, nullptr, MRI);
1496 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1497 MBB1, &MBB2, MRI);
1498 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1499 MBB1, &MBB2, MRI);
1500
1501 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1502 // Same pair of registers in the same order; ready to merge as is.
1503 }
1504 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1505 // Same pair of registers in different order.
1506 // We reverse the predicate to merge compare instructions.
1508 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1509 // In case of partial redundancy, we need to swap operands
1510 // in another compare instruction.
1511 SwapOperands = true;
1512 }
1513 else continue;
1514 }
1515 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1516 // In case of comparisons between a register and an immediate,
1517 // the operand register must be same for two compare instructions.
1518 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1519 nullptr, nullptr, MRI);
1520 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1521 MBB1, &MBB2, MRI);
1522 if (Cmp1Operand1 != Cmp2Operand1)
1523 continue;
1524
1525 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1526 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1527
1528 // If immediate are not same, we try to adjust by changing predicate;
1529 // e.g. GT imm means GE (imm+1).
1530 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1531 int Diff = Imm1 - Imm2;
1532 if (Diff < -2 || Diff > 2)
1533 continue;
1534
1535 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1536 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1537 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1538 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1539 if (Diff == 2) {
1540 if (PredToInc2 && PredToDec1) {
1541 NewPredicate2 = PredToInc2;
1542 NewPredicate1 = PredToDec1;
1543 NewImm2++;
1544 NewImm1--;
1545 }
1546 }
1547 else if (Diff == 1) {
1548 if (PredToInc2) {
1549 NewImm2++;
1550 NewPredicate2 = PredToInc2;
1551 }
1552 else if (PredToDec1) {
1553 NewImm1--;
1554 NewPredicate1 = PredToDec1;
1555 }
1556 }
1557 else if (Diff == -1) {
1558 if (PredToDec2) {
1559 NewImm2--;
1560 NewPredicate2 = PredToDec2;
1561 }
1562 else if (PredToInc1) {
1563 NewImm1++;
1564 NewPredicate1 = PredToInc1;
1565 }
1566 }
1567 else if (Diff == -2) {
1568 if (PredToDec2 && PredToInc1) {
1569 NewPredicate2 = PredToDec2;
1570 NewPredicate1 = PredToInc1;
1571 NewImm2--;
1572 NewImm1++;
1573 }
1574 }
1575 }
1576
1577 // We cannot merge two compares if the immediates are not same.
1578 if (NewImm2 != NewImm1)
1579 continue;
1580 }
1581
1582 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1583 LLVM_DEBUG(CMPI1->dump());
1584 LLVM_DEBUG(BI1->dump());
1585 LLVM_DEBUG(CMPI2->dump());
1586 LLVM_DEBUG(BI2->dump());
1587
1588 // We adjust opcode, predicates and immediate as we determined above.
1589 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1590 CMPI1->setDesc(TII->get(NewOpCode));
1591 }
1592 if (NewPredicate1) {
1593 BI1->getOperand(0).setImm(NewPredicate1);
1594 }
1595 if (NewPredicate2) {
1596 BI2->getOperand(0).setImm(NewPredicate2);
1597 }
1598 if (NewImm1 != Imm1) {
1599 CMPI1->getOperand(2).setImm(NewImm1);
1600 }
1601
1602 if (IsPartiallyRedundant) {
1603 // We touch up the compare instruction in MBB2 and move it to
1604 // a previous BB to handle partially redundant case.
1605 if (SwapOperands) {
1606 Register Op1 = CMPI2->getOperand(1).getReg();
1607 Register Op2 = CMPI2->getOperand(2).getReg();
1608 CMPI2->getOperand(1).setReg(Op2);
1609 CMPI2->getOperand(2).setReg(Op1);
1610 }
1611 if (NewImm2 != Imm2)
1612 CMPI2->getOperand(2).setImm(NewImm2);
1613
1614 for (int I = 1; I <= 2; I++) {
1615 if (CMPI2->getOperand(I).isReg()) {
1616 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1617 if (Inst->getParent() != &MBB2)
1618 continue;
1619
1620 assert(Inst->getOpcode() == PPC::PHI &&
1621 "We cannot support if an operand comes from this BB.");
1622 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1623 CMPI2->getOperand(I).setReg(SrcReg);
1624 }
1625 }
1626 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1627 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1628
1629 DebugLoc DL = CMPI2->getDebugLoc();
1630 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1631 BuildMI(MBB2, MBB2.begin(), DL,
1632 TII->get(PPC::PHI), NewVReg)
1633 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1634 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1635 BI2->getOperand(1).setReg(NewVReg);
1636 }
1637 else {
1638 // We finally eliminate compare instruction in MBB2.
1639 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1640 CMPI2->eraseFromParent();
1641 }
1642 BI2->getOperand(1).setIsKill(true);
1643 BI1->getOperand(1).setIsKill(false);
1644
1645 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1646 LLVM_DEBUG(CMPI1->dump());
1647 LLVM_DEBUG(BI1->dump());
1648 LLVM_DEBUG(BI2->dump());
1649 if (IsPartiallyRedundant) {
1650 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1651 << printMBBReference(*MBBtoMoveCmp)
1652 << " to handle partial redundancy.\n");
1653 LLVM_DEBUG(CMPI2->dump());
1654 }
1655
1656 Simplified = true;
1657 }
1658
1659 return Simplified;
1660}
1661
1662// We miss the opportunity to emit an RLDIC when lowering jump tables
1663// since ISEL sees only a single basic block. When selecting, the clear
1664// and shift left will be in different blocks.
1665bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
1666 if (MI.getOpcode() != PPC::RLDICR)
1667 return false;
1668
1669 Register SrcReg = MI.getOperand(1).getReg();
1670 if (!SrcReg.isVirtual())
1671 return false;
1672
1673 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1674 if (SrcMI->getOpcode() != PPC::RLDICL)
1675 return false;
1676
1677 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1678 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1679 MachineOperand MOpSHMI = MI.getOperand(2);
1680 MachineOperand MOpMEMI = MI.getOperand(3);
1681 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1682 MOpMEMI.isImm()))
1683 return false;
1684
1685 uint64_t SHSrc = MOpSHSrc.getImm();
1686 uint64_t MBSrc = MOpMBSrc.getImm();
1687 uint64_t SHMI = MOpSHMI.getImm();
1688 uint64_t MEMI = MOpMEMI.getImm();
1689 uint64_t NewSH = SHSrc + SHMI;
1690 uint64_t NewMB = MBSrc - SHMI;
1691 if (NewMB > 63 || NewSH > 63)
1692 return false;
1693
1694 // The bits cleared with RLDICL are [0, MBSrc).
1695 // The bits cleared with RLDICR are (MEMI, 63].
1696 // After the sequence, the bits cleared are:
1697 // [0, MBSrc-SHMI) and (MEMI, 63).
1698 //
1699 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1700 if ((63 - NewSH) != MEMI)
1701 return false;
1702
1703 LLVM_DEBUG(dbgs() << "Converting pair: ");
1704 LLVM_DEBUG(SrcMI->dump());
1705 LLVM_DEBUG(MI.dump());
1706
1707 MI.setDesc(TII->get(PPC::RLDIC));
1708 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1709 MI.getOperand(2).setImm(NewSH);
1710 MI.getOperand(3).setImm(NewMB);
1711 MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill());
1712 SrcMI->getOperand(1).setIsKill(false);
1713
1714 LLVM_DEBUG(dbgs() << "To: ");
1715 LLVM_DEBUG(MI.dump());
1716 NumRotatesCollapsed++;
1717 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1718 if (MRI->use_nodbg_empty(SrcReg)) {
1719 assert(!SrcMI->hasImplicitDef() &&
1720 "Not expecting an implicit def with this instr.");
1721 SrcMI->eraseFromParent();
1722 }
1723 return true;
1724}
1725
1726// For case in LLVM IR
1727// entry:
1728// %iconv = sext i32 %index to i64
1729// br i1 undef label %true, label %false
1730// true:
1731// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1732// ...
1733// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1734// different BBs when conducting instruction selection. We can do a peephole
1735// optimization to combine these two instructions into extswsli after
1736// instruction selection.
1737bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1738 MachineInstr *&ToErase) {
1739 if (MI.getOpcode() != PPC::RLDICR)
1740 return false;
1741
1742 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1743 return false;
1744
1745 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1746
1747 MachineOperand MOpSHMI = MI.getOperand(2);
1748 MachineOperand MOpMEMI = MI.getOperand(3);
1749 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1750 return false;
1751
1752 uint64_t SHMI = MOpSHMI.getImm();
1753 uint64_t MEMI = MOpMEMI.getImm();
1754 if (SHMI + MEMI != 63)
1755 return false;
1756
1757 Register SrcReg = MI.getOperand(1).getReg();
1758 if (!SrcReg.isVirtual())
1759 return false;
1760
1761 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1762 if (SrcMI->getOpcode() != PPC::EXTSW &&
1763 SrcMI->getOpcode() != PPC::EXTSW_32_64)
1764 return false;
1765
1766 // If the register defined by extsw has more than one use, combination is not
1767 // needed.
1768 if (!MRI->hasOneNonDBGUse(SrcReg))
1769 return false;
1770
1771 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
1772 assert(SrcMI->getOperand(1).isReg() &&
1773 "EXTSW's second operand should be a register");
1774 if (!SrcMI->getOperand(1).getReg().isVirtual())
1775 return false;
1776
1777 LLVM_DEBUG(dbgs() << "Combining pair: ");
1778 LLVM_DEBUG(SrcMI->dump());
1779 LLVM_DEBUG(MI.dump());
1780
1781 MachineInstr *NewInstr =
1782 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
1783 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
1784 : TII->get(PPC::EXTSWSLI_32_64),
1785 MI.getOperand(0).getReg())
1786 .add(SrcMI->getOperand(1))
1787 .add(MOpSHMI);
1788 (void)NewInstr;
1789
1790 LLVM_DEBUG(dbgs() << "TO: ");
1791 LLVM_DEBUG(NewInstr->dump());
1792 ++NumEXTSWAndSLDICombined;
1793 ToErase = &MI;
1794 // SrcMI, which is extsw, is of no use now, erase it.
1795 SrcMI->eraseFromParent();
1796 return true;
1797}
1798
} // end anonymous namespace
1800
1802 "PowerPC MI Peephole Optimization", false, false)
1807 "PowerPC MI Peephole Optimization", false, false)
1808
1809char PPCMIPeephole::ID = 0;
1811llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
PowerPC MI Peephole Optimization
static cl::opt< bool > EnableZExtElimination("ppc-eliminate-zeroext", cl::desc("enable elimination of zero-extensions"), cl::init(true), cl::Hidden)
static cl::opt< bool > FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), cl::desc("Iterate to a fixed point when attempting to " "convert reg-reg instructions to reg-imm"))
static cl::opt< bool > EnableTrapOptimization("ppc-opt-conditional-trap", cl::desc("enable optimization of conditional traps"), cl::init(false), cl::Hidden)
static cl::opt< bool > ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), cl::desc("Convert eligible reg+reg instructions to reg+imm"))
#define DEBUG_TYPE
static cl::opt< bool > EnableSExtElimination("ppc-eliminate-signext", cl::desc("enable elimination of sign-extensions"), cl::init(true), cl::Hidden)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This is an important base class in LLVM.
Definition: Constant.h:41
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:174
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
unsigned pred_size() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
uint64_t getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:599
bool isFullCopy() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool isUsingPCRelativeCalls() const
bool isELFv2ABI() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void dump() const
Definition: Pass.cpp:136
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:245
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializePPCMIPeepholePass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
@ Keep
No function return thunk.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createPPCMIPeepholePass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860