LLVM 20.0.0git
PPCInstrInfo.cpp
Go to the documentation of this file.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCInst.h"
42#include "llvm/Support/Debug.h"
45
46using namespace llvm;
47
48#define DEBUG_TYPE "ppc-instr-info"
49
50#define GET_INSTRMAP_INFO
51#define GET_INSTRINFO_CTOR_DTOR
52#include "PPCGenInstrInfo.inc"
53
// Statistic counters declared by this file through the STATISTIC macro. Each
// entry pairs a counter variable with the description string given next to it.
54STATISTIC(NumStoreSPILLVSRRCAsVec,
55 "Number of spillvsrrc spilled to stack as vec");
56STATISTIC(NumStoreSPILLVSRRCAsGpr,
57 "Number of spillvsrrc spilled to stack as gpr");
58STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
59STATISTIC(CmpIselsConverted,
60 "Number of ISELs that depend on comparison of constants converted");
61STATISTIC(MissedConvertibleImmediateInstrs,
62 "Number of compare-immediate instructions fed by constants");
63STATISTIC(NumRcRotatesConvertedToRcAnd,
64 "Number of record-form rotates converted to record-form andi");
65
66static cl::
67opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
68 cl::desc("Disable analysis for CTR loops"));
69
70static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
71cl::desc("Disable compare instruction optimization"), cl::Hidden);
72
73static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
74cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
76
77static cl::opt<bool>
78UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
79 cl::desc("Use the old (incorrect) instruction latency calculation"));
80
81static cl::opt<float>
82 FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
83 cl::desc("register pressure factor for the transformations."));
84
86 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
87 cl::desc("enable register pressure reduce in machine combiner pass."));
88
89// Pin the vtable to this file.
90void PPCInstrInfo::anchor() {}
91
93 : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
94 /* CatchRetOpcode */ -1,
95 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
96 Subtarget(STI), RI(STI.getTargetMachine()) {}
97
98/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
99/// this target when scheduling the DAG.
102 const ScheduleDAG *DAG) const {
103 unsigned Directive =
104 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
107 const InstrItineraryData *II =
108 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
109 return new ScoreboardHazardRecognizer(II, DAG);
110 }
111
113}
114
115/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
116/// to use for this target when scheduling the DAG.
119 const ScheduleDAG *DAG) const {
120 unsigned Directive =
121 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
122
123 // FIXME: Leaving this as-is until we have POWER9 scheduling info
125 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
126
127 // Most subtargets use a PPC970 recognizer.
130 assert(DAG->TII && "No InstrInfo?");
131
132 return new PPCHazardRecognizer970(*DAG);
133 }
134
135 return new ScoreboardHazardRecognizer(II, DAG);
136}
137
139 const MachineInstr &MI,
140 unsigned *PredCost) const {
141 if (!ItinData || UseOldLatencyCalc)
142 return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
143
144 // The default implementation of getInstrLatency calls getStageLatency, but
145 // getStageLatency does not do the right thing for us. While we have
146 // itinerary, most cores are fully pipelined, and so the itineraries only
147 // express the first part of the pipeline, not every stage. Instead, we need
148 // to use the listed output operand cycle number (using operand 0 here, which
149 // is an output).
150
151 unsigned Latency = 1;
152 unsigned DefClass = MI.getDesc().getSchedClass();
153 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
154 const MachineOperand &MO = MI.getOperand(i);
155 if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
156 continue;
157
158 std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
159 if (!Cycle)
160 continue;
161
162 Latency = std::max(Latency, *Cycle);
163 }
164
165 return Latency;
166}
167
168std::optional<unsigned> PPCInstrInfo::getOperandLatency(
169 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
170 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
171 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
172 ItinData, DefMI, DefIdx, UseMI, UseIdx);
173
174 if (!DefMI.getParent())
175 return Latency;
176
177 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
178 Register Reg = DefMO.getReg();
179
180 bool IsRegCR;
181 if (Reg.isVirtual()) {
182 const MachineRegisterInfo *MRI =
183 &DefMI.getParent()->getParent()->getRegInfo();
184 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
185 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
186 } else {
187 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
188 PPC::CRBITRCRegClass.contains(Reg);
189 }
190
191 if (UseMI.isBranch() && IsRegCR) {
192 if (!Latency)
193 Latency = getInstrLatency(ItinData, DefMI);
194
195 // On some cores, there is an additional delay between writing to a condition
196 // register, and using it from a branch.
197 unsigned Directive = Subtarget.getCPUDirective();
198 switch (Directive) {
199 default: break;
200 case PPC::DIR_7400:
201 case PPC::DIR_750:
202 case PPC::DIR_970:
203 case PPC::DIR_E5500:
204 case PPC::DIR_PWR4:
205 case PPC::DIR_PWR5:
206 case PPC::DIR_PWR5X:
207 case PPC::DIR_PWR6:
208 case PPC::DIR_PWR6X:
209 case PPC::DIR_PWR7:
210 case PPC::DIR_PWR8:
211 // FIXME: Is this needed for POWER9?
212 Latency = *Latency + 2;
213 break;
214 }
215 }
216
217 return Latency;
218}
219
221 uint32_t Flags) const {
222 MI.setFlags(Flags);
226}
227
228// This function does not list all associative and commutative operations, but
229// only those worth feeding through the machine combiner in an attempt to
230// reduce the critical path. Mostly, this means floating-point operations,
231// because they have high latencies(>=5) (compared to other operations, such as
232// and/or, which are also associative and commutative, but have low latencies).
234 bool Invert) const {
235 if (Invert)
236 return false;
237 switch (Inst.getOpcode()) {
238 // Floating point:
239 // FP Add:
240 case PPC::FADD:
241 case PPC::FADDS:
242 // FP Multiply:
243 case PPC::FMUL:
244 case PPC::FMULS:
245 // Altivec Add:
246 case PPC::VADDFP:
247 // VSX Add:
248 case PPC::XSADDDP:
249 case PPC::XVADDDP:
250 case PPC::XVADDSP:
251 case PPC::XSADDSP:
252 // VSX Multiply:
253 case PPC::XSMULDP:
254 case PPC::XVMULDP:
255 case PPC::XVMULSP:
256 case PPC::XSMULSP:
259 // Fixed point:
260 // Multiply:
261 case PPC::MULHD:
262 case PPC::MULLD:
263 case PPC::MULHW:
264 case PPC::MULLW:
265 return true;
266 default:
267 return false;
268 }
269}
270
// Column indices into one row of the FMAOpIdxInfo table. They are typed
// enumerators rather than preprocessor macros so that the names obey normal
// C++ scoping; names and values are unchanged.
enum : unsigned {
  InfoArrayIdxFMAInst = 0,  // FMA opcode.
  InfoArrayIdxFAddInst = 1, // ADD opcode associated with the FMA.
  InfoArrayIdxFMULInst = 2, // MUL opcode associated with the FMA.
  InfoArrayIdxAddOpIdx = 3, // Index of the add operand in the FMA.
  InfoArrayIdxMULOpIdx = 4, // Index of the first mul operand in the FMA.
  InfoArrayIdxFSubInst = 5  // SUB opcode associated with the FMA.
};
277// Array keeps info for FMA instructions:
278// Index 0(InfoArrayIdxFMAInst): FMA instruction;
279// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
280// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
281// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
282// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
283// second MUL operand index is plus 1;
284// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
285static const uint16_t FMAOpIdxInfo[][6] = {
286 // FIXME: Add more FMA instructions like XSNMADDADP and so on.
287 {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
288 {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
289 {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
290 {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
291 {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
292 {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
293
294// Check if an opcode is a FMA instruction. If it is, return the index in array
295// FMAOpIdxInfo. Otherwise, return -1.
296int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
297 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
298 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
299 return I;
300 return -1;
301}
302
303// On PowerPC target, we have two kinds of patterns related to FMA:
304// 1: Improve ILP.
305// Try to reassociate FMA chains like below:
306//
307// Pattern 1:
308// A = FADD X, Y (Leaf)
309// B = FMA A, M21, M22 (Prev)
310// C = FMA B, M31, M32 (Root)
311// -->
312// A = FMA X, M21, M22
313// B = FMA Y, M31, M32
314// C = FADD A, B
315//
316// Pattern 2:
317// A = FMA X, M11, M12 (Leaf)
318// B = FMA A, M21, M22 (Prev)
319// C = FMA B, M31, M32 (Root)
320// -->
321// A = FMUL M11, M12
322// B = FMA X, M21, M22
323// D = FMA A, M31, M32
324// C = FADD B, D
325//
326// breaking the dependency between A and B, allowing FMA to be executed in
327// parallel (or back-to-back in a pipeline) instead of depending on each other.
328//
329// 2: Reduce register pressure.
330// Try to reassociate FMA with FSUB and a constant like below:
331// C is a floating point const.
332//
333// Pattern 1:
334// A = FSUB X, Y (Leaf)
335// D = FMA B, C, A (Root)
336// -->
337// A = FMA B, Y, -C
338// D = FMA A, X, C
339//
340// Pattern 2:
341// A = FSUB X, Y (Leaf)
342// D = FMA B, A, C (Root)
343// -->
344// A = FMA B, Y, -C
345// D = FMA A, X, C
346//
347// Before the transformation, A must be assigned a different hardware
348// register from D. After the transformation, A and D must be assigned the
349// same hardware register due to the TIE attribute of FMA instructions.
350//
353 bool DoRegPressureReduce) const {
357
358 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
359 for (const auto &MO : Instr.explicit_operands())
360 if (!(MO.isReg() && MO.getReg().isVirtual()))
361 return false;
362 return true;
363 };
364
365 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
366 unsigned OpType) {
367 if (Instr.getOpcode() !=
368 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
369 return false;
370
371 // Instruction can be reassociated.
372 // fast math flags may prohibit reassociation.
373 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
374 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
375 return false;
376
377 // Instruction operands are virtual registers for reassociation.
378 if (!IsAllOpsVirtualReg(Instr))
379 return false;
380
381 // For register pressure reassociation, the FSub must have only one use as
382 // we want to delete the sub to save its def.
383 if (OpType == InfoArrayIdxFSubInst &&
384 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
385 return false;
386
387 return true;
388 };
389
390 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
391 int16_t &MulOpIdx, bool IsLeaf) {
392 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
393 if (Idx < 0)
394 return false;
395
396 // Instruction can be reassociated.
397 // fast math flags may prohibit reassociation.
398 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
399 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
400 return false;
401
402 // Instruction operands are virtual registers for reassociation.
403 if (!IsAllOpsVirtualReg(Instr))
404 return false;
405
407 if (IsLeaf)
408 return true;
409
411
412 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
413 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
414 // If 'add' operand's def is not in current block, don't do ILP related opt.
415 if (!MIAdd || MIAdd->getParent() != MBB)
416 return false;
417
418 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
419 // as this fma will be changed later.
420 return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
421 };
422
423 int16_t AddOpIdx = -1;
424 int16_t MulOpIdx = -1;
425
426 bool IsUsedOnceL = false;
427 bool IsUsedOnceR = false;
428 MachineInstr *MULInstrL = nullptr;
429 MachineInstr *MULInstrR = nullptr;
430
431 auto IsRPReductionCandidate = [&]() {
432 // Currently, we only support float and double.
433 // FIXME: add support for other types.
434 unsigned Opcode = Root.getOpcode();
435 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
436 return false;
437
438 // Root must be a valid FMA like instruction.
439 // Treat it as leaf as we don't care its add operand.
440 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
441 assert((MulOpIdx >= 0) && "mul operand index not right!");
442 Register MULRegL = TRI->lookThruSingleUseCopyChain(
443 Root.getOperand(MulOpIdx).getReg(), MRI);
444 Register MULRegR = TRI->lookThruSingleUseCopyChain(
445 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
446 if (!MULRegL && !MULRegR)
447 return false;
448
449 if (MULRegL && !MULRegR) {
450 MULRegR =
451 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
452 IsUsedOnceL = true;
453 } else if (!MULRegL && MULRegR) {
454 MULRegL =
455 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
456 IsUsedOnceR = true;
457 } else {
458 IsUsedOnceL = true;
459 IsUsedOnceR = true;
460 }
461
462 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
463 return false;
464
465 MULInstrL = MRI->getVRegDef(MULRegL);
466 MULInstrR = MRI->getVRegDef(MULRegR);
467 return true;
468 }
469 return false;
470 };
471
472 // Register pressure fma reassociation patterns.
473 if (DoRegPressureReduce && IsRPReductionCandidate()) {
474 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
475 // Register pressure pattern 1
476 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
477 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
478 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
480 return true;
481 }
482
483 // Register pressure pattern 2
484 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
485 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
486 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
488 return true;
489 }
490 }
491
492 // ILP fma reassociation patterns.
493 // Root must be a valid FMA like instruction.
494 AddOpIdx = -1;
495 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
496 return false;
497
498 assert((AddOpIdx >= 0) && "add operand index not right!");
499
500 Register RegB = Root.getOperand(AddOpIdx).getReg();
501 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
502
503 // Prev must be a valid FMA like instruction.
504 AddOpIdx = -1;
505 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
506 return false;
507
508 assert((AddOpIdx >= 0) && "add operand index not right!");
509
510 Register RegA = Prev->getOperand(AddOpIdx).getReg();
511 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
512 AddOpIdx = -1;
513 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
515 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
516 return true;
517 }
518 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
520 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
521 return true;
522 }
523 return false;
524}
525
527 MachineInstr &Root, unsigned &Pattern,
528 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
529 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
530
531 MachineFunction *MF = Root.getMF();
535
536 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
537 if (Idx < 0)
538 return;
539
541
542 // For now we only need to fix up placeholder for register pressure reduce
543 // patterns.
544 Register ConstReg = 0;
545 switch (Pattern) {
547 ConstReg =
548 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
549 break;
551 ConstReg =
552 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
553 break;
554 default:
555 // Not register pressure reduce patterns.
556 return;
557 }
558
559 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
560 // Get const value from const pool.
561 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
562 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
563
564 // Get negative fp const.
565 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
566 F1.changeSign();
567 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
568 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
569
570 // Put negative fp const into constant pool.
571 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
572
573 MachineOperand *Placeholder = nullptr;
574 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
575 for (auto *Inst : InsInstrs) {
576 for (MachineOperand &Operand : Inst->explicit_operands()) {
577 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
578 if (Operand.getReg() == PPC::ZERO8) {
579 Placeholder = &Operand;
580 break;
581 }
582 }
583 }
584
585 assert(Placeholder && "Placeholder does not exist!");
586
587 // Generate instructions to load the const fp from constant pool.
588 // We only support PPC64 and medium code model.
589 Register LoadNewConst =
590 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
591
592 // Fill the placeholder with the new load from constant pool.
593 Placeholder->setReg(LoadNewConst);
594}
595
597 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
598
600 return false;
601
602 // Currently, we only enable register pressure reducing in machine combiner
603 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
604 // support.
605 //
606 // So we need following instructions to access a TOC entry:
607 //
608 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
609 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
610 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
611 //
612 // FIXME: add more supported targets, like Small and Large code model, PPC32,
613 // AIX.
614 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
616 return false;
617
619 const MachineFunction *MF = MBB->getParent();
620 const MachineRegisterInfo *MRI = &MF->getRegInfo();
621
622 auto GetMBBPressure =
623 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
624 RegionPressure Pressure;
625 RegPressureTracker RPTracker(Pressure);
626
627 // Initialize the register pressure tracker.
628 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
629 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
630
631 for (const auto &MI : reverse(*MBB)) {
632 if (MI.isDebugValue() || MI.isDebugLabel())
633 continue;
634 RegisterOperands RegOpers;
635 RegOpers.collect(MI, *TRI, *MRI, false, false);
636 RPTracker.recedeSkipDebugValues();
637 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
638 RPTracker.recede(RegOpers);
639 }
640
641 // Close the RPTracker to finalize live ins.
642 RPTracker.closeRegion();
643
644 return RPTracker.getPressure().MaxSetPressure;
645 };
646
647 // For now we only care about float and double type fma.
648 unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
649 *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
650
651 // Only reduce register pressure when pressure is high.
652 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
653 (float)VSSRCLimit * FMARPFactor;
654}
655
657 // I has only one memory operand which is load from constant pool.
658 if (!I->hasOneMemOperand())
659 return false;
660
661 MachineMemOperand *Op = I->memoperands()[0];
662 return Op->isLoad() && Op->getPseudoValue() &&
663 Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
664}
665
666Register PPCInstrInfo::generateLoadForNewConst(
667 unsigned Idx, MachineInstr *MI, Type *Ty,
668 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
669 // Now we only support PPC64, Medium code model and P9 with vector.
670 // We have immutable pattern to access const pool. See function
671 // shouldReduceRegisterPressure.
672 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
674 "Target not supported!\n");
675
676 MachineFunction *MF = MI->getMF();
678
679 // Generate ADDIStocHA8
680 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
681 MachineInstrBuilder TOCOffset =
682 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
683 .addReg(PPC::X2)
685
686 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
687 "Only float and double are supported!");
688
689 unsigned LoadOpcode;
690 // Should be float type or double type.
691 if (Ty->isFloatTy())
692 LoadOpcode = PPC::DFLOADf32;
693 else
694 LoadOpcode = PPC::DFLOADf64;
695
696 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
697 Register VReg2 = MRI->createVirtualRegister(RC);
701
702 // Generate Load from constant pool.
704 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
706 .addReg(VReg1, getKillRegState(true))
707 .addMemOperand(MMO);
708
709 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
710
711 // Insert the toc load instructions into InsInstrs.
712 InsInstrs.insert(InsInstrs.begin(), Load);
713 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
714 return VReg2;
715}
716
717// This function returns the const value in constant pool if the \p I is a load
718// from constant pool.
719const Constant *
721 MachineFunction *MF = I->getMF();
724 assert(I->mayLoad() && "Should be a load instruction.\n");
725 for (auto MO : I->uses()) {
726 if (!MO.isReg())
727 continue;
728 Register Reg = MO.getReg();
729 if (Reg == 0 || !Reg.isVirtual())
730 continue;
731 // Find the toc address.
732 MachineInstr *DefMI = MRI->getVRegDef(Reg);
733 for (auto MO2 : DefMI->uses())
734 if (MO2.isCPI())
735 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
736 }
737 return nullptr;
738}
739
741 switch (Pattern) {
748 default:
750 }
751}
752
755 bool DoRegPressureReduce) const {
756 // Using the machine combiner in this way is potentially expensive, so
757 // restrict to when aggressive optimizations are desired.
759 return false;
760
761 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
762 return true;
763
765 DoRegPressureReduce);
766}
767
769 MachineInstr &Root, unsigned Pattern,
772 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
773 switch (Pattern) {
778 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
779 break;
780 default:
781 // Reassociate default patterns.
783 DelInstrs, InstrIdxForVirtReg);
784 break;
785 }
786}
787
788void PPCInstrInfo::reassociateFMA(
789 MachineInstr &Root, unsigned Pattern,
792 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
793 MachineFunction *MF = Root.getMF();
796 MachineOperand &OpC = Root.getOperand(0);
797 Register RegC = OpC.getReg();
798 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
799 MRI.constrainRegClass(RegC, RC);
800
801 unsigned FmaOp = Root.getOpcode();
802 int16_t Idx = getFMAOpIdxInfo(FmaOp);
803 assert(Idx >= 0 && "Root must be a FMA instruction");
804
805 bool IsILPReassociate =
808
811
812 MachineInstr *Prev = nullptr;
813 MachineInstr *Leaf = nullptr;
814 switch (Pattern) {
815 default:
816 llvm_unreachable("not recognized pattern!");
819 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
820 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
821 break;
823 Register MULReg =
824 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
825 Leaf = MRI.getVRegDef(MULReg);
826 break;
827 }
829 Register MULReg = TRI->lookThruCopyLike(
830 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
831 Leaf = MRI.getVRegDef(MULReg);
832 break;
833 }
834 }
835
836 uint32_t IntersectedFlags = 0;
837 if (IsILPReassociate)
838 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
839 else
840 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
841
842 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
843 bool &KillFlag) {
844 Reg = Operand.getReg();
845 MRI.constrainRegClass(Reg, RC);
846 KillFlag = Operand.isKill();
847 };
848
849 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
850 Register &MulOp2, Register &AddOp,
851 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
852 bool &AddOpKillFlag) {
853 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
854 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
855 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
856 };
857
858 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
859 RegA21, RegB;
860 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
861 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
862 KillA11 = false, KillA21 = false, KillB = false;
863
864 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
865
866 if (IsILPReassociate)
867 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
868
870 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
871 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
873 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
874 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
875 } else {
876 // Get FSUB instruction info.
877 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
878 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
879 }
880
881 // Create new virtual registers for the new results instead of
882 // recycling legacy ones because the MachineCombiner's computation of the
883 // critical path requires a new register definition rather than an existing
884 // one.
885 // For register pressure reassociation, we only need create one virtual
886 // register for the new fma.
887 Register NewVRA = MRI.createVirtualRegister(RC);
888 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
889
890 Register NewVRB = 0;
891 if (IsILPReassociate) {
892 NewVRB = MRI.createVirtualRegister(RC);
893 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
894 }
895
896 Register NewVRD = 0;
898 NewVRD = MRI.createVirtualRegister(RC);
899 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
900 }
901
902 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
903 Register RegMul1, bool KillRegMul1,
904 Register RegMul2, bool KillRegMul2) {
905 MI->getOperand(AddOpIdx).setReg(RegAdd);
906 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
907 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
908 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
909 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
910 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
911 };
912
913 MachineInstrBuilder NewARegPressure, NewCRegPressure;
914 switch (Pattern) {
915 default:
916 llvm_unreachable("not recognized pattern!");
918 // Create new instructions for insertion.
919 MachineInstrBuilder MINewB =
920 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
921 .addReg(RegX, getKillRegState(KillX))
922 .addReg(RegM21, getKillRegState(KillM21))
923 .addReg(RegM22, getKillRegState(KillM22));
924 MachineInstrBuilder MINewA =
925 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
926 .addReg(RegY, getKillRegState(KillY))
927 .addReg(RegM31, getKillRegState(KillM31))
928 .addReg(RegM32, getKillRegState(KillM32));
929 // If AddOpIdx is not 1, adjust the order.
930 if (AddOpIdx != 1) {
931 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
932 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
933 }
934
935 MachineInstrBuilder MINewC =
936 BuildMI(*MF, Root.getDebugLoc(),
938 .addReg(NewVRB, getKillRegState(true))
939 .addReg(NewVRA, getKillRegState(true));
940
941 // Update flags for newly created instructions.
942 setSpecialOperandAttr(*MINewA, IntersectedFlags);
943 setSpecialOperandAttr(*MINewB, IntersectedFlags);
944 setSpecialOperandAttr(*MINewC, IntersectedFlags);
945
946 // Record new instructions for insertion.
947 InsInstrs.push_back(MINewA);
948 InsInstrs.push_back(MINewB);
949 InsInstrs.push_back(MINewC);
950 break;
951 }
953 assert(NewVRD && "new FMA register not created!");
954 // Create new instructions for insertion.
955 MachineInstrBuilder MINewA =
956 BuildMI(*MF, Leaf->getDebugLoc(),
958 .addReg(RegM11, getKillRegState(KillM11))
959 .addReg(RegM12, getKillRegState(KillM12));
960 MachineInstrBuilder MINewB =
961 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
962 .addReg(RegX, getKillRegState(KillX))
963 .addReg(RegM21, getKillRegState(KillM21))
964 .addReg(RegM22, getKillRegState(KillM22));
965 MachineInstrBuilder MINewD =
966 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
967 .addReg(NewVRA, getKillRegState(true))
968 .addReg(RegM31, getKillRegState(KillM31))
969 .addReg(RegM32, getKillRegState(KillM32));
970 // If AddOpIdx is not 1, adjust the order.
971 if (AddOpIdx != 1) {
972 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
973 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
974 KillM32);
975 }
976
977 MachineInstrBuilder MINewC =
978 BuildMI(*MF, Root.getDebugLoc(),
980 .addReg(NewVRB, getKillRegState(true))
981 .addReg(NewVRD, getKillRegState(true));
982
983 // Update flags for newly created instructions.
984 setSpecialOperandAttr(*MINewA, IntersectedFlags);
985 setSpecialOperandAttr(*MINewB, IntersectedFlags);
986 setSpecialOperandAttr(*MINewD, IntersectedFlags);
987 setSpecialOperandAttr(*MINewC, IntersectedFlags);
988
989 // Record new instructions for insertion.
990 InsInstrs.push_back(MINewA);
991 InsInstrs.push_back(MINewB);
992 InsInstrs.push_back(MINewD);
993 InsInstrs.push_back(MINewC);
994 break;
995 }
998 Register VarReg;
999 bool KillVarReg = false;
1001 VarReg = RegM31;
1002 KillVarReg = KillM31;
1003 } else {
1004 VarReg = RegM32;
1005 KillVarReg = KillM32;
1006 }
1007 // We don't want to get negative const from memory pool too early, as the
1008 // created entry will not be deleted even if it has no users. Since all
1009 // operand of Leaf and Root are virtual register, we use zero register
1010 // here as a placeholder. When the InsInstrs is selected in
1011 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
1012 // with a virtual register which is a load from constant pool.
1013 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
1014 .addReg(RegB, getKillRegState(RegB))
1015 .addReg(RegY, getKillRegState(KillY))
1016 .addReg(PPC::ZERO8);
1017 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
1018 .addReg(NewVRA, getKillRegState(true))
1019 .addReg(RegX, getKillRegState(KillX))
1020 .addReg(VarReg, getKillRegState(KillVarReg));
1021 // For now, we only support xsmaddadp/xsmaddasp, their add operand are
1022 // both at index 1, no need to adjust.
1023 // FIXME: when add more fma instructions support, like fma/fmas, adjust
1024 // the operand index here.
1025 break;
1026 }
1027 }
1028
1029 if (!IsILPReassociate) {
1030 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
1031 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
1032
1033 InsInstrs.push_back(NewARegPressure);
1034 InsInstrs.push_back(NewCRegPressure);
1035 }
1036
1037 assert(!InsInstrs.empty() &&
1038 "Insertion instructions set should not be empty!");
1039
1040 // Record old instructions for deletion.
1041 DelInstrs.push_back(Leaf);
1042 if (IsILPReassociate)
1043 DelInstrs.push_back(Prev);
1044 DelInstrs.push_back(&Root);
1045}
1046
1047// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
1049 Register &SrcReg, Register &DstReg,
1050 unsigned &SubIdx) const {
1051 switch (MI.getOpcode()) {
1052 default: return false;
1053 case PPC::EXTSW:
1054 case PPC::EXTSW_32:
1055 case PPC::EXTSW_32_64:
1056 SrcReg = MI.getOperand(1).getReg();
1057 DstReg = MI.getOperand(0).getReg();
1058 SubIdx = PPC::sub_32;
1059 return true;
1060 }
1061}
1062
1064 int &FrameIndex) const {
1065 if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
1066 // Check for the operands added by addFrameReference (the immediate is the
1067 // offset which defaults to 0).
1068 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1069 MI.getOperand(2).isFI()) {
1070 FrameIndex = MI.getOperand(2).getIndex();
1071 return MI.getOperand(0).getReg();
1072 }
1073 }
1074 return 0;
1075}
1076
1077// For opcodes with the ReMaterializable flag set, this function is called to
1078// verify the instruction is really rematable.
1080 const MachineInstr &MI) const {
1081 switch (MI.getOpcode()) {
1082 default:
1083 // Let the base implementation decide.
1084 break;
1085 case PPC::LI:
1086 case PPC::LI8:
1087 case PPC::PLI:
1088 case PPC::PLI8:
1089 case PPC::LIS:
1090 case PPC::LIS8:
1091 case PPC::ADDIStocHA:
1092 case PPC::ADDIStocHA8:
1093 case PPC::ADDItocL:
1094 case PPC::ADDItocL8:
1095 case PPC::LOAD_STACK_GUARD:
1096 case PPC::PPCLdFixedAddr:
1097 case PPC::XXLXORz:
1098 case PPC::XXLXORspz:
1099 case PPC::XXLXORdpz:
1100 case PPC::XXLEQVOnes:
1101 case PPC::XXSPLTI32DX:
1102 case PPC::XXSPLTIW:
1103 case PPC::XXSPLTIDP:
1104 case PPC::V_SET0B:
1105 case PPC::V_SET0H:
1106 case PPC::V_SET0:
1107 case PPC::V_SETALLONESB:
1108 case PPC::V_SETALLONESH:
1109 case PPC::V_SETALLONES:
1110 case PPC::CRSET:
1111 case PPC::CRUNSET:
1112 case PPC::XXSETACCZ:
1113 case PPC::XXSETACCZW:
1114 return true;
1115 }
1117}
1118
1120 int &FrameIndex) const {
1121 if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
1122 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1123 MI.getOperand(2).isFI()) {
1124 FrameIndex = MI.getOperand(2).getIndex();
1125 return MI.getOperand(0).getReg();
1126 }
1127 }
1128 return 0;
1129}
1130
1132 unsigned OpIdx1,
1133 unsigned OpIdx2) const {
1134 MachineFunction &MF = *MI.getParent()->getParent();
1135
1136 // Normal instructions can be commuted the obvious way.
1137 if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
1138 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1139 // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
1140 // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
1141 // changing the relative order of the mask operands might change what happens
1142 // to the high-bits of the mask (and, thus, the result).
1143
1144 // Cannot commute if it has a non-zero rotate count.
1145 if (MI.getOperand(3).getImm() != 0)
1146 return nullptr;
1147
1148 // If we have a zero rotate count, we have:
1149 // M = mask(MB,ME)
1150 // Op0 = (Op1 & ~M) | (Op2 & M)
1151 // Change this to:
1152 // M = mask((ME+1)&31, (MB-1)&31)
1153 // Op0 = (Op2 & ~M) | (Op1 & M)
1154
1155 // Swap op1/op2
1156 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
1157 "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
1158 Register Reg0 = MI.getOperand(0).getReg();
1159 Register Reg1 = MI.getOperand(1).getReg();
1160 Register Reg2 = MI.getOperand(2).getReg();
1161 unsigned SubReg1 = MI.getOperand(1).getSubReg();
1162 unsigned SubReg2 = MI.getOperand(2).getSubReg();
1163 bool Reg1IsKill = MI.getOperand(1).isKill();
1164 bool Reg2IsKill = MI.getOperand(2).isKill();
1165 bool ChangeReg0 = false;
1166 // If machine instrs are no longer in two-address forms, update
1167 // destination register as well.
1168 if (Reg0 == Reg1) {
1169 // Must be two address instruction (i.e. op1 is tied to op0).
1170 assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
1171 "Expecting a two-address instruction!");
1172 assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
1173 Reg2IsKill = false;
1174 ChangeReg0 = true;
1175 }
1176
1177 // Masks.
1178 unsigned MB = MI.getOperand(4).getImm();
1179 unsigned ME = MI.getOperand(5).getImm();
1180
1181 // We can't commute a trivial mask (there is no way to represent an all-zero
1182 // mask).
1183 if (MB == 0 && ME == 31)
1184 return nullptr;
1185
1186 if (NewMI) {
1187 // Create a new instruction.
1188 Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
1189 bool Reg0IsDead = MI.getOperand(0).isDead();
1190 return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
1191 .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
1192 .addReg(Reg2, getKillRegState(Reg2IsKill))
1193 .addReg(Reg1, getKillRegState(Reg1IsKill))
1194 .addImm((ME + 1) & 31)
1195 .addImm((MB - 1) & 31);
1196 }
1197
1198 if (ChangeReg0) {
1199 MI.getOperand(0).setReg(Reg2);
1200 MI.getOperand(0).setSubReg(SubReg2);
1201 }
1202 MI.getOperand(2).setReg(Reg1);
1203 MI.getOperand(1).setReg(Reg2);
1204 MI.getOperand(2).setSubReg(SubReg1);
1205 MI.getOperand(1).setSubReg(SubReg2);
1206 MI.getOperand(2).setIsKill(Reg1IsKill);
1207 MI.getOperand(1).setIsKill(Reg2IsKill);
1208
1209 // Swap the mask around.
1210 MI.getOperand(4).setImm((ME + 1) & 31);
1211 MI.getOperand(5).setImm((MB - 1) & 31);
1212 return &MI;
1213}
1214
1216 unsigned &SrcOpIdx1,
1217 unsigned &SrcOpIdx2) const {
1218 // For VSX A-Type FMA instructions, it is the first two operands that can be
1219 // commuted, however, because the non-encoded tied input operand is listed
1220 // first, the operands to swap are actually the second and third.
1221
1222 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
1223 if (AltOpc == -1)
1224 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1225
1226 // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
1227 // and SrcOpIdx2.
1228 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
1229}
1230
1233 // This function is used for scheduling, and the nop wanted here is the type
1234 // that terminates dispatch groups on the POWER cores.
1235 unsigned Directive = Subtarget.getCPUDirective();
1236 unsigned Opcode;
1237 switch (Directive) {
1238 default: Opcode = PPC::NOP; break;
1239 case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
1240 case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
1241 case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
1242 // FIXME: Update when POWER9 scheduling model is ready.
1243 case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
1244 }
1245
1246 DebugLoc DL;
1247 BuildMI(MBB, MI, DL, get(Opcode));
1248}
1249
1250/// Return the noop instruction to use for a noop.
1252 MCInst Nop;
1253 Nop.setOpcode(PPC::NOP);
1254 return Nop;
1255}
1256
1257// Branch analysis.
1258// Note: If the condition register is set to CTR or CTR8 then this is a
1259// BDNZ (imm == 1) or BDZ (imm == 0) branch.
1262 MachineBasicBlock *&FBB,
1264 bool AllowModify) const {
1265 bool isPPC64 = Subtarget.isPPC64();
1266
1267 // If the block has no terminators, it just falls into the block after it.
1269 if (I == MBB.end())
1270 return false;
1271
1272 if (!isUnpredicatedTerminator(*I))
1273 return false;
1274
1275 if (AllowModify) {
1276 // If the BB ends with an unconditional branch to the fallthrough BB,
1277 // we eliminate the branch instruction.
1278 if (I->getOpcode() == PPC::B &&
1279 MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
1280 I->eraseFromParent();
1281
1282 // We update iterator after deleting the last branch.
1284 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
1285 return false;
1286 }
1287 }
1288
1289 // Get the last instruction in the block.
1290 MachineInstr &LastInst = *I;
1291
1292 // If there is only one terminator instruction, process it.
1293 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
1294 if (LastInst.getOpcode() == PPC::B) {
1295 if (!LastInst.getOperand(0).isMBB())
1296 return true;
1297 TBB = LastInst.getOperand(0).getMBB();
1298 return false;
1299 } else if (LastInst.getOpcode() == PPC::BCC) {
1300 if (!LastInst.getOperand(2).isMBB())
1301 return true;
1302 // Block ends with fall-through condbranch.
1303 TBB = LastInst.getOperand(2).getMBB();
1304 Cond.push_back(LastInst.getOperand(0));
1305 Cond.push_back(LastInst.getOperand(1));
1306 return false;
1307 } else if (LastInst.getOpcode() == PPC::BC) {
1308 if (!LastInst.getOperand(1).isMBB())
1309 return true;
1310 // Block ends with fall-through condbranch.
1311 TBB = LastInst.getOperand(1).getMBB();
1313 Cond.push_back(LastInst.getOperand(0));
1314 return false;
1315 } else if (LastInst.getOpcode() == PPC::BCn) {
1316 if (!LastInst.getOperand(1).isMBB())
1317 return true;
1318 // Block ends with fall-through condbranch.
1319 TBB = LastInst.getOperand(1).getMBB();
1321 Cond.push_back(LastInst.getOperand(0));
1322 return false;
1323 } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
1324 LastInst.getOpcode() == PPC::BDNZ) {
1325 if (!LastInst.getOperand(0).isMBB())
1326 return true;
1328 return true;
1329 TBB = LastInst.getOperand(0).getMBB();
1330 Cond.push_back(MachineOperand::CreateImm(1));
1331 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1332 true));
1333 return false;
1334 } else if (LastInst.getOpcode() == PPC::BDZ8 ||
1335 LastInst.getOpcode() == PPC::BDZ) {
1336 if (!LastInst.getOperand(0).isMBB())
1337 return true;
1339 return true;
1340 TBB = LastInst.getOperand(0).getMBB();
1341 Cond.push_back(MachineOperand::CreateImm(0));
1342 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1343 true));
1344 return false;
1345 }
1346
1347 // Otherwise, don't know what this is.
1348 return true;
1349 }
1350
1351 // Get the instruction before it if it's a terminator.
1352 MachineInstr &SecondLastInst = *I;
1353
1354 // If there are three terminators, we don't know what sort of block this is.
1355 if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
1356 return true;
1357
1358 // If the block ends with PPC::B and PPC::BCC, handle it.
1359 if (SecondLastInst.getOpcode() == PPC::BCC &&
1360 LastInst.getOpcode() == PPC::B) {
1361 if (!SecondLastInst.getOperand(2).isMBB() ||
1362 !LastInst.getOperand(0).isMBB())
1363 return true;
1364 TBB = SecondLastInst.getOperand(2).getMBB();
1365 Cond.push_back(SecondLastInst.getOperand(0));
1366 Cond.push_back(SecondLastInst.getOperand(1));
1367 FBB = LastInst.getOperand(0).getMBB();
1368 return false;
1369 } else if (SecondLastInst.getOpcode() == PPC::BC &&
1370 LastInst.getOpcode() == PPC::B) {
1371 if (!SecondLastInst.getOperand(1).isMBB() ||
1372 !LastInst.getOperand(0).isMBB())
1373 return true;
1374 TBB = SecondLastInst.getOperand(1).getMBB();
1376 Cond.push_back(SecondLastInst.getOperand(0));
1377 FBB = LastInst.getOperand(0).getMBB();
1378 return false;
1379 } else if (SecondLastInst.getOpcode() == PPC::BCn &&
1380 LastInst.getOpcode() == PPC::B) {
1381 if (!SecondLastInst.getOperand(1).isMBB() ||
1382 !LastInst.getOperand(0).isMBB())
1383 return true;
1384 TBB = SecondLastInst.getOperand(1).getMBB();
1386 Cond.push_back(SecondLastInst.getOperand(0));
1387 FBB = LastInst.getOperand(0).getMBB();
1388 return false;
1389 } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
1390 SecondLastInst.getOpcode() == PPC::BDNZ) &&
1391 LastInst.getOpcode() == PPC::B) {
1392 if (!SecondLastInst.getOperand(0).isMBB() ||
1393 !LastInst.getOperand(0).isMBB())
1394 return true;
1396 return true;
1397 TBB = SecondLastInst.getOperand(0).getMBB();
1398 Cond.push_back(MachineOperand::CreateImm(1));
1399 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1400 true));
1401 FBB = LastInst.getOperand(0).getMBB();
1402 return false;
1403 } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
1404 SecondLastInst.getOpcode() == PPC::BDZ) &&
1405 LastInst.getOpcode() == PPC::B) {
1406 if (!SecondLastInst.getOperand(0).isMBB() ||
1407 !LastInst.getOperand(0).isMBB())
1408 return true;
1410 return true;
1411 TBB = SecondLastInst.getOperand(0).getMBB();
1412 Cond.push_back(MachineOperand::CreateImm(0));
1413 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1414 true));
1415 FBB = LastInst.getOperand(0).getMBB();
1416 return false;
1417 }
1418
1419 // If the block ends with two PPC::Bs, handle it. The second one is not
1420 // executed, so remove it.
1421 if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
1422 if (!SecondLastInst.getOperand(0).isMBB())
1423 return true;
1424 TBB = SecondLastInst.getOperand(0).getMBB();
1425 I = LastInst;
1426 if (AllowModify)
1427 I->eraseFromParent();
1428 return false;
1429 }
1430
1431 // Otherwise, can't handle this.
1432 return true;
1433}
1434
1436 int *BytesRemoved) const {
1437 assert(!BytesRemoved && "code size not handled");
1438
1440 if (I == MBB.end())
1441 return 0;
1442
1443 if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
1444 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1445 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1446 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1447 return 0;
1448
1449 // Remove the branch.
1450 I->eraseFromParent();
1451
1452 I = MBB.end();
1453
1454 if (I == MBB.begin()) return 1;
1455 --I;
1456 if (I->getOpcode() != PPC::BCC &&
1457 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1458 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1459 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1460 return 1;
1461
1462 // Remove the branch.
1463 I->eraseFromParent();
1464 return 2;
1465}
1466
1469 MachineBasicBlock *FBB,
1471 const DebugLoc &DL,
1472 int *BytesAdded) const {
1473 // Shouldn't be a fall through.
1474 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1475 assert((Cond.size() == 2 || Cond.size() == 0) &&
1476 "PPC branch conditions have two components!");
1477 assert(!BytesAdded && "code size not handled");
1478
1479 bool isPPC64 = Subtarget.isPPC64();
1480
1481 // One-way branch.
1482 if (!FBB) {
1483 if (Cond.empty()) // Unconditional branch
1484 BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
1485 else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1486 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1487 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1488 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1489 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1490 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1491 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1492 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1493 else // Conditional branch
1494 BuildMI(&MBB, DL, get(PPC::BCC))
1495 .addImm(Cond[0].getImm())
1496 .add(Cond[1])
1497 .addMBB(TBB);
1498 return 1;
1499 }
1500
1501 // Two-way Conditional Branch.
1502 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1503 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1504 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1505 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1506 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1507 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1508 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1509 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1510 else
1511 BuildMI(&MBB, DL, get(PPC::BCC))
1512 .addImm(Cond[0].getImm())
1513 .add(Cond[1])
1514 .addMBB(TBB);
1515 BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
1516 return 2;
1517}
1518
1519// Select analysis.
1522 Register DstReg, Register TrueReg,
1523 Register FalseReg, int &CondCycles,
1524 int &TrueCycles, int &FalseCycles) const {
1525 if (!Subtarget.hasISEL())
1526 return false;
1527
1528 if (Cond.size() != 2)
1529 return false;
1530
1531 // If this is really a bdnz-like condition, then it cannot be turned into a
1532 // select.
1533 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1534 return false;
1535
1536 // If the conditional branch uses a physical register, then it cannot be
1537 // turned into a select.
1538 if (Cond[1].getReg().isPhysical())
1539 return false;
1540
1541 // Check register classes.
1543 const TargetRegisterClass *RC =
1544 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1545 if (!RC)
1546 return false;
1547
1548 // isel is for regular integer GPRs only.
1549 if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1550 !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1551 !PPC::G8RCRegClass.hasSubClassEq(RC) &&
1552 !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1553 return false;
1554
1555 // FIXME: These numbers are for the A2, how well they work for other cores is
1556 // an open question. On the A2, the isel instruction has a 2-cycle latency
1557 // but single-cycle throughput. These numbers are used in combination with
1558 // the MispredictPenalty setting from the active SchedMachineModel.
1559 CondCycles = 1;
1560 TrueCycles = 1;
1561 FalseCycles = 1;
1562
1563 return true;
1564}
1565
1568 const DebugLoc &dl, Register DestReg,
1570 Register FalseReg) const {
1571 assert(Cond.size() == 2 &&
1572 "PPC branch conditions have two components!");
1573
1574 // Get the register classes.
1576 const TargetRegisterClass *RC =
1577 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1578 assert(RC && "TrueReg and FalseReg must have overlapping register classes");
1579
1580 bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
1581 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
1582 assert((Is64Bit ||
1583 PPC::GPRCRegClass.hasSubClassEq(RC) ||
1584 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
1585 "isel is for regular integer GPRs only");
1586
1587 unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
1588 auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
1589
1590 unsigned SubIdx = 0;
1591 bool SwapOps = false;
1592 switch (SelectPred) {
1593 case PPC::PRED_EQ:
1594 case PPC::PRED_EQ_MINUS:
1595 case PPC::PRED_EQ_PLUS:
1596 SubIdx = PPC::sub_eq; SwapOps = false; break;
1597 case PPC::PRED_NE:
1598 case PPC::PRED_NE_MINUS:
1599 case PPC::PRED_NE_PLUS:
1600 SubIdx = PPC::sub_eq; SwapOps = true; break;
1601 case PPC::PRED_LT:
1602 case PPC::PRED_LT_MINUS:
1603 case PPC::PRED_LT_PLUS:
1604 SubIdx = PPC::sub_lt; SwapOps = false; break;
1605 case PPC::PRED_GE:
1606 case PPC::PRED_GE_MINUS:
1607 case PPC::PRED_GE_PLUS:
1608 SubIdx = PPC::sub_lt; SwapOps = true; break;
1609 case PPC::PRED_GT:
1610 case PPC::PRED_GT_MINUS:
1611 case PPC::PRED_GT_PLUS:
1612 SubIdx = PPC::sub_gt; SwapOps = false; break;
1613 case PPC::PRED_LE:
1614 case PPC::PRED_LE_MINUS:
1615 case PPC::PRED_LE_PLUS:
1616 SubIdx = PPC::sub_gt; SwapOps = true; break;
1617 case PPC::PRED_UN:
1618 case PPC::PRED_UN_MINUS:
1619 case PPC::PRED_UN_PLUS:
1620 SubIdx = PPC::sub_un; SwapOps = false; break;
1621 case PPC::PRED_NU:
1622 case PPC::PRED_NU_MINUS:
1623 case PPC::PRED_NU_PLUS:
1624 SubIdx = PPC::sub_un; SwapOps = true; break;
1625 case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
1626 case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
1627 }
1628
1629 Register FirstReg = SwapOps ? FalseReg : TrueReg,
1630 SecondReg = SwapOps ? TrueReg : FalseReg;
1631
1632 // The first input register of isel cannot be r0. If it is a member
1633 // of a register class that can be r0, then copy it first (the
1634 // register allocator should eliminate the copy).
1635 if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
1636 MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
1637 const TargetRegisterClass *FirstRC =
1638 MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
1639 &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
1640 Register OldFirstReg = FirstReg;
1641 FirstReg = MRI.createVirtualRegister(FirstRC);
1642 BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
1643 .addReg(OldFirstReg);
1644 }
1645
1646 BuildMI(MBB, MI, dl, get(OpCode), DestReg)
1647 .addReg(FirstReg).addReg(SecondReg)
1648 .addReg(Cond[1].getReg(), 0, SubIdx);
1649}
1650
// Map a condition-register bit register to a small index chosen by which bit
// of its CR field it names: LT -> 3, GT -> 2, EQ -> 1, UN -> 0. The result is
// the same for the corresponding bit of every field CR0..CR7.
// Elsewhere in this file the value is used as (4 - getCRBitValue(Reg)) when
// forming the rotate amount that moves a CR bit into the low bit of a GPR.
// Asserts if CRBit is not one of the CRnLT/CRnGT/CRnEQ/CRnUN registers.
1651static unsigned getCRBitValue(unsigned CRBit) {
// 4 is a sentinel meaning "no match"; it is rejected by the assert below.
1652 unsigned Ret = 4;
// "Less than" bit of any CR field.
1653 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1654 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1655 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1656 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1657 Ret = 3;
// "Greater than" bit of any CR field.
1658 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1659 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1660 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1661 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1662 Ret = 2;
// "Equal" bit of any CR field.
1663 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1664 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1665 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1666 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1667 Ret = 1;
// "Unordered" bit of any CR field.
1668 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1669 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1670 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1671 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1672 Ret = 0;
1673
// None of the four groups matched: the argument was not a CR bit register.
1674 assert(Ret != 4 && "Invalid CR bit register");
1675 return Ret;
1676}
1677
1680 const DebugLoc &DL, MCRegister DestReg,
1681 MCRegister SrcReg, bool KillSrc) const {
1682 // We can end up with self copies and similar things as a result of VSX copy
1683 // legalization. Promote them here.
1685 if (PPC::F8RCRegClass.contains(DestReg) &&
1686 PPC::VSRCRegClass.contains(SrcReg)) {
1687 MCRegister SuperReg =
1688 TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
1689
1690 if (VSXSelfCopyCrash && SrcReg == SuperReg)
1691 llvm_unreachable("nop VSX copy");
1692
1693 DestReg = SuperReg;
1694 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
1695 PPC::VSRCRegClass.contains(DestReg)) {
1696 MCRegister SuperReg =
1697 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
1698
1699 if (VSXSelfCopyCrash && DestReg == SuperReg)
1700 llvm_unreachable("nop VSX copy");
1701
1702 SrcReg = SuperReg;
1703 }
1704
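1705 // Different class register copy.
// NOTE(review): in the branches below, each standalone
// `getKillRegState(KillSrc);` statement discards its result, so KillSrc is
// not applied to the source operand of the preceding BuildMI. Confirm whether
// the kill flag was meant to be passed to addReg.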
1706 if (PPC::CRBITRCRegClass.contains(SrcReg) &&
1707 PPC::GPRCRegClass.contains(DestReg)) {
1708 MCRegister CRReg = getCRFromCRBit(SrcReg);
1709 BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
1710 getKillRegState(KillSrc);
1711 // Rotate the CR bit in the CR fields to be the least significant bit and
1712 // then mask with 0x1 (MB = ME = 31).
1713 BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
1714 .addReg(DestReg, RegState::Kill)
1715 .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
1716 .addImm(31)
1717 .addImm(31);
1718 return;
1719 } else if (PPC::CRRCRegClass.contains(SrcReg) &&
1720 (PPC::G8RCRegClass.contains(DestReg) ||
1721 PPC::GPRCRegClass.contains(DestReg))) {
1722 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1723 unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1724 unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1725 unsigned CRNum = TRI->getEncodingValue(SrcReg);
1726 BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
1727 getKillRegState(KillSrc);
1728 if (CRNum == 7)
1729 return;
1730 // Shift the CR bits so that the CR field ends up in the lowest 4 bits of
// the destination GPR.
1731 BuildMI(MBB, I, DL, get(ShCode), DestReg)
1732 .addReg(DestReg, RegState::Kill)
1733 .addImm(CRNum * 4 + 4)
1734 .addImm(28)
1735 .addImm(31);
1736 return;
1737 } else if (PPC::G8RCRegClass.contains(SrcReg) &&
1738 PPC::VSFRCRegClass.contains(DestReg)) {
1739 assert(Subtarget.hasDirectMove() &&
1740 "Subtarget doesn't support directmove, don't know how to copy.");
1741 BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
1742 NumGPRtoVSRSpill++;
1743 getKillRegState(KillSrc);
1744 return;
1745 } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
1746 PPC::G8RCRegClass.contains(DestReg)) {
1747 assert(Subtarget.hasDirectMove() &&
1748 "Subtarget doesn't support directmove, don't know how to copy.");
1749 BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
1750 getKillRegState(KillSrc);
1751 return;
1752 } else if (PPC::SPERCRegClass.contains(SrcReg) &&
1753 PPC::GPRCRegClass.contains(DestReg)) {
1754 BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
1755 getKillRegState(KillSrc);
1756 return;
1757 } else if (PPC::GPRCRegClass.contains(SrcReg) &&
1758 PPC::SPERCRegClass.contains(DestReg)) {
1759 BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
1760 getKillRegState(KillSrc);
1761 return;
1762 }
1763
1764 unsigned Opc;
1765 if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
1766 Opc = PPC::OR;
1767 else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
1768 Opc = PPC::OR8;
1769 else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
1770 Opc = PPC::FMR;
1771 else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
1772 Opc = PPC::MCRF;
1773 else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
1774 Opc = PPC::VOR;
1775 else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
1776 // There are two different ways this can be done:
1777 // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1778 // issue in VSU pipeline 0.
1779 // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1780 // can go to either pipeline.
1781 // We'll always use xxlor here, because in practically all cases where
1782 // copies are generated, they are close enough to some use that the
1783 // lower-latency form is preferable.
1784 Opc = PPC::XXLOR;
1785 else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
1786 PPC::VSSRCRegClass.contains(DestReg, SrcReg))
1787 Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1788 else if (Subtarget.pairedVectorMemops() &&
1789 PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
1790 if (SrcReg > PPC::VSRp15)
1791 SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
1792 else
1793 SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
1794 if (DestReg > PPC::VSRp15)
1795 DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
1796 else
1797 DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
1798 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
1799 addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1800 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
1801 addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
1802 return;
1803 }
1804 else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
1805 Opc = PPC::CROR;
1806 else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
1807 Opc = PPC::EVOR;
1808 else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1809 PPC::UACCRCRegClass.contains(DestReg)) &&
1810 (PPC::ACCRCRegClass.contains(SrcReg) ||
1811 PPC::UACCRCRegClass.contains(SrcReg))) {
1812 // If primed, de-prime the source register, copy the individual registers
1813 // and prime the destination if needed. The vector subregisters are
1814 // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1815 // source is primed, we need to re-prime it after the copy as well.
1816 PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1817 bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1818 bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1819 MCRegister VSLSrcReg =
1820 PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1821 MCRegister VSLDestReg =
1822 PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1823 if (SrcPrimed)
1824 BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1825 for (unsigned Idx = 0; Idx < 4; Idx++)
1826 BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1827 .addReg(VSLSrcReg + Idx)
1828 .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
1829 if (DestPrimed)
1830 BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1831 if (SrcPrimed && !KillSrc)
1832 BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1833 return;
1834 } else if (PPC::G8pRCRegClass.contains(DestReg) &&
1835 PPC::G8pRCRegClass.contains(SrcReg)) {
1836 // TODO: Handle G8RC to G8pRC (and vice versa) copy.
1837 unsigned DestRegIdx = DestReg - PPC::G8p0;
1838 MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
1839 MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
1840 unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1841 MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
1842 MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
1843 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
1844 .addReg(SrcRegSub0)
1845 .addReg(SrcRegSub0, getKillRegState(KillSrc));
1846 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
1847 .addReg(SrcRegSub1)
1848 .addReg(SrcRegSub1, getKillRegState(KillSrc));
1849 return;
1850 } else
1851 llvm_unreachable("Impossible reg-to-reg copy");
1852
1853 const MCInstrDesc &MCID = get(Opc);
1854 if (MCID.getNumOperands() == 3)
1855 BuildMI(MBB, I, DL, MCID, DestReg)
1856 .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1857 else
1858 BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
1859}
1860
// Return the index associated with register class RC. In this file the result
// is used to subscript the arrays returned by getStoreOpcodesForSpillArray()
// and getLoadOpcodesForSpillArray(), i.e. it selects the spill/reload opcode
// for RC.
// The classes are tested in order with hasSubClassEq; the first match wins.
// An unrecognised class hits llvm_unreachable.
// NOTE(review): in this listing the body of every branch is empty (the
// original lines that assign OpcodeIndex, e.g. 1866, 1869, 1871, ..., are
// absent), so the concrete index for each class cannot be read from here —
// confirm against the full source.
1861unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
1862 int OpcodeIndex = 0;
1863
1864 if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
1865 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1867 } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
1868 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1870 } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1872 } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1874 } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1876 } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1878 } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1880 } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1882 } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1884 } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1886 } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1888 } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
1890 } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
// The next four classes are only expected when the subtarget reports
// pairedVectorMemops().
1891 assert(Subtarget.pairedVectorMemops() &&
1892 "Register unexpected when paired memops are disabled.");
1894 } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1895 assert(Subtarget.pairedVectorMemops() &&
1896 "Register unexpected when paired memops are disabled.");
1898 } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1899 assert(Subtarget.pairedVectorMemops() &&
1900 "Register unexpected when paired memops are disabled.");
1902 } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1903 assert(Subtarget.pairedVectorMemops() &&
1904 "Register unexpected when paired memops are disabled.");
1906 } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1908 } else {
1909 llvm_unreachable("Unknown regclass!");
1910 }
1911 return OpcodeIndex;
1912}
1913
1914unsigned
1916 ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1917 return OpcodesForSpill[getSpillIndex(RC)];
1918}
1919
1920unsigned
1922 ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1923 return OpcodesForSpill[getSpillIndex(RC)];
1924}
1925
// Build the store that spills SrcReg (of register class RC) to stack slot
// FrameIdx, and record spill-related facts on the function's PPCFunctionInfo.
//   MF       - function the new instruction is created in.
//   SrcReg   - register being spilled.
//   isKill   - whether the SrcReg operand gets the kill flag.
//   FrameIdx - frame index of the destination stack slot.
//   RC       - register class used to choose the store opcode.
//   NewMIs   - output list; the caller in this file inserts its contents into
//              the basic block.
// NOTE(review): the statement that consumes the BuildMI(...) expression below
// is incomplete in this listing (original line 1936 is absent) — presumably it
// appends the new instruction to NewMIs; confirm against the full source.
1926void PPCInstrInfo::StoreRegToStackSlot(
1927 MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1928 const TargetRegisterClass *RC,
1929 SmallVectorImpl<MachineInstr *> &NewMIs) const {
1930 unsigned Opcode = getStoreOpcodeForSpill(RC);
// The store is created with an empty debug location.
1931 DebugLoc DL;
1932
// Any call to this function marks the function as containing spills.
1933 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1934 FuncInfo->setHasSpills();
1935
1937 BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
1938 FrameIdx));
1939
// Spills of CR fields or CR bits are tracked separately.
1940 if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
1941 PPC::CRBITRCRegClass.hasSubClassEq(RC))
1942 FuncInfo->setSpillsCR();
1943
// Record when the chosen store opcode satisfies isXFormMemOp.
1944 if (isXFormMemOp(Opcode))
1945 FuncInfo->setHasNonRISpills();
1946}
1947
// Spills SrcReg to FrameIdx using exactly the register class RC it is given:
// the store is created by StoreRegToStackSlot, inserted before MI in MBB, and
// the last created instruction gets the memory operand MMO attached.
// NOTE(review): the declarator lines (1948-1949), the NewMIs declaration
// (1953) and the start of the MMO construction (1961-1963) are absent here.
// Judging by the call that passes (MBB, MI, SrcReg, isKill, FrameIdx, RC,
// TRI) this is presumably storeRegToStackSlotNoUpd - confirm.
1950 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1951 const TargetRegisterInfo *TRI) const {
1952 MachineFunction &MF = *MBB.getParent();
1954
1955 StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
1956
// Place every created instruction immediately before MI.
1957 for (MachineInstr *NewMI : NewMIs)
1958 MBB.insert(MI, NewMI);
1959
1960 const MachineFrameInfo &MFI = MF.getFrameInfo();
// Tail of the MMO construction: the slot's alignment is taken from the frame
// info.
1964 MFI.getObjectAlign(FrameIdx));
1965 NewMIs.back()->addMemOperand(MF, MMO);
1966}
1967
// Spill entry point that first remaps RC through updatedRC() and then
// forwards to storeRegToStackSlotNoUpd. VReg is not used in the visible body.
// NOTE(review): the declarator lines (1968-1969) are absent here; this is
// presumably the storeRegToStackSlot hook - confirm.
1970 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1971 const TargetRegisterInfo *TRI, Register VReg) const {
1972 // We need to avoid a situation in which the value from a VRRC register is
1973 // spilled using an Altivec instruction and reloaded into a VSRC register
1974 // using a VSX instruction. The issue with this is that the VSX
1975 // load/store instructions swap the doublewords in the vector and the Altivec
1976 // ones don't. The register classes on the spill/reload may be different if
1977 // the register is defined using an Altivec instruction and is then used by a
1978 // VSX instruction.
1979 RC = updatedRC(RC);
1980 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
1981}
1982
// Builds the instruction that reloads DestReg (of class RC) from frame index
// FrameIdx and appends it to NewMIs. The load opcode comes from
// getLoadOpcodeForSpill(RC); DL is the debug location given to the new
// instruction. The instruction is only created here, not inserted.
// NOTE(review): the parameter line declaring NewMIs (1986) is absent here.
1983void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
1984 unsigned DestReg, int FrameIdx,
1985 const TargetRegisterClass *RC,
1987 const {
1988 unsigned Opcode = getLoadOpcodeForSpill(RC);
1989 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
1990 FrameIdx));
1991}
1992
// Reloads DestReg from FrameIdx using exactly the register class RC it is
// given: the load is created by LoadRegFromStackSlot, inserted before MI in
// MBB, and the last created instruction gets the memory operand MMO attached.
// NOTE(review): the declarator lines (1993-1994), the NewMIs declaration
// (1998) and the start of the MMO construction (2008-2010) are absent here.
// Judging by the call that passes (MBB, MI, DestReg, FrameIdx, RC, TRI) this
// is presumably loadRegFromStackSlotNoUpd - confirm.
1995 int FrameIdx, const TargetRegisterClass *RC,
1996 const TargetRegisterInfo *TRI) const {
1997 MachineFunction &MF = *MBB.getParent();
1999 DebugLoc DL;
// Reuse the debug location of the insertion point unless it is the block end.
2000 if (MI != MBB.end()) DL = MI->getDebugLoc();
2001
2002 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2003
// Place every created instruction immediately before MI.
2004 for (MachineInstr *NewMI : NewMIs)
2005 MBB.insert(MI, NewMI);
2006
2007 const MachineFrameInfo &MFI = MF.getFrameInfo();
// Tail of the MMO construction: the slot's alignment is taken from the frame
// info.
2011 MFI.getObjectAlign(FrameIdx));
2012 NewMIs.back()->addMemOperand(MF, MMO);
2013}
2014
// Reload entry point that first remaps RC through updatedRC() and then
// forwards to loadRegFromStackSlotNoUpd. VReg is not used in the visible body.
// NOTE(review): the declarator lines (2015-2016) are absent here; this is
// presumably the loadRegFromStackSlot hook - confirm.
2017 Register DestReg, int FrameIdx,
2018 const TargetRegisterClass *RC,
2019 const TargetRegisterInfo *TRI,
2020 Register VReg) const {
2021 // We need to avoid a situation in which the value from a VRRC register is
2022 // spilled using an Altivec instruction and reloaded into a VSRC register
2023 // using a VSX instruction. The issue with this is that the VSX
2024 // load/store instructions swap the doublewords in the vector and the Altivec
2025 // ones don't. The register classes on the spill/reload may be different if
2026 // the register is defined using an Altivec instruction and is then used by a
2027 // VSX instruction.
2028 RC = updatedRC(RC);
2029
2030 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2031}
2032
// Inverts, in place, the two-operand branch condition held in Cond.
// Cond[0] is an immediate and Cond[1] a register. When the register is CTR or
// CTR8 the immediate is toggled (0 becomes 1, anything else becomes 0); for
// any other register the immediate is replaced by PPC::InvertPredicate of
// itself. Always returns false.
// NOTE(review): the declarator lines (2033-2034) are absent here; this looks
// like the reverseBranchCondition hook - confirm.
2035 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2036 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2037 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2038 else
2039 // Leave the CR# the same, but invert the condition.
2040 Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
2041 return false;
2042}
2043
2044// For some instructions, it is legal to fold ZERO into the RA register field.
2045// This function performs that fold by replacing the operand with PPC::ZERO,
2046// it does not consider whether the load immediate zero is no longer in use.
//
// UseMI is the instruction whose operand is rewritten, DefMI the instruction
// expected to be an LI/LI8 of zero, and Reg the register that links them.
// Returns true if the operand of UseMI was replaced by ZERO/ZERO8, false if
// any precondition below fails (in which case UseMI is left untouched).
// NOTE(review): the first declarator line (2047) is absent here; the name
// onlyFoldImmediate is taken from the call site that passes (UseMI, DefMI,
// Reg) - confirm.
2048 Register Reg) const {
2049 // A zero immediate should always be loaded with a single li.
2050 unsigned DefOpc = DefMI.getOpcode();
2051 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2052 return false;
2053 if (!DefMI.getOperand(1).isImm())
2054 return false;
2055 if (DefMI.getOperand(1).getImm() != 0)
2056 return false;
2057
2058 // Note that we cannot here invert the arguments of an isel in order to fold
2059 // a ZERO into what is presented as the second argument. All we have here
2060 // is the condition bit, and that might come from a CR-logical bit operation.
2061
2062 const MCInstrDesc &UseMCID = UseMI.getDesc();
2063
2064 // Only fold into real machine instructions.
2065 if (UseMCID.isPseudo())
2066 return false;
2067
2068 // We need to find which of the User's operands is to be folded, that will be
2069 // the operand that matches the given register ID.
// Only the first operand that is a register equal to Reg is considered.
2070 unsigned UseIdx;
2071 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2072 if (UseMI.getOperand(UseIdx).isReg() &&
2073 UseMI.getOperand(UseIdx).getReg() == Reg)
2074 break;
2075
2076 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2077 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2078
2079 const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2080
2081 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2082 // register (which might also be specified as a pointer class kind).
// NOTE(review): pointer-class kind 1 is presumably the "no R0" pointer kind -
// confirm against the register-info pointer class lookup.
2083 if (UseInfo->isLookupPtrRegClass()) {
2084 if (UseInfo->RegClass /* Kind */ != 1)
2085 return false;
2086 } else {
2087 if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2088 UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2089 return false;
2090 }
2091
2092 // Make sure this is not tied to an output register (or otherwise
2093 // constrained). This is true for ST?UX registers, for example, which
2094 // are tied to their output registers.
2095 if (UseInfo->Constraints != 0)
2096 return false;
2097
// Pick the 32- or 64-bit zero register: by subtarget for pointer-class
// operands, otherwise by the operand's own register class.
2098 MCRegister ZeroReg;
2099 if (UseInfo->isLookupPtrRegClass()) {
2100 bool isPPC64 = Subtarget.isPPC64();
2101 ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2102 } else {
2103 ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2104 PPC::ZERO8 : PPC::ZERO;
2105 }
2106
2107 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2108 LLVM_DEBUG(UseMI.dump());
2109 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2110 LLVM_DEBUG(dbgs() << "Into: ");
2111 LLVM_DEBUG(UseMI.dump());
2112 return true;
2113}
2114
2115// Folds zero into instructions which have a load immediate zero as an operand
2116// but also recognize zero as immediate zero. If the definition of the load
2117// has no more users it is deleted.
//
// Returns the result of onlyFoldImmediate(UseMI, DefMI, Reg). The check that
// erases DefMI does not depend on that result: DefMI is erased whenever Reg
// has no remaining non-debug uses, even if nothing was folded.
// NOTE(review): the first declarator line (2118) is absent here; this is
// presumably the foldImmediate hook - confirm.
2119 Register Reg, MachineRegisterInfo *MRI) const {
2120 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2121 if (MRI->use_nodbg_empty(Reg))
2122 DefMI.eraseFromParent();
2123 return Changed;
2124}
2125
// Returns true if any instruction in MBB defines PPC::CTR or PPC::CTR8,
// false otherwise (including for an empty block).
// NOTE(review): the declarator line (2126) is absent here; the name
// MBBDefinesCTR is taken from the calls that pass a basic block - confirm.
2127 for (MachineInstr &MI : MBB)
2128 if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
2129 MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
2130 return true;
2131 return false;
2132}
2133
2134// We should make sure that, if we're going to predicate both sides of a
2135// condition (a diamond), that both sides don't define the counter register. We
2136// can predicate counter-decrement-based branches, but while that predicates
2137// the branching, it does not predicate the counter decrement. If we tried to
2138// merge the triangle into one predicated block, we'd decrement the counter
2139// twice.
//
// Returns false only when both TMBB and FMBB define CTR/CTR8 (as reported by
// MBBDefinesCTR); NumT, ExtraT, NumF, ExtraF and Probability are not
// consulted.
// NOTE(review): the first declarator line (2140) is absent here; this is
// presumably the two-block isProfitableToIfCvt overload - confirm.
2141 unsigned NumT, unsigned ExtraT,
2142 MachineBasicBlock &FMBB,
2143 unsigned NumF, unsigned ExtraF,
2144 BranchProbability Probability) const {
2145 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2146}
2147
2148
// Unconditionally returns false; the comment below gives the rationale.
// NOTE(review): the declarator line (2149) is absent here; the comment below
// suggests this is the isPredicated hook - confirm.
2150 // The predicated branches are identified by their type, not really by the
2151 // explicit presence of a predicate. Furthermore, some of them can be
2152 // predicated more than once. Because if conversion won't try to predicate
2153 // any instruction which already claims to be predicated (by returning true
2154 // here), always return false. In doing so, we let isPredicable() be the
2155 // final word on whether or not the instruction can be (further) predicated.
2156
2157 return false;
2158}
2159
// Returns true for MFFS, MTFSF and FENCE so they act as scheduling
// boundaries. For every other opcode control leaves the switch and reaches a
// final statement that is not shown here.
// NOTE(review): the declarator line (2160) and the final return (2174) are
// absent here; this is presumably the isSchedulingBoundary hook, deferring to
// the generic implementation for other opcodes - confirm.
2161 const MachineBasicBlock *MBB,
2162 const MachineFunction &MF) const {
2163 switch (MI.getOpcode()) {
2164 default:
2165 break;
2166 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2167 // across them, since some FP operations may change content of FPSCR.
2168 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2169 case PPC::MFFS:
2170 case PPC::MTFSF:
2171 case PPC::FENCE:
2172 return true;
2173 }
2175}
2176
// Rewrites the unconditional branch or return MI into a conditional form
// selected by Pred, in place (setDesc plus appended operands).
//   Pred[0] - immediate: a PPC predicate code, or for CTR-based predicates a
//             flag whose non-zero value selects the "decrement, branch if
//             non-zero" opcode.
//   Pred[1] - register tested: CTR/CTR8 or a condition register/bit.
// Handles BLR/BLR8, B, and BCTR/BCTR8/BCTRL/BCTRL8/BCTRL_RM/BCTRL8_RM.
// Returns true if MI was rewritten and false for any other opcode.
// NOTE(review): the declarator line (2177) and three builder continuation
// lines (2188, 2211, 2273) are absent here; this is presumably the
// PredicateInstruction hook - confirm.
2178 ArrayRef<MachineOperand> Pred) const {
2179 unsigned OpC = MI.getOpcode();
// Return: becomes bdnzlr/bdzlr for CTR predicates, bclr/bclrn for a single CR
// bit, or bcclr with an explicit predicate code otherwise.
2180 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2181 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2182 bool isPPC64 = Subtarget.isPPC64();
2183 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2184 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2185 // Need add Def and Use for CTR implicit operand.
2186 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2187 .addReg(Pred[1].getReg(), RegState::Implicit)
2189 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2190 MI.setDesc(get(PPC::BCLR));
2191 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2192 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2193 MI.setDesc(get(PPC::BCLRn));
2194 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2195 } else {
2196 MI.setDesc(get(PPC::BCCLR));
2197 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2198 .addImm(Pred[0].getImm())
2199 .add(Pred[1]);
2200 }
2201
2202 return true;
// Direct branch: the target block operand is removed and re-added last so
// that the predicate operands come before it.
2203 } else if (OpC == PPC::B) {
2204 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2205 bool isPPC64 = Subtarget.isPPC64();
2206 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2207 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2208 // Need add Def and Use for CTR implicit operand.
2209 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2210 .addReg(Pred[1].getReg(), RegState::Implicit)
2212 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2213 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2214 MI.removeOperand(0);
2215
2216 MI.setDesc(get(PPC::BC));
2217 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2218 .add(Pred[1])
2219 .addMBB(MBB);
2220 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2221 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2222 MI.removeOperand(0);
2223
2224 MI.setDesc(get(PPC::BCn));
2225 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2226 .add(Pred[1])
2227 .addMBB(MBB);
2228 } else {
2229 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2230 MI.removeOperand(0);
2231
2232 MI.setDesc(get(PPC::BCC));
2233 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2234 .addImm(Pred[0].getImm())
2235 .add(Pred[1])
2236 .addMBB(MBB);
2237 }
2238
2239 return true;
// Branch (and link) through CTR: a CTR-based predicate is rejected as
// unreachable, since the branch target itself lives in CTR.
2240 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2241 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2242 OpC == PPC::BCTRL8_RM) {
2243 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2244 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2245
// setLR is true for the "and link" opcodes.
2246 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2247 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2248 bool isPPC64 = Subtarget.isPPC64();
2249
2250 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2251 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2252 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2253 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2254 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2255 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2256 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2257 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2258 } else {
2259 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2260 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2261 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2262 .addImm(Pred[0].getImm())
2263 .add(Pred[1]);
2264 }
2265
2266 // Need add Def and Use for LR implicit operand.
2267 if (setLR)
2268 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2269 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2270 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
// NOTE(review): the operand appended for the _RM variants is on the absent
// line 2273.
2271 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2272 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2274
2275 return true;
2276 }
2277
2278 return false;
2279}
2280
// Returns true if predicate Pred1 subsumes predicate Pred2.
// Each predicate is a two-element list: [0] an immediate PPC::Predicate and
// [1] the register it tests. The answer is true only when neither predicate
// is based on CTR/CTR8, both test the same register, and either the predicate
// codes are equal, or Pred1 is LE and Pred2 is LT or EQ, or Pred1 is GE and
// Pred2 is GT or EQ.
// NOTE(review): the first declarator line (2281) is absent here; the assert
// messages suggest this is the SubsumesPredicate hook - confirm.
2282 ArrayRef<MachineOperand> Pred2) const {
2283 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2284 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2285
2286 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2287 return false;
2288 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2289 return false;
2290
2291 // P1 can only subsume P2 if they test the same condition register.
2292 if (Pred1[1].getReg() != Pred2[1].getReg())
2293 return false;
2294
2295 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2296 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2297
2298 if (P1 == P2)
2299 return true;
2300
2301 // Does P1 subsume P2, e.g. GE subsumes GT.
2302 if (P1 == PPC::PRED_LE &&
2303 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2304 return true;
2305 if (P1 == PPC::PRED_GE &&
2306 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2307 return true;
2308
2309 return false;
2310}
2311
// Reports whether MI defines, or clobbers through a register mask, a register
// in one of the classes CRRC, CRBITRC, CTRRC or CTRRC8. Matching operands are
// appended to Pred; the return value is true if at least one was found.
// SkipDead is not used in the visible body.
// NOTE(review): the first declarator line (2312) is absent here; this is
// presumably the ClobbersPredicate hook - confirm.
2313 std::vector<MachineOperand> &Pred,
2314 bool SkipDead) const {
2315 // Note: At the present time, the contents of Pred from this function is
2316 // unused by IfConversion. This implementation follows ARM by pushing the
2317 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2318 // predicate, instructions defining CTR or CTR8 are also included as
2319 // predicate-defining instructions.
2320
2321 const TargetRegisterClass *RCs[] =
2322 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2323 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2324
2325 bool Found = false;
2326 for (const MachineOperand &MO : MI.operands()) {
// The class loop stops as soon as Found is set, and since Found is never
// reset no operand after the first match is examined against any class.
2327 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2328 const TargetRegisterClass *RC = RCs[c];
2329 if (MO.isReg()) {
2330 if (MO.isDef() && RC->contains(MO.getReg())) {
2331 Pred.push_back(MO);
2332 Found = true;
2333 }
2334 } else if (MO.isRegMask()) {
// NOTE(review): this loop has no break, so a register-mask operand is pushed
// once per register of RC that it clobbers, which can duplicate it in Pred.
2335 for (MCPhysReg R : *RC)
2336 if (MO.clobbersPhysReg(R)) {
2337 Pred.push_back(MO);
2338 Found = true;
2339 }
2340 }
2341 }
2342 }
2343
2344 return Found;
2345}
2346
// Decomposes the compare instruction MI into its operands.
// For CMPWI/CMPLWI/CMPDI/CMPLDI: SrcReg is operand 1, SrcReg2 is 0, Value is
// the immediate in operand 2 and Mask is 0xFFFF.
// For CMPW/CMPLW/CMPD/CMPLD/FCMPUS/FCMPUD: SrcReg and SrcReg2 are operands 1
// and 2, and Value and Mask are both 0.
// Returns true for those opcodes; for any other opcode returns false and
// leaves the output parameters unwritten.
// NOTE(review): the first declarator line (2347) is absent here; the name
// analyzeCompare is taken from the call that passes (CmpMI, SrcReg, SrcReg2,
// CmpMask, CmpValue) - confirm.
2348 Register &SrcReg2, int64_t &Mask,
2349 int64_t &Value) const {
2350 unsigned Opc = MI.getOpcode();
2351
2352 switch (Opc) {
2353 default: return false;
2354 case PPC::CMPWI:
2355 case PPC::CMPLWI:
2356 case PPC::CMPDI:
2357 case PPC::CMPLDI:
2358 SrcReg = MI.getOperand(1).getReg();
2359 SrcReg2 = 0;
2360 Value = MI.getOperand(2).getImm();
2361 Mask = 0xFFFF;
2362 return true;
2363 case PPC::CMPW:
2364 case PPC::CMPLW:
2365 case PPC::CMPD:
2366 case PPC::CMPLD:
2367 case PPC::FCMPUS:
2368 case PPC::FCMPUD:
2369 SrcReg = MI.getOperand(1).getReg();
2370 SrcReg2 = MI.getOperand(2).getReg();
2371 Value = 0;
2372 Mask = 0;
2373 return true;
2374 }
2375}
2376
// Tries to make the compare CmpInstr redundant by taking its result from the
// CR0 value of a record-form instruction. The candidate is either the unique
// definition of SrcReg (register-immediate compares) or a SUBF/SUBF8 of the
// two compared registers found earlier in the same block (register-register
// compares).
//   CmpInstr - the compare; operand 0 is the CR register it defines.
//   SrcReg   - first compared register.
//   SrcReg2  - second compared register, or 0 for a compare with an immediate.
//   Mask     - not read in the visible body (a local of the same name shadows
//              it in the rotate handling).
//   Value    - the immediate compared against (0 for register compares).
//   MRI      - register info used to find definitions and uses.
// On success CmpInstr is erased, a COPY from CR0 into the compare's CR
// register is inserted after the candidate, the candidate is switched to its
// record form if needed, and true is returned. Returns false otherwise.
// NOTE(review): the first declarator line (2377) and several interior lines
// (2392, 2440, 2445, 2466, 2478-2479, 2510, 2629, 2634, 2641, 2667) are
// absent here. TRI, Pred, PredsToUpdate, SubRegsToUpdate and MII are declared
// on those absent lines. The name optimizeCompareInstr is taken from the
// cross-reference in optimizeCmpPostRA - confirm.
2378 Register SrcReg2, int64_t Mask,
2379 int64_t Value,
2380 const MachineRegisterInfo *MRI) const {
2381 if (DisableCmpOpt)
2382 return false;
2383
2384 int OpC = CmpInstr.getOpcode();
2385 Register CRReg = CmpInstr.getOperand(0).getReg();
2386
2387 // FP record forms set CR1 based on the exception status bits, not a
2388 // comparison with zero.
2389 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2390 return false;
2391
2393 // The record forms set the condition register based on a signed comparison
2394 // with zero (so says the ISA manual). This is not as straightforward as it
2395 // seems, however, because this is always a 64-bit comparison on PPC64, even
2396 // for instructions that are 32-bit in nature (like slw for example).
2397 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2398 // for equality checks (as those don't depend on the sign). On PPC64,
2399 // we are restricted to equality for unsigned 64-bit comparisons and for
2400 // signed 32-bit comparisons the applicability is more restricted.
2401 bool isPPC64 = Subtarget.isPPC64();
2402 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2403 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2404 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2405
2406 // Look through copies unless that gets us to a physical register.
2407 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2408 if (ActualSrc.isVirtual())
2409 SrcReg = ActualSrc;
2410
2411 // Get the unique definition of SrcReg.
2412 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2413 if (!MI) return false;
2414
// equalityOnly: only the EQ bit of the result may be relied upon.
// noSub: skip the backward search for a SUBF candidate further below.
2415 bool equalityOnly = false;
2416 bool noSub = false;
2417 if (isPPC64) {
2418 if (is32BitSignedCompare) {
2419 // We can perform this optimization only if SrcReg is sign-extending.
2420 if (isSignExtended(SrcReg, MRI))
2421 noSub = true;
2422 else
2423 return false;
2424 } else if (is32BitUnsignedCompare) {
2425 // We can perform this optimization, equality only, if SrcReg is
2426 // zero-extending.
2427 if (isZeroExtended(SrcReg, MRI)) {
2428 noSub = true;
2429 equalityOnly = true;
2430 } else
2431 return false;
2432 } else
2433 equalityOnly = is64BitUnsignedCompare;
2434 } else
2435 equalityOnly = is32BitUnsignedCompare;
2436
2437 if (equalityOnly) {
2438 // We need to check the uses of the condition register in order to reject
2439 // non-equality comparisons.
2441 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2442 I != IE; ++I) {
2443 MachineInstr *UseMI = &*I;
2444 if (UseMI->getOpcode() == PPC::BCC) {
2446 unsigned PredCond = PPC::getPredicateCondition(Pred);
2447 // We ignore hint bits when checking for non-equality comparisons.
2448 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2449 return false;
2450 } else if (UseMI->getOpcode() == PPC::ISEL ||
2451 UseMI->getOpcode() == PPC::ISEL8) {
2452 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2453 if (SubIdx != PPC::sub_eq)
2454 return false;
2455 } else
2456 return false;
2457 }
2458 }
2459
2460 MachineBasicBlock::iterator I = CmpInstr;
2461
2462 // Scan forward to find the first use of the compare.
2463 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2464 ++I) {
2465 bool FoundUse = false;
2467 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2468 J != JE; ++J)
2469 if (&*J == &*I) {
2470 FoundUse = true;
2471 break;
2472 }
2473
2474 if (FoundUse)
2475 break;
2476 }
2477
2480
2481 // There are two possible candidates which can be changed to set CR[01].
2482 // One is MI, the other is a SUB instruction.
2483 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2484 MachineInstr *Sub = nullptr;
2485 if (SrcReg2 != 0)
2486 // MI is not a candidate for CMPrr.
2487 MI = nullptr;
2488 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2489 // same BB as the comparison. This is to allow the check below to avoid calls
2490 // (and other explicit clobbers); instead we should really check for these
2491 // more explicitly (in at least a few predecessors).
2492 else if (MI->getParent() != CmpInstr.getParent())
2493 return false;
2494 else if (Value != 0) {
2495 // The record-form instructions set CR bit based on signed comparison
2496 // against 0. We try to convert a compare against 1 or -1 into a compare
2497 // against 0 to exploit record-form instructions. For example, we change
2498 // the condition "greater than -1" into "greater than or equal to 0"
2499 // and "less than 1" into "less than or equal to 0".
2500
2501 // Since we optimize comparison based on a specific branch condition,
2502 // we don't optimize if condition code is used more than once.
2503 if (equalityOnly || !MRI->hasOneUse(CRReg))
2504 return false;
2505
2506 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2507 if (UseMI->getOpcode() != PPC::BCC)
2508 return false;
2509
2511 unsigned PredCond = PPC::getPredicateCondition(Pred);
2512 unsigned PredHint = PPC::getPredicateHint(Pred);
2513 int16_t Immed = (int16_t)Value;
2514
2515 // When modifying the condition in the predicate, we propagate hint bits
2516 // from the original predicate to the new one.
2517 if (Immed == -1 && PredCond == PPC::PRED_GT)
2518 // We convert "greater than -1" into "greater than or equal to 0",
2519 // since we are assuming signed comparison by !equalityOnly
2520 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2521 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2522 // We convert "less than or equal to -1" into "less than 0".
2523 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2524 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2525 // We convert "less than 1" into "less than or equal to 0".
2526 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2527 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2528 // We convert "greater than or equal to 1" into "greater than 0".
2529 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2530 else
2531 return false;
2532
2533 // Convert the comparison and its user to a compare against zero with the
2534 // appropriate predicate on the branch. Zero comparison might provide
2535 // optimization opportunities post-RA (see optimization in
2536 // PPCPreEmitPeephole.cpp).
// These two updates take effect immediately and are not undone if a later
// check in this function returns false.
2537 UseMI->getOperand(0).setImm(Pred);
2538 CmpInstr.getOperand(2).setImm(0);
2539 }
2540
2541 // Search for Sub.
2542 --I;
2543
2544 // Get ready to iterate backward from CmpInstr.
2545 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2546
// The backward walk is skipped entirely when noSub is set.
2547 for (; I != E && !noSub; --I) {
2548 const MachineInstr &Instr = *I;
2549 unsigned IOpC = Instr.getOpcode();
2550
2551 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2552 Instr.readsRegister(PPC::CR0, TRI)))
2553 // This instruction modifies or uses the record condition register after
2554 // the one we want to change. While we could do this transformation, it
2555 // would likely not be profitable. This transformation removes one
2556 // instruction, and so even forcing RA to generate one move probably
2557 // makes it unprofitable.
2558 return false;
2559
2560 // Check whether CmpInstr can be made redundant by the current instruction.
2561 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2562 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2563 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2564 ((Instr.getOperand(1).getReg() == SrcReg &&
2565 Instr.getOperand(2).getReg() == SrcReg2) ||
2566 (Instr.getOperand(1).getReg() == SrcReg2 &&
2567 Instr.getOperand(2).getReg() == SrcReg))) {
2568 Sub = &*I;
2569 break;
2570 }
2571
2572 if (I == B)
2573 // The 'and' is below the comparison instruction.
2574 return false;
2575 }
2576
2577 // Return false if no candidates exist.
2578 if (!MI && !Sub)
2579 return false;
2580
2581 // The single candidate is called MI.
2582 if (!MI) MI = Sub;
2583
// NewOpC is the opcode MI must end up with. It equals MIOpC when MI is
// already a record form, and stays -1 when no record form is known.
2584 int NewOpC = -1;
2585 int MIOpC = MI->getOpcode();
2586 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2587 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2588 NewOpC = MIOpC;
2589 else {
2590 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2591 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2592 NewOpC = MIOpC;
2593 }
2594
2595 // FIXME: On the non-embedded POWER architectures, only some of the record
2596 // forms are fast, and we should use only the fast ones.
2597
2598 // The defining instruction has a record form (or is already a record
2599 // form). It is possible, however, that we'll need to reverse the condition
2600 // code of the users.
2601 if (NewOpC == -1)
2602 return false;
2603
2604 // This transformation should not be performed if `nsw` is missing and is not
2605 // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2606 // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2607 // CRReg can reflect if compared values are equal, this optz is still valid.
2608 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2609 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2610 return false;
2611
2612 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2613 // needs to be updated to be based on SUB. Push the condition code
2614 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2615 // condition code of these operands will be modified.
2616 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2617 // comparison against 0, which may modify predicate.
2618 bool ShouldSwap = false;
2619 if (Sub && Value == 0) {
2620 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2621 Sub->getOperand(2).getReg() == SrcReg;
2622
2623 // The operands to subf are the opposite of sub, so only in the fixed-point
2624 // case, invert the order.
2625 ShouldSwap = !ShouldSwap;
2626 }
2627
// Collect the user updates first; they are applied only at the very end, once
// nothing else can fail.
2628 if (ShouldSwap)
2630 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2631 I != IE; ++I) {
2632 MachineInstr *UseMI = &*I;
2633 if (UseMI->getOpcode() == PPC::BCC) {
2635 unsigned PredCond = PPC::getPredicateCondition(Pred);
2636 assert((!equalityOnly ||
2637 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2638 "Invalid predicate for equality-only optimization");
2639 (void)PredCond; // To suppress warning in release build.
2640 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2642 } else if (UseMI->getOpcode() == PPC::ISEL ||
2643 UseMI->getOpcode() == PPC::ISEL8) {
2644 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2645 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2646 "Invalid CR bit for equality-only optimization");
2647
2648 if (NewSubReg == PPC::sub_lt)
2649 NewSubReg = PPC::sub_gt;
2650 else if (NewSubReg == PPC::sub_gt)
2651 NewSubReg = PPC::sub_lt;
2652
2653 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2654 NewSubReg));
2655 } else // We need to abort on a user we don't understand.
2656 return false;
2657 }
2658 assert(!(Value != 0 && ShouldSwap) &&
2659 "Non-zero immediate support and ShouldSwap"
2660 "may conflict in updating predicate");
2661
2662 // Create a new virtual register to hold the value of the CR set by the
2663 // record-form instruction. If the instruction was not previously in
2664 // record form, then set the kill flag on the CR.
2665 CmpInstr.eraseFromParent();
2666
2668 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2669 get(TargetOpcode::COPY), CRReg)
2670 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2671
2672 // Even if CR0 register were dead before, it is alive now since the
2673 // instruction we just built uses it.
2674 MI->clearRegisterDeads(PPC::CR0);
2675
2676 if (MIOpC != NewOpC) {
2677 // We need to be careful here: we're replacing one instruction with
2678 // another, and we need to make sure that we get all of the right
2679 // implicit uses and defs. On the other hand, the caller may be holding
2680 // an iterator to this instruction, and so we can't delete it (this is
2681 // specifically the case if this is the instruction directly after the
2682 // compare).
2683
2684 // Rotates are expensive instructions. If we're emitting a record-form
2685 // rotate that can just be an andi/andis, we should just emit that.
2686 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2687 Register GPRRes = MI->getOperand(0).getReg();
2688 int64_t SH = MI->getOperand(2).getImm();
2689 int64_t MB = MI->getOperand(3).getImm();
2690 int64_t ME = MI->getOperand(4).getImm();
2691 // We can only do this if both the start and end of the mask are in the
2692 // same halfword.
2693 bool MBInLoHWord = MB >= 16;
2694 bool MEInLoHWord = ME >= 16;
// All-ones is the "no transformation chosen" sentinel; this local shadows the
// Mask parameter.
2695 uint64_t Mask = ~0LLU;
2696
2697 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2698 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2699 // The mask value needs to shift right 16 if we're emitting andis.
2700 Mask >>= MBInLoHWord ? 0 : 16;
2701 NewOpC = MIOpC == PPC::RLWINM
2702 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2703 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2704 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2705 (ME - MB + 1 == SH) && (MB >= 16)) {
2706 // If we are rotating by the exact number of bits as are in the mask
2707 // and the mask is in the least significant bits of the register,
2708 // that's just an andis. (as long as the GPR result has no uses).
2709 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2710 Mask >>= 16;
2711 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2712 }
2713 // If we've set the mask, we can transform.
// The rotate's three immediates (SH, MB, ME) collapse into one and-immediate.
2714 if (Mask != ~0LLU) {
2715 MI->removeOperand(4);
2716 MI->removeOperand(3);
2717 MI->getOperand(2).setImm(Mask);
2718 NumRcRotatesConvertedToRcAnd++;
2719 }
2720 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
// A zero-rotate RLDICL that keeps at most the low 16 bits (MB >= 48) becomes
// an and-immediate with the equivalent low-bit mask.
2721 int64_t MB = MI->getOperand(3).getImm();
2722 if (MB >= 48) {
2723 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2724 NewOpC = PPC::ANDI8_rec;
2725 MI->removeOperand(3);
2726 MI->getOperand(2).setImm(Mask);
2727 NumRcRotatesConvertedToRcAnd++;
2728 }
2729 }
2730
2731 const MCInstrDesc &NewDesc = get(NewOpC);
2732 MI->setDesc(NewDesc);
2733
// Add whichever implicit defs/uses of the new opcode MI does not already
// carry.
2734 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2735 if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
2736 MI->addOperand(*MI->getParent()->getParent(),
2737 MachineOperand::CreateReg(ImpDef, true, true));
2738 }
2739 }
2740 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2741 if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
2742 MI->addOperand(*MI->getParent()->getParent(),
2743 MachineOperand::CreateReg(ImpUse, false, true));
2744 }
2745 }
2746 }
2747 assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2748 "Record-form instruction does not define cr0?");
2749
2750 // Modify the condition code of operands in OperandsToUpdate.
2751 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2752 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2753 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2754 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2755
2756 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2757 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2758
2759 return true;
2760}
2761
// Non-SSA counterpart of the compare optimization: when CmpMI compares a
// register against zero into CR0 and the instruction defining that register
// has a record form, the defining instruction is switched to its record form
// so that it produces the CR0 value itself.
// Returns true if the defining instruction was converted, false otherwise.
// Nothing in the visible body erases CmpMI; the debug output calls it a dead
// instruction, so removal is presumably left to the caller - confirm.
// NOTE(review): the declarator lines (2762-2763) and one builder continuation
// line (2824) are absent here; this is presumably optimizeCmpPostRA taking
// (CmpMI, MRI) - confirm.
2764 if (MRI->isSSA())
2765 return false;
2766
2767 Register SrcReg, SrcReg2;
2768 int64_t CmpMask, CmpValue;
2769 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2770 return false;
2771
2772 // Try to optimize the comparison against 0.
// A zero CmpMask means a register-register compare, which is not handled.
2773 if (CmpValue || !CmpMask || SrcReg2)
2774 return false;
2775
2776 // The record forms set the condition register based on a signed comparison
2777 // with zero (see comments in optimizeCompareInstr). Since we can't do the
2778 // equality checks in post-RA, we are more restricted on an unsigned
2779 // comparison.
2780 unsigned Opc = CmpMI.getOpcode();
2781 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2782 return false;
2783
2784 // The record forms are always based on a 64-bit comparison on PPC64
2785 // (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2786 // comparison. Since we can't do the equality checks in post-RA, we bail out
2787 // the case.
2788 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2789 return false;
2790
2791 // CmpMI can't be deleted if it has implicit def.
2792 if (CmpMI.hasImplicitDef())
2793 return false;
2794
// Locate the instruction that defines SrcReg ahead of CmpMI.
2795 bool SrcRegHasOtherUse = false;
2796 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2797 if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
2798 return false;
2799
// Only compares that write CR0 qualify.
2800 MachineOperand RegMO = CmpMI.getOperand(0);
2801 Register CRReg = RegMO.getReg();
2802 if (CRReg != PPC::CR0)
2803 return false;
2804
2805 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2806 bool SeenUseOfCRReg = false;
2807 bool IsCRRegKilled = false;
2808 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2809 SeenUseOfCRReg) ||
2810 SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
2811 return false;
2812
2813 int SrcMIOpc = SrcMI->getOpcode();
2814 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2815 if (NewOpC == -1)
2816 return false;
2817
2818 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2819 LLVM_DEBUG(SrcMI->dump());
2820
// Switch SrcMI to the record form in place.
2821 const MCInstrDesc &NewDesc = get(NewOpC);
2822 SrcMI->setDesc(NewDesc);
2823 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2825 SrcMI->clearRegisterDeads(CRReg);
2826
2827 assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2828 "Record-form instruction does not define cr0?");
2829
2830 LLVM_DEBUG(dbgs() << "with: ");
2831 LLVM_DEBUG(SrcMI->dump());
2832 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2833 LLVM_DEBUG(CmpMI.dump());
2834 return true;
2835}
2836
2839 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2840 const TargetRegisterInfo *TRI) const {
2841 const MachineOperand *BaseOp;
2842 OffsetIsScalable = false;
2843 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2844 return false;
2845 BaseOps.push_back(BaseOp);
2846 return true;
2847}
2848
2849static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2850 const TargetRegisterInfo *TRI) {
2851 // If this is a volatile load/store, don't mess with it.
2852 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2853 return false;
2854
2855 if (LdSt.getOperand(2).isFI())
2856 return true;
2857
2858 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2859 // Can't cluster if the instruction modifies the base register
2860 // or it is update form. e.g. ld r2,3(r2)
2861 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2862 return false;
2863
2864 return true;
2865}
2866
2867// Only cluster instruction pair that have the same opcode, and they are
2868// clusterable according to PowerPC specification.
2869static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2870 const PPCSubtarget &Subtarget) {
2871 switch (FirstOpc) {
2872 default:
2873 return false;
2874 case PPC::STD:
2875 case PPC::STFD:
2876 case PPC::STXSD:
2877 case PPC::DFSTOREf64:
2878 return FirstOpc == SecondOpc;
2879 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2880 // 32bit and 64bit instruction selection. They are clusterable pair though
2881 // they are different opcode.
2882 case PPC::STW:
2883 case PPC::STW8:
2884 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2885 }
2886}
2887
2889 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2890 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2891 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2892 unsigned NumBytes) const {
2893
2894 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2895 const MachineOperand &BaseOp1 = *BaseOps1.front();
2896 const MachineOperand &BaseOp2 = *BaseOps2.front();
2897 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2898 "Only base registers and frame indices are supported.");
2899
2900 // ClusterSize means the number of memory operations that will have been
2901 // clustered if this hook returns true.
2902 // Don't cluster memory op if there are already two ops clustered at least.
2903 if (ClusterSize > 2)
2904 return false;
2905
2906 // Cluster the load/store only when they have the same base
2907 // register or FI.
2908 if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
2909 (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
2910 (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2911 return false;
2912
2913 // Check if the load/store are clusterable according to the PowerPC
2914 // specification.
2915 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2916 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2917 unsigned FirstOpc = FirstLdSt.getOpcode();
2918 unsigned SecondOpc = SecondLdSt.getOpcode();
2920 // Cluster the load/store only when they have the same opcode, and they are
2921 // clusterable opcode according to PowerPC specification.
2922 if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2923 return false;
2924
2925 // Can't cluster load/store that have ordered or volatile memory reference.
2926 if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
2927 !isLdStSafeToCluster(SecondLdSt, TRI))
2928 return false;
2929
2930 int64_t Offset1 = 0, Offset2 = 0;
2931 LocationSize Width1 = 0, Width2 = 0;
2932 const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
2933 if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
2934 !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
2935 Width1 != Width2)
2936 return false;
2937
2938 assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2939 "getMemOperandWithOffsetWidth return incorrect base op");
2940 // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2941 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2942 return Offset1 + (int64_t)Width1.getValue() == Offset2;
2943}
2944
2945/// GetInstSize - Return the number of bytes of code the specified
2946/// instruction may be. This returns the maximum number of bytes.
2947///
2949 unsigned Opcode = MI.getOpcode();
2950
2951 if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
2952 const MachineFunction *MF = MI.getParent()->getParent();
2953 const char *AsmStr = MI.getOperand(0).getSymbolName();
2954 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
2955 } else if (Opcode == TargetOpcode::STACKMAP) {
2956 StackMapOpers Opers(&MI);
2957 return Opers.getNumPatchBytes();
2958 } else if (Opcode == TargetOpcode::PATCHPOINT) {
2959 PatchPointOpers Opers(&MI);
2960 return Opers.getNumPatchBytes();
2961 } else {
2962 return get(Opcode).getSize();
2963 }
2964}
2965
2966std::pair<unsigned, unsigned>
2968 // PPC always uses a direct mask.
2969 return std::make_pair(TF, 0u);
2970}
2971
2974 using namespace PPCII;
2975 static const std::pair<unsigned, const char *> TargetFlags[] = {
2976 {MO_PLT, "ppc-plt"},
2977 {MO_PIC_FLAG, "ppc-pic"},
2978 {MO_PCREL_FLAG, "ppc-pcrel"},
2979 {MO_GOT_FLAG, "ppc-got"},
2980 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
2981 {MO_TLSGD_FLAG, "ppc-tlsgd"},
2982 {MO_TPREL_FLAG, "ppc-tprel"},
2983 {MO_TLSLDM_FLAG, "ppc-tlsldm"},
2984 {MO_TLSLD_FLAG, "ppc-tlsld"},
2985 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
2986 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
2987 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
2988 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
2989 {MO_LO, "ppc-lo"},
2990 {MO_HA, "ppc-ha"},
2991 {MO_TPREL_LO, "ppc-tprel-lo"},
2992 {MO_TPREL_HA, "ppc-tprel-ha"},
2993 {MO_DTPREL_LO, "ppc-dtprel-lo"},
2994 {MO_TLSLD_LO, "ppc-tlsld-lo"},
2995 {MO_TOC_LO, "ppc-toc-lo"},
2996 {MO_TLS, "ppc-tls"},
2997 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
2998 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
2999 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
3000 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
3001 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3002 };
3003 return ArrayRef(TargetFlags);
3004}
3005
3006// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3007// The VSX versions have the advantage of a full 64-register target whereas
3008// the FP ones have the advantage of lower latency and higher throughput. So
3009// what we are after is using the faster instructions in low register pressure
3010// situations and using the larger register file in high register pressure
3011// situations.
3013 unsigned UpperOpcode, LowerOpcode;
3014 switch (MI.getOpcode()) {
3015 case PPC::DFLOADf32:
3016 UpperOpcode = PPC::LXSSP;
3017 LowerOpcode = PPC::LFS;
3018 break;
3019 case PPC::DFLOADf64:
3020 UpperOpcode = PPC::LXSD;
3021 LowerOpcode = PPC::LFD;
3022 break;
3023 case PPC::DFSTOREf32:
3024 UpperOpcode = PPC::STXSSP;
3025 LowerOpcode = PPC::STFS;
3026 break;
3027 case PPC::DFSTOREf64:
3028 UpperOpcode = PPC::STXSD;
3029 LowerOpcode = PPC::STFD;
3030 break;
3031 case PPC::XFLOADf32:
3032 UpperOpcode = PPC::LXSSPX;
3033 LowerOpcode = PPC::LFSX;
3034 break;
3035 case PPC::XFLOADf64:
3036 UpperOpcode = PPC::LXSDX;
3037 LowerOpcode = PPC::LFDX;
3038 break;
3039 case PPC::XFSTOREf32:
3040 UpperOpcode = PPC::STXSSPX;
3041 LowerOpcode = PPC::STFSX;
3042 break;
3043 case PPC::XFSTOREf64:
3044 UpperOpcode = PPC::STXSDX;
3045 LowerOpcode = PPC::STFDX;
3046 break;
3047 case PPC::LIWAX:
3048 UpperOpcode = PPC::LXSIWAX;
3049 LowerOpcode = PPC::LFIWAX;
3050 break;
3051 case PPC::LIWZX:
3052 UpperOpcode = PPC::LXSIWZX;
3053 LowerOpcode = PPC::LFIWZX;
3054 break;
3055 case PPC::STIWX:
3056 UpperOpcode = PPC::STXSIWX;
3057 LowerOpcode = PPC::STFIWX;
3058 break;
3059 default:
3060 llvm_unreachable("Unknown Operation!");
3061 }
3062
3063 Register TargetReg = MI.getOperand(0).getReg();
3064 unsigned Opcode;
3065 if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
3066 (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3067 Opcode = LowerOpcode;
3068 else
3069 Opcode = UpperOpcode;
3070 MI.setDesc(get(Opcode));
3071 return true;
3072}
3073
3074static bool isAnImmediateOperand(const MachineOperand &MO) {
3075 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3076}
3077
3079 auto &MBB = *MI.getParent();
3080 auto DL = MI.getDebugLoc();
3081
3082 switch (MI.getOpcode()) {
3083 case PPC::BUILD_UACC: {
3084 MCRegister ACC = MI.getOperand(0).getReg();
3085 MCRegister UACC = MI.getOperand(1).getReg();
3086 if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
3087 MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
3088 MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
3089 // FIXME: This can easily be improved to look up to the top of the MBB
3090 // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
3091 // we can just re-target any such XXLOR's to DstVSR + offset.
3092 for (int VecNo = 0; VecNo < 4; VecNo++)
3093 BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
3094 .addReg(SrcVSR + VecNo)
3095 .addReg(SrcVSR + VecNo);
3096 }
3097 // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
3098 // So after building the 4 copies, we can replace the BUILD_UACC instruction
3099 // with a NOP.
3100 [[fallthrough]];
3101 }
3102 case PPC::KILL_PAIR: {
3103 MI.setDesc(get(PPC::UNENCODED_NOP));
3104 MI.removeOperand(1);
3105 MI.removeOperand(0);
3106 return true;
3107 }
3108 case TargetOpcode::LOAD_STACK_GUARD: {
3109 assert(Subtarget.isTargetLinux() &&
3110 "Only Linux target is expected to contain LOAD_STACK_GUARD");
3111 const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
3112 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3113 MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
3114 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3115 .addImm(Offset)
3116 .addReg(Reg);
3117 return true;
3118 }
3119 case PPC::PPCLdFixedAddr: {
3120 assert(Subtarget.getTargetTriple().isOSGlibc() &&
3121 "Only targets with Glibc expected to contain PPCLdFixedAddr");
3122 int64_t Offset = 0;
3123 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3124 MI.setDesc(get(PPC::LWZ));
3125 uint64_t FAType = MI.getOperand(1).getImm();
3126#undef PPC_LNX_FEATURE
3127#undef PPC_CPU
3128#define PPC_LNX_DEFINE_OFFSETS
3129#include "llvm/TargetParser/PPCTargetParser.def"
3130 bool IsLE = Subtarget.isLittleEndian();
3131 bool Is64 = Subtarget.isPPC64();
3132 if (FAType == PPC_FAWORD_HWCAP) {
3133 if (IsLE)
3134 Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
3135 else
3136 Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
3137 } else if (FAType == PPC_FAWORD_HWCAP2) {
3138 if (IsLE)
3139 Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
3140 else
3141 Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
3142 } else if (FAType == PPC_FAWORD_CPUID) {
3143 if (IsLE)
3144 Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
3145 else
3146 Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
3147 }
3148 assert(Offset && "Do not know the offset for this fixed addr load");
3149 MI.removeOperand(1);
3151 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3152 .addImm(Offset)
3153 .addReg(Reg);
3154 return true;
3155#define PPC_TGT_PARSER_UNDEF_MACROS
3156#include "llvm/TargetParser/PPCTargetParser.def"
3157#undef PPC_TGT_PARSER_UNDEF_MACROS
3158 }
3159 case PPC::DFLOADf32:
3160 case PPC::DFLOADf64:
3161 case PPC::DFSTOREf32:
3162 case PPC::DFSTOREf64: {
3163 assert(Subtarget.hasP9Vector() &&
3164 "Invalid D-Form Pseudo-ops on Pre-P9 target.");
3165 assert(MI.getOperand(2).isReg() &&
3166 isAnImmediateOperand(MI.getOperand(1)) &&
3167 "D-form op must have register and immediate operands");
3168 return expandVSXMemPseudo(MI);
3169 }
3170 case PPC::XFLOADf32:
3171 case PPC::XFSTOREf32:
3172 case PPC::LIWAX:
3173 case PPC::LIWZX:
3174 case PPC::STIWX: {
3175 assert(Subtarget.hasP8Vector() &&
3176 "Invalid X-Form Pseudo-ops on Pre-P8 target.");
3177 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3178 "X-form op must have register and register operands");
3179 return expandVSXMemPseudo(MI);
3180 }
3181 case PPC::XFLOADf64:
3182 case PPC::XFSTOREf64: {
3183 assert(Subtarget.hasVSX() &&
3184 "Invalid X-Form Pseudo-ops on target that has no VSX.");
3185 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3186 "X-form op must have register and register operands");
3187 return expandVSXMemPseudo(MI);
3188 }
3189 case PPC::SPILLTOVSR_LD: {
3190 Register TargetReg = MI.getOperand(0).getReg();
3191 if (PPC::VSFRCRegClass.contains(TargetReg)) {
3192 MI.setDesc(get(PPC::DFLOADf64));
3193 return expandPostRAPseudo(MI);
3194 }
3195 else
3196 MI.setDesc(get(PPC::LD));
3197 return true;
3198 }
3199 case PPC::SPILLTOVSR_ST: {
3200 Register SrcReg = MI.getOperand(0).getReg();
3201 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3202 NumStoreSPILLVSRRCAsVec++;
3203 MI.setDesc(get(PPC::DFSTOREf64));
3204 return expandPostRAPseudo(MI);
3205 } else {
3206 NumStoreSPILLVSRRCAsGpr++;
3207 MI.setDesc(get(PPC::STD));
3208 }
3209 return true;
3210 }
3211 case PPC::SPILLTOVSR_LDX: {
3212 Register TargetReg = MI.getOperand(0).getReg();
3213 if (PPC::VSFRCRegClass.contains(TargetReg))
3214 MI.setDesc(get(PPC::LXSDX));
3215 else
3216 MI.setDesc(get(PPC::LDX));
3217 return true;
3218 }
3219 case PPC::SPILLTOVSR_STX: {
3220 Register SrcReg = MI.getOperand(0).getReg();
3221 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3222 NumStoreSPILLVSRRCAsVec++;
3223 MI.setDesc(get(PPC::STXSDX));
3224 } else {
3225 NumStoreSPILLVSRRCAsGpr++;
3226 MI.setDesc(get(PPC::STDX));
3227 }
3228 return true;
3229 }
3230
3231 // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
3232 case PPC::CFENCE:
3233 case PPC::CFENCE8: {
3234 auto Val = MI.getOperand(0).getReg();
3235 unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3236 BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
3237 BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
3239 .addReg(PPC::CR7)
3240 .addImm(1);
3241 MI.setDesc(get(PPC::ISYNC));
3242 MI.removeOperand(0);
3243 return true;
3244 }
3245 }
3246 return false;
3247}
3248
3249// Essentially a compile-time implementation of a compare->isel sequence.
3250// It takes two constants to compare, along with the true/false registers
3251// and the comparison type (as a subreg to a CR field) and returns one
3252// of the true/false registers, depending on the comparison results.
3253static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3254 unsigned TrueReg, unsigned FalseReg,
3255 unsigned CRSubReg) {
3256 // Signed comparisons. The immediates are assumed to be sign-extended.
3257 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3258 switch (CRSubReg) {
3259 default: llvm_unreachable("Unknown integer comparison type.");
3260 case PPC::sub_lt:
3261 return Imm1 < Imm2 ? TrueReg : FalseReg;
3262 case PPC::sub_gt:
3263 return Imm1 > Imm2 ? TrueReg : FalseReg;
3264 case PPC::sub_eq:
3265 return Imm1 == Imm2 ? TrueReg : FalseReg;
3266 }
3267 }
3268 // Unsigned comparisons.
3269 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3270 switch (CRSubReg) {
3271 default: llvm_unreachable("Unknown integer comparison type.");
3272 case PPC::sub_lt:
3273 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3274 case PPC::sub_gt:
3275 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3276 case PPC::sub_eq:
3277 return Imm1 == Imm2 ? TrueReg : FalseReg;
3278 }
3279 }
3280 return PPC::NoRegister;
3281}
3282
3284 unsigned OpNo,
3285 int64_t Imm) const {
3286 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3287 // Replace the REG with the Immediate.
3288 Register InUseReg = MI.getOperand(OpNo).getReg();
3289 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3290
3291 // We need to make sure that the MI didn't have any implicit use
3292 // of this REG any more. We don't call MI.implicit_operands().empty() to
3293 // return early, since MI's MCID might be changed in calling context, as a
3294 // result its number of explicit operands may be changed, thus the begin of
3295 // implicit operand is changed.
3297 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
3298 if (UseOpIdx >= 0) {
3299 MachineOperand &MO = MI.getOperand(UseOpIdx);
3300 if (MO.isImplicit())
3301 // The operands must always be in the following order:
3302 // - explicit reg defs,
3303 // - other explicit operands (reg uses, immediates, etc.),
3304 // - implicit reg defs
3305 // - implicit reg uses
3306 // Therefore, removing the implicit operand won't change the explicit
3307 // operands layout.
3308 MI.removeOperand(UseOpIdx);
3309 }
3310}
3311
3312// Replace an instruction with one that materializes a constant (and sets
3313// CR0 if the original instruction was a record-form instruction).
3315 const LoadImmediateInfo &LII) const {
3316 // Remove existing operands.
3317 int OperandToKeep = LII.SetCR ? 1 : 0;
3318 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3319 MI.removeOperand(i);
3320
3321 // Replace the instruction.
3322 if (LII.SetCR) {
3323 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3324 // Set the immediate.
3325 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3326 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3327 return;
3328 }
3329 else
3330 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3331
3332 // Set the immediate.
3333 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3334 .addImm(LII.Imm);
3335}
3336
3338 bool &SeenIntermediateUse) const {
3339 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3340 "Should be called after register allocation.");
3342 MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
3343 It++;
3344 SeenIntermediateUse = false;
3345 for (; It != E; ++It) {
3346 if (It->modifiesRegister(Reg, TRI))
3347 return &*It;
3348 if (It->readsRegister(Reg, TRI))
3349 SeenIntermediateUse = true;
3350 }
3351 return nullptr;
3352}
3353
3356 const DebugLoc &DL, Register Reg,
3357 int64_t Imm) const {
3359 "Register should be in non-SSA form after RA");
3360 bool isPPC64 = Subtarget.isPPC64();
3361 // FIXME: Materialization here is not optimal.
3362 // For some special bit patterns we can use less instructions.
3363 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3364 if (isInt<16>(Imm)) {
3365 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3366 } else if (isInt<32>(Imm)) {
3367 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3368 .addImm(Imm >> 16);
3369 if (Imm & 0xFFFF)
3370 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3371 .addReg(Reg, RegState::Kill)
3372 .addImm(Imm & 0xFFFF);
3373 } else {
3374 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3375 "only supported in PPC64");
3376 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3377 if ((Imm >> 32) & 0xFFFF)
3378 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3379 .addReg(Reg, RegState::Kill)
3380 .addImm((Imm >> 32) & 0xFFFF);
3381 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3382 .addReg(Reg, RegState::Kill)
3383 .addImm(32)
3384 .addImm(31);
3385 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3386 .addReg(Reg, RegState::Kill)
3387 .addImm((Imm >> 16) & 0xFFFF);
3388 if (Imm & 0xFFFF)
3389 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3390 .addReg(Reg, RegState::Kill)
3391 .addImm(Imm & 0xFFFF);
3392 }
3393}
3394
3395MachineInstr *PPCInstrInfo::getForwardingDefMI(
3397 unsigned &OpNoForForwarding,
3398 bool &SeenIntermediateUse) const {
3399 OpNoForForwarding = ~0U;
3400 MachineInstr *DefMI = nullptr;
3401 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3403 // If we're in SSA, get the defs through the MRI. Otherwise, only look
3404 // within the basic block to see if the register is defined using an
3405 // LI/LI8/ADDI/ADDI8.
3406 if (MRI->isSSA()) {
3407 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3408 if (!MI.getOperand(i).isReg())
3409 continue;
3410 Register Reg = MI.getOperand(i).getReg();
3411 if (!Reg.isVirtual())
3412 continue;
3413 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3414 if (TrueReg.isVirtual()) {
3415 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3416 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3417 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3418 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3419 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3420 OpNoForForwarding = i;
3421 DefMI = DefMIForTrueReg;
3422 // ADDI and LI operands may both feed the same instruction. We
3423 // prefer to fold the LI operand, as LI only has one Imm operand
3424 // and is more likely to be convertible. So if the current DefMI is
3425 // ADDI/ADDI8, we continue to look for a possible LI/LI8.
3426 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3427 break;
3428 }
3429 }
3430 }
3431 } else {
3432 // Looking back through the definition for each operand could be expensive,
3433 // so exit early if this isn't an instruction that either has an immediate
3434 // form or is already an immediate form that we can handle.
3435 ImmInstrInfo III;
3436 unsigned Opc = MI.getOpcode();
3437 bool ConvertibleImmForm =
3438 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3439 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3440 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3441 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3442 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3443 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3444 Opc == PPC::RLWINM8_rec;
3445 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3446 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3447 : false;
3448 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3449 return nullptr;
3450
3451 // Don't convert or %X, %Y, %Y since that's just a register move.
3452 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3453 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3454 return nullptr;
3455 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3456 MachineOperand &MO = MI.getOperand(i);
3457 SeenIntermediateUse = false;
3458 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3459 Register Reg = MI.getOperand(i).getReg();
3460 // If we see another use of this reg between the def and the MI,
3461 // we want to flag it so the def isn't deleted.
3462 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3463 if (DefMI) {
3464 // Is this register defined by some form of add-immediate (including
3465 // load-immediate) within this basic block?
3466 switch (DefMI->getOpcode()) {
3467 default:
3468 break;
3469 case PPC::LI:
3470 case PPC::LI8:
3471 case PPC::ADDItocL8:
3472 case PPC::ADDI:
3473 case PPC::ADDI8:
3474 OpNoForForwarding = i;
3475 return DefMI;
3476 }
3477 }
3478 }
3479 }
3480 }
3481 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3482}
3483
3484unsigned PPCInstrInfo::getSpillTarget() const {
3485 // With P10, we may need to spill paired vector registers or accumulator
3486 // registers. MMA implies paired vectors, so we can just check that.
3487 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3488 // P11 uses the P10 target.
3489 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3490 2 : Subtarget.hasP9Vector() ?
3491 1 : 0;
3492}
3493
3494ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3495 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3496}
3497
3498ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3499 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3500}
3501
3502// This opt tries to convert the following imm form to an index form to save an
3503// add for stack variables.
3504// Return false if no such pattern found.
3505//
3506// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3507// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3508// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3509//
3510// can be converted to:
3511//
3512// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3513// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3514//
3515// In order to eliminate ADD instr, make sure that:
3516// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3517// new ADDI instr and ADDI can only take int16 Imm.
3518// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3519// between ADDI and ADD instr since its original def in ADDI will be changed
3520// in new ADDI instr. And also there should be no new def for it between
3521// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3522// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3523// between ADD and Imm instr since ADD instr will be eliminated.
3524// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3525// moved to Index instr.
3527 MachineFunction *MF = MI.getParent()->getParent();
3529 bool PostRA = !MRI->isSSA();
3530 // Do this opt after PEI which is after RA. The reason is stack slot expansion
3531 // in PEI may expose such opportunities since in PEI, stack slot offsets to
3532 // frame base(OffsetAddi) are determined.
3533 if (!PostRA)
3534 return false;
3535 unsigned ToBeDeletedReg = 0;
3536 int64_t OffsetImm = 0;
3537 unsigned XFormOpcode = 0;
3538 ImmInstrInfo III;
3539
3540 // Check if Imm instr meets requirement.
3541 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3542 III))
3543 return false;
3544
3545 bool OtherIntermediateUse = false;
3546 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3547
3548 // Exit if there is other use between ADD and Imm instr or no def found.
3549 if (OtherIntermediateUse || !ADDMI)
3550 return false;
3551
3552 // Check if ADD instr meets requirement.
3553 if (!isADDInstrEligibleForFolding(*ADDMI))
3554 return false;
3555
3556 unsigned ScaleRegIdx = 0;
3557 int64_t OffsetAddi = 0;
3558 MachineInstr *ADDIMI = nullptr;
3559
3560 // Check if there is a valid ToBeChangedReg in ADDMI.
3561 // 1: It must be killed.
3562 // 2: Its definition must be a valid ADDIMI.
3563 // 3: It must satisfy int16 offset requirement.
3564 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3565 ScaleRegIdx = 2;
3566 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3567 ScaleRegIdx = 1;
3568 else
3569 return false;
3570
3571 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3572 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3573 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
3574 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3576 for (auto It = ++Start; It != End; It++)
3577 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3578 return true;
3579 return false;
3580 };
3581
3582 // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3583 // treated as special zero when ScaleReg is R0/X0 register.
3584 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3585 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3586 return false;
3587
3588 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3589 // and Imm Instr.
3590 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3591 return false;
3592
3593 // Now start to do the transformation.
3594 LLVM_DEBUG(dbgs() << "Replace instruction: "
3595 << "\n");
3596 LLVM_DEBUG(ADDIMI->dump());
3597 LLVM_DEBUG(ADDMI->dump());
3598 LLVM_DEBUG(MI.dump());
3599 LLVM_DEBUG(dbgs() << "with: "
3600 << "\n");
3601
3602 // Update ADDI instr.
3603 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3604
3605 // Update Imm instr.
3606 MI.setDesc(get(XFormOpcode));
3607 MI.getOperand(III.ImmOpNo)
3608 .ChangeToRegister(ScaleReg, false, false,
3609 ADDMI->getOperand(ScaleRegIdx).isKill());
3610
3611 MI.getOperand(III.OpNoForForwarding)
3612 .ChangeToRegister(ToBeChangedReg, false, false, true);
3613
3614 // Eliminate ADD instr.
3615 ADDMI->eraseFromParent();
3616
3617 LLVM_DEBUG(ADDIMI->dump());
3618 LLVM_DEBUG(MI.dump());
3619
3620 return true;
3621}
3622
3624 int64_t &Imm) const {
3625 unsigned Opc = ADDIMI.getOpcode();
3626
3627 // Exit if the instruction is not ADDI.
3628 if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3629 return false;
3630
3631 // The operand may not necessarily be an immediate - it could be a relocation.
3632 if (!ADDIMI.getOperand(2).isImm())
3633 return false;
3634
3635 Imm = ADDIMI.getOperand(2).getImm();
3636
3637 return true;
3638}
3639
3641 unsigned Opc = ADDMI.getOpcode();
3642
3643 // Exit if the instruction is not ADD.
3644 return Opc == PPC::ADD4 || Opc == PPC::ADD8;
3645}
3646
3648 unsigned &ToBeDeletedReg,
3649 unsigned &XFormOpcode,
3650 int64_t &OffsetImm,
3651 ImmInstrInfo &III) const {
3652 // Only handle load/store.
3653 if (!MI.mayLoadOrStore())
3654 return false;
3655
3656 unsigned Opc = MI.getOpcode();
3657
3658 XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
3659
3660 // Exit if instruction has no index form.
3661 if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3662 return false;
3663
3664 // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3665 if (!instrHasImmForm(XFormOpcode,
3666 PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
3667 return false;
3668
3669 if (!III.IsSummingOperands)
3670 return false;
3671
3672 MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
3673 MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
3674 // Only support imm operands, not relocation slots or others.
3675 if (!ImmOperand.isImm())
3676 return false;
3677
3678 assert(RegOperand.isReg() && "Instruction format is not right");
3679
3680 // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
3681 if (!RegOperand.isKill())
3682 return false;
3683
3684 ToBeDeletedReg = RegOperand.getReg();
3685 OffsetImm = ImmOperand.getImm();
3686
3687 return true;
3688}
3689
3691 MachineInstr *&ADDIMI,
3692 int64_t &OffsetAddi,
3693 int64_t OffsetImm) const {
3694 assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
3695 MachineOperand &MO = ADDMI->getOperand(Index);
3696
3697 if (!MO.isKill())
3698 return false;
3699
3700 bool OtherIntermediateUse = false;
3701
3702 ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
3703 // Currently handle only one "add + Imminstr" pair case, exit if other
3704 // intermediate use for ToBeChangedReg found.
3705 // TODO: handle the cases where there are other "add + Imminstr" pairs
3706 // with same offset in Imminstr which is like:
3707 //
3708 // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3709 // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
3710 // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
3711 // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
3712 // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
3713 //
3714 // can be converted to:
3715 //
3716 // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
3717 // (OffsetAddi + OffsetImm)
3718 // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
3719 // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
3720
3721 if (OtherIntermediateUse || !ADDIMI)
3722 return false;
3723 // Check if ADDI instr meets requirement.
3724 if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
3725 return false;
3726
3727 if (isInt<16>(OffsetAddi + OffsetImm))
3728 return true;
3729 return false;
3730}
3731
3732// If this instruction has an immediate form and one of its operands is a
3733// result of a load-immediate or an add-immediate, convert it to
3734// the immediate form if the constant is in range.
3736 SmallSet<Register, 4> &RegsToUpdate,
3737 MachineInstr **KilledDef) const {
3738 MachineFunction *MF = MI.getParent()->getParent();
3740 bool PostRA = !MRI->isSSA();
3741 bool SeenIntermediateUse = true;
3742 unsigned ForwardingOperand = ~0U;
3743 MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
3744 SeenIntermediateUse);
3745 if (!DefMI)
3746 return false;
3747 assert(ForwardingOperand < MI.getNumOperands() &&
3748 "The forwarding operand needs to be valid at this point");
3749 bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
3750 bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
3751 if (KilledDef && KillFwdDefMI)
3752 *KilledDef = DefMI;
3753
3754 // Conservatively add defs from DefMI and defs/uses from MI to the set of
3755 // registers that need their kill flags updated.
3756 for (const MachineOperand &MO : DefMI->operands())
3757 if (MO.isReg() && MO.isDef())
3758 RegsToUpdate.insert(MO.getReg());
3759 for (const MachineOperand &MO : MI.operands())
3760 if (MO.isReg())
3761 RegsToUpdate.insert(MO.getReg());
3762
3763 // If this is a imm instruction and its register operands is produced by ADDI,
3764 // put the imm into imm inst directly.
3765 if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
3766 PPC::INSTRUCTION_LIST_END &&
3767 transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
3768 return true;
3769
3770 ImmInstrInfo III;
3771 bool IsVFReg = MI.getOperand(0).isReg()
3772 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3773 : false;
3774 bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
3775 // If this is a reg+reg instruction that has a reg+imm form,
3776 // and one of the operands is produced by an add-immediate,
3777 // try to convert it.
3778 if (HasImmForm &&
3779 transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
3780 KillFwdDefMI))
3781 return true;
3782
3783 // If this is a reg+reg instruction that has a reg+imm form,
3784 // and one of the operands is produced by LI, convert it now.
3785 if (HasImmForm &&
3786 transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
3787 return true;
3788
3789 // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
3790 // can be simpified to LI.
3791 if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
3792 return true;
3793
3794 return false;
3795}
3796
3798 MachineInstr **ToErase) const {
3799 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3800 Register FoldingReg = MI.getOperand(1).getReg();
3801 if (!FoldingReg.isVirtual())
3802 return false;
3803 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3804 if (SrcMI->getOpcode() != PPC::RLWINM &&
3805 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3806 SrcMI->getOpcode() != PPC::RLWINM8 &&
3807 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3808 return false;
3809 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3810 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3811 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3812 "Invalid PPC::RLWINM Instruction!");
3813 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3814 uint64_t SHMI = MI.getOperand(2).getImm();
3815 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3816 uint64_t MBMI = MI.getOperand(3).getImm();
3817 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3818 uint64_t MEMI = MI.getOperand(4).getImm();
3819
3820 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3821 "Invalid PPC::RLWINM Instruction!");
3822 // If MBMI is bigger than MEMI, we always can not get run of ones.
3823 // RotatedSrcMask non-wrap:
3824 // 0........31|32........63
3825 // RotatedSrcMask: B---E B---E
3826 // MaskMI: -----------|--E B------
3827 // Result: ----- --- (Bad candidate)
3828 //
3829 // RotatedSrcMask wrap:
3830 // 0........31|32........63
3831 // RotatedSrcMask: --E B----|--E B----
3832 // MaskMI: -----------|--E B------
3833 // Result: --- -----|--- ----- (Bad candidate)
3834 //
3835 // One special case is RotatedSrcMask is a full set mask.
3836 // RotatedSrcMask full:
3837 // 0........31|32........63
3838 // RotatedSrcMask: ------EB---|-------EB---
3839 // MaskMI: -----------|--E B------
3840 // Result: -----------|--- ------- (Good candidate)
3841
3842 // Mark special case.
3843 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3844
3845 // For other MBMI > MEMI cases, just return.
3846 if ((MBMI > MEMI) && !SrcMaskFull)
3847 return false;
3848
3849 // Handle MBMI <= MEMI cases.
3850 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3851 // In MI, we only need low 32 bits of SrcMI, just consider about low 32
3852 // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
3853 // while in PowerPC ISA, lowerest bit is at index 63.
3854 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3855
3856 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3857 APInt FinalMask = RotatedSrcMask & MaskMI;
3858 uint32_t NewMB, NewME;
3859 bool Simplified = false;
3860
3861 // If final mask is 0, MI result should be 0 too.
3862 if (FinalMask.isZero()) {
3863 bool Is64Bit =
3864 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3865 Simplified = true;
3866 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3867 LLVM_DEBUG(MI.dump());
3868
3869 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3870 // Replace MI with "LI 0"
3871 MI.removeOperand(4);
3872 MI.removeOperand(3);
3873 MI.removeOperand(2);
3874 MI.getOperand(1).ChangeToImmediate(0);
3875 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3876 } else {
3877 // Replace MI with "ANDI_rec reg, 0"
3878 MI.removeOperand(4);
3879 MI.removeOperand(3);
3880 MI.getOperand(2).setImm(0);
3881 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3882 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3883 if (SrcMI->getOperand(1).isKill()) {
3884 MI.getOperand(1).setIsKill(true);
3885 SrcMI->getOperand(1).setIsKill(false);
3886 } else
3887 // About to replace MI.getOperand(1), clear its kill flag.
3888 MI.getOperand(1).setIsKill(false);
3889 }
3890
3891 LLVM_DEBUG(dbgs() << "With: ");
3892 LLVM_DEBUG(MI.dump());
3893
3894 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3895 NewMB <= NewME) ||
3896 SrcMaskFull) {
3897 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
3898 // than NewME. Otherwise we get a 64 bit value after folding, but MI
3899 // return a 32 bit value.
3900 Simplified = true;
3901 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3902 LLVM_DEBUG(MI.dump());
3903
3904 uint16_t NewSH = (SHSrc + SHMI) % 32;
3905 MI.getOperand(2).setImm(NewSH);
3906 // If SrcMI mask is full, no need to update MBMI and MEMI.
3907 if (!SrcMaskFull) {
3908 MI.getOperand(3).setImm(NewMB);
3909 MI.getOperand(4).setImm(NewME);
3910 }
3911 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3912 if (SrcMI->getOperand(1).isKill()) {
3913 MI.getOperand(1).setIsKill(true);
3914 SrcMI->getOperand(1).setIsKill(false);
3915 } else
3916 // About to replace MI.getOperand(1), clear its kill flag.
3917 MI.getOperand(1).setIsKill(false);
3918
3919 LLVM_DEBUG(dbgs() << "To: ");
3920 LLVM_DEBUG(MI.dump());
3921 }
3922 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
3923 !SrcMI->hasImplicitDef()) {
3924 // If FoldingReg has no non-debug use and it has no implicit def (it
3925 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
3926 // Otherwise keep it.
3927 *ToErase = SrcMI;
3928 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3929 LLVM_DEBUG(SrcMI->dump());
3930 }
3931 return Simplified;
3932}
3933
3934bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
3935 ImmInstrInfo &III, bool PostRA) const {
3936 // The vast majority of the instructions would need their operand 2 replaced
3937 // with an immediate when switching to the reg+imm form. A marked exception
3938 // are the update form loads/stores for which a constant operand 2 would need
3939 // to turn into a displacement and move operand 1 to the operand 2 position.
3940 III.ImmOpNo = 2;
3941 III.OpNoForForwarding = 2;
3942 III.ImmWidth = 16;
3943 III.ImmMustBeMultipleOf = 1;
3944 III.TruncateImmTo = 0;
3945 III.IsSummingOperands = false;
3946 switch (Opc) {
3947 default: return false;
3948 case PPC::ADD4:
3949 case PPC::ADD8:
3950 III.SignedImm = true;
3951 III.ZeroIsSpecialOrig = 0;
3952 III.ZeroIsSpecialNew = 1;
3953 III.IsCommutative = true;
3954 III.IsSummingOperands = true;
3955 III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
3956 break;
3957 case PPC::ADDC:
3958 case PPC::ADDC8:
3959 III.SignedImm = true;
3960 III.ZeroIsSpecialOrig = 0;
3961 III.ZeroIsSpecialNew = 0;
3962 III.IsCommutative = true;
3963 III.IsSummingOperands = true;
3964 III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
3965 break;
3966 case PPC::ADDC_rec:
3967 III.SignedImm = true;
3968 III.ZeroIsSpecialOrig = 0;
3969 III.ZeroIsSpecialNew = 0;
3970 III.IsCommutative = true;
3971 III.IsSummingOperands = true;
3972 III.ImmOpcode = PPC::ADDIC_rec;
3973 break;
3974 case PPC::SUBFC:
3975 case PPC::SUBFC8:
3976 III.SignedImm = true;
3977 III.ZeroIsSpecialOrig = 0;
3978 III.ZeroIsSpecialNew = 0;
3979 III.IsCommutative = false;
3980 III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
3981 break;
3982 case PPC::CMPW:
3983 case PPC::CMPD:
3984 III.SignedImm = true;
3985 III.ZeroIsSpecialOrig = 0;
3986 III.ZeroIsSpecialNew = 0;
3987 III.IsCommutative = false;
3988 III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
3989 break;
3990 case PPC::CMPLW:
3991 case PPC::CMPLD:
3992 III.SignedImm = false;
3993 III.ZeroIsSpecialOrig = 0;
3994 III.ZeroIsSpecialNew = 0;
3995 III.IsCommutative = false;
3996 III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
3997 break;
3998 case PPC::AND_rec:
3999 case PPC::AND8_rec:
4000 case PPC::OR:
4001 case PPC::OR8:
4002 case PPC::XOR:
4003 case PPC::XOR8:
4004 III.SignedImm = false;
4005 III.ZeroIsSpecialOrig = 0;
4006 III.ZeroIsSpecialNew = 0;
4007 III.IsCommutative = true;
4008 switch(Opc) {
4009 default: llvm_unreachable("Unknown opcode");
4010 case PPC::AND_rec:
4011 III.ImmOpcode = PPC::ANDI_rec;
4012 break;
4013 case PPC::AND8_rec:
4014 III.ImmOpcode = PPC::ANDI8_rec;
4015 break;
4016 case PPC::OR: III.ImmOpcode = PPC::ORI; break;
4017 case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
4018 case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
4019 case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
4020 }
4021 break;
4022 case PPC::RLWNM:
4023 case PPC::RLWNM8:
4024 case PPC::RLWNM_rec:
4025 case PPC::RLWNM8_rec:
4026 case PPC::SLW:
4027 case PPC::SLW8:
4028 case PPC::SLW_rec:
4029 case PPC::SLW8_rec:
4030 case PPC::SRW:
4031 case PPC::SRW8:
4032 case PPC::SRW_rec:
4033 case PPC::SRW8_rec:
4034 case PPC::SRAW:
4035 case PPC::SRAW_rec:
4036 III.SignedImm = false;
4037 III.ZeroIsSpecialOrig = 0;
4038 III.ZeroIsSpecialNew = 0;
4039 III.IsCommutative = false;
4040 // This isn't actually true, but the instructions ignore any of the
4041 // upper bits, so any immediate loaded with an LI is acceptable.
4042 // This does not apply to shift right algebraic because a value
4043 // out of range will produce a -1/0.
4044 III.ImmWidth = 16;
4045 if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
4046 Opc == PPC::RLWNM8_rec)
4047 III.TruncateImmTo = 5;
4048 else
4049 III.TruncateImmTo = 6;
4050 switch(Opc) {
4051 default: llvm_unreachable("Unknown opcode");
4052 case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
4053 case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
4054 case PPC::RLWNM_rec:
4055 III.ImmOpcode = PPC::RLWINM_rec;
4056 break;
4057 case PPC::RLWNM8_rec:
4058 III.ImmOpcode = PPC::RLWINM8_rec;
4059 break;
4060 case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
4061 case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
4062 case PPC::SLW_rec:
4063 III.ImmOpcode = PPC::RLWINM_rec;
4064 break;
4065 case PPC::SLW8_rec:
4066 III.ImmOpcode = PPC::RLWINM8_rec;
4067 break;
4068 case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
4069 case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
4070 case PPC::SRW_rec:
4071 III.ImmOpcode = PPC::RLWINM_rec;
4072 break;
4073 case PPC::SRW8_rec:
4074 III.ImmOpcode = PPC::RLWINM8_rec;
4075 break;
4076 case PPC::SRAW:
4077 III.ImmWidth = 5;
4078 III.TruncateImmTo = 0;
4079 III.ImmOpcode = PPC::SRAWI;
4080 break;
4081 case PPC::SRAW_rec:
4082 III.ImmWidth = 5;
4083 III.TruncateImmTo = 0;
4084 III.ImmOpcode = PPC::SRAWI_rec;
4085 break;
4086 }
4087 break;
4088 case PPC::RLDCL:
4089 case PPC::RLDCL_rec:
4090 case PPC::RLDCR:
4091 case PPC::RLDCR_rec:
4092 case PPC::SLD:
4093 case PPC::SLD_rec:
4094 case PPC::SRD:
4095 case PPC::SRD_rec:
4096 case PPC::SRAD:
4097 case PPC::SRAD_rec:
4098 III.SignedImm = false;
4099 III.ZeroIsSpecialOrig = 0;
4100 III.ZeroIsSpecialNew = 0;
4101 III.IsCommutative = false;
4102 // This isn't actually true, but the instructions ignore any of the
4103 // upper bits, so any immediate loaded with an LI is acceptable.
4104 // This does not apply to shift right algebraic because a value
4105 // out of range will produce a -1/0.
4106 III.ImmWidth = 16;
4107 if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
4108 Opc == PPC::RLDCR_rec)
4109 III.TruncateImmTo = 6;
4110 else
4111 III.TruncateImmTo = 7;
4112 switch(Opc) {
4113 default: llvm_unreachable("Unknown opcode");
4114 case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
4115 case PPC::RLDCL_rec:
4116 III.ImmOpcode = PPC::RLDICL_rec;
4117 break;
4118 case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
4119 case PPC::RLDCR_rec:
4120 III.ImmOpcode = PPC::RLDICR_rec;
4121 break;
4122 case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
4123 case PPC::SLD_rec:
4124 III.ImmOpcode = PPC::RLDICR_rec;
4125 break;
4126 case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
4127 case PPC::SRD_rec:
4128 III.ImmOpcode = PPC::RLDICL_rec;
4129 break;
4130 case PPC::SRAD:
4131 III.ImmWidth = 6;
4132 III.TruncateImmTo = 0;
4133 III.ImmOpcode = PPC::SRADI;
4134 break;
4135 case PPC::SRAD_rec:
4136 III.ImmWidth = 6;
4137 III.TruncateImmTo = 0;
4138 III.ImmOpcode = PPC::SRADI_rec;
4139 break;
4140 }
4141 break;
4142 // Loads and stores:
4143 case PPC::LBZX:
4144 case PPC::LBZX8:
4145 case PPC::LHZX:
4146 case PPC::LHZX8:
4147 case PPC::LHAX:
4148 case PPC::LHAX8:
4149 case PPC::LWZX:
4150 case PPC::LWZX8:
4151 case PPC::LWAX:
4152 case PPC::LDX:
4153 case PPC::LFSX:
4154 case PPC::LFDX:
4155 case PPC::STBX:
4156 case PPC::STBX8:
4157 case PPC::STHX:
4158 case PPC::STHX8:
4159 case PPC::STWX:
4160 case PPC::STWX8:
4161 case PPC::STDX:
4162 case PPC::STFSX:
4163 case PPC::STFDX:
4164 III.SignedImm = true;
4165 III.ZeroIsSpecialOrig = 1;
4166 III.ZeroIsSpecialNew = 2;
4167 III.IsCommutative = true;
4168 III.IsSummingOperands = true;
4169 III.ImmOpNo = 1;
4170 III.OpNoForForwarding = 2;
4171 switch(Opc) {
4172 default: llvm_unreachable("Unknown opcode");
4173 case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
4174 case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
4175 case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
4176 case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
4177 case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
4178 case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
4179 case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
4180 case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
4181 case PPC::LWAX:
4182 III.ImmOpcode = PPC::LWA;
4183 III.ImmMustBeMultipleOf = 4;
4184 break;
4185 case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
4186 case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
4187 case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
4188 case PPC::STBX: III.ImmOpcode = PPC::STB; break;
4189 case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
4190 case PPC::STHX: III.ImmOpcode = PPC::STH; break;
4191 case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
4192 case PPC::STWX: III.ImmOpcode = PPC::STW; break;
4193 case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
4194 case PPC::STDX:
4195 III.ImmOpcode = PPC::STD;
4196 III.ImmMustBeMultipleOf = 4;
4197 break;
4198 case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
4199 case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
4200 }
4201 break;
4202 case PPC::LBZUX:
4203 case PPC::LBZUX8:
4204 case PPC::LHZUX:
4205 case PPC::LHZUX8:
4206 case PPC::LHAUX:
4207 case PPC::LHAUX8:
4208 case PPC::LWZUX:
4209 case PPC::LWZUX8:
4210 case PPC::LDUX:
4211 case PPC::LFSUX:
4212 case PPC::LFDUX:
4213 case PPC::STBUX:
4214 case PPC::STBUX8:
4215 case PPC::STHUX:
4216 case PPC::STHUX8:
4217 case PPC::STWUX:
4218 case PPC::STWUX8:
4219 case PPC::STDUX:
4220 case PPC::STFSUX:
4221 case PPC::STFDUX:
4222 III.SignedImm = true;
4223 III.ZeroIsSpecialOrig = 2;
4224 III.ZeroIsSpecialNew = 3;
4225 III.IsCommutative = false;
4226 III.IsSummingOperands = true;
4227 III.ImmOpNo = 2;
4228 III.OpNoForForwarding = 3;
4229 switch(Opc) {
4230 default: llvm_unreachable("Unknown opcode");
4231 case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
4232 case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
4233 case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
4234 case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
4235 case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
4236 case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
4237 case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
4238 case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
4239 case PPC::LDUX:
4240 III.ImmOpcode = PPC::LDU;
4241 III.ImmMustBeMultipleOf = 4;
4242 break;
4243 case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
4244 case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
4245 case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
4246 case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
4247 case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
4248 case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
4249 case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
4250 case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
4251 case PPC::STDUX:
4252 III.ImmOpcode = PPC::STDU;
4253 III.ImmMustBeMultipleOf = 4;
4254 break;
4255 case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
4256 case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
4257 }
4258 break;
4259 // Power9 and up only. For some of these, the X-Form version has access to all
4260 // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
4261 // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
4262 // into or stored from is one of the VR registers.
4263 case PPC::LXVX:
4264 case PPC::LXSSPX:
4265 case PPC::LXSDX:
4266 case PPC::STXVX:
4267 case PPC::STXSSPX:
4268 case PPC::STXSDX:
4269 case PPC::XFLOADf32:
4270 case PPC::XFLOADf64:
4271 case PPC::XFSTOREf32:
4272 case PPC::XFSTOREf64:
4273 if (!Subtarget.hasP9Vector())
4274 return false;
4275 III.SignedImm = true;
4276 III.ZeroIsSpecialOrig = 1;
4277 III.ZeroIsSpecialNew = 2;
4278 III.IsCommutative = true;
4279 III.IsSummingOperands = true;
4280 III.ImmOpNo = 1;
4281 III.OpNoForForwarding = 2;
4282 III.ImmMustBeMultipleOf = 4;
4283 switch(Opc) {
4284 default: llvm_unreachable("Unknown opcode");
4285 case PPC::LXVX:
4286 III.ImmOpcode = PPC::LXV;
4287 III.ImmMustBeMultipleOf = 16;
4288 break;
4289 case PPC::LXSSPX:
4290 if (PostRA) {
4291 if (IsVFReg)
4292 III.ImmOpcode = PPC::LXSSP;
4293 else {
4294 III.ImmOpcode = PPC::LFS;
4295 III.ImmMustBeMultipleOf = 1;
4296 }
4297 break;
4298 }
4299 [[fallthrough]];
4300 case PPC::XFLOADf32:
4301 III.ImmOpcode = PPC::DFLOADf32;
4302 break;
4303 case PPC::LXSDX:
4304 if (PostRA) {
4305 if (IsVFReg)
4306 III.ImmOpcode = PPC::LXSD;
4307 else {
4308 III.ImmOpcode = PPC::LFD;
4309 III.ImmMustBeMultipleOf = 1;
4310 }
4311 break;
4312 }
4313 [[fallthrough]];
4314 case PPC::XFLOADf64:
4315 III.ImmOpcode = PPC::DFLOADf64;
4316 break;
4317 case PPC::STXVX:
4318 III.ImmOpcode = PPC::STXV;
4319 III.ImmMustBeMultipleOf = 16;
4320 break;
4321 case PPC::STXSSPX:
4322 if (PostRA) {
4323 if (IsVFReg)
4324 III.ImmOpcode = PPC::STXSSP;
4325 else {
4326 III.ImmOpcode = PPC::STFS;
4327 III.ImmMustBeMultipleOf = 1;
4328 }
4329 break;
4330 }
4331 [[fallthrough]];
4332 case PPC::XFSTOREf32:
4333 III.ImmOpcode = PPC::DFSTOREf32;
4334 break;
4335 case PPC::STXSDX:
4336 if (PostRA) {
4337 if (IsVFReg)
4338 III.ImmOpcode = PPC::STXSD;
4339 else {
4340 III.ImmOpcode = PPC::STFD;
4341 III.ImmMustBeMultipleOf = 1;
4342 }
4343 break;
4344 }
4345 [[fallthrough]];
4346 case PPC::XFSTOREf64:
4347 III.ImmOpcode = PPC::DFSTOREf64;
4348 break;
4349 }
4350 break;
4351 }
4352 return true;
4353}
4354
4355// Utility function for swaping two arbitrary operands of an instruction.
4356static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
4357 assert(Op1 != Op2 && "Cannot swap operand with itself.");
4358
4359 unsigned MaxOp = std::max(Op1, Op2);
4360 unsigned MinOp = std::min(Op1, Op2);
4361 MachineOperand MOp1 = MI.getOperand(MinOp);
4362 MachineOperand MOp2 = MI.getOperand(MaxOp);
4363 MI.removeOperand(std::max(Op1, Op2));
4364 MI.removeOperand(std::min(Op1, Op2));
4365
4366 // If the operands we are swapping are the two at the end (the common case)
4367 // we can just remove both and add them in the opposite order.
4368 if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
4369 MI.addOperand(MOp2);
4370 MI.addOperand(MOp1);
4371 } else {
4372 // Store all operands in a temporary vector, remove them and re-add in the
4373 // right order.
4375 unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
4376 for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
4377 MOps.push_back(MI.getOperand(i));
4378 MI.removeOperand(i);
4379 }
4380 // MOp2 needs to be added next.
4381 MI.addOperand(MOp2);
4382 // Now add the rest.
4383 for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
4384 if (i == MaxOp)
4385 MI.addOperand(MOp1);
4386 else {
4387 MI.addOperand(MOps.back());
4388 MOps.pop_back();
4389 }
4390 }
4391 }
4392}
4393
4394// Check if the 'MI' that has the index OpNoForForwarding
4395// meets the requirement described in the ImmInstrInfo.
4396bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4397 const ImmInstrInfo &III,
4398 unsigned OpNoForForwarding
4399 ) const {
4400 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4401 // would not work pre-RA, we can only do the check post RA.
4402 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4403 if (MRI.isSSA())
4404 return false;
4405
4406 // Cannot do the transform if MI isn't summing the operands.
4407 if (!III.IsSummingOperands)
4408 return false;
4409
4410 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4411 if (!III.ZeroIsSpecialOrig)
4412 return false;
4413
4414 // We cannot do the transform if the operand we are trying to replace
4415 // isn't the same as the operand the instruction allows.
4416 if (OpNoForForwarding != III.OpNoForForwarding)
4417 return false;
4418
4419 // Check if the instruction we are trying to transform really has
4420 // the special zero register as its operand.
4421 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4422 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4423 return false;
4424
4425 // This machine instruction is convertible if it is,
4426 // 1. summing the operands.
4427 // 2. one of the operands is special zero register.
4428 // 3. the operand we are trying to replace is allowed by the MI.
4429 return true;
4430}
4431
4432// Check if the DefMI is the add inst and set the ImmMO and RegMO
4433// accordingly.
4434bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4435 const ImmInstrInfo &III,
4436 MachineOperand *&ImmMO,
4437 MachineOperand *&RegMO) const {
4438 unsigned Opc = DefMI.getOpcode();
4439 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4440 return false;
4441
4442 // Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL8
4443 // on AIX which is used for toc-data access. TODO: Follow up to see if it can
4444 // apply for AIX toc-data as well.
4445 if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
4446 return false;
4447
4448 assert(DefMI.getNumOperands() >= 3 &&
4449 "Add inst must have at least three operands");
4450 RegMO = &DefMI.getOperand(1);
4451 ImmMO = &DefMI.getOperand(2);
4452
4453 // Before RA, ADDI first operand could be a frame index.
4454 if (!RegMO->isReg())
4455 return false;
4456
4457 // This DefMI is elgible for forwarding if it is:
4458 // 1. add inst
4459 // 2. one of the operands is Imm/CPI/Global.
4460 return isAnImmediateOperand(*ImmMO);
4461}
4462
4463bool PPCInstrInfo::isRegElgibleForForwarding(
4464 const MachineOperand &RegMO, const MachineInstr &DefMI,
4465 const MachineInstr &MI, bool KillDefMI,
4466 bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
4467 // x = addi y, imm
4468 // ...
4469 // z = lfdx 0, x -> z = lfd imm(y)
4470 // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
4471 // of "y" between the DEF of "x" and "z".
4472 // The query is only valid post RA.
4473 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4474 if (MRI.isSSA())
4475 return false;
4476
4477 Register Reg = RegMO.getReg();
4478
4479 // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
4481 MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
4482 It++;
4483 for (; It != E; ++It) {
4484 if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4485 return false;
4486 else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4487 IsFwdFeederRegKilled = true;
4488 if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4489 SeenIntermediateUse = true;
4490 // Made it to DefMI without encountering a clobber.
4491 if ((&*It) == &DefMI)
4492 break;
4493 }
4494 assert((&*It) == &DefMI && "DefMI is missing");
4495
4496 // If DefMI also defines the register to be forwarded, we can only forward it
4497 // if DefMI is being erased.
4498 if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
4499 return KillDefMI;
4500
4501 return true;
4502}
4503
4504bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
4505 const MachineInstr &DefMI,
4506 const ImmInstrInfo &III,
4507 int64_t &Imm,
4508 int64_t BaseImm) const {
4509 assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
4510 if (DefMI.getOpcode() == PPC::ADDItocL8) {
4511 // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
4512 // However, we know that, it is 16-bit width, and has the alignment of 4.
4513 // Check if the instruction met the requirement.
4514 if (III.ImmMustBeMultipleOf > 4 ||
4515 III.TruncateImmTo || III.ImmWidth != 16)
4516 return false;
4517
4518 // Going from XForm to DForm loads means that the displacement needs to be
4519 // not just an immediate but also a multiple of 4, or 16 depending on the
4520 // load. A DForm load cannot be represented if it is a multiple of say 2.
4521 // XForm loads do not have this restriction.
4522 if (ImmMO.isGlobal()) {
4523 const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
4525 return false;
4526 }
4527
4528 return true;
4529 }
4530
4531 if (ImmMO.isImm()) {
4532 // It is Imm, we need to check if the Imm fit the range.
4533 // Sign-extend to 64-bits.
4534 // DefMI may be folded with another imm form instruction, the result Imm is
4535 // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
4536 APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
4537 if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
4538 return false;
4539 if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
4540 return false;
4541 Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);
4542
4543 if (Imm % III.ImmMustBeMultipleOf)
4544 return false;
4545 if (III.TruncateImmTo)
4546 Imm &= ((1 << III.TruncateImmTo) - 1);
4547 }
4548 else
4549 return false;
4550
4551 // This ImmMO is forwarded if it meets the requriement describle
4552 // in ImmInstrInfo
4553 return true;
4554}
4555
4556bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4557 unsigned OpNoForForwarding,
4558 MachineInstr **KilledDef) const {
4559 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4560 !DefMI.getOperand(1).isImm())
4561 return false;
4562
4563 MachineFunction *MF = MI.getParent()->getParent();
4565 bool PostRA = !MRI->isSSA();
4566
4567 int64_t Immediate = DefMI.getOperand(1).getImm();
4568 // Sign-extend to 64-bits.
4569 int64_t SExtImm = SignExtend64<16>(Immediate);
4570
4571 bool ReplaceWithLI = false;
4572 bool Is64BitLI = false;
4573 int64_t NewImm = 0;
4574 bool SetCR = false;
4575 unsigned Opc = MI.getOpcode();
4576 switch (Opc) {
4577 default:
4578 return false;
4579
4580 // FIXME: Any branches conditional on such a comparison can be made
4581 // unconditional. At this time, this happens too infrequently to be worth
4582 // the implementation effort, but if that ever changes, we could convert
4583 // such a pattern here.
4584 case PPC::CMPWI:
4585 case PPC::CMPLWI:
4586 case PPC::CMPDI:
4587 case PPC::CMPLDI: {
4588 // Doing this post-RA would require dataflow analysis to reliably find uses
4589 // of the CR register set by the compare.
4590 // No need to fixup killed/dead flag since this transformation is only valid
4591 // before RA.
4592 if (PostRA)
4593 return false;
4594 // If a compare-immediate is fed by an immediate and is itself an input of
4595 // an ISEL (the most common case) into a COPY of the correct register.
4596 bool Changed = false;
4597 Register DefReg = MI.getOperand(0).getReg();
4598 int64_t Comparand = MI.getOperand(2).getImm();
4599 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4600 ? (Comparand | 0xFFFFFFFFFFFF0000)
4601 : Comparand;
4602
4603 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4604 unsigned UseOpc = CompareUseMI.getOpcode();
4605 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4606 continue;
4607 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4608 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4609 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4610 unsigned RegToCopy =
4611 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4612 if (RegToCopy == PPC::NoRegister)
4613 continue;
4614 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4615 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4616 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4617 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4618 CompareUseMI.removeOperand(3);
4619 CompareUseMI.removeOperand(2);
4620 continue;
4621 }
4622 LLVM_DEBUG(
4623 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4624 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4625 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4626 // Convert to copy and remove unneeded operands.
4627 CompareUseMI.setDesc(get(PPC::COPY));
4628 CompareUseMI.removeOperand(3);
4629 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4630 CmpIselsConverted++;
4631 Changed = true;
4632 LLVM_DEBUG(CompareUseMI.dump());
4633 }
4634 if (Changed)
4635 return true;
4636 // This may end up incremented multiple times since this function is called
4637 // during a fixed-point transformation, but it is only meant to indicate the
4638 // presence of this opportunity.
4639 MissedConvertibleImmediateInstrs++;
4640 return false;
4641 }
4642
4643 // Immediate forms - may simply be convertible to an LI.
4644 case PPC::ADDI:
4645 case PPC::ADDI8: {
4646 // Does the sum fit in a 16-bit signed field?
4647 int64_t Addend = MI.getOperand(2).getImm();
4648 if (isInt<16>(Addend + SExtImm)) {
4649 ReplaceWithLI = true;
4650 Is64BitLI = Opc == PPC::ADDI8;
4651 NewImm = Addend + SExtImm;
4652 break;
4653 }
4654 return false;
4655 }
4656 case PPC::SUBFIC:
4657 case PPC::SUBFIC8: {
4658 // Only transform this if the CARRY implicit operand is dead.
4659 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4660 return false;
4661 int64_t Minuend = MI.getOperand(2).getImm();
4662 if (isInt<16>(Minuend - SExtImm)) {
4663 ReplaceWithLI = true;
4664 Is64BitLI = Opc == PPC::SUBFIC8;
4665 NewImm = Minuend - SExtImm;
4666 break;
4667 }
4668 return false;
4669 }
4670 case PPC::RLDICL:
4671 case PPC::RLDICL_rec:
4672 case PPC::RLDICL_32:
4673 case PPC::RLDICL_32_64: {
4674 // Use APInt's rotate function.
4675 int64_t SH = MI.getOperand(2).getImm();
4676 int64_t MB = MI.getOperand(3).getImm();
4677 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4678 SExtImm, true);
4679 InVal = InVal.rotl(SH);
4680 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4681 InVal &= Mask;
4682 // Can't replace negative values with an LI as that will sign-extend
4683 // and not clear the left bits. If we're setting the CR bit, we will use
4684 // ANDI_rec which won't sign extend, so that's safe.
4685 if (isUInt<15>(InVal.getSExtValue()) ||
4686 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4687 ReplaceWithLI = true;
4688 Is64BitLI = Opc != PPC::RLDICL_32;
4689 NewImm = InVal.getSExtValue();
4690 SetCR = Opc == PPC::RLDICL_rec;
4691 break;
4692 }
4693 return false;
4694 }
4695 case PPC::RLWINM:
4696 case PPC::RLWINM8:
4697 case PPC::RLWINM_rec:
4698 case PPC::RLWINM8_rec: {
4699 int64_t SH = MI.getOperand(2).getImm();
4700 int64_t MB = MI.getOperand(3).getImm();
4701 int64_t ME = MI.getOperand(4).getImm();
4702 APInt InVal(32, SExtImm, true);
4703 InVal = InVal.rotl(SH);
4704 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4705 InVal &= Mask;
4706 // Can't replace negative values with an LI as that will sign-extend
4707 // and not clear the left bits. If we're setting the CR bit, we will use
4708 // ANDI_rec which won't sign extend, so that's safe.
4709 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4710 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4711 isUInt<16>(InVal.getSExtValue()));
4712 if (ValueFits) {
4713 ReplaceWithLI = true;
4714 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4715 NewImm = InVal.getSExtValue();
4716 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4717 break;
4718 }
4719 return false;
4720 }
4721 case PPC::ORI:
4722 case PPC::ORI8:
4723 case PPC::XORI:
4724 case PPC::XORI8: {
4725 int64_t LogicalImm = MI.getOperand(2).getImm();
4726 int64_t Result = 0;
4727 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4728 Result = LogicalImm | SExtImm;
4729 else
4730 Result = LogicalImm ^ SExtImm;
4731 if (isInt<16>(Result)) {
4732 ReplaceWithLI = true;
4733 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4734 NewImm = Result;
4735 break;
4736 }
4737 return false;
4738 }
4739 }
4740
4741 if (ReplaceWithLI) {
4742 // We need to be careful with CR-setting instructions we're replacing.
4743 if (SetCR) {
4744 // We don't know anything about uses when we're out of SSA, so only
4745 // replace if the new immediate will be reproduced.
4746 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4747 if (PostRA && ImmChanged)
4748 return false;
4749
4750 if (!PostRA) {
4751 // If the defining load-immediate has no other uses, we can just replace
4752 // the immediate with the new immediate.
4753 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4754 DefMI.getOperand(1).setImm(NewImm);
4755
4756 // If we're not using the GPR result of the CR-setting instruction, we
4757 // just need to and with zero/non-zero depending on the new immediate.
4758 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4759 if (NewImm) {
4760 assert(Immediate && "Transformation converted zero to non-zero?");
4761 NewImm = Immediate;
4762 }
4763 } else if (ImmChanged)
4764 return false;
4765 }
4766 }
4767
4768 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4769 LLVM_DEBUG(MI.dump());
4770 LLVM_DEBUG(dbgs() << "Fed by:\n");
4771 LLVM_DEBUG(DefMI.dump());
4773 LII.Imm = NewImm;
4774 LII.Is64Bit = Is64BitLI;
4775 LII.SetCR = SetCR;
4776 // If we're setting the CR, the original load-immediate must be kept (as an
4777 // operand to ANDI_rec/ANDI8_rec).
4778 if (KilledDef && SetCR)
4779 *KilledDef = nullptr;
4780 replaceInstrWithLI(MI, LII);
4781
4782 if (PostRA)
4783 recomputeLivenessFlags(*MI.getParent());
4784
4785 LLVM_DEBUG(dbgs() << "With:\n");
4786 LLVM_DEBUG(MI.dump());
4787 return true;
4788 }
4789 return false;
4790}
4791
4792bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4793 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4794 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4795 bool PostRA = !MRI->isSSA();
4796 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4797 // for post-ra.
4798 if (PostRA)
4799 return false;
4800
4801 // Only handle load/store.
4802 if (!MI.mayLoadOrStore())
4803 return false;
4804
4805 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4806
4807 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4808 "MI must have x-form opcode");
4809
4810 // get Imm Form info.
4811 ImmInstrInfo III;
4812 bool IsVFReg = MI.getOperand(0).isReg()
4813 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4814 : false;
4815
4816 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4817 return false;
4818
4819 if (!III.IsSummingOperands)
4820 return false;
4821
4822 if (OpNoForForwarding != III.OpNoForForwarding)
4823 return false;
4824
4825 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4826 if (!ImmOperandMI.isImm())
4827 return false;
4828
4829 // Check DefMI.
4830 MachineOperand *ImmMO = nullptr;
4831 MachineOperand *RegMO = nullptr;
4832 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4833 return false;
4834 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4835
4836 // Check Imm.
4837 // Set ImmBase from imm instruction as base and get new Imm inside
4838 // isImmElgibleForForwarding.
4839 int64_t ImmBase = ImmOperandMI.getImm();
4840 int64_t Imm = 0;
4841 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4842 return false;
4843
4844 // Do the transform
4845 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4846 LLVM_DEBUG(MI.dump());
4847 LLVM_DEBUG(dbgs() << "Fed by:\n");
4848 LLVM_DEBUG(DefMI.dump());
4849
4850 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4851 MI.getOperand(III.ImmOpNo).setImm(Imm);
4852
4853 LLVM_DEBUG(dbgs() << "With:\n");
4854 LLVM_DEBUG(MI.dump());
4855 return true;
4856}
4857
4858// If an X-Form instruction is fed by an add-immediate and one of its operands
4859// is the literal zero, attempt to forward the source of the add-immediate to
4860// the corresponding D-Form instruction with the displacement coming from
4861// the immediate being added.
//
// Returns true once MI has been rewritten in place to III.ImmOpcode, and false
// as soon as one of the eligibility helpers rejects the pattern. Every edit
// this function itself makes to MI happens only after all four checks passed.
// NOTE(review): this listing has gaps (lines 4916, 4925 and 4931 are absent),
// so three statements of the body are missing here; restore them from the
// upstream file before relying on this text.
4862bool PPCInstrInfo::transformToImmFormFedByAdd(
4863 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4864 MachineInstr &DefMI, bool KillDefMI) const {
4865 // RegMO ImmMO
4866 // | |
4867 // x = addi reg, imm <----- DefMI
4868 // y = op 0 , x <----- MI
4869 // |
4870 // OpNoForForwarding
4871 // Check if the MI meet the requirement described in the III.
4872 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4873 return false;
4874
4875 // Check if the DefMI meet the requirement
4876 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4877 MachineOperand *ImmMO = nullptr;
4878 MachineOperand *RegMO = nullptr;
4879 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4880 return false;
4881 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4882
4883 // As we get the Imm operand now, we need to check if the ImmMO meet
4884 // the requirement described in the III. If yes set the Imm.
4885 int64_t Imm = 0;
4886 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4887 return false;
4888
4889 bool IsFwdFeederRegKilled = false;
4890 bool SeenIntermediateUse = false;
4891 // Check if the RegMO can be forwarded to MI.
4892 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4893 IsFwdFeederRegKilled, SeenIntermediateUse))
4894 return false;
4895
4896 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
// Not being in SSA form is used as the indicator for "after register
// allocation"; it only decides whether liveness flags are recomputed below.
4897 bool PostRA = !MRI.isSSA();
4898
4899 // We know that, the MI and DefMI both meet the pattern, and
4900 // the Imm also meet the requirement with the new Imm-form.
4901 // It is safe to do the transformation now.
4902 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4903 LLVM_DEBUG(MI.dump());
4904 LLVM_DEBUG(dbgs() << "Fed by:\n");
4905 LLVM_DEBUG(DefMI.dump());
4906
4907 // Update the base reg first.
// The forwarded operand becomes a use of DefMI's register operand and
// inherits that operand's kill flag.
4908 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4909 false, false,
4910 RegMO->isKill());
4911
4912 // Then, update the imm.
4913 if (ImmMO->isImm()) {
4914 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4915 // directly.
// NOTE(review): listing line 4916 is missing here; presumably it is the call
// that writes Imm into the special-zero operand -- confirm against upstream.
4917 }
4918 else {
4919 // Otherwise, it is Constant Pool Index(CPI) or Global,
4920 // which is relocation in fact. We need to replace the special zero
4921 // register with ImmMO.
4922 // Before that, we need to fixup the target flags for imm.
4923 // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4924 if (DefMI.getOpcode() == PPC::ADDItocL8)
// NOTE(review): listing line 4925 (the body of the `if` above) is missing;
// presumably it sets the target flags on ImmMO -- confirm against upstream.
4926
4927 // MI didn't have the interface such as MI.setOperand(i) though
4928 // it has MI.getOperand(i). To replace the ZERO MachineOperand with
4929 // ImmMO, we need to remove ZERO operand and all the operands behind it,
4930 // and, add the ImmMO, then, move back all the operands behind ZERO.
// NOTE(review): listing line 4931 is missing; presumably it declares the MOps
// container used below -- confirm against upstream.
// NOTE(review): the index is unsigned, so this loop relies on
// III.ZeroIsSpecialOrig being non-zero; with 0 the condition could never
// become false. Presumably an earlier eligibility check guarantees that.
4932 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
4933 MOps.push_back(MI.getOperand(i));
4934 MI.removeOperand(i);
4935 }
4936
// MOps was filled back-to-front, so its last element is the operand that sat
// at index III.ZeroIsSpecialOrig.
4937 // Remove the last MO in the list, which is ZERO operand in fact.
4938 MOps.pop_back();
4939 // Add the imm operand.
4940 MI.addOperand(*ImmMO);
// NOTE(review): the saved operands are re-added in MOps order, i.e. reversed
// relative to their original order whenever more than one operand followed
// ZERO -- confirm that this is intended.
4941 // Now add the rest back.
4942 for (auto &MO : MOps)
4943 MI.addOperand(MO);
4944 }
4945
4946 // Update the opcode.
4947 MI.setDesc(get(III.ImmOpcode));
4948
4949 if (PostRA)
4950 recomputeLivenessFlags(*MI.getParent());
4951 LLVM_DEBUG(dbgs() << "With:\n");
4952 LLVM_DEBUG(MI.dump());
4953
4954 return true;
4955}
4956
// Rewrites the reg+reg instruction MI into its immediate form III.ImmOpcode
// when operand ConstantOpNo is produced by the load-immediate DefMI (LI/LI8).
//
// Returns false, before MI is modified, when:
//  - DefMI is not LI/LI8 with an immediate operand,
//  - the constant is not in III.OpNoForForwarding and III is not commutative,
//  - the immediate violates III.ImmMustBeMultipleOf, or does not fit the
//    signed/unsigned III.ImmWidth-bit field (after optional truncation),
//  - post-RA, R0/X0 sits in a slot whose "zero is special" meaning differs
//    between the old and the new opcode and is not the constant operand.
// Returns true after MI has been rewritten.
// NOTE(review): this listing has gaps (lines 5031, 5053, 5062 and 5080 are
// absent), so four statements of the body are missing here; restore them from
// the upstream file before relying on this text.
4957bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
4958 const ImmInstrInfo &III,
4959 unsigned ConstantOpNo,
4960 MachineInstr &DefMI) const {
4961 // DefMI must be LI or LI8.
4962 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4963 !DefMI.getOperand(1).isImm())
4964 return false;
4965
4966 // Get Imm operand and Sign-extend to 64-bits.
4967 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
4968
4969 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4970 bool PostRA = !MRI.isSSA();
4971 // Exit early if we can't convert this.
4972 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
4973 return false;
4974 if (Imm % III.ImmMustBeMultipleOf)
4975 return false;
// Keep only the low III.TruncateImmTo bits when the form asks for truncation.
4976 if (III.TruncateImmTo)
4977 Imm &= ((1 << III.TruncateImmTo) - 1);
4978 if (III.SignedImm) {
4979 APInt ActualValue(64, Imm, true);
4980 if (!ActualValue.isSignedIntN(III.ImmWidth))
4981 return false;
4982 } else {
// The shift is done in `int`, so this assumes III.ImmWidth is small enough
// not to overflow it -- TODO confirm the range of ImmWidth.
4983 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
4984 if ((uint64_t)Imm > UnsignedMax)
4985 return false;
4986 }
4987
4988 // If we're post-RA, the instructions don't agree on whether register zero is
4989 // special, we can transform this as long as the register operand that will
4990 // end up in the location where zero is special isn't R0.
4991 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
4992 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
4993 III.ZeroIsSpecialNew + 1;
4994 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
4995 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
4996 // If R0 is in the operand where zero is special for the new instruction,
4997 // it is unsafe to transform if the constant operand isn't that operand.
4998 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
4999 ConstantOpNo != III.ZeroIsSpecialNew)
5000 return false;
5001 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
5002 ConstantOpNo != PosForOrigZero)
5003 return false;
5004 }
5005
// Classify the original opcode before setDesc() below changes it.
// NOTE(review): SRW8/SRW8_rec are part of SpecialShift32 but not of
// RightShift, and SLW8_rec/SRW8_rec are not part of SetCR. As written, those
// opcodes take the left-shift / non-record paths below -- confirm this is
// intended.
5006 unsigned Opc = MI.getOpcode();
5007 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5008 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5009 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5010 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5011 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5012 Opc == PPC::SRD || Opc == PPC::SRD_rec;
5013 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5014 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5015 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5016 Opc == PPC::SRD_rec;
5017
5018 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5019 LLVM_DEBUG(MI.dump());
5020 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5021 LLVM_DEBUG(DefMI.dump());
5022 MI.setDesc(get(III.ImmOpcode));
5023 if (ConstantOpNo == III.OpNoForForwarding) {
5024 // Converting shifts to immediate form is a bit tricky since they may do
5025 // one of three things:
5026 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5027 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5028 // setting CR0)
5029 // 3. If the shift amount is in [1, OpSize), it's just a shift
5030 if (SpecialShift32 || SpecialShift64) {
// NOTE(review): listing line 5031 is missing; presumably it declares the LII
// object filled in below -- confirm against upstream.
5032 LII.Imm = 0;
5033 LII.SetCR = SetCR;
5034 LII.Is64Bit = SpecialShift64;
// ShAmt is the in-range part of the amount; the next-higher bit (0x20 / 0x40)
// set means the shift is at least the operand size, i.e. case 1 above.
5035 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
5036 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5037 replaceInstrWithLI(MI, LII);
5038 // Shifts by zero don't change the value. If we don't need to set CR0,
5039 // just convert this to a COPY. Can't do this post-RA since we've already
5040 // cleaned up the copies.
5041 else if (!SetCR && ShAmt == 0 && !PostRA) {
5042 MI.removeOperand(2);
5043 MI.setDesc(get(PPC::COPY));
5044 } else {
5045 // The 32 bit and 64 bit instructions are quite different.
5046 if (SpecialShift32) {
5047 // Left shifts use (N, 0, 31-N).
5048 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5049 // use (0, 0, 31) if N == 0.
5050 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5051 uint64_t MB = RightShift ? ShAmt : 0;
5052 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
// NOTE(review): listing line 5053 is missing; SH is computed above but not
// consumed in this extract, so presumably the missing call stores SH into the
// forwarded operand -- confirm against upstream.
5054 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5055 .addImm(ME);
5056 } else {
5057 // Left shifts use (N, 63-N).
5058 // Right shifts use (64-N, N) if 0 < N < 64.
5059 // use (0, 0) if N == 0.
5060 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5061 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
// NOTE(review): listing line 5062 is missing; as in the 32-bit branch, it
// presumably stores SH into the forwarded operand -- confirm against upstream.
5063 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5064 }
5065 }
5066 } else
5067 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5068 }
5069 // Convert commutative instructions (switch the operands and convert the
5070 // desired one to an immediate.
5071 else if (III.IsCommutative) {
5072 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5073 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5074 } else
5075 llvm_unreachable("Should have exited early!");
5076
5077 // For instructions for which the constant register replaces a different
5078 // operand than where the immediate goes, we need to swap them.
5079 if (III.OpNoForForwarding != III.ImmOpNo)
// NOTE(review): listing line 5080 (the body of the `if` above) is missing;
// per the comment it presumably swaps the two operands. As extracted, the
// following `if` would become the body -- confirm against upstream.
5081
5082 // If the special R0/X0 register index are different for original instruction
5083 // and new instruction, we need to fix up the register class in new
5084 // instruction.
5085 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5086 if (III.ZeroIsSpecialNew) {
5087 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5088 // need to fix up register class.
5089 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5090 if (RegToModify.isVirtual()) {
// Pick the no-R0 (32-bit) or no-X0 (64-bit) class depending on whether the
// register's current class is GPRC or a superclass of it.
5091 const TargetRegisterClass *NewRC =
5092 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5093 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5094 MRI.setRegClass(RegToModify, NewRC);
5095 }
5096 }
5097 }
5098
5099 if (PostRA)
5100 recomputeLivenessFlags(*MI.getParent());
5101
5102 LLVM_DEBUG(dbgs() << "With: ");
5103 LLVM_DEBUG(MI.dump());
5104 LLVM_DEBUG(dbgs() << "\n");
5105 return true;
5106}
5107
// Maps a register class to the one that should be used instead: when the
// subtarget has VSX and RC is VRRC, VSRC is returned; every other class is
// returned unchanged.
// NOTE(review): listing line 5109 (the function name and parameter list) is
// missing from this extract; restore it from the upstream file.
5108const TargetRegisterClass *
5110 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5111 return &PPC::VSRCRegClass;
5112 return RC;
5113}
5114
// Thin wrapper that forwards Opcode to PPC::getRecordFormOpcode and returns
// its result unchanged.
// NOTE(review): listing line 5115 (the function signature) is missing from
// this extract; restore it from the upstream file.
5116 return PPC::getRecordFormOpcode(Opcode);
5117}
5118
5119static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5120 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5121 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5122 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5123 Opcode == PPC::LHZUX8);
5124}
5125
5126// This function checks for sign extension from 32 bits to 64 bits.
// It looks only at the single instruction that defines Reg (no recursion into
// that instruction's inputs) and returns true when the opcode, or the opcode
// together with its immediate operands, matches one of the cases below.
// Returns false when Reg has no defining instruction.
5127static bool definedBySignExtendingOp(const unsigned Reg,
5128 const MachineRegisterInfo *MRI) {
// NOTE(review): listing line 5129 (the condition guarding the `return false`
// below) is missing from this extract; presumably it rejects non-virtual
// registers -- confirm against upstream.
5130 return false;
5131
5132 MachineInstr *MI = MRI->getVRegDef(Reg);
5133 if (!MI)
5134 return false;
5135
5136 int Opcode = MI->getOpcode();
5137 const PPCInstrInfo *TII =
5138 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5139 if (TII->isSExt32To64(Opcode))
5140 return true;
5141
5142 // The first def of LBZU/LHZU is sign extended.
// (Only operand 0 qualifies; the check below excludes the instruction's
// other defs.)
5143 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5144 return true;
5145
5146 // RLDICL generates sign-extended output if it clears at least
5147 // 33 bits from the left (MSB).
5148 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5149 return true;
5150
5151 // If at least one bit from left in a lower word is masked out,
5152 // all of 0 to 32-th bits of the output are cleared.
5153 // Hence the output is already sign extended.
// Operand 3 must be > 0 and must not exceed operand 4.
5154 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5155 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5156 MI->getOperand(3).getImm() > 0 &&
5157 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5158 return true;
5159
5160 // If the most significant bit of immediate in ANDIS is zero,
5161 // all of 0 to 32-th bits are cleared.
5162 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5163 uint16_t Imm = MI->getOperand(2).getImm();
5164 if ((Imm & 0x8000) == 0)
5165 return true;
5166 }
5167
5168 return false;
5169}
5170
5171// This function checks the machine instruction that defines the input register
5172// Reg. If that machine instruction always outputs a value that has only zeros
5173// in the higher 32 bits then this function will return true.
// Only the defining instruction itself is inspected (no recursion into its
// inputs). Returns false when Reg has no defining instruction.
5174static bool definedByZeroExtendingOp(const unsigned Reg,
5175 const MachineRegisterInfo *MRI) {
// NOTE(review): listing line 5176 (the condition guarding the `return false`
// below) is missing from this extract; presumably it rejects non-virtual
// registers -- confirm against upstream.
5177 return false;
5178
5179 MachineInstr *MI = MRI->getVRegDef(Reg);
5180 if (!MI)
5181 return false;
5182
5183 int Opcode = MI->getOpcode();
5184 const PPCInstrInfo *TII =
5185 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5186 if (TII->isZExt32To64(Opcode))
5187 return true;
5188
5189 // The first def of LBZU/LHZU/LWZU are zero extended.
// (Only operand 0 qualifies; the check below excludes the instruction's
// other defs.)
5190 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5191 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5192 MI->getOperand(0).getReg() == Reg)
5193 return true;
5194
5195 // The 16-bit immediate is sign-extended in li/lis.
5196 // If the most significant bit is zero, all higher bits are zero.
// The mask test below accepts only immediates in [0, 0x7FFF].
5197 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5198 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5199 int64_t Imm = MI->getOperand(1).getImm();
5200 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5201 return true;
5202 }
5203
5204 // We have some variations of rotate-and-mask instructions
5205 // that clear higher 32-bits.
5206 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5207 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5208 Opcode == PPC::RLDICL_32_64) &&
5209 MI->getOperand(3).getImm() >= 32)
5210 return true;
5211
// RLDIC: operand 3 must be at least 32 and at most 63 minus operand 2.
5212 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5213 MI->getOperand(3).getImm() >= 32 &&
5214 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5215 return true;
5216
// 32-bit rotate-and-mask: accepted when operand 3 does not exceed operand 4.
5217 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5218 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5219 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5220 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5221 return true;
5222
5223 return false;
5224}
5225
5226// This function returns true if the input MachineInstr is a TOC save
5227// instruction.
// The test is purely structural: operand 1 must be an immediate equal to the
// frame lowering's TOC save offset and operand 2 must be the stack pointer
// register (X1 on 64-bit subtargets, R1 otherwise). The opcode of MI is not
// inspected in the visible lines.
// NOTE(review): listing line 5228 (the function signature) is missing from
// this extract; restore it from the upstream file.
5229 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5230 return false;
5231 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
// The immediate is narrowed to unsigned for the comparison below.
5232 unsigned StackOffset = MI.getOperand(1).getImm();
5233 Register StackReg = MI.getOperand(2).getReg();
5234 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5235 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5236 return true;
5237
5238 return false;
5239}
5240
5241// We limit the max depth to track incoming values of PHIs or binary ops
5242// (e.g. AND) to avoid excessive cost.
5243const unsigned MAX_BINOP_DEPTH = 1;
5244// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5245// does not count all of the recursions. The parameter BinOpDepth is incremented
5246// only when isSignOrZeroExtended calls itself more than once. This is done to
5247// prevent exponential recursion. There is no parameter to track linear
5248// recursion.
//
// Returns a pair (IsSExt, IsZExt): `first` is true when the value in Reg is
// known to be sign-extended, `second` when it is known to be zero-extended
// (32 -> 64 bits, as decided by definedBySignExtendingOp and
// definedByZeroExtendingOp). (false, false) means "nothing is known".
// NOTE(review): this listing has gaps (lines 5250, 5253, 5311 and 5312 are
// absent): the function-name line of the signature, the condition of the
// first early return and the declaration of the iterator II are missing.
// Restore them from the upstream file before relying on this text.
5249std::pair<bool, bool>
5251 const unsigned BinOpDepth,
5252 const MachineRegisterInfo *MRI) const {
5254 return std::pair<bool, bool>(false, false);
5255
5256 MachineInstr *MI = MRI->getVRegDef(Reg);
5257 if (!MI)
5258 return std::pair<bool, bool>(false, false);
5259
// First see what the defining instruction alone guarantees.
5260 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5261 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5262
5263 // If we know the instruction always returns sign- and zero-extended result,
5264 // return here.
5265 if (IsSExt && IsZExt)
5266 return std::pair<bool, bool>(IsSExt, IsZExt);
5267
// Otherwise try to derive more from the instruction's inputs.
5268 switch (MI->getOpcode()) {
5269 case PPC::COPY: {
5270 Register SrcReg = MI->getOperand(1).getReg();
5271
5272 // In both ELFv1 and v2 ABI, method parameters and the return value
5273 // are sign- or zero-extended.
5274 const MachineFunction *MF = MI->getMF();
5275
5276 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5277 // If this is a copy from another register, we recursively check source.
5278 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5279 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5280 SrcExt.second || IsZExt);
5281 }
5282
5283 // From here on everything is SVR4ABI
5284 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5285 // We check the ZExt/SExt flags for a method parameter.
5286 if (MI->getParent()->getBasicBlock() ==
5287 &MF->getFunction().getEntryBlock()) {
5288 Register VReg = MI->getOperand(0).getReg();
5289 if (MF->getRegInfo().isLiveIn(VReg)) {
5290 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5291 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5292 return std::pair<bool, bool>(IsSExt, IsZExt);
5293 }
5294 }
5295
5296 if (SrcReg != PPC::X3) {
5297 // If this is a copy from another register, we recursively check source.
5298 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5299 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5300 SrcExt.second || IsZExt);
5301 }
5302
5303 // For a method return value, we check the ZExt/SExt flags in attribute.
5304 // We assume the following code sequence for method call.
5305 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5306 // BL8_NOP @func,...
5307 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5308 // %5 = COPY %x3; G8RC:%5
5309 const MachineBasicBlock *MBB = MI->getParent();
// Fallback result for every case where the expected call sequence is not
// found.
5310 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
// NOTE(review): listing lines 5311-5312 are missing; presumably they declare
// the instruction iterator II positioned at MI -- confirm against upstream.
5313 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5314 return IsExtendPair;
5315
// NOTE(review): II is decremented again without a second instr_begin() check;
// presumably an ADJCALLSTACKUP is never the first instruction of a block --
// confirm.
5316 const MachineInstr &CallMI = *(--II);
5317 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5318 return IsExtendPair;
5319
5320 const Function *CalleeFn =
5321 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5322 if (!CalleeFn)
5323 return IsExtendPair;
// Only integer return types of at most 32 bits take the callee's sext/zext
// return attributes into account.
5324 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5325 if (IntTy && IntTy->getBitWidth() <= 32) {
5326 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5327 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5328 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5329 return std::pair<bool, bool>(IsSExt, IsZExt);
5330 }
5331
5332 return IsExtendPair;
5333 }
5334
5335 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5336 // So, we track the operand register as we do for register copy.
5337 case PPC::ORI:
5338 case PPC::XORI:
5339 case PPC::ORI8:
5340 case PPC::XORI8: {
5341 Register SrcReg = MI->getOperand(1).getReg();
5342 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5343 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5344 SrcExt.second || IsZExt);
5345 }
5346
5347 // OR, XOR with shifted 16-bit immediate does not change the upper
5348 // 32 bits. So, we track the operand register for zero extension.
5349 // For sign extension when the MSB of the immediate is zero, we also
5350 // track the operand register since the upper 33 bits are unchanged.
5351 case PPC::ORIS:
5352 case PPC::XORIS:
5353 case PPC::ORIS8:
5354 case PPC::XORIS8: {
5355 Register SrcReg = MI->getOperand(1).getReg();
5356 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5357 uint16_t Imm = MI->getOperand(2).getImm();
// With the immediate's top bit set, sign extension is reported as unknown.
5358 if (Imm & 0x8000)
5359 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5360 else
5361 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5362 SrcExt.second || IsZExt);
5363 }
5364
5365 // If all incoming values are sign-/zero-extended,
5366 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5367 case PPC::OR:
5368 case PPC::OR8:
5369 case PPC::ISEL:
5370 case PPC::PHI: {
// Multi-input case: give up once the depth budget is used, discarding what
// the defining instruction alone established.
5371 if (BinOpDepth >= MAX_BINOP_DEPTH)
5372 return std::pair<bool, bool>(false, false);
5373
5374 // The input registers for PHI are operand 1, 3, ...
5375 // The input registers for others are operand 1 and 2.
5376 unsigned OperandEnd = 3, OperandStride = 1;
5377 if (MI->getOpcode() == PPC::PHI) {
5378 OperandEnd = MI->getNumOperands();
5379 OperandStride = 2;
5380 }
5381
// Start from "both hold" and AND in every input's result.
5382 IsSExt = true;
5383 IsZExt = true;
5384 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5385 if (!MI->getOperand(I).isReg())
5386 return std::pair<bool, bool>(false, false);
5387
5388 Register SrcReg = MI->getOperand(I).getReg();
5389 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5390 IsSExt &= SrcExt.first;
5391 IsZExt &= SrcExt.second;
5392 }
5393 return std::pair<bool, bool>(IsSExt, IsZExt);
5394 }
5395
5396 // If at least one of the incoming values of an AND is zero extended
5397 // then the output is also zero-extended. If both of the incoming values
5398 // are sign-extended then the output is also sign extended.
5399 case PPC::AND:
5400 case PPC::AND8: {
5401 if (BinOpDepth >= MAX_BINOP_DEPTH)
5402 return std::pair<bool, bool>(false, false);
5403
5404 Register SrcReg1 = MI->getOperand(1).getReg();
5405 Register SrcReg2 = MI->getOperand(2).getReg();
5406 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5407 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5408 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5409 Src1Ext.second || Src2Ext.second);
5410 }
5411
5412 default:
5413 break;
5414 }
// Any other opcode: report only what the defining instruction guarantees.
5415 return std::pair<bool, bool>(IsSExt, IsZExt);
5416}
5417
5418bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5419 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5420}
5421
5422namespace {
// PipelinerLoopInfo implementation for the loop described by three
// instructions: the loop set-up instruction (Loop), the terminating branch
// (EndLoop) and the instruction defining the trip count (LoopCount).
// NOTE(review): this listing has gaps (lines 5450 and 5455 are absent): the
// end of createTripCountGreaterCondition's parameter list and the start of the
// second Cond.push_back call are missing. Restore them from upstream.
5423class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5424 MachineInstr *Loop, *EndLoop, *LoopCount;
5425 MachineFunction *MF;
5426 const TargetInstrInfo *TII;
// Compile-time trip count, or -1 when LoopCount is not an LI/LI8.
5427 int64_t TripCount;
5428
5429public:
5430 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5431 MachineInstr *LoopCount)
5432 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5433 MF(Loop->getParent()->getParent()),
5434 TII(MF->getSubtarget().getInstrInfo()) {
5435 // Inspect the Loop instruction up-front, as it may be deleted when we call
5436 // createTripCountGreaterCondition.
5437 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5438 TripCount = LoopCount->getOperand(1).getImm();
5439 else
5440 TripCount = -1;
5441 }
5442
5443 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5444 // Only ignore the terminator.
5445 return MI == EndLoop;
5446 }
5447
// With a known trip count, returns whether it exceeds TC. With an unknown
// trip count (-1), appends operands to Cond and returns an empty optional.
5448 std::optional<bool> createTripCountGreaterCondition(
5449 int TC, MachineBasicBlock &MBB,
// NOTE(review): listing line 5450 is missing; presumably it declares the Cond
// parameter and opens the function body -- confirm against upstream.
5451 if (TripCount == -1) {
5452 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5453 // so we don't need to generate any thing here.
5454 Cond.push_back(MachineOperand::CreateImm(0));
// NOTE(review): listing line 5455 is missing; presumably it starts a second
// Cond.push_back that creates a register operand for CTR8/CTR -- confirm.
5456 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5457 true));
5458 return {};
5459 }
5460
5461 return TripCount > TC;
5462 }
5463
5464 void setPreheader(MachineBasicBlock *NewPreheader) override {
5465 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5466 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5467 }
5468
5469 void adjustTripCount(int TripCountAdjust) override {
5470 // If the loop trip count is a compile-time value, then just change the
5471 // value.
5472 if (LoopCount->getOpcode() == PPC::LI8 ||
5473 LoopCount->getOpcode() == PPC::LI) {
// This local shadows the TripCount member; the member keeps the value
// captured in the constructor and is not updated here.
5474 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5475 LoopCount->getOperand(1).setImm(TripCount);
5476 return;
5477 }
5478
5479 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5480 // so we don't need to generate any thing here.
5481 }
5482
5483 void disposed() override {
5484 Loop->eraseFromParent();
5485 // Ensure the loop setup instruction is deleted too.
5486 LoopCount->eraseFromParent();
5487 }
5488};
5489} // namespace
5490
// Builds a PPCPipelinerLoopInfo for LoopBB when the instruction at iterator I
// is a BDNZ (per isBDNZ) and findLoopInstr finds a loop set-up instruction in
// the preheader; otherwise returns nullptr.
// NOTE(review): this listing has gaps (lines 5492, 5494, 5501 and 5504 are
// absent): the function name/parameter line and the declarations of I, Visited
// and MRI are missing. Restore them from the upstream file.
5491std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5493 // We really "analyze" only hardware loops right now.
// The preheader is taken to be the first predecessor of LoopBB, or the second
// one when the first is LoopBB itself.
// NOTE(review): presumably LoopBB always has a predecessor other than itself;
// confirm, since neither iterator is checked against pred_end().
5495 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5496 if (Preheader == LoopBB)
5497 Preheader = *std::next(LoopBB->pred_begin());
5498 MachineFunction *MF = Preheader->getParent();
5499
5500 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5502 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5503 Register LoopCountReg = LoopInst->getOperand(0).getReg();
// NOTE(review): the result of getUniqueVRegDef is passed on unchecked, and
// PPCPipelinerLoopInfo's constructor dereferences LoopCount -- confirm it
// cannot be null here.
5505 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5506 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5507 }
5508 }
5509 return nullptr;
5510}
5511
// Scans PreHeader for the loop set-up instruction (MTCTR8loop on 64-bit
// subtargets, MTCTRloop otherwise) and returns a pointer to the first match,
// or nullptr if there is none. Visited is not used in the visible body.
// NOTE(review): listing line 5512 (return type and function name) is missing
// from this extract; restore it from the upstream file.
5513 MachineBasicBlock &PreHeader,
5514 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5515
5516 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5517
5518 // The loop set-up instruction should be in preheader
5519 for (auto &I : PreHeader.instrs())
5520 if (I.getOpcode() == LOOPi)
5521 return &I;
5522 return nullptr;
5523}
5524
5525// Return true if get the base operand, byte offset of an instruction and the
5526// memory width. Width is the size of memory that is being loaded/stored.
// On success: BaseReg points at operand 2 of LdSt, Offset is the immediate in
// operand 1 and Width is the size of LdSt's only memory operand. The outputs
// are left untouched when false is returned. TRI is not used in the visible
// body.
// NOTE(review): listing line 5527 (return type and function name) is missing
// from this extract; restore it from the upstream file.
5528 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5529 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5530 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5531 return false;
5532
5533 // Handle only loads/stores with base register followed by immediate offset.
5534 if (!LdSt.getOperand(1).isImm() ||
5535 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5536 return false;
// NOTE(review): the check below is an exact duplicate of the one above and
// can never fire; it looks safe to delete.
5537 if (!LdSt.getOperand(1).isImm() ||
5538 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5539 return false;
5540
// Exactly one memory operand is required so that Width is unambiguous.
5541 if (!LdSt.hasOneMemOperand())
5542 return false;
5543
5544 Width = (*LdSt.memoperands_begin())->getSize();
5545 Offset = LdSt.getOperand(1).getImm();
5546 BaseReg = &LdSt.getOperand(2);
5547 return true;
5548}
5549
// Returns true only when MIa and MIb can be shown not to overlap: both must
// decompose via getMemOperandWithOffsetWidth, share an identical base operand,
// and the lower access must end at or before the higher access starts.
// Returning false means "could not prove disjoint".
// NOTE(review): this listing has gaps (lines 5550, 5555, 5556 and 5564 are
// absent): the function name line, the condition guarding the first
// `return false`, and presumably the declaration of TRI are missing. Restore
// them from the upstream file.
5551 const MachineInstr &MIa, const MachineInstr &MIb) const {
5552 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5553 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5554
5557 return false;
5558
5559 // Retrieve the base register, offset from the base register and width. Width
5560 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5561 // base registers are identical, and the offset of a lower memory access +
5562 // the width doesn't overlap the offset of a higher memory access,
5563 // then the memory accesses are different.
5565 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5566 int64_t OffsetA = 0, OffsetB = 0;
5567 LocationSize WidthA = 0, WidthB = 0;
5568 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5569 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5570 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
// NOTE(review): OffsetA/OffsetB are int64_t but the min/max results are
// narrowed to int here -- confirm the offsets always fit in an int.
5571 int LowOffset = std::min(OffsetA, OffsetB);
5572 int HighOffset = std::max(OffsetA, OffsetB);
// The width that matters is that of the access starting at the lower offset.
5573 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
5574 if (LowWidth.hasValue() &&
5575 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5576 return true;
5577 }
5578 }
5579 return false;
5580}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static unsigned getSize(unsigned Kind)
void changeSign()
Definition: APFloat.h:1214
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition: APInt.cpp:1111
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:248
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:842
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
A possibly irreducible generalization of a Loop.
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:124
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
bool hasValue() const
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
Definition: MCInstrDesc.h:565
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:269
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
uint16_t Constraints
Operand constraints (see OperandConstraint enum).
Definition: MCInstrDesc.h:100
bool isLookupPtrRegClass() const
Set if this operand is a pointer value and it requires a callback to look up its register class.
Definition: MCInstrDesc.h:104
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:91
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:950
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:733
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:643
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:815
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:800
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automata that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
PPCInstrInfo(PPCSubtarget &STI)
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
static int getRecordFormOpcode(unsigned Opcode)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
Definition: PPCInstrInfo.h:276
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:274
CombinerObjective getCombinerObjective(unsigned Pattern) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:623
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Definition: PPCInstrInfo.h:506
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if the base operand, byte offset and memory width of the instruction can be determined.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:617
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const
getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode for a given imm form load/s...
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:143
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:216
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:135
bool isLittleEndian() const
Definition: PPCSubtarget.h:182
bool isTargetLinux() const
Definition: PPCSubtarget.h:213
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:156
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:209
void setGlibcHWCAPAccess(bool Val=true) const
void dump() const
Definition: Pass.cpp:136
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and record live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:703
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ MO_TOC_LO
Definition: PPC.h:185
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions such that condition register is set by MI(b,a).
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
static bool isVFRegister(unsigned Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
static unsigned getCRFromCRBit(unsigned SrcReg)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
@ REASSOC_XY_BCA
Definition: PPCInstrInfo.h:96
@ REASSOC_XY_BAC
Definition: PPCInstrInfo.h:97
@ REASSOC_XY_AMM_BMM
Definition: PPCInstrInfo.h:91
@ REASSOC_XMM_AMM_BMM
Definition: PPCInstrInfo.h:92
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
unsigned getKillRegState(bool B)
@ SOK_CRBitSpill
Definition: PPCInstrInfo.h:73
@ SOK_VSXVectorSpill
Definition: PPCInstrInfo.h:75
@ SOK_SpillToVSR
Definition: PPCInstrInfo.h:78
@ SOK_Int4Spill
Definition: PPCInstrInfo.h:68
@ SOK_PairedVecSpill
Definition: PPCInstrInfo.h:79
@ SOK_VectorFloat8Spill
Definition: PPCInstrInfo.h:76
@ SOK_UAccumulatorSpill
Definition: PPCInstrInfo.h:81
@ SOK_PairedG8Spill
Definition: PPCInstrInfo.h:84
@ SOK_VectorFloat4Spill
Definition: PPCInstrInfo.h:77
@ SOK_Float8Spill
Definition: PPCInstrInfo.h:70
@ SOK_Float4Spill
Definition: PPCInstrInfo.h:71
@ SOK_VRVectorSpill
Definition: PPCInstrInfo.h:74
@ SOK_WAccumulatorSpill
Definition: PPCInstrInfo.h:82
@ SOK_SPESpill
Definition: PPCInstrInfo.h:83
@ SOK_CRSpill
Definition: PPCInstrInfo.h:72
@ SOK_AccumulatorSpill
Definition: PPCInstrInfo.h:80
@ SOK_Int8Spill
Definition: PPCInstrInfo.h:69
@ SOK_LastOpcodeSpill
Definition: PPCInstrInfo.h:85
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t IsSummingOperands
Definition: PPCInstrInfo.h:55
uint64_t OpNoForForwarding
Definition: PPCInstrInfo.h:45
uint64_t ImmMustBeMultipleOf
Definition: PPCInstrInfo.h:35
uint64_t IsCommutative
Definition: PPCInstrInfo.h:43
uint64_t ZeroIsSpecialNew
Definition: PPCInstrInfo.h:41
uint64_t TruncateImmTo
Definition: PPCInstrInfo.h:53
uint64_t ZeroIsSpecialOrig
Definition: PPCInstrInfo.h:38
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.