LLVM 19.0.0git
PPCInstrInfo.cpp
Go to the documentation of this file.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCInst.h"
42#include "llvm/Support/Debug.h"
45
46using namespace llvm;
47
48#define DEBUG_TYPE "ppc-instr-info"
49
50#define GET_INSTRMAP_INFO
51#define GET_INSTRINFO_CTOR_DTOR
52#include "PPCGenInstrInfo.inc"
53
54STATISTIC(NumStoreSPILLVSRRCAsVec,
55 "Number of spillvsrrc spilled to stack as vec");
56STATISTIC(NumStoreSPILLVSRRCAsGpr,
57 "Number of spillvsrrc spilled to stack as gpr");
58STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
59STATISTIC(CmpIselsConverted,
60 "Number of ISELs that depend on comparison of constants converted");
61STATISTIC(MissedConvertibleImmediateInstrs,
62 "Number of compare-immediate instructions fed by constants");
63STATISTIC(NumRcRotatesConvertedToRcAnd,
64 "Number of record-form rotates converted to record-form andi");
65
66static cl::
67opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
68 cl::desc("Disable analysis for CTR loops"));
69
70static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
71cl::desc("Disable compare instruction optimization"), cl::Hidden);
72
73static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
74cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
76
77static cl::opt<bool>
78UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
79 cl::desc("Use the old (incorrect) instruction latency calculation"));
80
81static cl::opt<float>
82 FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
83 cl::desc("register pressure factor for the transformations."));
84
86 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
87 cl::desc("enable register pressure reduce in machine combiner pass."));
88
89// Pin the vtable to this file.
90void PPCInstrInfo::anchor() {}
91
93 : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
94 /* CatchRetOpcode */ -1,
95 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
96 Subtarget(STI), RI(STI.getTargetMachine()) {}
97
98/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
99/// this target when scheduling the DAG.
102 const ScheduleDAG *DAG) const {
103 unsigned Directive =
104 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
107 const InstrItineraryData *II =
108 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
109 return new ScoreboardHazardRecognizer(II, DAG);
110 }
111
113}
114
115/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
116/// to use for this target when scheduling the DAG.
119 const ScheduleDAG *DAG) const {
120 unsigned Directive =
121 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
122
123 // FIXME: Leaving this as-is until we have POWER9 scheduling info
125 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
126
127 // Most subtargets use a PPC970 recognizer.
130 assert(DAG->TII && "No InstrInfo?");
131
132 return new PPCHazardRecognizer970(*DAG);
133 }
134
135 return new ScoreboardHazardRecognizer(II, DAG);
136}
137
139 const MachineInstr &MI,
140 unsigned *PredCost) const {
141 if (!ItinData || UseOldLatencyCalc)
142 return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
143
144 // The default implementation of getInstrLatency calls getStageLatency, but
145 // getStageLatency does not do the right thing for us. While we have
146 // itinerary, most cores are fully pipelined, and so the itineraries only
147 // express the first part of the pipeline, not every stage. Instead, we need
148 // to use the listed output operand cycle number (using operand 0 here, which
149 // is an output).
150
151 unsigned Latency = 1;
152 unsigned DefClass = MI.getDesc().getSchedClass();
153 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
154 const MachineOperand &MO = MI.getOperand(i);
155 if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
156 continue;
157
158 std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
159 if (!Cycle)
160 continue;
161
162 Latency = std::max(Latency, *Cycle);
163 }
164
165 return Latency;
166}
167
168std::optional<unsigned> PPCInstrInfo::getOperandLatency(
169 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
170 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
171 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
172 ItinData, DefMI, DefIdx, UseMI, UseIdx);
173
174 if (!DefMI.getParent())
175 return Latency;
176
177 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
178 Register Reg = DefMO.getReg();
179
180 bool IsRegCR;
181 if (Reg.isVirtual()) {
182 const MachineRegisterInfo *MRI =
183 &DefMI.getParent()->getParent()->getRegInfo();
184 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
185 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
186 } else {
187 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
188 PPC::CRBITRCRegClass.contains(Reg);
189 }
190
191 if (UseMI.isBranch() && IsRegCR) {
192 if (!Latency)
193 Latency = getInstrLatency(ItinData, DefMI);
194
195 // On some cores, there is an additional delay between writing to a condition
196 // register, and using it from a branch.
197 unsigned Directive = Subtarget.getCPUDirective();
198 switch (Directive) {
199 default: break;
200 case PPC::DIR_7400:
201 case PPC::DIR_750:
202 case PPC::DIR_970:
203 case PPC::DIR_E5500:
204 case PPC::DIR_PWR4:
205 case PPC::DIR_PWR5:
206 case PPC::DIR_PWR5X:
207 case PPC::DIR_PWR6:
208 case PPC::DIR_PWR6X:
209 case PPC::DIR_PWR7:
210 case PPC::DIR_PWR8:
211 // FIXME: Is this needed for POWER9?
212 Latency = *Latency + 2;
213 break;
214 }
215 }
216
217 return Latency;
218}
219
221 uint32_t Flags) const {
222 MI.setFlags(Flags);
226}
227
228// This function does not list all associative and commutative operations, but
229// only those worth feeding through the machine combiner in an attempt to
230// reduce the critical path. Mostly, this means floating-point operations,
231// because they have high latencies(>=5) (compared to other operations, such as
232// and/or, which are also associative and commutative, but have low latencies).
234 bool Invert) const {
235 if (Invert)
236 return false;
237 switch (Inst.getOpcode()) {
238 // Floating point:
239 // FP Add:
240 case PPC::FADD:
241 case PPC::FADDS:
242 // FP Multiply:
243 case PPC::FMUL:
244 case PPC::FMULS:
245 // Altivec Add:
246 case PPC::VADDFP:
247 // VSX Add:
248 case PPC::XSADDDP:
249 case PPC::XVADDDP:
250 case PPC::XVADDSP:
251 case PPC::XSADDSP:
252 // VSX Multiply:
253 case PPC::XSMULDP:
254 case PPC::XVMULDP:
255 case PPC::XVMULSP:
256 case PPC::XSMULSP:
259 // Fixed point:
260 // Multiply:
261 case PPC::MULHD:
262 case PPC::MULLD:
263 case PPC::MULHW:
264 case PPC::MULLW:
265 return true;
266 default:
267 return false;
268 }
269}
270
// Column indices into each row of the FMAOpIdxInfo table defined below.
271#define InfoArrayIdxFMAInst 0
272#define InfoArrayIdxFAddInst 1
273#define InfoArrayIdxFMULInst 2
274#define InfoArrayIdxAddOpIdx 3
275#define InfoArrayIdxMULOpIdx 4
276#define InfoArrayIdxFSubInst 5
277// Array keeps info for FMA instructions:
278// Index 0(InfoArrayIdxFMAInst): FMA instruction;
279// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
280// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
281// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
282// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
283// second MUL operand index is plus 1;
284// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
//
// Per the rows below, the XS*/XV* forms take the addend at operand 1 and the
// multiplicands at operands 2 and 3, whereas FMADD/FMADDS take the
// multiplicands at operands 1 and 2 and the addend at operand 3.
285static const uint16_t FMAOpIdxInfo[][6] = {
286 // FIXME: Add more FMA instructions like XSNMADDADP and so on.
287 {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
288 {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
289 {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
290 {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
291 {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
292 {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
293
294// Check if an opcode is a FMA instruction. If it is, return the index in array
295// FMAOpIdxInfo. Otherwise, return -1.
296int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
297 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
298 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
299 return I;
300 return -1;
301}
302
303// On PowerPC target, we have two kinds of patterns related to FMA:
304// 1: Improve ILP.
305// Try to reassociate FMA chains like below:
306//
307// Pattern 1:
308// A = FADD X, Y (Leaf)
309// B = FMA A, M21, M22 (Prev)
310// C = FMA B, M31, M32 (Root)
311// -->
312// A = FMA X, M21, M22
313// B = FMA Y, M31, M32
314// C = FADD A, B
315//
316// Pattern 2:
317// A = FMA X, M11, M12 (Leaf)
318// B = FMA A, M21, M22 (Prev)
319// C = FMA B, M31, M32 (Root)
320// -->
321// A = FMUL M11, M12
322// B = FMA X, M21, M22
323// D = FMA A, M31, M32
324// C = FADD B, D
325//
326// breaking the dependency between A and B, allowing FMA to be executed in
327// parallel (or back-to-back in a pipeline) instead of depending on each other.
328//
329// 2: Reduce register pressure.
330// Try to reassociate FMA with FSUB and a constant like below:
331// C is a floating point const.
332//
333// Pattern 1:
334// A = FSUB X, Y (Leaf)
335// D = FMA B, C, A (Root)
336// -->
337// A = FMA B, Y, -C
338// D = FMA A, X, C
339//
340// Pattern 2:
341// A = FSUB X, Y (Leaf)
342// D = FMA B, A, C (Root)
343// -->
344// A = FMA B, Y, -C
345// D = FMA A, X, C
346//
347// Before the transformation, A must be assigned with different hardware
348// register with D. After the transformation, A and D must be assigned with
349// same hardware register due to TIE attribute of FMA instructions.
350//
353 bool DoRegPressureReduce) const {
357
358 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
359 for (const auto &MO : Instr.explicit_operands())
360 if (!(MO.isReg() && MO.getReg().isVirtual()))
361 return false;
362 return true;
363 };
364
365 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
366 unsigned OpType) {
367 if (Instr.getOpcode() !=
368 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
369 return false;
370
371 // Instruction can be reassociated.
372 // fast math flags may prohibit reassociation.
373 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
374 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
375 return false;
376
377 // Instruction operands are virtual registers for reassociation.
378 if (!IsAllOpsVirtualReg(Instr))
379 return false;
380
381 // For register pressure reassociation, the FSub must have only one use as
382 // we want to delete the sub to save its def.
383 if (OpType == InfoArrayIdxFSubInst &&
384 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
385 return false;
386
387 return true;
388 };
389
390 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
391 int16_t &MulOpIdx, bool IsLeaf) {
392 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
393 if (Idx < 0)
394 return false;
395
396 // Instruction can be reassociated.
397 // fast math flags may prohibit reassociation.
398 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
399 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
400 return false;
401
402 // Instruction operands are virtual registers for reassociation.
403 if (!IsAllOpsVirtualReg(Instr))
404 return false;
405
407 if (IsLeaf)
408 return true;
409
411
412 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
413 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
414 // If 'add' operand's def is not in current block, don't do ILP related opt.
415 if (!MIAdd || MIAdd->getParent() != MBB)
416 return false;
417
418 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
419 // as this fma will be changed later.
420 return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
421 };
422
423 int16_t AddOpIdx = -1;
424 int16_t MulOpIdx = -1;
425
426 bool IsUsedOnceL = false;
427 bool IsUsedOnceR = false;
428 MachineInstr *MULInstrL = nullptr;
429 MachineInstr *MULInstrR = nullptr;
430
431 auto IsRPReductionCandidate = [&]() {
432 // Currently, we only support float and double.
433 // FIXME: add support for other types.
434 unsigned Opcode = Root.getOpcode();
435 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
436 return false;
437
438 // Root must be a valid FMA like instruction.
439 // Treat it as leaf as we don't care its add operand.
440 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
441 assert((MulOpIdx >= 0) && "mul operand index not right!");
442 Register MULRegL = TRI->lookThruSingleUseCopyChain(
443 Root.getOperand(MulOpIdx).getReg(), MRI);
444 Register MULRegR = TRI->lookThruSingleUseCopyChain(
445 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
446 if (!MULRegL && !MULRegR)
447 return false;
448
449 if (MULRegL && !MULRegR) {
450 MULRegR =
451 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
452 IsUsedOnceL = true;
453 } else if (!MULRegL && MULRegR) {
454 MULRegL =
455 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
456 IsUsedOnceR = true;
457 } else {
458 IsUsedOnceL = true;
459 IsUsedOnceR = true;
460 }
461
462 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
463 return false;
464
465 MULInstrL = MRI->getVRegDef(MULRegL);
466 MULInstrR = MRI->getVRegDef(MULRegR);
467 return true;
468 }
469 return false;
470 };
471
472 // Register pressure fma reassociation patterns.
473 if (DoRegPressureReduce && IsRPReductionCandidate()) {
474 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
475 // Register pressure pattern 1
476 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
477 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
478 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
480 return true;
481 }
482
483 // Register pressure pattern 2
484 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
485 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
486 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
488 return true;
489 }
490 }
491
492 // ILP fma reassociation patterns.
493 // Root must be a valid FMA like instruction.
494 AddOpIdx = -1;
495 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
496 return false;
497
498 assert((AddOpIdx >= 0) && "add operand index not right!");
499
500 Register RegB = Root.getOperand(AddOpIdx).getReg();
501 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
502
503 // Prev must be a valid FMA like instruction.
504 AddOpIdx = -1;
505 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
506 return false;
507
508 assert((AddOpIdx >= 0) && "add operand index not right!");
509
510 Register RegA = Prev->getOperand(AddOpIdx).getReg();
511 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
512 AddOpIdx = -1;
513 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
515 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
516 return true;
517 }
518 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
520 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
521 return true;
522 }
523 return false;
524}
525
527 MachineInstr &Root, unsigned &Pattern,
528 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
529 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
530
531 MachineFunction *MF = Root.getMF();
535
536 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
537 if (Idx < 0)
538 return;
539
541
542 // For now we only need to fix up placeholder for register pressure reduce
543 // patterns.
544 Register ConstReg = 0;
545 switch (Pattern) {
547 ConstReg =
548 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
549 break;
551 ConstReg =
552 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
553 break;
554 default:
555 // Not register pressure reduce patterns.
556 return;
557 }
558
559 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
560 // Get const value from const pool.
561 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
562 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
563
564 // Get negative fp const.
565 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
566 F1.changeSign();
567 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
568 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
569
570 // Put negative fp const into constant pool.
571 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
572
573 MachineOperand *Placeholder = nullptr;
574 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
575 for (auto *Inst : InsInstrs) {
576 for (MachineOperand &Operand : Inst->explicit_operands()) {
577 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
578 if (Operand.getReg() == PPC::ZERO8) {
579 Placeholder = &Operand;
580 break;
581 }
582 }
583 }
584
585 assert(Placeholder && "Placeholder does not exist!");
586
587 // Generate instructions to load the const fp from constant pool.
588 // We only support PPC64 and medium code model.
589 Register LoadNewConst =
590 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
591
592 // Fill the placeholder with the new load from constant pool.
593 Placeholder->setReg(LoadNewConst);
594}
595
597 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
598
600 return false;
601
602 // Currently, we only enable register pressure reducing in machine combiner
603 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
604 // support.
605 //
606 // So we need following instructions to access a TOC entry:
607 //
608 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
609 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
610 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
611 //
612 // FIXME: add more supported targets, like Small and Large code model, PPC32,
613 // AIX.
614 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
616 return false;
617
619 const MachineFunction *MF = MBB->getParent();
620 const MachineRegisterInfo *MRI = &MF->getRegInfo();
621
622 auto GetMBBPressure =
623 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
624 RegionPressure Pressure;
625 RegPressureTracker RPTracker(Pressure);
626
627 // Initialize the register pressure tracker.
628 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
629 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
630
631 for (const auto &MI : reverse(*MBB)) {
632 if (MI.isDebugValue() || MI.isDebugLabel())
633 continue;
634 RegisterOperands RegOpers;
635 RegOpers.collect(MI, *TRI, *MRI, false, false);
636 RPTracker.recedeSkipDebugValues();
637 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
638 RPTracker.recede(RegOpers);
639 }
640
641 // Close the RPTracker to finalize live ins.
642 RPTracker.closeRegion();
643
644 return RPTracker.getPressure().MaxSetPressure;
645 };
646
647 // For now we only care about float and double type fma.
648 unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
649 *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
650
651 // Only reduce register pressure when pressure is high.
652 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
653 (float)VSSRCLimit * FMARPFactor;
654}
655
657 // I has only one memory operand which is load from constant pool.
658 if (!I->hasOneMemOperand())
659 return false;
660
661 MachineMemOperand *Op = I->memoperands()[0];
662 return Op->isLoad() && Op->getPseudoValue() &&
663 Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
664}
665
666Register PPCInstrInfo::generateLoadForNewConst(
667 unsigned Idx, MachineInstr *MI, Type *Ty,
668 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
669 // Now we only support PPC64, Medium code model and P9 with vector.
670 // We have immutable pattern to access const pool. See function
671 // shouldReduceRegisterPressure.
672 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
674 "Target not supported!\n");
675
676 MachineFunction *MF = MI->getMF();
678
679 // Generate ADDIStocHA8
680 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
681 MachineInstrBuilder TOCOffset =
682 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
683 .addReg(PPC::X2)
685
686 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
687 "Only float and double are supported!");
688
689 unsigned LoadOpcode;
690 // Should be float type or double type.
691 if (Ty->isFloatTy())
692 LoadOpcode = PPC::DFLOADf32;
693 else
694 LoadOpcode = PPC::DFLOADf64;
695
696 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
697 Register VReg2 = MRI->createVirtualRegister(RC);
701
702 // Generate Load from constant pool.
704 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
706 .addReg(VReg1, getKillRegState(true))
707 .addMemOperand(MMO);
708
709 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
710
711 // Insert the toc load instructions into InsInstrs.
712 InsInstrs.insert(InsInstrs.begin(), Load);
713 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
714 return VReg2;
715}
716
717// This function returns the const value in constant pool if the \p I is a load
718// from constant pool.
719const Constant *
721 MachineFunction *MF = I->getMF();
724 assert(I->mayLoad() && "Should be a load instruction.\n");
725 for (auto MO : I->uses()) {
726 if (!MO.isReg())
727 continue;
728 Register Reg = MO.getReg();
729 if (Reg == 0 || !Reg.isVirtual())
730 continue;
731 // Find the toc address.
732 MachineInstr *DefMI = MRI->getVRegDef(Reg);
733 for (auto MO2 : DefMI->uses())
734 if (MO2.isCPI())
735 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
736 }
737 return nullptr;
738}
739
741 switch (Pattern) {
748 default:
750 }
751}
752
755 bool DoRegPressureReduce) const {
756 // Using the machine combiner in this way is potentially expensive, so
757 // restrict to when aggressive optimizations are desired.
759 return false;
760
761 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
762 return true;
763
765 DoRegPressureReduce);
766}
767
769 MachineInstr &Root, unsigned Pattern,
772 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
773 switch (Pattern) {
778 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
779 break;
780 default:
781 // Reassociate default patterns.
783 DelInstrs, InstrIdxForVirtReg);
784 break;
785 }
786}
787
/// Build the replacement instruction sequence for an FMA reassociation
/// pattern rooted at \p Root.
///
/// New virtual registers are created in the register class of Root's result
/// (operand 0) and recorded in \p InstrIdxForVirtReg. The newly built
/// instructions are appended to InsInstrs; Leaf, Prev (ILP patterns only) and
/// Root are appended to DelInstrs. For the register pressure patterns the
/// first new FMA carries PPC::ZERO8 as a placeholder operand that is replaced
/// later (see the comment at the placeholder below).
///
/// \param Root    The FMA instruction the pattern was matched on.
/// \param Pattern The matched pattern; unrecognized values are unreachable.
788void PPCInstrInfo::reassociateFMA(
789 MachineInstr &Root, unsigned Pattern,
792 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
793 MachineFunction *MF = Root.getMF();
796 MachineOperand &OpC = Root.getOperand(0);
797 Register RegC = OpC.getReg();
798 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
799 MRI.constrainRegClass(RegC, RC);
800
801 unsigned FmaOp = Root.getOpcode();
802 int16_t Idx = getFMAOpIdxInfo(FmaOp);
803 assert(Idx >= 0 && "Root must be a FMA instruction");
804
805 bool IsILPReassociate =
808
811
812 MachineInstr *Prev = nullptr;
813 MachineInstr *Leaf = nullptr;
814 switch (Pattern) {
815 default:
816 llvm_unreachable("not recognized pattern!");
819 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
820 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
821 break;
823 Register MULReg =
824 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
825 Leaf = MRI.getVRegDef(MULReg);
826 break;
827 }
829 Register MULReg = TRI->lookThruCopyLike(
830 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
831 Leaf = MRI.getVRegDef(MULReg);
832 break;
833 }
834 }
835
// Only flags common to every instruction being replaced are carried over to
// the new instructions.
836 uint32_t IntersectedFlags = 0;
837 if (IsILPReassociate)
838 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
839 else
840 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
841
// Fetch an operand's register and kill flag, constraining the register to RC.
842 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
843 bool &KillFlag) {
844 Reg = Operand.getReg();
845 MRI.constrainRegClass(Reg, RC);
846 KillFlag = Operand.isKill();
847 };
848
// Fetch both multiplicands and the addend of an FMA-shaped instruction.
849 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
850 Register &MulOp2, Register &AddOp,
851 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
852 bool &AddOpKillFlag) {
853 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
854 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
855 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
856 };
857
858 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
859 RegA21, RegB;
860 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
861 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
862 KillA11 = false, KillA21 = false, KillB = false;
863
864 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
865
866 if (IsILPReassociate)
867 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
868
870 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
871 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
873 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
874 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
875 } else {
876 // Get FSUB instruction info.
877 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
878 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
879 }
880
881 // Create new virtual registers for the new results instead of
882 // recycling legacy ones because the MachineCombiner's computation of the
883 // critical path requires a new register definition rather than an existing
884 // one.
885 // For register pressure reassociation, we only need create one virtual
886 // register for the new fma.
887 Register NewVRA = MRI.createVirtualRegister(RC);
888 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
889
890 Register NewVRB = 0;
891 if (IsILPReassociate) {
892 NewVRB = MRI.createVirtualRegister(RC);
893 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
894 }
895
896 Register NewVRD = 0;
898 NewVRD = MRI.createVirtualRegister(RC);
899 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
900 }
901
// Rewrite the add/mul operands of MI in place at the FMA's own operand
// positions (AddOpIdx, FirstMulOpIdx, FirstMulOpIdx + 1).
902 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
903 Register RegMul1, bool KillRegMul1,
904 Register RegMul2, bool KillRegMul2) {
905 MI->getOperand(AddOpIdx).setReg(RegAdd);
906 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
907 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
908 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
909 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
910 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
911 };
912
913 MachineInstrBuilder NewARegPressure, NewCRegPressure;
914 switch (Pattern) {
915 default:
916 llvm_unreachable("not recognized pattern!");
918 // Create new instructions for insertion.
919 MachineInstrBuilder MINewB =
920 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
921 .addReg(RegX, getKillRegState(KillX))
922 .addReg(RegM21, getKillRegState(KillM21))
923 .addReg(RegM22, getKillRegState(KillM22));
924 MachineInstrBuilder MINewA =
925 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
926 .addReg(RegY, getKillRegState(KillY))
927 .addReg(RegM31, getKillRegState(KillM31))
928 .addReg(RegM32, getKillRegState(KillM32));
929 // If AddOpIdx is not 1, adjust the order.
930 if (AddOpIdx != 1) {
931 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
932 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
933 }
934
935 MachineInstrBuilder MINewC =
936 BuildMI(*MF, Root.getDebugLoc(),
938 .addReg(NewVRB, getKillRegState(true))
939 .addReg(NewVRA, getKillRegState(true));
940
941 // Update flags for newly created instructions.
942 setSpecialOperandAttr(*MINewA, IntersectedFlags);
943 setSpecialOperandAttr(*MINewB, IntersectedFlags);
944 setSpecialOperandAttr(*MINewC, IntersectedFlags);
945
946 // Record new instructions for insertion.
947 InsInstrs.push_back(MINewA);
948 InsInstrs.push_back(MINewB);
949 InsInstrs.push_back(MINewC);
950 break;
951 }
953 assert(NewVRD && "new FMA register not created!");
954 // Create new instructions for insertion.
955 MachineInstrBuilder MINewA =
956 BuildMI(*MF, Leaf->getDebugLoc(),
958 .addReg(RegM11, getKillRegState(KillM11))
959 .addReg(RegM12, getKillRegState(KillM12));
960 MachineInstrBuilder MINewB =
961 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
962 .addReg(RegX, getKillRegState(KillX))
963 .addReg(RegM21, getKillRegState(KillM21))
964 .addReg(RegM22, getKillRegState(KillM22));
965 MachineInstrBuilder MINewD =
966 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
967 .addReg(NewVRA, getKillRegState(true))
968 .addReg(RegM31, getKillRegState(KillM31))
969 .addReg(RegM32, getKillRegState(KillM32));
970 // If AddOpIdx is not 1, adjust the order.
971 if (AddOpIdx != 1) {
972 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
973 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
974 KillM32);
975 }
976
977 MachineInstrBuilder MINewC =
978 BuildMI(*MF, Root.getDebugLoc(),
980 .addReg(NewVRB, getKillRegState(true))
981 .addReg(NewVRD, getKillRegState(true));
982
983 // Update flags for newly created instructions.
984 setSpecialOperandAttr(*MINewA, IntersectedFlags);
985 setSpecialOperandAttr(*MINewB, IntersectedFlags);
986 setSpecialOperandAttr(*MINewD, IntersectedFlags);
987 setSpecialOperandAttr(*MINewC, IntersectedFlags);
988
989 // Record new instructions for insertion.
990 InsInstrs.push_back(MINewA);
991 InsInstrs.push_back(MINewB);
992 InsInstrs.push_back(MINewD);
993 InsInstrs.push_back(MINewC);
994 break;
995 }
998 Register VarReg;
999 bool KillVarReg = false;
1001 VarReg = RegM31;
1002 KillVarReg = KillM31;
1003 } else {
1004 VarReg = RegM32;
1005 KillVarReg = KillM32;
1006 }
1007 // We don't want to get negative const from memory pool too early, as the
1008 // created entry will not be deleted even if it has no users. Since all
1009 // operand of Leaf and Root are virtual register, we use zero register
1010 // here as a placeholder. When the InsInstrs is selected in
1011 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
1012 // with a virtual register which is a load from constant pool.
// RegB's kill state must come from the kill flag read off Root's add operand
// (KillB). Passing the register itself would convert any non-zero register to
// "true" and mark the operand killed unconditionally.
1013 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
1014 .addReg(RegB, getKillRegState(KillB))
1015 .addReg(RegY, getKillRegState(KillY))
1016 .addReg(PPC::ZERO8);
1017 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
1018 .addReg(NewVRA, getKillRegState(true))
1019 .addReg(RegX, getKillRegState(KillX))
1020 .addReg(VarReg, getKillRegState(KillVarReg));
1021 // For now, we only support xsmaddadp/xsmaddasp, their add operand are
1022 // both at index 1, no need to adjust.
1023 // FIXME: when add more fma instructions support, like fma/fmas, adjust
1024 // the operand index here.
1025 break;
1026 }
1027 }
1028
1029 if (!IsILPReassociate) {
1030 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
1031 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
1032
1033 InsInstrs.push_back(NewARegPressure);
1034 InsInstrs.push_back(NewCRegPressure);
1035 }
1036
1037 assert(!InsInstrs.empty() &&
1038 "Insertion instructions set should not be empty!");
1039
1040 // Record old instructions for deletion.
1041 DelInstrs.push_back(Leaf);
1042 if (IsILPReassociate)
1043 DelInstrs.push_back(Prev);
1044 DelInstrs.push_back(&Root);
1045}
1046
1047// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
// Only the EXTSW, EXTSW_32 and EXTSW_32_64 opcodes are recognized. For those,
// SrcReg is read from operand 1, DstReg from operand 0, SubIdx is set to
// PPC::sub_32 and true is returned. Every other opcode returns false without
// writing any of the out-parameters.
1049 Register &SrcReg, Register &DstReg,
1050 unsigned &SubIdx) const {
1051 switch (MI.getOpcode()) {
1052 default: return false;
1053 case PPC::EXTSW:
1054 case PPC::EXTSW_32:
1055 case PPC::EXTSW_32_64:
1056 SrcReg = MI.getOperand(1).getReg();
1057 DstReg = MI.getOperand(0).getReg();
1058 SubIdx = PPC::sub_32;
1059 return true;
1060 }
1061}
1062
1064 int &FrameIndex) const {
// Recognize a reload from a stack slot. MI must use one of the opcodes in
// getLoadOpcodesForSpillArray() and address the slot as "0 + frame index".
// On a match FrameIndex receives the frame index and the register in
// operand 0 is returned; otherwise 0 is returned and FrameIndex is untouched.
1065 if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
1066 // Check for the operands added by addFrameReference (the immediate is the
1067 // offset which defaults to 0).
1068 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1069 MI.getOperand(2).isFI()) {
1070 FrameIndex = MI.getOperand(2).getIndex();
1071 return MI.getOperand(0).getReg();
1072 }
1073 }
1074 return 0;
1075}
1076
1077// For opcodes with the ReMaterializable flag set, this function is called to
1078// verify the instruction is really rematable.
1080 const MachineInstr &MI) const {
1081 switch (MI.getOpcode()) {
1082 default:
1083 // Let base implementation decide.
1084 break;
// Every opcode listed below is accepted purely on its opcode; the operands
// of MI are not inspected.
1085 case PPC::LI:
1086 case PPC::LI8:
1087 case PPC::PLI:
1088 case PPC::PLI8:
1089 case PPC::LIS:
1090 case PPC::LIS8:
1091 case PPC::ADDIStocHA:
1092 case PPC::ADDIStocHA8:
1093 case PPC::ADDItocL:
1094 case PPC::ADDItocL8:
1095 case PPC::LOAD_STACK_GUARD:
1096 case PPC::PPCLdFixedAddr:
1097 case PPC::XXLXORz:
1098 case PPC::XXLXORspz:
1099 case PPC::XXLXORdpz:
1100 case PPC::XXLEQVOnes:
1101 case PPC::XXSPLTI32DX:
1102 case PPC::XXSPLTIW:
1103 case PPC::XXSPLTIDP:
1104 case PPC::V_SET0B:
1105 case PPC::V_SET0H:
1106 case PPC::V_SET0:
1107 case PPC::V_SETALLONESB:
1108 case PPC::V_SETALLONESH:
1109 case PPC::V_SETALLONES:
1110 case PPC::CRSET:
1111 case PPC::CRUNSET:
1112 case PPC::XXSETACCZ:
1113 case PPC::XXSETACCZW:
1114 return true;
1115 }
1117}
1118
1120 int &FrameIndex) const {
// Recognize a spill to a stack slot. MI must use one of the opcodes in
// getStoreOpcodesForSpillArray(), operand 1 must be a zero immediate and
// operand 2 a frame index. On a match FrameIndex receives the frame index
// and the register in operand 0 is returned; otherwise 0 is returned.
1121 if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
1122 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1123 MI.getOperand(2).isFI()) {
1124 FrameIndex = MI.getOperand(2).getIndex();
1125 return MI.getOperand(0).getReg();
1126 }
1127 }
1128 return 0;
1129}
1130
1132 unsigned OpIdx1,
1133 unsigned OpIdx2) const {
// Commute two operands of MI. Every opcode other than RLWIMI / RLWIMI_rec is
// forwarded to TargetInstrInfo::commuteInstructionImpl. For those two opcodes
// operands 1 and 2 are exchanged and the mask is replaced by its complement;
// this is refused (nullptr) when the rotate count is non-zero or when the
// mask is the trivial MB == 0, ME == 31 mask. With NewMI set a newly built
// instruction is returned and MI is left alone; otherwise MI is rewritten in
// place and &MI is returned.
1134 MachineFunction &MF = *MI.getParent()->getParent();
1135
1136 // Normal instructions can be commuted the obvious way.
1137 if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
1138 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1139 // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
1140 // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
1141 // changing the relative order of the mask operands might change what happens
1142 // to the high-bits of the mask (and, thus, the result).
1143
1144 // Cannot commute if it has a non-zero rotate count.
1145 if (MI.getOperand(3).getImm() != 0)
1146 return nullptr;
1147
1148 // If we have a zero rotate count, we have:
1149 // M = mask(MB,ME)
1150 // Op0 = (Op1 & ~M) | (Op2 & M)
1151 // Change this to:
1152 // M = mask((ME+1)&31, (MB-1)&31)
1153 // Op0 = (Op2 & ~M) | (Op1 & M)
1154
1155 // Swap op1/op2
// NOTE(review): the assert text below says "RLSIMI"; presumably "RLWIMI" was
// meant.
1156 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
1157 "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
1158 Register Reg0 = MI.getOperand(0).getReg();
1159 Register Reg1 = MI.getOperand(1).getReg();
1160 Register Reg2 = MI.getOperand(2).getReg();
1161 unsigned SubReg1 = MI.getOperand(1).getSubReg();
1162 unsigned SubReg2 = MI.getOperand(2).getSubReg();
1163 bool Reg1IsKill = MI.getOperand(1).isKill();
1164 bool Reg2IsKill = MI.getOperand(2).isKill();
1165 bool ChangeReg0 = false;
1166 // If machine instrs are no longer in two-address forms, update
1167 // destination register as well.
1168 if (Reg0 == Reg1) {
1169 // Must be two address instruction (i.e. op1 is tied to op0).
1170 assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
1171 "Expecting a two-address instruction!");
1172 assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
// Reg2 is about to become both operand 0 and operand 1, so its kill flag
// is dropped (presumably because the value now lives on in the result).
1173 Reg2IsKill = false;
1174 ChangeReg0 = true;
1175 }
1176
1177 // Masks.
1178 unsigned MB = MI.getOperand(4).getImm();
1179 unsigned ME = MI.getOperand(5).getImm();
1180
1181 // We can't commute a trivial mask (there is no way to represent an all-zero
1182 // mask).
1183 if (MB == 0 && ME == 31)
1184 return nullptr;
1185
1186 if (NewMI) {
1187 // Create a new instruction.
// This Reg0 shadows the Reg0 declared above.
// NOTE(review): unlike the in-place path further down, SubReg1/SubReg2 are
// not carried over to the new instruction - confirm this is intended.
1188 Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
1189 bool Reg0IsDead = MI.getOperand(0).isDead();
1190 return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
1191 .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
1192 .addReg(Reg2, getKillRegState(Reg2IsKill))
1193 .addReg(Reg1, getKillRegState(Reg1IsKill))
1194 .addImm((ME + 1) & 31)
1195 .addImm((MB - 1) & 31);
1196 }
1197
// In-place update: exchange register, sub-register and kill flag of
// operands 1 and 2, retargeting operand 0 when it was tied to operand 1.
1198 if (ChangeReg0) {
1199 MI.getOperand(0).setReg(Reg2);
1200 MI.getOperand(0).setSubReg(SubReg2);
1201 }
1202 MI.getOperand(2).setReg(Reg1);
1203 MI.getOperand(1).setReg(Reg2);
1204 MI.getOperand(2).setSubReg(SubReg1);
1205 MI.getOperand(1).setSubReg(SubReg2);
1206 MI.getOperand(2).setIsKill(Reg1IsKill);
1207 MI.getOperand(1).setIsKill(Reg2IsKill);
1208
1209 // Swap the mask around.
1210 MI.getOperand(4).setImm((ME + 1) & 31);
1211 MI.getOperand(5).setImm((MB - 1) & 31);
1212 return &MI;
1213}
1214
1216 unsigned &SrcOpIdx1,
1217 unsigned &SrcOpIdx2) const {
// Report which operand indices of MI may be commuted. Opcodes for which
// PPC::getAltVSXFMAOpcode returns -1 use the generic TargetInstrInfo
// answer; all others are pinned to operands 2 and 3.
1218 // For VSX A-Type FMA instructions, it is the first two operands that can be
1219 // commuted, however, because the non-encoded tied input operand is listed
1220 // first, the operands to swap are actually the second and third.
1221
1222 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
1223 if (AltOpc == -1)
1224 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1225
1226 // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
1227 // and SrcOpIdx2.
1228 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
1229}
1230
// Build a nop at position MI of MBB, choosing the opcode from the
// subtarget's CPU directive.
1233 // This function is used for scheduling, and the nop wanted here is the type
1234 // that terminates dispatch groups on the POWER cores.
1235 unsigned Directive = Subtarget.getCPUDirective();
1236 unsigned Opcode;
// POWER6 and POWER7 have dedicated opcodes; POWER8 and POWER9 currently reuse
// the POWER7 one (see the FIXMEs). Every other directive gets a plain
// PPC::NOP.
1237 switch (Directive) {
1238 default: Opcode = PPC::NOP; break;
1239 case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
1240 case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
1241 case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
1242 // FIXME: Update when POWER9 scheduling model is ready.
1243 case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
1244 }
1245
// The nop is created with a default-constructed (empty) debug location.
1246 DebugLoc DL;
1247 BuildMI(MBB, MI, DL, get(Opcode));
1248}
1249
1250/// Return the noop instruction to use for a noop.
/// The result is an MCInst whose opcode is PPC::NOP; no operands are added.
1252 MCInst Nop;
1253 Nop.setOpcode(PPC::NOP);
1254 return Nop;
1255}
1256
1257// Branch analysis.
1258// Note: If the condition register is set to CTR or CTR8 then this is a
1259// BDNZ (imm == 1) or BDZ (imm == 0) branch.
//
// Return convention, as used throughout the body: false means the block's
// terminators were understood (TBB, FBB and Cond are filled in as far as they
// apply), true means the block cannot be analyzed. A false return with TBB
// left unset means the block simply falls through.
1262 MachineBasicBlock *&FBB,
1264 bool AllowModify) const {
1265 bool isPPC64 = Subtarget.isPPC64();
1266
1267 // If the block has no terminators, it just falls into the block after it.
1269 if (I == MBB.end())
1270 return false;
1271
1272 if (!isUnpredicatedTerminator(*I))
1273 return false;
1274
1275 if (AllowModify) {
1276 // If the BB ends with an unconditional branch to the fallthrough BB,
1277 // we eliminate the branch instruction.
1278 if (I->getOpcode() == PPC::B &&
1279 MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
1280 I->eraseFromParent();
1281
1282 // We update iterator after deleting the last branch.
1284 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
1285 return false;
1286 }
1287 }
1288
1289 // Get the last instruction in the block.
1290 MachineInstr &LastInst = *I;
1291
1292 // If there is only one terminator instruction, process it.
// Note that the condition below also steps I back to the preceding
// instruction when one exists.
1293 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
1294 if (LastInst.getOpcode() == PPC::B) {
// A branch whose target operand is not a basic block cannot be analyzed.
1295 if (!LastInst.getOperand(0).isMBB())
1296 return true;
1297 TBB = LastInst.getOperand(0).getMBB();
1298 return false;
1299 } else if (LastInst.getOpcode() == PPC::BCC) {
1300 if (!LastInst.getOperand(2).isMBB())
1301 return true;
1302 // Block ends with fall-through condbranch.
// For BCC, Cond is {operand 0, operand 1} of the branch.
1303 TBB = LastInst.getOperand(2).getMBB();
1304 Cond.push_back(LastInst.getOperand(0));
1305 Cond.push_back(LastInst.getOperand(1));
1306 return false;
1307 } else if (LastInst.getOpcode() == PPC::BC) {
1308 if (!LastInst.getOperand(1).isMBB())
1309 return true;
1310 // Block ends with fall-through condbranch.
1311 TBB = LastInst.getOperand(1).getMBB();
1313 Cond.push_back(LastInst.getOperand(0));
1314 return false;
1315 } else if (LastInst.getOpcode() == PPC::BCn) {
1316 if (!LastInst.getOperand(1).isMBB())
1317 return true;
1318 // Block ends with fall-through condbranch.
1319 TBB = LastInst.getOperand(1).getMBB();
1321 Cond.push_back(LastInst.getOperand(0));
1322 return false;
1323 } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
1324 LastInst.getOpcode() == PPC::BDNZ) {
1325 if (!LastInst.getOperand(0).isMBB())
1326 return true;
1328 return true;
// BDNZ is encoded as Cond = {imm 1, CTR8 or CTR depending on isPPC64}.
1329 TBB = LastInst.getOperand(0).getMBB();
1330 Cond.push_back(MachineOperand::CreateImm(1));
1331 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1332 true));
1333 return false;
1334 } else if (LastInst.getOpcode() == PPC::BDZ8 ||
1335 LastInst.getOpcode() == PPC::BDZ) {
1336 if (!LastInst.getOperand(0).isMBB())
1337 return true;
1339 return true;
// BDZ is encoded as Cond = {imm 0, CTR8 or CTR depending on isPPC64}.
1340 TBB = LastInst.getOperand(0).getMBB();
1341 Cond.push_back(MachineOperand::CreateImm(0));
1342 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1343 true));
1344 return false;
1345 }
1346
1347 // Otherwise, don't know what this is.
1348 return true;
1349 }
1350
1351 // Get the instruction before it if it's a terminator.
1352 MachineInstr &SecondLastInst = *I;
1353
1354 // If there are three terminators, we don't know what sort of block this is.
1355 if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
1356 return true;
1357
// Two-terminator forms: a conditional branch followed by PPC::B. The
// conditional branch supplies TBB and Cond, the trailing B supplies FBB.
1358 // If the block ends with PPC::B and PPC::BCC, handle it.
1359 if (SecondLastInst.getOpcode() == PPC::BCC &&
1360 LastInst.getOpcode() == PPC::B) {
1361 if (!SecondLastInst.getOperand(2).isMBB() ||
1362 !LastInst.getOperand(0).isMBB())
1363 return true;
1364 TBB = SecondLastInst.getOperand(2).getMBB();
1365 Cond.push_back(SecondLastInst.getOperand(0));
1366 Cond.push_back(SecondLastInst.getOperand(1));
1367 FBB = LastInst.getOperand(0).getMBB();
1368 return false;
1369 } else if (SecondLastInst.getOpcode() == PPC::BC &&
1370 LastInst.getOpcode() == PPC::B) {
1371 if (!SecondLastInst.getOperand(1).isMBB() ||
1372 !LastInst.getOperand(0).isMBB())
1373 return true;
1374 TBB = SecondLastInst.getOperand(1).getMBB();
1376 Cond.push_back(SecondLastInst.getOperand(0));
1377 FBB = LastInst.getOperand(0).getMBB();
1378 return false;
1379 } else if (SecondLastInst.getOpcode() == PPC::BCn &&
1380 LastInst.getOpcode() == PPC::B) {
1381 if (!SecondLastInst.getOperand(1).isMBB() ||
1382 !LastInst.getOperand(0).isMBB())
1383 return true;
1384 TBB = SecondLastInst.getOperand(1).getMBB();
1386 Cond.push_back(SecondLastInst.getOperand(0));
1387 FBB = LastInst.getOperand(0).getMBB();
1388 return false;
1389 } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
1390 SecondLastInst.getOpcode() == PPC::BDNZ) &&
1391 LastInst.getOpcode() == PPC::B) {
1392 if (!SecondLastInst.getOperand(0).isMBB() ||
1393 !LastInst.getOperand(0).isMBB())
1394 return true;
1396 return true;
1397 TBB = SecondLastInst.getOperand(0).getMBB();
1398 Cond.push_back(MachineOperand::CreateImm(1));
1399 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1400 true));
1401 FBB = LastInst.getOperand(0).getMBB();
1402 return false;
1403 } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
1404 SecondLastInst.getOpcode() == PPC::BDZ) &&
1405 LastInst.getOpcode() == PPC::B) {
1406 if (!SecondLastInst.getOperand(0).isMBB() ||
1407 !LastInst.getOperand(0).isMBB())
1408 return true;
1410 return true;
1411 TBB = SecondLastInst.getOperand(0).getMBB();
1412 Cond.push_back(MachineOperand::CreateImm(0));
1413 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1414 true));
1415 FBB = LastInst.getOperand(0).getMBB();
1416 return false;
1417 }
1418
1419 // If the block ends with two PPC::Bs, handle it. The second one is not
1420 // executed, so remove it.
// The removal only happens when AllowModify is set; TBB is reported either
// way.
1421 if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
1422 if (!SecondLastInst.getOperand(0).isMBB())
1423 return true;
1424 TBB = SecondLastInst.getOperand(0).getMBB();
1425 I = LastInst;
1426 if (AllowModify)
1427 I->eraseFromParent();
1428 return false;
1429 }
1430
1431 // Otherwise, can't handle this.
1432 return true;
1433}
1434
1436 int *BytesRemoved) const {
// Erase the branch instruction(s) at the end of MBB and return how many were
// erased (0, 1 or 2). Byte accounting is not implemented: a non-null
// BytesRemoved trips the assertion below.
1437 assert(!BytesRemoved && "code size not handled");
1438
1440 if (I == MBB.end())
1441 return 0;
1442
// The instruction at I must be one of the branch opcodes handled here
// (unconditional B or any of the conditional forms); otherwise nothing is
// removed.
1443 if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
1444 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1445 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1446 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1447 return 0;
1448
1449 // Remove the branch.
1450 I->eraseFromParent();
1451
1452 I = MBB.end();
1453
1454 if (I == MBB.begin()) return 1;
1455 --I;
// The second candidate must be a conditional branch; PPC::B is deliberately
// absent from this list.
1456 if (I->getOpcode() != PPC::BCC &&
1457 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1458 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1459 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1460 return 1;
1461
1462 // Remove the branch.
1463 I->eraseFromParent();
1464 return 2;
1465}
1466
1469 MachineBasicBlock *FBB,
1471 const DebugLoc &DL,
1472 int *BytesAdded) const {
// Append branch instructions to MBB and return how many were added: 1 for a
// one-way branch (FBB is null), 2 for a conditional branch to TBB followed by
// an unconditional branch to FBB. Cond is either empty (unconditional) or has
// exactly two entries. Byte accounting is not implemented.
1473 // Shouldn't be a fall through.
1474 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1475 assert((Cond.size() == 2 || Cond.size() == 0) &&
1476 "PPC branch conditions have two components!");
1477 assert(!BytesAdded && "code size not handled");
1478
1479 bool isPPC64 = Subtarget.isPPC64();
1480
// Opcode selection, applied identically in both paths below:
//   Cond[1] is CTR/CTR8          -> BDNZ(8) if Cond[0] is non-zero, else BDZ(8)
//   Cond[0] == PRED_BIT_SET      -> BC on Cond[1]
//   Cond[0] == PRED_BIT_UNSET    -> BCn on Cond[1]
//   anything else                -> BCC with predicate Cond[0] on Cond[1]
1481 // One-way branch.
1482 if (!FBB) {
1483 if (Cond.empty()) // Unconditional branch
1484 BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
1485 else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1486 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1487 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1488 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1489 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1490 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1491 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1492 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1493 else // Conditional branch
1494 BuildMI(&MBB, DL, get(PPC::BCC))
1495 .addImm(Cond[0].getImm())
1496 .add(Cond[1])
1497 .addMBB(TBB);
1498 return 1;
1499 }
1500
1501 // Two-way Conditional Branch.
// Cond is indexed without an emptiness check on this path.
1502 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1503 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1504 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1505 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1506 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1507 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1508 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1509 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1510 else
1511 BuildMI(&MBB, DL, get(PPC::BCC))
1512 .addImm(Cond[0].getImm())
1513 .add(Cond[1])
1514 .addMBB(TBB);
1515 BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
1516 return 2;
1517}
1518
1519// Select analysis.
// Decide whether a select between TrueReg and FalseReg under Cond can be
// emitted as an isel. Returns false when the subtarget has no isel, when Cond
// does not have two entries, when the condition register is CTR/CTR8 or any
// physical register, or when the two value registers share no GPR-like
// register class. On success the three cycle counts are all set to 1.
1522 Register DstReg, Register TrueReg,
1523 Register FalseReg, int &CondCycles,
1524 int &TrueCycles, int &FalseCycles) const {
1525 if (!Subtarget.hasISEL())
1526 return false;
1527
1528 if (Cond.size() != 2)
1529 return false;
1530
1531 // If this is really a bdnz-like condition, then it cannot be turned into a
1532 // select.
1533 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1534 return false;
1535
1536 // If the conditional branch uses a physical register, then it cannot be
1537 // turned into a select.
1538 if (Cond[1].getReg().isPhysical())
1539 return false;
1540
1541 // Check register classes.
1543 const TargetRegisterClass *RC =
1544 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1545 if (!RC)
1546 return false;
1547
1548 // isel is for regular integer GPRs only.
1549 if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1550 !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1551 !PPC::G8RCRegClass.hasSubClassEq(RC) &&
1552 !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1553 return false;
1554
1555 // FIXME: These numbers are for the A2, how well they work for other cores is
1556 // an open question. On the A2, the isel instruction has a 2-cycle latency
1557 // but single-cycle throughput. These numbers are used in combination with
1558 // the MispredictPenalty setting from the active SchedMachineModel.
1559 CondCycles = 1;
1560 TrueCycles = 1;
1561 FalseCycles = 1;
1562
1563 return true;
1564}
1565
1568 const DebugLoc &dl, Register DestReg,
1570 Register FalseReg) const {
// Emit an ISEL / ISEL8 at MI that writes DestReg with TrueReg or FalseReg
// depending on the two-entry condition Cond (predicate immediate in Cond[0],
// condition register in Cond[1]).
1571 assert(Cond.size() == 2 &&
1572 "PPC branch conditions have two components!");
1573
1574 // Get the register classes.
1576 const TargetRegisterClass *RC =
1577 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1578 assert(RC && "TrueReg and FalseReg must have overlapping register classes");
1579
1580 bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
1581 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
1582 assert((Is64Bit ||
1583 PPC::GPRCRegClass.hasSubClassEq(RC) ||
1584 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
1585 "isel is for regular integer GPRs only");
1586
1587 unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
1588 auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
1589
// SubIdx names the sub-register of Cond[1] that is passed to isel. SwapOps is
// set for the negated predicates (NE, GE, LE, NU, BIT_UNSET) and exchanges
// the true/false inputs. The switch has no default label, so a predicate not
// listed keeps the initial SubIdx = 0 / SwapOps = false.
1590 unsigned SubIdx = 0;
1591 bool SwapOps = false;
1592 switch (SelectPred) {
1593 case PPC::PRED_EQ:
1594 case PPC::PRED_EQ_MINUS:
1595 case PPC::PRED_EQ_PLUS:
1596 SubIdx = PPC::sub_eq; SwapOps = false; break;
1597 case PPC::PRED_NE:
1598 case PPC::PRED_NE_MINUS:
1599 case PPC::PRED_NE_PLUS:
1600 SubIdx = PPC::sub_eq; SwapOps = true; break;
1601 case PPC::PRED_LT:
1602 case PPC::PRED_LT_MINUS:
1603 case PPC::PRED_LT_PLUS:
1604 SubIdx = PPC::sub_lt; SwapOps = false; break;
1605 case PPC::PRED_GE:
1606 case PPC::PRED_GE_MINUS:
1607 case PPC::PRED_GE_PLUS:
1608 SubIdx = PPC::sub_lt; SwapOps = true; break;
1609 case PPC::PRED_GT:
1610 case PPC::PRED_GT_MINUS:
1611 case PPC::PRED_GT_PLUS:
1612 SubIdx = PPC::sub_gt; SwapOps = false; break;
1613 case PPC::PRED_LE:
1614 case PPC::PRED_LE_MINUS:
1615 case PPC::PRED_LE_PLUS:
1616 SubIdx = PPC::sub_gt; SwapOps = true; break;
1617 case PPC::PRED_UN:
1618 case PPC::PRED_UN_MINUS:
1619 case PPC::PRED_UN_PLUS:
1620 SubIdx = PPC::sub_un; SwapOps = false; break;
1621 case PPC::PRED_NU:
1622 case PPC::PRED_NU_MINUS:
1623 case PPC::PRED_NU_PLUS:
1624 SubIdx = PPC::sub_un; SwapOps = true; break;
1625 case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
1626 case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
1627 }
1628
1629 Register FirstReg = SwapOps ? FalseReg : TrueReg,
1630 SecondReg = SwapOps ? TrueReg : FalseReg;
1631
1632 // The first input register of isel cannot be r0. If it is a member
1633 // of a register class that can be r0, then copy it first (the
1634 // register allocator should eliminate the copy).
1635 if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
1636 MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
1637 const TargetRegisterClass *FirstRC =
1638 MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
1639 &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
1640 Register OldFirstReg = FirstReg;
1641 FirstReg = MRI.createVirtualRegister(FirstRC);
1642 BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
1643 .addReg(OldFirstReg);
1644 }
1645
1646 BuildMI(MBB, MI, dl, get(OpCode), DestReg)
1647 .addReg(FirstReg).addReg(SecondReg)
1648 .addReg(Cond[1].getReg(), 0, SubIdx);
1649}
1650
1651static unsigned getCRBitValue(unsigned CRBit) {
1652 unsigned Ret = 4;
1653 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1654 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1655 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1656 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1657 Ret = 3;
1658 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1659 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1660 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1661 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1662 Ret = 2;
1663 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1664 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1665 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1666 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1667 Ret = 1;
1668 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1669 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1670 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1671 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1672 Ret = 0;
1673
1674 assert(Ret != 4 && "Invalid CR bit register");
1675 return Ret;
1676}
1677
1680 const DebugLoc &DL, MCRegister DestReg,
1681 MCRegister SrcReg, bool KillSrc) const {
// Emit, at position I of MBB, the instruction(s) that copy physical register
// SrcReg into DestReg. Cross-class copies (CR <-> GPR, GPR <-> VSX scalar,
// SPE <-> GPR) are handled first and return early; same-class copies pick a
// single opcode that is emitted at the bottom; register pairs and
// accumulators are expanded into several instructions. A combination that
// matches no case reaches llvm_unreachable.
1682 // We can end up with self copies and similar things as a result of VSX copy
1683 // legalization. Promote them here.
1685 if (PPC::F8RCRegClass.contains(DestReg) &&
1686 PPC::VSRCRegClass.contains(SrcReg)) {
1687 MCRegister SuperReg =
1688 TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
1689
1690 if (VSXSelfCopyCrash && SrcReg == SuperReg)
1691 llvm_unreachable("nop VSX copy");
1692
1693 DestReg = SuperReg;
1694 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
1695 PPC::VSRCRegClass.contains(DestReg)) {
1696 MCRegister SuperReg =
1697 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
1698
1699 if (VSXSelfCopyCrash && DestReg == SuperReg)
1700 llvm_unreachable("nop VSX copy");
1701
1702 SrcReg = SuperReg;
1703 }
1704
1705 // Different class register copy
// NOTE(review): in each of the six early-return branches below,
// `getKillRegState(KillSrc);` is a stand-alone statement whose result is
// discarded, so KillSrc has no effect on the source operand of the
// instruction built just before it. Confirm whether the kill state was meant
// to be passed to addReg() (for the CR-bit case the operand is the whole CR
// field, so a kill flag may not be valid there).
1706 if (PPC::CRBITRCRegClass.contains(SrcReg) &&
1707 PPC::GPRCRegClass.contains(DestReg)) {
1708 MCRegister CRReg = getCRFromCRBit(SrcReg);
1709 BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
1710 getKillRegState(KillSrc);
1711 // Rotate the CR bit in the CR fields to be the least significant bit and
1712 // then mask with 0x1 (MB = ME = 31).
1713 BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
1714 .addReg(DestReg, RegState::Kill)
1715 .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
1716 .addImm(31)
1717 .addImm(31);
1718 return;
1719 } else if (PPC::CRRCRegClass.contains(SrcReg) &&
1720 (PPC::G8RCRegClass.contains(DestReg) ||
1721 PPC::GPRCRegClass.contains(DestReg))) {
1722 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1723 unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1724 unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1725 unsigned CRNum = TRI->getEncodingValue(SrcReg);
1726 BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
1727 getKillRegState(KillSrc);
// No rotate is emitted for CR7 (presumably because that field already sits
// in the low four bits - TODO confirm).
1728 if (CRNum == 7)
1729 return;
1730 // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
1731 BuildMI(MBB, I, DL, get(ShCode), DestReg)
1732 .addReg(DestReg, RegState::Kill)
1733 .addImm(CRNum * 4 + 4)
1734 .addImm(28)
1735 .addImm(31);
1736 return;
1737 } else if (PPC::G8RCRegClass.contains(SrcReg) &&
1738 PPC::VSFRCRegClass.contains(DestReg)) {
1739 assert(Subtarget.hasDirectMove() &&
1740 "Subtarget doesn't support directmove, don't know how to copy.");
1741 BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
1742 NumGPRtoVSRSpill++;
1743 getKillRegState(KillSrc);
1744 return;
1745 } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
1746 PPC::G8RCRegClass.contains(DestReg)) {
1747 assert(Subtarget.hasDirectMove() &&
1748 "Subtarget doesn't support directmove, don't know how to copy.");
1749 BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
1750 getKillRegState(KillSrc);
1751 return;
1752 } else if (PPC::SPERCRegClass.contains(SrcReg) &&
1753 PPC::GPRCRegClass.contains(DestReg)) {
1754 BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
1755 getKillRegState(KillSrc);
1756 return;
1757 } else if (PPC::GPRCRegClass.contains(SrcReg) &&
1758 PPC::SPERCRegClass.contains(DestReg)) {
1759 BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
1760 getKillRegState(KillSrc);
1761 return;
1762 }
1763
// Same-class copies: select the opcode here; the instruction itself is built
// after the chain.
1764 unsigned Opc;
1765 if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
1766 Opc = PPC::OR;
1767 else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
1768 Opc = PPC::OR8;
1769 else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
1770 Opc = PPC::FMR;
1771 else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
1772 Opc = PPC::MCRF;
1773 else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
1774 Opc = PPC::VOR;
1775 else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
1776 // There are two different ways this can be done:
1777 // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1778 // issue in VSU pipeline 0.
1779 // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1780 // can go to either pipeline.
1781 // We'll always use xxlor here, because in practically all cases where
1782 // copies are generated, they are close enough to some use that the
1783 // lower-latency form is preferable.
1784 Opc = PPC::XXLOR;
1785 else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
1786 PPC::VSSRCRegClass.contains(DestReg, SrcReg))
1787 Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1788 else if (Subtarget.pairedVectorMemops() &&
1789 PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
// A VSX register pair is copied as two XXLORs. Each pair register is first
// rewritten to its first constituent register: pairs above VSRp15 are based
// at PPC::V0, the others at PPC::VSL0; the second half is that register + 1.
1790 if (SrcReg > PPC::VSRp15)
1791 SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
1792 else
1793 SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
1794 if (DestReg > PPC::VSRp15)
1795 DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
1796 else
1797 DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
1798 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
1799 addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1800 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
1801 addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
1802 return;
1803 }
1804 else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
1805 Opc = PPC::CROR;
1806 else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
1807 Opc = PPC::EVOR;
1808 else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1809 PPC::UACCRCRegClass.contains(DestReg)) &&
1810 (PPC::ACCRCRegClass.contains(SrcReg) ||
1811 PPC::UACCRCRegClass.contains(SrcReg))) {
1812 // If primed, de-prime the source register, copy the individual registers
1813 // and prime the destination if needed. The vector subregisters are
1814 // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1815 // source is primed, we need to re-prime it after the copy as well.
1816 PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
// "Primed" here means the register belongs to ACCRC rather than UACCRC.
1817 bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1818 bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1819 MCRegister VSLSrcReg =
1820 PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1821 MCRegister VSLDestReg =
1822 PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1823 if (SrcPrimed)
1824 BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1825 for (unsigned Idx = 0; Idx < 4; Idx++)
1826 BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1827 .addReg(VSLSrcReg + Idx)
1828 .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
1829 if (DestPrimed)
1830 BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1831 if (SrcPrimed && !KillSrc)
1832 BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1833 return;
1834 } else if (PPC::G8pRCRegClass.contains(DestReg) &&
1835 PPC::G8pRCRegClass.contains(SrcReg)) {
1836 // TODO: Handle G8RC to G8pRC (and vice versa) copy.
// A GPR pair G8p<n> is copied as two OR8s on X<2n> and X<2n+1>.
1837 unsigned DestRegIdx = DestReg - PPC::G8p0;
1838 MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
1839 MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
1840 unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1841 MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
1842 MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
1843 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
1844 .addReg(SrcRegSub0)
1845 .addReg(SrcRegSub0, getKillRegState(KillSrc));
1846 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
1847 .addReg(SrcRegSub1)
1848 .addReg(SrcRegSub1, getKillRegState(KillSrc));
1849 return;
1850 } else
1851 llvm_unreachable("Impossible reg-to-reg copy");
1852
// Three-operand opcodes get the source register twice; only the last use
// carries the kill flag. Other opcodes take a single source operand.
1853 const MCInstrDesc &MCID = get(Opc);
1854 if (MCID.getNumOperands() == 3)
1855 BuildMI(MBB, I, DL, MCID, DestReg)
1856 .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1857 else
1858 BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
1859}
1860
// Map register class RC to an index; the two functions that follow use the
// result to subscript the store/load spill-opcode arrays. The classes are
// tested in the order written, using hasSubClassEq, so the first matching
// branch wins. A class that matches none of them reaches llvm_unreachable.
// NOTE(review): the statement that assigns OpcodeIndex in each branch is not
// visible in this copy of the function - confirm against the full source.
1861unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
1862 int OpcodeIndex = 0;
1863
1864 if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
1865 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1867 } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
1868 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1870 } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1872 } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1874 } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1876 } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1878 } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1880 } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1882 } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1884 } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1886 } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1888 } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
// The accumulator and VSX-pair classes below are only expected when the
// subtarget reports pairedVectorMemops().
1890 } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
1891 assert(Subtarget.pairedVectorMemops() &&
1892 "Register unexpected when paired memops are disabled.");
1894 } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1895 assert(Subtarget.pairedVectorMemops() &&
1896 "Register unexpected when paired memops are disabled.");
1898 } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1899 assert(Subtarget.pairedVectorMemops() &&
1900 "Register unexpected when paired memops are disabled.");
1902 } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1903 assert(Subtarget.pairedVectorMemops() &&
1904 "Register unexpected when paired memops are disabled.");
1906 } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1908 } else {
1909 llvm_unreachable("Unknown regclass!");
1910 }
1911 return OpcodeIndex;
1912}
1913
// Return the store opcode used to spill a register of class RC: the entry of
// getStoreOpcodesForSpillArray() at index getSpillIndex(RC).
1914unsigned
1916 ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1917 return OpcodesForSpill[getSpillIndex(RC)];
1918}
1919
// Return the load opcode used to reload a register of class RC: the entry of
// getLoadOpcodesForSpillArray() at index getSpillIndex(RC).
1920unsigned
1922 ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1923 return OpcodesForSpill[getSpillIndex(RC)];
1924}
1925
// Build (without inserting into a block) the store that spills SrcReg to
// frame index FrameIdx, using the spill-store opcode for RC, and update the
// function's PPCFunctionInfo: the function is marked as having spills, as
// spilling CR when RC is a CRRC/CRBITRC subclass, and as having non-RI spills
// when the chosen opcode is an X-form memory operation.
1926void PPCInstrInfo::StoreRegToStackSlot(
1927 MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1928 const TargetRegisterClass *RC,
1929 SmallVectorImpl<MachineInstr *> &NewMIs) const {
1930 unsigned Opcode = getStoreOpcodeForSpill(RC);
// The store is created with a default-constructed (empty) debug location.
1931 DebugLoc DL;
1932
1933 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1934 FuncInfo->setHasSpills();
1935
1937 BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
1938 FrameIdx));
1939
1940 if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
1941 PPC::CRBITRCRegClass.hasSubClassEq(RC))
1942 FuncInfo->setSpillsCR();
1943
1944 if (isXFormMemOp(Opcode))
1945 FuncInfo->setHasNonRISpills();
1946}
1947
1950 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1951 const TargetRegisterInfo *TRI) const {
// Spill SrcReg to FrameIdx using RC exactly as given: the instructions
// produced by StoreRegToStackSlot are inserted before MI, and a memory
// operand is attached to the last of them.
1952 MachineFunction &MF = *MBB.getParent();
1954
1955 StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
1956
1957 for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
1958 MBB.insert(MI, NewMIs[i]);
1959
1960 const MachineFrameInfo &MFI = MF.getFrameInfo();
1964 MFI.getObjectAlign(FrameIdx));
1965 NewMIs.back()->addMemOperand(MF, MMO);
1966}
1967
1970 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1971 const TargetRegisterInfo *TRI, Register VReg) const {
1972 // We need to avoid a situation in which the value from a VRRC register is
1973 // spilled using an Altivec instruction and reloaded into a VSRC register
1974 // using a VSX instruction. The issue with this is that the VSX
1975 // load/store instructions swap the doublewords in the vector and the Altivec
1976 // ones don't. The register classes on the spill/reload may be different if
1977 // the register is defined using an Altivec instruction and is then used by a
1978 // VSX instruction.
  // updatedRC() supplies the class to spill with; the actual store is then
  // emitted by the NoUpd variant. VReg is not used here.
1979 RC = updatedRC(RC);
1980 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
1981}
1982
// Builds (but does not insert into a block) the load that reloads DestReg of
// class RC from frame index FrameIdx and appends it to NewMIs.
1983void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
1984 unsigned DestReg, int FrameIdx,
1985 const TargetRegisterClass *RC,
1987 const {
  // The reload opcode is chosen purely from the register class.
1988 unsigned Opcode = getLoadOpcodeForSpill(RC);
1989 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
1990 FrameIdx));
1991}
1992
1995 int FrameIdx, const TargetRegisterClass *RC,
1996 const TargetRegisterInfo *TRI) const {
  // Reloads DestReg from FrameIdx using RC exactly as given (the caller is
  // responsible for any register-class adjustment).
1997 MachineFunction &MF = *MBB.getParent();
  // Use MI's debug location unless MI is the end of the block.
1999 DebugLoc DL;
2000 if (MI != MBB.end()) DL = MI->getDebugLoc();
2001
  // Create the reload instruction(s) first, then place them before MI.
2002 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2003
2004 for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
2005 MBB.insert(MI, NewMIs[i]);
2006
  // Attach a memory operand, carrying the stack object's alignment, to the
  // last instruction that was created.
2007 const MachineFrameInfo &MFI = MF.getFrameInfo();
2011 MFI.getObjectAlign(FrameIdx));
2012 NewMIs.back()->addMemOperand(MF, MMO);
2013}
2014
2017 Register DestReg, int FrameIdx,
2018 const TargetRegisterClass *RC,
2019 const TargetRegisterInfo *TRI,
2020 Register VReg) const {
2021 // We need to avoid a situation in which the value from a VRRC register is
2022 // spilled using an Altivec instruction and reloaded into a VSRC register
2023 // using a VSX instruction. The issue with this is that the VSX
2024 // load/store instructions swap the doublewords in the vector and the Altivec
2025 // ones don't. The register classes on the spill/reload may be different if
2026 // the register is defined using an Altivec instruction and is then used by a
2027 // VSX instruction.
  // updatedRC() supplies the class to reload with; the actual load is then
  // emitted by the NoUpd variant. VReg is not used here.
2028 RC = updatedRC(RC);
2029
2030 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2031}
2032
// Inverts a two-operand branch condition in place: Cond[0] is the predicate
// immediate and Cond[1] is the register it tests. Always returns false.
2035 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2036 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
  // Counter-register conditions use a 0/1 immediate; flip it.
2037 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2038 else
2039 // Leave the CR# the same, but invert the condition.
2040 Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
2041 return false;
2042}
2043
2044// For some instructions, it is legal to fold ZERO into the RA register field.
2045// This function performs that fold by replacing the operand with PPC::ZERO
2046// (or PPC::ZERO8); it does not check whether the load immediate zero is still in use.
// Returns true if UseMI's operand was rewritten, false if the fold does not
// apply. DefMI itself is never modified or erased here.
2048 Register Reg) const {
2049 // A zero immediate should always be loaded with a single li.
2050 unsigned DefOpc = DefMI.getOpcode();
2051 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2052 return false;
2053 if (!DefMI.getOperand(1).isImm())
2054 return false;
2055 if (DefMI.getOperand(1).getImm() != 0)
2056 return false;
2057
2058 // Note that we cannot here invert the arguments of an isel in order to fold
2059 // a ZERO into what is presented as the second argument. All we have here
2060 // is the condition bit, and that might come from a CR-logical bit operation.
2061
2062 const MCInstrDesc &UseMCID = UseMI.getDesc();
2063
2064 // Only fold into real machine instructions.
2065 if (UseMCID.isPseudo())
2066 return false;
2067
2068 // We need to find which of the User's operands is to be folded, that will be
2069 // the operand that matches the given register ID.
  // Only the first register operand equal to Reg is considered.
2070 unsigned UseIdx;
2071 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2072 if (UseMI.getOperand(UseIdx).isReg() &&
2073 UseMI.getOperand(UseIdx).getReg() == Reg)
2074 break;
2075
2076 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2077 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2078
2079 const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2080
2081 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2082 // register (which might also be specified as a pointer class kind).
2083 if (UseInfo->isLookupPtrRegClass()) {
2084 if (UseInfo->RegClass /* Kind */ != 1)
2085 return false;
2086 } else {
2087 if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2088 UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2089 return false;
2090 }
2091
2092 // Make sure this is not tied to an output register (or otherwise
2093 // constrained). This is true for ST?UX registers, for example, which
2094 // are tied to their output registers.
2095 if (UseInfo->Constraints != 0)
2096 return false;
2097
  // Pick the 32- or 64-bit zero register: from the subtarget for pointer-class
  // operands, otherwise from the operand's register class.
2098 MCRegister ZeroReg;
2099 if (UseInfo->isLookupPtrRegClass()) {
2100 bool isPPC64 = Subtarget.isPPC64();
2101 ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2102 } else {
2103 ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2104 PPC::ZERO8 : PPC::ZERO;
2105 }
2106
2107 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2108 LLVM_DEBUG(UseMI.dump());
2109 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2110 LLVM_DEBUG(dbgs() << "Into: ");
2111 LLVM_DEBUG(UseMI.dump());
2112 return true;
2113}
2114
2115// Folds zero into instructions which have a load immediate zero as an operand
2116// but also recognize zero as immediate zero. If the definition of the load
2117// has no more users it is deleted.
// Returns whether UseMI was changed; DefMI may be erased whether or not the
// fold succeeded, as long as Reg has no remaining non-debug uses.
2119 Register Reg, MachineRegisterInfo *MRI) const {
2120 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2121 if (MRI->use_nodbg_empty(Reg))
2122 DefMI.eraseFromParent();
2123 return Changed;
2124}
2125
// Returns true if any instruction in MBB defines the counter register
// (PPC::CTR or PPC::CTR8), false otherwise.
2127 for (MachineInstr &MI : MBB)
2128 if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
2129 MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
2130 return true;
2131 return false;
2132}
2133
2134// We should make sure that, if we're going to predicate both sides of a
2135// condition (a diamond), that both sides don't define the counter register. We
2136// can predicate counter-decrement-based branches, but while that predicates
2137// the branching, it does not predicate the counter decrement. If we tried to
2138// merge the diamond into one predicated block, we'd decrement the counter
2139// twice.
// The instruction counts, extra counts and branch probability are not
// consulted: the answer depends only on whether both blocks define CTR.
2141 unsigned NumT, unsigned ExtraT,
2142 MachineBasicBlock &FMBB,
2143 unsigned NumF, unsigned ExtraF,
2144 BranchProbability Probability) const {
2145 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2146}
2147
2148
2150 // The predicated branches are identified by their type, not really by the
2151 // explicit presence of a predicate. Furthermore, some of them can be
2152 // predicated more than once. Because if conversion won't try to predicate
2153 // any instruction which already claims to be predicated (by returning true
2154 // here), always return false. In doing so, we let isPredicable() be the
2155 // final word on whether or not the instruction can be (further) predicated.
2156
2157 return false;
2158}
2159
2161 const MachineBasicBlock *MBB,
2162 const MachineFunction &MF) const {
  // Opcodes listed in the switch are always treated as scheduling boundaries.
2163 switch (MI.getOpcode()) {
2164 default:
2165 break;
2166 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2167 // across them, since some FP operations may change content of FPSCR.
2168 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
  // FENCE is handled the same way as the two FPSCR instructions above.
2169 case PPC::MFFS:
2170 case PPC::MTFSF:
2171 case PPC::FENCE:
2172 return true;
2173 }
2175}
2176
2178 ArrayRef<MachineOperand> Pred) const {
  // Rewrites MI in place into its conditional form. Pred[0] is the predicate
  // immediate and Pred[1] is the register it tests (a CR field/bit, or the
  // counter register). Returns true if MI was one of the handled opcodes and
  // was rewritten, false otherwise.
2179 unsigned OpC = MI.getOpcode();
  // Case 1: return (blr) becomes a conditional return.
2180 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2181 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2182 bool isPPC64 = Subtarget.isPPC64();
2183 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2184 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2185 // Need to add Def and Use for the CTR implicit operand.
2186 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2187 .addReg(Pred[1].getReg(), RegState::Implicit)
2189 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2190 MI.setDesc(get(PPC::BCLR));
2191 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2192 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2193 MI.setDesc(get(PPC::BCLRn));
2194 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2195 } else {
2196 MI.setDesc(get(PPC::BCCLR));
2197 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2198 .addImm(Pred[0].getImm())
2199 .add(Pred[1]);
2200 }
2201
2202 return true;
  // Case 2: unconditional branch becomes a conditional branch. The target
  // block operand is removed and re-added after the predicate operands.
2203 } else if (OpC == PPC::B) {
2204 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2205 bool isPPC64 = Subtarget.isPPC64();
2206 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2207 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2208 // Need to add Def and Use for the CTR implicit operand.
2209 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2210 .addReg(Pred[1].getReg(), RegState::Implicit)
2212 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2213 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2214 MI.removeOperand(0);
2215
2216 MI.setDesc(get(PPC::BC));
2217 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2218 .add(Pred[1])
2219 .addMBB(MBB);
2220 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2221 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2222 MI.removeOperand(0);
2223
2224 MI.setDesc(get(PPC::BCn));
2225 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2226 .add(Pred[1])
2227 .addMBB(MBB);
2228 } else {
2229 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2230 MI.removeOperand(0);
2231
2232 MI.setDesc(get(PPC::BCC));
2233 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2234 .addImm(Pred[0].getImm())
2235 .add(Pred[1])
2236 .addMBB(MBB);
2237 }
2238
2239 return true;
  // Case 3: branch (and link) to CTR becomes its conditional form. The
  // counter register itself cannot be the predicate here.
2240 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2241 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2242 OpC == PPC::BCTRL8_RM) {
2243 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2244 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2245
2246 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2247 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2248 bool isPPC64 = Subtarget.isPPC64();
2249
2250 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2251 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2252 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2253 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2254 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2255 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2256 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2257 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2258 } else {
2259 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2260 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2261 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2262 .addImm(Pred[0].getImm())
2263 .add(Pred[1]);
2264 }
2265
2266 // Need to add Def and Use for the LR implicit operand.
2267 if (setLR)
2268 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2269 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2270 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2271 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2272 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2274
2275 return true;
2276 }
2277
  // Any other opcode is left untouched.
2278 return false;
2279}
2280
2282 ArrayRef<MachineOperand> Pred2) const {
  // Returns true when Pred1 being satisfied is implied by Pred2 being
  // satisfied on the same condition register. Each predicate is a pair of
  // {predicate immediate, tested register}.
2283 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2284 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2285
  // Counter-register based predicates never subsume, nor are subsumed.
2286 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2287 return false;
2288 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2289 return false;
2290
2291 // P1 can only subsume P2 if they test the same condition register.
2292 if (Pred1[1].getReg() != Pred2[1].getReg())
2293 return false;
2294
2295 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2296 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2297
2298 if (P1 == P2)
2299 return true;
2300
2301 // Does P1 subsume P2, e.g. GE subsumes GT.
  // Only LE over {LT, EQ} and GE over {GT, EQ} are recognized.
2302 if (P1 == PPC::PRED_LE &&
2303 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2304 return true;
2305 if (P1 == PPC::PRED_GE &&
2306 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2307 return true;
2308
2309 return false;
2310}
2311
2313 std::vector<MachineOperand> &Pred,
2314 bool SkipDead) const {
2315 // Note: At the present time, the contents of Pred from this function is
2316 // unused by IfConversion. This implementation follows ARM by pushing the
2317 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2318 // predicate, instructions defining CTR or CTR8 are also included as
2319 // predicate-defining instructions.
  // Returns true if MI defines, or clobbers through a register mask, any
  // register in the classes listed below. SkipDead is not consulted.
2320
2321 const TargetRegisterClass *RCs[] =
2322 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2323 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2324
2325 bool Found = false;
2326 for (const MachineOperand &MO : MI.operands()) {
  // The !Found condition stops the class scan after the first match, so
  // once Found is set no later operand is examined either.
2327 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2328 const TargetRegisterClass *RC = RCs[c];
2329 if (MO.isReg()) {
2330 if (MO.isDef() && RC->contains(MO.getReg())) {
2331 Pred.push_back(MO);
2332 Found = true;
2333 }
2334 } else if (MO.isRegMask()) {
  // NOTE(review): there is no break here, so the same mask operand is
  // pushed once per clobbered register of RC - confirm this is intended.
2335 for (MCPhysReg R : *RC)
2336 if (MO.clobbersPhysReg(R)) {
2337 Pred.push_back(MO);
2338 Found = true;
2339 }
2340 }
2341 }
2342 }
2343
2344 return Found;
2345}
2346
2348 Register &SrcReg2, int64_t &Mask,
2349 int64_t &Value) const {
  // Decomposes a compare instruction into its sources. Returns false (leaving
  // the outputs untouched) for any opcode not listed below.
2350 unsigned Opc = MI.getOpcode();
2351
2352 switch (Opc) {
2353 default: return false;
  // Register-immediate compares: one source register, SrcReg2 cleared, the
  // immediate in Value and a fixed 0xFFFF mask.
2354 case PPC::CMPWI:
2355 case PPC::CMPLWI:
2356 case PPC::CMPDI:
2357 case PPC::CMPLDI:
2358 SrcReg = MI.getOperand(1).getReg();
2359 SrcReg2 = 0;
2360 Value = MI.getOperand(2).getImm();
2361 Mask = 0xFFFF;
2362 return true;
  // Register-register compares (integer and FP): two source registers, with
  // Value and Mask both zero.
2363 case PPC::CMPW:
2364 case PPC::CMPLW:
2365 case PPC::CMPD:
2366 case PPC::CMPLD:
2367 case PPC::FCMPUS:
2368 case PPC::FCMPUD:
2369 SrcReg = MI.getOperand(1).getReg();
2370 SrcReg2 = MI.getOperand(2).getReg();
2371 Value = 0;
2372 Mask = 0;
2373 return true;
2374 }
2375}
2376
2378 Register SrcReg2, int64_t Mask,
2379 int64_t Value,
2380 const MachineRegisterInfo *MRI) const {
  // Tries to make CmpInstr redundant by having the instruction that defines
  // SrcReg (or a SUBF of the two compared registers) set CR0 through its
  // record form. On success CmpInstr is erased, a COPY from CR0 into the old
  // CR register is inserted, affected users are updated, and true is
  // returned. Returns false when the transformation is not applied.
2381 if (DisableCmpOpt)
2382 return false;
2383
2384 int OpC = CmpInstr.getOpcode();
2385 Register CRReg = CmpInstr.getOperand(0).getReg();
2386
2387 // FP record forms set CR1 based on the exception status bits, not a
2388 // comparison with zero.
2389 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2390 return false;
2391
2393 // The record forms set the condition register based on a signed comparison
2394 // with zero (so says the ISA manual). This is not as straightforward as it
2395 // seems, however, because this is always a 64-bit comparison on PPC64, even
2396 // for instructions that are 32-bit in nature (like slw for example).
2397 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2398 // for equality checks (as those don't depend on the sign). On PPC64,
2399 // we are restricted to equality for unsigned 64-bit comparisons and for
2400 // signed 32-bit comparisons the applicability is more restricted.
2401 bool isPPC64 = Subtarget.isPPC64();
2402 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2403 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2404 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2405
2406 // Look through copies unless that gets us to a physical register.
2407 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2408 if (ActualSrc.isVirtual())
2409 SrcReg = ActualSrc;
2410
2411 // Get the unique definition of SrcReg.
2412 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2413 if (!MI) return false;
2414
  // equalityOnly: only EQ/NE users of the CR result are acceptable.
  // noSub: skip the backward search for a matching SUBF further below.
2415 bool equalityOnly = false;
2416 bool noSub = false;
2417 if (isPPC64) {
2418 if (is32BitSignedCompare) {
2419 // We can perform this optimization only if SrcReg is sign-extending.
2420 if (isSignExtended(SrcReg, MRI))
2421 noSub = true;
2422 else
2423 return false;
2424 } else if (is32BitUnsignedCompare) {
2425 // We can perform this optimization, equality only, if SrcReg is
2426 // zero-extending.
2427 if (isZeroExtended(SrcReg, MRI)) {
2428 noSub = true;
2429 equalityOnly = true;
2430 } else
2431 return false;
2432 } else
2433 equalityOnly = is64BitUnsignedCompare;
2434 } else
2435 equalityOnly = is32BitUnsignedCompare;
2436
2437 if (equalityOnly) {
2438 // We need to check the uses of the condition register in order to reject
2439 // non-equality comparisons.
2441 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2442 I != IE; ++I) {
2443 MachineInstr *UseMI = &*I;
2444 if (UseMI->getOpcode() == PPC::BCC) {
2446 unsigned PredCond = PPC::getPredicateCondition(Pred);
2447 // We ignore hint bits when checking for non-equality comparisons.
2448 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2449 return false;
2450 } else if (UseMI->getOpcode() == PPC::ISEL ||
2451 UseMI->getOpcode() == PPC::ISEL8) {
2452 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2453 if (SubIdx != PPC::sub_eq)
2454 return false;
2455 } else
2456 return false;
2457 }
2458 }
2459
2460 MachineBasicBlock::iterator I = CmpInstr;
2461
2462 // Scan forward to find the first use of the compare.
2463 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2464 ++I) {
2465 bool FoundUse = false;
2467 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2468 J != JE; ++J)
2469 if (&*J == &*I) {
2470 FoundUse = true;
2471 break;
2472 }
2473
2474 if (FoundUse)
2475 break;
2476 }
2477
2480
2481 // There are two possible candidates which can be changed to set CR[01].
2482 // One is MI, the other is a SUB instruction.
2483 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2484 MachineInstr *Sub = nullptr;
2485 if (SrcReg2 != 0)
2486 // MI is not a candidate for CMPrr.
2487 MI = nullptr;
2488 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2489 // same BB as the comparison. This is to allow the check below to avoid calls
2490 // (and other explicit clobbers); instead we should really check for these
2491 // more explicitly (in at least a few predecessors).
2492 else if (MI->getParent() != CmpInstr.getParent())
2493 return false;
2494 else if (Value != 0) {
2495 // The record-form instructions set CR bit based on signed comparison
2496 // against 0. We try to convert a compare against 1 or -1 into a compare
2497 // against 0 to exploit record-form instructions. For example, we change
2498 // the condition "greater than -1" into "greater than or equal to 0"
2499 // and "less than 1" into "less than or equal to 0".
2500
2501 // Since we optimize comparison based on a specific branch condition,
2502 // we don't optimize if the condition code is used more than once.
2503 if (equalityOnly || !MRI->hasOneUse(CRReg))
2504 return false;
2505
2506 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2507 if (UseMI->getOpcode() != PPC::BCC)
2508 return false;
2509
2511 unsigned PredCond = PPC::getPredicateCondition(Pred);
2512 unsigned PredHint = PPC::getPredicateHint(Pred);
2513 int16_t Immed = (int16_t)Value;
2514
2515 // When modifying the condition in the predicate, we propagate hint bits
2516 // from the original predicate to the new one.
2517 if (Immed == -1 && PredCond == PPC::PRED_GT)
2518 // We convert "greater than -1" into "greater than or equal to 0",
2519 // since we are assuming signed comparison by !equalityOnly
2520 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2521 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2522 // We convert "less than or equal to -1" into "less than 0".
2523 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2524 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2525 // We convert "less than 1" into "less than or equal to 0".
2526 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2527 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2528 // We convert "greater than or equal to 1" into "greater than 0".
2529 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2530 else
2531 return false;
2532
2533 // Convert the comparison and its user to a compare against zero with the
2534 // appropriate predicate on the branch. Zero comparison might provide
2535 // optimization opportunities post-RA (see optimization in
2536 // PPCPreEmitPeephole.cpp).
2537 UseMI->getOperand(0).setImm(Pred);
2538 CmpInstr.getOperand(2).setImm(0);
2539 }
2540
2541 // Search for Sub.
2542 --I;
2543
2544 // Get ready to iterate backward from CmpInstr.
2545 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2546
2547 for (; I != E && !noSub; --I) {
2548 const MachineInstr &Instr = *I;
2549 unsigned IOpC = Instr.getOpcode();
2550
2551 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2552 Instr.readsRegister(PPC::CR0, TRI)))
2553 // This instruction modifies or uses the record condition register after
2554 // the one we want to change. While we could do this transformation, it
2555 // would likely not be profitable. This transformation removes one
2556 // instruction, and so even forcing RA to generate one move probably
2557 // makes it unprofitable.
2558 return false;
2559
2560 // Check whether CmpInstr can be made redundant by the current instruction.
2561 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2562 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2563 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2564 ((Instr.getOperand(1).getReg() == SrcReg &&
2565 Instr.getOperand(2).getReg() == SrcReg2) ||
2566 (Instr.getOperand(1).getReg() == SrcReg2 &&
2567 Instr.getOperand(2).getReg() == SrcReg))) {
2568 Sub = &*I;
2569 break;
2570 }
2571
2572 if (I == B)
2573 // The 'and' is below the comparison instruction.
2574 return false;
2575 }
2576
2577 // Return false if no candidates exist.
2578 if (!MI && !Sub)
2579 return false;
2580
2581 // The single candidate is called MI.
2582 if (!MI) MI = Sub;
2583
  // Choose the opcode MI will have afterwards: unchanged if it is already a
  // record form, otherwise its record-form counterpart (if one exists).
2584 int NewOpC = -1;
2585 int MIOpC = MI->getOpcode();
2586 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2587 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2588 NewOpC = MIOpC;
2589 else {
2590 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2591 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2592 NewOpC = MIOpC;
2593 }
2594
2595 // FIXME: On the non-embedded POWER architectures, only some of the record
2596 // forms are fast, and we should use only the fast ones.
2597
2598 // The defining instruction has a record form (or is already a record
2599 // form). It is possible, however, that we'll need to reverse the condition
2600 // code of the users.
2601 if (NewOpC == -1)
2602 return false;
2603
2604 // This transformation should not be performed if `nsw` is missing and is not
2605 // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2606 // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2607 // CRReg can reflect if compared values are equal, this optimization is still valid.
2608 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2609 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2610 return false;
2611
2612 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2613 // needs to be updated to be based on SUB. Push the condition code
2614 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2615 // condition code of these operands will be modified.
2616 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2617 // comparison against 0, which may modify predicate.
2618 bool ShouldSwap = false;
2619 if (Sub && Value == 0) {
2620 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2621 Sub->getOperand(2).getReg() == SrcReg;
2622
2623 // The operands to subf are the opposite of sub, so only in the fixed-point
2624 // case, invert the order.
2625 ShouldSwap = !ShouldSwap;
2626 }
2627
2628 if (ShouldSwap)
2630 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2631 I != IE; ++I) {
2632 MachineInstr *UseMI = &*I;
2633 if (UseMI->getOpcode() == PPC::BCC) {
2635 unsigned PredCond = PPC::getPredicateCondition(Pred);
2636 assert((!equalityOnly ||
2637 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2638 "Invalid predicate for equality-only optimization");
2639 (void)PredCond; // To suppress warning in release build.
2640 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2642 } else if (UseMI->getOpcode() == PPC::ISEL ||
2643 UseMI->getOpcode() == PPC::ISEL8) {
2644 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2645 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2646 "Invalid CR bit for equality-only optimization");
2647
2648 if (NewSubReg == PPC::sub_lt)
2649 NewSubReg = PPC::sub_gt;
2650 else if (NewSubReg == PPC::sub_gt)
2651 NewSubReg = PPC::sub_lt;
2652
2653 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2654 NewSubReg));
2655 } else // We need to abort on a user we don't understand.
2656 return false;
2657 }
  // NOTE(review): the two adjacent string literals below concatenate without
  // a space ("...ShouldSwapmay conflict..."); left as is here because it is
  // runtime text.
2658 assert(!(Value != 0 && ShouldSwap) &&
2659 "Non-zero immediate support and ShouldSwap"
2660 "may conflict in updating predicate");
2661
  // From this point on the transformation is committed: the compare is
  // erased and replaced by a COPY of CR0 into CRReg placed right after MI.
2662 // Create a new virtual register to hold the value of the CR set by the
2663 // record-form instruction. If the instruction was not previously in
2664 // record form, then set the kill flag on the CR.
2665 CmpInstr.eraseFromParent();
2666
2668 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2669 get(TargetOpcode::COPY), CRReg)
2670 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2671
2672 // Even if CR0 register were dead before, it is alive now since the
2673 // instruction we just built uses it.
2674 MI->clearRegisterDeads(PPC::CR0);
2675
2676 if (MIOpC != NewOpC) {
2677 // We need to be careful here: we're replacing one instruction with
2678 // another, and we need to make sure that we get all of the right
2679 // implicit uses and defs. On the other hand, the caller may be holding
2680 // an iterator to this instruction, and so we can't delete it (this is
2681 // specifically the case if this is the instruction directly after the
2682 // compare).
2683
2684 // Rotates are expensive instructions. If we're emitting a record-form
2685 // rotate that can just be an andi/andis, we should just emit that.
2686 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2687 Register GPRRes = MI->getOperand(0).getReg();
2688 int64_t SH = MI->getOperand(2).getImm();
2689 int64_t MB = MI->getOperand(3).getImm();
2690 int64_t ME = MI->getOperand(4).getImm();
2691 // We can only do this if both the start and end of the mask are in the
2692 // same halfword.
2693 bool MBInLoHWord = MB >= 16;
2694 bool MEInLoHWord = ME >= 16;
  // ~0 is the "no conversion" sentinel tested after the two cases below.
2695 uint64_t Mask = ~0LLU;
2696
2697 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2698 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2699 // The mask value needs to shift right 16 if we're emitting andis.
2700 Mask >>= MBInLoHWord ? 0 : 16;
2701 NewOpC = MIOpC == PPC::RLWINM
2702 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2703 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2704 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2705 (ME - MB + 1 == SH) && (MB >= 16)) {
2706 // If we are rotating by the exact number of bits as are in the mask
2707 // and the mask is in the least significant bits of the register,
2708 // that's just an andis. (as long as the GPR result has no uses).
2709 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2710 Mask >>= 16;
2711 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2712 }
2713 // If we've set the mask, we can transform.
2714 if (Mask != ~0LLU) {
2715 MI->removeOperand(4);
2716 MI->removeOperand(3);
2717 MI->getOperand(2).setImm(Mask);
2718 NumRcRotatesConvertedToRcAnd++;
2719 }
2720 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
2721 int64_t MB = MI->getOperand(3).getImm();
2722 if (MB >= 48) {
2723 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2724 NewOpC = PPC::ANDI8_rec;
2725 MI->removeOperand(3);
2726 MI->getOperand(2).setImm(Mask);
2727 NumRcRotatesConvertedToRcAnd++;
2728 }
2729 }
2730
2731 const MCInstrDesc &NewDesc = get(NewOpC);
2732 MI->setDesc(NewDesc);
2733
  // Add whichever implicit defs/uses of the new descriptor MI does not
  // already carry.
2734 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2735 if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
2736 MI->addOperand(*MI->getParent()->getParent(),
2737 MachineOperand::CreateReg(ImpDef, true, true));
2738 }
2739 }
2740 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2741 if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
2742 MI->addOperand(*MI->getParent()->getParent(),
2743 MachineOperand::CreateReg(ImpUse, false, true));
2744 }
2745 }
2746 }
2747 assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2748 "Record-form instruction does not define cr0?");
2749
2750 // Modify the condition code of operands in OperandsToUpdate.
2751 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2752 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2753 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2754 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2755
2756 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2757 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2758
2759 return true;
2760}
2761
// Post-register-allocation variant: if CmpMI compares a register against
// zero into CR0 and the instruction defining that register has a record
// form, switch that instruction to its record form. Returns true when the
// defining instruction was rewritten; CmpMI itself is not erased here.
2764 if (MRI->isSSA())
2765 return false;
2766
2767 Register SrcReg, SrcReg2;
2768 int64_t CmpMask, CmpValue;
2769 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2770 return false;
2771
2772 // Try to optimize the comparison against 0.
2773 if (CmpValue || !CmpMask || SrcReg2)
2774 return false;
2775
2776 // The record forms set the condition register based on a signed comparison
2777 // with zero (see comments in optimizeCompareInstr). Since we can't do the
2778 // equality checks in post-RA, we are more restricted on an unsigned
2779 // comparison.
2780 unsigned Opc = CmpMI.getOpcode();
2781 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2782 return false;
2783
2784 // The record forms are always based on a 64-bit comparison on PPC64
2785 // (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2786 // comparison. Since we can't do the equality checks in post-RA, we bail out
2787 // in that case.
2788 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2789 return false;
2790
2791 // CmpMI can't be deleted if it has implicit def.
2792 if (CmpMI.hasImplicitDef())
2793 return false;
2794
2795 bool SrcRegHasOtherUse = false;
2796 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2797 if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
2798 return false;
2799
  // Only compares that write CR0 can be replaced by a record form.
2800 MachineOperand RegMO = CmpMI.getOperand(0);
2801 Register CRReg = RegMO.getReg();
2802 if (CRReg != PPC::CR0)
2803 return false;
2804
2805 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2806 bool SeenUseOfCRReg = false;
2807 bool IsCRRegKilled = false;
2808 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2809 SeenUseOfCRReg) ||
2810 SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
2811 return false;
2812
2813 int SrcMIOpc = SrcMI->getOpcode();
2814 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2815 if (NewOpC == -1)
2816 return false;
2817
2818 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2819 LLVM_DEBUG(SrcMI->dump());
2820
2821 const MCInstrDesc &NewDesc = get(NewOpC);
2822 SrcMI->setDesc(NewDesc);
2823 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2825 SrcMI->clearRegisterDeads(CRReg);
2826
2827 assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2828 "Record-form instruction does not define cr0?");
2829
2830 LLVM_DEBUG(dbgs() << "with: ");
2831 LLVM_DEBUG(SrcMI->dump());
2832 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2833 LLVM_DEBUG(CmpMI.dump());
2834 return true;
2835}
2836
2839 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2840 const TargetRegisterInfo *TRI) const {
// NOTE(review): the declarator line(s) of this member function are absent
// from this listing; only the tail of its parameter list is visible.
//
// Thin wrapper over getMemOperandWithOffsetWidth(): on success the single
// base operand it reports is appended to BaseOps and true is returned; on
// failure BaseOps is left untouched and false is returned.
2841 const MachineOperand *BaseOp;
// OffsetIsScalable is cleared unconditionally, before the query can fail.
2842 OffsetIsScalable = false;
2843 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2844 return false;
2845 BaseOps.push_back(BaseOp);
2846 return true;
2847}
2848
2849static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2850 const TargetRegisterInfo *TRI) {
2851 // If this is a volatile load/store, don't mess with it.
2852 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2853 return false;
2854
2855 if (LdSt.getOperand(2).isFI())
2856 return true;
2857
2858 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2859 // Can't cluster if the instruction modifies the base register
2860 // or it is update form. e.g. ld r2,3(r2)
2861 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2862 return false;
2863
2864 return true;
2865}
2866
2867// Only cluster instruction pair that have the same opcode, and they are
2868// clusterable according to PowerPC specification.
2869static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2870 const PPCSubtarget &Subtarget) {
2871 switch (FirstOpc) {
2872 default:
2873 return false;
2874 case PPC::STD:
2875 case PPC::STFD:
2876 case PPC::STXSD:
2877 case PPC::DFSTOREf64:
2878 return FirstOpc == SecondOpc;
2879 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2880 // 32bit and 64bit instruction selection. They are clusterable pair though
2881 // they are different opcode.
2882 case PPC::STW:
2883 case PPC::STW8:
2884 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2885 }
2886}
2887
2889 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2890 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2891 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2892 unsigned NumBytes) const {
2893
// NOTE(review): the declarator line of this member function and the
// declaration of the local `TRI` used below are absent from this listing.
//
// Returns true only when every check below passes: both operations share the
// same base (register or frame index), their opcodes form a clusterable
// pair, both are safe to cluster, their access widths are equal, and the
// second access starts exactly where the first one ends.
// OpOffset1/2, OffsetIsScalable1/2 and NumBytes are not read in the visible
// body; offsets are re-queried from the instructions themselves.
2894 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2895 const MachineOperand &BaseOp1 = *BaseOps1.front();
2896 const MachineOperand &BaseOp2 = *BaseOps2.front();
2897 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2898 "Only base registers and frame indices are supported.");
2899
2900 // ClusterSize means the number of memory operations that will have been
2901 // clustered if this hook returns true.
2902 // Don't cluster memory op if there are already two ops clustered at least.
2903 if (ClusterSize > 2)
2904 return false;
2905
2906 // Cluster the load/store only when they have the same base
2907 // register or FI.
2908 if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
2909 (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
2910 (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2911 return false;
2912
2913 // Check if the load/store are clusterable according to the PowerPC
2914 // specification.
2915 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2916 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2917 unsigned FirstOpc = FirstLdSt.getOpcode();
2918 unsigned SecondOpc = SecondLdSt.getOpcode();
2920 // Cluster the load/store only when they have the same opcode, and they are
2921 // clusterable opcode according to PowerPC specification.
2922 if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2923 return false;
2924
2925 // Can't cluster load/store that have ordered or volatile memory reference.
2926 if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
2927 !isLdStSafeToCluster(SecondLdSt, TRI))
2928 return false;
2929
2930 int64_t Offset1 = 0, Offset2 = 0;
2931 LocationSize Width1 = 0, Width2 = 0;
2932 const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
2933 if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
2934 !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
2935 Width1 != Width2)
2936 return false;
2937
2938 assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2939 "getMemOperandWithOffsetWidth return incorrect base op");
2940 // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2941 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
// Adjacent means the second access begins at the first byte past the first.
2942 return Offset1 + (int64_t)Width1.getValue() == Offset2;
2943}
2944
2945 /// GetInstSize - Return the number of bytes of code the specified
2946 /// instruction may be. This returns the maximum number of bytes.
2947 ///
/// The size is obtained differently per opcode:
///  - INLINEASM / INLINEASM_BR: getInlineAsmLength() over the asm string held
///    in operand 0, using the target's MCAsmInfo;
///  - STACKMAP / PATCHPOINT: the patch-byte count reported by the
///    StackMapOpers / PatchPointOpers view of the instruction;
///  - anything else: the size recorded in the opcode's instruction descriptor.
///
/// NOTE(review): the declarator line of this member function is absent from
/// this listing.
2949 unsigned Opcode = MI.getOpcode();
2950
2951 if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
2952 const MachineFunction *MF = MI.getParent()->getParent();
2953 const char *AsmStr = MI.getOperand(0).getSymbolName();
2954 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
2955 } else if (Opcode == TargetOpcode::STACKMAP) {
2956 StackMapOpers Opers(&MI);
2957 return Opers.getNumPatchBytes();
2958 } else if (Opcode == TargetOpcode::PATCHPOINT) {
2959 PatchPointOpers Opers(&MI);
2960 return Opers.getNumPatchBytes();
2961 } else {
2962 return get(Opcode).getSize();
2963 }
2964}
2965
// Splits the flag word TF into a pair. In this implementation the whole of
// TF is returned as the first element and the second element is always 0.
// NOTE(review): the declarator line of this member function is absent from
// this listing.
2966 std::pair<unsigned, unsigned>
2968 // PPC always uses a direct mask.
2969 return std::make_pair(TF, 0u);
2970}
2971
// Returns an ArrayRef over a function-local static table that pairs each
// PPCII::MO_* flag value with a textual name ("ppc-...").
// NOTE(review): the declarator lines of this member function are absent from
// this listing; presumably the names are the ones used when the flags are
// printed/parsed in textual MIR — confirm against the caller.
2974 using namespace PPCII;
2975 static const std::pair<unsigned, const char *> TargetFlags[] = {
2976 {MO_PLT, "ppc-plt"},
2977 {MO_PIC_FLAG, "ppc-pic"},
2978 {MO_PCREL_FLAG, "ppc-pcrel"},
2979 {MO_GOT_FLAG, "ppc-got"},
2980 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
2981 {MO_TLSGD_FLAG, "ppc-tlsgd"},
2982 {MO_TPREL_FLAG, "ppc-tprel"},
2983 {MO_TLSLDM_FLAG, "ppc-tlsldm"},
2984 {MO_TLSLD_FLAG, "ppc-tlsld"},
2985 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
2986 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
2987 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
2988 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
2989 {MO_LO, "ppc-lo"},
2990 {MO_HA, "ppc-ha"},
2991 {MO_TPREL_LO, "ppc-tprel-lo"},
2992 {MO_TPREL_HA, "ppc-tprel-ha"},
2993 {MO_DTPREL_LO, "ppc-dtprel-lo"},
2994 {MO_TLSLD_LO, "ppc-tlsld-lo"},
2995 {MO_TOC_LO, "ppc-toc-lo"},
2996 {MO_TLS, "ppc-tls"},
2997 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
2998 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
2999 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
3000 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
3001 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3002 };
3003 return ArrayRef(TargetFlags);
3004}
3005
3006 // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3007 // The VSX versions have the advantage of a full 64-register target whereas
3008 // the FP ones have the advantage of lower latency and higher throughput. So
3009 // what we are after is using the faster instructions in low register pressure
3010 // situations and using the larger register file in high register pressure
3011 // situations.
//
// Each supported pseudo maps to an "upper" (VSX) and a "lower" (FP) opcode.
// The choice depends only on operand 0: a register in F0-F31 or VSL0-VSL31
// selects the lower opcode, any other register the upper one. The
// instruction is rewritten in place by swapping its descriptor; operands are
// not touched. Always returns true; an unsupported opcode is unreachable.
// NOTE(review): the declarator line of this member function is absent from
// this listing.
3013 unsigned UpperOpcode, LowerOpcode;
3014 switch (MI.getOpcode()) {
3015 case PPC::DFLOADf32:
3016 UpperOpcode = PPC::LXSSP;
3017 LowerOpcode = PPC::LFS;
3018 break;
3019 case PPC::DFLOADf64:
3020 UpperOpcode = PPC::LXSD;
3021 LowerOpcode = PPC::LFD;
3022 break;
3023 case PPC::DFSTOREf32:
3024 UpperOpcode = PPC::STXSSP;
3025 LowerOpcode = PPC::STFS;
3026 break;
3027 case PPC::DFSTOREf64:
3028 UpperOpcode = PPC::STXSD;
3029 LowerOpcode = PPC::STFD;
3030 break;
3031 case PPC::XFLOADf32:
3032 UpperOpcode = PPC::LXSSPX;
3033 LowerOpcode = PPC::LFSX;
3034 break;
3035 case PPC::XFLOADf64:
3036 UpperOpcode = PPC::LXSDX;
3037 LowerOpcode = PPC::LFDX;
3038 break;
3039 case PPC::XFSTOREf32:
3040 UpperOpcode = PPC::STXSSPX;
3041 LowerOpcode = PPC::STFSX;
3042 break;
3043 case PPC::XFSTOREf64:
3044 UpperOpcode = PPC::STXSDX;
3045 LowerOpcode = PPC::STFDX;
3046 break;
3047 case PPC::LIWAX:
3048 UpperOpcode = PPC::LXSIWAX;
3049 LowerOpcode = PPC::LFIWAX;
3050 break;
3051 case PPC::LIWZX:
3052 UpperOpcode = PPC::LXSIWZX;
3053 LowerOpcode = PPC::LFIWZX;
3054 break;
3055 case PPC::STIWX:
3056 UpperOpcode = PPC::STXSIWX;
3057 LowerOpcode = PPC::STFIWX;
3058 break;
3059 default:
3060 llvm_unreachable("Unknown Operation!");
3061 }
3062
// Operand 0 is the loaded-to / stored-from register that drives the choice.
3063 Register TargetReg = MI.getOperand(0).getReg();
3064 unsigned Opcode;
3065 if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
3066 (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3067 Opcode = LowerOpcode;
3068 else
3069 Opcode = UpperOpcode;
3070 MI.setDesc(get(Opcode));
3071 return true;
3072}
3073
3074static bool isAnImmediateOperand(const MachineOperand &MO) {
3075 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3076}
3077
// Expands the pseudo instruction MI in place, dispatching on its opcode.
// Returns true when MI was rewritten (every handled case returns true, either
// directly or via expandVSXMemPseudo / a recursive call) and false when the
// opcode is not one of the cases below.
// NOTE(review): the declarator line of this member function and two inner
// lines (inside the PPCLdFixedAddr and CFENCE cases) are absent from this
// listing.
3079 auto &MBB = *MI.getParent();
3080 auto DL = MI.getDebugLoc();
3081
3082 switch (MI.getOpcode()) {
// BUILD_UACC: if the accumulator and the unprimed accumulator have different
// indices, copy the four underlying VSL registers with XXLOR; then fall
// through so that MI itself becomes a NOP.
3083 case PPC::BUILD_UACC: {
3084 MCRegister ACC = MI.getOperand(0).getReg();
3085 MCRegister UACC = MI.getOperand(1).getReg();
3086 if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
3087 MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
3088 MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
3089 // FIXME: This can easily be improved to look up to the top of the MBB
3090 // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
3091 // we can just re-target any such XXLOR's to DstVSR + offset.
3092 for (int VecNo = 0; VecNo < 4; VecNo++)
3093 BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
3094 .addReg(SrcVSR + VecNo)
3095 .addReg(SrcVSR + VecNo);
3096 }
3097 // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
3098 // So after building the 4 copies, we can replace the BUILD_UACC instruction
3099 // with a NOP.
3100 [[fallthrough]];
3101 }
// KILL_PAIR (and BUILD_UACC via fallthrough): turn MI into UNENCODED_NOP and
// drop operands 1 and 0, higher index first so the indices stay valid.
3102 case PPC::KILL_PAIR: {
3103 MI.setDesc(get(PPC::UNENCODED_NOP));
3104 MI.removeOperand(1);
3105 MI.removeOperand(0);
3106 return true;
3107 }
// LOAD_STACK_GUARD: becomes LD from X13 at -0x7010 on PPC64, or LWZ from R2
// at -0x7008 otherwise.
3108 case TargetOpcode::LOAD_STACK_GUARD: {
3109 assert(Subtarget.isTargetLinux() &&
3110 "Only Linux target is expected to contain LOAD_STACK_GUARD");
3111 const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
3112 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3113 MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
3114 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3115 .addImm(Offset)
3116 .addReg(Reg);
3117 return true;
3118 }
// PPCLdFixedAddr: becomes LWZ from X13 (PPC64) or R2 at an offset selected by
// operand 1 (HWCAP / HWCAP2 / CPUID word), endianness and 64-bitness. The
// offset constants come from PPCTargetParser.def, included below with
// PPC_LNX_DEFINE_OFFSETS defined.
3119 case PPC::PPCLdFixedAddr: {
3120 assert(Subtarget.getTargetTriple().isOSGlibc() &&
3121 "Only targets with Glibc expected to contain PPCLdFixedAddr");
3122 int64_t Offset = 0;
3123 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3124 MI.setDesc(get(PPC::LWZ));
3125 uint64_t FAType = MI.getOperand(1).getImm();
3126#undef PPC_LNX_FEATURE
3127#undef PPC_LNX_CPU
3128#define PPC_LNX_DEFINE_OFFSETS
3129#include "llvm/TargetParser/PPCTargetParser.def"
3130 bool IsLE = Subtarget.isLittleEndian();
3131 bool Is64 = Subtarget.isPPC64();
3132 if (FAType == PPC_FAWORD_HWCAP) {
3133 if (IsLE)
3134 Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
3135 else
3136 Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
3137 } else if (FAType == PPC_FAWORD_HWCAP2) {
3138 if (IsLE)
3139 Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
3140 else
3141 Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
3142 } else if (FAType == PPC_FAWORD_CPUID) {
3143 if (IsLE)
3144 Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
3145 else
3146 Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
3147 }
// An unrecognised FAType leaves Offset at 0 and trips this assert.
3148 assert(Offset && "Do not know the offset for this fixed addr load");
3149 MI.removeOperand(1);
3151 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3152 .addImm(Offset)
3153 .addReg(Reg);
3154 return true;
3155#define PPC_TGT_PARSER_UNDEF_MACROS
3156#include "llvm/TargetParser/PPCTargetParser.def"
3157#undef PPC_TGT_PARSER_UNDEF_MACROS
3158 }
// D-form VSX memory pseudos: operand 1 must be immediate-like and operand 2 a
// register; the opcode choice is delegated to expandVSXMemPseudo.
3159 case PPC::DFLOADf32:
3160 case PPC::DFLOADf64:
3161 case PPC::DFSTOREf32:
3162 case PPC::DFSTOREf64: {
3163 assert(Subtarget.hasP9Vector() &&
3164 "Invalid D-Form Pseudo-ops on Pre-P9 target.");
3165 assert(MI.getOperand(2).isReg() &&
3166 isAnImmediateOperand(MI.getOperand(1)) &&
3167 "D-form op must have register and immediate operands");
3168 return expandVSXMemPseudo(MI);
3169 }
// X-form pseudos that are asserted to need P8 vector support.
3170 case PPC::XFLOADf32:
3171 case PPC::XFSTOREf32:
3172 case PPC::LIWAX:
3173 case PPC::LIWZX:
3174 case PPC::STIWX: {
3175 assert(Subtarget.hasP8Vector() &&
3176 "Invalid X-Form Pseudo-ops on Pre-P8 target.");
3177 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3178 "X-form op must have register and register operands");
3179 return expandVSXMemPseudo(MI);
3180 }
// X-form f64 pseudos: asserted to need only VSX.
3181 case PPC::XFLOADf64:
3182 case PPC::XFSTOREf64: {
3183 assert(Subtarget.hasVSX() &&
3184 "Invalid X-Form Pseudo-ops on target that has no VSX.");
3185 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3186 "X-form op must have register and register operands");
3187 return expandVSXMemPseudo(MI);
3188 }
// SPILLTOVSR_*: operand 0 decides the real opcode. A VSFRC register takes the
// floating-point/VSX form (the D-form cases re-enter this function through
// the DFLOADf64/DFSTOREf64 pseudo); any other register takes the GPR form.
3189 case PPC::SPILLTOVSR_LD: {
3190 Register TargetReg = MI.getOperand(0).getReg();
3191 if (PPC::VSFRCRegClass.contains(TargetReg)) {
3192 MI.setDesc(get(PPC::DFLOADf64));
3193 return expandPostRAPseudo(MI);
3194 }
3195 else
3196 MI.setDesc(get(PPC::LD));
3197 return true;
3198 }
3199 case PPC::SPILLTOVSR_ST: {
3200 Register SrcReg = MI.getOperand(0).getReg();
3201 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3202 NumStoreSPILLVSRRCAsVec++;
3203 MI.setDesc(get(PPC::DFSTOREf64));
3204 return expandPostRAPseudo(MI);
3205 } else {
3206 NumStoreSPILLVSRRCAsGpr++;
3207 MI.setDesc(get(PPC::STD));
3208 }
3209 return true;
3210 }
3211 case PPC::SPILLTOVSR_LDX: {
3212 Register TargetReg = MI.getOperand(0).getReg();
3213 if (PPC::VSFRCRegClass.contains(TargetReg))
3214 MI.setDesc(get(PPC::LXSDX));
3215 else
3216 MI.setDesc(get(PPC::LDX));
3217 return true;
3218 }
3219 case PPC::SPILLTOVSR_STX: {
3220 Register SrcReg = MI.getOperand(0).getReg();
3221 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3222 NumStoreSPILLVSRRCAsVec++;
3223 MI.setDesc(get(PPC::STXSDX));
3224 } else {
3225 NumStoreSPILLVSRRCAsGpr++;
3226 MI.setDesc(get(PPC::STDX));
3227 }
3228 return true;
3229 }
3230
3231 // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
// CFENCE: emit a compare of the value with itself into CR7, then a CTRL_DEP
// on CR7, and finally turn MI itself into ISYNC with its operand removed.
3232 case PPC::CFENCE:
3233 case PPC::CFENCE8: {
3234 auto Val = MI.getOperand(0).getReg();
3235 unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3236 BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
3237 BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
3239 .addReg(PPC::CR7)
3240 .addImm(1);
3241 MI.setDesc(get(PPC::ISYNC));
3242 MI.removeOperand(0);
3243 return true;
3244 }
3245 }
3246 return false;
3247}
3248
3249// Essentially a compile-time implementation of a compare->isel sequence.
3250// It takes two constants to compare, along with the true/false registers
3251// and the comparison type (as a subreg to a CR field) and returns one
3252// of the true/false registers, depending on the comparison results.
3253static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3254 unsigned TrueReg, unsigned FalseReg,
3255 unsigned CRSubReg) {
3256 // Signed comparisons. The immediates are assumed to be sign-extended.
3257 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3258 switch (CRSubReg) {
3259 default: llvm_unreachable("Unknown integer comparison type.");
3260 case PPC::sub_lt:
3261 return Imm1 < Imm2 ? TrueReg : FalseReg;
3262 case PPC::sub_gt:
3263 return Imm1 > Imm2 ? TrueReg : FalseReg;
3264 case PPC::sub_eq:
3265 return Imm1 == Imm2 ? TrueReg : FalseReg;
3266 }
3267 }
3268 // Unsigned comparisons.
3269 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3270 switch (CRSubReg) {
3271 default: llvm_unreachable("Unknown integer comparison type.");
3272 case PPC::sub_lt:
3273 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3274 case PPC::sub_gt:
3275 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3276 case PPC::sub_eq:
3277 return Imm1 == Imm2 ? TrueReg : FalseReg;
3278 }
3279 }
3280 return PPC::NoRegister;
3281}
3282
3284 unsigned OpNo,
3285 int64_t Imm) const {
// Turns register operand OpNo of MI into the immediate Imm and, if the first
// remaining use of that register found on MI is an implicit operand, removes
// that operand as well.
// NOTE(review): the declarator line of this member function and the
// declaration of the local `TRI` used below are absent from this listing.
3286 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3287 // Replace the REG with the Immediate.
3288 Register InUseReg = MI.getOperand(OpNo).getReg();
3289 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3290
3291 // We need to make sure that the MI didn't have any implicit use
3292 // of this REG any more. We don't call MI.implicit_operands().empty() to
3293 // return early, since MI's MCID might be changed in calling context, as a
3294 // result its number of explicit operands may be changed, thus the begin of
3295 // implicit operand is changed.
3297 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
3298 if (UseOpIdx >= 0) {
3299 MachineOperand &MO = MI.getOperand(UseOpIdx);
3300 if (MO.isImplicit())
3301 // The operands must always be in the following order:
3302 // - explicit reg defs,
3303 // - other explicit operands (reg uses, immediates, etc.),
3304 // - implicit reg defs
3305 // - implicit reg uses
3306 // Therefore, removing the implicit operand won't change the explicit
3307 // operands layout.
3308 MI.removeOperand(UseOpIdx);
3309 }
3310}
3311
3312 // Replace an instruction with one that materializes a constant (and sets
3313 // CR0 if the original instruction was a record-form instruction).
//
// When LII.SetCR is set, operands 0 and 1 are kept and MI becomes
// ANDI_rec/ANDI8_rec with LII.Imm plus an implicit def of CR0. Otherwise only
// operand 0 is kept and MI becomes LI/LI8 with LII.Imm. LII.Is64Bit picks the
// 64-bit opcode in both cases.
// NOTE(review): the declarator line of this member function is absent from
// this listing.
3315 const LoadImmediateInfo &LII) const {
3316 // Remove existing operands.
3317 int OperandToKeep = LII.SetCR ? 1 : 0;
// Remove from the back so the indices of earlier operands stay valid.
3318 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3319 MI.removeOperand(i);
3320
3321 // Replace the instruction.
3322 if (LII.SetCR) {
3323 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3324 // Set the immediate.
3325 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3326 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3327 return;
3328 }
3329 else
3330 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3331
3332 // Set the immediate.
3333 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3334 .addImm(LII.Imm);
3335}
3336
3338 bool &SeenIntermediateUse) const {
3339 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3340 "Should be called after register allocation.");
// Scans backwards from the instruction just before MI to the start of MI's
// basic block and returns the first instruction found that modifies Reg, or
// nullptr if the block contains none. SeenIntermediateUse is reset to false
// and then set to true if any instruction visited before that definition
// reads Reg.
// NOTE(review): the declarator line of this member function and the
// declaration of the local `TRI` used below are absent from this listing.
3342 MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
// Skip MI itself.
3343 It++;
3344 SeenIntermediateUse = false;
3345 for (; It != E; ++It) {
3346 if (It->modifiesRegister(Reg, TRI))
3347 return &*It;
3348 if (It->readsRegister(Reg, TRI))
3349 SeenIntermediateUse = true;
3350 }
3351 return nullptr;
3352}
3353
3356 const DebugLoc &DL, Register Reg,
3357 int64_t Imm) const {
3359 "Register should be in non-SSA form after RA");
// Emits, before MBBI, the instructions that load Imm into Reg:
//  - 16-bit signed value: a single LI/LI8;
//  - 32-bit signed value: LIS/LIS8 of the high half, then ORI/ORI8 of the low
//    half when that half is non-zero;
//  - otherwise (PPC64 only): LIS8 of bits 48-63, optional ORI8 of bits 32-47,
//    RLDICR to shift left by 32, ORIS8 of bits 16-31 and optional ORI8 of
//    bits 0-15.
// NOTE(review): the declarator lines of this member function and the first
// line of the assert above are absent from this listing.
3360 bool isPPC64 = Subtarget.isPPC64();
3361 // FIXME: Materialization here is not optimal.
3362 // For some special bit patterns we can use less instructions.
3363 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3364 if (isInt<16>(Imm)) {
3365 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3366 } else if (isInt<32>(Imm)) {
3367 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3368 .addImm(Imm >> 16);
3369 if (Imm & 0xFFFF)
3370 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3371 .addReg(Reg, RegState::Kill)
3372 .addImm(Imm & 0xFFFF);
3373 } else {
3374 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3375 "only supported in PPC64");
3376 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3377 if ((Imm >> 32) & 0xFFFF)
3378 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3379 .addReg(Reg, RegState::Kill)
3380 .addImm((Imm >> 32) & 0xFFFF);
// Move the 32 bits built so far into the upper word.
3381 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3382 .addReg(Reg, RegState::Kill)
3383 .addImm(32)
3384 .addImm(31);
3385 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3386 .addReg(Reg, RegState::Kill)
3387 .addImm((Imm >> 16) & 0xFFFF);
3388 if (Imm & 0xFFFF)
3389 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3390 .addReg(Reg, RegState::Kill)
3391 .addImm(Imm & 0xFFFF);
3392 }
3393}
3394
// Finds an operand of MI whose value is produced by a load-immediate or
// add-immediate and returns that defining instruction, or nullptr if there
// is none. On success OpNoForForwarding holds the index of the operand; it
// is ~0U otherwise.
//
// In SSA form the definition is looked up through MachineRegisterInfo after
// looking through copy-like instructions; SeenIntermediateUse is not written
// on that path. After register allocation the search is limited to MI's own
// basic block via getDefMIPostRA, which also reports through
// SeenIntermediateUse whether the register is read between the definition
// and MI.
// NOTE(review): one parameter line of the declarator and the declaration of
// the local `TRI` used below are absent from this listing.
3395MachineInstr *PPCInstrInfo::getForwardingDefMI(
3397 unsigned &OpNoForForwarding,
3398 bool &SeenIntermediateUse) const {
3399 OpNoForForwarding = ~0U;
3400 MachineInstr *DefMI = nullptr;
3401 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3403 // If we're in SSA, get the defs through the MRI. Otherwise, only look
3404 // within the basic block to see if the register is defined using an
3405 // LI/LI8/ADDI/ADDI8.
3406 if (MRI->isSSA()) {
3407 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3408 if (!MI.getOperand(i).isReg())
3409 continue;
3410 Register Reg = MI.getOperand(i).getReg();
3411 if (!Reg.isVirtual())
3412 continue;
3413 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3414 if (TrueReg.isVirtual()) {
3415 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3416 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3417 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3418 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3419 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3420 OpNoForForwarding = i;
3421 DefMI = DefMIForTrueReg;
3422 // The ADDI and LI operand maybe exist in one instruction at same
3423 // time. we prefer to fold LI operand as LI only has one Imm operand
3424 // and is more possible to be converted. So if current DefMI is
3425 // ADDI/ADDI8, we continue to find possible LI/LI8.
3426 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3427 break;
3428 }
3429 }
3430 }
3431 } else {
3432 // Looking back through the definition for each operand could be expensive,
3433 // so exit early if this isn't an instruction that either has an immediate
3434 // form or is already an immediate form that we can handle.
3435 ImmInstrInfo III;
3436 unsigned Opc = MI.getOpcode();
3437 bool ConvertibleImmForm =
3438 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3439 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3440 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3441 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3442 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3443 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3444 Opc == PPC::RLWINM8_rec;
3445 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3446 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3447 : false;
3448 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3449 return nullptr;
3450
3451 // Don't convert or %X, %Y, %Y since that's just a register move.
3452 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3453 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3454 return nullptr;
3455 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3456 MachineOperand &MO = MI.getOperand(i);
// Reset per operand so the flag describes only the operand finally chosen.
3457 SeenIntermediateUse = false;
3458 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3459 Register Reg = MI.getOperand(i).getReg();
3460 // If we see another use of this reg between the def and the MI,
3461 // we want to flag it so the def isn't deleted.
// Note: this DefMI shadows the function-level DefMI declared above, so on
// this path a match is returned directly from inside the switch.
3462 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3463 if (DefMI) {
3464 // Is this register defined by some form of add-immediate (including
3465 // load-immediate) within this basic block?
3466 switch (DefMI->getOpcode()) {
3467 default:
3468 break;
3469 case PPC::LI:
3470 case PPC::LI8:
3471 case PPC::ADDItocL8:
3472 case PPC::ADDI:
3473 case PPC::ADDI8:
3474 OpNoForForwarding = i;
3475 return DefMI;
3476 }
3477 }
3478 }
3479 }
3480 }
3481 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3482}
3483
3484unsigned PPCInstrInfo::getSpillTarget() const {
3485 // With P10, we may need to spill paired vector registers or accumulator
3486 // registers. MMA implies paired vectors, so we can just check that.
3487 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3488 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3489 2 : Subtarget.hasP9Vector() ?
3490 1 : 0;
3491}
3492
3493ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3494 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3495}
3496
3497ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3498 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3499}
3500
3501 // This opt tries to convert the following imm form to an index form to save an
3502 // add for stack variables.
3503 // Return false if no such pattern found.
3504 //
3505 // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3506 // ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3507 // Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3508 //
3509 // can be converted to:
3510 //
3511 // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3512 // Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3513 //
3514 // In order to eliminate ADD instr, make sure that:
3515 // 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3516 // new ADDI instr and ADDI can only take int16 Imm.
3517 // 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3518 // between ADDI and ADD instr since its original def in ADDI will be changed
3519 // in new ADDI instr. And also there should be no new def for it between
3520 // ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3521 // 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3522 // between ADD and Imm instr since ADD instr will be eliminated.
3523 // 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3524 // moved to Index instr.
//
// On success the ADDI immediate is updated in place, MI is switched to its
// indexed opcode with its two address operands rewritten, the ADD is erased,
// and true is returned. Nothing is modified when false is returned.
// NOTE(review): the declarator line of this member function, the declaration
// of the local `MRI`, and one parameter line of the NewDefFor lambda are
// absent from this listing.
3526 MachineFunction *MF = MI.getParent()->getParent();
3528 bool PostRA = !MRI->isSSA();
3529 // Do this opt after PEI which is after RA. The reason is stack slot expansion
3530 // in PEI may expose such opportunities since in PEI, stack slot offsets to
3531 // frame base(OffsetAddi) are determined.
3532 if (!PostRA)
3533 return false;
3534 unsigned ToBeDeletedReg = 0;
3535 int64_t OffsetImm = 0;
3536 unsigned XFormOpcode = 0;
3537 ImmInstrInfo III;
3538
3539 // Check if Imm instr meets requirement.
3540 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3541 III))
3542 return false;
3543
3544 bool OtherIntermediateUse = false;
3545 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3546
3547 // Exit if there is other use between ADD and Imm instr or no def found.
3548 if (OtherIntermediateUse || !ADDMI)
3549 return false;
3550
3551 // Check if ADD instr meets requirement.
3552 if (!isADDInstrEligibleForFolding(*ADDMI))
3553 return false;
3554
3555 unsigned ScaleRegIdx = 0;
3556 int64_t OffsetAddi = 0;
3557 MachineInstr *ADDIMI = nullptr;
3558
3559 // Check if there is a valid ToBeChangedReg in ADDMI.
3560 // 1: It must be killed.
3561 // 2: Its definition must be a valid ADDIMI.
3562 // 3: It must satisfy int16 offset requirement.
// Either ADD source may be the ADDI result; the other one is the scale reg.
3563 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3564 ScaleRegIdx = 2;
3565 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3566 ScaleRegIdx = 1;
3567 else
3568 return false;
3569
3570 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3571 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3572 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
// NewDefFor: true if any instruction strictly after Start and before End
// modifies Reg.
3573 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3575 for (auto It = ++Start; It != End; It++)
3576 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3577 return true;
3578 return false;
3579 };
3580
3581 // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3582 // treated as special zero when ScaleReg is R0/X0 register.
3583 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3584 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3585 return false;
3586
3587 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3588 // and Imm Instr.
3589 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3590 return false;
3591
3592 // Now start to do the transformation.
3593 LLVM_DEBUG(dbgs() << "Replace instruction: "
3594 << "\n");
3595 LLVM_DEBUG(ADDIMI->dump());
3596 LLVM_DEBUG(ADDMI->dump());
3597 LLVM_DEBUG(MI.dump());
3598 LLVM_DEBUG(dbgs() << "with: "
3599 << "\n");
3600
3601 // Update ADDI instr.
3602 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3603
3604 // Update Imm instr.
3605 MI.setDesc(get(XFormOpcode));
// The former immediate slot now holds ScaleReg, inheriting the kill flag it
// had on the ADD.
3606 MI.getOperand(III.ImmOpNo)
3607 .ChangeToRegister(ScaleReg, false, false,
3608 ADDMI->getOperand(ScaleRegIdx).isKill());
3609
// The former ToBeDeletedReg slot now holds ToBeChangedReg, marked as killed.
3610 MI.getOperand(III.OpNoForForwarding)
3611 .ChangeToRegister(ToBeChangedReg, false, false, true);
3612
3613 // Eliminate ADD instr.
3614 ADDMI->eraseFromParent();
3615
3616 LLVM_DEBUG(ADDIMI->dump());
3617 LLVM_DEBUG(MI.dump());
3618
3619 return true;
3620}
3621
3623 int64_t &Imm) const {
// Returns true, and stores the immediate in Imm, only when ADDIMI is an
// ADDI/ADDI8 whose operand 2 is a plain immediate. Imm is left untouched
// when false is returned.
// NOTE(review): the declarator line of this member function is absent from
// this listing.
3624 unsigned Opc = ADDIMI.getOpcode();
3625
3626 // Exit if the instruction is not ADDI.
3627 if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3628 return false;
3629
3630 // The operand may not necessarily be an immediate - it could be a relocation.
3631 if (!ADDIMI.getOperand(2).isImm())
3632 return false;
3633
3634 Imm = ADDIMI.getOperand(2).getImm();
3635
3636 return true;
3637}
3638
// Returns true only when ADDMI's opcode is ADD4 or ADD8.
// NOTE(review): the declarator line of this member function is absent from
// this listing.
3640 unsigned Opc = ADDMI.getOpcode();
3641
3642 // Exit if the instruction is not ADD.
3643 return Opc == PPC::ADD4 || Opc == PPC::ADD8;
3644}
3645
3647 unsigned &ToBeDeletedReg,
3648 unsigned &XFormOpcode,
3649 int64_t &OffsetImm,
3650 ImmInstrInfo &III) const {
// Checks whether MI is an immediate-form load/store that can be folded into
// its indexed form. On success returns true with:
//   ToBeDeletedReg - the killed register operand at III.OpNoForForwarding,
//   XFormOpcode    - the indexed opcode mapped from MI's opcode,
//   OffsetImm      - the immediate at III.ImmOpNo,
//   III            - filled in by instrHasImmForm for XFormOpcode.
// XFormOpcode and III may already have been written when false is returned.
// NOTE(review): the declarator line of this member function is absent from
// this listing.
3651 // Only handle load/store.
3652 if (!MI.mayLoadOrStore())
3653 return false;
3654
3655 unsigned Opc = MI.getOpcode();
3656
3657 XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
3658
3659 // Exit if instruction has no index form.
3660 if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3661 return false;
3662
3663 // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3664 if (!instrHasImmForm(XFormOpcode,
3665 PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
3666 return false;
3667
3668 if (!III.IsSummingOperands)
3669 return false;
3670
// These are copies of the operands; MI itself is only inspected here.
3671 MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
3672 MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
3673 // Only support imm operands, not relocation slots or others.
3674 if (!ImmOperand.isImm())
3675 return false;
3676
3677 assert(RegOperand.isReg() && "Instruction format is not right");
3678
3679 // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
3680 if (!RegOperand.isKill())
3681 return false;
3682
3683 ToBeDeletedReg = RegOperand.getReg();
3684 OffsetImm = ImmOperand.getImm();
3685
3686 return true;
3687}
3688
3690 MachineInstr *&ADDIMI,
3691 int64_t &OffsetAddi,
3692 int64_t OffsetImm) const {
// Checks whether operand Index (1 or 2) of ADDMI can serve as ToBeChangedReg:
// the operand must be killed by ADDMI, its in-block definition must be an
// eligible ADDI with no read of the register in between, and
// OffsetAddi + OffsetImm must fit in a signed 16-bit immediate.
// ADDIMI receives whatever getDefMIPostRA returns (possibly nullptr) and
// OffsetAddi the ADDI immediate; both may be written even when false is
// returned.
// NOTE(review): the declarator line of this member function is absent from
// this listing.
3693 assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
3694 MachineOperand &MO = ADDMI->getOperand(Index);
3695
3696 if (!MO.isKill())
3697 return false;
3698
3699 bool OtherIntermediateUse = false;
3700
3701 ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
3702 // Currently handle only one "add + Imminstr" pair case, exit if other
3703 // intermediate use for ToBeChangedReg found.
3704 // TODO: handle the cases where there are other "add + Imminstr" pairs
3705 // with same offset in Imminstr which is like:
3706 //
3707 // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3708 // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
3709 // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
3710 // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
3711 // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
3712 //
3713 // can be converted to:
3714 //
3715 // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
3716 // (OffsetAddi + OffsetImm)
3717 // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
3718 // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
3719
3720 if (OtherIntermediateUse || !ADDIMI)
3721 return false;
3722 // Check if ADDI instr meets requirement.
3723 if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
3724 return false;
3725
// The combined offset has to be encodable in the rewritten ADDI.
3726 if (isInt<16>(OffsetAddi + OffsetImm))
3727 return true;
3728 return false;
3729}
3730
3731// If this instruction has an immediate form and one of its operands is a
3732// result of a load-immediate or an add-immediate, convert it to
3733// the immediate form if the constant is in range.
//
// NOTE(review): the line that opens this signature (return type, function
// name and the MI parameter used below) is not present in this listing.
//
// RegsToUpdate receives every register defined by the feeding instruction and
// every register operand of MI; this happens as soon as a feeding instruction
// is found, whether or not a transformation is applied afterwards.
// KilledDef, when non-null, is set to the feeding instruction if MI kills the
// forwarded operand and no intermediate use was reported.
// Returns true if one of the four transformations below rewrote MI.
3735 SmallSet<Register, 4> &RegsToUpdate,
3736 MachineInstr **KilledDef) const {
3737 MachineFunction *MF = MI.getParent()->getParent();
// NOTE(review): MRI is used on the next line but its declaration is absent
// from this listing; MF is presumably its source -- confirm.
3739 bool PostRA = !MRI->isSSA();
3740 bool SeenIntermediateUse = true;
3741 unsigned ForwardingOperand = ~0U;
3742 MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
3743 SeenIntermediateUse);
3744 if (!DefMI)
3745 return false;
3746 assert(ForwardingOperand < MI.getNumOperands() &&
3747 "The forwarding operand needs to be valid at this point");
3748 bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
// DefMI is only reported as killed when MI is the last user of its result and
// nothing between DefMI and MI reads it.
3749 bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
3750 if (KilledDef && KillFwdDefMI)
3751 *KilledDef = DefMI;
3752
3753 // Conservatively add defs from DefMI and defs/uses from MI to the set of
3754 // registers that need their kill flags updated.
3755 for (const MachineOperand &MO : DefMI->operands())
3756 if (MO.isReg() && MO.isDef())
3757 RegsToUpdate.insert(MO.getReg());
3758 for (const MachineOperand &MO : MI.operands())
3759 if (MO.isReg())
3760 RegsToUpdate.insert(MO.getReg());
3761
3762 // If this is an imm instruction and its register operand is produced by
3763 // ADDI, put the imm into the imm instruction directly.
3764 if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
3765 PPC::INSTRUCTION_LIST_END &&
3766 transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
3767 return true;
3768
3769 ImmInstrInfo III;
// Operand 0 is only classified as a VF register when it is a register at all.
3770 bool IsVFReg = MI.getOperand(0).isReg()
3771 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3772 : false;
3773 bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
3774 // If this is a reg+reg instruction that has a reg+imm form,
3775 // and one of the operands is produced by an add-immediate,
3776 // try to convert it.
3777 if (HasImmForm &&
3778 transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
3779 KillFwdDefMI))
3780 return true;
3781
3782 // If this is a reg+reg instruction that has a reg+imm form,
3783 // and one of the operands is produced by LI, convert it now.
3784 if (HasImmForm &&
3785 transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
3786 return true;
3787
3788 // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
3789 // can be simplified to LI.
3790 if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
3791 return true;
3792
3793 return false;
3794}
3795
3797 MachineInstr **ToErase) const {
// NOTE(review): the line that opens this signature (return type, function
// name and the MI parameter used below) is not present in this listing.
//
// Tries to fold the rotate-and-mask instruction that defines MI's operand 1
// into MI itself. MI's operands 2, 3 and 4 are read as shift amount, mask
// begin and mask end, and likewise for the defining instruction.
// Outcomes:
//  - the combined mask is empty: MI becomes "LI/LI8 0", or "ANDI(8)_rec
//    src, 0" when MI is a record form;
//  - the combined mask is a non-wrapping run of ones, or the source mask is
//    full: MI is rewritten in place with the summed shift (mod 32) and, unless
//    the source mask is full, the new mask bounds;
//  - otherwise MI is left untouched.
// Returns true if MI was rewritten. When the folded register then has no
// non-debug uses and the source has no implicit def, *ToErase is set to the
// source instruction.
3798 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3799 Register FoldingReg = MI.getOperand(1).getReg();
3800 if (!FoldingReg.isVirtual())
3801 return false;
// NOTE(review): SrcMI is dereferenced without a null check; this relies on
// every virtual register reaching here having a definition.
3802 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3803 if (SrcMI->getOpcode() != PPC::RLWINM &&
3804 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3805 SrcMI->getOpcode() != PPC::RLWINM8 &&
3806 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3807 return false;
3808 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3809 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3810 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3811 "Invalid PPC::RLWINM Instruction!");
3812 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3813 uint64_t SHMI = MI.getOperand(2).getImm();
3814 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3815 uint64_t MBMI = MI.getOperand(3).getImm();
3816 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3817 uint64_t MEMI = MI.getOperand(4).getImm();
3818
3819 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3820 "Invalid PPC::RLWINM Instruction!");
3821 // If MBMI is bigger than MEMI, we can never get a run of ones.
3822 // RotatedSrcMask non-wrap:
3823 // 0........31|32........63
3824 // RotatedSrcMask: B---E B---E
3825 // MaskMI: -----------|--E B------
3826 // Result: ----- --- (Bad candidate)
3827 //
3828 // RotatedSrcMask wrap:
3829 // 0........31|32........63
3830 // RotatedSrcMask: --E B----|--E B----
3831 // MaskMI: -----------|--E B------
3832 // Result: --- -----|--- ----- (Bad candidate)
3833 //
3834 // One special case is RotatedSrcMask is a full set mask.
3835 // RotatedSrcMask full:
3836 // 0........31|32........63
3837 // RotatedSrcMask: ------EB---|-------EB---
3838 // MaskMI: -----------|--E B------
3839 // Result: -----------|--- ------- (Good candidate)
3840
3841 // Mark special case.
// The operands are unsigned, so MBSrc - MESrc == 1 only holds when MBSrc is
// exactly one past MESrc (a wrapping mask that covers all 32 bits).
3842 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3843
3844 // For other MBMI > MEMI cases, just return.
3845 if ((MBMI > MEMI) && !SrcMaskFull)
3846 return false;
3847
3848 // Handle MBMI <= MEMI cases.
3849 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3850 // In MI, we only need the low 32 bits of SrcMI, so only consider the low 32
3851 // bits of the SrcMI mask. Note that in APInt, the lowest bit is at index 0,
3852 // while in the PowerPC ISA, the lowest bit is at index 63.
3853 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3854
// Rotate the source mask by MI's shift so both masks are expressed in the
// bit positions of MI's result before intersecting them.
3855 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3856 APInt FinalMask = RotatedSrcMask & MaskMI;
3857 uint32_t NewMB, NewME;
3858 bool Simplified = false;
3859
3860 // If final mask is 0, MI result should be 0 too.
3861 if (FinalMask.isZero()) {
3862 bool Is64Bit =
3863 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3864 Simplified = true;
3865 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3866 LLVM_DEBUG(MI.dump());
3867
3868 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3869 // Replace MI with "LI 0"
// Operands are removed from the highest index down so the remaining indices
// stay valid.
3870 MI.removeOperand(4);
3871 MI.removeOperand(3);
3872 MI.removeOperand(2);
3873 MI.getOperand(1).ChangeToImmediate(0);
3874 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3875 } else {
3876 // Replace MI with "ANDI_rec reg, 0"
3877 MI.removeOperand(4);
3878 MI.removeOperand(3);
3879 MI.getOperand(2).setImm(0);
3880 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3881 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
// MI now reads SrcMI's input directly, so a kill flag on that input has to
// move from SrcMI to MI.
3882 if (SrcMI->getOperand(1).isKill()) {
3883 MI.getOperand(1).setIsKill(true);
3884 SrcMI->getOperand(1).setIsKill(false);
3885 } else
3886 // About to replace MI.getOperand(1), clear its kill flag.
3887 MI.getOperand(1).setIsKill(false);
3888 }
3889
3890 LLVM_DEBUG(dbgs() << "With: ");
3891 LLVM_DEBUG(MI.dump());
3892
3893 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3894 NewMB <= NewME) ||
3895 SrcMaskFull) {
3896 // Here we only handle the MBMI <= MEMI case, so NewMB must be no bigger
3897 // than NewME. Otherwise we get a 64 bit value after folding, but MI
3898 // returns a 32 bit value.
3899 Simplified = true;
3900 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3901 LLVM_DEBUG(MI.dump());
3902
// Two successive 32-bit rotates compose into one rotate by the sum mod 32.
3903 uint16_t NewSH = (SHSrc + SHMI) % 32;
3904 MI.getOperand(2).setImm(NewSH);
3905 // If SrcMI mask is full, no need to update MBMI and MEMI.
3906 if (!SrcMaskFull) {
3907 MI.getOperand(3).setImm(NewMB);
3908 MI.getOperand(4).setImm(NewME);
3909 }
3910 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3911 if (SrcMI->getOperand(1).isKill()) {
3912 MI.getOperand(1).setIsKill(true);
3913 SrcMI->getOperand(1).setIsKill(false);
3914 } else
3915 // About to replace MI.getOperand(1), clear its kill flag.
3916 MI.getOperand(1).setIsKill(false);
3917
3918 LLVM_DEBUG(dbgs() << "To: ");
3919 LLVM_DEBUG(MI.dump());
3920 }
// NOTE(review): the first operator below is a bitwise '&' between two bools.
// The result matches '&&', but use_nodbg_empty() is evaluated even when
// Simplified is false. ToErase is also dereferenced without a null check.
3921 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
3922 !SrcMI->hasImplicitDef()) {
3923 // If FoldingReg has no non-debug use and it has no implicit def (it
3924 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
3925 // Otherwise keep it.
3926 *ToErase = SrcMI;
3927 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3928 LLVM_DEBUG(SrcMI->dump());
3929 }
3930 return Simplified;
3931}
3932
3933bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
3934 ImmInstrInfo &III, bool PostRA) const {
3935 // The vast majority of the instructions would need their operand 2 replaced
3936 // with an immediate when switching to the reg+imm form. A marked exception
3937 // are the update form loads/stores for which a constant operand 2 would need
3938 // to turn into a displacement and move operand 1 to the operand 2 position.
3939 III.ImmOpNo = 2;
3940 III.OpNoForForwarding = 2;
3941 III.ImmWidth = 16;
3942 III.ImmMustBeMultipleOf = 1;
3943 III.TruncateImmTo = 0;
3944 III.IsSummingOperands = false;
3945 switch (Opc) {
3946 default: return false;
3947 case PPC::ADD4:
3948 case PPC::ADD8:
3949 III.SignedImm = true;
3950 III.ZeroIsSpecialOrig = 0;
3951 III.ZeroIsSpecialNew = 1;
3952 III.IsCommutative = true;
3953 III.IsSummingOperands = true;
3954 III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
3955 break;
3956 case PPC::ADDC:
3957 case PPC::ADDC8:
3958 III.SignedImm = true;
3959 III.ZeroIsSpecialOrig = 0;
3960 III.ZeroIsSpecialNew = 0;
3961 III.IsCommutative = true;
3962 III.IsSummingOperands = true;
3963 III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
3964 break;
3965 case PPC::ADDC_rec:
3966 III.SignedImm = true;
3967 III.ZeroIsSpecialOrig = 0;
3968 III.ZeroIsSpecialNew = 0;
3969 III.IsCommutative = true;
3970 III.IsSummingOperands = true;
3971 III.ImmOpcode = PPC::ADDIC_rec;
3972 break;
3973 case PPC::SUBFC:
3974 case PPC::SUBFC8:
3975 III.SignedImm = true;
3976 III.ZeroIsSpecialOrig = 0;
3977 III.ZeroIsSpecialNew = 0;
3978 III.IsCommutative = false;
3979 III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
3980 break;
3981 case PPC::CMPW:
3982 case PPC::CMPD:
3983 III.SignedImm = true;
3984 III.ZeroIsSpecialOrig = 0;
3985 III.ZeroIsSpecialNew = 0;
3986 III.IsCommutative = false;
3987 III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
3988 break;
3989 case PPC::CMPLW:
3990 case PPC::CMPLD:
3991 III.SignedImm = false;
3992 III.ZeroIsSpecialOrig = 0;
3993 III.ZeroIsSpecialNew = 0;
3994 III.IsCommutative = false;
3995 III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
3996 break;
3997 case PPC::AND_rec:
3998 case PPC::AND8_rec:
3999 case PPC::OR:
4000 case PPC::OR8:
4001 case PPC::XOR:
4002 case PPC::XOR8:
4003 III.SignedImm = false;
4004 III.ZeroIsSpecialOrig = 0;
4005 III.ZeroIsSpecialNew = 0;
4006 III.IsCommutative = true;
4007 switch(Opc) {
4008 default: llvm_unreachable("Unknown opcode");
4009 case PPC::AND_rec:
4010 III.ImmOpcode = PPC::ANDI_rec;
4011 break;
4012 case PPC::AND8_rec:
4013 III.ImmOpcode = PPC::ANDI8_rec;
4014 break;
4015 case PPC::OR: III.ImmOpcode = PPC::ORI; break;
4016 case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
4017 case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
4018 case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
4019 }
4020 break;
4021 case PPC::RLWNM:
4022 case PPC::RLWNM8:
4023 case PPC::RLWNM_rec:
4024 case PPC::RLWNM8_rec:
4025 case PPC::SLW:
4026 case PPC::SLW8:
4027 case PPC::SLW_rec:
4028 case PPC::SLW8_rec:
4029 case PPC::SRW:
4030 case PPC::SRW8:
4031 case PPC::SRW_rec:
4032 case PPC::SRW8_rec:
4033 case PPC::SRAW:
4034 case PPC::SRAW_rec:
4035 III.SignedImm = false;
4036 III.ZeroIsSpecialOrig = 0;
4037 III.ZeroIsSpecialNew = 0;
4038 III.IsCommutative = false;
4039 // This isn't actually true, but the instructions ignore any of the
4040 // upper bits, so any immediate loaded with an LI is acceptable.
4041 // This does not apply to shift right algebraic because a value
4042 // out of range will produce a -1/0.
4043 III.ImmWidth = 16;
4044 if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
4045 Opc == PPC::RLWNM8_rec)
4046 III.TruncateImmTo = 5;
4047 else
4048 III.TruncateImmTo = 6;
4049 switch(Opc) {
4050 default: llvm_unreachable("Unknown opcode");
4051 case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
4052 case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
4053 case PPC::RLWNM_rec:
4054 III.ImmOpcode = PPC::RLWINM_rec;
4055 break;
4056 case PPC::RLWNM8_rec:
4057 III.ImmOpcode = PPC::RLWINM8_rec;
4058 break;
4059 case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
4060 case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
4061 case PPC::SLW_rec:
4062 III.ImmOpcode = PPC::RLWINM_rec;
4063 break;
4064 case PPC::SLW8_rec:
4065 III.ImmOpcode = PPC::RLWINM8_rec;
4066 break;
4067 case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
4068 case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
4069 case PPC::SRW_rec:
4070 III.ImmOpcode = PPC::RLWINM_rec;
4071 break;
4072 case PPC::SRW8_rec:
4073 III.ImmOpcode = PPC::RLWINM8_rec;
4074 break;
4075 case PPC::SRAW:
4076 III.ImmWidth = 5;
4077 III.TruncateImmTo = 0;
4078 III.ImmOpcode = PPC::SRAWI;
4079 break;
4080 case PPC::SRAW_rec:
4081 III.ImmWidth = 5;
4082 III.TruncateImmTo = 0;
4083 III.ImmOpcode = PPC::SRAWI_rec;
4084 break;
4085 }
4086 break;
4087 case PPC::RLDCL:
4088 case PPC::RLDCL_rec:
4089 case PPC::RLDCR:
4090 case PPC::RLDCR_rec:
4091 case PPC::SLD:
4092 case PPC::SLD_rec:
4093 case PPC::SRD:
4094 case PPC::SRD_rec:
4095 case PPC::SRAD:
4096 case PPC::SRAD_rec:
4097 III.SignedImm = false;
4098 III.ZeroIsSpecialOrig = 0;
4099 III.ZeroIsSpecialNew = 0;
4100 III.IsCommutative = false;
4101 // This isn't actually true, but the instructions ignore any of the
4102 // upper bits, so any immediate loaded with an LI is acceptable.
4103 // This does not apply to shift right algebraic because a value
4104 // out of range will produce a -1/0.
4105 III.ImmWidth = 16;
4106 if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
4107 Opc == PPC::RLDCR_rec)
4108 III.TruncateImmTo = 6;
4109 else
4110 III.TruncateImmTo = 7;
4111 switch(Opc) {
4112 default: llvm_unreachable("Unknown opcode");
4113 case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
4114 case PPC::RLDCL_rec:
4115 III.ImmOpcode = PPC::RLDICL_rec;
4116 break;
4117 case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
4118 case PPC::RLDCR_rec:
4119 III.ImmOpcode = PPC::RLDICR_rec;
4120 break;
4121 case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
4122 case PPC::SLD_rec:
4123 III.ImmOpcode = PPC::RLDICR_rec;
4124 break;
4125 case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
4126 case PPC::SRD_rec:
4127 III.ImmOpcode = PPC::RLDICL_rec;
4128 break;
4129 case PPC::SRAD:
4130 III.ImmWidth = 6;
4131 III.TruncateImmTo = 0;
4132 III.ImmOpcode = PPC::SRADI;
4133 break;
4134 case PPC::SRAD_rec:
4135 III.ImmWidth = 6;
4136 III.TruncateImmTo = 0;
4137 III.ImmOpcode = PPC::SRADI_rec;
4138 break;
4139 }
4140 break;
4141 // Loads and stores:
4142 case PPC::LBZX:
4143 case PPC::LBZX8:
4144 case PPC::LHZX:
4145 case PPC::LHZX8:
4146 case PPC::LHAX:
4147 case PPC::LHAX8:
4148 case PPC::LWZX:
4149 case PPC::LWZX8:
4150 case PPC::LWAX:
4151 case PPC::LDX:
4152 case PPC::LFSX:
4153 case PPC::LFDX:
4154 case PPC::STBX:
4155 case PPC::STBX8:
4156 case PPC::STHX:
4157 case PPC::STHX8:
4158 case PPC::STWX:
4159 case PPC::STWX8:
4160 case PPC::STDX:
4161 case PPC::STFSX:
4162 case PPC::STFDX:
4163 III.SignedImm = true;
4164 III.ZeroIsSpecialOrig = 1;
4165 III.ZeroIsSpecialNew = 2;
4166 III.IsCommutative = true;
4167 III.IsSummingOperands = true;
4168 III.ImmOpNo = 1;
4169 III.OpNoForForwarding = 2;
4170 switch(Opc) {
4171 default: llvm_unreachable("Unknown opcode");
4172 case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
4173 case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
4174 case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
4175 case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
4176 case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
4177 case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
4178 case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
4179 case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
4180 case PPC::LWAX:
4181 III.ImmOpcode = PPC::LWA;
4182 III.ImmMustBeMultipleOf = 4;
4183 break;
4184 case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
4185 case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
4186 case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
4187 case PPC::STBX: III.ImmOpcode = PPC::STB; break;
4188 case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
4189 case PPC::STHX: III.ImmOpcode = PPC::STH; break;
4190 case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
4191 case PPC::STWX: III.ImmOpcode = PPC::STW; break;
4192 case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
4193 case PPC::STDX:
4194 III.ImmOpcode = PPC::STD;
4195 III.ImmMustBeMultipleOf = 4;
4196 break;
4197 case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
4198 case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
4199 }
4200 break;
4201 case PPC::LBZUX:
4202 case PPC::LBZUX8:
4203 case PPC::LHZUX:
4204 case PPC::LHZUX8:
4205 case PPC::LHAUX:
4206 case PPC::LHAUX8:
4207 case PPC::LWZUX:
4208 case PPC::LWZUX8:
4209 case PPC::LDUX:
4210 case PPC::LFSUX:
4211 case PPC::LFDUX:
4212 case PPC::STBUX:
4213 case PPC::STBUX8:
4214 case PPC::STHUX:
4215 case PPC::STHUX8:
4216 case PPC::STWUX:
4217 case PPC::STWUX8:
4218 case PPC::STDUX:
4219 case PPC::STFSUX:
4220 case PPC::STFDUX:
4221 III.SignedImm = true;
4222 III.ZeroIsSpecialOrig = 2;
4223 III.ZeroIsSpecialNew = 3;
4224 III.IsCommutative = false;
4225 III.IsSummingOperands = true;
4226 III.ImmOpNo = 2;
4227 III.OpNoForForwarding = 3;
4228 switch(Opc) {
4229 default: llvm_unreachable("Unknown opcode");
4230 case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
4231 case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
4232 case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
4233 case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
4234 case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
4235 case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
4236 case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
4237 case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
4238 case PPC::LDUX:
4239 III.ImmOpcode = PPC::LDU;
4240 III.ImmMustBeMultipleOf = 4;
4241 break;
4242 case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
4243 case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
4244 case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
4245 case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
4246 case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
4247 case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
4248 case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
4249 case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
4250 case PPC::STDUX:
4251 III.ImmOpcode = PPC::STDU;
4252 III.ImmMustBeMultipleOf = 4;
4253 break;
4254 case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
4255 case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
4256 }
4257 break;
4258 // Power9 and up only. For some of these, the X-Form version has access to all
4259 // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
4260 // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
4261 // into or stored from is one of the VR registers.
4262 case PPC::LXVX:
4263 case PPC::LXSSPX:
4264 case PPC::LXSDX:
4265 case PPC::STXVX:
4266 case PPC::STXSSPX:
4267 case PPC::STXSDX:
4268 case PPC::XFLOADf32:
4269 case PPC::XFLOADf64:
4270 case PPC::XFSTOREf32:
4271 case PPC::XFSTOREf64:
4272 if (!Subtarget.hasP9Vector())
4273 return false;
4274 III.SignedImm = true;
4275 III.ZeroIsSpecialOrig = 1;
4276 III.ZeroIsSpecialNew = 2;
4277 III.IsCommutative = true;
4278 III.IsSummingOperands = true;
4279 III.ImmOpNo = 1;
4280 III.OpNoForForwarding = 2;
4281 III.ImmMustBeMultipleOf = 4;
4282 switch(Opc) {
4283 default: llvm_unreachable("Unknown opcode");
4284 case PPC::LXVX:
4285 III.ImmOpcode = PPC::LXV;
4286 III.ImmMustBeMultipleOf = 16;
4287 break;
4288 case PPC::LXSSPX:
4289 if (PostRA) {
4290 if (IsVFReg)
4291 III.ImmOpcode = PPC::LXSSP;
4292 else {
4293 III.ImmOpcode = PPC::LFS;
4294 III.ImmMustBeMultipleOf = 1;
4295 }
4296 break;
4297 }
4298 [[fallthrough]];
4299 case PPC::XFLOADf32:
4300 III.ImmOpcode = PPC::DFLOADf32;
4301 break;
4302 case PPC::LXSDX:
4303 if (PostRA) {
4304 if (IsVFReg)
4305 III.ImmOpcode = PPC::LXSD;
4306 else {
4307 III.ImmOpcode = PPC::LFD;
4308 III.ImmMustBeMultipleOf = 1;
4309 }
4310 break;
4311 }
4312 [[fallthrough]];
4313 case PPC::XFLOADf64:
4314 III.ImmOpcode = PPC::DFLOADf64;
4315 break;
4316 case PPC::STXVX:
4317 III.ImmOpcode = PPC::STXV;
4318 III.ImmMustBeMultipleOf = 16;
4319 break;
4320 case PPC::STXSSPX:
4321 if (PostRA) {
4322 if (IsVFReg)
4323 III.ImmOpcode = PPC::STXSSP;
4324 else {
4325 III.ImmOpcode = PPC::STFS;
4326 III.ImmMustBeMultipleOf = 1;
4327 }
4328 break;
4329 }
4330 [[fallthrough]];
4331 case PPC::XFSTOREf32:
4332 III.ImmOpcode = PPC::DFSTOREf32;
4333 break;
4334 case PPC::STXSDX:
4335 if (PostRA) {
4336 if (IsVFReg)
4337 III.ImmOpcode = PPC::STXSD;
4338 else {
4339 III.ImmOpcode = PPC::STFD;
4340 III.ImmMustBeMultipleOf = 1;
4341 }
4342 break;
4343 }
4344 [[fallthrough]];
4345 case PPC::XFSTOREf64:
4346 III.ImmOpcode = PPC::DFSTOREf64;
4347 break;
4348 }
4349 break;
4350 }
4351 return true;
4352}
4353
4354// Utility function for swaping two arbitrary operands of an instruction.
4355static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
4356 assert(Op1 != Op2 && "Cannot swap operand with itself.");
4357
4358 unsigned MaxOp = std::max(Op1, Op2);
4359 unsigned MinOp = std::min(Op1, Op2);
4360 MachineOperand MOp1 = MI.getOperand(MinOp);
4361 MachineOperand MOp2 = MI.getOperand(MaxOp);
4362 MI.removeOperand(std::max(Op1, Op2));
4363 MI.removeOperand(std::min(Op1, Op2));
4364
4365 // If the operands we are swapping are the two at the end (the common case)
4366 // we can just remove both and add them in the opposite order.
4367 if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
4368 MI.addOperand(MOp2);
4369 MI.addOperand(MOp1);
4370 } else {
4371 // Store all operands in a temporary vector, remove them and re-add in the
4372 // right order.
4374 unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
4375 for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
4376 MOps.push_back(MI.getOperand(i));
4377 MI.removeOperand(i);
4378 }
4379 // MOp2 needs to be added next.
4380 MI.addOperand(MOp2);
4381 // Now add the rest.
4382 for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
4383 if (i == MaxOp)
4384 MI.addOperand(MOp1);
4385 else {
4386 MI.addOperand(MOps.back());
4387 MOps.pop_back();
4388 }
4389 }
4390 }
4391}
4392
4393// Check if the 'MI' that has the index OpNoForForwarding
4394// meets the requirement described in the ImmInstrInfo.
4395bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4396 const ImmInstrInfo &III,
4397 unsigned OpNoForForwarding
4398 ) const {
4399 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4400 // would not work pre-RA, we can only do the check post RA.
4401 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4402 if (MRI.isSSA())
4403 return false;
4404
4405 // Cannot do the transform if MI isn't summing the operands.
4406 if (!III.IsSummingOperands)
4407 return false;
4408
4409 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4410 if (!III.ZeroIsSpecialOrig)
4411 return false;
4412
4413 // We cannot do the transform if the operand we are trying to replace
4414 // isn't the same as the operand the instruction allows.
4415 if (OpNoForForwarding != III.OpNoForForwarding)
4416 return false;
4417
4418 // Check if the instruction we are trying to transform really has
4419 // the special zero register as its operand.
4420 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4421 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4422 return false;
4423
4424 // This machine instruction is convertible if it is,
4425 // 1. summing the operands.
4426 // 2. one of the operands is special zero register.
4427 // 3. the operand we are trying to replace is allowed by the MI.
4428 return true;
4429}
4430
4431// Check if the DefMI is the add inst and set the ImmMO and RegMO
4432// accordingly.
4433bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4434 const ImmInstrInfo &III,
4435 MachineOperand *&ImmMO,
4436 MachineOperand *&RegMO) const {
4437 unsigned Opc = DefMI.getOpcode();
4438 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4439 return false;
4440
4441 // Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL8
4442 // on AIX which is used for toc-data access. TODO: Follow up to see if it can
4443 // apply for AIX toc-data as well.
4444 if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
4445 return false;
4446
4447 assert(DefMI.getNumOperands() >= 3 &&
4448 "Add inst must have at least three operands");
4449 RegMO = &DefMI.getOperand(1);
4450 ImmMO = &DefMI.getOperand(2);
4451
4452 // Before RA, ADDI first operand could be a frame index.
4453 if (!RegMO->isReg())
4454 return false;
4455
4456 // This DefMI is elgible for forwarding if it is:
4457 // 1. add inst
4458 // 2. one of the operands is Imm/CPI/Global.
4459 return isAnImmediateOperand(*ImmMO);
4460}
4461
4462bool PPCInstrInfo::isRegElgibleForForwarding(
4463 const MachineOperand &RegMO, const MachineInstr &DefMI,
4464 const MachineInstr &MI, bool KillDefMI,
4465 bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
4466 // x = addi y, imm
4467 // ...
4468 // z = lfdx 0, x -> z = lfd imm(y)
4469 // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
4470 // of "y" between the DEF of "x" and "z".
4471 // The query is only valid post RA.
4472 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4473 if (MRI.isSSA())
4474 return false;
4475
4476 Register Reg = RegMO.getReg();
4477
4478 // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
4480 MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
4481 It++;
4482 for (; It != E; ++It) {
4483 if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4484 return false;
4485 else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4486 IsFwdFeederRegKilled = true;
4487 if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4488 SeenIntermediateUse = true;
4489 // Made it to DefMI without encountering a clobber.
4490 if ((&*It) == &DefMI)
4491 break;
4492 }
4493 assert((&*It) == &DefMI && "DefMI is missing");
4494
4495 // If DefMI also defines the register to be forwarded, we can only forward it
4496 // if DefMI is being erased.
4497 if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
4498 return KillDefMI;
4499
4500 return true;
4501}
4502
4503bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
4504 const MachineInstr &DefMI,
4505 const ImmInstrInfo &III,
4506 int64_t &Imm,
4507 int64_t BaseImm) const {
4508 assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
4509 if (DefMI.getOpcode() == PPC::ADDItocL8) {
4510 // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
4511 // However, we know that, it is 16-bit width, and has the alignment of 4.
4512 // Check if the instruction met the requirement.
4513 if (III.ImmMustBeMultipleOf > 4 ||
4514 III.TruncateImmTo || III.ImmWidth != 16)
4515 return false;
4516
4517 // Going from XForm to DForm loads means that the displacement needs to be
4518 // not just an immediate but also a multiple of 4, or 16 depending on the
4519 // load. A DForm load cannot be represented if it is a multiple of say 2.
4520 // XForm loads do not have this restriction.
4521 if (ImmMO.isGlobal()) {
4522 const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
4524 return false;
4525 }
4526
4527 return true;
4528 }
4529
4530 if (ImmMO.isImm()) {
4531 // It is Imm, we need to check if the Imm fit the range.
4532 // Sign-extend to 64-bits.
4533 // DefMI may be folded with another imm form instruction, the result Imm is
4534 // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
4535 APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
4536 if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
4537 return false;
4538 if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
4539 return false;
4540 Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);
4541
4542 if (Imm % III.ImmMustBeMultipleOf)
4543 return false;
4544 if (III.TruncateImmTo)
4545 Imm &= ((1 << III.TruncateImmTo) - 1);
4546 }
4547 else
4548 return false;
4549
4550 // This ImmMO is forwarded if it meets the requriement describle
4551 // in ImmInstrInfo
4552 return true;
4553}
4554
4555bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4556 unsigned OpNoForForwarding,
4557 MachineInstr **KilledDef) const {
4558 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4559 !DefMI.getOperand(1).isImm())
4560 return false;
4561
4562 MachineFunction *MF = MI.getParent()->getParent();
4564 bool PostRA = !MRI->isSSA();
4565
4566 int64_t Immediate = DefMI.getOperand(1).getImm();
4567 // Sign-extend to 64-bits.
4568 int64_t SExtImm = SignExtend64<16>(Immediate);
4569
4570 bool ReplaceWithLI = false;
4571 bool Is64BitLI = false;
4572 int64_t NewImm = 0;
4573 bool SetCR = false;
4574 unsigned Opc = MI.getOpcode();
4575 switch (Opc) {
4576 default:
4577 return false;
4578
4579 // FIXME: Any branches conditional on such a comparison can be made
4580 // unconditional. At this time, this happens too infrequently to be worth
4581 // the implementation effort, but if that ever changes, we could convert
4582 // such a pattern here.
4583 case PPC::CMPWI:
4584 case PPC::CMPLWI:
4585 case PPC::CMPDI:
4586 case PPC::CMPLDI: {
4587 // Doing this post-RA would require dataflow analysis to reliably find uses
4588 // of the CR register set by the compare.
4589 // No need to fixup killed/dead flag since this transformation is only valid
4590 // before RA.
4591 if (PostRA)
4592 return false;
4593 // If a compare-immediate is fed by an immediate and is itself an input of
4594 // an ISEL (the most common case) into a COPY of the correct register.
4595 bool Changed = false;
4596 Register DefReg = MI.getOperand(0).getReg();
4597 int64_t Comparand = MI.getOperand(2).getImm();
4598 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4599 ? (Comparand | 0xFFFFFFFFFFFF0000)
4600 : Comparand;
4601
4602 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4603 unsigned UseOpc = CompareUseMI.getOpcode();
4604 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4605 continue;
4606 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4607 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4608 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4609 unsigned RegToCopy =
4610 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4611 if (RegToCopy == PPC::NoRegister)
4612 continue;
4613 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4614 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4615 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4616 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4617 CompareUseMI.removeOperand(3);
4618 CompareUseMI.removeOperand(2);
4619 continue;
4620 }
4621 LLVM_DEBUG(
4622 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4623 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4624 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4625 // Convert to copy and remove unneeded operands.
4626 CompareUseMI.setDesc(get(PPC::COPY));
4627 CompareUseMI.removeOperand(3);
4628 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4629 CmpIselsConverted++;
4630 Changed = true;
4631 LLVM_DEBUG(CompareUseMI.dump());
4632 }
4633 if (Changed)
4634 return true;
4635 // This may end up incremented multiple times since this function is called
4636 // during a fixed-point transformation, but it is only meant to indicate the
4637 // presence of this opportunity.
4638 MissedConvertibleImmediateInstrs++;
4639 return false;
4640 }
4641
4642 // Immediate forms - may simply be convertable to an LI.
4643 case PPC::ADDI:
4644 case PPC::ADDI8: {
4645 // Does the sum fit in a 16-bit signed field?
4646 int64_t Addend = MI.getOperand(2).getImm();
4647 if (isInt<16>(Addend + SExtImm)) {
4648 ReplaceWithLI = true;
4649 Is64BitLI = Opc == PPC::ADDI8;
4650 NewImm = Addend + SExtImm;
4651 break;
4652 }
4653 return false;
4654 }
4655 case PPC::SUBFIC:
4656 case PPC::SUBFIC8: {
4657 // Only transform this if the CARRY implicit operand is dead.
4658 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4659 return false;
4660 int64_t Minuend = MI.getOperand(2).getImm();
4661 if (isInt<16>(Minuend - SExtImm)) {
4662 ReplaceWithLI = true;
4663 Is64BitLI = Opc == PPC::SUBFIC8;
4664 NewImm = Minuend - SExtImm;
4665 break;
4666 }
4667 return false;
4668 }
4669 case PPC::RLDICL:
4670 case PPC::RLDICL_rec:
4671 case PPC::RLDICL_32:
4672 case PPC::RLDICL_32_64: {
4673 // Use APInt's rotate function.
4674 int64_t SH = MI.getOperand(2).getImm();
4675 int64_t MB = MI.getOperand(3).getImm();
4676 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4677 SExtImm, true);
4678 InVal = InVal.rotl(SH);
4679 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4680 InVal &= Mask;
4681 // Can't replace negative values with an LI as that will sign-extend
4682 // and not clear the left bits. If we're setting the CR bit, we will use
4683 // ANDI_rec which won't sign extend, so that's safe.
4684 if (isUInt<15>(InVal.getSExtValue()) ||
4685 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4686 ReplaceWithLI = true;
4687 Is64BitLI = Opc != PPC::RLDICL_32;
4688 NewImm = InVal.getSExtValue();
4689 SetCR = Opc == PPC::RLDICL_rec;
4690 break;
4691 }
4692 return false;
4693 }
4694 case PPC::RLWINM:
4695 case PPC::RLWINM8:
4696 case PPC::RLWINM_rec:
4697 case PPC::RLWINM8_rec: {
4698 int64_t SH = MI.getOperand(2).getImm();
4699 int64_t MB = MI.getOperand(3).getImm();
4700 int64_t ME = MI.getOperand(4).getImm();
4701 APInt InVal(32, SExtImm, true);
4702 InVal = InVal.rotl(SH);
4703 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4704 InVal &= Mask;
4705 // Can't replace negative values with an LI as that will sign-extend
4706 // and not clear the left bits. If we're setting the CR bit, we will use
4707 // ANDI_rec which won't sign extend, so that's safe.
4708 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4709 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4710 isUInt<16>(InVal.getSExtValue()));
4711 if (ValueFits) {
4712 ReplaceWithLI = true;
4713 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4714 NewImm = InVal.getSExtValue();
4715 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4716 break;
4717 }
4718 return false;
4719 }
4720 case PPC::ORI:
4721 case PPC::ORI8:
4722 case PPC::XORI:
4723 case PPC::XORI8: {
4724 int64_t LogicalImm = MI.getOperand(2).getImm();
4725 int64_t Result = 0;
4726 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4727 Result = LogicalImm | SExtImm;
4728 else
4729 Result = LogicalImm ^ SExtImm;
4730 if (isInt<16>(Result)) {
4731 ReplaceWithLI = true;
4732 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4733 NewImm = Result;
4734 break;
4735 }
4736 return false;
4737 }
4738 }
4739
4740 if (ReplaceWithLI) {
4741 // We need to be careful with CR-setting instructions we're replacing.
4742 if (SetCR) {
4743 // We don't know anything about uses when we're out of SSA, so only
4744 // replace if the new immediate will be reproduced.
4745 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4746 if (PostRA && ImmChanged)
4747 return false;
4748
4749 if (!PostRA) {
4750 // If the defining load-immediate has no other uses, we can just replace
4751 // the immediate with the new immediate.
4752 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4753 DefMI.getOperand(1).setImm(NewImm);
4754
4755 // If we're not using the GPR result of the CR-setting instruction, we
4756 // just need to and with zero/non-zero depending on the new immediate.
4757 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4758 if (NewImm) {
4759 assert(Immediate && "Transformation converted zero to non-zero?");
4760 NewImm = Immediate;
4761 }
4762 } else if (ImmChanged)
4763 return false;
4764 }
4765 }
4766
4767 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4768 LLVM_DEBUG(MI.dump());
4769 LLVM_DEBUG(dbgs() << "Fed by:\n");
4770 LLVM_DEBUG(DefMI.dump());
4772 LII.Imm = NewImm;
4773 LII.Is64Bit = Is64BitLI;
4774 LII.SetCR = SetCR;
4775 // If we're setting the CR, the original load-immediate must be kept (as an
4776 // operand to ANDI_rec/ANDI8_rec).
4777 if (KilledDef && SetCR)
4778 *KilledDef = nullptr;
4779 replaceInstrWithLI(MI, LII);
4780
4781 if (PostRA)
4782 recomputeLivenessFlags(*MI.getParent());
4783
4784 LLVM_DEBUG(dbgs() << "With:\n");
4785 LLVM_DEBUG(MI.dump());
4786 return true;
4787 }
4788 return false;
4789}
4790
4791bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4792 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4793 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4794 bool PostRA = !MRI->isSSA();
4795 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4796 // for post-ra.
4797 if (PostRA)
4798 return false;
4799
4800 // Only handle load/store.
4801 if (!MI.mayLoadOrStore())
4802 return false;
4803
4804 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4805
4806 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4807 "MI must have x-form opcode");
4808
4809 // get Imm Form info.
4810 ImmInstrInfo III;
4811 bool IsVFReg = MI.getOperand(0).isReg()
4812 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4813 : false;
4814
4815 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4816 return false;
4817
4818 if (!III.IsSummingOperands)
4819 return false;
4820
4821 if (OpNoForForwarding != III.OpNoForForwarding)
4822 return false;
4823
4824 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4825 if (!ImmOperandMI.isImm())
4826 return false;
4827
4828 // Check DefMI.
4829 MachineOperand *ImmMO = nullptr;
4830 MachineOperand *RegMO = nullptr;
4831 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4832 return false;
4833 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4834
4835 // Check Imm.
4836 // Set ImmBase from imm instruction as base and get new Imm inside
4837 // isImmElgibleForForwarding.
4838 int64_t ImmBase = ImmOperandMI.getImm();
4839 int64_t Imm = 0;
4840 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4841 return false;
4842
4843 // Do the transform
4844 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4845 LLVM_DEBUG(MI.dump());
4846 LLVM_DEBUG(dbgs() << "Fed by:\n");
4847 LLVM_DEBUG(DefMI.dump());
4848
4849 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4850 MI.getOperand(III.ImmOpNo).setImm(Imm);
4851
4852 LLVM_DEBUG(dbgs() << "With:\n");
4853 LLVM_DEBUG(MI.dump());
4854 return true;
4855}
4856
4857// If an X-Form instruction is fed by an add-immediate and one of its operands
4858// is the literal zero, attempt to forward the source of the add-immediate to
4859// the corresponding D-Form instruction with the displacement coming from
4860// the immediate being added.
// Returns true if MI was rewritten to III.ImmOpcode. Every failing eligibility
// check below returns false before MI is modified.
4861bool PPCInstrInfo::transformToImmFormFedByAdd(
4862 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4863 MachineInstr &DefMI, bool KillDefMI) const {
4864 // RegMO ImmMO
4865 // | |
4866 // x = addi reg, imm <----- DefMI
4867 // y = op 0 , x <----- MI
4868 // |
4869 // OpNoForForwarding
4870 // Check if the MI meets the requirement described in the III.
4871 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4872 return false;
4873
4874 // Check if the DefMI meets the requirement
4875 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4876 MachineOperand *ImmMO = nullptr;
4877 MachineOperand *RegMO = nullptr;
4878 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4879 return false;
4880 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4881
4882 // As we get the Imm operand now, we need to check if the ImmMO meets
4883 // the requirement described in the III. If yes set the Imm.
4884 int64_t Imm = 0;
4885 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4886 return false;
4887
4888 bool IsFwdFeederRegKilled = false;
4889 bool SeenIntermediateUse = false;
4890 // Check if the RegMO can be forwarded to MI.
4891 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4892 IsFwdFeederRegKilled, SeenIntermediateUse))
4893 return false;
4894
4895 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4896 bool PostRA = !MRI.isSSA();
4897
4898 // We know that the MI and DefMI both meet the pattern, and
4899 // the Imm also meets the requirement with the new Imm-form.
4900 // It is safe to do the transformation now.
4901 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4902 LLVM_DEBUG(MI.dump());
4903 LLVM_DEBUG(dbgs() << "Fed by:\n");
4904 LLVM_DEBUG(DefMI.dump());
4905
4906 // Update the base reg first.
4907 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4908 false, false,
4909 RegMO->isKill());
4910
4911 // Then, update the imm.
4912 if (ImmMO->isImm()) {
4913 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4914 // directly.
// NOTE(review): listing line 4915 is absent from this extract, so this branch
// is empty as shown. The statement that writes Imm into the operand is
// presumably missing -- confirm against the upstream file.
4916 }
4917 else {
4918 // Otherwise, it is Constant Pool Index(CPI) or Global,
4919 // which is relocation in fact. We need to replace the special zero
4920 // register with ImmMO.
4921 // Before that, we need to fixup the target flags for imm.
4922 // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4923 if (DefMI.getOpcode() == PPC::ADDItocL8)
// NOTE(review): listing line 4924 (the statement guarded by the `if` above) is
// absent from this extract -- confirm against the upstream file.
4925
4926 // MI didn't have the interface such as MI.setOperand(i) though
4927 // it has MI.getOperand(i). To replace the ZERO MachineOperand with
4928 // ImmMO, we need to remove ZERO operand and all the operands behind it,
4929 // and, add the ImmMO, then, move back all the operands behind ZERO.
// NOTE(review): listing line 4930 is absent from this extract; it presumably
// declares the MOps container used below -- confirm against the upstream file.
// The loop walks from the last operand down to index III.ZeroIsSpecialOrig,
// saving each operand in MOps and removing it from MI. `i` is unsigned, so the
// loop only terminates if III.ZeroIsSpecialOrig is non-zero.
4931 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
4932 MOps.push_back(MI.getOperand(i));
4933 MI.removeOperand(i);
4934 }
4935
4936 // Remove the last MO in the list, which is ZERO operand in fact.
4937 MOps.pop_back();
4938 // Add the imm operand.
4939 MI.addOperand(*ImmMO);
4940 // Now add the rest back.
4941 for (auto &MO : MOps)
4942 MI.addOperand(MO);
4943 }
4944
4945 // Update the opcode.
4946 MI.setDesc(get(III.ImmOpcode));
4947
// Liveness flags are recomputed for the whole block only when out of SSA.
4948 if (PostRA)
4949 recomputeLivenessFlags(*MI.getParent());
4950 LLVM_DEBUG(dbgs() << "With:\n");
4951 LLVM_DEBUG(MI.dump());
4952
4953 return true;
4954}
4955
// Rewrite the reg+reg instruction MI into its immediate form III.ImmOpcode,
// using the constant loaded by DefMI (which must be LI or LI8) in place of the
// register operand at index ConstantOpNo.
// Returns false, leaving MI unmodified, if DefMI is not a load-immediate, the
// operand position cannot be used, or the immediate does not satisfy the
// constraints recorded in III. Returns true after MI has been rewritten.
4956bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
4957 const ImmInstrInfo &III,
4958 unsigned ConstantOpNo,
4959 MachineInstr &DefMI) const {
4960 // DefMI must be LI or LI8.
4961 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4962 !DefMI.getOperand(1).isImm())
4963 return false;
4964
4965 // Get Imm operand and Sign-extend to 64-bits.
4966 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
4967
4968 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4969 bool PostRA = !MRI.isSSA();
4970 // Exit early if we can't convert this.
4971 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
4972 return false;
4973 if (Imm % III.ImmMustBeMultipleOf)
4974 return false;
4975 if (III.TruncateImmTo)
4976 Imm &= ((1 << III.TruncateImmTo) - 1);
// Range-check the (possibly truncated) immediate against III.ImmWidth, as a
// signed or unsigned field depending on III.SignedImm.
4977 if (III.SignedImm) {
4978 APInt ActualValue(64, Imm, true);
4979 if (!ActualValue.isSignedIntN(III.ImmWidth))
4980 return false;
4981 } else {
4982 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
4983 if ((uint64_t)Imm > UnsignedMax)
4984 return false;
4985 }
4986
4987 // If we're post-RA, the instructions don't agree on whether register zero is
4988 // special, we can transform this as long as the register operand that will
4989 // end up in the location where zero is special isn't R0.
4990 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
4991 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
4992 III.ZeroIsSpecialNew + 1;
4993 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
4994 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
4995 // If R0 is in the operand where zero is special for the new instruction,
4996 // it is unsafe to transform if the constant operand isn't that operand.
4997 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
4998 ConstantOpNo != III.ZeroIsSpecialNew)
4999 return false;
5000 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
5001 ConstantOpNo != PosForOrigZero)
5002 return false;
5003 }
5004
// Classify the original opcode before MI's descriptor is replaced below.
5005 unsigned Opc = MI.getOpcode();
5006 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5007 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5008 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5009 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5010 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5011 Opc == PPC::SRD || Opc == PPC::SRD_rec;
5012 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5013 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5014 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5015 Opc == PPC::SRD_rec;
5016
5017 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5018 LLVM_DEBUG(MI.dump());
5019 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5020 LLVM_DEBUG(DefMI.dump());
// From here on MI is mutated; there are no further failure exits.
5021 MI.setDesc(get(III.ImmOpcode));
5022 if (ConstantOpNo == III.OpNoForForwarding) {
5023 // Converting shifts to immediate form is a bit tricky since they may do
5024 // one of three things:
5025 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5026 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5027 // setting CR0)
5028 // 3. If the shift amount is in [1, OpSize), it's just a shift
5029 if (SpecialShift32 || SpecialShift64) {
// NOTE(review): listing line 5030 is absent from this extract; it presumably
// declares the LII object filled in below -- confirm against the upstream
// file.
5031 LII.Imm = 0;
5032 LII.SetCR = SetCR;
5033 LII.Is64Bit = SpecialShift64;
5034 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
// Case 1 above: the bit just above the in-range shift amount is set, so the
// instruction is replaced by a load-immediate of LII.Imm (zero).
5035 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5036 replaceInstrWithLI(MI, LII);
5037 // Shifts by zero don't change the value. If we don't need to set CR0,
5038 // just convert this to a COPY. Can't do this post-RA since we've already
5039 // cleaned up the copies.
5040 else if (!SetCR && ShAmt == 0 && !PostRA) {
5041 MI.removeOperand(2);
5042 MI.setDesc(get(PPC::COPY));
5043 } else {
5044 // The 32 bit and 64 bit instructions are quite different.
5045 if (SpecialShift32) {
5046 // Left shifts use (N, 0, 31-N).
5047 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5048 // use (0, 0, 31) if N == 0.
5049 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5050 uint64_t MB = RightShift ? ShAmt : 0;
5051 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
// NOTE(review): listing line 5052 is absent from this extract; SH is computed
// above but not used in the lines shown -- confirm against the upstream file.
5053 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5054 .addImm(ME);
5055 } else {
5056 // Left shifts use (N, 63-N).
5057 // Right shifts use (64-N, N) if 0 < N < 64.
5058 // use (0, 0) if N == 0.
5059 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5060 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
// NOTE(review): listing line 5061 is absent from this extract; SH is computed
// above but not used in the lines shown -- confirm against the upstream file.
5062 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5063 }
5064 }
5065 } else
5066 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5067 }
5068 // Convert commutative instructions (switch the operands and convert the
5069 // desired one to an immediate).
5070 else if (III.IsCommutative) {
5071 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5072 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5073 } else
5074 llvm_unreachable("Should have exited early!");
5075
5076 // For instructions for which the constant register replaces a different
5077 // operand than where the immediate goes, we need to swap them.
5078 if (III.OpNoForForwarding != III.ImmOpNo)
// NOTE(review): listing line 5079 (the statement guarded by the `if` above) is
// absent from this extract -- confirm against the upstream file.
5080
5081 // If the special R0/X0 register index are different for original instruction
5082 // and new instruction, we need to fix up the register class in new
5083 // instruction.
5084 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5085 if (III.ZeroIsSpecialNew) {
5086 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5087 // need to fix up register class.
5088 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5089 if (RegToModify.isVirtual()) {
5090 const TargetRegisterClass *NewRC =
5091 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5092 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5093 MRI.setRegClass(RegToModify, NewRC);
5094 }
5095 }
5096 }
5097
5098 if (PostRA)
5099 recomputeLivenessFlags(*MI.getParent());
5100
5101 LLVM_DEBUG(dbgs() << "With: ");
5102 LLVM_DEBUG(MI.dump());
5103 LLVM_DEBUG(dbgs() << "\n");
5104 return true;
5105}
5106
// Maps PPC::VRRCRegClass to PPC::VSRCRegClass when the subtarget has VSX;
// any other register class (or a non-VSX subtarget) is returned unchanged.
// NOTE(review): listing line 5108, which carries the function name and
// parameter list, is absent from this extract -- confirm against the upstream
// file.
5107const TargetRegisterClass *
5109 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5110 return &PPC::VSRCRegClass;
5111 return RC;
5112}
5113
// Forwards Opcode to PPC::getRecordFormOpcode and returns its result.
// NOTE(review): listing line 5114, which carries the function declarator, is
// absent from this extract -- confirm against the upstream file.
5115 return PPC::getRecordFormOpcode(Opcode);
5116}
5117
5118static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5119 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5120 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5121 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5122 Opcode == PPC::LHZUX8);
5123}
5124
5125// This function checks for sign extension from 32 bits to 64 bits.
// It looks up the instruction defining Reg via MRI->getVRegDef and returns
// true if any of the opcode-based checks below matches. It returns false when
// no defining instruction is found or no check matches.
5126static bool definedBySignExtendingOp(const unsigned Reg,
5127 const MachineRegisterInfo *MRI) {
// NOTE(review): listing line 5128 (the condition guarding the early return
// below) is absent from this extract -- confirm against the upstream file.
5129 return false;
5130
5131 MachineInstr *MI = MRI->getVRegDef(Reg);
5132 if (!MI)
5133 return false;
5134
5135 int Opcode = MI->getOpcode();
5136 const PPCInstrInfo *TII =
5137 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5138 if (TII->isSExt32To64(Opcode))
5139 return true;
5140
5141 // The first def of LBZU/LHZU is sign extended.
5142 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5143 return true;
5144
5145 // RLDICL generates sign-extended output if it clears at least
5146 // 33 bits from the left (MSB).
5147 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5148 return true;
5149
5150 // If at least one bit from left in a lower word is masked out,
5151 // all of 0 to 32-th bits of the output are cleared.
5152 // Hence the output is already sign extended.
5153 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5154 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5155 MI->getOperand(3).getImm() > 0 &&
5156 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5157 return true;
5158
5159 // If the most significant bit of immediate in ANDIS is zero,
5160 // all of 0 to 32-th bits are cleared.
5161 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
// Only the low 16 bits of operand 2 are inspected.
5162 uint16_t Imm = MI->getOperand(2).getImm();
5163 if ((Imm & 0x8000) == 0)
5164 return true;
5165 }
5166
5167 return false;
5168}
5169
5170// This function checks the machine instruction that defines the input register
5171// Reg. If that machine instruction always outputs a value that has only zeros
5172// in the higher 32 bits then this function will return true.
// It returns false when no defining instruction is found or none of the
// opcode-based checks below matches.
5173static bool definedByZeroExtendingOp(const unsigned Reg,
5174 const MachineRegisterInfo *MRI) {
// NOTE(review): listing line 5175 (the condition guarding the early return
// below) is absent from this extract -- confirm against the upstream file.
5176 return false;
5177
5178 MachineInstr *MI = MRI->getVRegDef(Reg);
5179 if (!MI)
5180 return false;
5181
5182 int Opcode = MI->getOpcode();
5183 const PPCInstrInfo *TII =
5184 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5185 if (TII->isZExt32To64(Opcode))
5186 return true;
5187
5188 // The first def of LBZU/LHZU/LWZU are zero extended.
5189 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5190 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5191 MI->getOperand(0).getReg() == Reg)
5192 return true;
5193
5194 // The 16-bit immediate is sign-extended in li/lis.
5195 // If the most significant bit is zero, all higher bits are zero.
5196 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5197 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5198 int64_t Imm = MI->getOperand(1).getImm();
// True only when no bit above bit 14 of the immediate is set.
5199 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5200 return true;
5201 }
5202
5203 // We have some variations of rotate-and-mask instructions
5204 // that clear higher 32-bits.
5205 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5206 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5207 Opcode == PPC::RLDICL_32_64) &&
5208 MI->getOperand(3).getImm() >= 32)
5209 return true;
5210
5211 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5212 MI->getOperand(3).getImm() >= 32 &&
5213 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5214 return true;
5215
5216 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5217 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5218 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5219 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5220 return true;
5221
5222 return false;
5223}
5224
5225// This function returns true if the input MachineInstr is a TOC save
5226// instruction.
// Concretely: operand 1 must be an immediate equal to the frame lowering's
// TOC save offset, and operand 2 must be the stack pointer register (X1 on
// 64-bit subtargets, R1 otherwise).
// NOTE(review): listing line 5227, which carries the function declarator, is
// absent from this extract -- confirm against the upstream file.
5228 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5229 return false;
5230 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5231 unsigned StackOffset = MI.getOperand(1).getImm();
5232 Register StackReg = MI.getOperand(2).getReg();
5233 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5234 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5235 return true;
5236
5237 return false;
5238}
5239
5240// We limit the max depth to track incoming values of PHIs or binary ops
5241// (e.g. AND) to avoid excessive cost.
// Once the BinOpDepth argument reaches this limit, the OR/ISEL/PHI and AND
// cases of isSignOrZeroExtended give up and report (false, false).
5242const unsigned MAX_BINOP_DEPTH = 1;
5243// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5244// does not count all of the recursions. The parameter BinOpDepth is incremented
5245// only when isSignOrZeroExtended calls itself more than once. This is done to
5246// prevent exponential recursion. There is no parameter to track linear
5247// recursion.
// The returned pair is (IsSExt, IsZExt): `first` reports whether the value in
// Reg is known to be sign-extended, `second` whether it is known to be
// zero-extended.
5248std::pair<bool, bool>
// NOTE(review): listing line 5249, which carries the function name and first
// parameter, is absent from this extract. The recursive calls below pass
// (register, BinOpDepth, MRI) -- confirm against the upstream file.
5250 const unsigned BinOpDepth,
5251 const MachineRegisterInfo *MRI) const {
// NOTE(review): listing line 5252 (the condition guarding the early return
// below) is absent from this extract -- confirm against the upstream file.
5253 return std::pair<bool, bool>(false, false);
5254
5255 MachineInstr *MI = MRI->getVRegDef(Reg);
5256 if (!MI)
5257 return std::pair<bool, bool>(false, false);
5258
5259 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5260 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5261
5262 // If we know the instruction always returns sign- and zero-extended result,
5263 // return here.
5264 if (IsSExt && IsZExt)
5265 return std::pair<bool, bool>(IsSExt, IsZExt);
5266
5267 switch (MI->getOpcode()) {
5268 case PPC::COPY: {
5269 Register SrcReg = MI->getOperand(1).getReg();
5270
5271 // In both ELFv1 and v2 ABI, method parameters and the return value
5272 // are sign- or zero-extended.
5273 const MachineFunction *MF = MI->getMF();
5274
5275 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5276 // If this is a copy from another register, we recursively check source.
5277 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5278 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5279 SrcExt.second || IsZExt);
5280 }
5281
5282 // From here on everything is SVR4ABI
5283 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5284 // We check the ZExt/SExt flags for a method parameter.
5285 if (MI->getParent()->getBasicBlock() ==
5286 &MF->getFunction().getEntryBlock()) {
5287 Register VReg = MI->getOperand(0).getReg();
5288 if (MF->getRegInfo().isLiveIn(VReg)) {
5289 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5290 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5291 return std::pair<bool, bool>(IsSExt, IsZExt);
5292 }
5293 }
5294
5295 if (SrcReg != PPC::X3) {
5296 // If this is a copy from another register, we recursively check source.
5297 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5298 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5299 SrcExt.second || IsZExt);
5300 }
5301
5302 // For a method return value, we check the ZExt/SExt flags in attribute.
5303 // We assume the following code sequence for method call.
5304 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5305 // BL8_NOP @func,...
5306 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5307 // %5 = COPY %x3; G8RC:%5
5308 const MachineBasicBlock *MBB = MI->getParent();
5309 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
// NOTE(review): listing lines 5310-5311 are absent from this extract; they
// presumably declare the instruction iterator II used below -- confirm
// against the upstream file.
// Step back from the COPY: the previous instruction must be ADJCALLSTACKUP
// and the one before it is taken as the call.
5312 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5313 return IsExtendPair;
5314
5315 const MachineInstr &CallMI = *(--II);
5316 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5317 return IsExtendPair;
5318
5319 const Function *CalleeFn =
5320 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5321 if (!CalleeFn)
5322 return IsExtendPair;
// Only integer return types of at most 32 bits consult the callee's return
// attributes.
5323 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5324 if (IntTy && IntTy->getBitWidth() <= 32) {
5325 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5326 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5327 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5328 return std::pair<bool, bool>(IsSExt, IsZExt);
5329 }
5330
5331 return IsExtendPair;
5332 }
5333
5334 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5335 // So, we track the operand register as we do for register copy.
5336 case PPC::ORI:
5337 case PPC::XORI:
5338 case PPC::ORI8:
5339 case PPC::XORI8: {
5340 Register SrcReg = MI->getOperand(1).getReg();
5341 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5342 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5343 SrcExt.second || IsZExt);
5344 }
5345
5346 // OR, XOR with shifted 16-bit immediate does not change the upper
5347 // 32 bits. So, we track the operand register for zero extension.
5348 // For sign extension when the MSB of the immediate is zero, we also
5349 // track the operand register since the upper 33 bits are unchanged.
5350 case PPC::ORIS:
5351 case PPC::XORIS:
5352 case PPC::ORIS8:
5353 case PPC::XORIS8: {
5354 Register SrcReg = MI->getOperand(1).getReg();
5355 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5356 uint16_t Imm = MI->getOperand(2).getImm();
5357 if (Imm & 0x8000)
5358 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5359 else
5360 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5361 SrcExt.second || IsZExt);
5362 }
5363
5364 // If all incoming values are sign-/zero-extended,
5365 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5366 case PPC::OR:
5367 case PPC::OR8:
5368 case PPC::ISEL:
5369 case PPC::PHI: {
5370 if (BinOpDepth >= MAX_BINOP_DEPTH)
5371 return std::pair<bool, bool>(false, false);
5372
5373 // The input registers for PHI are operand 1, 3, ...
5374 // The input registers for others are operand 1 and 2.
5375 unsigned OperandEnd = 3, OperandStride = 1;
5376 if (MI->getOpcode() == PPC::PHI) {
5377 OperandEnd = MI->getNumOperands();
5378 OperandStride = 2;
5379 }
5380
// Start from "both hold" and AND in each input's result.
5381 IsSExt = true;
5382 IsZExt = true;
5383 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5384 if (!MI->getOperand(I).isReg())
5385 return std::pair<bool, bool>(false, false);
5386
5387 Register SrcReg = MI->getOperand(I).getReg();
5388 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5389 IsSExt &= SrcExt.first;
5390 IsZExt &= SrcExt.second;
5391 }
5392 return std::pair<bool, bool>(IsSExt, IsZExt);
5393 }
5394
5395 // If at least one of the incoming values of an AND is zero extended
5396 // then the output is also zero-extended. If both of the incoming values
5397 // are sign-extended then the output is also sign extended.
5398 case PPC::AND:
5399 case PPC::AND8: {
5400 if (BinOpDepth >= MAX_BINOP_DEPTH)
5401 return std::pair<bool, bool>(false, false);
5402
5403 Register SrcReg1 = MI->getOperand(1).getReg();
5404 Register SrcReg2 = MI->getOperand(2).getReg();
5405 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5406 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5407 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5408 Src1Ext.second || Src2Ext.second);
5409 }
5410
5411 default:
5412 break;
5413 }
5414 return std::pair<bool, bool>(IsSExt, IsZExt);
5415}
5416
5417bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5418 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5419}
5420
5421namespace {
// PipelinerLoopInfo implementation for PowerPC. It holds the loop set-up
// instruction (Loop), the loop-ending terminator (EndLoop) and the instruction
// defining the loop count (LoopCount). TripCount caches the count when
// LoopCount is an LI/LI8, and is -1 otherwise.
5422class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5423 MachineInstr *Loop, *EndLoop, *LoopCount;
5424 MachineFunction *MF;
5425 const TargetInstrInfo *TII;
5426 int64_t TripCount;
5427
5428public:
5429 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5430 MachineInstr *LoopCount)
5431 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5432 MF(Loop->getParent()->getParent()),
5433 TII(MF->getSubtarget().getInstrInfo()) {
5434 // Inspect the Loop instruction up-front, as it may be deleted when we call
5435 // createTripCountGreaterCondition.
5436 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5437 TripCount = LoopCount->getOperand(1).getImm();
5438 else
5439 TripCount = -1;
5440 }
5441
5442 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5443 // Only ignore the terminator.
5444 return MI == EndLoop;
5445 }
5446
// When the trip count is not a compile-time constant (TripCount == -1), this
// appends operands to Cond and returns an empty optional. Otherwise it
// returns whether TripCount > TC.
5447 std::optional<bool> createTripCountGreaterCondition(
5448 int TC, MachineBasicBlock &MBB,
// NOTE(review): listing line 5449 is absent from this extract; it presumably
// declares the Cond parameter and closes the parameter list -- confirm
// against the upstream file.
5450 if (TripCount == -1) {
5451 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5452 // so we don't need to generate any thing here.
5453 Cond.push_back(MachineOperand::CreateImm(0));
// NOTE(review): listing line 5454 is absent from this extract; the two lines
// below are the tail of a call that names CTR8/CTR -- confirm against the
// upstream file.
5455 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5456 true));
5457 return {};
5458 }
5459
5460 return TripCount > TC;
5461 }
5462
5463 void setPreheader(MachineBasicBlock *NewPreheader) override {
5464 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5465 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5466 }
5467
5468 void adjustTripCount(int TripCountAdjust) override {
5469 // If the loop trip count is a compile-time value, then just change the
5470 // value.
5471 if (LoopCount->getOpcode() == PPC::LI8 ||
5472 LoopCount->getOpcode() == PPC::LI) {
// This local shadows the TripCount member; only the LI/LI8 immediate is
// updated, the cached member is not.
5473 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5474 LoopCount->getOperand(1).setImm(TripCount);
5475 return;
5476 }
5477
5478 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5479 // so we don't need to generate any thing here.
5480 }
5481
5482 void disposed() override {
5483 Loop->eraseFromParent();
5484 // Ensure the loop setup instruction is deleted too.
5485 LoopCount->eraseFromParent();
5486 }
5487};
5488} // namespace
5489
// Builds a PPCPipelinerLoopInfo for LoopBB when the instruction I is a BDNZ
// (per isBDNZ) and findLoopInstr finds a loop set-up instruction in the
// preheader; returns nullptr otherwise. The preheader is taken to be LoopBB's
// first predecessor, or the second one if the first is LoopBB itself.
// NOTE(review): listing lines 5491, 5493, 5500 and 5503 are absent from this
// extract. They presumably hold the function declarator and the declarations
// of I, Visited and MRI used below -- confirm against the upstream file.
5490std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5492 // We really "analyze" only hardware loops right now.
5494 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5495 if (Preheader == LoopBB)
5496 Preheader = *std::next(LoopBB->pred_begin());
5497 MachineFunction *MF = Preheader->getParent();
5498
5499 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5501 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
// Operand 0 of the set-up instruction is the loop-count register; its unique
// defining instruction is handed to the loop-info object as LoopCount.
5502 Register LoopCountReg = LoopInst->getOperand(0).getReg();
5504 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5505 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5506 }
5507 }
5508 return nullptr;
5509}
5510
// Returns the first instruction in PreHeader whose opcode is the loop set-up
// opcode (MTCTR8loop on 64-bit subtargets, MTCTRloop otherwise), or nullptr
// if there is none. The Visited parameter is not used in the lines shown.
// NOTE(review): listing line 5511, which carries the return type and function
// name, is absent from this extract -- confirm against the upstream file.
5512 MachineBasicBlock &PreHeader,
5513 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5514
5515 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5516
5517 // The loop set-up instruction should be in preheader
5518 for (auto &I : PreHeader.instrs())
5519 if (I.getOpcode() == LOOPi)
5520 return &I;
5521 return nullptr;
5522}
5523
5524// Return true if get the base operand, byte offset of an instruction and the
5525// memory width. Width is the size of memory that is being loaded/stored.
5527 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5528 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5529 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5530 return false;
5531
5532 // Handle only loads/stores with base register followed by immediate offset.
5533 if (!LdSt.getOperand(1).isImm() ||
5534 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5535 return false;
5536 if (!LdSt.getOperand(1).isImm() ||
5537 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5538 return false;
5539
5540 if (!LdSt.hasOneMemOperand())
5541 return false;
5542
5543 Width = (*LdSt.memoperands_begin())->getSize();
5544 Offset = LdSt.getOperand(1).getImm();
5545 BaseReg = &LdSt.getOperand(2);
5546 return true;
5547}
5548
// Returns true when MIa and MIb can be shown to access non-overlapping memory:
// both must decompose via getMemOperandWithOffsetWidth, share an identical
// base operand, and the lower access must end at or before the higher
// access's offset. Returns false in every other case.
// NOTE(review): listing line 5549, which carries the return type and function
// name, is absent from this extract -- confirm against the upstream file.
5550 const MachineInstr &MIa, const MachineInstr &MIb) const {
5551 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5552 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5553
// NOTE(review): listing lines 5554-5555 (the condition guarding the early
// return below) are absent from this extract -- confirm against the upstream
// file.
5556 return false;
5557
5558 // Retrieve the base register, offset from the base register and width. Width
5559 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5560 // base registers are identical, and the offset of a lower memory access +
5561 // the width doesn't overlap the offset of a higher memory access,
5562 // then the memory accesses are different.
// NOTE(review): listing line 5563 is absent from this extract; it presumably
// declares the TRI pointer passed below -- confirm against the upstream file.
5564 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5565 int64_t OffsetA = 0, OffsetB = 0;
5566 LocationSize WidthA = 0, WidthB = 0;
5567 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5568 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5569 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
// NOTE(review): OffsetA/OffsetB are int64_t but are narrowed to int here, and
// the width is cast to int below -- confirm the offsets and widths reaching
// this point always fit in an int.
5570 int LowOffset = std::min(OffsetA, OffsetB);
5571 int HighOffset = std::max(OffsetA, OffsetB);
5572 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
// A width without a known value can never prove disjointness.
5573 if (LowWidth.hasValue() &&
5574 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5575 return true;
5576 }
5577 }
5578 return false;
5579}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static unsigned getSize(unsigned Kind)
void changeSign()
Definition: APFloat.h:1202
Class for arbitrary precision integers.
Definition: APInt.h:77
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition: APInt.cpp:1111
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:249
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
const BasicBlock & getEntryBlock() const
Definition: Function.h:800
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:212
A possibly irreducible generalization of a Loop.
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:124
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
bool hasValue() const
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
Definition: MCInstrDesc.h:565
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:269
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
uint16_t Constraints
Operand constraints (see OperandConstraint enum).
Definition: MCInstrDesc.h:100
bool isLookupPtrRegClass() const
Set if this operand is a pointer value and it requires a callback to look up its register class.
Definition: MCInstrDesc.h:104
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:91
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:950
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:733
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:643
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:815
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:800
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automaton that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
PPCInstrInfo(PPCSubtarget &STI)
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
static int getRecordFormOpcode(unsigned Opcode)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
Definition: PPCInstrInfo.h:276
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:274
CombinerObjective getCombinerObjective(unsigned Pattern) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check whether Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:623
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Definition: PPCInstrInfo.h:506
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if the base operand, byte offset, and memory width of an instruction can be obtained.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:617
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const
getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode for a given imm form load/s...
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:208
void setGlibcHWCAPAccess(bool Val=true) const
void dump() const
Definition: Pass.cpp:136
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and record live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeModel::Model getCodeModel() const
Returns the code model.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:703
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ MO_TOC_LO
Definition: PPC.h:185
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions such that the condition register is set by MI(b,a).
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
static bool isVFRegister(unsigned Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:31
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
static unsigned getCRFromCRBit(unsigned SrcReg)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
@ REASSOC_XY_BCA
Definition: PPCInstrInfo.h:96
@ REASSOC_XY_BAC
Definition: PPCInstrInfo.h:97
@ REASSOC_XY_AMM_BMM
Definition: PPCInstrInfo.h:91
@ REASSOC_XMM_AMM_BMM
Definition: PPCInstrInfo.h:92
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
unsigned getKillRegState(bool B)
@ SOK_CRBitSpill
Definition: PPCInstrInfo.h:73
@ SOK_VSXVectorSpill
Definition: PPCInstrInfo.h:75
@ SOK_SpillToVSR
Definition: PPCInstrInfo.h:78
@ SOK_Int4Spill
Definition: PPCInstrInfo.h:68
@ SOK_PairedVecSpill
Definition: PPCInstrInfo.h:79
@ SOK_VectorFloat8Spill
Definition: PPCInstrInfo.h:76
@ SOK_UAccumulatorSpill
Definition: PPCInstrInfo.h:81
@ SOK_PairedG8Spill
Definition: PPCInstrInfo.h:84
@ SOK_VectorFloat4Spill
Definition: PPCInstrInfo.h:77
@ SOK_Float8Spill
Definition: PPCInstrInfo.h:70
@ SOK_Float4Spill
Definition: PPCInstrInfo.h:71
@ SOK_VRVectorSpill
Definition: PPCInstrInfo.h:74
@ SOK_WAccumulatorSpill
Definition: PPCInstrInfo.h:82
@ SOK_SPESpill
Definition: PPCInstrInfo.h:83
@ SOK_CRSpill
Definition: PPCInstrInfo.h:72
@ SOK_AccumulatorSpill
Definition: PPCInstrInfo.h:80
@ SOK_Int8Spill
Definition: PPCInstrInfo.h:69
@ SOK_LastOpcodeSpill
Definition: PPCInstrInfo.h:85
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t IsSummingOperands
Definition: PPCInstrInfo.h:55
uint64_t OpNoForForwarding
Definition: PPCInstrInfo.h:45
uint64_t ImmMustBeMultipleOf
Definition: PPCInstrInfo.h:35
uint64_t IsCommutative
Definition: PPCInstrInfo.h:43
uint64_t ZeroIsSpecialNew
Definition: PPCInstrInfo.h:41
uint64_t TruncateImmTo
Definition: PPCInstrInfo.h:53
uint64_t ZeroIsSpecialOrig
Definition: PPCInstrInfo.h:38
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.