LLVM 19.0.0git
PPCInstrInfo.cpp
Go to the documentation of this file.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCInst.h"
42#include "llvm/Support/Debug.h"
45
46using namespace llvm;
47
48#define DEBUG_TYPE "ppc-instr-info"
49
50#define GET_INSTRMAP_INFO
51#define GET_INSTRINFO_CTOR_DTOR
52#include "PPCGenInstrInfo.inc"
53
54STATISTIC(NumStoreSPILLVSRRCAsVec,
55 "Number of spillvsrrc spilled to stack as vec");
56STATISTIC(NumStoreSPILLVSRRCAsGpr,
57 "Number of spillvsrrc spilled to stack as gpr");
58STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
59STATISTIC(CmpIselsConverted,
60 "Number of ISELs that depend on comparison of constants converted");
61STATISTIC(MissedConvertibleImmediateInstrs,
62 "Number of compare-immediate instructions fed by constants");
63STATISTIC(NumRcRotatesConvertedToRcAnd,
64 "Number of record-form rotates converted to record-form andi");
65
66static cl::
67opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
68 cl::desc("Disable analysis for CTR loops"));
69
70static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
71cl::desc("Disable compare instruction optimization"), cl::Hidden);
72
73static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
74cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
76
77static cl::opt<bool>
78UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
79 cl::desc("Use the old (incorrect) instruction latency calculation"));
80
81static cl::opt<float>
82 FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
83 cl::desc("register pressure factor for the transformations."));
84
86 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
87 cl::desc("enable register pressure reduce in machine combiner pass."));
88
// Out-of-line virtual method "anchor": emitting one virtual method here pins
// the vtable (and type info) for PPCInstrInfo to this translation unit.
void PPCInstrInfo::anchor() {}
91
93 : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
94 /* CatchRetOpcode */ -1,
95 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
96 Subtarget(STI), RI(STI.getTargetMachine()) {}
97
98/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
99/// this target when scheduling the DAG.
102 const ScheduleDAG *DAG) const {
103 unsigned Directive =
104 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
107 const InstrItineraryData *II =
108 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
109 return new ScoreboardHazardRecognizer(II, DAG);
110 }
111
113}
114
115/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
116/// to use for this target when scheduling the DAG.
119 const ScheduleDAG *DAG) const {
120 unsigned Directive =
121 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
122
123 // FIXME: Leaving this as-is until we have POWER9 scheduling info
125 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
126
127 // Most subtargets use a PPC970 recognizer.
130 assert(DAG->TII && "No InstrInfo?");
131
132 return new PPCHazardRecognizer970(*DAG);
133 }
134
135 return new ScoreboardHazardRecognizer(II, DAG);
136}
137
139 const MachineInstr &MI,
140 unsigned *PredCost) const {
141 if (!ItinData || UseOldLatencyCalc)
142 return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
143
144 // The default implementation of getInstrLatency calls getStageLatency, but
145 // getStageLatency does not do the right thing for us. While we have
146 // itinerary, most cores are fully pipelined, and so the itineraries only
147 // express the first part of the pipeline, not every stage. Instead, we need
148 // to use the listed output operand cycle number (using operand 0 here, which
149 // is an output).
150
151 unsigned Latency = 1;
152 unsigned DefClass = MI.getDesc().getSchedClass();
153 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
154 const MachineOperand &MO = MI.getOperand(i);
155 if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
156 continue;
157
158 std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
159 if (!Cycle)
160 continue;
161
162 Latency = std::max(Latency, *Cycle);
163 }
164
165 return Latency;
166}
167
168std::optional<unsigned> PPCInstrInfo::getOperandLatency(
169 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
170 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
171 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
172 ItinData, DefMI, DefIdx, UseMI, UseIdx);
173
174 if (!DefMI.getParent())
175 return Latency;
176
177 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
178 Register Reg = DefMO.getReg();
179
180 bool IsRegCR;
181 if (Reg.isVirtual()) {
182 const MachineRegisterInfo *MRI =
183 &DefMI.getParent()->getParent()->getRegInfo();
184 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
185 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
186 } else {
187 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
188 PPC::CRBITRCRegClass.contains(Reg);
189 }
190
191 if (UseMI.isBranch() && IsRegCR) {
192 if (!Latency)
193 Latency = getInstrLatency(ItinData, DefMI);
194
195 // On some cores, there is an additional delay between writing to a condition
196 // register, and using it from a branch.
197 unsigned Directive = Subtarget.getCPUDirective();
198 switch (Directive) {
199 default: break;
200 case PPC::DIR_7400:
201 case PPC::DIR_750:
202 case PPC::DIR_970:
203 case PPC::DIR_E5500:
204 case PPC::DIR_PWR4:
205 case PPC::DIR_PWR5:
206 case PPC::DIR_PWR5X:
207 case PPC::DIR_PWR6:
208 case PPC::DIR_PWR6X:
209 case PPC::DIR_PWR7:
210 case PPC::DIR_PWR8:
211 // FIXME: Is this needed for POWER9?
212 Latency = *Latency + 2;
213 break;
214 }
215 }
216
217 return Latency;
218}
219
221 uint32_t Flags) const {
222 MI.setFlags(Flags);
226}
227
228// This function does not list all associative and commutative operations, but
229// only those worth feeding through the machine combiner in an attempt to
230// reduce the critical path. Mostly, this means floating-point operations,
231// because they have high latencies(>=5) (compared to other operations, such as
232// and/or, which are also associative and commutative, but have low latencies).
234 bool Invert) const {
235 if (Invert)
236 return false;
237 switch (Inst.getOpcode()) {
238 // Floating point:
239 // FP Add:
240 case PPC::FADD:
241 case PPC::FADDS:
242 // FP Multiply:
243 case PPC::FMUL:
244 case PPC::FMULS:
245 // Altivec Add:
246 case PPC::VADDFP:
247 // VSX Add:
248 case PPC::XSADDDP:
249 case PPC::XVADDDP:
250 case PPC::XVADDSP:
251 case PPC::XSADDSP:
252 // VSX Multiply:
253 case PPC::XSMULDP:
254 case PPC::XVMULDP:
255 case PPC::XVMULSP:
256 case PPC::XSMULSP:
259 // Fixed point:
260 // Multiply:
261 case PPC::MULHD:
262 case PPC::MULLD:
263 case PPC::MULHW:
264 case PPC::MULLW:
265 return true;
266 default:
267 return false;
268 }
269}
270
// Column indices for each row of the FMAOpIdxInfo table below.
#define InfoArrayIdxFMAInst 0
#define InfoArrayIdxFAddInst 1
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
#define InfoArrayIdxFSubInst 5
// Array keeps info for FMA instructions:
// Index 0(InfoArrayIdxFMAInst): FMA instruction;
// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
//                                second MUL operand index is plus 1;
// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
static const uint16_t FMAOpIdxInfo[][6] = {
    // FIXME: Add more FMA instructions like XSNMADDADP and so on.
    {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
    {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
293
294// Check if an opcode is a FMA instruction. If it is, return the index in array
295// FMAOpIdxInfo. Otherwise, return -1.
296int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
297 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
298 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
299 return I;
300 return -1;
301}
302
303// On PowerPC target, we have two kinds of patterns related to FMA:
304// 1: Improve ILP.
305// Try to reassociate FMA chains like below:
306//
307// Pattern 1:
308// A = FADD X, Y (Leaf)
309// B = FMA A, M21, M22 (Prev)
310// C = FMA B, M31, M32 (Root)
311// -->
312// A = FMA X, M21, M22
313// B = FMA Y, M31, M32
314// C = FADD A, B
315//
316// Pattern 2:
317// A = FMA X, M11, M12 (Leaf)
318// B = FMA A, M21, M22 (Prev)
319// C = FMA B, M31, M32 (Root)
320// -->
321// A = FMUL M11, M12
322// B = FMA X, M21, M22
323// D = FMA A, M31, M32
324// C = FADD B, D
325//
326// breaking the dependency between A and B, allowing FMA to be executed in
327// parallel (or back-to-back in a pipeline) instead of depending on each other.
328//
329// 2: Reduce register pressure.
330// Try to reassociate FMA with FSUB and a constant like below:
331// C is a floating point const.
332//
333// Pattern 1:
334// A = FSUB X, Y (Leaf)
335// D = FMA B, C, A (Root)
336// -->
337// A = FMA B, Y, -C
338// D = FMA A, X, C
339//
340// Pattern 2:
341// A = FSUB X, Y (Leaf)
342// D = FMA B, A, C (Root)
343// -->
344// A = FMA B, Y, -C
345// D = FMA A, X, C
346//
// Before the transformation, A must be assigned to a hardware register
// different from D's. After the transformation, A and D must be assigned the
// same hardware register due to the TIE attribute of FMA instructions.
350//
353 bool DoRegPressureReduce) const {
357
358 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
359 for (const auto &MO : Instr.explicit_operands())
360 if (!(MO.isReg() && MO.getReg().isVirtual()))
361 return false;
362 return true;
363 };
364
365 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
366 unsigned OpType) {
367 if (Instr.getOpcode() !=
368 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
369 return false;
370
371 // Instruction can be reassociated.
372 // fast math flags may prohibit reassociation.
373 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
374 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
375 return false;
376
377 // Instruction operands are virtual registers for reassociation.
378 if (!IsAllOpsVirtualReg(Instr))
379 return false;
380
381 // For register pressure reassociation, the FSub must have only one use as
382 // we want to delete the sub to save its def.
383 if (OpType == InfoArrayIdxFSubInst &&
384 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
385 return false;
386
387 return true;
388 };
389
390 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
391 int16_t &MulOpIdx, bool IsLeaf) {
392 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
393 if (Idx < 0)
394 return false;
395
396 // Instruction can be reassociated.
397 // fast math flags may prohibit reassociation.
398 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
399 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
400 return false;
401
402 // Instruction operands are virtual registers for reassociation.
403 if (!IsAllOpsVirtualReg(Instr))
404 return false;
405
407 if (IsLeaf)
408 return true;
409
411
412 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
413 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
414 // If 'add' operand's def is not in current block, don't do ILP related opt.
415 if (!MIAdd || MIAdd->getParent() != MBB)
416 return false;
417
418 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
419 // as this fma will be changed later.
420 return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
421 };
422
423 int16_t AddOpIdx = -1;
424 int16_t MulOpIdx = -1;
425
426 bool IsUsedOnceL = false;
427 bool IsUsedOnceR = false;
428 MachineInstr *MULInstrL = nullptr;
429 MachineInstr *MULInstrR = nullptr;
430
431 auto IsRPReductionCandidate = [&]() {
432 // Currently, we only support float and double.
433 // FIXME: add support for other types.
434 unsigned Opcode = Root.getOpcode();
435 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
436 return false;
437
438 // Root must be a valid FMA like instruction.
439 // Treat it as leaf as we don't care its add operand.
440 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
441 assert((MulOpIdx >= 0) && "mul operand index not right!");
442 Register MULRegL = TRI->lookThruSingleUseCopyChain(
443 Root.getOperand(MulOpIdx).getReg(), MRI);
444 Register MULRegR = TRI->lookThruSingleUseCopyChain(
445 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
446 if (!MULRegL && !MULRegR)
447 return false;
448
449 if (MULRegL && !MULRegR) {
450 MULRegR =
451 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
452 IsUsedOnceL = true;
453 } else if (!MULRegL && MULRegR) {
454 MULRegL =
455 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
456 IsUsedOnceR = true;
457 } else {
458 IsUsedOnceL = true;
459 IsUsedOnceR = true;
460 }
461
462 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
463 return false;
464
465 MULInstrL = MRI->getVRegDef(MULRegL);
466 MULInstrR = MRI->getVRegDef(MULRegR);
467 return true;
468 }
469 return false;
470 };
471
472 // Register pressure fma reassociation patterns.
473 if (DoRegPressureReduce && IsRPReductionCandidate()) {
474 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
475 // Register pressure pattern 1
476 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
477 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
478 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
480 return true;
481 }
482
483 // Register pressure pattern 2
484 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
485 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
486 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
488 return true;
489 }
490 }
491
492 // ILP fma reassociation patterns.
493 // Root must be a valid FMA like instruction.
494 AddOpIdx = -1;
495 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
496 return false;
497
498 assert((AddOpIdx >= 0) && "add operand index not right!");
499
500 Register RegB = Root.getOperand(AddOpIdx).getReg();
501 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
502
503 // Prev must be a valid FMA like instruction.
504 AddOpIdx = -1;
505 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
506 return false;
507
508 assert((AddOpIdx >= 0) && "add operand index not right!");
509
510 Register RegA = Prev->getOperand(AddOpIdx).getReg();
511 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
512 AddOpIdx = -1;
513 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
515 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
516 return true;
517 }
518 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
520 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
521 return true;
522 }
523 return false;
524}
525
528 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
529 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
530
531 MachineFunction *MF = Root.getMF();
535
536 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
537 if (Idx < 0)
538 return;
539
541
542 // For now we only need to fix up placeholder for register pressure reduce
543 // patterns.
544 Register ConstReg = 0;
545 switch (P) {
547 ConstReg =
548 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
549 break;
551 ConstReg =
552 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
553 break;
554 default:
555 // Not register pressure reduce patterns.
556 return;
557 }
558
559 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
560 // Get const value from const pool.
561 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
562 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
563
564 // Get negative fp const.
565 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
566 F1.changeSign();
567 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
568 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
569
570 // Put negative fp const into constant pool.
571 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
572
573 MachineOperand *Placeholder = nullptr;
574 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
575 for (auto *Inst : InsInstrs) {
576 for (MachineOperand &Operand : Inst->explicit_operands()) {
577 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
578 if (Operand.getReg() == PPC::ZERO8) {
579 Placeholder = &Operand;
580 break;
581 }
582 }
583 }
584
585 assert(Placeholder && "Placeholder does not exist!");
586
587 // Generate instructions to load the const fp from constant pool.
588 // We only support PPC64 and medium code model.
589 Register LoadNewConst =
590 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
591
592 // Fill the placeholder with the new load from constant pool.
593 Placeholder->setReg(LoadNewConst);
594}
595
597 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
598
600 return false;
601
602 // Currently, we only enable register pressure reducing in machine combiner
603 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
604 // support.
605 //
606 // So we need following instructions to access a TOC entry:
607 //
608 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
609 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
610 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
611 //
612 // FIXME: add more supported targets, like Small and Large code model, PPC32,
613 // AIX.
614 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
616 return false;
617
619 const MachineFunction *MF = MBB->getParent();
620 const MachineRegisterInfo *MRI = &MF->getRegInfo();
621
622 auto GetMBBPressure =
623 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
624 RegionPressure Pressure;
625 RegPressureTracker RPTracker(Pressure);
626
627 // Initialize the register pressure tracker.
628 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
629 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
630
631 for (const auto &MI : reverse(*MBB)) {
632 if (MI.isDebugValue() || MI.isDebugLabel())
633 continue;
634 RegisterOperands RegOpers;
635 RegOpers.collect(MI, *TRI, *MRI, false, false);
636 RPTracker.recedeSkipDebugValues();
637 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
638 RPTracker.recede(RegOpers);
639 }
640
641 // Close the RPTracker to finalize live ins.
642 RPTracker.closeRegion();
643
644 return RPTracker.getPressure().MaxSetPressure;
645 };
646
647 // For now we only care about float and double type fma.
648 unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
649 *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
650
651 // Only reduce register pressure when pressure is high.
652 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
653 (float)VSSRCLimit * FMARPFactor;
654}
655
657 // I has only one memory operand which is load from constant pool.
658 if (!I->hasOneMemOperand())
659 return false;
660
661 MachineMemOperand *Op = I->memoperands()[0];
662 return Op->isLoad() && Op->getPseudoValue() &&
663 Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
664}
665
666Register PPCInstrInfo::generateLoadForNewConst(
667 unsigned Idx, MachineInstr *MI, Type *Ty,
668 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
669 // Now we only support PPC64, Medium code model and P9 with vector.
670 // We have immutable pattern to access const pool. See function
671 // shouldReduceRegisterPressure.
672 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
674 "Target not supported!\n");
675
676 MachineFunction *MF = MI->getMF();
678
679 // Generate ADDIStocHA8
680 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
681 MachineInstrBuilder TOCOffset =
682 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
683 .addReg(PPC::X2)
685
686 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
687 "Only float and double are supported!");
688
689 unsigned LoadOpcode;
690 // Should be float type or double type.
691 if (Ty->isFloatTy())
692 LoadOpcode = PPC::DFLOADf32;
693 else
694 LoadOpcode = PPC::DFLOADf64;
695
696 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
697 Register VReg2 = MRI->createVirtualRegister(RC);
701
702 // Generate Load from constant pool.
704 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
706 .addReg(VReg1, getKillRegState(true))
707 .addMemOperand(MMO);
708
709 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
710
711 // Insert the toc load instructions into InsInstrs.
712 InsInstrs.insert(InsInstrs.begin(), Load);
713 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
714 return VReg2;
715}
716
717// This function returns the const value in constant pool if the \p I is a load
718// from constant pool.
719const Constant *
721 MachineFunction *MF = I->getMF();
724 assert(I->mayLoad() && "Should be a load instruction.\n");
725 for (auto MO : I->uses()) {
726 if (!MO.isReg())
727 continue;
728 Register Reg = MO.getReg();
729 if (Reg == 0 || !Reg.isVirtual())
730 continue;
731 // Find the toc address.
732 MachineInstr *DefMI = MRI->getVRegDef(Reg);
733 for (auto MO2 : DefMI->uses())
734 if (MO2.isCPI())
735 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
736 }
737 return nullptr;
738}
739
742 bool DoRegPressureReduce) const {
743 // Using the machine combiner in this way is potentially expensive, so
744 // restrict to when aggressive optimizations are desired.
746 return false;
747
748 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
749 return true;
750
752 DoRegPressureReduce);
753}
754
759 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
760 switch (Pattern) {
765 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
766 break;
767 default:
768 // Reassociate default patterns.
770 DelInstrs, InstrIdxForVirtReg);
771 break;
772 }
773}
774
775void PPCInstrInfo::reassociateFMA(
779 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
780 MachineFunction *MF = Root.getMF();
783 MachineOperand &OpC = Root.getOperand(0);
784 Register RegC = OpC.getReg();
785 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
786 MRI.constrainRegClass(RegC, RC);
787
788 unsigned FmaOp = Root.getOpcode();
789 int16_t Idx = getFMAOpIdxInfo(FmaOp);
790 assert(Idx >= 0 && "Root must be a FMA instruction");
791
792 bool IsILPReassociate =
795
798
799 MachineInstr *Prev = nullptr;
800 MachineInstr *Leaf = nullptr;
801 switch (Pattern) {
802 default:
803 llvm_unreachable("not recognized pattern!");
806 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
807 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
808 break;
810 Register MULReg =
811 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
812 Leaf = MRI.getVRegDef(MULReg);
813 break;
814 }
816 Register MULReg = TRI->lookThruCopyLike(
817 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
818 Leaf = MRI.getVRegDef(MULReg);
819 break;
820 }
821 }
822
823 uint32_t IntersectedFlags = 0;
824 if (IsILPReassociate)
825 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
826 else
827 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
828
829 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
830 bool &KillFlag) {
831 Reg = Operand.getReg();
832 MRI.constrainRegClass(Reg, RC);
833 KillFlag = Operand.isKill();
834 };
835
836 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
837 Register &MulOp2, Register &AddOp,
838 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
839 bool &AddOpKillFlag) {
840 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
841 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
842 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
843 };
844
845 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
846 RegA21, RegB;
847 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
848 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
849 KillA11 = false, KillA21 = false, KillB = false;
850
851 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
852
853 if (IsILPReassociate)
854 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
855
857 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
858 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
860 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
861 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
862 } else {
863 // Get FSUB instruction info.
864 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
865 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
866 }
867
868 // Create new virtual registers for the new results instead of
869 // recycling legacy ones because the MachineCombiner's computation of the
870 // critical path requires a new register definition rather than an existing
871 // one.
872 // For register pressure reassociation, we only need create one virtual
873 // register for the new fma.
874 Register NewVRA = MRI.createVirtualRegister(RC);
875 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
876
877 Register NewVRB = 0;
878 if (IsILPReassociate) {
879 NewVRB = MRI.createVirtualRegister(RC);
880 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
881 }
882
883 Register NewVRD = 0;
885 NewVRD = MRI.createVirtualRegister(RC);
886 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
887 }
888
889 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
890 Register RegMul1, bool KillRegMul1,
891 Register RegMul2, bool KillRegMul2) {
892 MI->getOperand(AddOpIdx).setReg(RegAdd);
893 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
894 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
895 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
896 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
897 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
898 };
899
900 MachineInstrBuilder NewARegPressure, NewCRegPressure;
901 switch (Pattern) {
902 default:
903 llvm_unreachable("not recognized pattern!");
905 // Create new instructions for insertion.
906 MachineInstrBuilder MINewB =
907 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
908 .addReg(RegX, getKillRegState(KillX))
909 .addReg(RegM21, getKillRegState(KillM21))
910 .addReg(RegM22, getKillRegState(KillM22));
911 MachineInstrBuilder MINewA =
912 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
913 .addReg(RegY, getKillRegState(KillY))
914 .addReg(RegM31, getKillRegState(KillM31))
915 .addReg(RegM32, getKillRegState(KillM32));
916 // If AddOpIdx is not 1, adjust the order.
917 if (AddOpIdx != 1) {
918 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
919 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
920 }
921
922 MachineInstrBuilder MINewC =
923 BuildMI(*MF, Root.getDebugLoc(),
925 .addReg(NewVRB, getKillRegState(true))
926 .addReg(NewVRA, getKillRegState(true));
927
928 // Update flags for newly created instructions.
929 setSpecialOperandAttr(*MINewA, IntersectedFlags);
930 setSpecialOperandAttr(*MINewB, IntersectedFlags);
931 setSpecialOperandAttr(*MINewC, IntersectedFlags);
932
933 // Record new instructions for insertion.
934 InsInstrs.push_back(MINewA);
935 InsInstrs.push_back(MINewB);
936 InsInstrs.push_back(MINewC);
937 break;
938 }
940 assert(NewVRD && "new FMA register not created!");
941 // Create new instructions for insertion.
942 MachineInstrBuilder MINewA =
943 BuildMI(*MF, Leaf->getDebugLoc(),
945 .addReg(RegM11, getKillRegState(KillM11))
946 .addReg(RegM12, getKillRegState(KillM12));
947 MachineInstrBuilder MINewB =
948 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
949 .addReg(RegX, getKillRegState(KillX))
950 .addReg(RegM21, getKillRegState(KillM21))
951 .addReg(RegM22, getKillRegState(KillM22));
952 MachineInstrBuilder MINewD =
953 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
954 .addReg(NewVRA, getKillRegState(true))
955 .addReg(RegM31, getKillRegState(KillM31))
956 .addReg(RegM32, getKillRegState(KillM32));
957 // If AddOpIdx is not 1, adjust the order.
958 if (AddOpIdx != 1) {
959 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
960 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
961 KillM32);
962 }
963
964 MachineInstrBuilder MINewC =
965 BuildMI(*MF, Root.getDebugLoc(),
967 .addReg(NewVRB, getKillRegState(true))
968 .addReg(NewVRD, getKillRegState(true));
969
970 // Update flags for newly created instructions.
971 setSpecialOperandAttr(*MINewA, IntersectedFlags);
972 setSpecialOperandAttr(*MINewB, IntersectedFlags);
973 setSpecialOperandAttr(*MINewD, IntersectedFlags);
974 setSpecialOperandAttr(*MINewC, IntersectedFlags);
975
976 // Record new instructions for insertion.
977 InsInstrs.push_back(MINewA);
978 InsInstrs.push_back(MINewB);
979 InsInstrs.push_back(MINewD);
980 InsInstrs.push_back(MINewC);
981 break;
982 }
985 Register VarReg;
986 bool KillVarReg = false;
988 VarReg = RegM31;
989 KillVarReg = KillM31;
990 } else {
991 VarReg = RegM32;
992 KillVarReg = KillM32;
993 }
994 // We don't want to get negative const from memory pool too early, as the
995 // created entry will not be deleted even if it has no users. Since all
996 // operand of Leaf and Root are virtual register, we use zero register
997 // here as a placeholder. When the InsInstrs is selected in
998 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
999 // with a virtual register which is a load from constant pool.
1000 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
1001 .addReg(RegB, getKillRegState(RegB))
1002 .addReg(RegY, getKillRegState(KillY))
1003 .addReg(PPC::ZERO8);
1004 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
1005 .addReg(NewVRA, getKillRegState(true))
1006 .addReg(RegX, getKillRegState(KillX))
1007 .addReg(VarReg, getKillRegState(KillVarReg));
1008 // For now, we only support xsmaddadp/xsmaddasp, their add operand are
1009 // both at index 1, no need to adjust.
1010 // FIXME: when add more fma instructions support, like fma/fmas, adjust
1011 // the operand index here.
1012 break;
1013 }
1014 }
1015
1016 if (!IsILPReassociate) {
1017 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
1018 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
1019
1020 InsInstrs.push_back(NewARegPressure);
1021 InsInstrs.push_back(NewCRegPressure);
1022 }
1023
1024 assert(!InsInstrs.empty() &&
1025 "Insertion instructions set should not be empty!");
1026
1027 // Record old instructions for deletion.
1028 DelInstrs.push_back(Leaf);
1029 if (IsILPReassociate)
1030 DelInstrs.push_back(Prev);
1031 DelInstrs.push_back(&Root);
1032}
1033
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
                                         Register &SrcReg, Register &DstReg,
                                         unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: return false;
  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
    // A sign-extend-word leaves the low 32 bits equal to the source, so the
    // coalescer may treat the source as the sub_32 piece of the destination.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = PPC::sub_32;
    return true;
  }
}
1049
                                          int &FrameIndex) const {
  // Recognize only the spill-reload opcodes this target itself emits.
  if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
    // Check for the operands added by addFrameReference (the immediate is the
    // offset which defaults to 0).
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      // Operand 0 is the register being reloaded.
      return MI.getOperand(0).getReg();
    }
  }
  // 0 == "not a simple stack-slot load".
  return 0;
}
1063
// For opcodes with the ReMaterializable flag set, this function is called to
// verify the instruction is really rematable.
                                                     const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    // Let base implementation decide.
    break;
  // The opcodes below produce their result from immediates, the TOC, or
  // fixed zero/all-ones idioms (no register or memory inputs that could
  // change), so re-executing them at the point of use is always safe.
  case PPC::LI:
  case PPC::LI8:
  case PPC::PLI:
  case PPC::PLI8:
  case PPC::LIS:
  case PPC::LIS8:
  case PPC::ADDIStocHA:
  case PPC::ADDIStocHA8:
  case PPC::ADDItocL8:
  case PPC::LOAD_STACK_GUARD:
  case PPC::PPCLdFixedAddr:
  case PPC::XXLXORz:
  case PPC::XXLXORspz:
  case PPC::XXLXORdpz:
  case PPC::XXLEQVOnes:
  case PPC::XXSPLTI32DX:
  case PPC::XXSPLTIW:
  case PPC::XXSPLTIDP:
  case PPC::V_SET0B:
  case PPC::V_SET0H:
  case PPC::V_SET0:
  case PPC::V_SETALLONESB:
  case PPC::V_SETALLONESH:
  case PPC::V_SETALLONES:
  case PPC::CRSET:
  case PPC::CRUNSET:
  case PPC::XXSETACCZ:
  case PPC::XXSETACCZW:
    return true;
  }
}
1104
                                         int &FrameIndex) const {
  // Mirror of isLoadFromStackSlot: only the target's own spill-store opcodes
  // with a zero immediate offset and a frame-index operand qualify.
  if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      // Operand 0 is the register being spilled.
      return MI.getOperand(0).getReg();
    }
  }
  // 0 == "not a simple stack-slot store".
  return 0;
}
1116
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  MachineFunction &MF = *MI.getParent()->getParent();

  // Normal instructions can be commuted the obvious way.
  if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
  // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
  // changing the relative order of the mask operands might change what happens
  // to the high-bits of the mask (and, thus, the result).

  // Cannot commute if it has a non-zero rotate count.
  if (MI.getOperand(3).getImm() != 0)
    return nullptr;

  // If we have a zero rotate count, we have:
  //   M = mask(MB,ME)
  //   Op0 = (Op1 & ~M) | (Op2 & M)
  // Change this to:
  //   M = mask((ME+1)&31, (MB-1)&31)
  //   Op0 = (Op2 & ~M) | (Op1 & M)

  // Swap op1/op2
  assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
         "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
  // Snapshot the register operands (and their subregs / kill flags) before
  // mutating anything.
  Register Reg0 = MI.getOperand(0).getReg();
  Register Reg1 = MI.getOperand(1).getReg();
  Register Reg2 = MI.getOperand(2).getReg();
  unsigned SubReg1 = MI.getOperand(1).getSubReg();
  unsigned SubReg2 = MI.getOperand(2).getSubReg();
  bool Reg1IsKill = MI.getOperand(1).isKill();
  bool Reg2IsKill = MI.getOperand(2).isKill();
  bool ChangeReg0 = false;
  // If machine instrs are no longer in two-address forms, update
  // destination register as well.
  if (Reg0 == Reg1) {
    // Must be two address instruction (i.e. op1 is tied to op0).
    assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
           "Expecting a two-address instruction!");
    assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
    // Reg2 becomes the new tied def, so it must not be marked killed.
    Reg2IsKill = false;
    ChangeReg0 = true;
  }

  // Masks.
  unsigned MB = MI.getOperand(4).getImm();
  unsigned ME = MI.getOperand(5).getImm();

  // We can't commute a trivial mask (there is no way to represent an all-zero
  // mask).
  if (MB == 0 && ME == 31)
    return nullptr;

  if (NewMI) {
    // Create a new instruction.
    Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
    bool Reg0IsDead = MI.getOperand(0).isDead();
    return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
        .addReg(Reg2, getKillRegState(Reg2IsKill))
        .addReg(Reg1, getKillRegState(Reg1IsKill))
        .addImm((ME + 1) & 31)
        .addImm((MB - 1) & 31);
  }

  // In-place commutation: swap the two register operands...
  if (ChangeReg0) {
    MI.getOperand(0).setReg(Reg2);
    MI.getOperand(0).setSubReg(SubReg2);
  }
  MI.getOperand(2).setReg(Reg1);
  MI.getOperand(1).setReg(Reg2);
  MI.getOperand(2).setSubReg(SubReg1);
  MI.getOperand(1).setSubReg(SubReg2);
  MI.getOperand(2).setIsKill(Reg1IsKill);
  MI.getOperand(1).setIsKill(Reg2IsKill);

  // Swap the mask around.
  MI.getOperand(4).setImm((ME + 1) & 31);
  MI.getOperand(5).setImm((MB - 1) & 31);
  return &MI;
}
1200
                                         unsigned &SrcOpIdx1,
                                         unsigned &SrcOpIdx2) const {
  // For VSX A-Type FMA instructions, it is the first two operands that can be
  // commuted, however, because the non-encoded tied input operand is listed
  // first, the operands to swap are actually the second and third.

  // A result of -1 means "not a VSX A-type FMA"; defer to the generic logic.
  int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
  if (AltOpc == -1)
    return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);

  // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
  // and SrcOpIdx2.
  return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
}
1216
  // This function is used for scheduling, and the nop wanted here is the type
  // that terminates dispatch groups on the POWER cores.
  unsigned Directive = Subtarget.getCPUDirective();
  unsigned Opcode;
  switch (Directive) {
  default: Opcode = PPC::NOP; break;
  case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
  case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
  case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
  // FIXME: Update when POWER9 scheduling model is ready.
  case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
  }

  // Inserted nops carry no source location.
  DebugLoc DL;
  BuildMI(MBB, MI, DL, get(Opcode));
}
1235
/// Return the noop instruction to use for a noop.
  // MC-layer nop; unlike insertNoop this is not CPU-directive specific.
  MCInst Nop;
  Nop.setOpcode(PPC::NOP);
  return Nop;
}
1242
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
// Returns false when the terminators were understood (filling TBB/FBB/Cond),
// true when the block's control flow could not be analyzed.
                                 MachineBasicBlock *&FBB,
                                 bool AllowModify) const {
  bool isPPC64 = Subtarget.isPPC64();

  // If the block has no terminators, it just falls into the block after it.
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  if (AllowModify) {
    // If the BB ends with an unconditional branch to the fallthrough BB,
    // we eliminate the branch instruction.
    if (I->getOpcode() == PPC::B &&
        MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
      I->eraseFromParent();

      // We update iterator after deleting the last branch.
      if (I == MBB.end() || !isUnpredicatedTerminator(*I))
        return false;
    }
  }

  // Get the last instruction in the block.
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (LastInst.getOpcode() == PPC::B) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastInst.getOpcode() == PPC::BCC) {
      if (!LastInst.getOperand(2).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(2).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      Cond.push_back(LastInst.getOperand(1));
      return false;
    } else if (LastInst.getOpcode() == PPC::BC) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BCn) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
               LastInst.getOpcode() == PPC::BDNZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      // Encode a CTR-decrement branch as (imm==1, CTR/CTR8) per the note
      // above this function.
      Cond.push_back(MachineOperand::CreateImm(1));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDZ8 ||
               LastInst.getOpcode() == PPC::BDZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    }

    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr &SecondLastInst = *I;

  // If there are three terminators, we don't know what sort of block this is.
  if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with PPC::B and PPC:BCC, handle it.
  if (SecondLastInst.getOpcode() == PPC::BCC &&
      LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(2).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(2).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    Cond.push_back(SecondLastInst.getOperand(1));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BC &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BCn &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
              SecondLastInst.getOpcode() == PPC::BDNZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(1));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
              SecondLastInst.getOpcode() == PPC::BDZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(0));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two PPC:Bs, handle it. The second one is not
  // executed, so remove it.
  if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
1420
                                int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  if (I == MBB.end())
    return 0;

  // Last terminator must be one of the branch forms this target emits
  // (see insertBranch); otherwise there is nothing for us to remove.
  if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  // A preceding conditional branch (the "two-way branch" case) is removed
  // too; an unconditional PPC::B cannot legally precede another branch.
  if (I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
1452
                                    MachineBasicBlock *FBB,
                                    const DebugLoc &DL,
                                    int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  // Cond uses the (imm, reg) encoding produced by analyzeBranch above.
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "PPC branch conditions have two components!");
  assert(!BytesAdded && "code size not handled");

  bool isPPC64 = Subtarget.isPPC64();

  // One-way branch.
  if (!FBB) {
    if (Cond.empty()) // Unconditional branch
      BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
    else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
      // CTR-decrement loop branch: imm==1 selects BDNZ, imm==0 selects BDZ.
      BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                              (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                              (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
      BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
      BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
    else // Conditional branch
      BuildMI(&MBB, DL, get(PPC::BCC))
          .addImm(Cond[0].getImm())
          .add(Cond[1])
          .addMBB(TBB);
    return 1;
  }

  // Two-way Conditional Branch: conditional branch to TBB, then an
  // unconditional branch to FBB.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                            (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                            (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
    BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
    BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
  else
    BuildMI(&MBB, DL, get(PPC::BCC))
        .addImm(Cond[0].getImm())
        .add(Cond[1])
        .addMBB(TBB);
  BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
  return 2;
}
1504
// Select analysis.
// Decide whether the given branch condition / register pair can be lowered
// to an isel instruction, and report its cost if so.
                                Register DstReg, Register TrueReg,
                                Register FalseReg, int &CondCycles,
                                int &TrueCycles, int &FalseCycles) const {
  if (!Subtarget.hasISEL())
    return false;

  if (Cond.size() != 2)
    return false;

  // If this is really a bdnz-like condition, then it cannot be turned into a
  // select.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    return false;

  // If the conditional branch uses a physical register, then it cannot be
  // turned into a select.
  if (Cond[1].getReg().isPhysical())
    return false;

  // Check register classes.
  const TargetRegisterClass *RC =
    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // isel is for regular integer GPRs only.
  if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
      !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
      !PPC::G8RCRegClass.hasSubClassEq(RC) &&
      !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
    return false;

  // FIXME: These numbers are for the A2, how well they work for other cores is
  // an open question. On the A2, the isel instruction has a 2-cycle latency
  // but single-cycle throughput. These numbers are used in combination with
  // the MispredictPenalty setting from the active SchedMachineModel.
  CondCycles = 1;
  TrueCycles = 1;
  FalseCycles = 1;

  return true;
}
1551
                                const DebugLoc &dl, Register DestReg,
                                Register FalseReg) const {
  assert(Cond.size() == 2 &&
         "PPC branch conditions have two components!");

  // Get the register classes.
  const TargetRegisterClass *RC =
    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  assert(RC && "TrueReg and FalseReg must have overlapping register classes");

  bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
                 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
  assert((Is64Bit ||
          PPC::GPRCRegClass.hasSubClassEq(RC) ||
          PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
         "isel is for regular integer GPRs only");

  unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
  auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());

  // Map the predicate to the CR subregister isel should test, and note
  // whether the operands must be swapped because isel only tests for the
  // bit being SET (e.g. NE is implemented as EQ with swapped operands).
  unsigned SubIdx = 0;
  bool SwapOps = false;
  switch (SelectPred) {
  case PPC::PRED_EQ:
  case PPC::PRED_EQ_MINUS:
  case PPC::PRED_EQ_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = false; break;
  case PPC::PRED_NE:
  case PPC::PRED_NE_MINUS:
  case PPC::PRED_NE_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = true; break;
  case PPC::PRED_LT:
  case PPC::PRED_LT_MINUS:
  case PPC::PRED_LT_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = false; break;
  case PPC::PRED_GE:
  case PPC::PRED_GE_MINUS:
  case PPC::PRED_GE_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = true; break;
  case PPC::PRED_GT:
  case PPC::PRED_GT_MINUS:
  case PPC::PRED_GT_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = false; break;
  case PPC::PRED_LE:
  case PPC::PRED_LE_MINUS:
  case PPC::PRED_LE_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = true; break;
  case PPC::PRED_UN:
  case PPC::PRED_UN_MINUS:
  case PPC::PRED_UN_PLUS:
    SubIdx = PPC::sub_un; SwapOps = false; break;
  case PPC::PRED_NU:
  case PPC::PRED_NU_MINUS:
  case PPC::PRED_NU_PLUS:
    SubIdx = PPC::sub_un; SwapOps = true; break;
  case PPC::PRED_BIT_SET:   SubIdx = 0; SwapOps = false; break;
  case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
  }

  Register FirstReg = SwapOps ? FalseReg : TrueReg,
           SecondReg = SwapOps ? TrueReg : FalseReg;

  // The first input register of isel cannot be r0. If it is a member
  // of a register class that can be r0, then copy it first (the
  // register allocator should eliminate the copy).
  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
    const TargetRegisterClass *FirstRC =
      MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
        &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
    Register OldFirstReg = FirstReg;
    FirstReg = MRI.createVirtualRegister(FirstRC);
    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
      .addReg(OldFirstReg);
  }

  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
    .addReg(FirstReg).addReg(SecondReg)
    .addReg(Cond[1].getReg(), 0, SubIdx);
}
1636
1637static unsigned getCRBitValue(unsigned CRBit) {
1638 unsigned Ret = 4;
1639 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1640 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1641 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1642 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1643 Ret = 3;
1644 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1645 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1646 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1647 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1648 Ret = 2;
1649 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1650 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1651 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1652 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1653 Ret = 1;
1654 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1655 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1656 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1657 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1658 Ret = 0;
1659
1660 assert(Ret != 4 && "Invalid CR bit register");
1661 return Ret;
1662}
1663
                               const DebugLoc &DL, MCRegister DestReg,
                               MCRegister SrcReg, bool KillSrc) const {
  // We can end up with self copies and similar things as a result of VSX copy
  // legalization. Promote them here.
  if (PPC::F8RCRegClass.contains(DestReg) &&
      PPC::VSRCRegClass.contains(SrcReg)) {
    MCRegister SuperReg =
        TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);

    if (VSXSelfCopyCrash && SrcReg == SuperReg)
      llvm_unreachable("nop VSX copy");

    DestReg = SuperReg;
  } else if (PPC::F8RCRegClass.contains(SrcReg) &&
             PPC::VSRCRegClass.contains(DestReg)) {
    MCRegister SuperReg =
        TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);

    if (VSXSelfCopyCrash && DestReg == SuperReg)
      llvm_unreachable("nop VSX copy");

    SrcReg = SuperReg;
  }

  // Different class register copy
  if (PPC::CRBITRCRegClass.contains(SrcReg) &&
      PPC::GPRCRegClass.contains(DestReg)) {
    MCRegister CRReg = getCRFromCRBit(SrcReg);
    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
    // NOTE(review): the result of getKillRegState is discarded here (and in
    // the branches below) — appears intentional; confirm.
    getKillRegState(KillSrc);
    // Rotate the CR bit in the CR fields to be the least significant bit and
    // then mask with 0x1 (MB = ME = 31).
    BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
        .addImm(31)
        .addImm(31);
    return;
  } else if (PPC::CRRCRegClass.contains(SrcReg) &&
             (PPC::G8RCRegClass.contains(DestReg) ||
              PPC::GPRCRegClass.contains(DestReg))) {
    bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
    unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
    unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
    unsigned CRNum = TRI->getEncodingValue(SrcReg);
    BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    // CR7 already occupies the low 4 bits of the GPR; no shift needed.
    if (CRNum == 7)
      return;
    // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
    BuildMI(MBB, I, DL, get(ShCode), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(CRNum * 4 + 4)
        .addImm(28)
        .addImm(31);
    return;
  } else if (PPC::G8RCRegClass.contains(SrcReg) &&
             PPC::VSFRCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
    NumGPRtoVSRSpill++;
    getKillRegState(KillSrc);
    return;
  } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
             PPC::G8RCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::SPERCRegClass.contains(SrcReg) &&
             PPC::GPRCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::GPRCRegClass.contains(SrcReg) &&
             PPC::SPERCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  }

  // Same-class copies: pick the canonical move opcode for the class.
  unsigned Opc;
  if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR;
  else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR8;
  else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::FMR;
  else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::MCRF;
  else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::VOR;
  else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
    // There are two different ways this can be done:
    //   1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
    //      issue in VSU pipeline 0.
    //   2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
    //      can go to either pipeline.
    // We'll always use xxlor here, because in practically all cases where
    // copies are generated, they are close enough to some use that the
    // lower-latency form is preferable.
    Opc = PPC::XXLOR;
  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
           PPC::VSSRCRegClass.contains(DestReg, SrcReg))
    Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
  else if (Subtarget.pairedVectorMemops() &&
           PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
    // Register pairs: translate the pair register to its first underlying
    // VSX/Altivec register, then copy both halves with xxlor.
    if (SrcReg > PPC::VSRp15)
      SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
    else
      SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
    if (DestReg > PPC::VSRp15)
      DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
    else
      DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
      addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
      addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
    return;
  }
  else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::CROR;
  else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::EVOR;
  else if ((PPC::ACCRCRegClass.contains(DestReg) ||
            PPC::UACCRCRegClass.contains(DestReg)) &&
           (PPC::ACCRCRegClass.contains(SrcReg) ||
            PPC::UACCRCRegClass.contains(SrcReg))) {
    // If primed, de-prime the source register, copy the individual registers
    // and prime the destination if needed. The vector subregisters are
    // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
    // source is primed, we need to re-prime it after the copy as well.
    PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
    bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
    bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
    MCRegister VSLSrcReg =
        PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    MCRegister VSLDestReg =
        PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    if (SrcPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
    for (unsigned Idx = 0; Idx < 4; Idx++)
      BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
          .addReg(VSLSrcReg + Idx)
          .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
    if (DestPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
    if (SrcPrimed && !KillSrc)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
    return;
  } else if (PPC::G8pRCRegClass.contains(DestReg) &&
             PPC::G8pRCRegClass.contains(SrcReg)) {
    // TODO: Handle G8RC to G8pRC (and vice versa) copy.
    // GPR pairs: copy both 64-bit halves individually with or8.
    unsigned DestRegIdx = DestReg - PPC::G8p0;
    MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
    MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
    unsigned SrcRegIdx = SrcReg - PPC::G8p0;
    MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
    MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
        .addReg(SrcRegSub0)
        .addReg(SrcRegSub0, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
        .addReg(SrcRegSub1)
        .addReg(SrcRegSub1, getKillRegState(KillSrc));
    return;
  } else
    llvm_unreachable("Impossible reg-to-reg copy");

  // Two-source move idioms (e.g. or rD,rS,rS) need the source twice.
  const MCInstrDesc &MCID = get(Opc);
  if (MCID.getNumOperands() == 3)
    BuildMI(MBB, I, DL, MCID, DestReg)
        .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
  else
    BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
1846
unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
  // Map a register class to an index into the spill-opcode arrays used by
  // get{Store,Load}OpcodeForSpill.
  int OpcodeIndex = 0;

  if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
  } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
    // The (U)ACC / WACC / VSRp classes only exist with paired vector memops.
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
  } else {
    llvm_unreachable("Unknown regclass!");
  }
  return OpcodeIndex;
}
1899
// Return the store (spill) opcode appropriate for the given register class.
unsigned
  ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}
1905
// Return the load (reload) opcode appropriate for the given register class.
unsigned
  ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}
1911
void PPCInstrInfo::StoreRegToStackSlot(
    MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
    const TargetRegisterClass *RC,
    SmallVectorImpl<MachineInstr *> &NewMIs) const {
  // Build (without inserting) the instruction that spills SrcReg to
  // FrameIdx, appending it to NewMIs, and record spill facts on the
  // function info for frame lowering.
  unsigned Opcode = getStoreOpcodeForSpill(RC);
  DebugLoc DL;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasSpills();

      BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
      FrameIdx));

  // Track CR spills separately.
  if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
      PPC::CRBITRCRegClass.hasSubClassEq(RC))
    FuncInfo->setSpillsCR();

  // X-form (reg+reg) spill opcodes cannot use the register-immediate form.
  if (isXFormMemOp(Opcode))
    FuncInfo->setHasNonRISpills();
}
1933
    bool isKill, int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();

  // Build the spill instruction(s), then insert them before MI.
  StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);

  for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
    MBB.insert(MI, NewMIs[i]);

  // Attach a memory operand describing the stack-slot store.
  const MachineFrameInfo &MFI = MF.getFrameInfo();
      MFI.getObjectAlign(FrameIdx));
  NewMIs.back()->addMemOperand(MF, MMO);
}
1953
1956 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1957 const TargetRegisterInfo *TRI, Register VReg) const {
1958 // We need to avoid a situation in which the value from a VRRC register is
1959 // spilled using an Altivec instruction and reloaded into a VSRC register
1960 // using a VSX instruction. The issue with this is that the VSX
1961 // load/store instructions swap the doublewords in the vector and the Altivec
1962 // ones don't. The register classes on the spill/reload may be different if
1963 // the register is defined using an Altivec instruction and is then used by a
1964 // VSX instruction.
1965 RC = updatedRC(RC);
1966 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
1967}
1968
1969void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
1970 unsigned DestReg, int FrameIdx,
1971 const TargetRegisterClass *RC,
1973 const {
1974 unsigned Opcode = getLoadOpcodeForSpill(RC);
1975 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
1976 FrameIdx));
1977}
1978
1981 int FrameIdx, const TargetRegisterClass *RC,
1982 const TargetRegisterInfo *TRI) const {
1983 MachineFunction &MF = *MBB.getParent();
1985 DebugLoc DL;
1986 if (MI != MBB.end()) DL = MI->getDebugLoc();
1987
1988 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
1989
1990 for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
1991 MBB.insert(MI, NewMIs[i]);
1992
1993 const MachineFrameInfo &MFI = MF.getFrameInfo();
1997 MFI.getObjectAlign(FrameIdx));
1998 NewMIs.back()->addMemOperand(MF, MMO);
1999}
2000
2003 Register DestReg, int FrameIdx,
2004 const TargetRegisterClass *RC,
2005 const TargetRegisterInfo *TRI,
2006 Register VReg) const {
2007 // We need to avoid a situation in which the value from a VRRC register is
2008 // spilled using an Altivec instruction and reloaded into a VSRC register
2009 // using a VSX instruction. The issue with this is that the VSX
2010 // load/store instructions swap the doublewords in the vector and the Altivec
2011 // ones don't. The register classes on the spill/reload may be different if
2012 // the register is defined using an Altivec instruction and is then used by a
2013 // VSX instruction.
2014 RC = updatedRC(RC);
2015
2016 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2017}
2018
2021 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2022 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2023 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2024 else
2025 // Leave the CR# the same, but invert the condition.
2026 Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
2027 return false;
2028}
2029
2030// For some instructions, it is legal to fold ZERO into the RA register field.
2031// This function performs that fold by replacing the operand with PPC::ZERO,
2032// it does not consider whether the load immediate zero is no longer in use.
2034 Register Reg) const {
2035 // A zero immediate should always be loaded with a single li.
2036 unsigned DefOpc = DefMI.getOpcode();
2037 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2038 return false;
2039 if (!DefMI.getOperand(1).isImm())
2040 return false;
2041 if (DefMI.getOperand(1).getImm() != 0)
2042 return false;
2043
2044 // Note that we cannot here invert the arguments of an isel in order to fold
2045 // a ZERO into what is presented as the second argument. All we have here
2046 // is the condition bit, and that might come from a CR-logical bit operation.
2047
2048 const MCInstrDesc &UseMCID = UseMI.getDesc();
2049
2050 // Only fold into real machine instructions.
2051 if (UseMCID.isPseudo())
2052 return false;
2053
2054 // We need to find which of the User's operands is to be folded, that will be
2055 // the operand that matches the given register ID.
2056 unsigned UseIdx;
2057 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2058 if (UseMI.getOperand(UseIdx).isReg() &&
2059 UseMI.getOperand(UseIdx).getReg() == Reg)
2060 break;
2061
2062 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2063 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2064
2065 const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2066
2067 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2068 // register (which might also be specified as a pointer class kind).
2069 if (UseInfo->isLookupPtrRegClass()) {
2070 if (UseInfo->RegClass /* Kind */ != 1)
2071 return false;
2072 } else {
2073 if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2074 UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2075 return false;
2076 }
2077
2078 // Make sure this is not tied to an output register (or otherwise
2079 // constrained). This is true for ST?UX registers, for example, which
2080 // are tied to their output registers.
2081 if (UseInfo->Constraints != 0)
2082 return false;
2083
2084 MCRegister ZeroReg;
2085 if (UseInfo->isLookupPtrRegClass()) {
2086 bool isPPC64 = Subtarget.isPPC64();
2087 ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2088 } else {
2089 ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2090 PPC::ZERO8 : PPC::ZERO;
2091 }
2092
2093 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2094 LLVM_DEBUG(UseMI.dump());
2095 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2096 LLVM_DEBUG(dbgs() << "Into: ");
2097 LLVM_DEBUG(UseMI.dump());
2098 return true;
2099}
2100
2101// Folds zero into instructions which have a load immediate zero as an operand
2102// but also recognize zero as immediate zero. If the definition of the load
2103// has no more users it is deleted.
2105 Register Reg, MachineRegisterInfo *MRI) const {
2106 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2107 if (MRI->use_nodbg_empty(Reg))
2108 DefMI.eraseFromParent();
2109 return Changed;
2110}
2111
2113 for (MachineInstr &MI : MBB)
2114 if (MI.definesRegister(PPC::CTR) || MI.definesRegister(PPC::CTR8))
2115 return true;
2116 return false;
2117}
2118
2119// We should make sure that, if we're going to predicate both sides of a
2120// condition (a diamond), that both sides don't define the counter register. We
2121// can predicate counter-decrement-based branches, but while that predicates
2122// the branching, it does not predicate the counter decrement. If we tried to
2123// merge the triangle into one predicated block, we'd decrement the counter
2124// twice.
2126 unsigned NumT, unsigned ExtraT,
2127 MachineBasicBlock &FMBB,
2128 unsigned NumF, unsigned ExtraF,
2129 BranchProbability Probability) const {
2130 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2131}
2132
2133
2135 // The predicated branches are identified by their type, not really by the
2136 // explicit presence of a predicate. Furthermore, some of them can be
2137 // predicated more than once. Because if conversion won't try to predicate
2138 // any instruction which already claims to be predicated (by returning true
2139 // here), always return false. In doing so, we let isPredicable() be the
2140 // final word on whether not the instruction can be (further) predicated.
2141
2142 return false;
2143}
2144
2146 const MachineBasicBlock *MBB,
2147 const MachineFunction &MF) const {
2148 switch (MI.getOpcode()) {
2149 default:
2150 break;
2151 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2152 // across them, since some FP operations may change content of FPSCR.
2153 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2154 case PPC::MFFS:
2155 case PPC::MTFSF:
2156 case PPC::FENCE:
2157 return true;
2158 }
2160}
2161
2163 ArrayRef<MachineOperand> Pred) const {
2164 unsigned OpC = MI.getOpcode();
2165 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2166 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2167 bool isPPC64 = Subtarget.isPPC64();
2168 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2169 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2170 // Need add Def and Use for CTR implicit operand.
2171 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2172 .addReg(Pred[1].getReg(), RegState::Implicit)
2174 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2175 MI.setDesc(get(PPC::BCLR));
2176 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2177 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2178 MI.setDesc(get(PPC::BCLRn));
2179 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2180 } else {
2181 MI.setDesc(get(PPC::BCCLR));
2182 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2183 .addImm(Pred[0].getImm())
2184 .add(Pred[1]);
2185 }
2186
2187 return true;
2188 } else if (OpC == PPC::B) {
2189 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2190 bool isPPC64 = Subtarget.isPPC64();
2191 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2192 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2193 // Need add Def and Use for CTR implicit operand.
2194 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2195 .addReg(Pred[1].getReg(), RegState::Implicit)
2197 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2198 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2199 MI.removeOperand(0);
2200
2201 MI.setDesc(get(PPC::BC));
2202 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2203 .add(Pred[1])
2204 .addMBB(MBB);
2205 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2206 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2207 MI.removeOperand(0);
2208
2209 MI.setDesc(get(PPC::BCn));
2210 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2211 .add(Pred[1])
2212 .addMBB(MBB);
2213 } else {
2214 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2215 MI.removeOperand(0);
2216
2217 MI.setDesc(get(PPC::BCC));
2218 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2219 .addImm(Pred[0].getImm())
2220 .add(Pred[1])
2221 .addMBB(MBB);
2222 }
2223
2224 return true;
2225 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2226 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2227 OpC == PPC::BCTRL8_RM) {
2228 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2229 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2230
2231 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2232 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2233 bool isPPC64 = Subtarget.isPPC64();
2234
2235 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2236 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2237 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2238 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2239 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2240 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2241 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2242 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2243 } else {
2244 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2245 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2246 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2247 .addImm(Pred[0].getImm())
2248 .add(Pred[1]);
2249 }
2250
2251 // Need add Def and Use for LR implicit operand.
2252 if (setLR)
2253 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2254 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2255 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2256 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2257 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2259
2260 return true;
2261 }
2262
2263 return false;
2264}
2265
2267 ArrayRef<MachineOperand> Pred2) const {
2268 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2269 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2270
2271 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2272 return false;
2273 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2274 return false;
2275
2276 // P1 can only subsume P2 if they test the same condition register.
2277 if (Pred1[1].getReg() != Pred2[1].getReg())
2278 return false;
2279
2280 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2281 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2282
2283 if (P1 == P2)
2284 return true;
2285
2286 // Does P1 subsume P2, e.g. GE subsumes GT.
2287 if (P1 == PPC::PRED_LE &&
2288 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2289 return true;
2290 if (P1 == PPC::PRED_GE &&
2291 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2292 return true;
2293
2294 return false;
2295}
2296
2298 std::vector<MachineOperand> &Pred,
2299 bool SkipDead) const {
2300 // Note: At the present time, the contents of Pred from this function is
2301 // unused by IfConversion. This implementation follows ARM by pushing the
2302 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2303 // predicate, instructions defining CTR or CTR8 are also included as
2304 // predicate-defining instructions.
2305
2306 const TargetRegisterClass *RCs[] =
2307 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2308 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2309
2310 bool Found = false;
2311 for (const MachineOperand &MO : MI.operands()) {
2312 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2313 const TargetRegisterClass *RC = RCs[c];
2314 if (MO.isReg()) {
2315 if (MO.isDef() && RC->contains(MO.getReg())) {
2316 Pred.push_back(MO);
2317 Found = true;
2318 }
2319 } else if (MO.isRegMask()) {
2320 for (MCPhysReg R : *RC)
2321 if (MO.clobbersPhysReg(R)) {
2322 Pred.push_back(MO);
2323 Found = true;
2324 }
2325 }
2326 }
2327 }
2328
2329 return Found;
2330}
2331
2333 Register &SrcReg2, int64_t &Mask,
2334 int64_t &Value) const {
2335 unsigned Opc = MI.getOpcode();
2336
2337 switch (Opc) {
2338 default: return false;
2339 case PPC::CMPWI:
2340 case PPC::CMPLWI:
2341 case PPC::CMPDI:
2342 case PPC::CMPLDI:
2343 SrcReg = MI.getOperand(1).getReg();
2344 SrcReg2 = 0;
2345 Value = MI.getOperand(2).getImm();
2346 Mask = 0xFFFF;
2347 return true;
2348 case PPC::CMPW:
2349 case PPC::CMPLW:
2350 case PPC::CMPD:
2351 case PPC::CMPLD:
2352 case PPC::FCMPUS:
2353 case PPC::FCMPUD:
2354 SrcReg = MI.getOperand(1).getReg();
2355 SrcReg2 = MI.getOperand(2).getReg();
2356 Value = 0;
2357 Mask = 0;
2358 return true;
2359 }
2360}
2361
2363 Register SrcReg2, int64_t Mask,
2364 int64_t Value,
2365 const MachineRegisterInfo *MRI) const {
2366 if (DisableCmpOpt)
2367 return false;
2368
2369 int OpC = CmpInstr.getOpcode();
2370 Register CRReg = CmpInstr.getOperand(0).getReg();
2371
2372 // FP record forms set CR1 based on the exception status bits, not a
2373 // comparison with zero.
2374 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2375 return false;
2376
2378 // The record forms set the condition register based on a signed comparison
2379 // with zero (so says the ISA manual). This is not as straightforward as it
2380 // seems, however, because this is always a 64-bit comparison on PPC64, even
2381 // for instructions that are 32-bit in nature (like slw for example).
2382 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2383 // for equality checks (as those don't depend on the sign). On PPC64,
2384 // we are restricted to equality for unsigned 64-bit comparisons and for
2385 // signed 32-bit comparisons the applicability is more restricted.
2386 bool isPPC64 = Subtarget.isPPC64();
2387 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2388 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2389 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2390
2391 // Look through copies unless that gets us to a physical register.
2392 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2393 if (ActualSrc.isVirtual())
2394 SrcReg = ActualSrc;
2395
2396 // Get the unique definition of SrcReg.
2397 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2398 if (!MI) return false;
2399
2400 bool equalityOnly = false;
2401 bool noSub = false;
2402 if (isPPC64) {
2403 if (is32BitSignedCompare) {
2404 // We can perform this optimization only if SrcReg is sign-extending.
2405 if (isSignExtended(SrcReg, MRI))
2406 noSub = true;
2407 else
2408 return false;
2409 } else if (is32BitUnsignedCompare) {
2410 // We can perform this optimization, equality only, if SrcReg is
2411 // zero-extending.
2412 if (isZeroExtended(SrcReg, MRI)) {
2413 noSub = true;
2414 equalityOnly = true;
2415 } else
2416 return false;
2417 } else
2418 equalityOnly = is64BitUnsignedCompare;
2419 } else
2420 equalityOnly = is32BitUnsignedCompare;
2421
2422 if (equalityOnly) {
2423 // We need to check the uses of the condition register in order to reject
2424 // non-equality comparisons.
2426 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2427 I != IE; ++I) {
2428 MachineInstr *UseMI = &*I;
2429 if (UseMI->getOpcode() == PPC::BCC) {
2431 unsigned PredCond = PPC::getPredicateCondition(Pred);
2432 // We ignore hint bits when checking for non-equality comparisons.
2433 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2434 return false;
2435 } else if (UseMI->getOpcode() == PPC::ISEL ||
2436 UseMI->getOpcode() == PPC::ISEL8) {
2437 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2438 if (SubIdx != PPC::sub_eq)
2439 return false;
2440 } else
2441 return false;
2442 }
2443 }
2444
2445 MachineBasicBlock::iterator I = CmpInstr;
2446
2447 // Scan forward to find the first use of the compare.
2448 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2449 ++I) {
2450 bool FoundUse = false;
2452 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2453 J != JE; ++J)
2454 if (&*J == &*I) {
2455 FoundUse = true;
2456 break;
2457 }
2458
2459 if (FoundUse)
2460 break;
2461 }
2462
2465
2466 // There are two possible candidates which can be changed to set CR[01].
2467 // One is MI, the other is a SUB instruction.
2468 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2469 MachineInstr *Sub = nullptr;
2470 if (SrcReg2 != 0)
2471 // MI is not a candidate for CMPrr.
2472 MI = nullptr;
2473 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2474 // same BB as the comparison. This is to allow the check below to avoid calls
2475 // (and other explicit clobbers); instead we should really check for these
2476 // more explicitly (in at least a few predecessors).
2477 else if (MI->getParent() != CmpInstr.getParent())
2478 return false;
2479 else if (Value != 0) {
2480 // The record-form instructions set CR bit based on signed comparison
2481 // against 0. We try to convert a compare against 1 or -1 into a compare
2482 // against 0 to exploit record-form instructions. For example, we change
2483 // the condition "greater than -1" into "greater than or equal to 0"
2484 // and "less than 1" into "less than or equal to 0".
2485
2486 // Since we optimize comparison based on a specific branch condition,
2487 // we don't optimize if condition code is used by more than once.
2488 if (equalityOnly || !MRI->hasOneUse(CRReg))
2489 return false;
2490
2491 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2492 if (UseMI->getOpcode() != PPC::BCC)
2493 return false;
2494
2496 unsigned PredCond = PPC::getPredicateCondition(Pred);
2497 unsigned PredHint = PPC::getPredicateHint(Pred);
2498 int16_t Immed = (int16_t)Value;
2499
2500 // When modifying the condition in the predicate, we propagate hint bits
2501 // from the original predicate to the new one.
2502 if (Immed == -1 && PredCond == PPC::PRED_GT)
2503 // We convert "greater than -1" into "greater than or equal to 0",
2504 // since we are assuming signed comparison by !equalityOnly
2505 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2506 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2507 // We convert "less than or equal to -1" into "less than 0".
2508 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2509 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2510 // We convert "less than 1" into "less than or equal to 0".
2511 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2512 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2513 // We convert "greater than or equal to 1" into "greater than 0".
2514 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2515 else
2516 return false;
2517
2518 // Convert the comparison and its user to a compare against zero with the
2519 // appropriate predicate on the branch. Zero comparison might provide
2520 // optimization opportunities post-RA (see optimization in
2521 // PPCPreEmitPeephole.cpp).
2522 UseMI->getOperand(0).setImm(Pred);
2523 CmpInstr.getOperand(2).setImm(0);
2524 }
2525
2526 // Search for Sub.
2527 --I;
2528
2529 // Get ready to iterate backward from CmpInstr.
2530 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2531
2532 for (; I != E && !noSub; --I) {
2533 const MachineInstr &Instr = *I;
2534 unsigned IOpC = Instr.getOpcode();
2535
2536 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2537 Instr.readsRegister(PPC::CR0, TRI)))
2538 // This instruction modifies or uses the record condition register after
2539 // the one we want to change. While we could do this transformation, it
2540 // would likely not be profitable. This transformation removes one
2541 // instruction, and so even forcing RA to generate one move probably
2542 // makes it unprofitable.
2543 return false;
2544
2545 // Check whether CmpInstr can be made redundant by the current instruction.
2546 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2547 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2548 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2549 ((Instr.getOperand(1).getReg() == SrcReg &&
2550 Instr.getOperand(2).getReg() == SrcReg2) ||
2551 (Instr.getOperand(1).getReg() == SrcReg2 &&
2552 Instr.getOperand(2).getReg() == SrcReg))) {
2553 Sub = &*I;
2554 break;
2555 }
2556
2557 if (I == B)
2558 // The 'and' is below the comparison instruction.
2559 return false;
2560 }
2561
2562 // Return false if no candidates exist.
2563 if (!MI && !Sub)
2564 return false;
2565
2566 // The single candidate is called MI.
2567 if (!MI) MI = Sub;
2568
2569 int NewOpC = -1;
2570 int MIOpC = MI->getOpcode();
2571 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2572 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2573 NewOpC = MIOpC;
2574 else {
2575 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2576 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2577 NewOpC = MIOpC;
2578 }
2579
2580 // FIXME: On the non-embedded POWER architectures, only some of the record
2581 // forms are fast, and we should use only the fast ones.
2582
2583 // The defining instruction has a record form (or is already a record
2584 // form). It is possible, however, that we'll need to reverse the condition
2585 // code of the users.
2586 if (NewOpC == -1)
2587 return false;
2588
2589 // This transformation should not be performed if `nsw` is missing and is not
2590 // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2591 // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2592 // CRReg can reflect if compared values are equal, this optz is still valid.
2593 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2594 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2595 return false;
2596
2597 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2598 // needs to be updated to be based on SUB. Push the condition code
2599 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2600 // condition code of these operands will be modified.
2601 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2602 // comparison against 0, which may modify predicate.
2603 bool ShouldSwap = false;
2604 if (Sub && Value == 0) {
2605 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2606 Sub->getOperand(2).getReg() == SrcReg;
2607
2608 // The operands to subf are the opposite of sub, so only in the fixed-point
2609 // case, invert the order.
2610 ShouldSwap = !ShouldSwap;
2611 }
2612
2613 if (ShouldSwap)
2615 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2616 I != IE; ++I) {
2617 MachineInstr *UseMI = &*I;
2618 if (UseMI->getOpcode() == PPC::BCC) {
2620 unsigned PredCond = PPC::getPredicateCondition(Pred);
2621 assert((!equalityOnly ||
2622 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2623 "Invalid predicate for equality-only optimization");
2624 (void)PredCond; // To suppress warning in release build.
2625 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2627 } else if (UseMI->getOpcode() == PPC::ISEL ||
2628 UseMI->getOpcode() == PPC::ISEL8) {
2629 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2630 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2631 "Invalid CR bit for equality-only optimization");
2632
2633 if (NewSubReg == PPC::sub_lt)
2634 NewSubReg = PPC::sub_gt;
2635 else if (NewSubReg == PPC::sub_gt)
2636 NewSubReg = PPC::sub_lt;
2637
2638 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2639 NewSubReg));
2640 } else // We need to abort on a user we don't understand.
2641 return false;
2642 }
2643 assert(!(Value != 0 && ShouldSwap) &&
2644 "Non-zero immediate support and ShouldSwap"
2645 "may conflict in updating predicate");
2646
2647 // Create a new virtual register to hold the value of the CR set by the
2648 // record-form instruction. If the instruction was not previously in
2649 // record form, then set the kill flag on the CR.
2650 CmpInstr.eraseFromParent();
2651
2653 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2654 get(TargetOpcode::COPY), CRReg)
2655 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2656
2657 // Even if CR0 register were dead before, it is alive now since the
2658 // instruction we just built uses it.
2659 MI->clearRegisterDeads(PPC::CR0);
2660
2661 if (MIOpC != NewOpC) {
2662 // We need to be careful here: we're replacing one instruction with
2663 // another, and we need to make sure that we get all of the right
2664 // implicit uses and defs. On the other hand, the caller may be holding
2665 // an iterator to this instruction, and so we can't delete it (this is
2666 // specifically the case if this is the instruction directly after the
2667 // compare).
2668
2669 // Rotates are expensive instructions. If we're emitting a record-form
2670 // rotate that can just be an andi/andis, we should just emit that.
2671 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2672 Register GPRRes = MI->getOperand(0).getReg();
2673 int64_t SH = MI->getOperand(2).getImm();
2674 int64_t MB = MI->getOperand(3).getImm();
2675 int64_t ME = MI->getOperand(4).getImm();
2676 // We can only do this if both the start and end of the mask are in the
2677 // same halfword.
2678 bool MBInLoHWord = MB >= 16;
2679 bool MEInLoHWord = ME >= 16;
2680 uint64_t Mask = ~0LLU;
2681
2682 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2683 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2684 // The mask value needs to shift right 16 if we're emitting andis.
2685 Mask >>= MBInLoHWord ? 0 : 16;
2686 NewOpC = MIOpC == PPC::RLWINM
2687 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2688 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2689 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2690 (ME - MB + 1 == SH) && (MB >= 16)) {
2691 // If we are rotating by the exact number of bits as are in the mask
2692 // and the mask is in the least significant bits of the register,
2693 // that's just an andis. (as long as the GPR result has no uses).
2694 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2695 Mask >>= 16;
2696 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2697 }
2698 // If we've set the mask, we can transform.
2699 if (Mask != ~0LLU) {
2700 MI->removeOperand(4);
2701 MI->removeOperand(3);
2702 MI->getOperand(2).setImm(Mask);
2703 NumRcRotatesConvertedToRcAnd++;
2704 }
2705 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
2706 int64_t MB = MI->getOperand(3).getImm();
2707 if (MB >= 48) {
2708 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2709 NewOpC = PPC::ANDI8_rec;
2710 MI->removeOperand(3);
2711 MI->getOperand(2).setImm(Mask);
2712 NumRcRotatesConvertedToRcAnd++;
2713 }
2714 }
2715
2716 const MCInstrDesc &NewDesc = get(NewOpC);
2717 MI->setDesc(NewDesc);
2718
2719 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2720 if (!MI->definesRegister(ImpDef)) {
2721 MI->addOperand(*MI->getParent()->getParent(),
2722 MachineOperand::CreateReg(ImpDef, true, true));
2723 }
2724 }
2725 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2726 if (!MI->readsRegister(ImpUse)) {
2727 MI->addOperand(*MI->getParent()->getParent(),
2728 MachineOperand::CreateReg(ImpUse, false, true));
2729 }
2730 }
2731 }
2732 assert(MI->definesRegister(PPC::CR0) &&
2733 "Record-form instruction does not define cr0?");
2734
2735 // Modify the condition code of operands in OperandsToUpdate.
2736 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2737 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2738 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2739 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2740
2741 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2742 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2743
2744 return true;
2745}
2746
2749 if (MRI->isSSA())
2750 return false;
2751
2752 Register SrcReg, SrcReg2;
2753 int64_t CmpMask, CmpValue;
2754 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2755 return false;
2756
2757 // Try to optimize the comparison against 0.
2758 if (CmpValue || !CmpMask || SrcReg2)
2759 return false;
2760
2761 // The record forms set the condition register based on a signed comparison
2762 // with zero (see comments in optimizeCompareInstr). Since we can't do the
2763 // equality checks in post-RA, we are more restricted on a unsigned
2764 // comparison.
2765 unsigned Opc = CmpMI.getOpcode();
2766 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2767 return false;
2768
2769 // The record forms are always based on a 64-bit comparison on PPC64
2770 // (similary, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2771 // comparison. Since we can't do the equality checks in post-RA, we bail out
2772 // the case.
2773 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2774 return false;
2775
2776 // CmpMI can't be deleted if it has implicit def.
2777 if (CmpMI.hasImplicitDef())
2778 return false;
2779
2780 bool SrcRegHasOtherUse = false;
2781 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2782 if (!SrcMI || !SrcMI->definesRegister(SrcReg))
2783 return false;
2784
2785 MachineOperand RegMO = CmpMI.getOperand(0);
2786 Register CRReg = RegMO.getReg();
2787 if (CRReg != PPC::CR0)
2788 return false;
2789
2790 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2791 bool SeenUseOfCRReg = false;
2792 bool IsCRRegKilled = false;
2793 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2794 SeenUseOfCRReg) ||
2795 SrcMI->definesRegister(CRReg) || SeenUseOfCRReg)
2796 return false;
2797
2798 int SrcMIOpc = SrcMI->getOpcode();
2799 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2800 if (NewOpC == -1)
2801 return false;
2802
2803 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2804 LLVM_DEBUG(SrcMI->dump());
2805
2806 const MCInstrDesc &NewDesc = get(NewOpC);
2807 SrcMI->setDesc(NewDesc);
2808 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2810 SrcMI->clearRegisterDeads(CRReg);
2811
2812 assert(SrcMI->definesRegister(PPC::CR0) &&
2813 "Record-form instruction does not define cr0?");
2814
2815 LLVM_DEBUG(dbgs() << "with: ");
2816 LLVM_DEBUG(SrcMI->dump());
2817 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2818 LLVM_DEBUG(CmpMI.dump());
2819 return true;
2820}
2821
    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
    const TargetRegisterInfo *TRI) const {
  const MachineOperand *BaseOp;
  // PPC memory offsets are always fixed-size; nothing is vector-length
  // scalable.
  OffsetIsScalable = false;
  // Delegate to the single-base-operand helper; fail if the access does not
  // have a recognizable base+offset form.
  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
    return false;
  // The generic hook reports a list of base operands; PPC has exactly one.
  BaseOps.push_back(BaseOp);
  return true;
}
2833
2834static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2835 const TargetRegisterInfo *TRI) {
2836 // If this is a volatile load/store, don't mess with it.
2837 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2838 return false;
2839
2840 if (LdSt.getOperand(2).isFI())
2841 return true;
2842
2843 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2844 // Can't cluster if the instruction modifies the base register
2845 // or it is update form. e.g. ld r2,3(r2)
2846 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2847 return false;
2848
2849 return true;
2850}
2851
2852// Only cluster instruction pair that have the same opcode, and they are
2853// clusterable according to PowerPC specification.
2854static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2855 const PPCSubtarget &Subtarget) {
2856 switch (FirstOpc) {
2857 default:
2858 return false;
2859 case PPC::STD:
2860 case PPC::STFD:
2861 case PPC::STXSD:
2862 case PPC::DFSTOREf64:
2863 return FirstOpc == SecondOpc;
2864 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2865 // 32bit and 64bit instruction selection. They are clusterable pair though
2866 // they are different opcode.
2867 case PPC::STW:
2868 case PPC::STW8:
2869 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2870 }
2871}
2872
    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
    unsigned NumBytes) const {

  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // ClusterSize means the number of memory operations that will have been
  // clustered if this hook returns true.
  // Don't cluster memory op if there are already two ops clustered at least.
  if (ClusterSize > 2)
    return false;

  // Cluster the load/store only when they have the same base
  // register or FI.
  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
    return false;

  // Check if the load/store are clusterable according to the PowerPC
  // specification.
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  // Cluster the load/store only when they have the same opcode, and they are
  // clusterable opcode according to PowerPC specification.
  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
    return false;

  // Can't cluster load/store that have ordered or volatile memory reference.
  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
      !isLdStSafeToCluster(SecondLdSt, TRI))
    return false;

  int64_t Offset1 = 0, Offset2 = 0;
  LocationSize Width1 = 0, Width2 = 0;
  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
  // Re-derive base/offset/width for both accesses; both must be recognized
  // and must access the same number of bytes.
  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
      Width1 != Width2)
    return false;

  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
         "getMemOperandWithOffsetWidth return incorrect base op");
  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  // Only cluster strictly adjacent accesses: the second access must begin
  // exactly where the first one ends.
  return Offset1 + (int64_t)Width1.getValue() == Offset2;
}
2929
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be.  This returns the maximum number of bytes.
///
  unsigned Opcode = MI.getOpcode();

  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
    // Inline asm: conservatively estimate from the asm string itself.
    const MachineFunction *MF = MI.getParent()->getParent();
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
  } else if (Opcode == TargetOpcode::STACKMAP) {
    // Stackmaps occupy exactly the number of patch bytes they request.
    StackMapOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else if (Opcode == TargetOpcode::PATCHPOINT) {
    PatchPointOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else {
    // Ordinary instruction: the size recorded in the opcode description.
    return get(Opcode).getSize();
  }
}
2950
// Split an operand target flag into its (direct, bitmask) components; PPC
// encodes everything directly, so the bitmask half is always zero.
std::pair<unsigned, unsigned>
  // PPC always uses a direct mask.
  return std::make_pair(TF, 0u);
}
2956
  // Name table used by MIR serialization to print and parse PPC operand
  // target flags: each entry maps a PPCII::MO_* flag to its textual form.
  using namespace PPCII;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PLT, "ppc-plt"},
      {MO_PIC_FLAG, "ppc-pic"},
      {MO_PCREL_FLAG, "ppc-pcrel"},
      {MO_GOT_FLAG, "ppc-got"},
      {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
      {MO_TLSGD_FLAG, "ppc-tlsgd"},
      {MO_TPREL_FLAG, "ppc-tprel"},
      {MO_TLSLDM_FLAG, "ppc-tlsldm"},
      {MO_TLSLD_FLAG, "ppc-tlsld"},
      {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
      {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
      {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
      {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
      {MO_LO, "ppc-lo"},
      {MO_HA, "ppc-ha"},
      {MO_TPREL_LO, "ppc-tprel-lo"},
      {MO_TPREL_HA, "ppc-tprel-ha"},
      {MO_DTPREL_LO, "ppc-dtprel-lo"},
      {MO_TLSLD_LO, "ppc-tlsld-lo"},
      {MO_TOC_LO, "ppc-toc-lo"},
      {MO_TLS, "ppc-tls"},
      {MO_PIC_HA_FLAG, "ppc-ha-pic"},
      {MO_PIC_LO_FLAG, "ppc-lo-pic"},
      {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
      {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
      {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
  };
  return ArrayRef(TargetFlags);
}
2990
// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
// The VSX versions have the advantage of a full 64-register target whereas
// the FP ones have the advantage of lower latency and higher throughput. So
// what we are after is using the faster instructions in low register pressure
// situations and using the larger register file in high register pressure
// situations.
  unsigned UpperOpcode, LowerOpcode;
  // Map each memory pseudo to its pair of expansions:
  //   UpperOpcode - the VSX form (full 64-register file),
  //   LowerOpcode - the classic FP form (lower latency).
  switch (MI.getOpcode()) {
  case PPC::DFLOADf32:
    UpperOpcode = PPC::LXSSP;
    LowerOpcode = PPC::LFS;
    break;
  case PPC::DFLOADf64:
    UpperOpcode = PPC::LXSD;
    LowerOpcode = PPC::LFD;
    break;
  case PPC::DFSTOREf32:
    UpperOpcode = PPC::STXSSP;
    LowerOpcode = PPC::STFS;
    break;
  case PPC::DFSTOREf64:
    UpperOpcode = PPC::STXSD;
    LowerOpcode = PPC::STFD;
    break;
  case PPC::XFLOADf32:
    UpperOpcode = PPC::LXSSPX;
    LowerOpcode = PPC::LFSX;
    break;
  case PPC::XFLOADf64:
    UpperOpcode = PPC::LXSDX;
    LowerOpcode = PPC::LFDX;
    break;
  case PPC::XFSTOREf32:
    UpperOpcode = PPC::STXSSPX;
    LowerOpcode = PPC::STFSX;
    break;
  case PPC::XFSTOREf64:
    UpperOpcode = PPC::STXSDX;
    LowerOpcode = PPC::STFDX;
    break;
  case PPC::LIWAX:
    UpperOpcode = PPC::LXSIWAX;
    LowerOpcode = PPC::LFIWAX;
    break;
  case PPC::LIWZX:
    UpperOpcode = PPC::LXSIWZX;
    LowerOpcode = PPC::LFIWZX;
    break;
  case PPC::STIWX:
    UpperOpcode = PPC::STXSIWX;
    LowerOpcode = PPC::STFIWX;
    break;
  default:
    llvm_unreachable("Unknown Operation!");
  }

  Register TargetReg = MI.getOperand(0).getReg();
  unsigned Opcode;
  // If RA chose a register in the F0-F31 / VSL0-VSL31 range, the faster FP
  // opcode can encode it; otherwise the VSX opcode is required.
  if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
      (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
    Opcode = LowerOpcode;
  else
    Opcode = UpperOpcode;
  // Rewrite the pseudo in place; operands are unchanged.
  MI.setDesc(get(Opcode));
  return true;
}
3058
3059static bool isAnImmediateOperand(const MachineOperand &MO) {
3060 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3061}
3062
  // Expand one post-RA pseudo in place; returns true if MI was rewritten.
  auto &MBB = *MI.getParent();
  auto DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  case PPC::BUILD_UACC: {
    MCRegister ACC = MI.getOperand(0).getReg();
    MCRegister UACC = MI.getOperand(1).getReg();
    // Only emit copies when source and destination accumulators do not
    // overlay the same group of four VSRs.
    if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
      MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
      MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
      // FIXME: This can easily be improved to look up to the top of the MBB
      // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
      // we can just re-target any such XXLOR's to DstVSR + offset.
      for (int VecNo = 0; VecNo < 4; VecNo++)
        BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
            .addReg(SrcVSR + VecNo)
            .addReg(SrcVSR + VecNo);
    }
    // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
    // So after building the 4 copies, we can replace the BUILD_UACC instruction
    // with a NOP.
    [[fallthrough]];
  }
  case PPC::KILL_PAIR: {
    // Turn the pseudo itself into a NOP (operands are dropped).
    MI.setDesc(get(PPC::UNENCODED_NOP));
    MI.removeOperand(1);
    MI.removeOperand(0);
    return true;
  }
  case TargetOpcode::LOAD_STACK_GUARD: {
    // Rewrite into a load of the stack canary at a fixed negative offset off
    // of the thread pointer (X13 on 64-bit, R2 on 32-bit).
    assert(Subtarget.isTargetLinux() &&
           "Only Linux target is expected to contain LOAD_STACK_GUARD");
    const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
  }
  case PPC::PPCLdFixedAddr: {
    // Load a glibc-defined word (hwcap/hwcap2/cpuid) from a fixed offset off
    // the thread pointer; the offset depends on word kind, endianness and
    // 32/64-bit mode, and comes from PPCTargetParser.def.
    assert(Subtarget.getTargetTriple().isOSGlibc() &&
           "Only targets with Glibc expected to contain PPCLdFixedAddr");
    int64_t Offset = 0;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(PPC::LWZ));
    uint64_t FAType = MI.getOperand(1).getImm();
#undef PPC_LNX_FEATURE
#undef PPC_LNX_CPU
#define PPC_LNX_DEFINE_OFFSETS
#include "llvm/TargetParser/PPCTargetParser.def"
    bool IsLE = Subtarget.isLittleEndian();
    bool Is64 = Subtarget.isPPC64();
    if (FAType == PPC_FAWORD_HWCAP) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_HWCAP2) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_CPUID) {
      if (IsLE)
        Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
    }
    assert(Offset && "Do not know the offset for this fixed addr load");
    MI.removeOperand(1);
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
#define PPC_TGT_PARSER_UNDEF_MACROS
#include "llvm/TargetParser/PPCTargetParser.def"
#undef PPC_TGT_PARSER_UNDEF_MACROS
  }
  case PPC::DFLOADf32:
  case PPC::DFLOADf64:
  case PPC::DFSTOREf32:
  case PPC::DFSTOREf64: {
    assert(Subtarget.hasP9Vector() &&
           "Invalid D-Form Pseudo-ops on Pre-P9 target.");
    assert(MI.getOperand(2).isReg() &&
           isAnImmediateOperand(MI.getOperand(1)) &&
           "D-form op must have register and immediate operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf32:
  case PPC::XFSTOREf32:
  case PPC::LIWAX:
  case PPC::LIWZX:
  case PPC::STIWX: {
    assert(Subtarget.hasP8Vector() &&
           "Invalid X-Form Pseudo-ops on Pre-P8 target.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf64:
  case PPC::XFSTOREf64: {
    assert(Subtarget.hasVSX() &&
           "Invalid X-Form Pseudo-ops on target that has no VSX.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::SPILLTOVSR_LD: {
    // Reload into either a VSX floating-point register (recurse through the
    // D-form pseudo expansion) or a GPR.
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg)) {
      MI.setDesc(get(PPC::DFLOADf64));
      return expandPostRAPseudo(MI);
    }
    else
      MI.setDesc(get(PPC::LD));
    return true;
  }
  case PPC::SPILLTOVSR_ST: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::DFSTOREf64));
      return expandPostRAPseudo(MI);
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STD));
    }
    return true;
  }
  case PPC::SPILLTOVSR_LDX: {
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg))
      MI.setDesc(get(PPC::LXSDX));
    else
      MI.setDesc(get(PPC::LDX));
    return true;
  }
  case PPC::SPILLTOVSR_STX: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::STXSDX));
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STDX));
    }
    return true;
  }

  // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
  case PPC::CFENCE:
  case PPC::CFENCE8: {
    // Expand the fence into a compare of the value against itself feeding a
    // control-dependent branch hint, then convert MI itself into an isync.
    auto Val = MI.getOperand(0).getReg();
    unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
    BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
    BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
        .addReg(PPC::CR7)
        .addImm(1);
    MI.setDesc(get(PPC::ISYNC));
    MI.removeOperand(0);
    return true;
  }
  }
  return false;
}
3233
3234// Essentially a compile-time implementation of a compare->isel sequence.
3235// It takes two constants to compare, along with the true/false registers
3236// and the comparison type (as a subreg to a CR field) and returns one
3237// of the true/false registers, depending on the comparison results.
3238static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3239 unsigned TrueReg, unsigned FalseReg,
3240 unsigned CRSubReg) {
3241 // Signed comparisons. The immediates are assumed to be sign-extended.
3242 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3243 switch (CRSubReg) {
3244 default: llvm_unreachable("Unknown integer comparison type.");
3245 case PPC::sub_lt:
3246 return Imm1 < Imm2 ? TrueReg : FalseReg;
3247 case PPC::sub_gt:
3248 return Imm1 > Imm2 ? TrueReg : FalseReg;
3249 case PPC::sub_eq:
3250 return Imm1 == Imm2 ? TrueReg : FalseReg;
3251 }
3252 }
3253 // Unsigned comparisons.
3254 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3255 switch (CRSubReg) {
3256 default: llvm_unreachable("Unknown integer comparison type.");
3257 case PPC::sub_lt:
3258 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3259 case PPC::sub_gt:
3260 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3261 case PPC::sub_eq:
3262 return Imm1 == Imm2 ? TrueReg : FalseReg;
3263 }
3264 }
3265 return PPC::NoRegister;
3266}
3267
    unsigned OpNo,
    int64_t Imm) const {
  assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
  // Replace the REG with the Immediate.
  Register InUseReg = MI.getOperand(OpNo).getReg();
  MI.getOperand(OpNo).ChangeToImmediate(Imm);

  // We need to make sure that the MI didn't have any implicit use
  // of this REG any more. We don't call MI.implicit_operands().empty() to
  // return early, since MI's MCID might be changed in calling context, as a
  // result its number of explicit operands may be changed, thus the begin of
  // implicit operand is changed.
  int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI);
  if (UseOpIdx >= 0) {
    MachineOperand &MO = MI.getOperand(UseOpIdx);
    if (MO.isImplicit())
      // The operands must always be in the following order:
      // - explicit reg defs,
      // - other explicit operands (reg uses, immediates, etc.),
      // - implicit reg defs
      // - implicit reg uses
      // Therefore, removing the implicit operand won't change the explicit
      // operands layout.
      MI.removeOperand(UseOpIdx);
  }
}
3296
// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
// LII describes the immediate to load, whether the 64-bit opcode is needed,
// and whether CR0 must also be defined.
    const LoadImmediateInfo &LII) const {
  // Remove existing operands.
  // When CR0 must be set we keep the first two operands (def + one use);
  // otherwise only the def survives.
  int OperandToKeep = LII.SetCR ? 1 : 0;
  for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
    MI.removeOperand(i);

  // Replace the instruction.
  if (LII.SetCR) {
    // Record-form ANDI both produces the value and implicitly defines CR0.
    MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
    // Set the immediate.
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
    return;
  }
  else
    MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));

  // Set the immediate.
  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
      .addImm(LII.Imm);
}
3321
    bool &SeenIntermediateUse) const {
  assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
         "Should be called after register allocation.");
  MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
  It++;
  SeenIntermediateUse = false;
  // Walk backwards from MI to the top of the block looking for the nearest
  // instruction that writes Reg; any intervening read of Reg is reported
  // through SeenIntermediateUse.
  for (; It != E; ++It) {
    if (It->modifiesRegister(Reg, TRI))
      return &*It;
    if (It->readsRegister(Reg, TRI))
      SeenIntermediateUse = true;
  }
  // No definition of Reg inside this basic block.
  return nullptr;
}
3338
    const DebugLoc &DL, Register Reg,
    int64_t Imm) const {
         "Register should be in non-SSA form after RA");
  bool isPPC64 = Subtarget.isPPC64();
  // FIXME: Materialization here is not optimal.
  // For some special bit patterns we can use less instructions.
  // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
  if (isInt<16>(Imm)) {
    // A single LI/LI8 covers any sign-extended 16-bit value.
    BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
  } else if (isInt<32>(Imm)) {
    // LIS sets the upper halfword; ORI fills in the lower halfword if any of
    // its bits are set.
    BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
        .addImm(Imm >> 16);
    if (Imm & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Imm & 0xFFFF);
  } else {
    assert(isPPC64 && "Materializing 64-bit immediate to single register is "
                      "only supported in PPC64");
    // Build the high 32 bits with LIS8/ORI8, shift them into place with
    // RLDICR, then OR in the low 32 bits one halfword at a time.
    BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
    if ((Imm >> 32) & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm((Imm >> 32) & 0xFFFF);
    BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(32)
        .addImm(31);
    BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm((Imm >> 16) & 0xFFFF);
    if (Imm & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Imm & 0xFFFF);
  }
}
3379
// Find a def of one of MI's register operands that could be forwarded into
// MI as an immediate (an LI/LI8 or ADDI/ADDI8). On success returns the def
// and sets OpNoForForwarding to the operand index it feeds; returns nullptr
// otherwise. SeenIntermediateUse reports (post-RA only) whether the register
// is read between the def and MI.
MachineInstr *PPCInstrInfo::getForwardingDefMI(
    unsigned &OpNoForForwarding,
    bool &SeenIntermediateUse) const {
  OpNoForForwarding = ~0U;
  MachineInstr *DefMI = nullptr;
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  // If we're in SSA, get the defs through the MRI. Otherwise, only look
  // within the basic block to see if the register is defined using an
  // LI/LI8/ADDI/ADDI8.
  if (MRI->isSSA()) {
    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
      if (!MI.getOperand(i).isReg())
        continue;
      Register Reg = MI.getOperand(i).getReg();
      if (!Reg.isVirtual())
        continue;
      // Look through COPY-like instructions to the real defining instruction.
      Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
      if (TrueReg.isVirtual()) {
        MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
        if (DefMIForTrueReg->getOpcode() == PPC::LI ||
            DefMIForTrueReg->getOpcode() == PPC::LI8 ||
            DefMIForTrueReg->getOpcode() == PPC::ADDI ||
            DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
          OpNoForForwarding = i;
          DefMI = DefMIForTrueReg;
          // The ADDI and LI operand maybe exist in one instruction at same
          // time. we prefer to fold LI operand as LI only has one Imm operand
          // and is more possible to be converted. So if current DefMI is
          // ADDI/ADDI8, we continue to find possible LI/LI8.
          if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
            break;
        }
      }
    }
  } else {
    // Looking back through the definition for each operand could be expensive,
    // so exit early if this isn't an instruction that either has an immediate
    // form or is already an immediate form that we can handle.
    ImmInstrInfo III;
    unsigned Opc = MI.getOpcode();
    bool ConvertibleImmForm =
        Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
        Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
        Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
        Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
        Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
        Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
        Opc == PPC::RLWINM8_rec;
    bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
                       ? PPC::isVFRegister(MI.getOperand(0).getReg())
                       : false;
    if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
      return nullptr;

    // Don't convert or %X, %Y, %Y since that's just a register move.
    if ((Opc == PPC::OR || Opc == PPC::OR8) &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return nullptr;
    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
      MachineOperand &MO = MI.getOperand(i);
      SeenIntermediateUse = false;
      if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
        Register Reg = MI.getOperand(i).getReg();
        // If we see another use of this reg between the def and the MI,
        // we want to flag it so the def isn't deleted.
        MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
        if (DefMI) {
          // Is this register defined by some form of add-immediate (including
          // load-immediate) within this basic block?
          switch (DefMI->getOpcode()) {
          default:
            break;
          case PPC::LI:
          case PPC::LI8:
          case PPC::ADDItocL8:
          case PPC::ADDI:
          case PPC::ADDI8:
            OpNoForForwarding = i;
            return DefMI;
          }
        }
      }
    }
  }
  return OpNoForForwarding == ~0U ? nullptr : DefMI;
}
3468
3469unsigned PPCInstrInfo::getSpillTarget() const {
3470 // With P10, we may need to spill paired vector registers or accumulator
3471 // registers. MMA implies paired vectors, so we can just check that.
3472 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3473 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3474 2 : Subtarget.hasP9Vector() ?
3475 1 : 0;
3476}
3477
3478ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3479 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3480}
3481
3482ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3483 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3484}
3485
3486// This opt tries to convert the following imm form to an index form to save an
3487// add for stack variables.
3488// Return false if no such pattern found.
3489//
3490// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3491// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3492// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3493//
3494// can be converted to:
3495//
3496// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3497// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3498//
3499// In order to eliminate ADD instr, make sure that:
3500// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3501// new ADDI instr and ADDI can only take int16 Imm.
3502// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3503// between ADDI and ADD instr since its original def in ADDI will be changed
3504// in new ADDI instr. And also there should be no new def for it between
3505// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3506// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3507// between ADD and Imm instr since ADD instr will be eliminated.
3508// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3509// moved to Index instr.
  MachineFunction *MF = MI.getParent()->getParent();
  bool PostRA = !MRI->isSSA();
  // Do this opt after PEI which is after RA. The reason is stack slot expansion
  // in PEI may expose such opportunities since in PEI, stack slot offsets to
  // frame base(OffsetAddi) are determined.
  if (!PostRA)
    return false;
  unsigned ToBeDeletedReg = 0;
  int64_t OffsetImm = 0;
  unsigned XFormOpcode = 0;
  ImmInstrInfo III;

  // Check if Imm instr meets requirement.
  if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
                                    III))
    return false;

  bool OtherIntermediateUse = false;
  MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);

  // Exit if there is other use between ADD and Imm instr or no def found.
  if (OtherIntermediateUse || !ADDMI)
    return false;

  // Check if ADD instr meets requirement.
  if (!isADDInstrEligibleForFolding(*ADDMI))
    return false;

  unsigned ScaleRegIdx = 0;
  int64_t OffsetAddi = 0;
  MachineInstr *ADDIMI = nullptr;

  // Check if there is a valid ToBeChangedReg in ADDMI.
  // 1: It must be killed.
  // 2: Its definition must be a valid ADDIMI.
  // 3: It must satify int16 offset requirement.
  // Try either ADD operand as the ToBeChangedReg; the other one becomes the
  // scale register moved into the index-form instruction.
  if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 2;
  else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 1;
  else
    return false;

  assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
  Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
  Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
  // Lambda: is Reg redefined anywhere strictly between Start and End?
  auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
    for (auto It = ++Start; It != End; It++)
      if (It->modifiesRegister(Reg, &getRegisterInfo()))
        return true;
    return false;
  };

  // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
  // treated as special zero when ScaleReg is R0/X0 register.
  if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
      (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
    return false;

  // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
  // and Imm Instr.
  if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
    return false;

  // Now start to do the transformation.
  LLVM_DEBUG(dbgs() << "Replace instruction: "
                    << "\n");
  LLVM_DEBUG(ADDIMI->dump());
  LLVM_DEBUG(ADDMI->dump());
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "with: "
                    << "\n");

  // Update ADDI instr.
  // Fold the imm-form offset into the ADDI addend.
  ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);

  // Update Imm instr.
  // Switch MI to the index (X-) form: immediate operand becomes ScaleReg, and
  // the forwarded register operand becomes ToBeChangedReg (killed).
  MI.setDesc(get(XFormOpcode));
  MI.getOperand(III.ImmOpNo)
      .ChangeToRegister(ScaleReg, false, false,
                        ADDMI->getOperand(ScaleRegIdx).isKill());

  MI.getOperand(III.OpNoForForwarding)
      .ChangeToRegister(ToBeChangedReg, false, false, true);

  // Eliminate ADD instr.
  ADDMI->eraseFromParent();

  LLVM_DEBUG(ADDIMI->dump());
  LLVM_DEBUG(MI.dump());

  return true;
}
3606
    int64_t &Imm) const {
  unsigned Opc = ADDIMI.getOpcode();

  // Exit if the instruction is not ADDI.
  if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
    return false;

  // The operand may not necessarily be an immediate - it could be a relocation.
  if (!ADDIMI.getOperand(2).isImm())
    return false;

  // Report the ADDI addend back to the caller for range checking.
  Imm = ADDIMI.getOperand(2).getImm();

  return true;
}
3623
  // Only the plain 32-/64-bit adds (ADD4/ADD8) are foldable.
  unsigned Opc = ADDMI.getOpcode();

  // Exit if the instruction is not ADD.
  return Opc == PPC::ADD4 || Opc == PPC::ADD8;
}
3630
    unsigned &ToBeDeletedReg,
    unsigned &XFormOpcode,
    int64_t &OffsetImm,
    ImmInstrInfo &III) const {
  // Only handle load/store.
  if (!MI.mayLoadOrStore())
    return false;

  unsigned Opc = MI.getOpcode();

  // Look up the index-form (X-form) counterpart of this imm-form opcode.
  XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);

  // Exit if instruction has no index form.
  if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
    return false;

  // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
  if (!instrHasImmForm(XFormOpcode,
                       PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
    return false;

  // The transformation only makes sense when the two operands are added
  // together to form the address.
  if (!III.IsSummingOperands)
    return false;

  MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
  MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
  // Only support imm operands, not relocation slots or others.
  if (!ImmOperand.isImm())
    return false;

  assert(RegOperand.isReg() && "Instruction format is not right");

  // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
  if (!RegOperand.isKill())
    return false;

  // Report the register to delete and the folded offset to the caller.
  ToBeDeletedReg = RegOperand.getReg();
  OffsetImm = ImmOperand.getImm();

  return true;
}
3673
    MachineInstr *&ADDIMI,
    int64_t &OffsetAddi,
    int64_t OffsetImm) const {
  assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
  MachineOperand &MO = ADDMI->getOperand(Index);

  // The candidate register must die at the ADD, since its ADDI def will be
  // rewritten.
  if (!MO.isKill())
    return false;

  bool OtherIntermediateUse = false;

  // Find the in-block def of the candidate; it must be an ADDI with no other
  // use of the register in between.
  ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
  // Currently handle only one "add + Imminstr" pair case, exit if other
  // intermediate use for ToBeChangedReg found.
  // TODO: handle the cases where there are other "add + Imminstr" pairs
  // with same offset in Imminstr which is like:
  //
  // ADDI instr: ToBeChangedReg  = ADDI FrameBaseReg, OffsetAddi
  // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
  // Imm instr1: Reg1            = op1 OffsetImm, ToBeDeletedReg1(killed)
  // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
  // Imm instr2: Reg2            = op2 OffsetImm, ToBeDeletedReg2(killed)
  //
  // can be converted to:
  //
  // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
  //                                       (OffsetAddi + OffsetImm)
  // Index instr1:   Reg1           = opx1 ScaleReg1, ToBeChangedReg
  // Index instr2:   Reg2           = opx2 ScaleReg2, ToBeChangedReg(killed)

  if (OtherIntermediateUse || !ADDIMI)
    return false;
  // Check if ADDI instr meets requirement.
  if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
    return false;

  // The combined offset must still fit the ADDI's signed 16-bit immediate.
  if (isInt<16>(OffsetAddi + OffsetImm))
    return true;
  return false;
}
3715
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
// Returns true if any transformation was performed; on success the caller
// must update kill flags for every register recorded in RegsToUpdate, and
// may erase *KilledDef if it was set.
                                          SmallSet<Register, 4> &RegsToUpdate,
                                          MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  // Still being in SSA form means register allocation has not happened yet.
  bool PostRA = !MRI->isSSA();
  bool SeenIntermediateUse = true;
  unsigned ForwardingOperand = ~0U;
  // Find the LI/ADDI-style instruction whose result feeds one of MI's
  // register operands (the "forwarding" operand).
  MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
                                           SeenIntermediateUse);
  if (!DefMI)
    return false;
  assert(ForwardingOperand < MI.getNumOperands() &&
         "The forwarding operand needs to be valid at this point");
  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
  // DefMI can only be deleted if its result has no other use between DefMI
  // and MI, and dies at MI.
  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  if (KilledDef && KillFwdDefMI)
    *KilledDef = DefMI;

  // Conservatively add defs from DefMI and defs/uses from MI to the set of
  // registers that need their kill flags updated.
  for (const MachineOperand &MO : DefMI->operands())
    if (MO.isReg() && MO.isDef())
      RegsToUpdate.insert(MO.getReg());
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg())
      RegsToUpdate.insert(MO.getReg());

  // If this is a imm instruction and its register operands is produced by ADDI,
  // put the imm into imm inst directly.
  if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
          PPC::INSTRUCTION_LIST_END &&
      transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
    return true;

  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? PPC::isVFRegister(MI.getOperand(0).getReg())
                     : false;
  bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by an add-immediate,
  // try to convert it.
  if (HasImmForm &&
      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
                                 KillFwdDefMI))
    return true;

  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by LI, convert it now.
  if (HasImmForm &&
      transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
    return true;

  // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
  // can be simpified to LI.
  if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
    return true;

  return false;
}
3780
3782 MachineInstr **ToErase) const {
3783 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3784 Register FoldingReg = MI.getOperand(1).getReg();
3785 if (!FoldingReg.isVirtual())
3786 return false;
3787 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3788 if (SrcMI->getOpcode() != PPC::RLWINM &&
3789 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3790 SrcMI->getOpcode() != PPC::RLWINM8 &&
3791 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3792 return false;
3793 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3794 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3795 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3796 "Invalid PPC::RLWINM Instruction!");
3797 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3798 uint64_t SHMI = MI.getOperand(2).getImm();
3799 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3800 uint64_t MBMI = MI.getOperand(3).getImm();
3801 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3802 uint64_t MEMI = MI.getOperand(4).getImm();
3803
3804 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3805 "Invalid PPC::RLWINM Instruction!");
3806 // If MBMI is bigger than MEMI, we always can not get run of ones.
3807 // RotatedSrcMask non-wrap:
3808 // 0........31|32........63
3809 // RotatedSrcMask: B---E B---E
3810 // MaskMI: -----------|--E B------
3811 // Result: ----- --- (Bad candidate)
3812 //
3813 // RotatedSrcMask wrap:
3814 // 0........31|32........63
3815 // RotatedSrcMask: --E B----|--E B----
3816 // MaskMI: -----------|--E B------
3817 // Result: --- -----|--- ----- (Bad candidate)
3818 //
3819 // One special case is RotatedSrcMask is a full set mask.
3820 // RotatedSrcMask full:
3821 // 0........31|32........63
3822 // RotatedSrcMask: ------EB---|-------EB---
3823 // MaskMI: -----------|--E B------
3824 // Result: -----------|--- ------- (Good candidate)
3825
3826 // Mark special case.
3827 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3828
3829 // For other MBMI > MEMI cases, just return.
3830 if ((MBMI > MEMI) && !SrcMaskFull)
3831 return false;
3832
3833 // Handle MBMI <= MEMI cases.
3834 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3835 // In MI, we only need low 32 bits of SrcMI, just consider about low 32
3836 // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
3837 // while in PowerPC ISA, lowerest bit is at index 63.
3838 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3839
3840 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3841 APInt FinalMask = RotatedSrcMask & MaskMI;
3842 uint32_t NewMB, NewME;
3843 bool Simplified = false;
3844
3845 // If final mask is 0, MI result should be 0 too.
3846 if (FinalMask.isZero()) {
3847 bool Is64Bit =
3848 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3849 Simplified = true;
3850 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3851 LLVM_DEBUG(MI.dump());
3852
3853 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3854 // Replace MI with "LI 0"
3855 MI.removeOperand(4);
3856 MI.removeOperand(3);
3857 MI.removeOperand(2);
3858 MI.getOperand(1).ChangeToImmediate(0);
3859 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3860 } else {
3861 // Replace MI with "ANDI_rec reg, 0"
3862 MI.removeOperand(4);
3863 MI.removeOperand(3);
3864 MI.getOperand(2).setImm(0);
3865 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3866 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3867 if (SrcMI->getOperand(1).isKill()) {
3868 MI.getOperand(1).setIsKill(true);
3869 SrcMI->getOperand(1).setIsKill(false);
3870 } else
3871 // About to replace MI.getOperand(1), clear its kill flag.
3872 MI.getOperand(1).setIsKill(false);
3873 }
3874
3875 LLVM_DEBUG(dbgs() << "With: ");
3876 LLVM_DEBUG(MI.dump());
3877
3878 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3879 NewMB <= NewME) ||
3880 SrcMaskFull) {
3881 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
3882 // than NewME. Otherwise we get a 64 bit value after folding, but MI
3883 // return a 32 bit value.
3884 Simplified = true;
3885 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3886 LLVM_DEBUG(MI.dump());
3887
3888 uint16_t NewSH = (SHSrc + SHMI) % 32;
3889 MI.getOperand(2).setImm(NewSH);
3890 // If SrcMI mask is full, no need to update MBMI and MEMI.
3891 if (!SrcMaskFull) {
3892 MI.getOperand(3).setImm(NewMB);
3893 MI.getOperand(4).setImm(NewME);
3894 }
3895 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3896 if (SrcMI->getOperand(1).isKill()) {
3897 MI.getOperand(1).setIsKill(true);
3898 SrcMI->getOperand(1).setIsKill(false);
3899 } else
3900 // About to replace MI.getOperand(1), clear its kill flag.
3901 MI.getOperand(1).setIsKill(false);
3902
3903 LLVM_DEBUG(dbgs() << "To: ");
3904 LLVM_DEBUG(MI.dump());
3905 }
3906 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
3907 !SrcMI->hasImplicitDef()) {
3908 // If FoldingReg has no non-debug use and it has no implicit def (it
3909 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
3910 // Otherwise keep it.
3911 *ToErase = SrcMI;
3912 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3913 LLVM_DEBUG(SrcMI->dump());
3914 }
3915 return Simplified;
3916}
3917
// Determine whether the reg+reg instruction `Opc` has an equivalent reg+imm
// form.  On success, fill `III` with everything the caller needs to perform
// the transformation: the immediate opcode, which operand takes the
// immediate, which operand is being forwarded, signedness / width /
// alignment constraints on the immediate, and whether r0-as-zero special
// casing applies.  `IsVFReg` and `PostRA` select between VSX pseudo and
// D-form variants for the Power9 load/store cases.
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
                                   ImmInstrInfo &III, bool PostRA) const {
  // The vast majority of the instructions would need their operand 2 replaced
  // with an immediate when switching to the reg+imm form. A marked exception
  // are the update form loads/stores for which a constant operand 2 would need
  // to turn into a displacement and move operand 1 to the operand 2 position.
  III.ImmOpNo = 2;
  III.OpNoForForwarding = 2;
  III.ImmWidth = 16;
  III.ImmMustBeMultipleOf = 1;
  III.TruncateImmTo = 0;
  III.IsSummingOperands = false;
  switch (Opc) {
  default: return false;
  // Additions: operand zero (r0/x0) reads as literal zero in the D-form.
  case PPC::ADD4:
  case PPC::ADD8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 1;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
    break;
  case PPC::ADDC:
  case PPC::ADDC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
    break;
  case PPC::ADDC_rec:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = PPC::ADDIC_rec;
    break;
  case PPC::SUBFC:
  case PPC::SUBFC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
    break;
  // Signed compares take a sign-extended immediate ...
  case PPC::CMPW:
  case PPC::CMPD:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
    break;
  // ... while logical compares take an unsigned one.
  case PPC::CMPLW:
  case PPC::CMPLD:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
    break;
  // Bitwise logical ops: unsigned 16-bit immediate forms.
  case PPC::AND_rec:
  case PPC::AND8_rec:
  case PPC::OR:
  case PPC::OR8:
  case PPC::XOR:
  case PPC::XOR8:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::AND_rec:
      III.ImmOpcode = PPC::ANDI_rec;
      break;
    case PPC::AND8_rec:
      III.ImmOpcode = PPC::ANDI8_rec;
      break;
    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
    }
    break;
  // 32-bit rotates and shifts with a register amount -> immediate rotate.
  case PPC::RLWNM:
  case PPC::RLWNM8:
  case PPC::RLWNM_rec:
  case PPC::RLWNM8_rec:
  case PPC::SLW:
  case PPC::SLW8:
  case PPC::SLW_rec:
  case PPC::SLW8_rec:
  case PPC::SRW:
  case PPC::SRW8:
  case PPC::SRW_rec:
  case PPC::SRW8_rec:
  case PPC::SRAW:
  case PPC::SRAW_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
        Opc == PPC::RLWNM8_rec)
      III.TruncateImmTo = 5;
    else
      III.TruncateImmTo = 6;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::RLWNM_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::RLWNM8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SLW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SLW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SRW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SRW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRAW:
      // Algebraic right shift: amount must genuinely fit in 5 bits.
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI;
      break;
    case PPC::SRAW_rec:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI_rec;
      break;
    }
    break;
  // 64-bit rotates and shifts with a register amount -> immediate rotate.
  case PPC::RLDCL:
  case PPC::RLDCL_rec:
  case PPC::RLDCR:
  case PPC::RLDCR_rec:
  case PPC::SLD:
  case PPC::SLD_rec:
  case PPC::SRD:
  case PPC::SRD_rec:
  case PPC::SRAD:
  case PPC::SRAD_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
        Opc == PPC::RLDCR_rec)
      III.TruncateImmTo = 6;
    else
      III.TruncateImmTo = 7;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::RLDCL_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::RLDCR_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::SLD_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::SRD_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::SRAD:
      // Algebraic right shift: amount must genuinely fit in 6 bits.
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI;
      break;
    case PPC::SRAD_rec:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI_rec;
      break;
    }
    break;
  // Loads and stores:
  // X-form -> D-form: the forwarded register operand (2) becomes the
  // displacement in operand 1; r0 in the base position reads as zero.
  case PPC::LBZX:
  case PPC::LBZX8:
  case PPC::LHZX:
  case PPC::LHZX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWZX:
  case PPC::LWZX8:
  case PPC::LWAX:
  case PPC::LDX:
  case PPC::LFSX:
  case PPC::LFDX:
  case PPC::STBX:
  case PPC::STBX8:
  case PPC::STHX:
  case PPC::STHX8:
  case PPC::STWX:
  case PPC::STWX8:
  case PPC::STDX:
  case PPC::STFSX:
  case PPC::STFDX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
    case PPC::LWAX:
      // DS-form: displacement must be 4-byte aligned.
      III.ImmOpcode = PPC::LWA;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
    case PPC::STDX:
      III.ImmOpcode = PPC::STD;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
    }
    break;
  // Update-form loads/stores: the extra tied base-register def shifts every
  // operand index up by one relative to the non-update forms above.
  case PPC::LBZUX:
  case PPC::LBZUX8:
  case PPC::LHZUX:
  case PPC::LHZUX8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LWZUX:
  case PPC::LWZUX8:
  case PPC::LDUX:
  case PPC::LFSUX:
  case PPC::LFDUX:
  case PPC::STBUX:
  case PPC::STBUX8:
  case PPC::STHUX:
  case PPC::STHUX8:
  case PPC::STWUX:
  case PPC::STWUX8:
  case PPC::STDUX:
  case PPC::STFSUX:
  case PPC::STFDUX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 2;
    III.ZeroIsSpecialNew = 3;
    III.IsCommutative = false;
    III.IsSummingOperands = true;
    III.ImmOpNo = 2;
    III.OpNoForForwarding = 3;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
    case PPC::LDUX:
      III.ImmOpcode = PPC::LDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
    case PPC::STDUX:
      III.ImmOpcode = PPC::STDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
    }
    break;
  // Power9 and up only. For some of these, the X-Form version has access to all
  // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
  // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
  // into or stored from is one of the VR registers.
  case PPC::LXVX:
  case PPC::LXSSPX:
  case PPC::LXSDX:
  case PPC::STXVX:
  case PPC::STXSSPX:
  case PPC::STXSDX:
  case PPC::XFLOADf32:
  case PPC::XFLOADf64:
  case PPC::XFSTOREf32:
  case PPC::XFSTOREf64:
    if (!Subtarget.hasP9Vector())
      return false;
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    III.ImmMustBeMultipleOf = 4;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LXVX:
      III.ImmOpcode = PPC::LXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::LXSSPX:
      if (PostRA) {
        // Post-RA: pick the D-form reachable from the assigned register
        // class (VR -> LXSSP, FPR -> LFS).
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSSP;
        else {
          III.ImmOpcode = PPC::LFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf32:
      III.ImmOpcode = PPC::DFLOADf32;
      break;
    case PPC::LXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSD;
        else {
          III.ImmOpcode = PPC::LFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf64:
      III.ImmOpcode = PPC::DFLOADf64;
      break;
    case PPC::STXVX:
      III.ImmOpcode = PPC::STXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::STXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSSP;
        else {
          III.ImmOpcode = PPC::STFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf32:
      III.ImmOpcode = PPC::DFSTOREf32;
      break;
    case PPC::STXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSD;
        else {
          III.ImmOpcode = PPC::STFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf64:
      III.ImmOpcode = PPC::DFSTOREf64;
      break;
    }
    break;
  }
  return true;
}
4338
// Utility function for swapping two arbitrary operands of an instruction.
// Works by removing both operands (higher index first, so the lower index
// stays valid) and re-adding them in the opposite positions.
// NOTE(review): appears to assume MinOp > 0 — the `i >= MinOp` loop over an
// unsigned index would not terminate for MinOp == 0; confirm callers never
// swap operand 0.
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
  assert(Op1 != Op2 && "Cannot swap operand with itself.");

  unsigned MaxOp = std::max(Op1, Op2);
  unsigned MinOp = std::min(Op1, Op2);
  // Copy both operands before removing them from the instruction.
  MachineOperand MOp1 = MI.getOperand(MinOp);
  MachineOperand MOp2 = MI.getOperand(MaxOp);
  MI.removeOperand(std::max(Op1, Op2));
  MI.removeOperand(std::min(Op1, Op2));

  // If the operands we are swapping are the two at the end (the common case)
  // we can just remove both and add them in the opposite order.
  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
    MI.addOperand(MOp2);
    MI.addOperand(MOp1);
  } else {
    // Store all operands in a temporary vector, remove them and re-add in the
    // right order.
    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
    // Peel off every operand from MinOp upward (in reverse) into MOps.
    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }
    // MOp2 needs to be added next.
    MI.addOperand(MOp2);
    // Now add the rest.
    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
      if (i == MaxOp)
        MI.addOperand(MOp1);
      else {
        MI.addOperand(MOps.back());
        MOps.pop_back();
      }
    }
  }
}
4377
4378// Check if the 'MI' that has the index OpNoForForwarding
4379// meets the requirement described in the ImmInstrInfo.
4380bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4381 const ImmInstrInfo &III,
4382 unsigned OpNoForForwarding
4383 ) const {
4384 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4385 // would not work pre-RA, we can only do the check post RA.
4386 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4387 if (MRI.isSSA())
4388 return false;
4389
4390 // Cannot do the transform if MI isn't summing the operands.
4391 if (!III.IsSummingOperands)
4392 return false;
4393
4394 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4395 if (!III.ZeroIsSpecialOrig)
4396 return false;
4397
4398 // We cannot do the transform if the operand we are trying to replace
4399 // isn't the same as the operand the instruction allows.
4400 if (OpNoForForwarding != III.OpNoForForwarding)
4401 return false;
4402
4403 // Check if the instruction we are trying to transform really has
4404 // the special zero register as its operand.
4405 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4406 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4407 return false;
4408
4409 // This machine instruction is convertible if it is,
4410 // 1. summing the operands.
4411 // 2. one of the operands is special zero register.
4412 // 3. the operand we are trying to replace is allowed by the MI.
4413 return true;
4414}
4415
4416// Check if the DefMI is the add inst and set the ImmMO and RegMO
4417// accordingly.
4418bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4419 const ImmInstrInfo &III,
4420 MachineOperand *&ImmMO,
4421 MachineOperand *&RegMO) const {
4422 unsigned Opc = DefMI.getOpcode();
4423 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4424 return false;
4425
4426 assert(DefMI.getNumOperands() >= 3 &&
4427 "Add inst must have at least three operands");
4428 RegMO = &DefMI.getOperand(1);
4429 ImmMO = &DefMI.getOperand(2);
4430
4431 // Before RA, ADDI first operand could be a frame index.
4432 if (!RegMO->isReg())
4433 return false;
4434
4435 // This DefMI is elgible for forwarding if it is:
4436 // 1. add inst
4437 // 2. one of the operands is Imm/CPI/Global.
4438 return isAnImmediateOperand(*ImmMO);
4439}
4440
// Check whether the register in RegMO can be forwarded from DefMI into MI:
// there must be no intervening redefinition of the register between them.
// Also reports (via the out-params) whether the register was killed or read
// by an intermediate instruction.
bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x   ->   z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  // Step past MI itself so the scan starts at the instruction before it.
  It++;
  for (; It != E; ++It) {
    // Any redefinition of Reg between DefMI and MI kills the forwarding.
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      IsFwdFeederRegKilled = true;
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}
4481
// Check whether the immediate operand ImmMO of DefMI can be folded (together
// with BaseImm) into an immediate-form instruction described by III.  On
// success, Imm holds the final (sign-extended, possibly truncated) value.
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL8) {
    // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
    // However, we know that, it is 16-bit width, and has the alignment of 4.
    // Check if the instruction met the requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
       III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load. A DForm load cannot be represented if it is a multiple of say 2.
    // XForm loads do not have this restriction.
    if (ImmMO.isGlobal()) {
      const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout();
      // NOTE(review): an alignment check of the global against
      // III.ImmMustBeMultipleOf appears intended before rejecting — confirm.
      return false;
    }

    return true;
  }

  if (ImmMO.isImm()) {
    // It is Imm, we need to check if the Imm fit the range.
    // Sign-extend to 64-bits.
    // DefMI may be folded with another imm form instruction, the result Imm is
    // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
    APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
    if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
      return false;
    if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
      return false;
    Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);

    // The displacement must respect the required multiple (D/DS/DQ form).
    if (Imm % III.ImmMustBeMultipleOf)
      return false;
    if (III.TruncateImmTo)
      Imm &= ((1 << III.TruncateImmTo) - 1);
  }
  else
    return false;

  // This ImmMO is forwarded if it meets the requirement described
  // in ImmInstrInfo
  return true;
}
4533
4534bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4535 unsigned OpNoForForwarding,
4536 MachineInstr **KilledDef) const {
4537 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4538 !DefMI.getOperand(1).isImm())
4539 return false;
4540
4541 MachineFunction *MF = MI.getParent()->getParent();
4543 bool PostRA = !MRI->isSSA();
4544
4545 int64_t Immediate = DefMI.getOperand(1).getImm();
4546 // Sign-extend to 64-bits.
4547 int64_t SExtImm = SignExtend64<16>(Immediate);
4548
4549 bool ReplaceWithLI = false;
4550 bool Is64BitLI = false;
4551 int64_t NewImm = 0;
4552 bool SetCR = false;
4553 unsigned Opc = MI.getOpcode();
4554 switch (Opc) {
4555 default:
4556 return false;
4557
4558 // FIXME: Any branches conditional on such a comparison can be made
4559 // unconditional. At this time, this happens too infrequently to be worth
4560 // the implementation effort, but if that ever changes, we could convert
4561 // such a pattern here.
4562 case PPC::CMPWI:
4563 case PPC::CMPLWI:
4564 case PPC::CMPDI:
4565 case PPC::CMPLDI: {
4566 // Doing this post-RA would require dataflow analysis to reliably find uses
4567 // of the CR register set by the compare.
4568 // No need to fixup killed/dead flag since this transformation is only valid
4569 // before RA.
4570 if (PostRA)
4571 return false;
4572 // If a compare-immediate is fed by an immediate and is itself an input of
4573 // an ISEL (the most common case) into a COPY of the correct register.
4574 bool Changed = false;
4575 Register DefReg = MI.getOperand(0).getReg();
4576 int64_t Comparand = MI.getOperand(2).getImm();
4577 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4578 ? (Comparand | 0xFFFFFFFFFFFF0000)
4579 : Comparand;
4580
4581 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4582 unsigned UseOpc = CompareUseMI.getOpcode();
4583 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4584 continue;
4585 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4586 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4587 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4588 unsigned RegToCopy =
4589 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4590 if (RegToCopy == PPC::NoRegister)
4591 continue;
4592 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4593 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4594 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4595 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4596 CompareUseMI.removeOperand(3);
4597 CompareUseMI.removeOperand(2);
4598 continue;
4599 }
4600 LLVM_DEBUG(
4601 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4602 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4603 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4604 // Convert to copy and remove unneeded operands.
4605 CompareUseMI.setDesc(get(PPC::COPY));
4606 CompareUseMI.removeOperand(3);
4607 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4608 CmpIselsConverted++;
4609 Changed = true;
4610 LLVM_DEBUG(CompareUseMI.dump());
4611 }
4612 if (Changed)
4613 return true;
4614 // This may end up incremented multiple times since this function is called
4615 // during a fixed-point transformation, but it is only meant to indicate the
4616 // presence of this opportunity.
4617 MissedConvertibleImmediateInstrs++;
4618 return false;
4619 }
4620
4621 // Immediate forms - may simply be convertable to an LI.
4622 case PPC::ADDI:
4623 case PPC::ADDI8: {
4624 // Does the sum fit in a 16-bit signed field?
4625 int64_t Addend = MI.getOperand(2).getImm();
4626 if (isInt<16>(Addend + SExtImm)) {
4627 ReplaceWithLI = true;
4628 Is64BitLI = Opc == PPC::ADDI8;
4629 NewImm = Addend + SExtImm;
4630 break;
4631 }
4632 return false;
4633 }
4634 case PPC::SUBFIC:
4635 case PPC::SUBFIC8: {
4636 // Only transform this if the CARRY implicit operand is dead.
4637 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4638 return false;
4639 int64_t Minuend = MI.getOperand(2).getImm();
4640 if (isInt<16>(Minuend - SExtImm)) {
4641 ReplaceWithLI = true;
4642 Is64BitLI = Opc == PPC::SUBFIC8;
4643 NewImm = Minuend - SExtImm;
4644 break;
4645 }
4646 return false;
4647 }
4648 case PPC::RLDICL:
4649 case PPC::RLDICL_rec:
4650 case PPC::RLDICL_32:
4651 case PPC::RLDICL_32_64: {
4652 // Use APInt's rotate function.
4653 int64_t SH = MI.getOperand(2).getImm();
4654 int64_t MB = MI.getOperand(3).getImm();
4655 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4656 SExtImm, true);
4657 InVal = InVal.rotl(SH);
4658 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4659 InVal &= Mask;
4660 // Can't replace negative values with an LI as that will sign-extend
4661 // and not clear the left bits. If we're setting the CR bit, we will use
4662 // ANDI_rec which won't sign extend, so that's safe.
4663 if (isUInt<15>(InVal.getSExtValue()) ||
4664 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4665 ReplaceWithLI = true;
4666 Is64BitLI = Opc != PPC::RLDICL_32;
4667 NewImm = InVal.getSExtValue();
4668 SetCR = Opc == PPC::RLDICL_rec;
4669 break;
4670 }
4671 return false;
4672 }
4673 case PPC::RLWINM:
4674 case PPC::RLWINM8:
4675 case PPC::RLWINM_rec:
4676 case PPC::RLWINM8_rec: {
4677 int64_t SH = MI.getOperand(2).getImm();
4678 int64_t MB = MI.getOperand(3).getImm();
4679 int64_t ME = MI.getOperand(4).getImm();
4680 APInt InVal(32, SExtImm, true);
4681 InVal = InVal.rotl(SH);
4682 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4683 InVal &= Mask;
4684 // Can't replace negative values with an LI as that will sign-extend
4685 // and not clear the left bits. If we're setting the CR bit, we will use
4686 // ANDI_rec which won't sign extend, so that's safe.
4687 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4688 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4689 isUInt<16>(InVal.getSExtValue()));
4690 if (ValueFits) {
4691 ReplaceWithLI = true;
4692 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4693 NewImm = InVal.getSExtValue();
4694 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4695 break;
4696 }
4697 return false;
4698 }
4699 case PPC::ORI:
4700 case PPC::ORI8:
4701 case PPC::XORI:
4702 case PPC::XORI8: {
4703 int64_t LogicalImm = MI.getOperand(2).getImm();
4704 int64_t Result = 0;
4705 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4706 Result = LogicalImm | SExtImm;
4707 else
4708 Result = LogicalImm ^ SExtImm;
4709 if (isInt<16>(Result)) {
4710 ReplaceWithLI = true;
4711 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4712 NewImm = Result;
4713 break;
4714 }
4715 return false;
4716 }
4717 }
4718
4719 if (ReplaceWithLI) {
4720 // We need to be careful with CR-setting instructions we're replacing.
4721 if (SetCR) {
4722 // We don't know anything about uses when we're out of SSA, so only
4723 // replace if the new immediate will be reproduced.
4724 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4725 if (PostRA && ImmChanged)
4726 return false;
4727
4728 if (!PostRA) {
4729 // If the defining load-immediate has no other uses, we can just replace
4730 // the immediate with the new immediate.
4731 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4732 DefMI.getOperand(1).setImm(NewImm);
4733
4734 // If we're not using the GPR result of the CR-setting instruction, we
4735 // just need to and with zero/non-zero depending on the new immediate.
4736 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4737 if (NewImm) {
4738 assert(Immediate && "Transformation converted zero to non-zero?");
4739 NewImm = Immediate;
4740 }
4741 } else if (ImmChanged)
4742 return false;
4743 }
4744 }
4745
4746 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4747 LLVM_DEBUG(MI.dump());
4748 LLVM_DEBUG(dbgs() << "Fed by:\n");
4749 LLVM_DEBUG(DefMI.dump());
4751 LII.Imm = NewImm;
4752 LII.Is64Bit = Is64BitLI;
4753 LII.SetCR = SetCR;
4754 // If we're setting the CR, the original load-immediate must be kept (as an
4755 // operand to ANDI_rec/ANDI8_rec).
4756 if (KilledDef && SetCR)
4757 *KilledDef = nullptr;
4758 replaceInstrWithLI(MI, LII);
4759
4760 if (PostRA)
4761 recomputeLivenessFlags(*MI.getParent());
4762
4763 LLVM_DEBUG(dbgs() << "With:\n");
4764 LLVM_DEBUG(MI.dump());
4765 return true;
4766 }
4767 return false;
4768}
4769
4770bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4771 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4772 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4773 bool PostRA = !MRI->isSSA();
4774 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4775 // for post-ra.
4776 if (PostRA)
4777 return false;
4778
4779 // Only handle load/store.
4780 if (!MI.mayLoadOrStore())
4781 return false;
4782
4783 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4784
4785 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4786 "MI must have x-form opcode");
4787
4788 // get Imm Form info.
4789 ImmInstrInfo III;
4790 bool IsVFReg = MI.getOperand(0).isReg()
4791 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4792 : false;
4793
4794 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4795 return false;
4796
4797 if (!III.IsSummingOperands)
4798 return false;
4799
4800 if (OpNoForForwarding != III.OpNoForForwarding)
4801 return false;
4802
4803 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4804 if (!ImmOperandMI.isImm())
4805 return false;
4806
4807 // Check DefMI.
4808 MachineOperand *ImmMO = nullptr;
4809 MachineOperand *RegMO = nullptr;
4810 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4811 return false;
4812 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4813
4814 // Check Imm.
4815 // Set ImmBase from imm instruction as base and get new Imm inside
4816 // isImmElgibleForForwarding.
4817 int64_t ImmBase = ImmOperandMI.getImm();
4818 int64_t Imm = 0;
4819 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4820 return false;
4821
4822 // Do the transform
4823 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4824 LLVM_DEBUG(MI.dump());
4825 LLVM_DEBUG(dbgs() << "Fed by:\n");
4826 LLVM_DEBUG(DefMI.dump());
4827
4828 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4829 MI.getOperand(III.ImmOpNo).setImm(Imm);
4830
4831 LLVM_DEBUG(dbgs() << "With:\n");
4832 LLVM_DEBUG(MI.dump());
4833 return true;
4834}
4835
4836// If an X-Form instruction is fed by an add-immediate and one of its operands
4837// is the literal zero, attempt to forward the source of the add-immediate to
4838// the corresponding D-Form instruction with the displacement coming from
4839// the immediate being added.
4840bool PPCInstrInfo::transformToImmFormFedByAdd(
4841 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4842 MachineInstr &DefMI, bool KillDefMI) const {
4843 // RegMO ImmMO
4844 // | |
4845 // x = addi reg, imm <----- DefMI
4846 // y = op 0 , x <----- MI
4847 // |
4848 // OpNoForForwarding
4849 // Check if the MI meet the requirement described in the III.
4850 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4851 return false;
4852
4853 // Check if the DefMI meet the requirement
4854 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4855 MachineOperand *ImmMO = nullptr;
4856 MachineOperand *RegMO = nullptr;
4857 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4858 return false;
4859 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4860
4861 // As we get the Imm operand now, we need to check if the ImmMO meet
4862 // the requirement described in the III. If yes set the Imm.
4863 int64_t Imm = 0;
4864 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4865 return false;
4866
4867 bool IsFwdFeederRegKilled = false;
4868 bool SeenIntermediateUse = false;
4869 // Check if the RegMO can be forwarded to MI.
4870 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4871 IsFwdFeederRegKilled, SeenIntermediateUse))
4872 return false;
4873
4874 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4875 bool PostRA = !MRI.isSSA();
4876
4877 // We know that, the MI and DefMI both meet the pattern, and
4878 // the Imm also meet the requirement with the new Imm-form.
4879 // It is safe to do the transformation now.
4880 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4881 LLVM_DEBUG(MI.dump());
4882 LLVM_DEBUG(dbgs() << "Fed by:\n");
4883 LLVM_DEBUG(DefMI.dump());
4884
4885 // Update the base reg first.
4886 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4887 false, false,
4888 RegMO->isKill());
4889
4890 // Then, update the imm.
4891 if (ImmMO->isImm()) {
4892 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4893 // directly.
4895 }
4896 else {
4897 // Otherwise, it is Constant Pool Index(CPI) or Global,
4898 // which is relocation in fact. We need to replace the special zero
4899 // register with ImmMO.
4900 // Before that, we need to fixup the target flags for imm.
4901 // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4902 if (DefMI.getOpcode() == PPC::ADDItocL8)
4904
4905 // MI didn't have the interface such as MI.setOperand(i) though
4906 // it has MI.getOperand(i). To repalce the ZERO MachineOperand with
4907 // ImmMO, we need to remove ZERO operand and all the operands behind it,
4908 // and, add the ImmMO, then, move back all the operands behind ZERO.
4910 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
4911 MOps.push_back(MI.getOperand(i));
4912 MI.removeOperand(i);
4913 }
4914
4915 // Remove the last MO in the list, which is ZERO operand in fact.
4916 MOps.pop_back();
4917 // Add the imm operand.
4918 MI.addOperand(*ImmMO);
4919 // Now add the rest back.
4920 for (auto &MO : MOps)
4921 MI.addOperand(MO);
4922 }
4923
4924 // Update the opcode.
4925 MI.setDesc(get(III.ImmOpcode));
4926
4927 if (PostRA)
4928 recomputeLivenessFlags(*MI.getParent());
4929 LLVM_DEBUG(dbgs() << "With:\n");
4930 LLVM_DEBUG(MI.dump());
4931
4932 return true;
4933}
4934
4935bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
4936 const ImmInstrInfo &III,
4937 unsigned ConstantOpNo,
4938 MachineInstr &DefMI) const {
4939 // DefMI must be LI or LI8.
4940 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4941 !DefMI.getOperand(1).isImm())
4942 return false;
4943
4944 // Get Imm operand and Sign-extend to 64-bits.
4945 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
4946
4947 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4948 bool PostRA = !MRI.isSSA();
4949 // Exit early if we can't convert this.
4950 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
4951 return false;
4952 if (Imm % III.ImmMustBeMultipleOf)
4953 return false;
4954 if (III.TruncateImmTo)
4955 Imm &= ((1 << III.TruncateImmTo) - 1);
4956 if (III.SignedImm) {
4957 APInt ActualValue(64, Imm, true);
4958 if (!ActualValue.isSignedIntN(III.ImmWidth))
4959 return false;
4960 } else {
4961 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
4962 if ((uint64_t)Imm > UnsignedMax)
4963 return false;
4964 }
4965
4966 // If we're post-RA, the instructions don't agree on whether register zero is
4967 // special, we can transform this as long as the register operand that will
4968 // end up in the location where zero is special isn't R0.
4969 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
4970 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
4971 III.ZeroIsSpecialNew + 1;
4972 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
4973 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
4974 // If R0 is in the operand where zero is special for the new instruction,
4975 // it is unsafe to transform if the constant operand isn't that operand.
4976 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
4977 ConstantOpNo != III.ZeroIsSpecialNew)
4978 return false;
4979 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
4980 ConstantOpNo != PosForOrigZero)
4981 return false;
4982 }
4983
4984 unsigned Opc = MI.getOpcode();
4985 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
4986 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
4987 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
4988 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
4989 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
4990 Opc == PPC::SRD || Opc == PPC::SRD_rec;
4991 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
4992 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
4993 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
4994 Opc == PPC::SRD_rec;
4995
4996 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
4997 LLVM_DEBUG(MI.dump());
4998 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
4999 LLVM_DEBUG(DefMI.dump());
5000 MI.setDesc(get(III.ImmOpcode));
5001 if (ConstantOpNo == III.OpNoForForwarding) {
5002 // Converting shifts to immediate form is a bit tricky since they may do
5003 // one of three things:
5004 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5005 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5006 // setting CR0)
5007 // 3. If the shift amount is in [1, OpSize), it's just a shift
5008 if (SpecialShift32 || SpecialShift64) {
5010 LII.Imm = 0;
5011 LII.SetCR = SetCR;
5012 LII.Is64Bit = SpecialShift64;
5013 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
5014 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5015 replaceInstrWithLI(MI, LII);
5016 // Shifts by zero don't change the value. If we don't need to set CR0,
5017 // just convert this to a COPY. Can't do this post-RA since we've already
5018 // cleaned up the copies.
5019 else if (!SetCR && ShAmt == 0 && !PostRA) {
5020 MI.removeOperand(2);
5021 MI.setDesc(get(PPC::COPY));
5022 } else {
5023 // The 32 bit and 64 bit instructions are quite different.
5024 if (SpecialShift32) {
5025 // Left shifts use (N, 0, 31-N).
5026 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5027 // use (0, 0, 31) if N == 0.
5028 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5029 uint64_t MB = RightShift ? ShAmt : 0;
5030 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
5032 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5033 .addImm(ME);
5034 } else {
5035 // Left shifts use (N, 63-N).
5036 // Right shifts use (64-N, N) if 0 < N < 64.
5037 // use (0, 0) if N == 0.
5038 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5039 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
5041 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5042 }
5043 }
5044 } else
5045 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5046 }
5047 // Convert commutative instructions (switch the operands and convert the
5048 // desired one to an immediate.
5049 else if (III.IsCommutative) {
5050 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5051 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5052 } else
5053 llvm_unreachable("Should have exited early!");
5054
5055 // For instructions for which the constant register replaces a different
5056 // operand than where the immediate goes, we need to swap them.
5057 if (III.OpNoForForwarding != III.ImmOpNo)
5059
5060 // If the special R0/X0 register index are different for original instruction
5061 // and new instruction, we need to fix up the register class in new
5062 // instruction.
5063 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5064 if (III.ZeroIsSpecialNew) {
5065 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5066 // need to fix up register class.
5067 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5068 if (RegToModify.isVirtual()) {
5069 const TargetRegisterClass *NewRC =
5070 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5071 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5072 MRI.setRegClass(RegToModify, NewRC);
5073 }
5074 }
5075 }
5076
5077 if (PostRA)
5078 recomputeLivenessFlags(*MI.getParent());
5079
5080 LLVM_DEBUG(dbgs() << "With: ");
5081 LLVM_DEBUG(MI.dump());
5082 LLVM_DEBUG(dbgs() << "\n");
5083 return true;
5084}
5085
5086const TargetRegisterClass *
5088 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5089 return &PPC::VSRCRegClass;
5090 return RC;
5091}
5092
5094 return PPC::getRecordFormOpcode(Opcode);
5095}
5096
5097static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5098 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5099 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5100 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5101 Opcode == PPC::LHZUX8);
5102}
5103
5104// This function checks for sign extension from 32 bits to 64 bits.
5105static bool definedBySignExtendingOp(const unsigned Reg,
5106 const MachineRegisterInfo *MRI) {
5108 return false;
5109
5110 MachineInstr *MI = MRI->getVRegDef(Reg);
5111 if (!MI)
5112 return false;
5113
5114 int Opcode = MI->getOpcode();
5115 const PPCInstrInfo *TII =
5116 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5117 if (TII->isSExt32To64(Opcode))
5118 return true;
5119
5120 // The first def of LBZU/LHZU is sign extended.
5121 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5122 return true;
5123
5124 // RLDICL generates sign-extended output if it clears at least
5125 // 33 bits from the left (MSB).
5126 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5127 return true;
5128
5129 // If at least one bit from left in a lower word is masked out,
5130 // all of 0 to 32-th bits of the output are cleared.
5131 // Hence the output is already sign extended.
5132 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5133 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5134 MI->getOperand(3).getImm() > 0 &&
5135 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5136 return true;
5137
5138 // If the most significant bit of immediate in ANDIS is zero,
5139 // all of 0 to 32-th bits are cleared.
5140 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5141 uint16_t Imm = MI->getOperand(2).getImm();
5142 if ((Imm & 0x8000) == 0)
5143 return true;
5144 }
5145
5146 return false;
5147}
5148
5149// This function checks the machine instruction that defines the input register
5150// Reg. If that machine instruction always outputs a value that has only zeros
5151// in the higher 32 bits then this function will return true.
5152static bool definedByZeroExtendingOp(const unsigned Reg,
5153 const MachineRegisterInfo *MRI) {
5155 return false;
5156
5157 MachineInstr *MI = MRI->getVRegDef(Reg);
5158 if (!MI)
5159 return false;
5160
5161 int Opcode = MI->getOpcode();
5162 const PPCInstrInfo *TII =
5163 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5164 if (TII->isZExt32To64(Opcode))
5165 return true;
5166
5167 // The first def of LBZU/LHZU/LWZU are zero extended.
5168 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5169 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5170 MI->getOperand(0).getReg() == Reg)
5171 return true;
5172
5173 // The 16-bit immediate is sign-extended in li/lis.
5174 // If the most significant bit is zero, all higher bits are zero.
5175 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5176 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5177 int64_t Imm = MI->getOperand(1).getImm();
5178 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5179 return true;
5180 }
5181
5182 // We have some variations of rotate-and-mask instructions
5183 // that clear higher 32-bits.
5184 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5185 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5186 Opcode == PPC::RLDICL_32_64) &&
5187 MI->getOperand(3).getImm() >= 32)
5188 return true;
5189
5190 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5191 MI->getOperand(3).getImm() >= 32 &&
5192 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5193 return true;
5194
5195 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5196 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5197 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5198 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5199 return true;
5200
5201 return false;
5202}
5203
5204// This function returns true if the input MachineInstr is a TOC save
5205// instruction.
5207 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5208 return false;
5209 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5210 unsigned StackOffset = MI.getOperand(1).getImm();
5211 Register StackReg = MI.getOperand(2).getReg();
5212 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5213 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5214 return true;
5215
5216 return false;
5217}
5218
5219// We limit the max depth to track incoming values of PHIs or binary ops
5220// (e.g. AND) to avoid excessive cost.
5221const unsigned MAX_BINOP_DEPTH = 1;
5222// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5223// does not count all of the recursions. The parameter BinOpDepth is incremented
5224// only when isSignOrZeroExtended calls itself more than once. This is done to
5225// prevent expontential recursion. There is no parameter to track linear
5226// recursion.
5227std::pair<bool, bool>
5229 const unsigned BinOpDepth,
5230 const MachineRegisterInfo *MRI) const {
5232 return std::pair<bool, bool>(false, false);
5233
5234 MachineInstr *MI = MRI->getVRegDef(Reg);
5235 if (!MI)
5236 return std::pair<bool, bool>(false, false);
5237
5238 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5239 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5240
5241 // If we know the instruction always returns sign- and zero-extended result,
5242 // return here.
5243 if (IsSExt && IsZExt)
5244 return std::pair<bool, bool>(IsSExt, IsZExt);
5245
5246 switch (MI->getOpcode()) {
5247 case PPC::COPY: {
5248 Register SrcReg = MI->getOperand(1).getReg();
5249
5250 // In both ELFv1 and v2 ABI, method parameters and the return value
5251 // are sign- or zero-extended.
5252 const MachineFunction *MF = MI->getMF();
5253
5254 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5255 // If this is a copy from another register, we recursively check source.
5256 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5257 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5258 SrcExt.second || IsZExt);
5259 }
5260
5261 // From here on everything is SVR4ABI
5262 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5263 // We check the ZExt/SExt flags for a method parameter.
5264 if (MI->getParent()->getBasicBlock() ==
5265 &MF->getFunction().getEntryBlock()) {
5266 Register VReg = MI->getOperand(0).getReg();
5267 if (MF->getRegInfo().isLiveIn(VReg)) {
5268 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5269 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5270 return std::pair<bool, bool>(IsSExt, IsZExt);
5271 }
5272 }
5273
5274 if (SrcReg != PPC::X3) {
5275 // If this is a copy from another register, we recursively check source.
5276 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5277 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5278 SrcExt.second || IsZExt);
5279 }
5280
5281 // For a method return value, we check the ZExt/SExt flags in attribute.
5282 // We assume the following code sequence for method call.
5283 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5284 // BL8_NOP @func,...
5285 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5286 // %5 = COPY %x3; G8RC:%5
5287 const MachineBasicBlock *MBB = MI->getParent();
5288 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
5291 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5292 return IsExtendPair;
5293
5294 const MachineInstr &CallMI = *(--II);
5295 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5296 return IsExtendPair;
5297
5298 const Function *CalleeFn =
5299 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5300 if (!CalleeFn)
5301 return IsExtendPair;
5302 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5303 if (IntTy && IntTy->getBitWidth() <= 32) {
5304 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5305 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5306 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5307 return std::pair<bool, bool>(IsSExt, IsZExt);
5308 }
5309
5310 return IsExtendPair;
5311 }
5312
5313 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5314 // So, we track the operand register as we do for register copy.
5315 case PPC::ORI:
5316 case PPC::XORI:
5317 case PPC::ORI8:
5318 case PPC::XORI8: {
5319 Register SrcReg = MI->getOperand(1).getReg();
5320 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5321 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5322 SrcExt.second || IsZExt);
5323 }
5324
5325 // OR, XOR with shifted 16-bit immediate does not change the upper
5326 // 32 bits. So, we track the operand register for zero extension.
5327 // For sign extension when the MSB of the immediate is zero, we also
5328 // track the operand register since the upper 33 bits are unchanged.
5329 case PPC::ORIS:
5330 case PPC::XORIS:
5331 case PPC::ORIS8:
5332 case PPC::XORIS8: {
5333 Register SrcReg = MI->getOperand(1).getReg();
5334 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5335 uint16_t Imm = MI->getOperand(2).getImm();
5336 if (Imm & 0x8000)
5337 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5338 else
5339 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5340 SrcExt.second || IsZExt);
5341 }
5342
5343 // If all incoming values are sign-/zero-extended,
5344 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5345 case PPC::OR:
5346 case PPC::OR8:
5347 case PPC::ISEL:
5348 case PPC::PHI: {
5349 if (BinOpDepth >= MAX_BINOP_DEPTH)
5350 return std::pair<bool, bool>(false, false);
5351
5352 // The input registers for PHI are operand 1, 3, ...
5353 // The input registers for others are operand 1 and 2.
5354 unsigned OperandEnd = 3, OperandStride = 1;
5355 if (MI->getOpcode() == PPC::PHI) {
5356 OperandEnd = MI->getNumOperands();
5357 OperandStride = 2;
5358 }
5359
5360 IsSExt = true;
5361 IsZExt = true;
5362 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5363 if (!MI->getOperand(I).isReg())
5364 return std::pair<bool, bool>(false, false);
5365
5366 Register SrcReg = MI->getOperand(I).getReg();
5367 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5368 IsSExt &= SrcExt.first;
5369 IsZExt &= SrcExt.second;
5370 }
5371 return std::pair<bool, bool>(IsSExt, IsZExt);
5372 }
5373
5374 // If at least one of the incoming values of an AND is zero extended
5375 // then the output is also zero-extended. If both of the incoming values
5376 // are sign-extended then the output is also sign extended.
5377 case PPC::AND:
5378 case PPC::AND8: {
5379 if (BinOpDepth >= MAX_BINOP_DEPTH)
5380 return std::pair<bool, bool>(false, false);
5381
5382 Register SrcReg1 = MI->getOperand(1).getReg();
5383 Register SrcReg2 = MI->getOperand(2).getReg();
5384 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5385 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5386 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5387 Src1Ext.second || Src2Ext.second);
5388 }
5389
5390 default:
5391 break;
5392 }
5393 return std::pair<bool, bool>(IsSExt, IsZExt);
5394}
5395
5396bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5397 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5398}
5399
5400namespace {
5401class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5402 MachineInstr *Loop, *EndLoop, *LoopCount;
5403 MachineFunction *MF;
5404 const TargetInstrInfo *TII;
5405 int64_t TripCount;
5406
5407public:
5408 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5409 MachineInstr *LoopCount)
5410 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5411 MF(Loop->getParent()->getParent()),
5412 TII(MF->getSubtarget().getInstrInfo()) {
5413 // Inspect the Loop instruction up-front, as it may be deleted when we call
5414 // createTripCountGreaterCondition.
5415 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5416 TripCount = LoopCount->getOperand(1).getImm();
5417 else
5418 TripCount = -1;
5419 }
5420
5421 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5422 // Only ignore the terminator.
5423 return MI == EndLoop;
5424 }
5425
5426 std::optional<bool> createTripCountGreaterCondition(
5427 int TC, MachineBasicBlock &MBB,
5429 if (TripCount == -1) {
5430 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5431 // so we don't need to generate any thing here.
5432 Cond.push_back(MachineOperand::CreateImm(0));
5434 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5435 true));
5436 return {};
5437 }
5438
5439 return TripCount > TC;
5440 }
5441
5442 void setPreheader(MachineBasicBlock *NewPreheader) override {
5443 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5444 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5445 }
5446
5447 void adjustTripCount(int TripCountAdjust) override {
5448 // If the loop trip count is a compile-time value, then just change the
5449 // value.
5450 if (LoopCount->getOpcode() == PPC::LI8 ||
5451 LoopCount->getOpcode() == PPC::LI) {
5452 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5453 LoopCount->getOperand(1).setImm(TripCount);
5454 return;
5455 }
5456
5457 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5458 // so we don't need to generate any thing here.
5459 }
5460
5461 void disposed() override {
5462 Loop->eraseFromParent();
5463 // Ensure the loop setup instruction is deleted too.
5464 LoopCount->eraseFromParent();
5465 }
5466};
5467} // namespace
5468
5469std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5471 // We really "analyze" only hardware loops right now.
5473 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5474 if (Preheader == LoopBB)
5475 Preheader = *std::next(LoopBB->pred_begin());
5476 MachineFunction *MF = Preheader->getParent();
5477
5478 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5480 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5481 Register LoopCountReg = LoopInst->getOperand(0).getReg();
5483 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5484 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5485 }
5486 }
5487 return nullptr;
5488}
5489
5491 MachineBasicBlock &PreHeader,
5492 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5493
5494 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5495
5496 // The loop set-up instruction should be in preheader
5497 for (auto &I : PreHeader.instrs())
5498 if (I.getOpcode() == LOOPi)
5499 return &I;
5500 return nullptr;
5501}
5502
5503// Return true if get the base operand, byte offset of an instruction and the
5504// memory width. Width is the size of memory that is being loaded/stored.
5506 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5507 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5508 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5509 return false;
5510
5511 // Handle only loads/stores with base register followed by immediate offset.
5512 if (!LdSt.getOperand(1).isImm() ||
5513 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5514 return false;
5515 if (!LdSt.getOperand(1).isImm() ||
5516 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5517 return false;
5518
5519 if (!LdSt.hasOneMemOperand())
5520 return false;
5521
5522 Width = (*LdSt.memoperands_begin())->getSize();
5523 Offset = LdSt.getOperand(1).getImm();
5524 BaseReg = &LdSt.getOperand(2);
5525 return true;
5526}
5527
5529 const MachineInstr &MIa, const MachineInstr &MIb) const {
5530 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5531 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5532
5535 return false;
5536
5537 // Retrieve the base register, offset from the base register and width. Width
5538 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5539 // base registers are identical, and the offset of a lower memory access +
5540 // the width doesn't overlap the offset of a higher memory access,
5541 // then the memory accesses are different.
5543 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5544 int64_t OffsetA = 0, OffsetB = 0;
5545 LocationSize WidthA = 0, WidthB = 0;
5546 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5547 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5548 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
5549 int LowOffset = std::min(OffsetA, OffsetB);
5550 int HighOffset = std::max(OffsetA, OffsetB);
5551 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
5552 if (LowWidth.hasValue() &&
5553 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5554 return true;
5555 }
5556 }
5557 return false;
5558}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the subtype a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static unsigned getSize(unsigned Kind)
void changeSign()
Definition: APFloat.h:1158
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition: APInt.cpp:1111
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:248
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
const BasicBlock & getEntryBlock() const
Definition: Function.h:782
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:338
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:205
A possibly irreducible generalization of a Loop.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
bool hasValue() const
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
Definition: MCInstrDesc.h:565
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:269
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
uint16_t Constraints
Operand constraints (see OperandConstraint enum).
Definition: MCInstrDesc.h:100
bool isLookupPtrRegClass() const
Set if this operand is a pointer value and it requires a callback to look up its register class.
Definition: MCInstrDesc.h:104
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:91
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:544
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:327
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:916
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:377
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:708
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:618
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:790
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:660
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:775
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:473
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:372
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automata that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
PPCInstrInfo(PPCSubtarget &STI)
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
MCInst getNop() const override
Return the noop instruction to use for a noop.
static int getRecordFormOpcode(unsigned Opcode)
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &P, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
Definition: PPCInstrInfo.h:263
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:261
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &P, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:609
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Definition: PPCInstrInfo.h:492
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
void finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool foldFrameOffset(MachineInstr &MI) const
void genAlternativeCodeSequence(MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if get the base operand, byte offset of an instruction and the memory width.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:603
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const
getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode for a given imm form load/s...
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:208
void setGlibcHWCAPAccess(bool Val=true) const
void dump() const
Definition: Pass.cpp:136
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< MachineCombinerPattern > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual void genAlternativeCodeSequence(MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeModel::Model getCodeModel() const
Returns the code model.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:687
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ MO_TOC_LO
Definition: PPC.h:185
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions such that condition register is set by MI(b,a).
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
static bool isVFRegister(unsigned Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
static unsigned getCRFromCRBit(unsigned SrcReg)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
MachineCombinerPattern
These are instruction patterns matched by the machine combiner pass.
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
unsigned getKillRegState(bool B)
@ SOK_CRBitSpill
Definition: PPCInstrInfo.h:73
@ SOK_VSXVectorSpill
Definition: PPCInstrInfo.h:75
@ SOK_SpillToVSR
Definition: PPCInstrInfo.h:78
@ SOK_Int4Spill
Definition: PPCInstrInfo.h:68
@ SOK_PairedVecSpill
Definition: PPCInstrInfo.h:79
@ SOK_VectorFloat8Spill
Definition: PPCInstrInfo.h:76
@ SOK_UAccumulatorSpill
Definition: PPCInstrInfo.h:81
@ SOK_PairedG8Spill
Definition: PPCInstrInfo.h:84
@ SOK_VectorFloat4Spill
Definition: PPCInstrInfo.h:77
@ SOK_Float8Spill
Definition: PPCInstrInfo.h:70
@ SOK_Float4Spill
Definition: PPCInstrInfo.h:71
@ SOK_VRVectorSpill
Definition: PPCInstrInfo.h:74
@ SOK_WAccumulatorSpill
Definition: PPCInstrInfo.h:82
@ SOK_SPESpill
Definition: PPCInstrInfo.h:83
@ SOK_CRSpill
Definition: PPCInstrInfo.h:72
@ SOK_AccumulatorSpill
Definition: PPCInstrInfo.h:80
@ SOK_Int8Spill
Definition: PPCInstrInfo.h:69
@ SOK_LastOpcodeSpill
Definition: PPCInstrInfo.h:85
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t IsSummingOperands
Definition: PPCInstrInfo.h:55
uint64_t OpNoForForwarding
Definition: PPCInstrInfo.h:45
uint64_t ImmMustBeMultipleOf
Definition: PPCInstrInfo.h:35
uint64_t IsCommutative
Definition: PPCInstrInfo.h:43
uint64_t ZeroIsSpecialNew
Definition: PPCInstrInfo.h:41
uint64_t TruncateImmTo
Definition: PPCInstrInfo.h:53
uint64_t ZeroIsSpecialOrig
Definition: PPCInstrInfo.h:38
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.