LLVM 20.0.0git
PPCInstrInfo.cpp
Go to the documentation of this file.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/Module.h"
37#include "llvm/MC/MCInst.h"
40#include "llvm/Support/Debug.h"
43
44using namespace llvm;
45
46#define DEBUG_TYPE "ppc-instr-info"
47
48#define GET_INSTRMAP_INFO
49#define GET_INSTRINFO_CTOR_DTOR
50#include "PPCGenInstrInfo.inc"
51
// Pass-wide statistics counters, reported when LLVM is run with -stats.
STATISTIC(NumStoreSPILLVSRRCAsVec,
          "Number of spillvsrrc spilled to stack as vec");
STATISTIC(NumStoreSPILLVSRRCAsGpr,
          "Number of spillvsrrc spilled to stack as gpr");
STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
STATISTIC(CmpIselsConverted,
          "Number of ISELs that depend on comparison of constants converted");
STATISTIC(MissedConvertibleImmediateInstrs,
          "Number of compare-immediate instructions fed by constants");
STATISTIC(NumRcRotatesConvertedToRcAnd,
          "Number of record-form rotates converted to record-form andi");
63
// Command-line knobs controlling the heuristics implemented in this file.

// Disable the analysis used for CTR (count-register) loops.
static cl::
opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
                             cl::desc("Disable analysis for CTR loops"));

// Disable the compare-instruction optimization.
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);

// Debugging aid: crash instead of quietly emitting a nop for a VSX self-copy.
// NOTE(review): the closing of this declaration (likely "cl::Hidden);") was
// lost in extraction -- verify against upstream before building.
static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),

// Fall back to the legacy latency computation in getInstrLatency().
static cl::opt<bool>
UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
  cl::desc("Use the old (incorrect) instruction latency calculation"));

// Scaling factor applied to the VSSRC pressure-set limit when deciding
// whether the register-pressure-reducing FMA reassociation should run.
static cl::opt<float>
    FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
                cl::desc("register pressure factor for the transformations."));

// NOTE(review): the declaration head for this option (presumably
// "static cl::opt<bool> EnableFMARegPressureReduction(") was lost in
// extraction -- verify against upstream.
    "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
    cl::desc("enable register pressure reduce in machine combiner pass."));
86
// Pin the vtable to this file. Out-of-line virtual method anchor: ensures the
// vtable/RTTI for PPCInstrInfo are emitted in exactly one translation unit.
void PPCInstrInfo::anchor() {}
89
// PPCInstrInfo constructor: configures the generated base class with the
// call-frame pseudo opcodes and the subtarget-appropriate return opcode.
// NOTE(review): the signature line (expected
// "PPCInstrInfo::PPCInstrInfo(const PPCSubtarget &STI)") was lost in
// extraction -- verify against upstream.
    : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
                      /* CatchRetOpcode */ -1,
                      STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
      Subtarget(STI), RI(STI.getTargetMachine()) {}
95
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
// NOTE(review): the function head (expected
//   "ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
//        const TargetSubtargetInfo *STI,") was lost in extraction.
    const ScheduleDAG *DAG) const {
  unsigned Directive =
      static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
  // NOTE(review): the conditional guarding this itinerary-based recognizer
  // (a test of Directive) was lost in extraction -- verify against upstream.
    const InstrItineraryData *II =
        static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG);
  }

  // NOTE(review): the fallback return statement was lost in extraction.
}
112
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
/// to use for this target when scheduling the DAG.
// NOTE(review): the function head (expected
//   "ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
//        const InstrItineraryData *II,") was lost in extraction.
    const ScheduleDAG *DAG) const {
  unsigned Directive =
      DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();

  // FIXME: Leaving this as-is until we have POWER9 scheduling info
  // NOTE(review): the Directive check guarding this dispatch-group recognizer
  // was lost in extraction -- verify against upstream.
    return new PPCDispatchGroupSBHazardRecognizer(II, DAG);

  // Most subtargets use a PPC970 recognizer.
  // NOTE(review): the conditional opening the PPC970 path was lost in
  // extraction (the closing brace below survives).
    assert(DAG->TII && "No InstrInfo?");

    return new PPCHazardRecognizer970(*DAG);
  }

  // Default: generic scoreboard-based recognizer over the itineraries.
  return new ScoreboardHazardRecognizer(II, DAG);
}
135
// Compute the latency of MI. Instead of the itinerary stage latency, uses the
// listed output-operand cycle numbers (see comment in the body).
// NOTE(review): the head line (expected
// "unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,")
// was lost in extraction.
                                   const MachineInstr &MI,
                                   unsigned *PredCost) const {
  // No itinerary, or the old behavior explicitly requested: defer to the
  // generated implementation.
  if (!ItinData || UseOldLatencyCalc)
    return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);

  // The default implementation of getInstrLatency calls getStageLatency, but
  // getStageLatency does not do the right thing for us. While we have
  // itinerary, most cores are fully pipelined, and so the itineraries only
  // express the first part of the pipeline, not every stage. Instead, we need
  // to use the listed output operand cycle number (using operand 0 here, which
  // is an output).

  unsigned Latency = 1;
  unsigned DefClass = MI.getDesc().getSchedClass();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    // Only explicit register definitions contribute an output cycle.
    if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
      continue;

    std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
    if (!Cycle)
      continue;

    // Latency is the maximum over all def operands' cycles (at least 1).
    Latency = std::max(Latency, *Cycle);
  }

  return Latency;
}
165
166std::optional<unsigned> PPCInstrInfo::getOperandLatency(
167 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
168 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
169 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
170 ItinData, DefMI, DefIdx, UseMI, UseIdx);
171
172 if (!DefMI.getParent())
173 return Latency;
174
175 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
176 Register Reg = DefMO.getReg();
177
178 bool IsRegCR;
179 if (Reg.isVirtual()) {
180 const MachineRegisterInfo *MRI =
181 &DefMI.getParent()->getParent()->getRegInfo();
182 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
183 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
184 } else {
185 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
186 PPC::CRBITRCRegClass.contains(Reg);
187 }
188
189 if (UseMI.isBranch() && IsRegCR) {
190 if (!Latency)
191 Latency = getInstrLatency(ItinData, DefMI);
192
193 // On some cores, there is an additional delay between writing to a condition
194 // register, and using it from a branch.
195 unsigned Directive = Subtarget.getCPUDirective();
196 switch (Directive) {
197 default: break;
198 case PPC::DIR_7400:
199 case PPC::DIR_750:
200 case PPC::DIR_970:
201 case PPC::DIR_E5500:
202 case PPC::DIR_PWR4:
203 case PPC::DIR_PWR5:
204 case PPC::DIR_PWR5X:
205 case PPC::DIR_PWR6:
206 case PPC::DIR_PWR6X:
207 case PPC::DIR_PWR7:
208 case PPC::DIR_PWR8:
209 // FIXME: Is this needed for POWER9?
210 Latency = *Latency + 2;
211 break;
212 }
213 }
214
215 return Latency;
216}
217
// Copy the given MI flag bits onto MI (used by the machine combiner to
// propagate fast-math flags onto newly created instructions).
// NOTE(review): the head line (expected
// "void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,") and any
// trailing lines before the closing brace were lost in extraction.
                                         uint32_t Flags) const {
  MI.setFlags(Flags);
}
225
// This function does not list all associative and commutative operations, but
// only those worth feeding through the machine combiner in an attempt to
// reduce the critical path. Mostly, this means floating-point operations,
// because they have high latencies(>=5) (compared to other operations, such as
// and/or, which are also associative and commutative, but have low latencies).
// NOTE(review): the head line (expected
// "bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,")
// was lost in extraction.
                                               bool Invert) const {
  // PPC offers no inverted forms for these operations.
  if (Invert)
    return false;
  switch (Inst.getOpcode()) {
  // Floating point:
  // FP Add:
  case PPC::FADD:
  case PPC::FADDS:
  // FP Multiply:
  case PPC::FMUL:
  case PPC::FMULS:
  // Altivec Add:
  case PPC::VADDFP:
  // VSX Add:
  case PPC::XSADDDP:
  case PPC::XVADDDP:
  case PPC::XVADDSP:
  case PPC::XSADDSP:
  // VSX Multiply:
  case PPC::XSMULDP:
  case PPC::XVMULDP:
  case PPC::XVMULSP:
  case PPC::XSMULSP:
    // NOTE(review): the return for the FP cases (upstream gates on the
    // FmReassoc/FmNsz fast-math flags) was lost in extraction -- verify.
  // Fixed point:
  // Multiply:
  case PPC::MULHD:
  case PPC::MULLD:
  case PPC::MULHW:
  case PPC::MULLW:
    return true;
  default:
    return false;
  }
}
268
// Column indices into each row of the FMAOpIdxInfo table below.
#define InfoArrayIdxFMAInst 0
#define InfoArrayIdxFAddInst 1
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
#define InfoArrayIdxFSubInst 5
// Array keeps info for FMA instructions:
// Index 0(InfoArrayIdxFMAInst): FMA instruction;
// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
// second MUL operand index is plus 1;
// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
static const uint16_t FMAOpIdxInfo[][6] = {
    // FIXME: Add more FMA instructions like XSNMADDADP and so on.
    {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
    {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
291
292// Check if an opcode is a FMA instruction. If it is, return the index in array
293// FMAOpIdxInfo. Otherwise, return -1.
294int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
295 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
296 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
297 return I;
298 return -1;
299}
300
301// On PowerPC target, we have two kinds of patterns related to FMA:
302// 1: Improve ILP.
303// Try to reassociate FMA chains like below:
304//
305// Pattern 1:
306// A = FADD X, Y (Leaf)
307// B = FMA A, M21, M22 (Prev)
308// C = FMA B, M31, M32 (Root)
309// -->
310// A = FMA X, M21, M22
311// B = FMA Y, M31, M32
312// C = FADD A, B
313//
314// Pattern 2:
315// A = FMA X, M11, M12 (Leaf)
316// B = FMA A, M21, M22 (Prev)
317// C = FMA B, M31, M32 (Root)
318// -->
319// A = FMUL M11, M12
320// B = FMA X, M21, M22
321// D = FMA A, M31, M32
322// C = FADD B, D
323//
324// breaking the dependency between A and B, allowing FMA to be executed in
325// parallel (or back-to-back in a pipeline) instead of depending on each other.
326//
327// 2: Reduce register pressure.
328// Try to reassociate FMA with FSUB and a constant like below:
329// C is a floating point const.
330//
331// Pattern 1:
332// A = FSUB X, Y (Leaf)
333// D = FMA B, C, A (Root)
334// -->
335// A = FMA B, Y, -C
336// D = FMA A, X, C
337//
338// Pattern 2:
339// A = FSUB X, Y (Leaf)
340// D = FMA B, A, C (Root)
341// -->
342// A = FMA B, Y, -C
343// D = FMA A, X, C
344//
345// Before the transformation, A must be assigned with different hardware
346// register with D. After the transformation, A and D must be assigned with
347// same hardware register due to TIE attribute of FMA instructions.
348//
// Identify the FMA reassociation patterns rooted at Root: the ILP patterns
// (REASSOC_XY_AMM_BMM / REASSOC_XMM_AMM_BMM) and, when DoRegPressureReduce is
// set, the register-pressure patterns (REASSOC_XY_BCA / REASSOC_XY_BAC).
// See the large comment block above for the shapes of these patterns.
// NOTE(review): the function head (expected
//   "bool PPCInstrInfo::getFMAPatterns(MachineInstr &Root,
//        SmallVectorImpl<unsigned> &Patterns,") was lost in extraction.
                                    bool DoRegPressureReduce) const {
  // NOTE(review): local declarations (likely MBB / MRI / TRI, used throughout
  // the body below) were lost in extraction -- verify against upstream.

  // Every explicit operand must be a virtual register for reassociation.
  auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
    for (const auto &MO : Instr.explicit_operands())
      if (!(MO.isReg() && MO.getReg().isVirtual()))
        return false;
    return true;
  };

  // Is Instr the ADD (or, for register-pressure patterns, the SUB) that is
  // associated with Root's FMA opcode and safe to reassociate?
  auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
                                    unsigned OpType) {
    if (Instr.getOpcode() !=
        FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
      return false;

    // Instruction can be reassociated.
    // fast math flags may prohibit reassociation.
    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
      return false;

    // Instruction operands are virtual registers for reassociation.
    if (!IsAllOpsVirtualReg(Instr))
      return false;

    // For register pressure reassociation, the FSub must have only one use as
    // we want to delete the sub to save its def.
    if (OpType == InfoArrayIdxFSubInst &&
        !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
      return false;

    return true;
  };

  // Is Instr a reassociable FMA? On success AddOpIdx/MulOpIdx are set from
  // the FMAOpIdxInfo table (those assignment lines were lost in extraction).
  auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
                               int16_t &MulOpIdx, bool IsLeaf) {
    int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
    if (Idx < 0)
      return false;

    // Instruction can be reassociated.
    // fast math flags may prohibit reassociation.
    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
      return false;

    // Instruction operands are virtual registers for reassociation.
    if (!IsAllOpsVirtualReg(Instr))
      return false;

    // NOTE(review): the MulOpIdx assignment (from FMAOpIdxInfo) was lost in
    // extraction here.
    if (IsLeaf)
      return true;

    // NOTE(review): the AddOpIdx assignment (from FMAOpIdxInfo) was lost in
    // extraction here.

    const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
    MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
    // If 'add' operand's def is not in current block, don't do ILP related opt.
    if (!MIAdd || MIAdd->getParent() != MBB)
      return false;

    // If this is not Leaf FMA Instr, its 'add' operand should only have one use
    // as this fma will be changed later.
    return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
  };

  int16_t AddOpIdx = -1;
  int16_t MulOpIdx = -1;

  bool IsUsedOnceL = false;
  bool IsUsedOnceR = false;
  MachineInstr *MULInstrL = nullptr;
  MachineInstr *MULInstrR = nullptr;

  // Can Root feed the register-pressure-reducing patterns? On success the
  // MULInstrL/MULInstrR and IsUsedOnce{L,R} captures are filled in.
  auto IsRPReductionCandidate = [&]() {
    // Currently, we only support float and double.
    // FIXME: add support for other types.
    unsigned Opcode = Root.getOpcode();
    if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
      return false;

    // Root must be a valid FMA like instruction.
    // Treat it as leaf as we don't care its add operand.
    if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
      assert((MulOpIdx >= 0) && "mul operand index not right!");
      Register MULRegL = TRI->lookThruSingleUseCopyChain(
          Root.getOperand(MulOpIdx).getReg(), MRI);
      Register MULRegR = TRI->lookThruSingleUseCopyChain(
          Root.getOperand(MulOpIdx + 1).getReg(), MRI);
      if (!MULRegL && !MULRegR)
        return false;

      if (MULRegL && !MULRegR) {
        MULRegR =
            TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
        IsUsedOnceL = true;
      } else if (!MULRegL && MULRegR) {
        MULRegL =
            TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
        IsUsedOnceR = true;
      } else {
        IsUsedOnceL = true;
        IsUsedOnceR = true;
      }

      if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
        return false;

      MULInstrL = MRI->getVRegDef(MULRegL);
      MULInstrR = MRI->getVRegDef(MULRegR);
      return true;
    }
    return false;
  };

  // Register pressure fma reassociation patterns.
  if (DoRegPressureReduce && IsRPReductionCandidate()) {
    assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
    // Register pressure pattern 1
    if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
        IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
      LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
      // NOTE(review): the Patterns.push_back(...REASSOC_XY_BCA) line was
      // lost in extraction here.
      return true;
    }

    // Register pressure pattern 2
    if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
         IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
      LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
      // NOTE(review): the Patterns.push_back(...REASSOC_XY_BAC) line was
      // lost in extraction here.
      return true;
    }
  }

  // ILP fma reassociation patterns.
  // Root must be a valid FMA like instruction.
  AddOpIdx = -1;
  if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
    return false;

  assert((AddOpIdx >= 0) && "add operand index not right!");

  Register RegB = Root.getOperand(AddOpIdx).getReg();
  MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);

  // Prev must be a valid FMA like instruction.
  AddOpIdx = -1;
  if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
    return false;

  assert((AddOpIdx >= 0) && "add operand index not right!");

  Register RegA = Prev->getOperand(AddOpIdx).getReg();
  MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
  AddOpIdx = -1;
  if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
    // NOTE(review): the Patterns.push_back(...REASSOC_XMM_AMM_BMM) line was
    // lost in extraction here.
    LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
    return true;
  }
  if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
    // NOTE(review): the Patterns.push_back(...REASSOC_XY_AMM_BMM) line was
    // lost in extraction here.
    LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
    return true;
  }
  return false;
}
523
// For the register-pressure-reducing patterns, replace the PPC::ZERO8
// placeholder added by reassociateFMA with a fresh load of the negated FP
// constant from the constant pool.
// NOTE(review): the function head (expected
// "void PPCInstrInfo::finalizeInsInstrs(") was lost in extraction.
    MachineInstr &Root, unsigned &Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");

  MachineFunction *MF = Root.getMF();
  // NOTE(review): local declarations (likely MRI / MCP / TRI, used below)
  // were lost in extraction.

  int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
  if (Idx < 0)
    return;

  // NOTE(review): the FirstMulOpIdx initialization (from FMAOpIdxInfo) was
  // lost in extraction.

  // For now we only need to fix up placeholder for register pressure reduce
  // patterns.
  Register ConstReg = 0;
  switch (Pattern) {
  // NOTE(review): case label (REASSOC_XY_BCA) lost in extraction.
    ConstReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
    break;
  // NOTE(review): case label (REASSOC_XY_BAC) lost in extraction.
    ConstReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
    break;
  default:
    // Not register pressure reduce patterns.
    return;
  }

  MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
  // Get const value from const pool.
  const Constant *C = getConstantFromConstantPool(ConstDefInstr);
  assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");

  // Get negative fp const.
  APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
  F1.changeSign();
  Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
  Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());

  // Put negative fp const into constant pool.
  unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);

  MachineOperand *Placeholder = nullptr;
  // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
  for (auto *Inst : InsInstrs) {
    for (MachineOperand &Operand : Inst->explicit_operands()) {
      assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
      if (Operand.getReg() == PPC::ZERO8) {
        Placeholder = &Operand;
        break;
      }
    }
  }

  assert(Placeholder && "Placeholder does not exist!");

  // Generate instructions to load the const fp from constant pool.
  // We only support PPC64 and medium code model.
  Register LoadNewConst =
      generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);

  // Fill the placeholder with the new load from constant pool.
  Placeholder->setReg(LoadNewConst);
}
593
// Decide whether the machine combiner should attempt register-pressure
// reduction in MBB: only on supported targets, and only when the VSSRC
// pressure in the block exceeds FMARPFactor times the set limit.
// NOTE(review): the function head (expected
// "bool PPCInstrInfo::shouldReduceRegisterPressure(") was lost in extraction.
    const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {

  // NOTE(review): the guard preceding this return (likely a check of the
  // "ppc-fma-rp-reduction" option) was lost in extraction.
    return false;

  // Currently, we only enable register pressure reducing in machine combiner
  // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
  // support.
  //
  // So we need following instructions to access a TOC entry:
  //
  // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
  // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
  // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
  //
  // FIXME: add more supported targets, like Small and Large code model, PPC32,
  // AIX.
  if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
  // NOTE(review): the CodeModel::Medium clause closing this condition was
  // lost in extraction.
    return false;

  // NOTE(review): the TRI declaration (used below) was lost in extraction.
  const MachineFunction *MF = MBB->getParent();
  const MachineRegisterInfo *MRI = &MF->getRegInfo();

  // Compute the max register-set pressure over the whole block by walking it
  // bottom-up with a RegPressureTracker.
  auto GetMBBPressure =
      [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
    RegionPressure Pressure;
    RegPressureTracker RPTracker(Pressure);

    // Initialize the register pressure tracker.
    RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
                   /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

    for (const auto &MI : reverse(*MBB)) {
      if (MI.isDebugValue() || MI.isDebugLabel())
        continue;
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, *MRI, false, false);
      RPTracker.recedeSkipDebugValues();
      assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
      RPTracker.recede(RegOpers);
    }

    // Close the RPTracker to finalize live ins.
    RPTracker.closeRegion();

    return RPTracker.getPressure().MaxSetPressure;
  };

  // For now we only care about float and double type fma.
  unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
      *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);

  // Only reduce register pressure when pressure is high.
  return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
         (float)VSSRCLimit * FMARPFactor;
}
653
// Return true if I is a load whose single memory operand reads from the
// constant pool.
// NOTE(review): the head line (expected
// "bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const {")
// was lost in extraction.
  // I has only one memory operand which is load from constant pool.
  if (!I->hasOneMemOperand())
    return false;

  MachineMemOperand *Op = I->memoperands()[0];
  return Op->isLoad() && Op->getPseudoValue() &&
         Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
}
663
// Emit the two-instruction TOC access (ADDIStocHA8 + DFLOADf32/f64) that
// loads constant-pool entry Idx, prepend both to InsInstrs, and return the
// virtual register holding the loaded value.
Register PPCInstrInfo::generateLoadForNewConst(
    unsigned Idx, MachineInstr *MI, Type *Ty,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  // Now we only support PPC64, Medium code model and P9 with vector.
  // We have immutable pattern to access const pool. See function
  // shouldReduceRegisterPressure.
  assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
  // NOTE(review): the code-model clause of this assert was lost in
  // extraction.
          "Target not supported!\n");

  MachineFunction *MF = MI->getMF();
  // NOTE(review): the MRI declaration (used below) was lost in extraction.

  // Generate ADDIStocHA8
  Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
  MachineInstrBuilder TOCOffset =
      BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
          .addReg(PPC::X2)
  // NOTE(review): the trailing .addConstantPoolIndex(Idx); of this build
  // chain was lost in extraction.

  assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
         "Only float and double are supported!");

  unsigned LoadOpcode;
  // Should be float type or double type.
  if (Ty->isFloatTy())
    LoadOpcode = PPC::DFLOADf32;
  else
    LoadOpcode = PPC::DFLOADf64;

  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  Register VReg2 = MRI->createVirtualRegister(RC);
  // NOTE(review): the MachineMemOperand (MMO) creation lines were lost in
  // extraction.

  // Generate Load from constant pool.
  // NOTE(review): the "MachineInstrBuilder Load =" head and the
  // .addConstantPoolIndex(Idx) link of this chain were lost in extraction.
      BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
          .addReg(VReg1, getKillRegState(true))
          .addMemOperand(MMO);

  // Operand 1 is the constant-pool index; mark it as a TOC-low reference.
  Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);

  // Insert the toc load instructions into InsInstrs.
  InsInstrs.insert(InsInstrs.begin(), Load);
  InsInstrs.insert(InsInstrs.begin(), TOCOffset);
  return VReg2;
}
714
// This function returns the const value in constant pool if the \p I is a load
// from constant pool.
const Constant *
// NOTE(review): the continuation line (expected
// "PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {")
// and the MCP/MRI declarations were lost in extraction.
  MachineFunction *MF = I->getMF();
  assert(I->mayLoad() && "Should be a load instruction.\n");
  for (auto MO : I->uses()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isVirtual())
      continue;
    // Find the toc address.
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    // The def of the TOC address carries the constant-pool index operand.
    for (auto MO2 : DefMI->uses())
      if (MO2.isCPI())
        return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
  }
  return nullptr;
}
737
// NOTE(review): the enclosing function head (likely
// "CombinerObjective PPCInstrInfo::getCombinerObjective(unsigned Pattern)")
// and most case labels/returns of this switch were lost in extraction --
// verify against upstream before building.
  switch (Pattern) {
  default:
  // NOTE(review): the default return (likely deferring to the base class)
  // was lost in extraction.
  }
}
750
// Collect machine-combiner patterns for Root: first the PPC FMA patterns,
// then whatever the target-independent implementation finds.
// NOTE(review): the function head (expected
// "bool PPCInstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
//      SmallVectorImpl<unsigned> &Patterns,") was lost in extraction.
    bool DoRegPressureReduce) const {
  // Using the machine combiner in this way is potentially expensive, so
  // restrict to when aggressive optimizations are desired.
  // NOTE(review): the opt-level guard preceding this return was lost in
  // extraction.
    return false;

  if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
    return true;

  // NOTE(review): the head of this call (likely
  // "return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,")
  // was lost in extraction.
                                                     DoRegPressureReduce);
}
765
// Expand the chosen combiner pattern: PPC FMA reassociation patterns go to
// reassociateFMA, everything else to the default reassociation.
// NOTE(review): the function head and the InsInstrs/DelInstrs parameter
// lines were lost in extraction.
    MachineInstr &Root, unsigned Pattern,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  switch (Pattern) {
  // NOTE(review): the REASSOC_* case labels were lost in extraction.
    reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
    break;
  default:
    // Reassociate default patterns.
    // NOTE(review): the call head (likely
    // "TargetInstrInfo::reassociateOps(Root, Pattern, InsInstrs,") was lost
    // in extraction.
                                    DelInstrs, InstrIdxForVirtReg);
    break;
  }
}
785
// Rewrite the FMA chain selected by Pattern (see the pattern comment block
// earlier in the file): build the replacement instructions into InsInstrs and
// queue the originals (Leaf, Prev for ILP patterns, and Root) in DelInstrs.
void PPCInstrInfo::reassociateFMA(
    MachineInstr &Root, unsigned Pattern,
    // NOTE(review): the InsInstrs/DelInstrs parameter lines were lost in
    // extraction.
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineFunction *MF = Root.getMF();
  // NOTE(review): MRI/TRI declarations (used below) were lost in extraction.
  MachineOperand &OpC = Root.getOperand(0);
  Register RegC = OpC.getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(RegC);
  MRI.constrainRegClass(RegC, RC);

  unsigned FmaOp = Root.getOpcode();
  int16_t Idx = getFMAOpIdxInfo(FmaOp);
  assert(Idx >= 0 && "Root must be a FMA instruction");

  bool IsILPReassociate =
  // NOTE(review): the initializer of IsILPReassociate (a comparison of
  // Pattern against the two ILP patterns) and the AddOpIdx/FirstMulOpIdx
  // initializations from FMAOpIdxInfo were lost in extraction.

  MachineInstr *Prev = nullptr;
  MachineInstr *Leaf = nullptr;
  switch (Pattern) {
  default:
    llvm_unreachable("not recognized pattern!");
  // NOTE(review): the ILP case labels (REASSOC_XY_AMM_BMM /
  // REASSOC_XMM_AMM_BMM) were lost in extraction.
    Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
    Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
    break;
  // NOTE(review): the "case ...REASSOC_XY_BCA: {" label was lost in
  // extraction.
    Register MULReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
    Leaf = MRI.getVRegDef(MULReg);
    break;
  }
  // NOTE(review): the "case ...REASSOC_XY_BAC: {" label was lost in
  // extraction.
    Register MULReg = TRI->lookThruCopyLike(
        Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
    Leaf = MRI.getVRegDef(MULReg);
    break;
  }
  }

  // Intersect the fast-math flags of the instructions being combined; the
  // replacements may only carry flags common to all of them.
  uint32_t IntersectedFlags = 0;
  if (IsILPReassociate)
    IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
  else
    IntersectedFlags = Root.getFlags() & Leaf->getFlags();

  auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
                            bool &KillFlag) {
    Reg = Operand.getReg();
    MRI.constrainRegClass(Reg, RC);
    KillFlag = Operand.isKill();
  };

  auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
                             Register &MulOp2, Register &AddOp,
                             bool &MulOp1KillFlag, bool &MulOp2KillFlag,
                             bool &AddOpKillFlag) {
    GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
    GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
    GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
  };

  Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
      RegA21, RegB;
  bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
       KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
       KillA11 = false, KillA21 = false, KillB = false;

  GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);

  if (IsILPReassociate)
    GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);

  // NOTE(review): the pattern check opening this branch (likely
  // "if (Pattern == ...REASSOC_XMM_AMM_BMM) {") was lost in extraction.
    GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
    GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
  // NOTE(review): the "} else if (...)" line introducing this branch was
  // lost in extraction.
    GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
    GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
  } else {
    // Get FSUB instruction info.
    GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
    GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
  }

  // Create new virtual registers for the new results instead of
  // recycling legacy ones because the MachineCombiner's computation of the
  // critical path requires a new register definition rather than an existing
  // one.
  // For register pressure reassociation, we only need create one virtual
  // register for the new fma.
  Register NewVRA = MRI.createVirtualRegister(RC);
  InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));

  Register NewVRB = 0;
  if (IsILPReassociate) {
    NewVRB = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
  }

  Register NewVRD = 0;
  // NOTE(review): the pattern check opening this block (likely
  // "if (Pattern == ...REASSOC_XMM_AMM_BMM) {") was lost in extraction.
    NewVRD = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
  }

  // Rewrite an FMA's operands when its add operand is not at index 1.
  auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
                                Register RegMul1, bool KillRegMul1,
                                Register RegMul2, bool KillRegMul2) {
    MI->getOperand(AddOpIdx).setReg(RegAdd);
    MI->getOperand(AddOpIdx).setIsKill(KillAdd);
    MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
    MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
    MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
    MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
  };

  MachineInstrBuilder NewARegPressure, NewCRegPressure;
  switch (Pattern) {
  default:
    llvm_unreachable("not recognized pattern!");
  // NOTE(review): the "case ...REASSOC_XY_AMM_BMM: {" label was lost in
  // extraction.
    // Create new instructions for insertion.
    MachineInstrBuilder MINewB =
        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
            .addReg(RegX, getKillRegState(KillX))
            .addReg(RegM21, getKillRegState(KillM21))
            .addReg(RegM22, getKillRegState(KillM22));
    MachineInstrBuilder MINewA =
        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
            .addReg(RegY, getKillRegState(KillY))
            .addReg(RegM31, getKillRegState(KillM31))
            .addReg(RegM32, getKillRegState(KillM32));
    // If AddOpIdx is not 1, adjust the order.
    if (AddOpIdx != 1) {
      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
      AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
    }

    MachineInstrBuilder MINewC =
        BuildMI(*MF, Root.getDebugLoc(),
    // NOTE(review): the opcode/def-reg line of this BuildMI (likely
    // "get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)") was lost in
    // extraction.
            .addReg(NewVRB, getKillRegState(true))
            .addReg(NewVRA, getKillRegState(true));

    // Update flags for newly created instructions.
    setSpecialOperandAttr(*MINewA, IntersectedFlags);
    setSpecialOperandAttr(*MINewB, IntersectedFlags);
    setSpecialOperandAttr(*MINewC, IntersectedFlags);

    // Record new instructions for insertion.
    InsInstrs.push_back(MINewA);
    InsInstrs.push_back(MINewB);
    InsInstrs.push_back(MINewC);
    break;
  }
  // NOTE(review): the "case ...REASSOC_XMM_AMM_BMM: {" label was lost in
  // extraction.
    assert(NewVRD && "new FMA register not created!");
    // Create new instructions for insertion.
    MachineInstrBuilder MINewA =
        BuildMI(*MF, Leaf->getDebugLoc(),
    // NOTE(review): the opcode/def-reg line of this BuildMI (likely the
    // FMUL opcode with NewVRA) was lost in extraction.
            .addReg(RegM11, getKillRegState(KillM11))
            .addReg(RegM12, getKillRegState(KillM12));
    MachineInstrBuilder MINewB =
        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
            .addReg(RegX, getKillRegState(KillX))
            .addReg(RegM21, getKillRegState(KillM21))
            .addReg(RegM22, getKillRegState(KillM22));
    MachineInstrBuilder MINewD =
        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
            .addReg(NewVRA, getKillRegState(true))
            .addReg(RegM31, getKillRegState(KillM31))
            .addReg(RegM32, getKillRegState(KillM32));
    // If AddOpIdx is not 1, adjust the order.
    if (AddOpIdx != 1) {
      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
      AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
                         KillM32);
    }

    MachineInstrBuilder MINewC =
        BuildMI(*MF, Root.getDebugLoc(),
    // NOTE(review): the opcode/def-reg line of this BuildMI (likely the
    // FADD opcode with RegC) was lost in extraction.
            .addReg(NewVRB, getKillRegState(true))
            .addReg(NewVRD, getKillRegState(true));

    // Update flags for newly created instructions.
    setSpecialOperandAttr(*MINewA, IntersectedFlags);
    setSpecialOperandAttr(*MINewB, IntersectedFlags);
    setSpecialOperandAttr(*MINewD, IntersectedFlags);
    setSpecialOperandAttr(*MINewC, IntersectedFlags);

    // Record new instructions for insertion.
    InsInstrs.push_back(MINewA);
    InsInstrs.push_back(MINewB);
    InsInstrs.push_back(MINewD);
    InsInstrs.push_back(MINewC);
    break;
  }
  // NOTE(review): the "case ...REASSOC_XY_BCA:" / "case ...REASSOC_XY_BAC: {"
  // labels were lost in extraction.
    Register VarReg;
    bool KillVarReg = false;
  // NOTE(review): the pattern check opening this branch (selecting which mul
  // operand is the variable one) was lost in extraction.
      VarReg = RegM31;
      KillVarReg = KillM31;
    } else {
      VarReg = RegM32;
      KillVarReg = KillM32;
    }
    // We don't want to get negative const from memory pool too early, as the
    // created entry will not be deleted even if it has no users. Since all
    // operand of Leaf and Root are virtual register, we use zero register
    // here as a placeholder. When the InsInstrs is selected in
    // MachineCombiner, we call finalizeInsInstrs to replace the zero register
    // with a virtual register which is a load from constant pool.
    // NOTE(review): "getKillRegState(RegB)" passes a Register where a bool
    // kill flag is expected (KillB is computed above but unused here) --
    // looks like a latent bug; verify against upstream.
    NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
                          .addReg(RegB, getKillRegState(RegB))
                          .addReg(RegY, getKillRegState(KillY))
                          .addReg(PPC::ZERO8);
    NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
                          .addReg(NewVRA, getKillRegState(true))
                          .addReg(RegX, getKillRegState(KillX))
                          .addReg(VarReg, getKillRegState(KillVarReg));
    // For now, we only support xsmaddadp/xsmaddasp, their add operand are
    // both at index 1, no need to adjust.
    // FIXME: when add more fma instructions support, like fma/fmas, adjust
    // the operand index here.
    break;
  }
  }

  if (!IsILPReassociate) {
    setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
    setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);

    InsInstrs.push_back(NewARegPressure);
    InsInstrs.push_back(NewCRegPressure);
  }

  assert(!InsInstrs.empty() &&
         "Insertion instructions set should not be empty!");

  // Record old instructions for deletion.
  DelInstrs.push_back(Leaf);
  if (IsILPReassociate)
    DelInstrs.push_back(Prev);
  DelInstrs.push_back(&Root);
}
1044
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
// For the sign-extend-word opcodes below, report the source and destination
// registers and the sub-register index (sub_32) through which the low half of
// the destination aliases the source, so the coalescer can reuse it.
// NOTE(review): the signature's first line was lost in extraction — confirm
// against upstream (bool PPCInstrInfo::isCoalescableExtInstr(...)).
                                         Register &SrcReg, Register &DstReg,
                                         unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: return false;
  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
    // Operand 0 is the extended def; operand 1 is the 32-bit source.
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = PPC::sub_32;
    return true;
  }
}
1060
// If MI is one of this target's spill-reload opcodes with the canonical
// frame-reference operand layout (dest reg, zero offset immediate, frame
// index), set FrameIndex to the stack slot and return the destination
// register; otherwise return 0 ("not a simple stack-slot load").
// NOTE(review): the signature's first line was lost in extraction.
                                           int &FrameIndex) const {
  if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
    // Check for the operands added by addFrameReference (the immediate is the
    // offset which defaults to 0).
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  return 0;
}
1074
// For opcodes with the ReMaterializable flag set, this function is called to
// verify the instruction is really rematable.
// The listed opcodes are constant-materializing or zero/ones-splat forms with
// no side effects, so re-executing them at a use site is always safe.
// NOTE(review): the signature line and the final fallthrough return (the call
// to the base TargetInstrInfo implementation) were lost in extraction.
                                           const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    // Let base implementaion decide.
    break;
  case PPC::LI:
  case PPC::LI8:
  case PPC::PLI:
  case PPC::PLI8:
  case PPC::LIS:
  case PPC::LIS8:
  case PPC::ADDIStocHA:
  case PPC::ADDIStocHA8:
  case PPC::ADDItocL:
  case PPC::ADDItocL8:
  case PPC::LOAD_STACK_GUARD:
  case PPC::PPCLdFixedAddr:
  case PPC::XXLXORz:
  case PPC::XXLXORspz:
  case PPC::XXLXORdpz:
  case PPC::XXLEQVOnes:
  case PPC::XXSPLTI32DX:
  case PPC::XXSPLTIW:
  case PPC::XXSPLTIDP:
  case PPC::V_SET0B:
  case PPC::V_SET0H:
  case PPC::V_SET0:
  case PPC::V_SETALLONESB:
  case PPC::V_SETALLONESH:
  case PPC::V_SETALLONES:
  case PPC::CRSET:
  case PPC::CRUNSET:
  case PPC::XXSETACCZ:
  case PPC::XXSETACCZW:
    return true;
  }
}
1116
// Mirror of isLoadFromStackSlot for the spill-store opcodes: if MI stores a
// register to a stack slot with the addFrameReference operand layout
// (src reg, zero offset immediate, frame index), report the frame index and
// return the stored register; otherwise return 0.
// NOTE(review): the signature's first line was lost in extraction.
                                          int &FrameIndex) const {
  if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  return 0;
}
1128
// Commute the two source operands of MI. Most instructions defer to the base
// implementation; RLWIMI/RLWIMI_rec get special handling because swapping the
// inserted and background operands requires inverting the mask.
// NOTE(review): the signature's first line was lost in extraction.
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  MachineFunction &MF = *MI.getParent()->getParent();

  // Normal instructions can be commuted the obvious way.
  if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
  // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
  // changing the relative order of the mask operands might change what happens
  // to the high-bits of the mask (and, thus, the result).

  // Cannot commute if it has a non-zero rotate count.
  if (MI.getOperand(3).getImm() != 0)
    return nullptr;

  // If we have a zero rotate count, we have:
  //   M = mask(MB,ME)
  //   Op0 = (Op1 & ~M) | (Op2 & M)
  // Change this to:
  //   M = mask((ME+1)&31, (MB-1)&31)
  //   Op0 = (Op2 & ~M) | (Op1 & M)

  // Swap op1/op2
  // NOTE(review): "RLSIMI" in the assert text below is a typo for "RLWIMI"
  // (left untouched here; this is a documentation-only pass).
  assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
         "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
  Register Reg0 = MI.getOperand(0).getReg();
  Register Reg1 = MI.getOperand(1).getReg();
  Register Reg2 = MI.getOperand(2).getReg();
  unsigned SubReg1 = MI.getOperand(1).getSubReg();
  unsigned SubReg2 = MI.getOperand(2).getSubReg();
  bool Reg1IsKill = MI.getOperand(1).isKill();
  bool Reg2IsKill = MI.getOperand(2).isKill();
  bool ChangeReg0 = false;
  // If machine instrs are no longer in two-address forms, update
  // destination register as well.
  if (Reg0 == Reg1) {
    // Must be two address instruction (i.e. op1 is tied to op0).
    assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
           "Expecting a two-address instruction!");
    assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
    // Op1 is live-through via the tie, so the swapped-in Op2 cannot be a kill.
    Reg2IsKill = false;
    ChangeReg0 = true;
  }

  // Masks.
  unsigned MB = MI.getOperand(4).getImm();
  unsigned ME = MI.getOperand(5).getImm();

  // We can't commute a trivial mask (there is no way to represent an all-zero
  // mask).
  if (MB == 0 && ME == 31)
    return nullptr;

  if (NewMI) {
    // Create a new instruction.
    Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
    bool Reg0IsDead = MI.getOperand(0).isDead();
    return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
        .addReg(Reg2, getKillRegState(Reg2IsKill))
        .addReg(Reg1, getKillRegState(Reg1IsKill))
        .addImm((ME + 1) & 31)
        .addImm((MB - 1) & 31);
  }

  // In-place commute: rewrite the existing instruction's operands.
  if (ChangeReg0) {
    MI.getOperand(0).setReg(Reg2);
    MI.getOperand(0).setSubReg(SubReg2);
  }
  MI.getOperand(2).setReg(Reg1);
  MI.getOperand(1).setReg(Reg2);
  MI.getOperand(2).setSubReg(SubReg1);
  MI.getOperand(1).setSubReg(SubReg2);
  MI.getOperand(2).setIsKill(Reg1IsKill);
  MI.getOperand(1).setIsKill(Reg2IsKill);

  // Swap the mask around.
  MI.getOperand(4).setImm((ME + 1) & 31);
  MI.getOperand(5).setImm((MB - 1) & 31);
  return &MI;
}
1212
// Report which operand indices of MI can be commuted.
// NOTE(review): the signature's first line was lost in extraction.
                                                 unsigned &SrcOpIdx1,
                                                 unsigned &SrcOpIdx2) const {
  // For VSX A-Type FMA instructions, it is the first two operands that can be
  // commuted, however, because the non-encoded tied input operand is listed
  // first, the operands to swap are actually the second and third.

  int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
  if (AltOpc == -1)
    return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);

  // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
  // and SrcOpIdx2.
  return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
}
1228
  // This function is used for scheduling, and the nop wanted here is the type
  // that terminates dispatch groups on the POWER cores.
  // (NOTE(review): the function signature lines were lost in extraction.)
  unsigned Directive = Subtarget.getCPUDirective();
  unsigned Opcode;
  // Pick the CPU-specific group-terminating nop; plain NOP for everything else.
  switch (Directive) {
  default: Opcode = PPC::NOP; break;
  case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
  case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
  case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
  // FIXME: Update when POWER9 scheduling model is ready.
  case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
  }

  // A scheduler-inserted nop carries no source location.
  DebugLoc DL;
  BuildMI(MBB, MI, DL, get(Opcode));
}
1247
/// Return the noop instruction to use for a noop.
/// Always the plain PPC::NOP (ori 0,0,0), independent of CPU directive.
/// NOTE(review): the signature line was lost in extraction.
  MCInst Nop;
  Nop.setOpcode(PPC::NOP);
  return Nop;
}
1254
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
// Returns false when the terminators were understood (TBB/FBB/Cond filled
// in), true when analysis fails. With AllowModify, dead trailing branches may
// be erased.
// NOTE(review): several physical lines of this function were lost in
// extraction (the signature's first lines, the terminator-iterator
// initializations, and some guard conditions — the stray doubled
// "return true;" statements below are the bodies of those missing guards,
// presumably the DisableCTRLoopAnal checks). Confirm against upstream.
                                     MachineBasicBlock *&FBB,
                                     bool AllowModify) const {
  bool isPPC64 = Subtarget.isPPC64();

  // If the block has no terminators, it just falls into the block after it.
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  if (AllowModify) {
    // If the BB ends with an unconditional branch to the fallthrough BB,
    // we eliminate the branch instruction.
    if (I->getOpcode() == PPC::B &&
        MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
      I->eraseFromParent();

      // We update iterator after deleting the last branch.
      if (I == MBB.end() || !isUnpredicatedTerminator(*I))
        return false;
    }
  }

  // Get the last instruction in the block.
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (LastInst.getOpcode() == PPC::B) {
      // Unconditional branch: target only, empty condition.
      if (!LastInst.getOperand(0).isMBB())
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastInst.getOpcode() == PPC::BCC) {
      if (!LastInst.getOperand(2).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(2).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      Cond.push_back(LastInst.getOperand(1));
      return false;
    } else if (LastInst.getOpcode() == PPC::BC) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BCn) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
               LastInst.getOpcode() == PPC::BDNZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      // Encode decrement-and-branch-if-nonzero as imm 1 + the CTR register.
      Cond.push_back(MachineOperand::CreateImm(1));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDZ8 ||
               LastInst.getOpcode() == PPC::BDZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      // Decrement-and-branch-if-zero: imm 0 + the CTR register.
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    }

    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr &SecondLastInst = *I;

  // If there are three terminators, we don't know what sort of block this is.
  if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with PPC::B and PPC:BCC, handle it.
  if (SecondLastInst.getOpcode() == PPC::BCC &&
      LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(2).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(2).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    Cond.push_back(SecondLastInst.getOperand(1));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BC &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BCn &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
              SecondLastInst.getOpcode() == PPC::BDNZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(1));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
              SecondLastInst.getOpcode() == PPC::BDZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(0));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two PPC:Bs, handle it. The second one is not
  // executed, so remove it.
  if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
1432
// Remove up to two branch instructions (one unconditional and/or one
// conditional) from the end of MBB; returns the number removed.
// NOTE(review): the signature's first line and the iterator initialization
// (presumably MBB.getLastNonDebugInstr()) were lost in extraction.
                                   int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  if (I == MBB.end())
    return 0;

  // Only our known branch opcodes count; anything else is not a branch.
  if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  // A preceding conditional branch (if any) is removed as well.
  if (I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
1464
// Insert branch code at the end of MBB: an unconditional branch, a single
// conditional branch (Cond = {predicate-imm, CR/CTR reg}), or a two-way
// conditional+unconditional pair. Returns the number of instructions added.
// NOTE(review): the signature's first lines were lost in extraction.
                                    MachineBasicBlock *FBB,
                                    const DebugLoc &DL,
                                    int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "PPC branch conditions have two components!");
  assert(!BytesAdded && "code size not handled");

  bool isPPC64 = Subtarget.isPPC64();

  // One-way branch.
  if (!FBB) {
    if (Cond.empty()) // Unconditional branch
      BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
    else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
      // CTR-based loop branch: imm 1 selects BDNZ, imm 0 selects BDZ
      // (see analyzeBranch for the encoding).
      BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                              (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                              (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
      BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
      BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
    else // Conditional branch
      BuildMI(&MBB, DL, get(PPC::BCC))
          .addImm(Cond[0].getImm())
          .add(Cond[1])
          .addMBB(TBB);
    return 1;
  }

  // Two-way Conditional Branch.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                            (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                            (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
    BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
    BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
  else
    BuildMI(&MBB, DL, get(PPC::BCC))
        .addImm(Cond[0].getImm())
        .add(Cond[1])
        .addMBB(TBB);
  // Fall-back unconditional branch to the false block.
  BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
  return 2;
}
1516
1517// Select analysis.
1520 Register DstReg, Register TrueReg,
1521 Register FalseReg, int &CondCycles,
1522 int &TrueCycles, int &FalseCycles) const {
1523 if (!Subtarget.hasISEL())
1524 return false;
1525
1526 if (Cond.size() != 2)
1527 return false;
1528
1529 // If this is really a bdnz-like condition, then it cannot be turned into a
1530 // select.
1531 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1532 return false;
1533
1534 // If the conditional branch uses a physical register, then it cannot be
1535 // turned into a select.
1536 if (Cond[1].getReg().isPhysical())
1537 return false;
1538
1539 // Check register classes.
1541 const TargetRegisterClass *RC =
1542 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1543 if (!RC)
1544 return false;
1545
1546 // isel is for regular integer GPRs only.
1547 if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1548 !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1549 !PPC::G8RCRegClass.hasSubClassEq(RC) &&
1550 !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1551 return false;
1552
1553 // FIXME: These numbers are for the A2, how well they work for other cores is
1554 // an open question. On the A2, the isel instruction has a 2-cycle latency
1555 // but single-cycle throughput. These numbers are used in combination with
1556 // the MispredictPenalty setting from the active SchedMachineModel.
1557 CondCycles = 1;
1558 TrueCycles = 1;
1559 FalseCycles = 1;
1560
1561 return true;
1562}
1563
// Materialize "DestReg = Cond ? TrueReg : FalseReg" with an ISEL/ISEL8,
// mapping the branch predicate onto a CR-field sub-register and, for the
// negated predicates, swapping the true/false inputs.
// NOTE(review): the signature's first lines and the MachineRegisterInfo
// initialization were lost in extraction.
                                const DebugLoc &dl, Register DestReg,
                                Register FalseReg) const {
  assert(Cond.size() == 2 &&
         "PPC branch conditions have two components!");

  // Get the register classes.
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  assert(RC && "TrueReg and FalseReg must have overlapping register classes");

  bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
                 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
  assert((Is64Bit ||
          PPC::GPRCRegClass.hasSubClassEq(RC) ||
          PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
         "isel is for regular integer GPRs only");

  unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
  auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());

  // SubIdx picks the CR bit (lt/gt/eq/un) the isel tests; SwapOps handles the
  // inverted predicates by exchanging the true/false operands instead.
  unsigned SubIdx = 0;
  bool SwapOps = false;
  switch (SelectPred) {
  case PPC::PRED_EQ:
  case PPC::PRED_EQ_MINUS:
  case PPC::PRED_EQ_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = false; break;
  case PPC::PRED_NE:
  case PPC::PRED_NE_MINUS:
  case PPC::PRED_NE_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = true; break;
  case PPC::PRED_LT:
  case PPC::PRED_LT_MINUS:
  case PPC::PRED_LT_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = false; break;
  case PPC::PRED_GE:
  case PPC::PRED_GE_MINUS:
  case PPC::PRED_GE_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = true; break;
  case PPC::PRED_GT:
  case PPC::PRED_GT_MINUS:
  case PPC::PRED_GT_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = false; break;
  case PPC::PRED_LE:
  case PPC::PRED_LE_MINUS:
  case PPC::PRED_LE_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = true; break;
  case PPC::PRED_UN:
  case PPC::PRED_UN_MINUS:
  case PPC::PRED_UN_PLUS:
    SubIdx = PPC::sub_un; SwapOps = false; break;
  case PPC::PRED_NU:
  case PPC::PRED_NU_MINUS:
  case PPC::PRED_NU_PLUS:
    SubIdx = PPC::sub_un; SwapOps = true; break;
  case PPC::PRED_BIT_SET:   SubIdx = 0; SwapOps = false; break;
  case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
  }

  Register FirstReg = SwapOps ? FalseReg : TrueReg,
           SecondReg = SwapOps ? TrueReg : FalseReg;

  // The first input register of isel cannot be r0. If it is a member
  // of a register class that can be r0, then copy it first (the
  // register allocator should eliminate the copy).
  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
    const TargetRegisterClass *FirstRC =
        MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
            &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
    Register OldFirstReg = FirstReg;
    FirstReg = MRI.createVirtualRegister(FirstRC);
    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
        .addReg(OldFirstReg);
  }

  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
      .addReg(FirstReg).addReg(SecondReg)
      .addReg(Cond[1].getReg(), 0, SubIdx);
}
1648
1649static unsigned getCRBitValue(unsigned CRBit) {
1650 unsigned Ret = 4;
1651 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1652 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1653 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1654 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1655 Ret = 3;
1656 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1657 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1658 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1659 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1660 Ret = 2;
1661 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1662 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1663 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1664 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1665 Ret = 1;
1666 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1667 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1668 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1669 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1670 Ret = 0;
1671
1672 assert(Ret != 4 && "Invalid CR bit register");
1673 return Ret;
1674}
1675
// Emit the instruction sequence that copies physical register SrcReg into
// DestReg, handling the PPC-specific cross-class cases (CR bit/field -> GPR,
// GPR <-> VSX direct moves, SPE conversions, register pairs, and ACC/UACC
// de-prime/re-prime) before falling back to a same-class move opcode.
// NOTE(review): the signature's first lines and the TRI initialization were
// lost in extraction.
                               const DebugLoc &DL, MCRegister DestReg,
                               MCRegister SrcReg, bool KillSrc,
                               bool RenamableDest, bool RenamableSrc) const {
  // We can end up with self copies and similar things as a result of VSX copy
  // legalization. Promote them here.
  if (PPC::F8RCRegClass.contains(DestReg) &&
      PPC::VSRCRegClass.contains(SrcReg)) {
    MCRegister SuperReg =
        TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);

    if (VSXSelfCopyCrash && SrcReg == SuperReg)
      llvm_unreachable("nop VSX copy");

    DestReg = SuperReg;
  } else if (PPC::F8RCRegClass.contains(SrcReg) &&
             PPC::VSRCRegClass.contains(DestReg)) {
    MCRegister SuperReg =
        TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);

    if (VSXSelfCopyCrash && DestReg == SuperReg)
      llvm_unreachable("nop VSX copy");

    SrcReg = SuperReg;
  }

  // Different class register copy
  if (PPC::CRBITRCRegClass.contains(SrcReg) &&
      PPC::GPRCRegClass.contains(DestReg)) {
    MCRegister CRReg = getCRFromCRBit(SrcReg);
    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
    // Result deliberately discarded: the kill flag is not applied to this
    // copy (the statement only "uses" KillSrc).
    getKillRegState(KillSrc);
    // Rotate the CR bit in the CR fields to be the least significant bit and
    // then mask with 0x1 (MB = ME = 31).
    BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
        .addImm(31)
        .addImm(31);
    return;
  } else if (PPC::CRRCRegClass.contains(SrcReg) &&
             (PPC::G8RCRegClass.contains(DestReg) ||
              PPC::GPRCRegClass.contains(DestReg))) {
    bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
    unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
    unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
    unsigned CRNum = TRI->getEncodingValue(SrcReg);
    BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    // CR7 is already in the low nibble after mfocrf; no shift needed.
    if (CRNum == 7)
      return;
    // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
    BuildMI(MBB, I, DL, get(ShCode), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(CRNum * 4 + 4)
        .addImm(28)
        .addImm(31);
    return;
  } else if (PPC::G8RCRegClass.contains(SrcReg) &&
             PPC::VSFRCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
    NumGPRtoVSRSpill++;
    getKillRegState(KillSrc);
    return;
  } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
             PPC::G8RCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::SPERCRegClass.contains(SrcReg) &&
             PPC::GPRCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::GPRCRegClass.contains(SrcReg) &&
             PPC::SPERCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  }

  // Same-class copies: pick the class-appropriate move opcode.
  unsigned Opc;
  if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR;
  else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR8;
  else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::FMR;
  else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::MCRF;
  else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::VOR;
  else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
    // There are two different ways this can be done:
    //   1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
    //      issue in VSU pipeline 0.
    //   2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
    //      can go to either pipeline.
    // We'll always use xxlor here, because in practically all cases where
    // copies are generated, they are close enough to some use that the
    // lower-latency form is preferable.
    Opc = PPC::XXLOR;
  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
           PPC::VSSRCRegClass.contains(DestReg, SrcReg))
    Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
  else if (Subtarget.pairedVectorMemops() &&
           PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
    // Register pairs: translate the pair register to its two underlying
    // VSL/V registers and copy each half with xxlor.
    if (SrcReg > PPC::VSRp15)
      SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
    else
      SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
    if (DestReg > PPC::VSRp15)
      DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
    else
      DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
      addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
      addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
    return;
  }
  else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::CROR;
  else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::EVOR;
  else if ((PPC::ACCRCRegClass.contains(DestReg) ||
            PPC::UACCRCRegClass.contains(DestReg)) &&
           (PPC::ACCRCRegClass.contains(SrcReg) ||
            PPC::UACCRCRegClass.contains(SrcReg))) {
    // If primed, de-prime the source register, copy the individual registers
    // and prime the destination if needed. The vector subregisters are
    // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
    // source is primed, we need to re-prime it after the copy as well.
    PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
    bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
    bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
    MCRegister VSLSrcReg =
        PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    MCRegister VSLDestReg =
        PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    if (SrcPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
    for (unsigned Idx = 0; Idx < 4; Idx++)
      BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
          .addReg(VSLSrcReg + Idx)
          .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
    if (DestPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
    if (SrcPrimed && !KillSrc)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
    return;
  } else if (PPC::G8pRCRegClass.contains(DestReg) &&
             PPC::G8pRCRegClass.contains(SrcReg)) {
    // TODO: Handle G8RC to G8pRC (and vice versa) copy.
    unsigned DestRegIdx = DestReg - PPC::G8p0;
    MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
    MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
    unsigned SrcRegIdx = SrcReg - PPC::G8p0;
    MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
    MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
        .addReg(SrcRegSub0)
        .addReg(SrcRegSub0, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
        .addReg(SrcRegSub1)
        .addReg(SrcRegSub1, getKillRegState(KillSrc));
    return;
  } else
    llvm_unreachable("Impossible reg-to-reg copy");

  // Two-source move opcodes (or/xxlor style) repeat the source operand; the
  // single-source ones (fmr/mcrf style) take it once.
  const MCInstrDesc &MCID = get(Opc);
  if (MCID.getNumOperands() == 3)
    BuildMI(MBB, I, DL, MCID, DestReg)
        .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
  else
    BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
1859
// Map a register class to its index into the spill-opcode arrays used by
// getStoreOpcodeForSpill/getLoadOpcodeForSpill.
// NOTE(review): every "OpcodeIndex = ..." assignment line inside this chain
// was lost in extraction; only the class tests and asserts survived. Confirm
// the assignments against upstream before relying on this transcription.
unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
  int OpcodeIndex = 0;

  if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
  } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
  } else {
    llvm_unreachable("Unknown regclass!");
  }
  return OpcodeIndex;
}
1912
// Look up the store (spill) opcode for RC via the shared spill index.
// NOTE(review): the signature's continuation line was lost in extraction.
unsigned
  ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}
1918
// Look up the load (reload) opcode for RC via the shared spill index.
// NOTE(review): the signature's continuation line was lost in extraction.
unsigned
  ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}
1924
// Build (but do not insert) the spill-store of SrcReg to FrameIdx, appending
// the new instruction(s) to NewMIs, and record spill bookkeeping on the
// function info (hasSpills, spillsCR, hasNonRISpills).
// NOTE(review): the line pushing the built instruction into NewMIs
// (NewMIs.push_back(addFrameReference(...)) was lost in extraction.
void PPCInstrInfo::StoreRegToStackSlot(
    MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
    const TargetRegisterClass *RC,
    SmallVectorImpl<MachineInstr *> &NewMIs) const {
  unsigned Opcode = getStoreOpcodeForSpill(RC);
  DebugLoc DL;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasSpills();

      BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
      FrameIdx));

  // CR spills need special frame-lowering handling; note them.
  if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
      PPC::CRBITRCRegClass.hasSubClassEq(RC))
    FuncInfo->setSpillsCR();

  // X-form memory ops cannot be converted to D-form; record that too.
  if (isXFormMemOp(Opcode))
    FuncInfo->setHasNonRISpills();
}
1946
// Insert a spill-store of SrcReg to FrameIdx before MI without updating the
// register class (the "NoUpd" variant), and attach a stack-slot memory
// operand to the final instruction.
// NOTE(review): the signature's first lines, the NewMIs declaration, and the
// MachineMemOperand construction lines were lost in extraction.
    bool isKill, int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();

  StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);

  for (MachineInstr *NewMI : NewMIs)
    MBB.insert(MI, NewMI);

  const MachineFrameInfo &MFI = MF.getFrameInfo();
                             MFI.getObjectAlign(FrameIdx));
  // The memory operand goes on the actual store (the last emitted instr).
  NewMIs.back()->addMemOperand(MF, MMO);
}
1966
1969 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1970 const TargetRegisterInfo *TRI, Register VReg) const {
1971 // We need to avoid a situation in which the value from a VRRC register is
1972 // spilled using an Altivec instruction and reloaded into a VSRC register
1973 // using a VSX instruction. The issue with this is that the VSX
1974 // load/store instructions swap the doublewords in the vector and the Altivec
1975 // ones don't. The register classes on the spill/reload may be different if
1976 // the register is defined using an Altivec instruction and is then used by a
1977 // VSX instruction.
1978 RC = updatedRC(RC);
1979 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
1980}
1981
1982void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
1983 unsigned DestReg, int FrameIdx,
1984 const TargetRegisterClass *RC,
1986 const {
1987 unsigned Opcode = getLoadOpcodeForSpill(RC);
1988 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
1989 FrameIdx));
1990}
1991
1994 int FrameIdx, const TargetRegisterClass *RC,
1995 const TargetRegisterInfo *TRI) const {
1996 MachineFunction &MF = *MBB.getParent();
1998 DebugLoc DL;
1999 if (MI != MBB.end()) DL = MI->getDebugLoc();
2000
2001 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2002
2003 for (MachineInstr *NewMI : NewMIs)
2004 MBB.insert(MI, NewMI);
2005
2006 const MachineFrameInfo &MFI = MF.getFrameInfo();
2010 MFI.getObjectAlign(FrameIdx));
2011 NewMIs.back()->addMemOperand(MF, MMO);
2012}
2013
2016 Register DestReg, int FrameIdx,
2017 const TargetRegisterClass *RC,
2018 const TargetRegisterInfo *TRI,
2019 Register VReg) const {
2020 // We need to avoid a situation in which the value from a VRRC register is
2021 // spilled using an Altivec instruction and reloaded into a VSRC register
2022 // using a VSX instruction. The issue with this is that the VSX
2023 // load/store instructions swap the doublewords in the vector and the Altivec
2024 // ones don't. The register classes on the spill/reload may be different if
2025 // the register is defined using an Altivec instruction and is then used by a
2026 // VSX instruction.
2027 RC = updatedRC(RC);
2028
2029 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2030}
2031
2034 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2035 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2036 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2037 else
2038 // Leave the CR# the same, but invert the condition.
2039 Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
2040 return false;
2041}
2042
// For some instructions, it is legal to fold ZERO into the RA register field.
// This function performs that fold by replacing the operand with PPC::ZERO;
// it does not consider whether the load-immediate-zero is no longer in use
// (that is FoldImmediate's job). Returns true if the fold was performed.
bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                     Register Reg) const {
  // A zero immediate should always be loaded with a single li.
  unsigned DefOpc = DefMI.getOpcode();
  if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
    return false;
  if (!DefMI.getOperand(1).isImm())
    return false;
  if (DefMI.getOperand(1).getImm() != 0)
    return false;

  // Note that we cannot here invert the arguments of an isel in order to fold
  // a ZERO into what is presented as the second argument. All we have here
  // is the condition bit, and that might come from a CR-logical bit operation.

  const MCInstrDesc &UseMCID = UseMI.getDesc();

  // Only fold into real machine instructions.
  if (UseMCID.isPseudo())
    return false;

  // We need to find which of the User's operands is to be folded, that will be
  // the operand that matches the given register ID.
  unsigned UseIdx;
  for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
    if (UseMI.getOperand(UseIdx).isReg() &&
        UseMI.getOperand(UseIdx).getReg() == Reg)
      break;

  assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
  assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");

  const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];

  // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
  // register (which might also be specified as a pointer class kind).
  if (UseInfo->isLookupPtrRegClass()) {
    if (UseInfo->RegClass /* Kind */ != 1)
      return false;
  } else {
    if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
        UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
      return false;
  }

  // Make sure this is not tied to an output register (or otherwise
  // constrained). This is true for ST?UX registers, for example, which
  // are tied to their output registers.
  if (UseInfo->Constraints != 0)
    return false;

  // Pick ZERO or ZERO8 to match the width expected by the use operand.
  MCRegister ZeroReg;
  if (UseInfo->isLookupPtrRegClass()) {
    bool isPPC64 = Subtarget.isPPC64();
    ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
  } else {
    ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
              PPC::ZERO8 : PPC::ZERO;
  }

  LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
  LLVM_DEBUG(UseMI.dump());
  UseMI.getOperand(UseIdx).setReg(ZeroReg);
  LLVM_DEBUG(dbgs() << "Into: ");
  LLVM_DEBUG(UseMI.dump());
  return true;
}
2113
2114// Folds zero into instructions which have a load immediate zero as an operand
2115// but also recognize zero as immediate zero. If the definition of the load
2116// has no more users it is deleted.
2118 Register Reg, MachineRegisterInfo *MRI) const {
2119 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2120 if (MRI->use_nodbg_empty(Reg))
2121 DefMI.eraseFromParent();
2122 return Changed;
2123}
2124
2126 for (MachineInstr &MI : MBB)
2127 if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
2128 MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
2129 return true;
2130 return false;
2131}
2132
2133// We should make sure that, if we're going to predicate both sides of a
2134// condition (a diamond), that both sides don't define the counter register. We
2135// can predicate counter-decrement-based branches, but while that predicates
2136// the branching, it does not predicate the counter decrement. If we tried to
2137// merge the triangle into one predicated block, we'd decrement the counter
2138// twice.
2140 unsigned NumT, unsigned ExtraT,
2141 MachineBasicBlock &FMBB,
2142 unsigned NumF, unsigned ExtraF,
2143 BranchProbability Probability) const {
2144 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2145}
2146
2147
2149 // The predicated branches are identified by their type, not really by the
2150 // explicit presence of a predicate. Furthermore, some of them can be
2151 // predicated more than once. Because if conversion won't try to predicate
2152 // any instruction which already claims to be predicated (by returning true
2153 // here), always return false. In doing so, we let isPredicable() be the
2154 // final word on whether not the instruction can be (further) predicated.
2155
2156 return false;
2157}
2158
2160 const MachineBasicBlock *MBB,
2161 const MachineFunction &MF) const {
2162 switch (MI.getOpcode()) {
2163 default:
2164 break;
2165 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2166 // across them, since some FP operations may change content of FPSCR.
2167 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2168 case PPC::MFFS:
2169 case PPC::MTFSF:
2170 case PPC::FENCE:
2171 return true;
2172 }
2174}
2175
// Rewrite an unconditional control-flow instruction (blr, b, bctr[l]) into
// its conditional counterpart according to Pred, where Pred[0] holds the
// PPC::Predicate code (or, for CTR-based predicates, the BDNZ/BDZ selector)
// and Pred[1] holds the CR/CR-bit/CTR register operand. Returns true if MI
// was rewritten.
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
                                        ArrayRef<MachineOperand> Pred) const {
  unsigned OpC = MI.getOpcode();
  if (OpC == PPC::BLR || OpC == PPC::BLR8) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
      bool isPPC64 = Subtarget.isPPC64();
      MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
                                      : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
      // Need add Def and Use for CTR implicit operand.
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(Pred[1].getReg(), RegState::Implicit)
          .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      // Branch-to-LR on a single CR bit being set.
      MI.setDesc(get(PPC::BCLR));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      // Branch-to-LR on a single CR bit being clear.
      MI.setDesc(get(PPC::BCLRn));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else {
      // General conditional branch-to-LR: predicate code + CR operand.
      MI.setDesc(get(PPC::BCCLR));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1]);
    }

    return true;
  } else if (OpC == PPC::B) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
      bool isPPC64 = Subtarget.isPPC64();
      MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
                                      : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
      // Need add Def and Use for CTR implicit operand.
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(Pred[1].getReg(), RegState::Implicit)
          .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      // The branch target must move behind the new CR-bit operand, so
      // detach it first and re-append it after the predicate operand.
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BC));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .add(Pred[1])
          .addMBB(MBB);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BCn));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .add(Pred[1])
          .addMBB(MBB);
    } else {
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BCC));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1])
          .addMBB(MBB);
    }

    return true;
  } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
             OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
             OpC == PPC::BCTRL8_RM) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
      llvm_unreachable("Cannot predicate bctr[l] on the ctr register");

    bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
                 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
    bool isPPC64 = Subtarget.isPPC64();

    if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
                             : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
                             : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
                             : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1]);
    }

    // Need add Def and Use for LR implicit operand.
    if (setLR)
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
          .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
    if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(PPC::RM, RegState::ImplicitDefine);

    return true;
  }

  return false;
}
2279
2281 ArrayRef<MachineOperand> Pred2) const {
2282 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2283 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2284
2285 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2286 return false;
2287 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2288 return false;
2289
2290 // P1 can only subsume P2 if they test the same condition register.
2291 if (Pred1[1].getReg() != Pred2[1].getReg())
2292 return false;
2293
2294 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2295 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2296
2297 if (P1 == P2)
2298 return true;
2299
2300 // Does P1 subsume P2, e.g. GE subsumes GT.
2301 if (P1 == PPC::PRED_LE &&
2302 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2303 return true;
2304 if (P1 == PPC::PRED_GE &&
2305 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2306 return true;
2307
2308 return false;
2309}
2310
2312 std::vector<MachineOperand> &Pred,
2313 bool SkipDead) const {
2314 // Note: At the present time, the contents of Pred from this function is
2315 // unused by IfConversion. This implementation follows ARM by pushing the
2316 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2317 // predicate, instructions defining CTR or CTR8 are also included as
2318 // predicate-defining instructions.
2319
2320 const TargetRegisterClass *RCs[] =
2321 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2322 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2323
2324 bool Found = false;
2325 for (const MachineOperand &MO : MI.operands()) {
2326 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2327 const TargetRegisterClass *RC = RCs[c];
2328 if (MO.isReg()) {
2329 if (MO.isDef() && RC->contains(MO.getReg())) {
2330 Pred.push_back(MO);
2331 Found = true;
2332 }
2333 } else if (MO.isRegMask()) {
2334 for (MCPhysReg R : *RC)
2335 if (MO.clobbersPhysReg(R)) {
2336 Pred.push_back(MO);
2337 Found = true;
2338 }
2339 }
2340 }
2341 }
2342
2343 return Found;
2344}
2345
2347 Register &SrcReg2, int64_t &Mask,
2348 int64_t &Value) const {
2349 unsigned Opc = MI.getOpcode();
2350
2351 switch (Opc) {
2352 default: return false;
2353 case PPC::CMPWI:
2354 case PPC::CMPLWI:
2355 case PPC::CMPDI:
2356 case PPC::CMPLDI:
2357 SrcReg = MI.getOperand(1).getReg();
2358 SrcReg2 = 0;
2359 Value = MI.getOperand(2).getImm();
2360 Mask = 0xFFFF;
2361 return true;
2362 case PPC::CMPW:
2363 case PPC::CMPLW:
2364 case PPC::CMPD:
2365 case PPC::CMPLD:
2366 case PPC::FCMPUS:
2367 case PPC::FCMPUD:
2368 SrcReg = MI.getOperand(1).getReg();
2369 SrcReg2 = MI.getOperand(2).getReg();
2370 Value = 0;
2371 Mask = 0;
2372 return true;
2373 }
2374}
2375
// Try to make CmpInstr redundant: convert the instruction defining SrcReg
// (or a matching SUBF) into its record form so that it sets CR0 directly,
// then rewrite the compare's CR users to read CR0 and delete the compare.
bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                                        Register SrcReg2, int64_t Mask,
                                        int64_t Value,
                                        const MachineRegisterInfo *MRI) const {
  if (DisableCmpOpt)
    return false;

  int OpC = CmpInstr.getOpcode();
  Register CRReg = CmpInstr.getOperand(0).getReg();

  // FP record forms set CR1 based on the exception status bits, not a
  // comparison with zero.
  if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // The record forms set the condition register based on a signed comparison
  // with zero (so says the ISA manual). This is not as straightforward as it
  // seems, however, because this is always a 64-bit comparison on PPC64, even
  // for instructions that are 32-bit in nature (like slw for example).
  // So, on PPC32, for unsigned comparisons, we can use the record forms only
  // for equality checks (as those don't depend on the sign). On PPC64,
  // we are restricted to equality for unsigned 64-bit comparisons and for
  // signed 32-bit comparisons the applicability is more restricted.
  bool isPPC64 = Subtarget.isPPC64();
  bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
  bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
  bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;

  // Look through copies unless that gets us to a physical register.
  Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
  if (ActualSrc.isVirtual())
    SrcReg = ActualSrc;

  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  bool equalityOnly = false;
  bool noSub = false;
  if (isPPC64) {
    if (is32BitSignedCompare) {
      // We can perform this optimization only if SrcReg is sign-extending.
      if (isSignExtended(SrcReg, MRI))
        noSub = true;
      else
        return false;
    } else if (is32BitUnsignedCompare) {
      // We can perform this optimization, equality only, if SrcReg is
      // zero-extending.
      if (isZeroExtended(SrcReg, MRI)) {
        noSub = true;
        equalityOnly = true;
      } else
        return false;
    } else
      equalityOnly = is64BitUnsignedCompare;
  } else
    equalityOnly = is32BitUnsignedCompare;

  if (equalityOnly) {
    // We need to check the uses of the condition register in order to reject
    // non-equality comparisons.
    for (MachineRegisterInfo::use_instr_iterator
         I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
         I != IE; ++I) {
      MachineInstr *UseMI = &*I;
      if (UseMI->getOpcode() == PPC::BCC) {
        PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
        unsigned PredCond = PPC::getPredicateCondition(Pred);
        // We ignore hint bits when checking for non-equality comparisons.
        if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
          return false;
      } else if (UseMI->getOpcode() == PPC::ISEL ||
                 UseMI->getOpcode() == PPC::ISEL8) {
        unsigned SubIdx = UseMI->getOperand(3).getSubReg();
        if (SubIdx != PPC::sub_eq)
          return false;
      } else
        return false;
    }
  }

  MachineBasicBlock::iterator I = CmpInstr;

  // Scan forward to find the first use of the compare.
  for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
       ++I) {
    bool FoundUse = false;
    for (MachineRegisterInfo::use_instr_iterator
         J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
         J != JE; ++J)
      if (&*J == &*I) {
        FoundUse = true;
        break;
      }

    if (FoundUse)
      break;
  }

  // Deferred rewrites of CR users; only applied once the transform is known
  // to succeed.
  SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
  SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;

  // There are two possible candidates which can be changed to set CR[01].
  // One is MI, the other is a SUB instruction.
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
  MachineInstr *Sub = nullptr;
  if (SrcReg2 != 0)
    // MI is not a candidate for CMPrr.
    MI = nullptr;
  // FIXME: Conservatively refuse to convert an instruction which isn't in the
  // same BB as the comparison. This is to allow the check below to avoid calls
  // (and other explicit clobbers); instead we should really check for these
  // more explicitly (in at least a few predecessors).
  else if (MI->getParent() != CmpInstr.getParent())
    return false;
  else if (Value != 0) {
    // The record-form instructions set CR bit based on signed comparison
    // against 0. We try to convert a compare against 1 or -1 into a compare
    // against 0 to exploit record-form instructions. For example, we change
    // the condition "greater than -1" into "greater than or equal to 0"
    // and "less than 1" into "less than or equal to 0".

    // Since we optimize comparison based on a specific branch condition,
    // we don't optimize if condition code is used by more than once.
    if (equalityOnly || !MRI->hasOneUse(CRReg))
      return false;

    MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
    if (UseMI->getOpcode() != PPC::BCC)
      return false;

    PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
    unsigned PredCond = PPC::getPredicateCondition(Pred);
    unsigned PredHint = PPC::getPredicateHint(Pred);
    int16_t Immed = (int16_t)Value;

    // When modifying the condition in the predicate, we propagate hint bits
    // from the original predicate to the new one.
    if (Immed == -1 && PredCond == PPC::PRED_GT)
      // We convert "greater than -1" into "greater than or equal to 0",
      // since we are assuming signed comparison by !equalityOnly
      Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
    else if (Immed == -1 && PredCond == PPC::PRED_LE)
      // We convert "less than or equal to -1" into "less than 0".
      Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
    else if (Immed == 1 && PredCond == PPC::PRED_LT)
      // We convert "less than 1" into "less than or equal to 0".
      Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
    else if (Immed == 1 && PredCond == PPC::PRED_GE)
      // We convert "greater than or equal to 1" into "greater than 0".
      Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
    else
      return false;

    // Convert the comparison and its user to a compare against zero with the
    // appropriate predicate on the branch. Zero comparison might provide
    // optimization opportunities post-RA (see optimization in
    // PPCPreEmitPeephole.cpp).
    UseMI->getOperand(0).setImm(Pred);
    CmpInstr.getOperand(2).setImm(0);
  }

  // Search for Sub.
  --I;

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();

  for (; I != E && !noSub; --I) {
    const MachineInstr &Instr = *I;
    unsigned IOpC = Instr.getOpcode();

    if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
                             Instr.readsRegister(PPC::CR0, TRI)))
      // This instruction modifies or uses the record condition register after
      // the one we want to change. While we could do this transformation, it
      // would likely not be profitable. This transformation removes one
      // instruction, and so even forcing RA to generate one move probably
      // makes it unprofitable.
      return false;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
         OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
        (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
        ((Instr.getOperand(1).getReg() == SrcReg &&
          Instr.getOperand(2).getReg() == SrcReg2) ||
         (Instr.getOperand(1).getReg() == SrcReg2 &&
          Instr.getOperand(2).getReg() == SrcReg))) {
      Sub = &*I;
      break;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Return false if no candidates exist.
  if (!MI && !Sub)
    return false;

  // The single candidate is called MI.
  if (!MI) MI = Sub;

  int NewOpC = -1;
  int MIOpC = MI->getOpcode();
  if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
      MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
    NewOpC = MIOpC;
  else {
    NewOpC = PPC::getRecordFormOpcode(MIOpC);
    if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
      NewOpC = MIOpC;
  }

  // FIXME: On the non-embedded POWER architectures, only some of the record
  // forms are fast, and we should use only the fast ones.

  // The defining instruction has a record form (or is already a record
  // form). It is possible, however, that we'll need to reverse the condition
  // code of the users.
  if (NewOpC == -1)
    return false;

  // This transformation should not be performed if `nsw` is missing and is not
  // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
  // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
  // CRReg can reflect if compared values are equal, this optz is still valid.
  if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
      Sub && !Sub->getFlag(MachineInstr::NoSWrap))
    return false;

  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
  // needs to be updated to be based on SUB. Push the condition code
  // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
  // condition code of these operands will be modified.
  // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
  // comparison against 0, which may modify predicate.
  bool ShouldSwap = false;
  if (Sub && Value == 0) {
    ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
      Sub->getOperand(2).getReg() == SrcReg;

    // The operands to subf are the opposite of sub, so only in the fixed-point
    // case, invert the order.
    ShouldSwap = !ShouldSwap;
  }

  if (ShouldSwap)
    for (MachineRegisterInfo::use_instr_iterator
         I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
         I != IE; ++I) {
      MachineInstr *UseMI = &*I;
      if (UseMI->getOpcode() == PPC::BCC) {
        PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
        unsigned PredCond = PPC::getPredicateCondition(Pred);
        assert((!equalityOnly ||
                PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
               "Invalid predicate for equality-only optimization");
        (void)PredCond; // To suppress warning in release build.
        PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
                                PPC::getSwappedPredicate(Pred)));
      } else if (UseMI->getOpcode() == PPC::ISEL ||
                 UseMI->getOpcode() == PPC::ISEL8) {
        unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
        assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
               "Invalid CR bit for equality-only optimization");

        if (NewSubReg == PPC::sub_lt)
          NewSubReg = PPC::sub_gt;
        else if (NewSubReg == PPC::sub_gt)
          NewSubReg = PPC::sub_lt;

        SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
                                                 NewSubReg));
      } else // We need to abort on a user we don't understand.
        return false;
    }
  assert(!(Value != 0 && ShouldSwap) &&
         "Non-zero immediate support and ShouldSwap"
         "may conflict in updating predicate");

  // Create a new virtual register to hold the value of the CR set by the
  // record-form instruction. If the instruction was not previously in
  // record form, then set the kill flag on the CR.
  CmpInstr.eraseFromParent();

  MachineBasicBlock::iterator MII = MI;
  BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
          get(TargetOpcode::COPY), CRReg)
    .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);

  // Even if CR0 register were dead before, it is alive now since the
  // instruction we just built uses it.
  MI->clearRegisterDeads(PPC::CR0);

  if (MIOpC != NewOpC) {
    // We need to be careful here: we're replacing one instruction with
    // another, and we need to make sure that we get all of the right
    // implicit uses and defs. On the other hand, the caller may be holding
    // an iterator to this instruction, and so we can't delete it (this is
    // specifically the case if this is the instruction directly after the
    // compare).

    // Rotates are expensive instructions. If we're emitting a record-form
    // rotate that can just be an andi/andis, we should just emit that.
    if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
      Register GPRRes = MI->getOperand(0).getReg();
      int64_t SH = MI->getOperand(2).getImm();
      int64_t MB = MI->getOperand(3).getImm();
      int64_t ME = MI->getOperand(4).getImm();
      // We can only do this if both the start and end of the mask are in the
      // same halfword.
      bool MBInLoHWord = MB >= 16;
      bool MEInLoHWord = ME >= 16;
      uint64_t Mask = ~0LLU;

      if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
        Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
        // The mask value needs to shift right 16 if we're emitting andis.
        Mask >>= MBInLoHWord ? 0 : 16;
        NewOpC = MIOpC == PPC::RLWINM
                     ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
                     : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
      } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
                 (ME - MB + 1 == SH) && (MB >= 16)) {
        // If we are rotating by the exact number of bits as are in the mask
        // and the mask is in the least significant bits of the register,
        // that's just an andis. (as long as the GPR result has no uses).
        Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
        Mask >>= 16;
        NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
      }
      // If we've set the mask, we can transform.
      if (Mask != ~0LLU) {
        MI->removeOperand(4);
        MI->removeOperand(3);
        MI->getOperand(2).setImm(Mask);
        NumRcRotatesConvertedToRcAnd++;
      }
    } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
      int64_t MB = MI->getOperand(3).getImm();
      if (MB >= 48) {
        uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
        NewOpC = PPC::ANDI8_rec;
        MI->removeOperand(3);
        MI->getOperand(2).setImm(Mask);
        NumRcRotatesConvertedToRcAnd++;
      }
    }

    const MCInstrDesc &NewDesc = get(NewOpC);
    MI->setDesc(NewDesc);

    // Carry over any implicit defs/uses the new opcode requires but the old
    // operand list did not already mention.
    for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
      if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
        MI->addOperand(*MI->getParent()->getParent(),
                       MachineOperand::CreateReg(ImpDef, true, true));
      }
    }
    for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
      if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
        MI->addOperand(*MI->getParent()->getParent(),
                       MachineOperand::CreateReg(ImpUse, false, true));
      }
    }
  }
  assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
         "Record-form instruction does not define cr0?");

  // Modify the condition code of operands in OperandsToUpdate.
  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
  for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
    PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);

  for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
    SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);

  return true;
}
2760
// Post-RA counterpart of optimizeCompareInstr: replace a compare of SrcReg
// against zero with the record form of the instruction defining SrcReg, so
// the compare itself becomes deletable. Returns true when the rewrite was
// performed (the caller deletes CmpMI).
bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
  MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
  // This path runs after register allocation only.
  if (MRI->isSSA())
    return false;

  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
    return false;

  // Try to optimize the comparison against 0.
  if (CmpValue || !CmpMask || SrcReg2)
    return false;

  // The record forms set the condition register based on a signed comparison
  // with zero (see comments in optimizeCompareInstr). Since we can't do the
  // equality checks in post-RA, we are more restricted on a unsigned
  // comparison.
  unsigned Opc = CmpMI.getOpcode();
  if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
    return false;

  // The record forms are always based on a 64-bit comparison on PPC64
  // (similary, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
  // comparison. Since we can't do the equality checks in post-RA, we bail out
  // the case.
  if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
    return false;

  // CmpMI can't be deleted if it has implicit def.
  if (CmpMI.hasImplicitDef())
    return false;

  bool SrcRegHasOtherUse = false;
  MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
  if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
    return false;

  // Only CR0 can be set by a record-form instruction.
  MachineOperand RegMO = CmpMI.getOperand(0);
  Register CRReg = RegMO.getReg();
  if (CRReg != PPC::CR0)
    return false;

  // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
  bool SeenUseOfCRReg = false;
  bool IsCRRegKilled = false;
  if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
                                 SeenUseOfCRReg) ||
      SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
    return false;

  int SrcMIOpc = SrcMI->getOpcode();
  int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
  if (NewOpC == -1)
    return false;

  LLVM_DEBUG(dbgs() << "Replace Instr: ");
  LLVM_DEBUG(SrcMI->dump());

  // Switch the defining instruction to its record form and make its implicit
  // CR0 def explicit and live.
  const MCInstrDesc &NewDesc = get(NewOpC);
  SrcMI->setDesc(NewDesc);
  MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
      .addReg(CRReg, RegState::ImplicitDefine);
  SrcMI->clearRegisterDeads(CRReg);

  assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
         "Record-form instruction does not define cr0?");

  LLVM_DEBUG(dbgs() << "with: ");
  LLVM_DEBUG(SrcMI->dump());
  LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
  LLVM_DEBUG(CmpMI.dump());
  return true;
}
2835
2838 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2839 const TargetRegisterInfo *TRI) const {
2840 const MachineOperand *BaseOp;
2841 OffsetIsScalable = false;
2842 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2843 return false;
2844 BaseOps.push_back(BaseOp);
2845 return true;
2846}
2847
2848static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2849 const TargetRegisterInfo *TRI) {
2850 // If this is a volatile load/store, don't mess with it.
2851 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2852 return false;
2853
2854 if (LdSt.getOperand(2).isFI())
2855 return true;
2856
2857 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2858 // Can't cluster if the instruction modifies the base register
2859 // or it is update form. e.g. ld r2,3(r2)
2860 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2861 return false;
2862
2863 return true;
2864}
2865
2866// Only cluster instruction pair that have the same opcode, and they are
2867// clusterable according to PowerPC specification.
2868static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2869 const PPCSubtarget &Subtarget) {
2870 switch (FirstOpc) {
2871 default:
2872 return false;
2873 case PPC::STD:
2874 case PPC::STFD:
2875 case PPC::STXSD:
2876 case PPC::DFSTOREf64:
2877 return FirstOpc == SecondOpc;
2878 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2879 // 32bit and 64bit instruction selection. They are clusterable pair though
2880 // they are different opcode.
2881 case PPC::STW:
2882 case PPC::STW8:
2883 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2884 }
2885}
2886
    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
    unsigned NumBytes) const {

  // Each candidate memory op must expose exactly one base operand, which is
  // either a base register or a frame index.
  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // ClusterSize means the number of memory operations that will have been
  // clustered if this hook returns true.
  // Don't cluster memory op if there are already two ops clustered at least.
  if (ClusterSize > 2)
    return false;

  // Cluster the load/store only when they have the same base
  // register or FI.
  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
    return false;

  // Check if the load/store are clusterable according to the PowerPC
  // specification.
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  // Cluster the load/store only when they have the same opcode, and they are
  // clusterable opcode according to PowerPC specification.
  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
    return false;

  // Can't cluster load/store that have ordered or volatile memory reference.
  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
      !isLdStSafeToCluster(SecondLdSt, TRI))
    return false;

  // Recompute offset/width for both ops; the two accesses must have the same
  // width for the adjacency check below to be meaningful.
  int64_t Offset1 = 0, Offset2 = 0;
  LocationSize Width1 = 0, Width2 = 0;
  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
      Width1 != Width2)
    return false;

  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
         "getMemOperandWithOffsetWidth return incorrect base op");
  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  // Cluster only when the second access starts exactly where the first ends.
  return Offset1 + (int64_t)Width1.getValue() == Offset2;
}
2943
2944/// GetInstSize - Return the number of bytes of code the specified
2945/// instruction may be. This returns the maximum number of bytes.
2946///
  unsigned Opcode = MI.getOpcode();

  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
    // Inline asm has no fixed encoding; derive a worst-case byte count from
    // the asm string itself.
    const MachineFunction *MF = MI.getParent()->getParent();
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
  } else if (Opcode == TargetOpcode::STACKMAP) {
    // Stackmaps reserve an explicit number of patchable bytes.
    StackMapOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else if (Opcode == TargetOpcode::PATCHPOINT) {
    // Likewise for patchpoints.
    PatchPointOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else {
    // Everything else has a static size recorded in its MCInstrDesc.
    return get(Opcode).getSize();
  }
}
2964
2965std::pair<unsigned, unsigned>
2967 // PPC always uses a direct mask.
2968 return std::make_pair(TF, 0u);
2969}
2970
2973 using namespace PPCII;
2974 static const std::pair<unsigned, const char *> TargetFlags[] = {
2975 {MO_PLT, "ppc-plt"},
2976 {MO_PIC_FLAG, "ppc-pic"},
2977 {MO_PCREL_FLAG, "ppc-pcrel"},
2978 {MO_GOT_FLAG, "ppc-got"},
2979 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
2980 {MO_TLSGD_FLAG, "ppc-tlsgd"},
2981 {MO_TPREL_FLAG, "ppc-tprel"},
2982 {MO_TLSLDM_FLAG, "ppc-tlsldm"},
2983 {MO_TLSLD_FLAG, "ppc-tlsld"},
2984 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
2985 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
2986 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
2987 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
2988 {MO_LO, "ppc-lo"},
2989 {MO_HA, "ppc-ha"},
2990 {MO_TPREL_LO, "ppc-tprel-lo"},
2991 {MO_TPREL_HA, "ppc-tprel-ha"},
2992 {MO_DTPREL_LO, "ppc-dtprel-lo"},
2993 {MO_TLSLD_LO, "ppc-tlsld-lo"},
2994 {MO_TOC_LO, "ppc-toc-lo"},
2995 {MO_TLS, "ppc-tls"},
2996 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
2997 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
2998 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
2999 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
3000 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3001 };
3002 return ArrayRef(TargetFlags);
3003}
3004
3005// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3006// The VSX versions have the advantage of a full 64-register target whereas
3007// the FP ones have the advantage of lower latency and higher throughput. So
3008// what we are after is using the faster instructions in low register pressure
3009// situations and using the larger register file in high register pressure
3010// situations.
  unsigned UpperOpcode, LowerOpcode;
  // For each memory pseudo, pick the VSX ("upper") and FP ("lower")
  // replacement opcodes; which one is used depends on the register the
  // pseudo was allocated to (see below).
  switch (MI.getOpcode()) {
  case PPC::DFLOADf32:
    UpperOpcode = PPC::LXSSP;
    LowerOpcode = PPC::LFS;
    break;
  case PPC::DFLOADf64:
    UpperOpcode = PPC::LXSD;
    LowerOpcode = PPC::LFD;
    break;
  case PPC::DFSTOREf32:
    UpperOpcode = PPC::STXSSP;
    LowerOpcode = PPC::STFS;
    break;
  case PPC::DFSTOREf64:
    UpperOpcode = PPC::STXSD;
    LowerOpcode = PPC::STFD;
    break;
  case PPC::XFLOADf32:
    UpperOpcode = PPC::LXSSPX;
    LowerOpcode = PPC::LFSX;
    break;
  case PPC::XFLOADf64:
    UpperOpcode = PPC::LXSDX;
    LowerOpcode = PPC::LFDX;
    break;
  case PPC::XFSTOREf32:
    UpperOpcode = PPC::STXSSPX;
    LowerOpcode = PPC::STFSX;
    break;
  case PPC::XFSTOREf64:
    UpperOpcode = PPC::STXSDX;
    LowerOpcode = PPC::STFDX;
    break;
  case PPC::LIWAX:
    UpperOpcode = PPC::LXSIWAX;
    LowerOpcode = PPC::LFIWAX;
    break;
  case PPC::LIWZX:
    UpperOpcode = PPC::LXSIWZX;
    LowerOpcode = PPC::LFIWZX;
    break;
  case PPC::STIWX:
    UpperOpcode = PPC::STXSIWX;
    LowerOpcode = PPC::STFIWX;
    break;
  default:
    llvm_unreachable("Unknown Operation!");
  }

  // If the allocated register falls in the F0-F31 / VSL0-VSL31 range, the
  // FP-form instruction can encode it; otherwise the VSX form is required
  // to reach the rest of the 64-register file.
  Register TargetReg = MI.getOperand(0).getReg();
  unsigned Opcode;
  if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
      (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
    Opcode = LowerOpcode;
  else
    Opcode = UpperOpcode;
  MI.setDesc(get(Opcode));
  return true;
}
3072
3073static bool isAnImmediateOperand(const MachineOperand &MO) {
3074 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3075}
3076
3078 auto &MBB = *MI.getParent();
3079 auto DL = MI.getDebugLoc();
3080
3081 switch (MI.getOpcode()) {
3082 case PPC::BUILD_UACC: {
3083 MCRegister ACC = MI.getOperand(0).getReg();
3084 MCRegister UACC = MI.getOperand(1).getReg();
3085 if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
3086 MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
3087 MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
3088 // FIXME: This can easily be improved to look up to the top of the MBB
3089 // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
3090 // we can just re-target any such XXLOR's to DstVSR + offset.
3091 for (int VecNo = 0; VecNo < 4; VecNo++)
3092 BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
3093 .addReg(SrcVSR + VecNo)
3094 .addReg(SrcVSR + VecNo);
3095 }
3096 // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
3097 // So after building the 4 copies, we can replace the BUILD_UACC instruction
3098 // with a NOP.
3099 [[fallthrough]];
3100 }
3101 case PPC::KILL_PAIR: {
3102 MI.setDesc(get(PPC::UNENCODED_NOP));
3103 MI.removeOperand(1);
3104 MI.removeOperand(0);
3105 return true;
3106 }
3107 case TargetOpcode::LOAD_STACK_GUARD: {
3108 auto M = MBB.getParent()->getFunction().getParent();
3109 assert(
3110 (Subtarget.isTargetLinux() || M->getStackProtectorGuard() == "tls") &&
3111 "Only Linux target or tls mode are expected to contain "
3112 "LOAD_STACK_GUARD");
3113 int64_t Offset;
3114 if (M->getStackProtectorGuard() == "tls")
3115 Offset = M->getStackProtectorGuardOffset();
3116 else
3117 Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
3118 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3119 MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
3120 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3121 .addImm(Offset)
3122 .addReg(Reg);
3123 return true;
3124 }
3125 case PPC::PPCLdFixedAddr: {
3126 assert(Subtarget.getTargetTriple().isOSGlibc() &&
3127 "Only targets with Glibc expected to contain PPCLdFixedAddr");
3128 int64_t Offset = 0;
3129 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3130 MI.setDesc(get(PPC::LWZ));
3131 uint64_t FAType = MI.getOperand(1).getImm();
3132#undef PPC_LNX_FEATURE
3133#undef PPC_CPU
3134#define PPC_LNX_DEFINE_OFFSETS
3135#include "llvm/TargetParser/PPCTargetParser.def"
3136 bool IsLE = Subtarget.isLittleEndian();
3137 bool Is64 = Subtarget.isPPC64();
3138 if (FAType == PPC_FAWORD_HWCAP) {
3139 if (IsLE)
3140 Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
3141 else
3142 Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
3143 } else if (FAType == PPC_FAWORD_HWCAP2) {
3144 if (IsLE)
3145 Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
3146 else
3147 Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
3148 } else if (FAType == PPC_FAWORD_CPUID) {
3149 if (IsLE)
3150 Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
3151 else
3152 Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
3153 }
3154 assert(Offset && "Do not know the offset for this fixed addr load");
3155 MI.removeOperand(1);
3157 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3158 .addImm(Offset)
3159 .addReg(Reg);
3160 return true;
3161#define PPC_TGT_PARSER_UNDEF_MACROS
3162#include "llvm/TargetParser/PPCTargetParser.def"
3163#undef PPC_TGT_PARSER_UNDEF_MACROS
3164 }
3165 case PPC::DFLOADf32:
3166 case PPC::DFLOADf64:
3167 case PPC::DFSTOREf32:
3168 case PPC::DFSTOREf64: {
3169 assert(Subtarget.hasP9Vector() &&
3170 "Invalid D-Form Pseudo-ops on Pre-P9 target.");
3171 assert(MI.getOperand(2).isReg() &&
3172 isAnImmediateOperand(MI.getOperand(1)) &&
3173 "D-form op must have register and immediate operands");
3174 return expandVSXMemPseudo(MI);
3175 }
3176 case PPC::XFLOADf32:
3177 case PPC::XFSTOREf32:
3178 case PPC::LIWAX:
3179 case PPC::LIWZX:
3180 case PPC::STIWX: {
3181 assert(Subtarget.hasP8Vector() &&
3182 "Invalid X-Form Pseudo-ops on Pre-P8 target.");
3183 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3184 "X-form op must have register and register operands");
3185 return expandVSXMemPseudo(MI);
3186 }
3187 case PPC::XFLOADf64:
3188 case PPC::XFSTOREf64: {
3189 assert(Subtarget.hasVSX() &&
3190 "Invalid X-Form Pseudo-ops on target that has no VSX.");
3191 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3192 "X-form op must have register and register operands");
3193 return expandVSXMemPseudo(MI);
3194 }
3195 case PPC::SPILLTOVSR_LD: {
3196 Register TargetReg = MI.getOperand(0).getReg();
3197 if (PPC::VSFRCRegClass.contains(TargetReg)) {
3198 MI.setDesc(get(PPC::DFLOADf64));
3199 return expandPostRAPseudo(MI);
3200 }
3201 else
3202 MI.setDesc(get(PPC::LD));
3203 return true;
3204 }
3205 case PPC::SPILLTOVSR_ST: {
3206 Register SrcReg = MI.getOperand(0).getReg();
3207 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3208 NumStoreSPILLVSRRCAsVec++;
3209 MI.setDesc(get(PPC::DFSTOREf64));
3210 return expandPostRAPseudo(MI);
3211 } else {
3212 NumStoreSPILLVSRRCAsGpr++;
3213 MI.setDesc(get(PPC::STD));
3214 }
3215 return true;
3216 }
3217 case PPC::SPILLTOVSR_LDX: {
3218 Register TargetReg = MI.getOperand(0).getReg();
3219 if (PPC::VSFRCRegClass.contains(TargetReg))
3220 MI.setDesc(get(PPC::LXSDX));
3221 else
3222 MI.setDesc(get(PPC::LDX));
3223 return true;
3224 }
3225 case PPC::SPILLTOVSR_STX: {
3226 Register SrcReg = MI.getOperand(0).getReg();
3227 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3228 NumStoreSPILLVSRRCAsVec++;
3229 MI.setDesc(get(PPC::STXSDX));
3230 } else {
3231 NumStoreSPILLVSRRCAsGpr++;
3232 MI.setDesc(get(PPC::STDX));
3233 }
3234 return true;
3235 }
3236
3237 // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
3238 case PPC::CFENCE:
3239 case PPC::CFENCE8: {
3240 auto Val = MI.getOperand(0).getReg();
3241 unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3242 BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
3243 BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
3245 .addReg(PPC::CR7)
3246 .addImm(1);
3247 MI.setDesc(get(PPC::ISYNC));
3248 MI.removeOperand(0);
3249 return true;
3250 }
3251 }
3252 return false;
3253}
3254
3255// Essentially a compile-time implementation of a compare->isel sequence.
3256// It takes two constants to compare, along with the true/false registers
3257// and the comparison type (as a subreg to a CR field) and returns one
3258// of the true/false registers, depending on the comparison results.
3259static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3260 unsigned TrueReg, unsigned FalseReg,
3261 unsigned CRSubReg) {
3262 // Signed comparisons. The immediates are assumed to be sign-extended.
3263 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3264 switch (CRSubReg) {
3265 default: llvm_unreachable("Unknown integer comparison type.");
3266 case PPC::sub_lt:
3267 return Imm1 < Imm2 ? TrueReg : FalseReg;
3268 case PPC::sub_gt:
3269 return Imm1 > Imm2 ? TrueReg : FalseReg;
3270 case PPC::sub_eq:
3271 return Imm1 == Imm2 ? TrueReg : FalseReg;
3272 }
3273 }
3274 // Unsigned comparisons.
3275 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3276 switch (CRSubReg) {
3277 default: llvm_unreachable("Unknown integer comparison type.");
3278 case PPC::sub_lt:
3279 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3280 case PPC::sub_gt:
3281 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3282 case PPC::sub_eq:
3283 return Imm1 == Imm2 ? TrueReg : FalseReg;
3284 }
3285 }
3286 return PPC::NoRegister;
3287}
3288
3290 unsigned OpNo,
3291 int64_t Imm) const {
3292 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3293 // Replace the REG with the Immediate.
3294 Register InUseReg = MI.getOperand(OpNo).getReg();
3295 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3296
3297 // We need to make sure that the MI didn't have any implicit use
3298 // of this REG any more. We don't call MI.implicit_operands().empty() to
3299 // return early, since MI's MCID might be changed in calling context, as a
3300 // result its number of explicit operands may be changed, thus the begin of
3301 // implicit operand is changed.
3303 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
3304 if (UseOpIdx >= 0) {
3305 MachineOperand &MO = MI.getOperand(UseOpIdx);
3306 if (MO.isImplicit())
3307 // The operands must always be in the following order:
3308 // - explicit reg defs,
3309 // - other explicit operands (reg uses, immediates, etc.),
3310 // - implicit reg defs
3311 // - implicit reg uses
3312 // Therefore, removing the implicit operand won't change the explicit
3313 // operands layout.
3314 MI.removeOperand(UseOpIdx);
3315 }
3316}
3317
3318// Replace an instruction with one that materializes a constant (and sets
3319// CR0 if the original instruction was a record-form instruction).
3321 const LoadImmediateInfo &LII) const {
3322 // Remove existing operands.
3323 int OperandToKeep = LII.SetCR ? 1 : 0;
3324 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3325 MI.removeOperand(i);
3326
3327 // Replace the instruction.
3328 if (LII.SetCR) {
3329 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3330 // Set the immediate.
3331 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3332 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3333 return;
3334 }
3335 else
3336 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3337
3338 // Set the immediate.
3339 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3340 .addImm(LII.Imm);
3341}
3342
3344 bool &SeenIntermediateUse) const {
3345 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3346 "Should be called after register allocation.");
3348 MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
3349 It++;
3350 SeenIntermediateUse = false;
3351 for (; It != E; ++It) {
3352 if (It->modifiesRegister(Reg, TRI))
3353 return &*It;
3354 if (It->readsRegister(Reg, TRI))
3355 SeenIntermediateUse = true;
3356 }
3357 return nullptr;
3358}
3359
3362 const DebugLoc &DL, Register Reg,
3363 int64_t Imm) const {
3365 "Register should be in non-SSA form after RA");
3366 bool isPPC64 = Subtarget.isPPC64();
3367 // FIXME: Materialization here is not optimal.
3368 // For some special bit patterns we can use less instructions.
3369 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3370 if (isInt<16>(Imm)) {
3371 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3372 } else if (isInt<32>(Imm)) {
3373 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3374 .addImm(Imm >> 16);
3375 if (Imm & 0xFFFF)
3376 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3377 .addReg(Reg, RegState::Kill)
3378 .addImm(Imm & 0xFFFF);
3379 } else {
3380 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3381 "only supported in PPC64");
3382 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3383 if ((Imm >> 32) & 0xFFFF)
3384 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3385 .addReg(Reg, RegState::Kill)
3386 .addImm((Imm >> 32) & 0xFFFF);
3387 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3388 .addReg(Reg, RegState::Kill)
3389 .addImm(32)
3390 .addImm(31);
3391 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3392 .addReg(Reg, RegState::Kill)
3393 .addImm((Imm >> 16) & 0xFFFF);
3394 if (Imm & 0xFFFF)
3395 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3396 .addReg(Reg, RegState::Kill)
3397 .addImm(Imm & 0xFFFF);
3398 }
3399}
3400
3401MachineInstr *PPCInstrInfo::getForwardingDefMI(
3403 unsigned &OpNoForForwarding,
3404 bool &SeenIntermediateUse) const {
3405 OpNoForForwarding = ~0U;
3406 MachineInstr *DefMI = nullptr;
3407 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3409 // If we're in SSA, get the defs through the MRI. Otherwise, only look
3410 // within the basic block to see if the register is defined using an
3411 // LI/LI8/ADDI/ADDI8.
3412 if (MRI->isSSA()) {
3413 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3414 if (!MI.getOperand(i).isReg())
3415 continue;
3416 Register Reg = MI.getOperand(i).getReg();
3417 if (!Reg.isVirtual())
3418 continue;
3419 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3420 if (TrueReg.isVirtual()) {
3421 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3422 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3423 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3424 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3425 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3426 OpNoForForwarding = i;
3427 DefMI = DefMIForTrueReg;
3428 // The ADDI and LI operand maybe exist in one instruction at same
3429 // time. we prefer to fold LI operand as LI only has one Imm operand
3430 // and is more possible to be converted. So if current DefMI is
3431 // ADDI/ADDI8, we continue to find possible LI/LI8.
3432 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3433 break;
3434 }
3435 }
3436 }
3437 } else {
3438 // Looking back through the definition for each operand could be expensive,
3439 // so exit early if this isn't an instruction that either has an immediate
3440 // form or is already an immediate form that we can handle.
3441 ImmInstrInfo III;
3442 unsigned Opc = MI.getOpcode();
3443 bool ConvertibleImmForm =
3444 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3445 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3446 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3447 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3448 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3449 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3450 Opc == PPC::RLWINM8_rec;
3451 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3452 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3453 : false;
3454 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3455 return nullptr;
3456
3457 // Don't convert or %X, %Y, %Y since that's just a register move.
3458 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3459 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3460 return nullptr;
3461 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3462 MachineOperand &MO = MI.getOperand(i);
3463 SeenIntermediateUse = false;
3464 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3465 Register Reg = MI.getOperand(i).getReg();
3466 // If we see another use of this reg between the def and the MI,
3467 // we want to flag it so the def isn't deleted.
3468 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3469 if (DefMI) {
3470 // Is this register defined by some form of add-immediate (including
3471 // load-immediate) within this basic block?
3472 switch (DefMI->getOpcode()) {
3473 default:
3474 break;
3475 case PPC::LI:
3476 case PPC::LI8:
3477 case PPC::ADDItocL8:
3478 case PPC::ADDI:
3479 case PPC::ADDI8:
3480 OpNoForForwarding = i;
3481 return DefMI;
3482 }
3483 }
3484 }
3485 }
3486 }
3487 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3488}
3489
3490unsigned PPCInstrInfo::getSpillTarget() const {
3491 // With P10, we may need to spill paired vector registers or accumulator
3492 // registers. MMA implies paired vectors, so we can just check that.
3493 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3494 // P11 uses the P10 target.
3495 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3496 2 : Subtarget.hasP9Vector() ?
3497 1 : 0;
3498}
3499
3500ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3501 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3502}
3503
3504ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3505 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3506}
3507
3508// This opt tries to convert the following imm form to an index form to save an
3509// add for stack variables.
3510// Return false if no such pattern found.
3511//
3512// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3513// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3514// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3515//
3516// can be converted to:
3517//
3518// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3519// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3520//
3521// In order to eliminate ADD instr, make sure that:
3522// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3523// new ADDI instr and ADDI can only take int16 Imm.
3524// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3525// between ADDI and ADD instr since its original def in ADDI will be changed
3526// in new ADDI instr. And also there should be no new def for it between
3527// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3528// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3529// between ADD and Imm instr since ADD instr will be eliminated.
3530// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3531// moved to Index instr.
3533 MachineFunction *MF = MI.getParent()->getParent();
3535 bool PostRA = !MRI->isSSA();
3536 // Do this opt after PEI which is after RA. The reason is stack slot expansion
3537 // in PEI may expose such opportunities since in PEI, stack slot offsets to
3538 // frame base(OffsetAddi) are determined.
3539 if (!PostRA)
3540 return false;
3541 unsigned ToBeDeletedReg = 0;
3542 int64_t OffsetImm = 0;
3543 unsigned XFormOpcode = 0;
3544 ImmInstrInfo III;
3545
3546 // Check if Imm instr meets requirement.
3547 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3548 III))
3549 return false;
3550
3551 bool OtherIntermediateUse = false;
3552 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3553
3554 // Exit if there is other use between ADD and Imm instr or no def found.
3555 if (OtherIntermediateUse || !ADDMI)
3556 return false;
3557
3558 // Check if ADD instr meets requirement.
3559 if (!isADDInstrEligibleForFolding(*ADDMI))
3560 return false;
3561
3562 unsigned ScaleRegIdx = 0;
3563 int64_t OffsetAddi = 0;
3564 MachineInstr *ADDIMI = nullptr;
3565
3566 // Check if there is a valid ToBeChangedReg in ADDMI.
3567 // 1: It must be killed.
3568 // 2: Its definition must be a valid ADDIMI.
3569 // 3: It must satify int16 offset requirement.
3570 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3571 ScaleRegIdx = 2;
3572 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3573 ScaleRegIdx = 1;
3574 else
3575 return false;
3576
3577 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3578 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3579 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
3580 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3582 for (auto It = ++Start; It != End; It++)
3583 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3584 return true;
3585 return false;
3586 };
3587
3588 // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3589 // treated as special zero when ScaleReg is R0/X0 register.
3590 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3591 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3592 return false;
3593
3594 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3595 // and Imm Instr.
3596 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3597 return false;
3598
3599 // Now start to do the transformation.
3600 LLVM_DEBUG(dbgs() << "Replace instruction: "
3601 << "\n");
3602 LLVM_DEBUG(ADDIMI->dump());
3603 LLVM_DEBUG(ADDMI->dump());
3604 LLVM_DEBUG(MI.dump());
3605 LLVM_DEBUG(dbgs() << "with: "
3606 << "\n");
3607
3608 // Update ADDI instr.
3609 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3610
3611 // Update Imm instr.
3612 MI.setDesc(get(XFormOpcode));
3613 MI.getOperand(III.ImmOpNo)
3614 .ChangeToRegister(ScaleReg, false, false,
3615 ADDMI->getOperand(ScaleRegIdx).isKill());
3616
3617 MI.getOperand(III.OpNoForForwarding)
3618 .ChangeToRegister(ToBeChangedReg, false, false, true);
3619
3620 // Eliminate ADD instr.
3621 ADDMI->eraseFromParent();
3622
3623 LLVM_DEBUG(ADDIMI->dump());
3624 LLVM_DEBUG(MI.dump());
3625
3626 return true;
3627}
3628
3630 int64_t &Imm) const {
3631 unsigned Opc = ADDIMI.getOpcode();
3632
3633 // Exit if the instruction is not ADDI.
3634 if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3635 return false;
3636
3637 // The operand may not necessarily be an immediate - it could be a relocation.
3638 if (!ADDIMI.getOperand(2).isImm())
3639 return false;
3640
3641 Imm = ADDIMI.getOperand(2).getImm();
3642
3643 return true;
3644}
3645
3647 unsigned Opc = ADDMI.getOpcode();
3648
3649 // Exit if the instruction is not ADD.
3650 return Opc == PPC::ADD4 || Opc == PPC::ADD8;
3651}
3652
3654 unsigned &ToBeDeletedReg,
3655 unsigned &XFormOpcode,
3656 int64_t &OffsetImm,
3657 ImmInstrInfo &III) const {
3658 // Only handle load/store.
3659 if (!MI.mayLoadOrStore())
3660 return false;
3661
3662 unsigned Opc = MI.getOpcode();
3663
3664 XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
3665
3666 // Exit if instruction has no index form.
3667 if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3668 return false;
3669
3670 // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3671 if (!instrHasImmForm(XFormOpcode,
3672 PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
3673 return false;
3674
3675 if (!III.IsSummingOperands)
3676 return false;
3677
3678 MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
3679 MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
3680 // Only support imm operands, not relocation slots or others.
3681 if (!ImmOperand.isImm())
3682 return false;
3683
3684 assert(RegOperand.isReg() && "Instruction format is not right");
3685
3686 // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
3687 if (!RegOperand.isKill())
3688 return false;
3689
3690 ToBeDeletedReg = RegOperand.getReg();
3691 OffsetImm = ImmOperand.getImm();
3692
3693 return true;
3694}
3695
3697 MachineInstr *&ADDIMI,
3698 int64_t &OffsetAddi,
3699 int64_t OffsetImm) const {
3700 assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
3701 MachineOperand &MO = ADDMI->getOperand(Index);
3702
  // The ADD must hold the last use of this register; if the register lives
  // past the ADD, folding away its def would change observable state.
3703 if (!MO.isKill())
3704 return false;
3705
3706 bool OtherIntermediateUse = false;
3707
  // Find the (post-RA) def of the killed operand; on success ADDIMI is the
  // candidate add-immediate feeding the ADD.
3708 ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
3709 // Currently handle only one "add + Imminstr" pair case, exit if other
3710 // intermediate use for ToBeChangedReg found.
3711 // TODO: handle the cases where there are other "add + Imminstr" pairs
3712 // with same offset in Imminstr which is like:
3713 //
3714 // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3715 // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
3716 // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
3717 // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
3718 // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
3719 //
3720 // can be converted to:
3721 //
3722 // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
3723 // (OffsetAddi + OffsetImm)
3724 // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
3725 // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
3726
3727 if (OtherIntermediateUse || !ADDIMI)
3728 return false;
3729 // Check if ADDI instr meets requirement.
3730 if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
3731 return false;
3732
  // The combined displacement must still fit in the 16-bit signed immediate
  // field of ADDI for the fold to be encodable.
3733 if (isInt<16>(OffsetAddi + OffsetImm))
3734 return true;
3735 return false;
3736}
3737
3738// If this instruction has an immediate form and one of its operands is a
3739// result of a load-immediate or an add-immediate, convert it to
3740// the immediate form if the constant is in range.
3742 SmallSet<Register, 4> &RegsToUpdate,
3743 MachineInstr **KilledDef) const {
3744 MachineFunction *MF = MI.getParent()->getParent();
3746 bool PostRA = !MRI->isSSA();
  // NOTE(review): presumably getForwardingDefMI() overwrites this out-param
  // before it is read, so the initial value is irrelevant -- confirm.
3747 bool SeenIntermediateUse = true;
3748 unsigned ForwardingOperand = ~0U;
3749 MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
3750 SeenIntermediateUse);
  // No forwardable constant-producing def feeds MI: nothing to do.
3751 if (!DefMI)
3752 return false;
3753 assert(ForwardingOperand < MI.getNumOperands() &&
3754 "The forwarding operand needs to be valid at this point");
3755 bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
3756 bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  // Report DefMI back to the caller as deletable when MI held its last use.
3757 if (KilledDef && KillFwdDefMI)
3758 *KilledDef = DefMI;
3759
3760 // Conservatively add defs from DefMI and defs/uses from MI to the set of
3761 // registers that need their kill flags updated.
3762 for (const MachineOperand &MO : DefMI->operands())
3763 if (MO.isReg() && MO.isDef())
3764 RegsToUpdate.insert(MO.getReg());
3765 for (const MachineOperand &MO : MI.operands())
3766 if (MO.isReg())
3767 RegsToUpdate.insert(MO.getReg());
3768
3769 // If this is a imm instruction and its register operands is produced by ADDI,
3770 // put the imm into imm inst directly.
3771 if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
3772 PPC::INSTRUCTION_LIST_END &&
3773 transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
3774 return true;
3775
  // Query the reg+imm equivalent (if any) of MI's opcode; III describes how
  // the rewrite must be performed.
3776 ImmInstrInfo III;
3777 bool IsVFReg = MI.getOperand(0).isReg()
3778 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3779 : false;
3780 bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
3781 // If this is a reg+reg instruction that has a reg+imm form,
3782 // and one of the operands is produced by an add-immediate,
3783 // try to convert it.
3784 if (HasImmForm &&
3785 transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
3786 KillFwdDefMI))
3787 return true;
3788
3789 // If this is a reg+reg instruction that has a reg+imm form,
3790 // and one of the operands is produced by LI, convert it now.
3791 if (HasImmForm &&
3792 transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
3793 return true;
3794
3795 // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
3796 // can be simplified to LI.
3797 if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
3798 return true;
3799
3800 return false;
3801}
3802
3804 MachineInstr **ToErase) const {
  // Fold the RLWINM that defines MI's source register into MI itself:
  // either the composed rotate+mask is still a contiguous run of ones
  // (rewrite MI's SH/MB/ME in place), or the composed mask is zero
  // (replace MI with LI 0 / ANDI_rec 0). On success *ToErase may be set to
  // the now-dead feeding RLWINM.
3805 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3806 Register FoldingReg = MI.getOperand(1).getReg();
  // Virtual registers only: the MRI->getVRegDef/use_nodbg_empty queries
  // below require SSA-style single-def virtual registers.
3807 if (!FoldingReg.isVirtual())
3808 return false;
3809 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3810 if (SrcMI->getOpcode() != PPC::RLWINM &&
3811 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3812 SrcMI->getOpcode() != PPC::RLWINM8 &&
3813 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3814 return false;
3815 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3816 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3817 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3818 "Invalid PPC::RLWINM Instruction!");
3819 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3820 uint64_t SHMI = MI.getOperand(2).getImm();
3821 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3822 uint64_t MBMI = MI.getOperand(3).getImm();
3823 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3824 uint64_t MEMI = MI.getOperand(4).getImm();
3825
3826 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3827 "Invalid PPC::RLWINM Instruction!");
3828 // If MBMI is bigger than MEMI, we always can not get run of ones.
3829 // RotatedSrcMask non-wrap:
3830 // 0........31|32........63
3831 // RotatedSrcMask: B---E B---E
3832 // MaskMI: -----------|--E B------
3833 // Result: ----- --- (Bad candidate)
3834 //
3835 // RotatedSrcMask wrap:
3836 // 0........31|32........63
3837 // RotatedSrcMask: --E B----|--E B----
3838 // MaskMI: -----------|--E B------
3839 // Result: --- -----|--- ----- (Bad candidate)
3840 //
3841 // One special case is RotatedSrcMask is a full set mask.
3842 // RotatedSrcMask full:
3843 // 0........31|32........63
3844 // RotatedSrcMask: ------EB---|-------EB---
3845 // MaskMI: -----------|--E B------
3846 // Result: -----------|--- ------- (Good candidate)
3847
3848 // Mark special case.
3849 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3850
3851 // For other MBMI > MEMI cases, just return.
3852 if ((MBMI > MEMI) && !SrcMaskFull)
3853 return false;
3854
3855 // Handle MBMI <= MEMI cases.
3856 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3857 // In MI, we only need low 32 bits of SrcMI, just consider about low 32
3858 // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
3859 // while in PowerPC ISA, lowerest bit is at index 63.
3860 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3861
3862 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3863 APInt FinalMask = RotatedSrcMask & MaskMI;
3864 uint32_t NewMB, NewME;
3865 bool Simplified = false;
3866
3867 // If final mask is 0, MI result should be 0 too.
3868 if (FinalMask.isZero()) {
3869 bool Is64Bit =
3870 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3871 Simplified = true;
3872 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3873 LLVM_DEBUG(MI.dump());
3874
3875 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3876 // Replace MI with "LI 0"
3877 MI.removeOperand(4);
3878 MI.removeOperand(3);
3879 MI.removeOperand(2);
3880 MI.getOperand(1).ChangeToImmediate(0);
3881 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3882 } else {
  // Record-form variants must still set CR0, so use ANDI_rec with 0
  // rather than a plain LI.
3883 // Replace MI with "ANDI_rec reg, 0"
3884 MI.removeOperand(4);
3885 MI.removeOperand(3);
3886 MI.getOperand(2).setImm(0);
3887 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3888 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3889 if (SrcMI->getOperand(1).isKill()) {
  // Transfer the kill flag from SrcMI's source to MI's new source.
3890 MI.getOperand(1).setIsKill(true);
3891 SrcMI->getOperand(1).setIsKill(false);
3892 } else
3893 // About to replace MI.getOperand(1), clear its kill flag.
3894 MI.getOperand(1).setIsKill(false);
3895 }
3896
3897 LLVM_DEBUG(dbgs() << "With: ");
3898 LLVM_DEBUG(MI.dump());
3899
3900 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3901 NewMB <= NewME) ||
3902 SrcMaskFull) {
3903 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
3904 // than NewME. Otherwise we get a 64 bit value after folding, but MI
3905 // return a 32 bit value.
3906 Simplified = true;
3907 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3908 LLVM_DEBUG(MI.dump());
3909
  // Compose the two rotates; rotation amounts add modulo 32.
3910 uint16_t NewSH = (SHSrc + SHMI) % 32;
3911 MI.getOperand(2).setImm(NewSH);
3912 // If SrcMI mask is full, no need to update MBMI and MEMI.
3913 if (!SrcMaskFull) {
3914 MI.getOperand(3).setImm(NewMB);
3915 MI.getOperand(4).setImm(NewME);
3916 }
3917 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3918 if (SrcMI->getOperand(1).isKill()) {
3919 MI.getOperand(1).setIsKill(true);
3920 SrcMI->getOperand(1).setIsKill(false);
3921 } else
3922 // About to replace MI.getOperand(1), clear its kill flag.
3923 MI.getOperand(1).setIsKill(false);
3924
3925 LLVM_DEBUG(dbgs() << "To: ");
3926 LLVM_DEBUG(MI.dump());
3927 }
  // NOTE(review): bitwise '&' on bools below -- both operands are always
  // evaluated; semantically equivalent to '&&' here (no side effects).
3928 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
3929 !SrcMI->hasImplicitDef()) {
3930 // If FoldingReg has no non-debug use and it has no implicit def (it
3931 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
3932 // Otherwise keep it.
3933 *ToErase = SrcMI;
3934 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3935 LLVM_DEBUG(SrcMI->dump());
3936 }
3937 return Simplified;
3938}
3939
// Determine whether the reg+reg instruction \p Opc has an equivalent reg+imm
// form, and if so fill \p III with everything a rewrite needs: the imm-form
// opcode, which operand index becomes the immediate and which may be
// forwarded into, signedness/width/alignment constraints on the immediate,
// and which operand slots treat the zero register specially (ZeroIsSpecial*).
// \p IsVFReg and \p PostRA refine the choice for Power9 VSX loads/stores.
// Returns false when \p Opc has no usable immediate form.
3940bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
3941 ImmInstrInfo &III, bool PostRA) const {
3942 // The vast majority of the instructions would need their operand 2 replaced
3943 // with an immediate when switching to the reg+imm form. A marked exception
3944 // are the update form loads/stores for which a constant operand 2 would need
3945 // to turn into a displacement and move operand 1 to the operand 2 position.
3946 III.ImmOpNo = 2;
3947 III.OpNoForForwarding = 2;
3948 III.ImmWidth = 16;
3949 III.ImmMustBeMultipleOf = 1;
3950 III.TruncateImmTo = 0;
3951 III.IsSummingOperands = false;
3952 switch (Opc) {
3953 default: return false;
3954 case PPC::ADD4:
3955 case PPC::ADD8:
3956 III.SignedImm = true;
3957 III.ZeroIsSpecialOrig = 0;
3958 III.ZeroIsSpecialNew = 1;
3959 III.IsCommutative = true;
3960 III.IsSummingOperands = true;
3961 III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
3962 break;
3963 case PPC::ADDC:
3964 case PPC::ADDC8:
3965 III.SignedImm = true;
3966 III.ZeroIsSpecialOrig = 0;
3967 III.ZeroIsSpecialNew = 0;
3968 III.IsCommutative = true;
3969 III.IsSummingOperands = true;
3970 III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
3971 break;
3972 case PPC::ADDC_rec:
3973 III.SignedImm = true;
3974 III.ZeroIsSpecialOrig = 0;
3975 III.ZeroIsSpecialNew = 0;
3976 III.IsCommutative = true;
3977 III.IsSummingOperands = true;
3978 III.ImmOpcode = PPC::ADDIC_rec;
3979 break;
3980 case PPC::SUBFC:
3981 case PPC::SUBFC8:
3982 III.SignedImm = true;
3983 III.ZeroIsSpecialOrig = 0;
3984 III.ZeroIsSpecialNew = 0;
3985 III.IsCommutative = false;
3986 III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
3987 break;
3988 case PPC::CMPW:
3989 case PPC::CMPD:
3990 III.SignedImm = true;
3991 III.ZeroIsSpecialOrig = 0;
3992 III.ZeroIsSpecialNew = 0;
3993 III.IsCommutative = false;
3994 III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
3995 break;
3996 case PPC::CMPLW:
3997 case PPC::CMPLD:
3998 III.SignedImm = false;
3999 III.ZeroIsSpecialOrig = 0;
4000 III.ZeroIsSpecialNew = 0;
4001 III.IsCommutative = false;
4002 III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
4003 break;
4004 case PPC::AND_rec:
4005 case PPC::AND8_rec:
4006 case PPC::OR:
4007 case PPC::OR8:
4008 case PPC::XOR:
4009 case PPC::XOR8:
4010 III.SignedImm = false;
4011 III.ZeroIsSpecialOrig = 0;
4012 III.ZeroIsSpecialNew = 0;
4013 III.IsCommutative = true;
4014 switch(Opc) {
4015 default: llvm_unreachable("Unknown opcode");
4016 case PPC::AND_rec:
4017 III.ImmOpcode = PPC::ANDI_rec;
4018 break;
4019 case PPC::AND8_rec:
4020 III.ImmOpcode = PPC::ANDI8_rec;
4021 break;
4022 case PPC::OR: III.ImmOpcode = PPC::ORI; break;
4023 case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
4024 case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
4025 case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
4026 }
4027 break;
4028 case PPC::RLWNM:
4029 case PPC::RLWNM8:
4030 case PPC::RLWNM_rec:
4031 case PPC::RLWNM8_rec:
4032 case PPC::SLW:
4033 case PPC::SLW8:
4034 case PPC::SLW_rec:
4035 case PPC::SLW8_rec:
4036 case PPC::SRW:
4037 case PPC::SRW8:
4038 case PPC::SRW_rec:
4039 case PPC::SRW8_rec:
4040 case PPC::SRAW:
4041 case PPC::SRAW_rec:
4042 III.SignedImm = false;
4043 III.ZeroIsSpecialOrig = 0;
4044 III.ZeroIsSpecialNew = 0;
4045 III.IsCommutative = false;
4046 // This isn't actually true, but the instructions ignore any of the
4047 // upper bits, so any immediate loaded with an LI is acceptable.
4048 // This does not apply to shift right algebraic because a value
4049 // out of range will produce a -1/0.
4050 III.ImmWidth = 16;
4051 if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
4052 Opc == PPC::RLWNM8_rec)
4053 III.TruncateImmTo = 5;
4054 else
4055 III.TruncateImmTo = 6;
4056 switch(Opc) {
4057 default: llvm_unreachable("Unknown opcode");
4058 case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
4059 case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
4060 case PPC::RLWNM_rec:
4061 III.ImmOpcode = PPC::RLWINM_rec;
4062 break;
4063 case PPC::RLWNM8_rec:
4064 III.ImmOpcode = PPC::RLWINM8_rec;
4065 break;
4066 case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
4067 case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
4068 case PPC::SLW_rec:
4069 III.ImmOpcode = PPC::RLWINM_rec;
4070 break;
4071 case PPC::SLW8_rec:
4072 III.ImmOpcode = PPC::RLWINM8_rec;
4073 break;
4074 case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
4075 case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
4076 case PPC::SRW_rec:
4077 III.ImmOpcode = PPC::RLWINM_rec;
4078 break;
4079 case PPC::SRW8_rec:
4080 III.ImmOpcode = PPC::RLWINM8_rec;
4081 break;
4082 case PPC::SRAW:
4083 III.ImmWidth = 5;
4084 III.TruncateImmTo = 0;
4085 III.ImmOpcode = PPC::SRAWI;
4086 break;
4087 case PPC::SRAW_rec:
4088 III.ImmWidth = 5;
4089 III.TruncateImmTo = 0;
4090 III.ImmOpcode = PPC::SRAWI_rec;
4091 break;
4092 }
4093 break;
4094 case PPC::RLDCL:
4095 case PPC::RLDCL_rec:
4096 case PPC::RLDCR:
4097 case PPC::RLDCR_rec:
4098 case PPC::SLD:
4099 case PPC::SLD_rec:
4100 case PPC::SRD:
4101 case PPC::SRD_rec:
4102 case PPC::SRAD:
4103 case PPC::SRAD_rec:
4104 III.SignedImm = false;
4105 III.ZeroIsSpecialOrig = 0;
4106 III.ZeroIsSpecialNew = 0;
4107 III.IsCommutative = false;
4108 // This isn't actually true, but the instructions ignore any of the
4109 // upper bits, so any immediate loaded with an LI is acceptable.
4110 // This does not apply to shift right algebraic because a value
4111 // out of range will produce a -1/0.
4112 III.ImmWidth = 16;
4113 if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
4114 Opc == PPC::RLDCR_rec)
4115 III.TruncateImmTo = 6;
4116 else
4117 III.TruncateImmTo = 7;
4118 switch(Opc) {
4119 default: llvm_unreachable("Unknown opcode");
4120 case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
4121 case PPC::RLDCL_rec:
4122 III.ImmOpcode = PPC::RLDICL_rec;
4123 break;
4124 case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
4125 case PPC::RLDCR_rec:
4126 III.ImmOpcode = PPC::RLDICR_rec;
4127 break;
4128 case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
4129 case PPC::SLD_rec:
4130 III.ImmOpcode = PPC::RLDICR_rec;
4131 break;
4132 case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
4133 case PPC::SRD_rec:
4134 III.ImmOpcode = PPC::RLDICL_rec;
4135 break;
4136 case PPC::SRAD:
4137 III.ImmWidth = 6;
4138 III.TruncateImmTo = 0;
4139 III.ImmOpcode = PPC::SRADI;
4140 break;
4141 case PPC::SRAD_rec:
4142 III.ImmWidth = 6;
4143 III.TruncateImmTo = 0;
4144 III.ImmOpcode = PPC::SRADI_rec;
4145 break;
4146 }
4147 break;
4148 // Loads and stores:
4149 case PPC::LBZX:
4150 case PPC::LBZX8:
4151 case PPC::LHZX:
4152 case PPC::LHZX8:
4153 case PPC::LHAX:
4154 case PPC::LHAX8:
4155 case PPC::LWZX:
4156 case PPC::LWZX8:
4157 case PPC::LWAX:
4158 case PPC::LDX:
4159 case PPC::LFSX:
4160 case PPC::LFDX:
4161 case PPC::STBX:
4162 case PPC::STBX8:
4163 case PPC::STHX:
4164 case PPC::STHX8:
4165 case PPC::STWX:
4166 case PPC::STWX8:
4167 case PPC::STDX:
4168 case PPC::STFSX:
4169 case PPC::STFDX:
4170 III.SignedImm = true;
4171 III.ZeroIsSpecialOrig = 1;
4172 III.ZeroIsSpecialNew = 2;
4173 III.IsCommutative = true;
4174 III.IsSummingOperands = true;
  // For X-form memory ops the displacement replaces operand 1 and the base
  // register is forwarded into operand 2 (see the comment at the top).
4175 III.ImmOpNo = 1;
4176 III.OpNoForForwarding = 2;
4177 switch(Opc) {
4178 default: llvm_unreachable("Unknown opcode");
4179 case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
4180 case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
4181 case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
4182 case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
4183 case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
4184 case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
4185 case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
4186 case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
4187 case PPC::LWAX:
4188 III.ImmOpcode = PPC::LWA;
4189 III.ImmMustBeMultipleOf = 4;
4190 break;
4191 case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
4192 case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
4193 case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
4194 case PPC::STBX: III.ImmOpcode = PPC::STB; break;
4195 case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
4196 case PPC::STHX: III.ImmOpcode = PPC::STH; break;
4197 case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
4198 case PPC::STWX: III.ImmOpcode = PPC::STW; break;
4199 case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
4200 case PPC::STDX:
4201 III.ImmOpcode = PPC::STD;
4202 III.ImmMustBeMultipleOf = 4;
4203 break;
4204 case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
4205 case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
4206 }
4207 break;
4208 case PPC::LBZUX:
4209 case PPC::LBZUX8:
4210 case PPC::LHZUX:
4211 case PPC::LHZUX8:
4212 case PPC::LHAUX:
4213 case PPC::LHAUX8:
4214 case PPC::LWZUX:
4215 case PPC::LWZUX8:
4216 case PPC::LDUX:
4217 case PPC::LFSUX:
4218 case PPC::LFDUX:
4219 case PPC::STBUX:
4220 case PPC::STBUX8:
4221 case PPC::STHUX:
4222 case PPC::STHUX8:
4223 case PPC::STWUX:
4224 case PPC::STWUX8:
4225 case PPC::STDUX:
4226 case PPC::STFSUX:
4227 case PPC::STFDUX:
4228 III.SignedImm = true;
4229 III.ZeroIsSpecialOrig = 2;
4230 III.ZeroIsSpecialNew = 3;
4231 III.IsCommutative = false;
4232 III.IsSummingOperands = true;
  // Update-form memory ops define the updated base register as operand 0,
  // shifting the displacement/base operand indices up by one.
4233 III.ImmOpNo = 2;
4234 III.OpNoForForwarding = 3;
4235 switch(Opc) {
4236 default: llvm_unreachable("Unknown opcode");
4237 case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
4238 case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
4239 case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
4240 case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
4241 case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
4242 case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
4243 case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
4244 case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
4245 case PPC::LDUX:
4246 III.ImmOpcode = PPC::LDU;
4247 III.ImmMustBeMultipleOf = 4;
4248 break;
4249 case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
4250 case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
4251 case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
4252 case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
4253 case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
4254 case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
4255 case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
4256 case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
4257 case PPC::STDUX:
4258 III.ImmOpcode = PPC::STDU;
4259 III.ImmMustBeMultipleOf = 4;
4260 break;
4261 case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
4262 case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
4263 }
4264 break;
4265 // Power9 and up only. For some of these, the X-Form version has access to all
4266 // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
4267 // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
4268 // into or stored from is one of the VR registers.
4269 case PPC::LXVX:
4270 case PPC::LXSSPX:
4271 case PPC::LXSDX:
4272 case PPC::STXVX:
4273 case PPC::STXSSPX:
4274 case PPC::STXSDX:
4275 case PPC::XFLOADf32:
4276 case PPC::XFLOADf64:
4277 case PPC::XFSTOREf32:
4278 case PPC::XFSTOREf64:
4279 if (!Subtarget.hasP9Vector())
4280 return false;
4281 III.SignedImm = true;
4282 III.ZeroIsSpecialOrig = 1;
4283 III.ZeroIsSpecialNew = 2;
4284 III.IsCommutative = true;
4285 III.IsSummingOperands = true;
4286 III.ImmOpNo = 1;
4287 III.OpNoForForwarding = 2;
4288 III.ImmMustBeMultipleOf = 4;
4289 switch(Opc) {
4290 default: llvm_unreachable("Unknown opcode");
4291 case PPC::LXVX:
4292 III.ImmOpcode = PPC::LXV;
4293 III.ImmMustBeMultipleOf = 16;
4294 break;
4295 case PPC::LXSSPX:
4296 if (PostRA) {
4297 if (IsVFReg)
4298 III.ImmOpcode = PPC::LXSSP;
4299 else {
4300 III.ImmOpcode = PPC::LFS;
4301 III.ImmMustBeMultipleOf = 1;
4302 }
4303 break;
4304 }
4305 [[fallthrough]];
4306 case PPC::XFLOADf32:
4307 III.ImmOpcode = PPC::DFLOADf32;
4308 break;
4309 case PPC::LXSDX:
4310 if (PostRA) {
4311 if (IsVFReg)
4312 III.ImmOpcode = PPC::LXSD;
4313 else {
4314 III.ImmOpcode = PPC::LFD;
4315 III.ImmMustBeMultipleOf = 1;
4316 }
4317 break;
4318 }
4319 [[fallthrough]];
4320 case PPC::XFLOADf64:
4321 III.ImmOpcode = PPC::DFLOADf64;
4322 break;
4323 case PPC::STXVX:
4324 III.ImmOpcode = PPC::STXV;
4325 III.ImmMustBeMultipleOf = 16;
4326 break;
4327 case PPC::STXSSPX:
4328 if (PostRA) {
4329 if (IsVFReg)
4330 III.ImmOpcode = PPC::STXSSP;
4331 else {
4332 III.ImmOpcode = PPC::STFS;
4333 III.ImmMustBeMultipleOf = 1;
4334 }
4335 break;
4336 }
4337 [[fallthrough]];
4338 case PPC::XFSTOREf32:
4339 III.ImmOpcode = PPC::DFSTOREf32;
4340 break;
4341 case PPC::STXSDX:
4342 if (PostRA) {
4343 if (IsVFReg)
4344 III.ImmOpcode = PPC::STXSD;
4345 else {
4346 III.ImmOpcode = PPC::STFD;
4347 III.ImmMustBeMultipleOf = 1;
4348 }
4349 break;
4350 }
4351 [[fallthrough]];
4352 case PPC::XFSTOREf64:
4353 III.ImmOpcode = PPC::DFSTOREf64;
4354 break;
4355 }
4356 break;
4357 }
4358 return true;
4359}
4360
4361// Utility function for swapping two arbitrary operands of an instruction.
4362static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
4363 assert(Op1 != Op2 && "Cannot swap operand with itself.");
4364
4365 unsigned MaxOp = std::max(Op1, Op2);
4366 unsigned MinOp = std::min(Op1, Op2);
  // Copy both operands by value before removing them from MI (removal
  // invalidates references into the operand list).
4367 MachineOperand MOp1 = MI.getOperand(MinOp);
4368 MachineOperand MOp2 = MI.getOperand(MaxOp);
  // Remove the higher index first so the lower index stays valid.
4369 MI.removeOperand(std::max(Op1, Op2));
4370 MI.removeOperand(std::min(Op1, Op2));
4371
4372 // If the operands we are swapping are the two at the end (the common case)
4373 // we can just remove both and add them in the opposite order.
4374 if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
4375 MI.addOperand(MOp2);
4376 MI.addOperand(MOp1);
4377 } else {
4378 // Store all operands in a temporary vector, remove them and re-add in the
4379 // right order.
  // MOps collects (in reverse) every operand past MinOp so the tail can be
  // rebuilt with MOp1/MOp2 exchanged.
4381 unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
4382 for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
4383 MOps.push_back(MI.getOperand(i));
4384 MI.removeOperand(i);
4385 }
4386 // MOp2 needs to be added next.
4387 MI.addOperand(MOp2);
4388 // Now add the rest.
4389 for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
4390 if (i == MaxOp)
4391 MI.addOperand(MOp1);
4392 else {
4393 MI.addOperand(MOps.back());
4394 MOps.pop_back();
4395 }
4396 }
4397 }
4398}
4399
4400// Check if the 'MI' that has the index OpNoForForwarding
4401// meets the requirement described in the ImmInstrInfo.
4402bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4403 const ImmInstrInfo &III,
4404 unsigned OpNoForForwarding
4405 ) const {
4406 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4407 // would not work pre-RA, we can only do the check post RA.
4408 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4409 if (MRI.isSSA())
4410 return false;
4411
4412 // Cannot do the transform if MI isn't summing the operands.
4413 if (!III.IsSummingOperands)
4414 return false;
4415
4416 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4417 if (!III.ZeroIsSpecialOrig)
4418 return false;
4419
4420 // We cannot do the transform if the operand we are trying to replace
4421 // isn't the same as the operand the instruction allows.
4422 if (OpNoForForwarding != III.OpNoForForwarding)
4423 return false;
4424
4425 // Check if the instruction we are trying to transform really has
4426 // the special zero register as its operand.
4427 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4428 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4429 return false;
4430
4431 // This machine instruction is convertible if it is,
4432 // 1. summing the operands.
4433 // 2. one of the operands is special zero register.
4434 // 3. the operand we are trying to replace is allowed by the MI.
4435 return true;
4436}
4437
4438// Check if the DefMI is the add inst and set the ImmMO and RegMO
4439// accordingly.
4440bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4441 const ImmInstrInfo &III,
4442 MachineOperand *&ImmMO,
4443 MachineOperand *&RegMO) const {
4444 unsigned Opc = DefMI.getOpcode();
4445 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4446 return false;
4447
4448 // Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL8
4449 // on AIX which is used for toc-data access. TODO: Follow up to see if it can
4450 // apply for AIX toc-data as well.
4451 if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
4452 return false;
4453
4454 assert(DefMI.getNumOperands() >= 3 &&
4455 "Add inst must have at least three operands");
4456 RegMO = &DefMI.getOperand(1);
4457 ImmMO = &DefMI.getOperand(2);
4458
4459 // Before RA, ADDI first operand could be a frame index.
4460 if (!RegMO->isReg())
4461 return false;
4462
4463 // This DefMI is elgible for forwarding if it is:
4464 // 1. add inst
4465 // 2. one of the operands is Imm/CPI/Global.
4466 return isAnImmediateOperand(*ImmMO);
4467}
4468
// Decide whether the register in RegMO (the base of an addi feeding MI) can
// safely be forwarded into MI. Out-params: IsFwdFeederRegKilled is set if an
// instruction between DefMI and MI kills the register; SeenIntermediateUse is
// set if any instruction in that range reads it.
4469bool PPCInstrInfo::isRegElgibleForForwarding(
4470 const MachineOperand &RegMO, const MachineInstr &DefMI,
4471 const MachineInstr &MI, bool KillDefMI,
4472 bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
4473 // x = addi y, imm
4474 // ...
4475 // z = lfdx 0, x -> z = lfd imm(y)
4476 // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
4477 // of "y" between the DEF of "x" and "z".
4478 // The query is only valid post RA.
4479 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4480 if (MRI.isSSA())
4481 return false;
4482
4483 Register Reg = RegMO.getReg();
4484
4485 // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
4487 MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  // Step past MI itself before scanning the instructions between MI and DefMI.
4488 It++;
4489 for (; It != E; ++It) {
  // Any intervening redefinition of Reg (other than DefMI) blocks forwarding.
4490 if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4491 return false;
4492 else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4493 IsFwdFeederRegKilled = true;
4494 if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4495 SeenIntermediateUse = true;
4496 // Made it to DefMI without encountering a clobber.
4497 if ((&*It) == &DefMI)
4498 break;
4499 }
4500 assert((&*It) == &DefMI && "DefMI is missing");
4501
4502 // If DefMI also defines the register to be forwarded, we can only forward it
4503 // if DefMI is being erased.
4504 if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
4505 return KillDefMI;
4506
4507 return true;
4508}
4509
// Decide whether the immediate operand of the feeding add can legally become
// the displacement of MI's imm form. On success, Imm receives the combined
// (sign-extended, possibly truncated) displacement; BaseImm is an extra
// offset contributed by an already-folded imm-form instruction.
4510bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
4511 const MachineInstr &DefMI,
4512 const ImmInstrInfo &III,
4513 int64_t &Imm,
4514 int64_t BaseImm) const {
4515 assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
4516 if (DefMI.getOpcode() == PPC::ADDItocL8) {
4517 // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
4518 // However, we know that, it is 16-bit width, and has the alignment of 4.
4519 // Check if the instruction met the requirement.
4520 if (III.ImmMustBeMultipleOf > 4 ||
4521 III.TruncateImmTo || III.ImmWidth != 16)
4522 return false;
4523
4524 // Going from XForm to DForm loads means that the displacement needs to be
4525 // not just an immediate but also a multiple of 4, or 16 depending on the
4526 // load. A DForm load cannot be represented if it is a multiple of say 2.
4527 // XForm loads do not have this restriction.
4528 if (ImmMO.isGlobal()) {
4529 const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
4531 return false;
4532 }
4533
4534 return true;
4535 }
4536
4537 if (ImmMO.isImm()) {
4538 // It is Imm, we need to check if the Imm fit the range.
4539 // Sign-extend to 64-bits.
4540 // DefMI may be folded with another imm form instruction, the result Imm is
4541 // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
4542 APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
  // Reject sums that do not fit the imm form's (signed or unsigned) field.
4543 if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
4544 return false;
4545 if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
4546 return false;
4547 Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);
4548
  // Respect the alignment requirement of the target D-form encoding.
4549 if (Imm % III.ImmMustBeMultipleOf)
4550 return false;
4551 if (III.TruncateImmTo)
4552 Imm &= ((1 << III.TruncateImmTo) - 1);
4553 }
4554 else
4555 return false;
4556
4557 // This ImmMO is forwarded if it meets the requirement described
4558 // in ImmInstrInfo
4559 return true;
4560}
4561
4562bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4563 unsigned OpNoForForwarding,
4564 MachineInstr **KilledDef) const {
4565 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4566 !DefMI.getOperand(1).isImm())
4567 return false;
4568
4569 MachineFunction *MF = MI.getParent()->getParent();
4571 bool PostRA = !MRI->isSSA();
4572
4573 int64_t Immediate = DefMI.getOperand(1).getImm();
4574 // Sign-extend to 64-bits.
4575 int64_t SExtImm = SignExtend64<16>(Immediate);
4576
4577 bool ReplaceWithLI = false;
4578 bool Is64BitLI = false;
4579 int64_t NewImm = 0;
4580 bool SetCR = false;
4581 unsigned Opc = MI.getOpcode();
4582 switch (Opc) {
4583 default:
4584 return false;
4585
4586 // FIXME: Any branches conditional on such a comparison can be made
4587 // unconditional. At this time, this happens too infrequently to be worth
4588 // the implementation effort, but if that ever changes, we could convert
4589 // such a pattern here.
4590 case PPC::CMPWI:
4591 case PPC::CMPLWI:
4592 case PPC::CMPDI:
4593 case PPC::CMPLDI: {
4594 // Doing this post-RA would require dataflow analysis to reliably find uses
4595 // of the CR register set by the compare.
4596 // No need to fixup killed/dead flag since this transformation is only valid
4597 // before RA.
4598 if (PostRA)
4599 return false;
4600 // If a compare-immediate is fed by an immediate and is itself an input of
4601 // an ISEL (the most common case) into a COPY of the correct register.
4602 bool Changed = false;
4603 Register DefReg = MI.getOperand(0).getReg();
4604 int64_t Comparand = MI.getOperand(2).getImm();
4605 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4606 ? (Comparand | 0xFFFFFFFFFFFF0000)
4607 : Comparand;
4608
4609 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4610 unsigned UseOpc = CompareUseMI.getOpcode();
4611 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4612 continue;
4613 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4614 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4615 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4616 unsigned RegToCopy =
4617 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4618 if (RegToCopy == PPC::NoRegister)
4619 continue;
4620 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4621 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4622 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4623 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4624 CompareUseMI.removeOperand(3);
4625 CompareUseMI.removeOperand(2);
4626 continue;
4627 }
4628 LLVM_DEBUG(
4629 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4630 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4631 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4632 // Convert to copy and remove unneeded operands.
4633 CompareUseMI.setDesc(get(PPC::COPY));
4634 CompareUseMI.removeOperand(3);
4635 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4636 CmpIselsConverted++;
4637 Changed = true;
4638 LLVM_DEBUG(CompareUseMI.dump());
4639 }
4640 if (Changed)
4641 return true;
4642 // This may end up incremented multiple times since this function is called
4643 // during a fixed-point transformation, but it is only meant to indicate the
4644 // presence of this opportunity.
4645 MissedConvertibleImmediateInstrs++;
4646 return false;
4647 }
4648
4649 // Immediate forms - may simply be convertable to an LI.
4650 case PPC::ADDI:
4651 case PPC::ADDI8: {
4652 // Does the sum fit in a 16-bit signed field?
4653 int64_t Addend = MI.getOperand(2).getImm();
4654 if (isInt<16>(Addend + SExtImm)) {
4655 ReplaceWithLI = true;
4656 Is64BitLI = Opc == PPC::ADDI8;
4657 NewImm = Addend + SExtImm;
4658 break;
4659 }
4660 return false;
4661 }
4662 case PPC::SUBFIC:
4663 case PPC::SUBFIC8: {
4664 // Only transform this if the CARRY implicit operand is dead.
4665 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4666 return false;
4667 int64_t Minuend = MI.getOperand(2).getImm();
4668 if (isInt<16>(Minuend - SExtImm)) {
4669 ReplaceWithLI = true;
4670 Is64BitLI = Opc == PPC::SUBFIC8;
4671 NewImm = Minuend - SExtImm;
4672 break;
4673 }
4674 return false;
4675 }
4676 case PPC::RLDICL:
4677 case PPC::RLDICL_rec:
4678 case PPC::RLDICL_32:
4679 case PPC::RLDICL_32_64: {
4680 // Use APInt's rotate function.
4681 int64_t SH = MI.getOperand(2).getImm();
4682 int64_t MB = MI.getOperand(3).getImm();
4683 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4684 SExtImm, true);
4685 InVal = InVal.rotl(SH);
4686 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4687 InVal &= Mask;
4688 // Can't replace negative values with an LI as that will sign-extend
4689 // and not clear the left bits. If we're setting the CR bit, we will use
4690 // ANDI_rec which won't sign extend, so that's safe.
4691 if (isUInt<15>(InVal.getSExtValue()) ||
4692 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4693 ReplaceWithLI = true;
4694 Is64BitLI = Opc != PPC::RLDICL_32;
4695 NewImm = InVal.getSExtValue();
4696 SetCR = Opc == PPC::RLDICL_rec;
4697 break;
4698 }
4699 return false;
4700 }
4701 case PPC::RLWINM:
4702 case PPC::RLWINM8:
4703 case PPC::RLWINM_rec:
4704 case PPC::RLWINM8_rec: {
4705 int64_t SH = MI.getOperand(2).getImm();
4706 int64_t MB = MI.getOperand(3).getImm();
4707 int64_t ME = MI.getOperand(4).getImm();
4708 APInt InVal(32, SExtImm, true);
4709 InVal = InVal.rotl(SH);
4710 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4711 InVal &= Mask;
4712 // Can't replace negative values with an LI as that will sign-extend
4713 // and not clear the left bits. If we're setting the CR bit, we will use
4714 // ANDI_rec which won't sign extend, so that's safe.
4715 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4716 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4717 isUInt<16>(InVal.getSExtValue()));
4718 if (ValueFits) {
4719 ReplaceWithLI = true;
4720 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4721 NewImm = InVal.getSExtValue();
4722 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4723 break;
4724 }
4725 return false;
4726 }
4727 case PPC::ORI:
4728 case PPC::ORI8:
4729 case PPC::XORI:
4730 case PPC::XORI8: {
4731 int64_t LogicalImm = MI.getOperand(2).getImm();
4732 int64_t Result = 0;
4733 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4734 Result = LogicalImm | SExtImm;
4735 else
4736 Result = LogicalImm ^ SExtImm;
4737 if (isInt<16>(Result)) {
4738 ReplaceWithLI = true;
4739 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4740 NewImm = Result;
4741 break;
4742 }
4743 return false;
4744 }
4745 }
4746
4747 if (ReplaceWithLI) {
4748 // We need to be careful with CR-setting instructions we're replacing.
4749 if (SetCR) {
4750 // We don't know anything about uses when we're out of SSA, so only
4751 // replace if the new immediate will be reproduced.
4752 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4753 if (PostRA && ImmChanged)
4754 return false;
4755
4756 if (!PostRA) {
4757 // If the defining load-immediate has no other uses, we can just replace
4758 // the immediate with the new immediate.
4759 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4760 DefMI.getOperand(1).setImm(NewImm);
4761
4762 // If we're not using the GPR result of the CR-setting instruction, we
4763 // just need to and with zero/non-zero depending on the new immediate.
4764 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4765 if (NewImm) {
4766 assert(Immediate && "Transformation converted zero to non-zero?");
4767 NewImm = Immediate;
4768 }
4769 } else if (ImmChanged)
4770 return false;
4771 }
4772 }
4773
4774 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4775 LLVM_DEBUG(MI.dump());
4776 LLVM_DEBUG(dbgs() << "Fed by:\n");
4777 LLVM_DEBUG(DefMI.dump());
4779 LII.Imm = NewImm;
4780 LII.Is64Bit = Is64BitLI;
4781 LII.SetCR = SetCR;
4782 // If we're setting the CR, the original load-immediate must be kept (as an
4783 // operand to ANDI_rec/ANDI8_rec).
4784 if (KilledDef && SetCR)
4785 *KilledDef = nullptr;
4786 replaceInstrWithLI(MI, LII);
4787
4788 if (PostRA)
4789 recomputeLivenessFlags(*MI.getParent());
4790
4791 LLVM_DEBUG(dbgs() << "With:\n");
4792 LLVM_DEBUG(MI.dump());
4793 return true;
4794 }
4795 return false;
4796}
4797
4798bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4799 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4800 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4801 bool PostRA = !MRI->isSSA();
4802 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4803 // for post-ra.
4804 if (PostRA)
4805 return false;
4806
4807 // Only handle load/store.
4808 if (!MI.mayLoadOrStore())
4809 return false;
4810
4811 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4812
4813 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4814 "MI must have x-form opcode");
4815
4816 // get Imm Form info.
4817 ImmInstrInfo III;
4818 bool IsVFReg = MI.getOperand(0).isReg()
4819 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4820 : false;
4821
4822 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4823 return false;
4824
4825 if (!III.IsSummingOperands)
4826 return false;
4827
4828 if (OpNoForForwarding != III.OpNoForForwarding)
4829 return false;
4830
4831 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4832 if (!ImmOperandMI.isImm())
4833 return false;
4834
4835 // Check DefMI.
4836 MachineOperand *ImmMO = nullptr;
4837 MachineOperand *RegMO = nullptr;
4838 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4839 return false;
4840 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4841
4842 // Check Imm.
4843 // Set ImmBase from imm instruction as base and get new Imm inside
4844 // isImmElgibleForForwarding.
4845 int64_t ImmBase = ImmOperandMI.getImm();
4846 int64_t Imm = 0;
4847 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4848 return false;
4849
4850 // Do the transform
4851 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4852 LLVM_DEBUG(MI.dump());
4853 LLVM_DEBUG(dbgs() << "Fed by:\n");
4854 LLVM_DEBUG(DefMI.dump());
4855
4856 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4857 MI.getOperand(III.ImmOpNo).setImm(Imm);
4858
4859 LLVM_DEBUG(dbgs() << "With:\n");
4860 LLVM_DEBUG(MI.dump());
4861 return true;
4862}
4863
4864// If an X-Form instruction is fed by an add-immediate and one of its operands
4865// is the literal zero, attempt to forward the source of the add-immediate to
4866// the corresponding D-Form instruction with the displacement coming from
4867// the immediate being added.
4868bool PPCInstrInfo::transformToImmFormFedByAdd(
4869 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4870 MachineInstr &DefMI, bool KillDefMI) const {
4871 // RegMO ImmMO
4872 // | |
4873 // x = addi reg, imm <----- DefMI
4874 // y = op 0 , x <----- MI
4875 // |
4876 // OpNoForForwarding
4877 // Check if the MI meet the requirement described in the III.
4878 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4879 return false;
4880
4881 // Check if the DefMI meet the requirement
4882 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4883 MachineOperand *ImmMO = nullptr;
4884 MachineOperand *RegMO = nullptr;
4885 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4886 return false;
4887 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4888
4889 // As we get the Imm operand now, we need to check if the ImmMO meet
4890 // the requirement described in the III. If yes set the Imm.
4891 int64_t Imm = 0;
4892 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4893 return false;
4894
4895 bool IsFwdFeederRegKilled = false;
4896 bool SeenIntermediateUse = false;
4897 // Check if the RegMO can be forwarded to MI.
4898 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4899 IsFwdFeederRegKilled, SeenIntermediateUse))
4900 return false;
4901
4902 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4903 bool PostRA = !MRI.isSSA();
4904
4905 // We know that, the MI and DefMI both meet the pattern, and
4906 // the Imm also meet the requirement with the new Imm-form.
4907 // It is safe to do the transformation now.
4908 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4909 LLVM_DEBUG(MI.dump());
4910 LLVM_DEBUG(dbgs() << "Fed by:\n");
4911 LLVM_DEBUG(DefMI.dump());
4912
4913 // Update the base reg first.
4914 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4915 false, false,
4916 RegMO->isKill());
4917
4918 // Then, update the imm.
4919 if (ImmMO->isImm()) {
4920 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4921 // directly.
4923 }
4924 else {
4925 // Otherwise, it is Constant Pool Index(CPI) or Global,
4926 // which is relocation in fact. We need to replace the special zero
4927 // register with ImmMO.
4928 // Before that, we need to fixup the target flags for imm.
4929 // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4930 if (DefMI.getOpcode() == PPC::ADDItocL8)
4932
4933 // MI didn't have the interface such as MI.setOperand(i) though
4934 // it has MI.getOperand(i). To repalce the ZERO MachineOperand with
4935 // ImmMO, we need to remove ZERO operand and all the operands behind it,
4936 // and, add the ImmMO, then, move back all the operands behind ZERO.
4938 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
4939 MOps.push_back(MI.getOperand(i));
4940 MI.removeOperand(i);
4941 }
4942
4943 // Remove the last MO in the list, which is ZERO operand in fact.
4944 MOps.pop_back();
4945 // Add the imm operand.
4946 MI.addOperand(*ImmMO);
4947 // Now add the rest back.
4948 for (auto &MO : MOps)
4949 MI.addOperand(MO);
4950 }
4951
4952 // Update the opcode.
4953 MI.setDesc(get(III.ImmOpcode));
4954
4955 if (PostRA)
4956 recomputeLivenessFlags(*MI.getParent());
4957 LLVM_DEBUG(dbgs() << "With:\n");
4958 LLVM_DEBUG(MI.dump());
4959
4960 return true;
4961}
4962
4963bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
4964 const ImmInstrInfo &III,
4965 unsigned ConstantOpNo,
4966 MachineInstr &DefMI) const {
4967 // DefMI must be LI or LI8.
4968 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4969 !DefMI.getOperand(1).isImm())
4970 return false;
4971
4972 // Get Imm operand and Sign-extend to 64-bits.
4973 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
4974
4975 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4976 bool PostRA = !MRI.isSSA();
4977 // Exit early if we can't convert this.
4978 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
4979 return false;
4980 if (Imm % III.ImmMustBeMultipleOf)
4981 return false;
4982 if (III.TruncateImmTo)
4983 Imm &= ((1 << III.TruncateImmTo) - 1);
4984 if (III.SignedImm) {
4985 APInt ActualValue(64, Imm, true);
4986 if (!ActualValue.isSignedIntN(III.ImmWidth))
4987 return false;
4988 } else {
4989 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
4990 if ((uint64_t)Imm > UnsignedMax)
4991 return false;
4992 }
4993
4994 // If we're post-RA, the instructions don't agree on whether register zero is
4995 // special, we can transform this as long as the register operand that will
4996 // end up in the location where zero is special isn't R0.
4997 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
4998 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
4999 III.ZeroIsSpecialNew + 1;
5000 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
5001 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5002 // If R0 is in the operand where zero is special for the new instruction,
5003 // it is unsafe to transform if the constant operand isn't that operand.
5004 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
5005 ConstantOpNo != III.ZeroIsSpecialNew)
5006 return false;
5007 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
5008 ConstantOpNo != PosForOrigZero)
5009 return false;
5010 }
5011
5012 unsigned Opc = MI.getOpcode();
5013 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5014 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5015 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5016 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5017 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5018 Opc == PPC::SRD || Opc == PPC::SRD_rec;
5019 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5020 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5021 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5022 Opc == PPC::SRD_rec;
5023
5024 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5025 LLVM_DEBUG(MI.dump());
5026 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5027 LLVM_DEBUG(DefMI.dump());
5028 MI.setDesc(get(III.ImmOpcode));
5029 if (ConstantOpNo == III.OpNoForForwarding) {
5030 // Converting shifts to immediate form is a bit tricky since they may do
5031 // one of three things:
5032 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5033 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5034 // setting CR0)
5035 // 3. If the shift amount is in [1, OpSize), it's just a shift
5036 if (SpecialShift32 || SpecialShift64) {
5038 LII.Imm = 0;
5039 LII.SetCR = SetCR;
5040 LII.Is64Bit = SpecialShift64;
5041 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
5042 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5043 replaceInstrWithLI(MI, LII);
5044 // Shifts by zero don't change the value. If we don't need to set CR0,
5045 // just convert this to a COPY. Can't do this post-RA since we've already
5046 // cleaned up the copies.
5047 else if (!SetCR && ShAmt == 0 && !PostRA) {
5048 MI.removeOperand(2);
5049 MI.setDesc(get(PPC::COPY));
5050 } else {
5051 // The 32 bit and 64 bit instructions are quite different.
5052 if (SpecialShift32) {
5053 // Left shifts use (N, 0, 31-N).
5054 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5055 // use (0, 0, 31) if N == 0.
5056 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5057 uint64_t MB = RightShift ? ShAmt : 0;
5058 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
5060 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5061 .addImm(ME);
5062 } else {
5063 // Left shifts use (N, 63-N).
5064 // Right shifts use (64-N, N) if 0 < N < 64.
5065 // use (0, 0) if N == 0.
5066 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5067 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
5069 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5070 }
5071 }
5072 } else
5073 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5074 }
5075 // Convert commutative instructions (switch the operands and convert the
5076 // desired one to an immediate.
5077 else if (III.IsCommutative) {
5078 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5079 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5080 } else
5081 llvm_unreachable("Should have exited early!");
5082
5083 // For instructions for which the constant register replaces a different
5084 // operand than where the immediate goes, we need to swap them.
5085 if (III.OpNoForForwarding != III.ImmOpNo)
5087
5088 // If the special R0/X0 register index are different for original instruction
5089 // and new instruction, we need to fix up the register class in new
5090 // instruction.
5091 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5092 if (III.ZeroIsSpecialNew) {
5093 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5094 // need to fix up register class.
5095 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5096 if (RegToModify.isVirtual()) {
5097 const TargetRegisterClass *NewRC =
5098 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5099 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5100 MRI.setRegClass(RegToModify, NewRC);
5101 }
5102 }
5103 }
5104
5105 if (PostRA)
5106 recomputeLivenessFlags(*MI.getParent());
5107
5108 LLVM_DEBUG(dbgs() << "With: ");
5109 LLVM_DEBUG(MI.dump());
5110 LLVM_DEBUG(dbgs() << "\n");
5111 return true;
5112}
5113
5114const TargetRegisterClass *
5116 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5117 return &PPC::VSRCRegClass;
5118 return RC;
5119}
5120
5122 return PPC::getRecordFormOpcode(Opcode);
5123}
5124
5125static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5126 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5127 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5128 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5129 Opcode == PPC::LHZUX8);
5130}
5131
5132// This function checks for sign extension from 32 bits to 64 bits.
5133static bool definedBySignExtendingOp(const unsigned Reg,
5134 const MachineRegisterInfo *MRI) {
5136 return false;
5137
5138 MachineInstr *MI = MRI->getVRegDef(Reg);
5139 if (!MI)
5140 return false;
5141
5142 int Opcode = MI->getOpcode();
5143 const PPCInstrInfo *TII =
5144 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5145 if (TII->isSExt32To64(Opcode))
5146 return true;
5147
5148 // The first def of LBZU/LHZU is sign extended.
5149 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5150 return true;
5151
5152 // RLDICL generates sign-extended output if it clears at least
5153 // 33 bits from the left (MSB).
5154 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5155 return true;
5156
5157 // If at least one bit from left in a lower word is masked out,
5158 // all of 0 to 32-th bits of the output are cleared.
5159 // Hence the output is already sign extended.
5160 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5161 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5162 MI->getOperand(3).getImm() > 0 &&
5163 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5164 return true;
5165
5166 // If the most significant bit of immediate in ANDIS is zero,
5167 // all of 0 to 32-th bits are cleared.
5168 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5169 uint16_t Imm = MI->getOperand(2).getImm();
5170 if ((Imm & 0x8000) == 0)
5171 return true;
5172 }
5173
5174 return false;
5175}
5176
5177// This function checks the machine instruction that defines the input register
5178// Reg. If that machine instruction always outputs a value that has only zeros
5179// in the higher 32 bits then this function will return true.
5180static bool definedByZeroExtendingOp(const unsigned Reg,
5181 const MachineRegisterInfo *MRI) {
5183 return false;
5184
5185 MachineInstr *MI = MRI->getVRegDef(Reg);
5186 if (!MI)
5187 return false;
5188
5189 int Opcode = MI->getOpcode();
5190 const PPCInstrInfo *TII =
5191 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5192 if (TII->isZExt32To64(Opcode))
5193 return true;
5194
5195 // The first def of LBZU/LHZU/LWZU are zero extended.
5196 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5197 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5198 MI->getOperand(0).getReg() == Reg)
5199 return true;
5200
5201 // The 16-bit immediate is sign-extended in li/lis.
5202 // If the most significant bit is zero, all higher bits are zero.
5203 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5204 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5205 int64_t Imm = MI->getOperand(1).getImm();
5206 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5207 return true;
5208 }
5209
5210 // We have some variations of rotate-and-mask instructions
5211 // that clear higher 32-bits.
5212 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5213 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5214 Opcode == PPC::RLDICL_32_64) &&
5215 MI->getOperand(3).getImm() >= 32)
5216 return true;
5217
5218 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5219 MI->getOperand(3).getImm() >= 32 &&
5220 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5221 return true;
5222
5223 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5224 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5225 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5226 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5227 return true;
5228
5229 return false;
5230}
5231
5232// This function returns true if the input MachineInstr is a TOC save
5233// instruction.
5235 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5236 return false;
5237 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5238 unsigned StackOffset = MI.getOperand(1).getImm();
5239 Register StackReg = MI.getOperand(2).getReg();
5240 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5241 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5242 return true;
5243
5244 return false;
5245}
5246
// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
// constexpr: this is a compile-time constant; internal linkage is unchanged.
constexpr unsigned MAX_BINOP_DEPTH = 1;
5250
5251// This function will promote the instruction which defines the register `Reg`
5252// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
5253// used to check whether an instruction needs to be promoted or not is similar
5254// to the logic used to check whether or not a defined register is sign or zero
5255// extended within the function PPCInstrInfo::isSignOrZeroExtended.
5256// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
5257// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
5258// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
5259// than once. This is done to prevent exponential recursion.
5262 unsigned BinOpDepth,
5263 LiveVariables *LV) const {
5264 if (!Reg.isVirtual())
5265 return;
5266
5267 MachineInstr *MI = MRI->getVRegDef(Reg);
5268 if (!MI)
5269 return;
5270
5271 unsigned Opcode = MI->getOpcode();
5272
5273 switch (Opcode) {
5274 case PPC::OR:
5275 case PPC::ISEL:
5276 case PPC::OR8:
5277 case PPC::PHI: {
5278 if (BinOpDepth >= MAX_BINOP_DEPTH)
5279 break;
5280 unsigned OperandEnd = 3, OperandStride = 1;
5281 if (Opcode == PPC::PHI) {
5282 OperandEnd = MI->getNumOperands();
5283 OperandStride = 2;
5284 }
5285
5286 for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
5287 assert(MI->getOperand(I).isReg() && "Operand must be register");
5288 promoteInstr32To64ForElimEXTSW(MI->getOperand(I).getReg(), MRI,
5289 BinOpDepth + 1, LV);
5290 }
5291
5292 break;
5293 }
5294 case PPC::COPY: {
5295 // Refers to the logic of the `case PPC::COPY` statement in the function
5296 // PPCInstrInfo::isSignOrZeroExtended().
5297
5298 Register SrcReg = MI->getOperand(1).getReg();
5299 // In both ELFv1 and v2 ABI, method parameters and the return value
5300 // are sign- or zero-extended.
5301 const MachineFunction *MF = MI->getMF();
5302 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5303 // If this is a copy from another register, we recursively promote the
5304 // source.
5305 promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5306 return;
5307 }
5308
5309 // From here on everything is SVR4ABI. COPY will be eliminated in the other
5310 // pass, we do not need promote the COPY pseudo opcode.
5311
5312 if (SrcReg != PPC::X3)
5313 // If this is a copy from another register, we recursively promote the
5314 // source.
5315 promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5316 return;
5317 }
5318 case PPC::ORI:
5319 case PPC::XORI:
5320 case PPC::ORIS:
5321 case PPC::XORIS:
5322 case PPC::ORI8:
5323 case PPC::XORI8:
5324 case PPC::ORIS8:
5325 case PPC::XORIS8:
5326 promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI, BinOpDepth,
5327 LV);
5328 break;
5329 case PPC::AND:
5330 case PPC::AND8:
5331 if (BinOpDepth >= MAX_BINOP_DEPTH)
5332 break;
5333
5334 promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI,
5335 BinOpDepth + 1, LV);
5336 promoteInstr32To64ForElimEXTSW(MI->getOperand(2).getReg(), MRI,
5337 BinOpDepth + 1, LV);
5338 break;
5339 }
5340
5341 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
5342 if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
5343 return;
5344
5345 const PPCInstrInfo *TII =
5346 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5347
5348 // Map the 32bit to 64bit opcodes for instructions that are not signed or zero
5349 // extended themselves, but may have operands who's destination registers of
5350 // signed or zero extended instructions.
5351 std::unordered_map<unsigned, unsigned> OpcodeMap = {
5352 {PPC::OR, PPC::OR8}, {PPC::ISEL, PPC::ISEL8},
5353 {PPC::ORI, PPC::ORI8}, {PPC::XORI, PPC::XORI8},
5354 {PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
5355 {PPC::AND, PPC::AND8}};
5356
5357 int NewOpcode = -1;
5358 auto It = OpcodeMap.find(Opcode);
5359 if (It != OpcodeMap.end()) {
5360 // Set the new opcode to the mapped 64-bit version.
5361 NewOpcode = It->second;
5362 } else {
5363 if (!TII->isSExt32To64(Opcode))
5364 return;
5365
5366 // The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
5367 // map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
5368 // instruction with the same opcode.
5369 NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
5370 }
5371
5372 assert(NewOpcode != -1 &&
5373 "Must have a 64-bit opcode to map the 32-bit opcode!");
5374
5375 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
5376 const MCInstrDesc &MCID = TII->get(NewOpcode);
5377 const TargetRegisterClass *NewRC =
5378 TRI->getRegClass(MCID.operands()[0].RegClass);
5379
5380 Register SrcReg = MI->getOperand(0).getReg();
5381 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
5382
5383 // If the register class of the defined register in the 32-bit instruction
5384 // is the same as the register class of the defined register in the promoted
5385 // 64-bit instruction, we do not need to promote the instruction.
5386 if (NewRC == SrcRC)
5387 return;
5388
5389 DebugLoc DL = MI->getDebugLoc();
5390 auto MBB = MI->getParent();
5391
5392 // Since the pseudo-opcode of the instruction is promoted from 32-bit to
5393 // 64-bit, if the source reg class of the original instruction belongs to
5394 // PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
5395 // the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
5396 // respectively.
5397 DenseMap<unsigned, Register> PromoteRegs;
5398 for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5399 MachineOperand &Operand = MI->getOperand(i);
5400 if (!Operand.isReg())
5401 continue;
5402
5403 Register OperandReg = Operand.getReg();
5404 if (!OperandReg.isVirtual())
5405 continue;
5406
5407 const TargetRegisterClass *NewUsedRegRC =
5408 TRI->getRegClass(MCID.operands()[i].RegClass);
5409 const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
5410 if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
5411 OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
5412 // Promote the used 32-bit register to 64-bit register.
5413 Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5414 Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5415 BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
5416 BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
5417 .addReg(TmpReg)
5418 .addReg(OperandReg)
5419 .addImm(PPC::sub_32);
5420 PromoteRegs[i] = DstTmpReg;
5421 }
5422 }
5423
5424 Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
5425
5426 BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
5428 --Iter;
5429 MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
5430 for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5431 if (PromoteRegs.find(i) != PromoteRegs.end())
5432 MIBuilder.addReg(PromoteRegs[i], RegState::Kill);
5433 else
5434 Iter->addOperand(MI->getOperand(i));
5435 }
5436
5437 for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
5438 MachineOperand &Operand = Iter->getOperand(i);
5439 if (!Operand.isReg())
5440 continue;
5441 Register OperandReg = Operand.getReg();
5442 if (!OperandReg.isVirtual())
5443 continue;
5444 LV->recomputeForSingleDefVirtReg(OperandReg);
5445 }
5446
5447 MI->eraseFromParent();
5448
5449 // A defined register may be used by other instructions that are 32-bit.
5450 // After the defined register is promoted to 64-bit for the promoted
5451 // instruction, we need to demote the 64-bit defined register back to a
5452 // 32-bit register
5453 BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
5454 .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
5455 LV->recomputeForSingleDefVirtReg(NewDefinedReg);
5456}
5457
5458// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5459// does not count all of the recursions. The parameter BinOpDepth is incremented
5460// only when isSignOrZeroExtended calls itself more than once. This is done to
5461// prevent expontential recursion. There is no parameter to track linear
5462// recursion.
// Determine whether the 32-bit value in virtual register Reg is known to be
// sign-extended and/or zero-extended into its 64-bit container.  The result
// is the pair {IsSExt, IsZExt}.  BinOpDepth bounds the recursion through
// multi-operand instructions (OR/AND/ISEL/PHI) so the analysis stays cheap.
// NOTE(review): the extraction dropped a few source lines here (the line
// naming PPCInstrInfo::isSignOrZeroExtended, an early virtual-register
// guard, and the call-scan iterator declaration); the remaining code is
// preserved byte-identical.
5463std::pair<bool, bool>
5465 const unsigned BinOpDepth,
5466 const MachineRegisterInfo *MRI) const {
5468 return std::pair<bool, bool>(false, false);
5469
5470 MachineInstr *MI = MRI->getVRegDef(Reg);
5471 if (!MI)
5472 return std::pair<bool, bool>(false, false);
5473
// Seed the result from the table-driven per-opcode analysis helpers.
5474 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5475 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5476
5477 // If we know the instruction always returns a sign- and zero-extended
5478 // result, return here.
5479 if (IsSExt && IsZExt)
5480 return std::pair<bool, bool>(IsSExt, IsZExt);
5481
5482 switch (MI->getOpcode()) {
5483 case PPC::COPY: {
5484 Register SrcReg = MI->getOperand(1).getReg();
5485
5486 // In both ELFv1 and v2 ABI, method parameters and the return value
5487 // are sign- or zero-extended.
5488 const MachineFunction *MF = MI->getMF();
5489
5490 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5491 // If this is a copy from another register, we recursively check source.
5492 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5493 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5494 SrcExt.second || IsZExt);
5495 }
5496
5497 // From here on everything is SVR4ABI.
5498 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5499 // We check the ZExt/SExt flags for a method parameter.
5500 if (MI->getParent()->getBasicBlock() ==
5501 &MF->getFunction().getEntryBlock()) {
5502 Register VReg = MI->getOperand(0).getReg();
5503 if (MF->getRegInfo().isLiveIn(VReg)) {
// Live-in extension facts were recorded by the calling convention lowering.
5504 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5505 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5506 return std::pair<bool, bool>(IsSExt, IsZExt);
5507 }
5508 }
5509
5510 if (SrcReg != PPC::X3) {
5511 // If this is a copy from another register, we recursively check source.
5512 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5513 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5514 SrcExt.second || IsZExt);
5515 }
5516
5517 // For a method return value, we check the ZExt/SExt flags in attribute.
5518 // We assume the following code sequence for method call.
5519 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5520 // BL8_NOP @func,...
5521 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5522 // %5 = COPY %x3; G8RC:%5
5523 const MachineBasicBlock *MBB = MI->getParent();
5524 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
// Walk backwards from the COPY; if the expected call sequence is not found,
// return what we already know.
5527 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5528 return IsExtendPair;
5529
5530 const MachineInstr &CallMI = *(--II);
5531 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5532 return IsExtendPair;
5533
5534 const Function *CalleeFn =
5535 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5536 if (!CalleeFn)
5537 return IsExtendPair;
// Only integer returns of 32 bits or fewer carry meaningful SExt/ZExt
// return attributes here.
5538 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5539 if (IntTy && IntTy->getBitWidth() <= 32) {
5540 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5541 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5542 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5543 return std::pair<bool, bool>(IsSExt, IsZExt);
5544 }
5545
5546 return IsExtendPair;
5547 }
5548
5549 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5550 // So, we track the operand register as we do for register copy.
5551 case PPC::ORI:
5552 case PPC::XORI:
5553 case PPC::ORI8:
5554 case PPC::XORI8: {
5555 Register SrcReg = MI->getOperand(1).getReg();
5556 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5557 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5558 SrcExt.second || IsZExt);
5559 }
5560
5561 // OR, XOR with shifted 16-bit immediate does not change the upper
5562 // 32 bits. So, we track the operand register for zero extension.
5563 // For sign extension when the MSB of the immediate is zero, we also
5564 // track the operand register since the upper 33 bits are unchanged.
5565 case PPC::ORIS:
5566 case PPC::XORIS:
5567 case PPC::ORIS8:
5568 case PPC::XORIS8: {
5569 Register SrcReg = MI->getOperand(1).getReg();
5570 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5571 uint16_t Imm = MI->getOperand(2).getImm();
5572 if (Imm & 0x8000)
// The shifted immediate sets bit 31, so sign extension cannot be proven.
5573 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5574 else
5575 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5576 SrcExt.second || IsZExt);
5577 }
5578
5579 // If all incoming values are sign-/zero-extended,
5580 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5581 case PPC::OR:
5582 case PPC::OR8:
5583 case PPC::ISEL:
5584 case PPC::PHI: {
5585 if (BinOpDepth >= MAX_BINOP_DEPTH)
5586 return std::pair<bool, bool>(false, false);
5587
5588 // The input registers for PHI are operand 1, 3, ...
5589 // The input registers for others are operand 1 and 2.
5590 unsigned OperandEnd = 3, OperandStride = 1;
5591 if (MI->getOpcode() == PPC::PHI) {
5592 OperandEnd = MI->getNumOperands();
5593 OperandStride = 2;
5594 }
5595
// Conjunction over all inputs: any unproven operand clears the flag.
5596 IsSExt = true;
5597 IsZExt = true;
5598 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5599 if (!MI->getOperand(I).isReg())
5600 return std::pair<bool, bool>(false, false);
5601
5602 Register SrcReg = MI->getOperand(I).getReg();
5603 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5604 IsSExt &= SrcExt.first;
5605 IsZExt &= SrcExt.second;
5606 }
5607 return std::pair<bool, bool>(IsSExt, IsZExt);
5608 }
5609
5610 // If at least one of the incoming values of an AND is zero extended
5611 // then the output is also zero-extended. If both of the incoming values
5612 // are sign-extended then the output is also sign extended.
5613 case PPC::AND:
5614 case PPC::AND8: {
5615 if (BinOpDepth >= MAX_BINOP_DEPTH)
5616 return std::pair<bool, bool>(false, false);
5617
5618 Register SrcReg1 = MI->getOperand(1).getReg();
5619 Register SrcReg2 = MI->getOperand(2).getReg();
5620 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5621 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5622 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5623 Src1Ext.second || Src2Ext.second);
5624 }
5625
5626 default:
5627 break;
5628 }
5629 return std::pair<bool, bool>(IsSExt, IsZExt);
5630}
5631
5632bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5633 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5634}
5635
// PipelinerLoopInfo implementation for PPC hardware (CTR) loops, used by the
// MachinePipeliner.  Tracks the MTCTR loop set-up instruction (Loop), the
// BDNZ terminator (EndLoop), and the instruction defining the trip count
// (LoopCount).
// NOTE(review): two extracted lines are missing below — the Cond parameter
// of createTripCountGreaterCondition and the CreateReg push_back for the
// CTR register; code is otherwise preserved byte-identical.
5636namespace {
5637class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5638 MachineInstr *Loop, *EndLoop, *LoopCount;
5639 MachineFunction *MF;
5640 const TargetInstrInfo *TII;
5641 int64_t TripCount;
5642
5643public:
5644 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5645 MachineInstr *LoopCount)
5646 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5647 MF(Loop->getParent()->getParent()),
5648 TII(MF->getSubtarget().getInstrInfo()) {
5649 // Inspect the Loop instruction up-front, as it may be deleted when we call
5650 // createTripCountGreaterCondition.
5651 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5652 TripCount = LoopCount->getOperand(1).getImm();
5653 else
// Trip count is not a compile-time constant; -1 marks it as unknown.
5654 TripCount = -1;
5655 }
5656
5657 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5658 // Only ignore the terminator.
5659 return MI == EndLoop;
5660 }
5661
5662 std::optional<bool> createTripCountGreaterCondition(
5663 int TC, MachineBasicBlock &MBB,
5665 if (TripCount == -1) {
5666 // Since the BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5667 // we don't need to generate anything else here.
5668 Cond.push_back(MachineOperand::CreateImm(0));
5670 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5671 true));
5672 return {};
5673 }
5674
// Constant trip count: the condition is statically known.
5675 return TripCount > TC;
5676 }
5677
5678 void setPreheader(MachineBasicBlock *NewPreheader) override {
5679 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5680 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5681 }
5682
5683 void adjustTripCount(int TripCountAdjust) override {
5684 // If the loop trip count is a compile-time value, then just change the
5685 // value.
5686 if (LoopCount->getOpcode() == PPC::LI8 ||
5687 LoopCount->getOpcode() == PPC::LI) {
5688 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5689 LoopCount->getOperand(1).setImm(TripCount);
5690 return;
5691 }
5692
5693 // Since the BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5694 // we don't need to generate anything else here.
5695 }
5696
5697 void disposed() override {
5698 Loop->eraseFromParent();
5699 // Ensure the loop setup instruction is deleted too.
5700 LoopCount->eraseFromParent();
5701 }
5702};
5703} // namespace
5704
// Recognize a hardware (CTR) loop for software pipelining: the loop block
// must end in a BDNZ and the preheader must contain the MTCTR loop set-up
// instruction.  Returns the PipelinerLoopInfo on success, nullptr otherwise.
// NOTE(review): several extracted lines are missing below (the function
// signature, the terminator iterator I, the Visited set, and the MRI
// reference); the remaining code is preserved byte-identical.
5705std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5707 // We really "analyze" only hardware loops right now.
// A self-looping block can appear as its own first predecessor; skip it to
// find the real preheader.
5709 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5710 if (Preheader == LoopBB)
5711 Preheader = *std::next(LoopBB->pred_begin());
5712 MachineFunction *MF = Preheader->getParent();
5713
5714 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5716 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5717 Register LoopCountReg = LoopInst->getOperand(0).getReg();
5719 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5720 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5721 }
5722 }
5723 return nullptr;
5724}
5725
// Find the hardware-loop set-up instruction (MTCTRloop / MTCTR8loop) in the
// given preheader block, or return nullptr if none is present.
// NOTE(review): the extracted listing dropped the signature line naming
// PPCInstrInfo::findLoopInstr; the Visited parameter is unused in the
// visible body.
5727 MachineBasicBlock &PreHeader,
5728 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5729
// Pick the opcode matching the subtarget word size.
5730 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5731
5732 // The loop set-up instruction should be in the preheader.
5733 for (auto &I : PreHeader.instrs())
5734 if (I.getOpcode() == LOOPi)
5735 return &I;
5736 return nullptr;
5737}
5738
// Return true if we can extract the base operand, byte offset, and memory
// width of the given load/store. Width is the size of memory that is being
// loaded/stored.
// NOTE(review): the extracted listing dropped the signature line naming
// PPCInstrInfo::getMemOperandWithOffsetWidth.
5742 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5743 LocationSize &Width, const TargetRegisterInfo *TRI) const {
// Only plain loads/stores of the form (value, imm-offset, base) qualify.
5744 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5745 return false;
5746
5747 // Handle only loads/stores with base register followed by immediate offset.
5748 if (!LdSt.getOperand(1).isImm() ||
5749 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5750 return false;
// NOTE(review): the check below is a verbatim duplicate of the one above —
// harmless but redundant; a candidate for removal.
5751 if (!LdSt.getOperand(1).isImm() ||
5752 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5753 return false;
5754
5755 if (!LdSt.hasOneMemOperand())
5756 return false;
5757
5758 Width = (*LdSt.memoperands_begin())->getSize();
5759 Offset = LdSt.getOperand(1).getImm();
5760 BaseReg = &LdSt.getOperand(2);
5761 return true;
5762}
5763
// Return true if the two memory accesses provably do not overlap: same base
// operand, and the lower access's offset plus its width does not reach the
// higher access's offset.  Conservatively returns false otherwise.
// NOTE(review): the extracted listing dropped the signature line and the
// early bail-out for unmodeled side effects / ordered memory references;
// the remaining code is preserved byte-identical.
5765 const MachineInstr &MIa, const MachineInstr &MIb) const {
5766 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5767 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5768
5771 return false;
5772
5773 // Retrieve the base register, offset from the base register and width. Width
5774 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5775 // base registers are identical, and the offset of a lower memory access +
5776 // the width doesn't overlap the offset of a higher memory access,
5777 // then the memory accesses are different.
5779 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5780 int64_t OffsetA = 0, OffsetB = 0;
5781 LocationSize WidthA = 0, WidthB = 0;
5782 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5783 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5784 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
5785 int LowOffset = std::min(OffsetA, OffsetB);
5786 int HighOffset = std::max(OffsetA, OffsetB);
5787 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
// Disjoint only when the low access's extent is known and ends at or
// before the start of the high access.
5788 if (LowWidth.hasValue() &&
5789 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5790 return true;
5791 }
5792 }
5793 return false;
5794}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the subtype a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(...)
Definition: Debug.h:106
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static unsigned getSize(unsigned Kind)
void changeSign()
Definition: APFloat.h:1294
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition: APInt.cpp:1115
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
A possibly irreducible generalization of a Loop.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:130
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
bool hasValue() const
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
void setOpcode(unsigned Op)
Definition: MCInst.h:198
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
Definition: MCInstrDesc.h:565
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:269
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
uint16_t Constraints
Operand constraints (see OperandConstraint enum).
Definition: MCInstrDesc.h:100
bool isLookupPtrRegClass() const
Set if this operand is a pointer value and it requires a callback to look up its register class.
Definition: MCInstrDesc.h:104
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:91
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:956
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:397
iterator_range< mop_iterator > uses()
Returns a range that includes all operands which may be register uses.
Definition: MachineInstr.h:739
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:649
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:821
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:691
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:806
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:392
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automata that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
PPCInstrInfo(PPCSubtarget &STI)
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
static int getRecordFormOpcode(unsigned Opcode)
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
Definition: PPCInstrInfo.h:277
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:275
CombinerObjective getCombinerObjective(unsigned Pattern) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
void promoteInstr32To64ForElimEXTSW(const Register &Reg, MachineRegisterInfo *MRI, unsigned BinOpDepth, LiveVariables *LV) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:628
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Definition: PPCInstrInfo.h:511
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if get the base operand, byte offset of an instruction and the memory width.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:622
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const
getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode for a given imm form load/s...
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:147
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetLinux() const
Definition: PPCSubtarget.h:217
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:213
void setGlibcHWCAPAccess(bool Val=true) const
void dump() const
Definition: Pass.cpp:136
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:714
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ MO_TOC_LO
Definition: PPC.h:183
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
static bool isVFRegister(unsigned Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
static unsigned getCRFromCRBit(unsigned SrcReg)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
@ REASSOC_XY_BCA
Definition: PPCInstrInfo.h:97
@ REASSOC_XY_BAC
Definition: PPCInstrInfo.h:98
@ REASSOC_XY_AMM_BMM
Definition: PPCInstrInfo.h:92
@ REASSOC_XMM_AMM_BMM
Definition: PPCInstrInfo.h:93
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
unsigned getKillRegState(bool B)
@ SOK_CRBitSpill
Definition: PPCInstrInfo.h:74
@ SOK_VSXVectorSpill
Definition: PPCInstrInfo.h:76
@ SOK_SpillToVSR
Definition: PPCInstrInfo.h:79
@ SOK_Int4Spill
Definition: PPCInstrInfo.h:69
@ SOK_PairedVecSpill
Definition: PPCInstrInfo.h:80
@ SOK_VectorFloat8Spill
Definition: PPCInstrInfo.h:77
@ SOK_UAccumulatorSpill
Definition: PPCInstrInfo.h:82
@ SOK_PairedG8Spill
Definition: PPCInstrInfo.h:85
@ SOK_VectorFloat4Spill
Definition: PPCInstrInfo.h:78
@ SOK_Float8Spill
Definition: PPCInstrInfo.h:71
@ SOK_Float4Spill
Definition: PPCInstrInfo.h:72
@ SOK_VRVectorSpill
Definition: PPCInstrInfo.h:75
@ SOK_WAccumulatorSpill
Definition: PPCInstrInfo.h:83
@ SOK_SPESpill
Definition: PPCInstrInfo.h:84
@ SOK_CRSpill
Definition: PPCInstrInfo.h:73
@ SOK_AccumulatorSpill
Definition: PPCInstrInfo.h:81
@ SOK_Int8Spill
Definition: PPCInstrInfo.h:70
@ SOK_LastOpcodeSpill
Definition: PPCInstrInfo.h:86
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t IsSummingOperands
Definition: PPCInstrInfo.h:56
uint64_t OpNoForForwarding
Definition: PPCInstrInfo.h:46
uint64_t ImmMustBeMultipleOf
Definition: PPCInstrInfo.h:36
uint64_t IsCommutative
Definition: PPCInstrInfo.h:44
uint64_t ZeroIsSpecialNew
Definition: PPCInstrInfo.h:42
uint64_t TruncateImmTo
Definition: PPCInstrInfo.h:54
uint64_t ZeroIsSpecialOrig
Definition: PPCInstrInfo.h:39
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.