LLVM 20.0.0git
PPCInstrInfo.cpp
Go to the documentation of this file.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/Module.h"
37#include "llvm/MC/MCInst.h"
40#include "llvm/Support/Debug.h"
43
44using namespace llvm;
45
46#define DEBUG_TYPE "ppc-instr-info"
47
48#define GET_INSTRMAP_INFO
49#define GET_INSTRINFO_CTOR_DTOR
50#include "PPCGenInstrInfo.inc"
51
52STATISTIC(NumStoreSPILLVSRRCAsVec,
53 "Number of spillvsrrc spilled to stack as vec");
54STATISTIC(NumStoreSPILLVSRRCAsGpr,
55 "Number of spillvsrrc spilled to stack as gpr");
56STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
57STATISTIC(CmpIselsConverted,
58 "Number of ISELs that depend on comparison of constants converted");
59STATISTIC(MissedConvertibleImmediateInstrs,
60 "Number of compare-immediate instructions fed by constants");
61STATISTIC(NumRcRotatesConvertedToRcAnd,
62 "Number of record-form rotates converted to record-form andi");
63
64static cl::
65opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
66 cl::desc("Disable analysis for CTR loops"));
67
68static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
69cl::desc("Disable compare instruction optimization"), cl::Hidden);
70
71static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
72cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
74
75static cl::opt<bool>
76UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
77 cl::desc("Use the old (incorrect) instruction latency calculation"));
78
79static cl::opt<float>
80 FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
81 cl::desc("register pressure factor for the transformations."));
82
84 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
85 cl::desc("enable register pressure reduce in machine combiner pass."));
86
87// Pin the vtable to this file.
88void PPCInstrInfo::anchor() {}
89
91 : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
92 /* CatchRetOpcode */ -1,
93 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
94 Subtarget(STI), RI(STI.getTargetMachine()) {}
95
96/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
97/// this target when scheduling the DAG.
100 const ScheduleDAG *DAG) const {
101 unsigned Directive =
102 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
105 const InstrItineraryData *II =
106 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
107 return new ScoreboardHazardRecognizer(II, DAG);
108 }
109
111}
112
113/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
114/// to use for this target when scheduling the DAG.
117 const ScheduleDAG *DAG) const {
118 unsigned Directive =
119 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
120
121 // FIXME: Leaving this as-is until we have POWER9 scheduling info
123 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
124
125 // Most subtargets use a PPC970 recognizer.
128 assert(DAG->TII && "No InstrInfo?");
129
130 return new PPCHazardRecognizer970(*DAG);
131 }
132
133 return new ScoreboardHazardRecognizer(II, DAG);
134}
135
137 const MachineInstr &MI,
138 unsigned *PredCost) const {
139 if (!ItinData || UseOldLatencyCalc)
140 return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
141
142 // The default implementation of getInstrLatency calls getStageLatency, but
143 // getStageLatency does not do the right thing for us. While we have
144 // itinerary, most cores are fully pipelined, and so the itineraries only
145 // express the first part of the pipeline, not every stage. Instead, we need
146 // to use the listed output operand cycle number (using operand 0 here, which
147 // is an output).
148
149 unsigned Latency = 1;
150 unsigned DefClass = MI.getDesc().getSchedClass();
151 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
152 const MachineOperand &MO = MI.getOperand(i);
153 if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
154 continue;
155
156 std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
157 if (!Cycle)
158 continue;
159
160 Latency = std::max(Latency, *Cycle);
161 }
162
163 return Latency;
164}
165
166std::optional<unsigned> PPCInstrInfo::getOperandLatency(
167 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
168 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
169 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
170 ItinData, DefMI, DefIdx, UseMI, UseIdx);
171
172 if (!DefMI.getParent())
173 return Latency;
174
175 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
176 Register Reg = DefMO.getReg();
177
178 bool IsRegCR;
179 if (Reg.isVirtual()) {
180 const MachineRegisterInfo *MRI =
181 &DefMI.getParent()->getParent()->getRegInfo();
182 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
183 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
184 } else {
185 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
186 PPC::CRBITRCRegClass.contains(Reg);
187 }
188
189 if (UseMI.isBranch() && IsRegCR) {
190 if (!Latency)
191 Latency = getInstrLatency(ItinData, DefMI);
192
193 // On some cores, there is an additional delay between writing to a condition
194 // register, and using it from a branch.
195 unsigned Directive = Subtarget.getCPUDirective();
196 switch (Directive) {
197 default: break;
198 case PPC::DIR_7400:
199 case PPC::DIR_750:
200 case PPC::DIR_970:
201 case PPC::DIR_E5500:
202 case PPC::DIR_PWR4:
203 case PPC::DIR_PWR5:
204 case PPC::DIR_PWR5X:
205 case PPC::DIR_PWR6:
206 case PPC::DIR_PWR6X:
207 case PPC::DIR_PWR7:
208 case PPC::DIR_PWR8:
209 // FIXME: Is this needed for POWER9?
210 Latency = *Latency + 2;
211 break;
212 }
213 }
214
215 return Latency;
216}
217
219 uint32_t Flags) const {
220 MI.setFlags(Flags);
224}
225
226// This function does not list all associative and commutative operations, but
227// only those worth feeding through the machine combiner in an attempt to
228// reduce the critical path. Mostly, this means floating-point operations,
229// because they have high latencies(>=5) (compared to other operations, such as
230// and/or, which are also associative and commutative, but have low latencies).
232 bool Invert) const {
233 if (Invert)
234 return false;
235 switch (Inst.getOpcode()) {
236 // Floating point:
237 // FP Add:
238 case PPC::FADD:
239 case PPC::FADDS:
240 // FP Multiply:
241 case PPC::FMUL:
242 case PPC::FMULS:
243 // Altivec Add:
244 case PPC::VADDFP:
245 // VSX Add:
246 case PPC::XSADDDP:
247 case PPC::XVADDDP:
248 case PPC::XVADDSP:
249 case PPC::XSADDSP:
250 // VSX Multiply:
251 case PPC::XSMULDP:
252 case PPC::XVMULDP:
253 case PPC::XVMULSP:
254 case PPC::XSMULSP:
257 // Fixed point:
258 // Multiply:
259 case PPC::MULHD:
260 case PPC::MULLD:
261 case PPC::MULHW:
262 case PPC::MULLW:
263 return true;
264 default:
265 return false;
266 }
267}
268
269#define InfoArrayIdxFMAInst 0
270#define InfoArrayIdxFAddInst 1
271#define InfoArrayIdxFMULInst 2
272#define InfoArrayIdxAddOpIdx 3
273#define InfoArrayIdxMULOpIdx 4
274#define InfoArrayIdxFSubInst 5
275// Array keeps info for FMA instructions:
276// Index 0(InfoArrayIdxFMAInst): FMA instruction;
277// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
278// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
279// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
280// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
281// second MUL operand index is plus 1;
282// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
283static const uint16_t FMAOpIdxInfo[][6] = {
284 // FIXME: Add more FMA instructions like XSNMADDADP and so on.
285 {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
286 {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
287 {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
288 {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
289 {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
290 {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
291
292// Check if an opcode is a FMA instruction. If it is, return the index in array
293// FMAOpIdxInfo. Otherwise, return -1.
294int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
295 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
296 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
297 return I;
298 return -1;
299}
300
301// On PowerPC target, we have two kinds of patterns related to FMA:
302// 1: Improve ILP.
303// Try to reassociate FMA chains like below:
304//
305// Pattern 1:
306// A = FADD X, Y (Leaf)
307// B = FMA A, M21, M22 (Prev)
308// C = FMA B, M31, M32 (Root)
309// -->
310// A = FMA X, M21, M22
311// B = FMA Y, M31, M32
312// C = FADD A, B
313//
314// Pattern 2:
315// A = FMA X, M11, M12 (Leaf)
316// B = FMA A, M21, M22 (Prev)
317// C = FMA B, M31, M32 (Root)
318// -->
319// A = FMUL M11, M12
320// B = FMA X, M21, M22
321// D = FMA A, M31, M32
322// C = FADD B, D
323//
324// breaking the dependency between A and B, allowing FMA to be executed in
325// parallel (or back-to-back in a pipeline) instead of depending on each other.
326//
327// 2: Reduce register pressure.
328// Try to reassociate FMA with FSUB and a constant like below:
329// C is a floating point const.
330//
331// Pattern 1:
332// A = FSUB X, Y (Leaf)
333// D = FMA B, C, A (Root)
334// -->
335// A = FMA B, Y, -C
336// D = FMA A, X, C
337//
338// Pattern 2:
339// A = FSUB X, Y (Leaf)
340// D = FMA B, A, C (Root)
341// -->
342// A = FMA B, Y, -C
343// D = FMA A, X, C
344//
345// Before the transformation, A must be assigned with different hardware
346// register with D. After the transformation, A and D must be assigned with
347// same hardware register due to TIE attribute of FMA instructions.
348//
351 bool DoRegPressureReduce) const {
355
356 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
357 for (const auto &MO : Instr.explicit_operands())
358 if (!(MO.isReg() && MO.getReg().isVirtual()))
359 return false;
360 return true;
361 };
362
363 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
364 unsigned OpType) {
365 if (Instr.getOpcode() !=
366 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
367 return false;
368
369 // Instruction can be reassociated.
370 // fast math flags may prohibit reassociation.
371 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
372 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
373 return false;
374
375 // Instruction operands are virtual registers for reassociation.
376 if (!IsAllOpsVirtualReg(Instr))
377 return false;
378
379 // For register pressure reassociation, the FSub must have only one use as
380 // we want to delete the sub to save its def.
381 if (OpType == InfoArrayIdxFSubInst &&
382 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
383 return false;
384
385 return true;
386 };
387
388 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
389 int16_t &MulOpIdx, bool IsLeaf) {
390 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
391 if (Idx < 0)
392 return false;
393
394 // Instruction can be reassociated.
395 // fast math flags may prohibit reassociation.
396 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
397 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
398 return false;
399
400 // Instruction operands are virtual registers for reassociation.
401 if (!IsAllOpsVirtualReg(Instr))
402 return false;
403
405 if (IsLeaf)
406 return true;
407
409
410 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
411 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
412 // If 'add' operand's def is not in current block, don't do ILP related opt.
413 if (!MIAdd || MIAdd->getParent() != MBB)
414 return false;
415
416 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
417 // as this fma will be changed later.
418 return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
419 };
420
421 int16_t AddOpIdx = -1;
422 int16_t MulOpIdx = -1;
423
424 bool IsUsedOnceL = false;
425 bool IsUsedOnceR = false;
426 MachineInstr *MULInstrL = nullptr;
427 MachineInstr *MULInstrR = nullptr;
428
429 auto IsRPReductionCandidate = [&]() {
430 // Currently, we only support float and double.
431 // FIXME: add support for other types.
432 unsigned Opcode = Root.getOpcode();
433 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
434 return false;
435
436 // Root must be a valid FMA like instruction.
437 // Treat it as leaf as we don't care its add operand.
438 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
439 assert((MulOpIdx >= 0) && "mul operand index not right!");
440 Register MULRegL = TRI->lookThruSingleUseCopyChain(
441 Root.getOperand(MulOpIdx).getReg(), MRI);
442 Register MULRegR = TRI->lookThruSingleUseCopyChain(
443 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
444 if (!MULRegL && !MULRegR)
445 return false;
446
447 if (MULRegL && !MULRegR) {
448 MULRegR =
449 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
450 IsUsedOnceL = true;
451 } else if (!MULRegL && MULRegR) {
452 MULRegL =
453 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
454 IsUsedOnceR = true;
455 } else {
456 IsUsedOnceL = true;
457 IsUsedOnceR = true;
458 }
459
460 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
461 return false;
462
463 MULInstrL = MRI->getVRegDef(MULRegL);
464 MULInstrR = MRI->getVRegDef(MULRegR);
465 return true;
466 }
467 return false;
468 };
469
470 // Register pressure fma reassociation patterns.
471 if (DoRegPressureReduce && IsRPReductionCandidate()) {
472 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
473 // Register pressure pattern 1
474 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
475 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
476 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
478 return true;
479 }
480
481 // Register pressure pattern 2
482 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
483 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
484 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
486 return true;
487 }
488 }
489
490 // ILP fma reassociation patterns.
491 // Root must be a valid FMA like instruction.
492 AddOpIdx = -1;
493 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
494 return false;
495
496 assert((AddOpIdx >= 0) && "add operand index not right!");
497
498 Register RegB = Root.getOperand(AddOpIdx).getReg();
499 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
500
501 // Prev must be a valid FMA like instruction.
502 AddOpIdx = -1;
503 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
504 return false;
505
506 assert((AddOpIdx >= 0) && "add operand index not right!");
507
508 Register RegA = Prev->getOperand(AddOpIdx).getReg();
509 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
510 AddOpIdx = -1;
511 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
513 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
514 return true;
515 }
516 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
518 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
519 return true;
520 }
521 return false;
522}
523
525 MachineInstr &Root, unsigned &Pattern,
526 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
527 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
528
529 MachineFunction *MF = Root.getMF();
533
534 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
535 if (Idx < 0)
536 return;
537
539
540 // For now we only need to fix up placeholder for register pressure reduce
541 // patterns.
542 Register ConstReg = 0;
543 switch (Pattern) {
545 ConstReg =
546 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
547 break;
549 ConstReg =
550 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
551 break;
552 default:
553 // Not register pressure reduce patterns.
554 return;
555 }
556
557 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
558 // Get const value from const pool.
559 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
560 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
561
562 // Get negative fp const.
563 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
564 F1.changeSign();
565 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
566 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
567
568 // Put negative fp const into constant pool.
569 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
570
571 MachineOperand *Placeholder = nullptr;
572 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
573 for (auto *Inst : InsInstrs) {
574 for (MachineOperand &Operand : Inst->explicit_operands()) {
575 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
576 if (Operand.getReg() == PPC::ZERO8) {
577 Placeholder = &Operand;
578 break;
579 }
580 }
581 }
582
583 assert(Placeholder && "Placeholder does not exist!");
584
585 // Generate instructions to load the const fp from constant pool.
586 // We only support PPC64 and medium code model.
587 Register LoadNewConst =
588 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
589
590 // Fill the placeholder with the new load from constant pool.
591 Placeholder->setReg(LoadNewConst);
592}
593
595 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
596
598 return false;
599
600 // Currently, we only enable register pressure reducing in machine combiner
601 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
602 // support.
603 //
604 // So we need following instructions to access a TOC entry:
605 //
606 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
607 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
608 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
609 //
610 // FIXME: add more supported targets, like Small and Large code model, PPC32,
611 // AIX.
612 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
614 return false;
615
617 const MachineFunction *MF = MBB->getParent();
618 const MachineRegisterInfo *MRI = &MF->getRegInfo();
619
620 auto GetMBBPressure =
621 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
622 RegionPressure Pressure;
623 RegPressureTracker RPTracker(Pressure);
624
625 // Initialize the register pressure tracker.
626 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
627 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
628
629 for (const auto &MI : reverse(*MBB)) {
630 if (MI.isDebugValue() || MI.isDebugLabel())
631 continue;
632 RegisterOperands RegOpers;
633 RegOpers.collect(MI, *TRI, *MRI, false, false);
634 RPTracker.recedeSkipDebugValues();
635 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
636 RPTracker.recede(RegOpers);
637 }
638
639 // Close the RPTracker to finalize live ins.
640 RPTracker.closeRegion();
641
642 return RPTracker.getPressure().MaxSetPressure;
643 };
644
645 // For now we only care about float and double type fma.
646 unsigned VSSRCLimit =
647 RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);
648
649 // Only reduce register pressure when pressure is high.
650 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
651 (float)VSSRCLimit * FMARPFactor;
652}
653
655 // I has only one memory operand which is load from constant pool.
656 if (!I->hasOneMemOperand())
657 return false;
658
659 MachineMemOperand *Op = I->memoperands()[0];
660 return Op->isLoad() && Op->getPseudoValue() &&
661 Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
662}
663
664Register PPCInstrInfo::generateLoadForNewConst(
665 unsigned Idx, MachineInstr *MI, Type *Ty,
666 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
667 // Now we only support PPC64, Medium code model and P9 with vector.
668 // We have immutable pattern to access const pool. See function
669 // shouldReduceRegisterPressure.
670 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
672 "Target not supported!\n");
673
674 MachineFunction *MF = MI->getMF();
676
677 // Generate ADDIStocHA8
678 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
679 MachineInstrBuilder TOCOffset =
680 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
681 .addReg(PPC::X2)
683
684 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
685 "Only float and double are supported!");
686
687 unsigned LoadOpcode;
688 // Should be float type or double type.
689 if (Ty->isFloatTy())
690 LoadOpcode = PPC::DFLOADf32;
691 else
692 LoadOpcode = PPC::DFLOADf64;
693
694 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
695 Register VReg2 = MRI->createVirtualRegister(RC);
699
700 // Generate Load from constant pool.
702 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
704 .addReg(VReg1, getKillRegState(true))
705 .addMemOperand(MMO);
706
707 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
708
709 // Insert the toc load instructions into InsInstrs.
710 InsInstrs.insert(InsInstrs.begin(), Load);
711 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
712 return VReg2;
713}
714
715// This function returns the const value in constant pool if the \p I is a load
716// from constant pool.
717const Constant *
719 MachineFunction *MF = I->getMF();
722 assert(I->mayLoad() && "Should be a load instruction.\n");
723 for (auto MO : I->uses()) {
724 if (!MO.isReg())
725 continue;
726 Register Reg = MO.getReg();
727 if (Reg == 0 || !Reg.isVirtual())
728 continue;
729 // Find the toc address.
730 MachineInstr *DefMI = MRI->getVRegDef(Reg);
731 for (auto MO2 : DefMI->uses())
732 if (MO2.isCPI())
733 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
734 }
735 return nullptr;
736}
737
739 switch (Pattern) {
746 default:
748 }
749}
750
753 bool DoRegPressureReduce) const {
754 // Using the machine combiner in this way is potentially expensive, so
755 // restrict to when aggressive optimizations are desired.
757 return false;
758
759 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
760 return true;
761
763 DoRegPressureReduce);
764}
765
767 MachineInstr &Root, unsigned Pattern,
770 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
771 switch (Pattern) {
776 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
777 break;
778 default:
779 // Reassociate default patterns.
781 DelInstrs, InstrIdxForVirtReg);
782 break;
783 }
784}
785
786void PPCInstrInfo::reassociateFMA(
787 MachineInstr &Root, unsigned Pattern,
790 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
791 MachineFunction *MF = Root.getMF();
794 MachineOperand &OpC = Root.getOperand(0);
795 Register RegC = OpC.getReg();
796 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
797 MRI.constrainRegClass(RegC, RC);
798
799 unsigned FmaOp = Root.getOpcode();
800 int16_t Idx = getFMAOpIdxInfo(FmaOp);
801 assert(Idx >= 0 && "Root must be a FMA instruction");
802
803 bool IsILPReassociate =
806
809
810 MachineInstr *Prev = nullptr;
811 MachineInstr *Leaf = nullptr;
812 switch (Pattern) {
813 default:
814 llvm_unreachable("not recognized pattern!");
817 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
818 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
819 break;
821 Register MULReg =
822 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
823 Leaf = MRI.getVRegDef(MULReg);
824 break;
825 }
827 Register MULReg = TRI->lookThruCopyLike(
828 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
829 Leaf = MRI.getVRegDef(MULReg);
830 break;
831 }
832 }
833
834 uint32_t IntersectedFlags = 0;
835 if (IsILPReassociate)
836 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
837 else
838 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
839
840 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
841 bool &KillFlag) {
842 Reg = Operand.getReg();
843 MRI.constrainRegClass(Reg, RC);
844 KillFlag = Operand.isKill();
845 };
846
847 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
848 Register &MulOp2, Register &AddOp,
849 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
850 bool &AddOpKillFlag) {
851 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
852 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
853 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
854 };
855
856 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
857 RegA21, RegB;
858 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
859 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
860 KillA11 = false, KillA21 = false, KillB = false;
861
862 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
863
864 if (IsILPReassociate)
865 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
866
868 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
869 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
871 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
872 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
873 } else {
874 // Get FSUB instruction info.
875 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
876 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
877 }
878
879 // Create new virtual registers for the new results instead of
880 // recycling legacy ones because the MachineCombiner's computation of the
881 // critical path requires a new register definition rather than an existing
882 // one.
883 // For register pressure reassociation, we only need create one virtual
884 // register for the new fma.
885 Register NewVRA = MRI.createVirtualRegister(RC);
886 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
887
888 Register NewVRB = 0;
889 if (IsILPReassociate) {
890 NewVRB = MRI.createVirtualRegister(RC);
891 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
892 }
893
894 Register NewVRD = 0;
896 NewVRD = MRI.createVirtualRegister(RC);
897 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
898 }
899
900 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
901 Register RegMul1, bool KillRegMul1,
902 Register RegMul2, bool KillRegMul2) {
903 MI->getOperand(AddOpIdx).setReg(RegAdd);
904 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
905 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
906 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
907 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
908 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
909 };
910
911 MachineInstrBuilder NewARegPressure, NewCRegPressure;
912 switch (Pattern) {
913 default:
914 llvm_unreachable("not recognized pattern!");
916 // Create new instructions for insertion.
917 MachineInstrBuilder MINewB =
918 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
919 .addReg(RegX, getKillRegState(KillX))
920 .addReg(RegM21, getKillRegState(KillM21))
921 .addReg(RegM22, getKillRegState(KillM22));
922 MachineInstrBuilder MINewA =
923 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
924 .addReg(RegY, getKillRegState(KillY))
925 .addReg(RegM31, getKillRegState(KillM31))
926 .addReg(RegM32, getKillRegState(KillM32));
927 // If AddOpIdx is not 1, adjust the order.
928 if (AddOpIdx != 1) {
929 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
930 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
931 }
932
933 MachineInstrBuilder MINewC =
934 BuildMI(*MF, Root.getDebugLoc(),
936 .addReg(NewVRB, getKillRegState(true))
937 .addReg(NewVRA, getKillRegState(true));
938
939 // Update flags for newly created instructions.
940 setSpecialOperandAttr(*MINewA, IntersectedFlags);
941 setSpecialOperandAttr(*MINewB, IntersectedFlags);
942 setSpecialOperandAttr(*MINewC, IntersectedFlags);
943
944 // Record new instructions for insertion.
945 InsInstrs.push_back(MINewA);
946 InsInstrs.push_back(MINewB);
947 InsInstrs.push_back(MINewC);
948 break;
949 }
951 assert(NewVRD && "new FMA register not created!");
952 // Create new instructions for insertion.
953 MachineInstrBuilder MINewA =
954 BuildMI(*MF, Leaf->getDebugLoc(),
956 .addReg(RegM11, getKillRegState(KillM11))
957 .addReg(RegM12, getKillRegState(KillM12));
958 MachineInstrBuilder MINewB =
959 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
960 .addReg(RegX, getKillRegState(KillX))
961 .addReg(RegM21, getKillRegState(KillM21))
962 .addReg(RegM22, getKillRegState(KillM22));
963 MachineInstrBuilder MINewD =
964 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
965 .addReg(NewVRA, getKillRegState(true))
966 .addReg(RegM31, getKillRegState(KillM31))
967 .addReg(RegM32, getKillRegState(KillM32));
968 // If AddOpIdx is not 1, adjust the order.
969 if (AddOpIdx != 1) {
970 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
971 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
972 KillM32);
973 }
974
975 MachineInstrBuilder MINewC =
976 BuildMI(*MF, Root.getDebugLoc(),
978 .addReg(NewVRB, getKillRegState(true))
979 .addReg(NewVRD, getKillRegState(true));
980
981 // Update flags for newly created instructions.
982 setSpecialOperandAttr(*MINewA, IntersectedFlags);
983 setSpecialOperandAttr(*MINewB, IntersectedFlags);
984 setSpecialOperandAttr(*MINewD, IntersectedFlags);
985 setSpecialOperandAttr(*MINewC, IntersectedFlags);
986
987 // Record new instructions for insertion.
988 InsInstrs.push_back(MINewA);
989 InsInstrs.push_back(MINewB);
990 InsInstrs.push_back(MINewD);
991 InsInstrs.push_back(MINewC);
992 break;
993 }
996 Register VarReg;
997 bool KillVarReg = false;
999 VarReg = RegM31;
1000 KillVarReg = KillM31;
1001 } else {
1002 VarReg = RegM32;
1003 KillVarReg = KillM32;
1004 }
1005 // We don't want to get negative const from memory pool too early, as the
1006 // created entry will not be deleted even if it has no users. Since all
1007 // operand of Leaf and Root are virtual register, we use zero register
1008 // here as a placeholder. When the InsInstrs is selected in
1009 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
1010 // with a virtual register which is a load from constant pool.
1011 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
1012 .addReg(RegB, getKillRegState(RegB))
1013 .addReg(RegY, getKillRegState(KillY))
1014 .addReg(PPC::ZERO8);
1015 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
1016 .addReg(NewVRA, getKillRegState(true))
1017 .addReg(RegX, getKillRegState(KillX))
1018 .addReg(VarReg, getKillRegState(KillVarReg));
1019 // For now, we only support xsmaddadp/xsmaddasp, their add operand are
1020 // both at index 1, no need to adjust.
1021 // FIXME: when add more fma instructions support, like fma/fmas, adjust
1022 // the operand index here.
1023 break;
1024 }
1025 }
1026
1027 if (!IsILPReassociate) {
1028 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
1029 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
1030
1031 InsInstrs.push_back(NewARegPressure);
1032 InsInstrs.push_back(NewCRegPressure);
1033 }
1034
1035 assert(!InsInstrs.empty() &&
1036 "Insertion instructions set should not be empty!");
1037
1038 // Record old instructions for deletion.
1039 DelInstrs.push_back(Leaf);
1040 if (IsILPReassociate)
1041 DelInstrs.push_back(Prev);
1042 DelInstrs.push_back(&Root);
1043}
1044
1045// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
1047 Register &SrcReg, Register &DstReg,
1048 unsigned &SubIdx) const {
1049 switch (MI.getOpcode()) {
1050 default: return false;
1051 case PPC::EXTSW:
1052 case PPC::EXTSW_32:
1053 case PPC::EXTSW_32_64:
1054 SrcReg = MI.getOperand(1).getReg();
1055 DstReg = MI.getOperand(0).getReg();
1056 SubIdx = PPC::sub_32;
1057 return true;
1058 }
1059}
1060
1062 int &FrameIndex) const {
1063 if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
1064 // Check for the operands added by addFrameReference (the immediate is the
1065 // offset which defaults to 0).
1066 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1067 MI.getOperand(2).isFI()) {
1068 FrameIndex = MI.getOperand(2).getIndex();
1069 return MI.getOperand(0).getReg();
1070 }
1071 }
1072 return 0;
1073}
1074
1075// For opcodes with the ReMaterializable flag set, this function is called to
1076// verify the instruction is really rematable.
1078 const MachineInstr &MI) const {
1079 switch (MI.getOpcode()) {
1080 default:
1081 // Let base implementaion decide.
1082 break;
1083 case PPC::LI:
1084 case PPC::LI8:
1085 case PPC::PLI:
1086 case PPC::PLI8:
1087 case PPC::LIS:
1088 case PPC::LIS8:
1089 case PPC::ADDIStocHA:
1090 case PPC::ADDIStocHA8:
1091 case PPC::ADDItocL:
1092 case PPC::ADDItocL8:
1093 case PPC::LOAD_STACK_GUARD:
1094 case PPC::PPCLdFixedAddr:
1095 case PPC::XXLXORz:
1096 case PPC::XXLXORspz:
1097 case PPC::XXLXORdpz:
1098 case PPC::XXLEQVOnes:
1099 case PPC::XXSPLTI32DX:
1100 case PPC::XXSPLTIW:
1101 case PPC::XXSPLTIDP:
1102 case PPC::V_SET0B:
1103 case PPC::V_SET0H:
1104 case PPC::V_SET0:
1105 case PPC::V_SETALLONESB:
1106 case PPC::V_SETALLONESH:
1107 case PPC::V_SETALLONES:
1108 case PPC::CRSET:
1109 case PPC::CRUNSET:
1110 case PPC::XXSETACCZ:
1111 case PPC::XXSETACCZW:
1112 return true;
1113 }
1115}
1116
1118 int &FrameIndex) const {
1119 if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
1120 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1121 MI.getOperand(2).isFI()) {
1122 FrameIndex = MI.getOperand(2).getIndex();
1123 return MI.getOperand(0).getReg();
1124 }
1125 }
1126 return 0;
1127}
1128
1130 unsigned OpIdx1,
1131 unsigned OpIdx2) const {
1132 MachineFunction &MF = *MI.getParent()->getParent();
1133
1134 // Normal instructions can be commuted the obvious way.
1135 if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
1136 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1137 // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
1138 // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
1139 // changing the relative order of the mask operands might change what happens
1140 // to the high-bits of the mask (and, thus, the result).
1141
1142 // Cannot commute if it has a non-zero rotate count.
1143 if (MI.getOperand(3).getImm() != 0)
1144 return nullptr;
1145
1146 // If we have a zero rotate count, we have:
1147 // M = mask(MB,ME)
1148 // Op0 = (Op1 & ~M) | (Op2 & M)
1149 // Change this to:
1150 // M = mask((ME+1)&31, (MB-1)&31)
1151 // Op0 = (Op2 & ~M) | (Op1 & M)
1152
1153 // Swap op1/op2
1154 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
1155 "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
1156 Register Reg0 = MI.getOperand(0).getReg();
1157 Register Reg1 = MI.getOperand(1).getReg();
1158 Register Reg2 = MI.getOperand(2).getReg();
1159 unsigned SubReg1 = MI.getOperand(1).getSubReg();
1160 unsigned SubReg2 = MI.getOperand(2).getSubReg();
1161 bool Reg1IsKill = MI.getOperand(1).isKill();
1162 bool Reg2IsKill = MI.getOperand(2).isKill();
1163 bool ChangeReg0 = false;
1164 // If machine instrs are no longer in two-address forms, update
1165 // destination register as well.
1166 if (Reg0 == Reg1) {
1167 // Must be two address instruction (i.e. op1 is tied to op0).
1168 assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
1169 "Expecting a two-address instruction!");
1170 assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
1171 Reg2IsKill = false;
1172 ChangeReg0 = true;
1173 }
1174
1175 // Masks.
1176 unsigned MB = MI.getOperand(4).getImm();
1177 unsigned ME = MI.getOperand(5).getImm();
1178
1179 // We can't commute a trivial mask (there is no way to represent an all-zero
1180 // mask).
1181 if (MB == 0 && ME == 31)
1182 return nullptr;
1183
1184 if (NewMI) {
1185 // Create a new instruction.
1186 Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
1187 bool Reg0IsDead = MI.getOperand(0).isDead();
1188 return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
1189 .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
1190 .addReg(Reg2, getKillRegState(Reg2IsKill))
1191 .addReg(Reg1, getKillRegState(Reg1IsKill))
1192 .addImm((ME + 1) & 31)
1193 .addImm((MB - 1) & 31);
1194 }
1195
1196 if (ChangeReg0) {
1197 MI.getOperand(0).setReg(Reg2);
1198 MI.getOperand(0).setSubReg(SubReg2);
1199 }
1200 MI.getOperand(2).setReg(Reg1);
1201 MI.getOperand(1).setReg(Reg2);
1202 MI.getOperand(2).setSubReg(SubReg1);
1203 MI.getOperand(1).setSubReg(SubReg2);
1204 MI.getOperand(2).setIsKill(Reg1IsKill);
1205 MI.getOperand(1).setIsKill(Reg2IsKill);
1206
1207 // Swap the mask around.
1208 MI.getOperand(4).setImm((ME + 1) & 31);
1209 MI.getOperand(5).setImm((MB - 1) & 31);
1210 return &MI;
1211}
1212
1214 unsigned &SrcOpIdx1,
1215 unsigned &SrcOpIdx2) const {
1216 // For VSX A-Type FMA instructions, it is the first two operands that can be
1217 // commuted, however, because the non-encoded tied input operand is listed
1218 // first, the operands to swap are actually the second and third.
1219
1220 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
1221 if (AltOpc == -1)
1222 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1223
1224 // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
1225 // and SrcOpIdx2.
1226 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
1227}
1228
1231 // This function is used for scheduling, and the nop wanted here is the type
1232 // that terminates dispatch groups on the POWER cores.
1233 unsigned Directive = Subtarget.getCPUDirective();
1234 unsigned Opcode;
1235 switch (Directive) {
1236 default: Opcode = PPC::NOP; break;
1237 case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
1238 case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
1239 case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
1240 // FIXME: Update when POWER9 scheduling model is ready.
1241 case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
1242 }
1243
1244 DebugLoc DL;
1245 BuildMI(MBB, MI, DL, get(Opcode));
1246}
1247
1248/// Return the noop instruction to use for a noop.
1250 MCInst Nop;
1251 Nop.setOpcode(PPC::NOP);
1252 return Nop;
1253}
1254
1255// Branch analysis.
1256// Note: If the condition register is set to CTR or CTR8 then this is a
1257// BDNZ (imm == 1) or BDZ (imm == 0) branch.
1260 MachineBasicBlock *&FBB,
1262 bool AllowModify) const {
1263 bool isPPC64 = Subtarget.isPPC64();
1264
1265 // If the block has no terminators, it just falls into the block after it.
1267 if (I == MBB.end())
1268 return false;
1269
1270 if (!isUnpredicatedTerminator(*I))
1271 return false;
1272
1273 if (AllowModify) {
1274 // If the BB ends with an unconditional branch to the fallthrough BB,
1275 // we eliminate the branch instruction.
1276 if (I->getOpcode() == PPC::B &&
1277 MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
1278 I->eraseFromParent();
1279
1280 // We update iterator after deleting the last branch.
1282 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
1283 return false;
1284 }
1285 }
1286
1287 // Get the last instruction in the block.
1288 MachineInstr &LastInst = *I;
1289
1290 // If there is only one terminator instruction, process it.
1291 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
1292 if (LastInst.getOpcode() == PPC::B) {
1293 if (!LastInst.getOperand(0).isMBB())
1294 return true;
1295 TBB = LastInst.getOperand(0).getMBB();
1296 return false;
1297 } else if (LastInst.getOpcode() == PPC::BCC) {
1298 if (!LastInst.getOperand(2).isMBB())
1299 return true;
1300 // Block ends with fall-through condbranch.
1301 TBB = LastInst.getOperand(2).getMBB();
1302 Cond.push_back(LastInst.getOperand(0));
1303 Cond.push_back(LastInst.getOperand(1));
1304 return false;
1305 } else if (LastInst.getOpcode() == PPC::BC) {
1306 if (!LastInst.getOperand(1).isMBB())
1307 return true;
1308 // Block ends with fall-through condbranch.
1309 TBB = LastInst.getOperand(1).getMBB();
1311 Cond.push_back(LastInst.getOperand(0));
1312 return false;
1313 } else if (LastInst.getOpcode() == PPC::BCn) {
1314 if (!LastInst.getOperand(1).isMBB())
1315 return true;
1316 // Block ends with fall-through condbranch.
1317 TBB = LastInst.getOperand(1).getMBB();
1319 Cond.push_back(LastInst.getOperand(0));
1320 return false;
1321 } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
1322 LastInst.getOpcode() == PPC::BDNZ) {
1323 if (!LastInst.getOperand(0).isMBB())
1324 return true;
1326 return true;
1327 TBB = LastInst.getOperand(0).getMBB();
1328 Cond.push_back(MachineOperand::CreateImm(1));
1329 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1330 true));
1331 return false;
1332 } else if (LastInst.getOpcode() == PPC::BDZ8 ||
1333 LastInst.getOpcode() == PPC::BDZ) {
1334 if (!LastInst.getOperand(0).isMBB())
1335 return true;
1337 return true;
1338 TBB = LastInst.getOperand(0).getMBB();
1339 Cond.push_back(MachineOperand::CreateImm(0));
1340 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1341 true));
1342 return false;
1343 }
1344
1345 // Otherwise, don't know what this is.
1346 return true;
1347 }
1348
1349 // Get the instruction before it if it's a terminator.
1350 MachineInstr &SecondLastInst = *I;
1351
1352 // If there are three terminators, we don't know what sort of block this is.
1353 if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
1354 return true;
1355
1356 // If the block ends with PPC::B and PPC:BCC, handle it.
1357 if (SecondLastInst.getOpcode() == PPC::BCC &&
1358 LastInst.getOpcode() == PPC::B) {
1359 if (!SecondLastInst.getOperand(2).isMBB() ||
1360 !LastInst.getOperand(0).isMBB())
1361 return true;
1362 TBB = SecondLastInst.getOperand(2).getMBB();
1363 Cond.push_back(SecondLastInst.getOperand(0));
1364 Cond.push_back(SecondLastInst.getOperand(1));
1365 FBB = LastInst.getOperand(0).getMBB();
1366 return false;
1367 } else if (SecondLastInst.getOpcode() == PPC::BC &&
1368 LastInst.getOpcode() == PPC::B) {
1369 if (!SecondLastInst.getOperand(1).isMBB() ||
1370 !LastInst.getOperand(0).isMBB())
1371 return true;
1372 TBB = SecondLastInst.getOperand(1).getMBB();
1374 Cond.push_back(SecondLastInst.getOperand(0));
1375 FBB = LastInst.getOperand(0).getMBB();
1376 return false;
1377 } else if (SecondLastInst.getOpcode() == PPC::BCn &&
1378 LastInst.getOpcode() == PPC::B) {
1379 if (!SecondLastInst.getOperand(1).isMBB() ||
1380 !LastInst.getOperand(0).isMBB())
1381 return true;
1382 TBB = SecondLastInst.getOperand(1).getMBB();
1384 Cond.push_back(SecondLastInst.getOperand(0));
1385 FBB = LastInst.getOperand(0).getMBB();
1386 return false;
1387 } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
1388 SecondLastInst.getOpcode() == PPC::BDNZ) &&
1389 LastInst.getOpcode() == PPC::B) {
1390 if (!SecondLastInst.getOperand(0).isMBB() ||
1391 !LastInst.getOperand(0).isMBB())
1392 return true;
1394 return true;
1395 TBB = SecondLastInst.getOperand(0).getMBB();
1396 Cond.push_back(MachineOperand::CreateImm(1));
1397 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1398 true));
1399 FBB = LastInst.getOperand(0).getMBB();
1400 return false;
1401 } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
1402 SecondLastInst.getOpcode() == PPC::BDZ) &&
1403 LastInst.getOpcode() == PPC::B) {
1404 if (!SecondLastInst.getOperand(0).isMBB() ||
1405 !LastInst.getOperand(0).isMBB())
1406 return true;
1408 return true;
1409 TBB = SecondLastInst.getOperand(0).getMBB();
1410 Cond.push_back(MachineOperand::CreateImm(0));
1411 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1412 true));
1413 FBB = LastInst.getOperand(0).getMBB();
1414 return false;
1415 }
1416
1417 // If the block ends with two PPC:Bs, handle it. The second one is not
1418 // executed, so remove it.
1419 if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
1420 if (!SecondLastInst.getOperand(0).isMBB())
1421 return true;
1422 TBB = SecondLastInst.getOperand(0).getMBB();
1423 I = LastInst;
1424 if (AllowModify)
1425 I->eraseFromParent();
1426 return false;
1427 }
1428
1429 // Otherwise, can't handle this.
1430 return true;
1431}
1432
1434 int *BytesRemoved) const {
1435 assert(!BytesRemoved && "code size not handled");
1436
1438 if (I == MBB.end())
1439 return 0;
1440
1441 if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
1442 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1443 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1444 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1445 return 0;
1446
1447 // Remove the branch.
1448 I->eraseFromParent();
1449
1450 I = MBB.end();
1451
1452 if (I == MBB.begin()) return 1;
1453 --I;
1454 if (I->getOpcode() != PPC::BCC &&
1455 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1456 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1457 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1458 return 1;
1459
1460 // Remove the branch.
1461 I->eraseFromParent();
1462 return 2;
1463}
1464
1467 MachineBasicBlock *FBB,
1469 const DebugLoc &DL,
1470 int *BytesAdded) const {
1471 // Shouldn't be a fall through.
1472 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1473 assert((Cond.size() == 2 || Cond.size() == 0) &&
1474 "PPC branch conditions have two components!");
1475 assert(!BytesAdded && "code size not handled");
1476
1477 bool isPPC64 = Subtarget.isPPC64();
1478
1479 // One-way branch.
1480 if (!FBB) {
1481 if (Cond.empty()) // Unconditional branch
1482 BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
1483 else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1484 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1485 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1486 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1487 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1488 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1489 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1490 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1491 else // Conditional branch
1492 BuildMI(&MBB, DL, get(PPC::BCC))
1493 .addImm(Cond[0].getImm())
1494 .add(Cond[1])
1495 .addMBB(TBB);
1496 return 1;
1497 }
1498
1499 // Two-way Conditional Branch.
1500 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1501 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1502 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1503 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1504 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1505 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1506 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1507 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1508 else
1509 BuildMI(&MBB, DL, get(PPC::BCC))
1510 .addImm(Cond[0].getImm())
1511 .add(Cond[1])
1512 .addMBB(TBB);
1513 BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
1514 return 2;
1515}
1516
1517// Select analysis.
1520 Register DstReg, Register TrueReg,
1521 Register FalseReg, int &CondCycles,
1522 int &TrueCycles, int &FalseCycles) const {
1523 if (!Subtarget.hasISEL())
1524 return false;
1525
1526 if (Cond.size() != 2)
1527 return false;
1528
1529 // If this is really a bdnz-like condition, then it cannot be turned into a
1530 // select.
1531 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1532 return false;
1533
1534 // If the conditional branch uses a physical register, then it cannot be
1535 // turned into a select.
1536 if (Cond[1].getReg().isPhysical())
1537 return false;
1538
1539 // Check register classes.
1541 const TargetRegisterClass *RC =
1542 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1543 if (!RC)
1544 return false;
1545
1546 // isel is for regular integer GPRs only.
1547 if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1548 !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1549 !PPC::G8RCRegClass.hasSubClassEq(RC) &&
1550 !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1551 return false;
1552
1553 // FIXME: These numbers are for the A2, how well they work for other cores is
1554 // an open question. On the A2, the isel instruction has a 2-cycle latency
1555 // but single-cycle throughput. These numbers are used in combination with
1556 // the MispredictPenalty setting from the active SchedMachineModel.
1557 CondCycles = 1;
1558 TrueCycles = 1;
1559 FalseCycles = 1;
1560
1561 return true;
1562}
1563
1566 const DebugLoc &dl, Register DestReg,
1568 Register FalseReg) const {
1569 assert(Cond.size() == 2 &&
1570 "PPC branch conditions have two components!");
1571
1572 // Get the register classes.
1574 const TargetRegisterClass *RC =
1575 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1576 assert(RC && "TrueReg and FalseReg must have overlapping register classes");
1577
1578 bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
1579 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
1580 assert((Is64Bit ||
1581 PPC::GPRCRegClass.hasSubClassEq(RC) ||
1582 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
1583 "isel is for regular integer GPRs only");
1584
1585 unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
1586 auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
1587
1588 unsigned SubIdx = 0;
1589 bool SwapOps = false;
1590 switch (SelectPred) {
1591 case PPC::PRED_EQ:
1592 case PPC::PRED_EQ_MINUS:
1593 case PPC::PRED_EQ_PLUS:
1594 SubIdx = PPC::sub_eq; SwapOps = false; break;
1595 case PPC::PRED_NE:
1596 case PPC::PRED_NE_MINUS:
1597 case PPC::PRED_NE_PLUS:
1598 SubIdx = PPC::sub_eq; SwapOps = true; break;
1599 case PPC::PRED_LT:
1600 case PPC::PRED_LT_MINUS:
1601 case PPC::PRED_LT_PLUS:
1602 SubIdx = PPC::sub_lt; SwapOps = false; break;
1603 case PPC::PRED_GE:
1604 case PPC::PRED_GE_MINUS:
1605 case PPC::PRED_GE_PLUS:
1606 SubIdx = PPC::sub_lt; SwapOps = true; break;
1607 case PPC::PRED_GT:
1608 case PPC::PRED_GT_MINUS:
1609 case PPC::PRED_GT_PLUS:
1610 SubIdx = PPC::sub_gt; SwapOps = false; break;
1611 case PPC::PRED_LE:
1612 case PPC::PRED_LE_MINUS:
1613 case PPC::PRED_LE_PLUS:
1614 SubIdx = PPC::sub_gt; SwapOps = true; break;
1615 case PPC::PRED_UN:
1616 case PPC::PRED_UN_MINUS:
1617 case PPC::PRED_UN_PLUS:
1618 SubIdx = PPC::sub_un; SwapOps = false; break;
1619 case PPC::PRED_NU:
1620 case PPC::PRED_NU_MINUS:
1621 case PPC::PRED_NU_PLUS:
1622 SubIdx = PPC::sub_un; SwapOps = true; break;
1623 case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
1624 case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
1625 }
1626
1627 Register FirstReg = SwapOps ? FalseReg : TrueReg,
1628 SecondReg = SwapOps ? TrueReg : FalseReg;
1629
1630 // The first input register of isel cannot be r0. If it is a member
1631 // of a register class that can be r0, then copy it first (the
1632 // register allocator should eliminate the copy).
1633 if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
1634 MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
1635 const TargetRegisterClass *FirstRC =
1636 MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
1637 &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
1638 Register OldFirstReg = FirstReg;
1639 FirstReg = MRI.createVirtualRegister(FirstRC);
1640 BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
1641 .addReg(OldFirstReg);
1642 }
1643
1644 BuildMI(MBB, MI, dl, get(OpCode), DestReg)
1645 .addReg(FirstReg).addReg(SecondReg)
1646 .addReg(Cond[1].getReg(), 0, SubIdx);
1647}
1648
1649static unsigned getCRBitValue(unsigned CRBit) {
1650 unsigned Ret = 4;
1651 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1652 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1653 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1654 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1655 Ret = 3;
1656 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1657 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1658 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1659 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1660 Ret = 2;
1661 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1662 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1663 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1664 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1665 Ret = 1;
1666 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1667 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1668 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1669 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1670 Ret = 0;
1671
1672 assert(Ret != 4 && "Invalid CR bit register");
1673 return Ret;
1674}
1675
1678 const DebugLoc &DL, MCRegister DestReg,
1679 MCRegister SrcReg, bool KillSrc,
1680 bool RenamableDest, bool RenamableSrc) const {
1681 // We can end up with self copies and similar things as a result of VSX copy
1682 // legalization. Promote them here.
1684 if (PPC::F8RCRegClass.contains(DestReg) &&
1685 PPC::VSRCRegClass.contains(SrcReg)) {
1686 MCRegister SuperReg =
1687 TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
1688
1689 if (VSXSelfCopyCrash && SrcReg == SuperReg)
1690 llvm_unreachable("nop VSX copy");
1691
1692 DestReg = SuperReg;
1693 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
1694 PPC::VSRCRegClass.contains(DestReg)) {
1695 MCRegister SuperReg =
1696 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
1697
1698 if (VSXSelfCopyCrash && DestReg == SuperReg)
1699 llvm_unreachable("nop VSX copy");
1700
1701 SrcReg = SuperReg;
1702 }
1703
1704 // Different class register copy
1705 if (PPC::CRBITRCRegClass.contains(SrcReg) &&
1706 PPC::GPRCRegClass.contains(DestReg)) {
1707 MCRegister CRReg = getCRFromCRBit(SrcReg);
1708 BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
1709 getKillRegState(KillSrc);
1710 // Rotate the CR bit in the CR fields to be the least significant bit and
1711 // then mask with 0x1 (MB = ME = 31).
1712 BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
1713 .addReg(DestReg, RegState::Kill)
1714 .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
1715 .addImm(31)
1716 .addImm(31);
1717 return;
1718 } else if (PPC::CRRCRegClass.contains(SrcReg) &&
1719 (PPC::G8RCRegClass.contains(DestReg) ||
1720 PPC::GPRCRegClass.contains(DestReg))) {
1721 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1722 unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1723 unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1724 unsigned CRNum = TRI->getEncodingValue(SrcReg);
1725 BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
1726 getKillRegState(KillSrc);
1727 if (CRNum == 7)
1728 return;
1729 // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
1730 BuildMI(MBB, I, DL, get(ShCode), DestReg)
1731 .addReg(DestReg, RegState::Kill)
1732 .addImm(CRNum * 4 + 4)
1733 .addImm(28)
1734 .addImm(31);
1735 return;
1736 } else if (PPC::G8RCRegClass.contains(SrcReg) &&
1737 PPC::VSFRCRegClass.contains(DestReg)) {
1738 assert(Subtarget.hasDirectMove() &&
1739 "Subtarget doesn't support directmove, don't know how to copy.");
1740 BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
1741 NumGPRtoVSRSpill++;
1742 getKillRegState(KillSrc);
1743 return;
1744 } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
1745 PPC::G8RCRegClass.contains(DestReg)) {
1746 assert(Subtarget.hasDirectMove() &&
1747 "Subtarget doesn't support directmove, don't know how to copy.");
1748 BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
1749 getKillRegState(KillSrc);
1750 return;
1751 } else if (PPC::SPERCRegClass.contains(SrcReg) &&
1752 PPC::GPRCRegClass.contains(DestReg)) {
1753 BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
1754 getKillRegState(KillSrc);
1755 return;
1756 } else if (PPC::GPRCRegClass.contains(SrcReg) &&
1757 PPC::SPERCRegClass.contains(DestReg)) {
1758 BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
1759 getKillRegState(KillSrc);
1760 return;
1761 }
1762
1763 unsigned Opc;
1764 if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
1765 Opc = PPC::OR;
1766 else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
1767 Opc = PPC::OR8;
1768 else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
1769 Opc = PPC::FMR;
1770 else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
1771 Opc = PPC::MCRF;
1772 else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
1773 Opc = PPC::VOR;
1774 else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
1775 // There are two different ways this can be done:
1776 // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1777 // issue in VSU pipeline 0.
1778 // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1779 // can go to either pipeline.
1780 // We'll always use xxlor here, because in practically all cases where
1781 // copies are generated, they are close enough to some use that the
1782 // lower-latency form is preferable.
1783 Opc = PPC::XXLOR;
1784 else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
1785 PPC::VSSRCRegClass.contains(DestReg, SrcReg))
1786 Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1787 else if (Subtarget.pairedVectorMemops() &&
1788 PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
1789 if (SrcReg > PPC::VSRp15)
1790 SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
1791 else
1792 SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
1793 if (DestReg > PPC::VSRp15)
1794 DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
1795 else
1796 DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
1797 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
1798 addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1799 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
1800 addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
1801 return;
1802 }
1803 else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
1804 Opc = PPC::CROR;
1805 else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
1806 Opc = PPC::EVOR;
1807 else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1808 PPC::UACCRCRegClass.contains(DestReg)) &&
1809 (PPC::ACCRCRegClass.contains(SrcReg) ||
1810 PPC::UACCRCRegClass.contains(SrcReg))) {
1811 // If primed, de-prime the source register, copy the individual registers
1812 // and prime the destination if needed. The vector subregisters are
1813 // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1814 // source is primed, we need to re-prime it after the copy as well.
1815 PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1816 bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1817 bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1818 MCRegister VSLSrcReg =
1819 PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1820 MCRegister VSLDestReg =
1821 PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1822 if (SrcPrimed)
1823 BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1824 for (unsigned Idx = 0; Idx < 4; Idx++)
1825 BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1826 .addReg(VSLSrcReg + Idx)
1827 .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
1828 if (DestPrimed)
1829 BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1830 if (SrcPrimed && !KillSrc)
1831 BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1832 return;
1833 } else if (PPC::G8pRCRegClass.contains(DestReg) &&
1834 PPC::G8pRCRegClass.contains(SrcReg)) {
1835 // TODO: Handle G8RC to G8pRC (and vice versa) copy.
1836 unsigned DestRegIdx = DestReg - PPC::G8p0;
1837 MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
1838 MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
1839 unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1840 MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
1841 MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
1842 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
1843 .addReg(SrcRegSub0)
1844 .addReg(SrcRegSub0, getKillRegState(KillSrc));
1845 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
1846 .addReg(SrcRegSub1)
1847 .addReg(SrcRegSub1, getKillRegState(KillSrc));
1848 return;
1849 } else
1850 llvm_unreachable("Impossible reg-to-reg copy");
1851
1852 const MCInstrDesc &MCID = get(Opc);
1853 if (MCID.getNumOperands() == 3)
1854 BuildMI(MBB, I, DL, MCID, DestReg)
1855 .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1856 else
1857 BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
1858}
1859
1860unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
1861 int OpcodeIndex = 0;
1862
1863 if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
1864 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1866 } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
1867 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1869 } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1871 } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1873 } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1875 } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1877 } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1879 } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1881 } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1883 } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1885 } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1887 } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
1889 } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
1890 assert(Subtarget.pairedVectorMemops() &&
1891 "Register unexpected when paired memops are disabled.");
1893 } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1894 assert(Subtarget.pairedVectorMemops() &&
1895 "Register unexpected when paired memops are disabled.");
1897 } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1898 assert(Subtarget.pairedVectorMemops() &&
1899 "Register unexpected when paired memops are disabled.");
1901 } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1902 assert(Subtarget.pairedVectorMemops() &&
1903 "Register unexpected when paired memops are disabled.");
1905 } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1907 } else {
1908 llvm_unreachable("Unknown regclass!");
1909 }
1910 return OpcodeIndex;
1911}
1912
1913unsigned
1915 ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1916 return OpcodesForSpill[getSpillIndex(RC)];
1917}
1918
1919unsigned
1921 ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1922 return OpcodesForSpill[getSpillIndex(RC)];
1923}
1924
1925void PPCInstrInfo::StoreRegToStackSlot(
1926 MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1927 const TargetRegisterClass *RC,
1928 SmallVectorImpl<MachineInstr *> &NewMIs) const {
1929 unsigned Opcode = getStoreOpcodeForSpill(RC);
1930 DebugLoc DL;
1931
1932 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1933 FuncInfo->setHasSpills();
1934
1936 BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
1937 FrameIdx));
1938
1939 if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
1940 PPC::CRBITRCRegClass.hasSubClassEq(RC))
1941 FuncInfo->setSpillsCR();
1942
1943 if (isXFormMemOp(Opcode))
1944 FuncInfo->setHasNonRISpills();
1945}
1946
1949 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1950 const TargetRegisterInfo *TRI) const {
1951 MachineFunction &MF = *MBB.getParent();
1953
1954 StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
1955
1956 for (MachineInstr *NewMI : NewMIs)
1957 MBB.insert(MI, NewMI);
1958
1959 const MachineFrameInfo &MFI = MF.getFrameInfo();
1963 MFI.getObjectAlign(FrameIdx));
1964 NewMIs.back()->addMemOperand(MF, MMO);
1965}
1966
1969 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1970 const TargetRegisterInfo *TRI, Register VReg,
1971 MachineInstr::MIFlag Flags) const {
1972 // We need to avoid a situation in which the value from a VRRC register is
1973 // spilled using an Altivec instruction and reloaded into a VSRC register
1974 // using a VSX instruction. The issue with this is that the VSX
1975 // load/store instructions swap the doublewords in the vector and the Altivec
1976 // ones don't. The register classes on the spill/reload may be different if
1977 // the register is defined using an Altivec instruction and is then used by a
1978 // VSX instruction.
1979 RC = updatedRC(RC);
1980 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
1981}
1982
1983void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
1984 unsigned DestReg, int FrameIdx,
1985 const TargetRegisterClass *RC,
1987 const {
1988 unsigned Opcode = getLoadOpcodeForSpill(RC);
1989 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
1990 FrameIdx));
1991}
1992
1995 int FrameIdx, const TargetRegisterClass *RC,
1996 const TargetRegisterInfo *TRI) const {
1997 MachineFunction &MF = *MBB.getParent();
1999 DebugLoc DL;
2000 if (MI != MBB.end()) DL = MI->getDebugLoc();
2001
2002 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2003
2004 for (MachineInstr *NewMI : NewMIs)
2005 MBB.insert(MI, NewMI);
2006
2007 const MachineFrameInfo &MFI = MF.getFrameInfo();
2011 MFI.getObjectAlign(FrameIdx));
2012 NewMIs.back()->addMemOperand(MF, MMO);
2013}
2014
2017 int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
2018 Register VReg, MachineInstr::MIFlag Flags) const {
2019 // We need to avoid a situation in which the value from a VRRC register is
2020 // spilled using an Altivec instruction and reloaded into a VSRC register
2021 // using a VSX instruction. The issue with this is that the VSX
2022 // load/store instructions swap the doublewords in the vector and the Altivec
2023 // ones don't. The register classes on the spill/reload may be different if
2024 // the register is defined using an Altivec instruction and is then used by a
2025 // VSX instruction.
2026 RC = updatedRC(RC);
2027
2028 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2029}
2030
2033 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2034 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2035 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2036 else
2037 // Leave the CR# the same, but invert the condition.
2038 Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
2039 return false;
2040}
2041
2042// For some instructions, it is legal to fold ZERO into the RA register field.
2043// This function performs that fold by replacing the operand with PPC::ZERO,
2044// it does not consider whether the load immediate zero is no longer in use.
2046 Register Reg) const {
2047 // A zero immediate should always be loaded with a single li.
2048 unsigned DefOpc = DefMI.getOpcode();
2049 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2050 return false;
2051 if (!DefMI.getOperand(1).isImm())
2052 return false;
2053 if (DefMI.getOperand(1).getImm() != 0)
2054 return false;
2055
2056 // Note that we cannot here invert the arguments of an isel in order to fold
2057 // a ZERO into what is presented as the second argument. All we have here
2058 // is the condition bit, and that might come from a CR-logical bit operation.
2059
2060 const MCInstrDesc &UseMCID = UseMI.getDesc();
2061
2062 // Only fold into real machine instructions.
2063 if (UseMCID.isPseudo())
2064 return false;
2065
2066 // We need to find which of the User's operands is to be folded, that will be
2067 // the operand that matches the given register ID.
2068 unsigned UseIdx;
2069 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2070 if (UseMI.getOperand(UseIdx).isReg() &&
2071 UseMI.getOperand(UseIdx).getReg() == Reg)
2072 break;
2073
2074 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2075 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2076
2077 const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2078
2079 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2080 // register (which might also be specified as a pointer class kind).
2081 if (UseInfo->isLookupPtrRegClass()) {
2082 if (UseInfo->RegClass /* Kind */ != 1)
2083 return false;
2084 } else {
2085 if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2086 UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2087 return false;
2088 }
2089
2090 // Make sure this is not tied to an output register (or otherwise
2091 // constrained). This is true for ST?UX registers, for example, which
2092 // are tied to their output registers.
2093 if (UseInfo->Constraints != 0)
2094 return false;
2095
2096 MCRegister ZeroReg;
2097 if (UseInfo->isLookupPtrRegClass()) {
2098 bool isPPC64 = Subtarget.isPPC64();
2099 ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2100 } else {
2101 ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2102 PPC::ZERO8 : PPC::ZERO;
2103 }
2104
2105 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2106 LLVM_DEBUG(UseMI.dump());
2107 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2108 LLVM_DEBUG(dbgs() << "Into: ");
2109 LLVM_DEBUG(UseMI.dump());
2110 return true;
2111}
2112
2113// Folds zero into instructions which have a load immediate zero as an operand
2114// but also recognize zero as immediate zero. If the definition of the load
2115// has no more users it is deleted.
2117 Register Reg, MachineRegisterInfo *MRI) const {
2118 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2119 if (MRI->use_nodbg_empty(Reg))
2120 DefMI.eraseFromParent();
2121 return Changed;
2122}
2123
2125 for (MachineInstr &MI : MBB)
2126 if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
2127 MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
2128 return true;
2129 return false;
2130}
2131
2132// We should make sure that, if we're going to predicate both sides of a
2133// condition (a diamond), that both sides don't define the counter register. We
2134// can predicate counter-decrement-based branches, but while that predicates
2135// the branching, it does not predicate the counter decrement. If we tried to
2136// merge the triangle into one predicated block, we'd decrement the counter
2137// twice.
2139 unsigned NumT, unsigned ExtraT,
2140 MachineBasicBlock &FMBB,
2141 unsigned NumF, unsigned ExtraF,
2142 BranchProbability Probability) const {
2143 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2144}
2145
2146
2148 // The predicated branches are identified by their type, not really by the
2149 // explicit presence of a predicate. Furthermore, some of them can be
2150 // predicated more than once. Because if conversion won't try to predicate
2151 // any instruction which already claims to be predicated (by returning true
2152 // here), always return false. In doing so, we let isPredicable() be the
2153 // final word on whether not the instruction can be (further) predicated.
2154
2155 return false;
2156}
2157
2159 const MachineBasicBlock *MBB,
2160 const MachineFunction &MF) const {
2161 switch (MI.getOpcode()) {
2162 default:
2163 break;
2164 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2165 // across them, since some FP operations may change content of FPSCR.
2166 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2167 case PPC::MFFS:
2168 case PPC::MTFSF:
2169 case PPC::FENCE:
2170 return true;
2171 }
2173}
2174
2176 ArrayRef<MachineOperand> Pred) const {
2177 unsigned OpC = MI.getOpcode();
2178 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2179 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2180 bool isPPC64 = Subtarget.isPPC64();
2181 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2182 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2183 // Need add Def and Use for CTR implicit operand.
2184 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2185 .addReg(Pred[1].getReg(), RegState::Implicit)
2187 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2188 MI.setDesc(get(PPC::BCLR));
2189 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2190 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2191 MI.setDesc(get(PPC::BCLRn));
2192 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2193 } else {
2194 MI.setDesc(get(PPC::BCCLR));
2195 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2196 .addImm(Pred[0].getImm())
2197 .add(Pred[1]);
2198 }
2199
2200 return true;
2201 } else if (OpC == PPC::B) {
2202 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2203 bool isPPC64 = Subtarget.isPPC64();
2204 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2205 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2206 // Need add Def and Use for CTR implicit operand.
2207 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2208 .addReg(Pred[1].getReg(), RegState::Implicit)
2210 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2211 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2212 MI.removeOperand(0);
2213
2214 MI.setDesc(get(PPC::BC));
2215 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2216 .add(Pred[1])
2217 .addMBB(MBB);
2218 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2219 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2220 MI.removeOperand(0);
2221
2222 MI.setDesc(get(PPC::BCn));
2223 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2224 .add(Pred[1])
2225 .addMBB(MBB);
2226 } else {
2227 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2228 MI.removeOperand(0);
2229
2230 MI.setDesc(get(PPC::BCC));
2231 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2232 .addImm(Pred[0].getImm())
2233 .add(Pred[1])
2234 .addMBB(MBB);
2235 }
2236
2237 return true;
2238 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2239 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2240 OpC == PPC::BCTRL8_RM) {
2241 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2242 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2243
2244 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2245 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2246 bool isPPC64 = Subtarget.isPPC64();
2247
2248 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2249 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2250 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2251 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2252 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2253 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2254 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2255 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2256 } else {
2257 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2258 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2259 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2260 .addImm(Pred[0].getImm())
2261 .add(Pred[1]);
2262 }
2263
2264 // Need add Def and Use for LR implicit operand.
2265 if (setLR)
2266 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2267 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2268 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2269 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2270 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2272
2273 return true;
2274 }
2275
2276 return false;
2277}
2278
2280 ArrayRef<MachineOperand> Pred2) const {
2281 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2282 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2283
2284 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2285 return false;
2286 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2287 return false;
2288
2289 // P1 can only subsume P2 if they test the same condition register.
2290 if (Pred1[1].getReg() != Pred2[1].getReg())
2291 return false;
2292
2293 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2294 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2295
2296 if (P1 == P2)
2297 return true;
2298
2299 // Does P1 subsume P2, e.g. GE subsumes GT.
2300 if (P1 == PPC::PRED_LE &&
2301 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2302 return true;
2303 if (P1 == PPC::PRED_GE &&
2304 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2305 return true;
2306
2307 return false;
2308}
2309
2311 std::vector<MachineOperand> &Pred,
2312 bool SkipDead) const {
2313 // Note: At the present time, the contents of Pred from this function is
2314 // unused by IfConversion. This implementation follows ARM by pushing the
2315 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2316 // predicate, instructions defining CTR or CTR8 are also included as
2317 // predicate-defining instructions.
2318
2319 const TargetRegisterClass *RCs[] =
2320 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2321 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2322
2323 bool Found = false;
2324 for (const MachineOperand &MO : MI.operands()) {
2325 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2326 const TargetRegisterClass *RC = RCs[c];
2327 if (MO.isReg()) {
2328 if (MO.isDef() && RC->contains(MO.getReg())) {
2329 Pred.push_back(MO);
2330 Found = true;
2331 }
2332 } else if (MO.isRegMask()) {
2333 for (MCPhysReg R : *RC)
2334 if (MO.clobbersPhysReg(R)) {
2335 Pred.push_back(MO);
2336 Found = true;
2337 }
2338 }
2339 }
2340 }
2341
2342 return Found;
2343}
2344
2346 Register &SrcReg2, int64_t &Mask,
2347 int64_t &Value) const {
2348 unsigned Opc = MI.getOpcode();
2349
2350 switch (Opc) {
2351 default: return false;
2352 case PPC::CMPWI:
2353 case PPC::CMPLWI:
2354 case PPC::CMPDI:
2355 case PPC::CMPLDI:
2356 SrcReg = MI.getOperand(1).getReg();
2357 SrcReg2 = 0;
2358 Value = MI.getOperand(2).getImm();
2359 Mask = 0xFFFF;
2360 return true;
2361 case PPC::CMPW:
2362 case PPC::CMPLW:
2363 case PPC::CMPD:
2364 case PPC::CMPLD:
2365 case PPC::FCMPUS:
2366 case PPC::FCMPUD:
2367 SrcReg = MI.getOperand(1).getReg();
2368 SrcReg2 = MI.getOperand(2).getReg();
2369 Value = 0;
2370 Mask = 0;
2371 return true;
2372 }
2373}
2374
2376 Register SrcReg2, int64_t Mask,
2377 int64_t Value,
2378 const MachineRegisterInfo *MRI) const {
2379 if (DisableCmpOpt)
2380 return false;
2381
2382 int OpC = CmpInstr.getOpcode();
2383 Register CRReg = CmpInstr.getOperand(0).getReg();
2384
2385 // FP record forms set CR1 based on the exception status bits, not a
2386 // comparison with zero.
2387 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2388 return false;
2389
2391 // The record forms set the condition register based on a signed comparison
2392 // with zero (so says the ISA manual). This is not as straightforward as it
2393 // seems, however, because this is always a 64-bit comparison on PPC64, even
2394 // for instructions that are 32-bit in nature (like slw for example).
2395 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2396 // for equality checks (as those don't depend on the sign). On PPC64,
2397 // we are restricted to equality for unsigned 64-bit comparisons and for
2398 // signed 32-bit comparisons the applicability is more restricted.
2399 bool isPPC64 = Subtarget.isPPC64();
2400 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2401 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2402 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2403
2404 // Look through copies unless that gets us to a physical register.
2405 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2406 if (ActualSrc.isVirtual())
2407 SrcReg = ActualSrc;
2408
2409 // Get the unique definition of SrcReg.
2410 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2411 if (!MI) return false;
2412
2413 bool equalityOnly = false;
2414 bool noSub = false;
2415 if (isPPC64) {
2416 if (is32BitSignedCompare) {
2417 // We can perform this optimization only if SrcReg is sign-extending.
2418 if (isSignExtended(SrcReg, MRI))
2419 noSub = true;
2420 else
2421 return false;
2422 } else if (is32BitUnsignedCompare) {
2423 // We can perform this optimization, equality only, if SrcReg is
2424 // zero-extending.
2425 if (isZeroExtended(SrcReg, MRI)) {
2426 noSub = true;
2427 equalityOnly = true;
2428 } else
2429 return false;
2430 } else
2431 equalityOnly = is64BitUnsignedCompare;
2432 } else
2433 equalityOnly = is32BitUnsignedCompare;
2434
2435 if (equalityOnly) {
2436 // We need to check the uses of the condition register in order to reject
2437 // non-equality comparisons.
2439 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2440 I != IE; ++I) {
2441 MachineInstr *UseMI = &*I;
2442 if (UseMI->getOpcode() == PPC::BCC) {
2444 unsigned PredCond = PPC::getPredicateCondition(Pred);
2445 // We ignore hint bits when checking for non-equality comparisons.
2446 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2447 return false;
2448 } else if (UseMI->getOpcode() == PPC::ISEL ||
2449 UseMI->getOpcode() == PPC::ISEL8) {
2450 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2451 if (SubIdx != PPC::sub_eq)
2452 return false;
2453 } else
2454 return false;
2455 }
2456 }
2457
2458 MachineBasicBlock::iterator I = CmpInstr;
2459
2460 // Scan forward to find the first use of the compare.
2461 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2462 ++I) {
2463 bool FoundUse = false;
2465 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2466 J != JE; ++J)
2467 if (&*J == &*I) {
2468 FoundUse = true;
2469 break;
2470 }
2471
2472 if (FoundUse)
2473 break;
2474 }
2475
2478
2479 // There are two possible candidates which can be changed to set CR[01].
2480 // One is MI, the other is a SUB instruction.
2481 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2482 MachineInstr *Sub = nullptr;
2483 if (SrcReg2 != 0)
2484 // MI is not a candidate for CMPrr.
2485 MI = nullptr;
2486 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2487 // same BB as the comparison. This is to allow the check below to avoid calls
2488 // (and other explicit clobbers); instead we should really check for these
2489 // more explicitly (in at least a few predecessors).
2490 else if (MI->getParent() != CmpInstr.getParent())
2491 return false;
2492 else if (Value != 0) {
2493 // The record-form instructions set CR bit based on signed comparison
2494 // against 0. We try to convert a compare against 1 or -1 into a compare
2495 // against 0 to exploit record-form instructions. For example, we change
2496 // the condition "greater than -1" into "greater than or equal to 0"
2497 // and "less than 1" into "less than or equal to 0".
2498
2499 // Since we optimize comparison based on a specific branch condition,
2500 // we don't optimize if condition code is used by more than once.
2501 if (equalityOnly || !MRI->hasOneUse(CRReg))
2502 return false;
2503
2504 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2505 if (UseMI->getOpcode() != PPC::BCC)
2506 return false;
2507
2509 unsigned PredCond = PPC::getPredicateCondition(Pred);
2510 unsigned PredHint = PPC::getPredicateHint(Pred);
2511 int16_t Immed = (int16_t)Value;
2512
2513 // When modifying the condition in the predicate, we propagate hint bits
2514 // from the original predicate to the new one.
2515 if (Immed == -1 && PredCond == PPC::PRED_GT)
2516 // We convert "greater than -1" into "greater than or equal to 0",
2517 // since we are assuming signed comparison by !equalityOnly
2518 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2519 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2520 // We convert "less than or equal to -1" into "less than 0".
2521 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2522 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2523 // We convert "less than 1" into "less than or equal to 0".
2524 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2525 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2526 // We convert "greater than or equal to 1" into "greater than 0".
2527 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2528 else
2529 return false;
2530
2531 // Convert the comparison and its user to a compare against zero with the
2532 // appropriate predicate on the branch. Zero comparison might provide
2533 // optimization opportunities post-RA (see optimization in
2534 // PPCPreEmitPeephole.cpp).
2535 UseMI->getOperand(0).setImm(Pred);
2536 CmpInstr.getOperand(2).setImm(0);
2537 }
2538
2539 // Search for Sub.
2540 --I;
2541
2542 // Get ready to iterate backward from CmpInstr.
2543 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2544
2545 for (; I != E && !noSub; --I) {
2546 const MachineInstr &Instr = *I;
2547 unsigned IOpC = Instr.getOpcode();
2548
2549 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2550 Instr.readsRegister(PPC::CR0, TRI)))
2551 // This instruction modifies or uses the record condition register after
2552 // the one we want to change. While we could do this transformation, it
2553 // would likely not be profitable. This transformation removes one
2554 // instruction, and so even forcing RA to generate one move probably
2555 // makes it unprofitable.
2556 return false;
2557
2558 // Check whether CmpInstr can be made redundant by the current instruction.
2559 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2560 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2561 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2562 ((Instr.getOperand(1).getReg() == SrcReg &&
2563 Instr.getOperand(2).getReg() == SrcReg2) ||
2564 (Instr.getOperand(1).getReg() == SrcReg2 &&
2565 Instr.getOperand(2).getReg() == SrcReg))) {
2566 Sub = &*I;
2567 break;
2568 }
2569
2570 if (I == B)
2571 // The 'and' is below the comparison instruction.
2572 return false;
2573 }
2574
2575 // Return false if no candidates exist.
2576 if (!MI && !Sub)
2577 return false;
2578
2579 // The single candidate is called MI.
2580 if (!MI) MI = Sub;
2581
2582 int NewOpC = -1;
2583 int MIOpC = MI->getOpcode();
2584 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2585 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2586 NewOpC = MIOpC;
2587 else {
2588 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2589 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2590 NewOpC = MIOpC;
2591 }
2592
2593 // FIXME: On the non-embedded POWER architectures, only some of the record
2594 // forms are fast, and we should use only the fast ones.
2595
2596 // The defining instruction has a record form (or is already a record
2597 // form). It is possible, however, that we'll need to reverse the condition
2598 // code of the users.
2599 if (NewOpC == -1)
2600 return false;
2601
2602 // This transformation should not be performed if `nsw` is missing and is not
2603 // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2604 // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2605 // CRReg can reflect if compared values are equal, this optz is still valid.
2606 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2607 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2608 return false;
2609
2610 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2611 // needs to be updated to be based on SUB. Push the condition code
2612 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2613 // condition code of these operands will be modified.
2614 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2615 // comparison against 0, which may modify predicate.
2616 bool ShouldSwap = false;
2617 if (Sub && Value == 0) {
2618 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2619 Sub->getOperand(2).getReg() == SrcReg;
2620
2621 // The operands to subf are the opposite of sub, so only in the fixed-point
2622 // case, invert the order.
2623 ShouldSwap = !ShouldSwap;
2624 }
2625
2626 if (ShouldSwap)
2628 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2629 I != IE; ++I) {
2630 MachineInstr *UseMI = &*I;
2631 if (UseMI->getOpcode() == PPC::BCC) {
2633 unsigned PredCond = PPC::getPredicateCondition(Pred);
2634 assert((!equalityOnly ||
2635 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2636 "Invalid predicate for equality-only optimization");
2637 (void)PredCond; // To suppress warning in release build.
2638 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2640 } else if (UseMI->getOpcode() == PPC::ISEL ||
2641 UseMI->getOpcode() == PPC::ISEL8) {
2642 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2643 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2644 "Invalid CR bit for equality-only optimization");
2645
2646 if (NewSubReg == PPC::sub_lt)
2647 NewSubReg = PPC::sub_gt;
2648 else if (NewSubReg == PPC::sub_gt)
2649 NewSubReg = PPC::sub_lt;
2650
2651 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2652 NewSubReg));
2653 } else // We need to abort on a user we don't understand.
2654 return false;
2655 }
2656 assert(!(Value != 0 && ShouldSwap) &&
2657 "Non-zero immediate support and ShouldSwap"
2658 "may conflict in updating predicate");
2659
2660 // Create a new virtual register to hold the value of the CR set by the
2661 // record-form instruction. If the instruction was not previously in
2662 // record form, then set the kill flag on the CR.
2663 CmpInstr.eraseFromParent();
2664
2666 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2667 get(TargetOpcode::COPY), CRReg)
2668 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2669
2670 // Even if CR0 register were dead before, it is alive now since the
2671 // instruction we just built uses it.
2672 MI->clearRegisterDeads(PPC::CR0);
2673
2674 if (MIOpC != NewOpC) {
2675 // We need to be careful here: we're replacing one instruction with
2676 // another, and we need to make sure that we get all of the right
2677 // implicit uses and defs. On the other hand, the caller may be holding
2678 // an iterator to this instruction, and so we can't delete it (this is
2679 // specifically the case if this is the instruction directly after the
2680 // compare).
2681
2682 // Rotates are expensive instructions. If we're emitting a record-form
2683 // rotate that can just be an andi/andis, we should just emit that.
2684 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2685 Register GPRRes = MI->getOperand(0).getReg();
2686 int64_t SH = MI->getOperand(2).getImm();
2687 int64_t MB = MI->getOperand(3).getImm();
2688 int64_t ME = MI->getOperand(4).getImm();
2689 // We can only do this if both the start and end of the mask are in the
2690 // same halfword.
2691 bool MBInLoHWord = MB >= 16;
2692 bool MEInLoHWord = ME >= 16;
2693 uint64_t Mask = ~0LLU;
2694
2695 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2696 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2697 // The mask value needs to shift right 16 if we're emitting andis.
2698 Mask >>= MBInLoHWord ? 0 : 16;
2699 NewOpC = MIOpC == PPC::RLWINM
2700 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2701 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2702 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2703 (ME - MB + 1 == SH) && (MB >= 16)) {
2704 // If we are rotating by the exact number of bits as are in the mask
2705 // and the mask is in the least significant bits of the register,
2706 // that's just an andis. (as long as the GPR result has no uses).
2707 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2708 Mask >>= 16;
2709 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2710 }
2711 // If we've set the mask, we can transform.
2712 if (Mask != ~0LLU) {
2713 MI->removeOperand(4);
2714 MI->removeOperand(3);
2715 MI->getOperand(2).setImm(Mask);
2716 NumRcRotatesConvertedToRcAnd++;
2717 }
2718 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
2719 int64_t MB = MI->getOperand(3).getImm();
2720 if (MB >= 48) {
2721 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2722 NewOpC = PPC::ANDI8_rec;
2723 MI->removeOperand(3);
2724 MI->getOperand(2).setImm(Mask);
2725 NumRcRotatesConvertedToRcAnd++;
2726 }
2727 }
2728
2729 const MCInstrDesc &NewDesc = get(NewOpC);
2730 MI->setDesc(NewDesc);
2731
2732 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2733 if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
2734 MI->addOperand(*MI->getParent()->getParent(),
2735 MachineOperand::CreateReg(ImpDef, true, true));
2736 }
2737 }
2738 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2739 if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
2740 MI->addOperand(*MI->getParent()->getParent(),
2741 MachineOperand::CreateReg(ImpUse, false, true));
2742 }
2743 }
2744 }
2745 assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2746 "Record-form instruction does not define cr0?");
2747
2748 // Modify the condition code of operands in OperandsToUpdate.
2749 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2750 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2751 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2752 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2753
2754 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2755 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2756
2757 return true;
2758}
2759
2762 if (MRI->isSSA())
2763 return false;
2764
2765 Register SrcReg, SrcReg2;
2766 int64_t CmpMask, CmpValue;
2767 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2768 return false;
2769
2770 // Try to optimize the comparison against 0.
2771 if (CmpValue || !CmpMask || SrcReg2)
2772 return false;
2773
2774 // The record forms set the condition register based on a signed comparison
2775 // with zero (see comments in optimizeCompareInstr). Since we can't do the
2776 // equality checks in post-RA, we are more restricted on a unsigned
2777 // comparison.
2778 unsigned Opc = CmpMI.getOpcode();
2779 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2780 return false;
2781
2782 // The record forms are always based on a 64-bit comparison on PPC64
2783 // (similary, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2784 // comparison. Since we can't do the equality checks in post-RA, we bail out
2785 // the case.
2786 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2787 return false;
2788
2789 // CmpMI can't be deleted if it has implicit def.
2790 if (CmpMI.hasImplicitDef())
2791 return false;
2792
2793 bool SrcRegHasOtherUse = false;
2794 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2795 if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
2796 return false;
2797
2798 MachineOperand RegMO = CmpMI.getOperand(0);
2799 Register CRReg = RegMO.getReg();
2800 if (CRReg != PPC::CR0)
2801 return false;
2802
2803 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2804 bool SeenUseOfCRReg = false;
2805 bool IsCRRegKilled = false;
2806 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2807 SeenUseOfCRReg) ||
2808 SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
2809 return false;
2810
2811 int SrcMIOpc = SrcMI->getOpcode();
2812 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2813 if (NewOpC == -1)
2814 return false;
2815
2816 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2817 LLVM_DEBUG(SrcMI->dump());
2818
2819 const MCInstrDesc &NewDesc = get(NewOpC);
2820 SrcMI->setDesc(NewDesc);
2821 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2823 SrcMI->clearRegisterDeads(CRReg);
2824
2825 assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2826 "Record-form instruction does not define cr0?");
2827
2828 LLVM_DEBUG(dbgs() << "with: ");
2829 LLVM_DEBUG(SrcMI->dump());
2830 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2831 LLVM_DEBUG(CmpMI.dump());
2832 return true;
2833}
2834
2837 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2838 const TargetRegisterInfo *TRI) const {
2839 const MachineOperand *BaseOp;
2840 OffsetIsScalable = false;
2841 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2842 return false;
2843 BaseOps.push_back(BaseOp);
2844 return true;
2845}
2846
2847static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2848 const TargetRegisterInfo *TRI) {
2849 // If this is a volatile load/store, don't mess with it.
2850 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2851 return false;
2852
2853 if (LdSt.getOperand(2).isFI())
2854 return true;
2855
2856 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2857 // Can't cluster if the instruction modifies the base register
2858 // or it is update form. e.g. ld r2,3(r2)
2859 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2860 return false;
2861
2862 return true;
2863}
2864
2865// Only cluster instruction pair that have the same opcode, and they are
2866// clusterable according to PowerPC specification.
2867static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2868 const PPCSubtarget &Subtarget) {
2869 switch (FirstOpc) {
2870 default:
2871 return false;
2872 case PPC::STD:
2873 case PPC::STFD:
2874 case PPC::STXSD:
2875 case PPC::DFSTOREf64:
2876 return FirstOpc == SecondOpc;
2877 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2878 // 32bit and 64bit instruction selection. They are clusterable pair though
2879 // they are different opcode.
2880 case PPC::STW:
2881 case PPC::STW8:
2882 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2883 }
2884}
2885
2887 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2888 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2889 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2890 unsigned NumBytes) const {
2891
2892 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2893 const MachineOperand &BaseOp1 = *BaseOps1.front();
2894 const MachineOperand &BaseOp2 = *BaseOps2.front();
2895 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2896 "Only base registers and frame indices are supported.");
2897
2898 // ClusterSize means the number of memory operations that will have been
2899 // clustered if this hook returns true.
2900 // Don't cluster memory op if there are already two ops clustered at least.
2901 if (ClusterSize > 2)
2902 return false;
2903
2904 // Cluster the load/store only when they have the same base
2905 // register or FI.
2906 if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
2907 (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
2908 (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2909 return false;
2910
2911 // Check if the load/store are clusterable according to the PowerPC
2912 // specification.
2913 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2914 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2915 unsigned FirstOpc = FirstLdSt.getOpcode();
2916 unsigned SecondOpc = SecondLdSt.getOpcode();
2918 // Cluster the load/store only when they have the same opcode, and they are
2919 // clusterable opcode according to PowerPC specification.
2920 if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2921 return false;
2922
2923 // Can't cluster load/store that have ordered or volatile memory reference.
2924 if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
2925 !isLdStSafeToCluster(SecondLdSt, TRI))
2926 return false;
2927
2928 int64_t Offset1 = 0, Offset2 = 0;
2929 LocationSize Width1 = 0, Width2 = 0;
2930 const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
2931 if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
2932 !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
2933 Width1 != Width2)
2934 return false;
2935
2936 assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2937 "getMemOperandWithOffsetWidth return incorrect base op");
2938 // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2939 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2940 return Offset1 + (int64_t)Width1.getValue() == Offset2;
2941}
2942
2943/// GetInstSize - Return the number of bytes of code the specified
2944/// instruction may be. This returns the maximum number of bytes.
2945///
2947 unsigned Opcode = MI.getOpcode();
2948
2949 if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
2950 const MachineFunction *MF = MI.getParent()->getParent();
2951 const char *AsmStr = MI.getOperand(0).getSymbolName();
2952 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
2953 } else if (Opcode == TargetOpcode::STACKMAP) {
2954 StackMapOpers Opers(&MI);
2955 return Opers.getNumPatchBytes();
2956 } else if (Opcode == TargetOpcode::PATCHPOINT) {
2957 PatchPointOpers Opers(&MI);
2958 return Opers.getNumPatchBytes();
2959 } else {
2960 return get(Opcode).getSize();
2961 }
2962}
2963
2964std::pair<unsigned, unsigned>
2966 // PPC always uses a direct mask.
2967 return std::make_pair(TF, 0u);
2968}
2969
2972 using namespace PPCII;
2973 static const std::pair<unsigned, const char *> TargetFlags[] = {
2974 {MO_PLT, "ppc-plt"},
2975 {MO_PIC_FLAG, "ppc-pic"},
2976 {MO_PCREL_FLAG, "ppc-pcrel"},
2977 {MO_GOT_FLAG, "ppc-got"},
2978 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
2979 {MO_TLSGD_FLAG, "ppc-tlsgd"},
2980 {MO_TPREL_FLAG, "ppc-tprel"},
2981 {MO_TLSLDM_FLAG, "ppc-tlsldm"},
2982 {MO_TLSLD_FLAG, "ppc-tlsld"},
2983 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
2984 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
2985 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
2986 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
2987 {MO_LO, "ppc-lo"},
2988 {MO_HA, "ppc-ha"},
2989 {MO_TPREL_LO, "ppc-tprel-lo"},
2990 {MO_TPREL_HA, "ppc-tprel-ha"},
2991 {MO_DTPREL_LO, "ppc-dtprel-lo"},
2992 {MO_TLSLD_LO, "ppc-tlsld-lo"},
2993 {MO_TOC_LO, "ppc-toc-lo"},
2994 {MO_TLS, "ppc-tls"},
2995 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
2996 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
2997 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
2998 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
2999 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3000 };
3001 return ArrayRef(TargetFlags);
3002}
3003
3004// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3005// The VSX versions have the advantage of a full 64-register target whereas
3006// the FP ones have the advantage of lower latency and higher throughput. So
3007// what we are after is using the faster instructions in low register pressure
3008// situations and using the larger register file in high register pressure
3009// situations.
3011 unsigned UpperOpcode, LowerOpcode;
3012 switch (MI.getOpcode()) {
3013 case PPC::DFLOADf32:
3014 UpperOpcode = PPC::LXSSP;
3015 LowerOpcode = PPC::LFS;
3016 break;
3017 case PPC::DFLOADf64:
3018 UpperOpcode = PPC::LXSD;
3019 LowerOpcode = PPC::LFD;
3020 break;
3021 case PPC::DFSTOREf32:
3022 UpperOpcode = PPC::STXSSP;
3023 LowerOpcode = PPC::STFS;
3024 break;
3025 case PPC::DFSTOREf64:
3026 UpperOpcode = PPC::STXSD;
3027 LowerOpcode = PPC::STFD;
3028 break;
3029 case PPC::XFLOADf32:
3030 UpperOpcode = PPC::LXSSPX;
3031 LowerOpcode = PPC::LFSX;
3032 break;
3033 case PPC::XFLOADf64:
3034 UpperOpcode = PPC::LXSDX;
3035 LowerOpcode = PPC::LFDX;
3036 break;
3037 case PPC::XFSTOREf32:
3038 UpperOpcode = PPC::STXSSPX;
3039 LowerOpcode = PPC::STFSX;
3040 break;
3041 case PPC::XFSTOREf64:
3042 UpperOpcode = PPC::STXSDX;
3043 LowerOpcode = PPC::STFDX;
3044 break;
3045 case PPC::LIWAX:
3046 UpperOpcode = PPC::LXSIWAX;
3047 LowerOpcode = PPC::LFIWAX;
3048 break;
3049 case PPC::LIWZX:
3050 UpperOpcode = PPC::LXSIWZX;
3051 LowerOpcode = PPC::LFIWZX;
3052 break;
3053 case PPC::STIWX:
3054 UpperOpcode = PPC::STXSIWX;
3055 LowerOpcode = PPC::STFIWX;
3056 break;
3057 default:
3058 llvm_unreachable("Unknown Operation!");
3059 }
3060
3061 Register TargetReg = MI.getOperand(0).getReg();
3062 unsigned Opcode;
3063 if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
3064 (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3065 Opcode = LowerOpcode;
3066 else
3067 Opcode = UpperOpcode;
3068 MI.setDesc(get(Opcode));
3069 return true;
3070}
3071
3072static bool isAnImmediateOperand(const MachineOperand &MO) {
3073 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3074}
3075
  // Expand PPC pseudo instructions that survive register allocation into
  // real machine instructions, in place. Returns true if MI was expanded.
  auto &MBB = *MI.getParent();
  auto DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  case PPC::BUILD_UACC: {
    // Copy the four VSX registers underlying the source UACC into the four
    // underlying the destination ACC when the two differ.
    MCRegister ACC = MI.getOperand(0).getReg();
    MCRegister UACC = MI.getOperand(1).getReg();
    if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
      MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
      MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
      // FIXME: This can easily be improved to look up to the top of the MBB
      // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
      // we can just re-target any such XXLOR's to DstVSR + offset.
      for (int VecNo = 0; VecNo < 4; VecNo++)
        BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
            .addReg(SrcVSR + VecNo)
            .addReg(SrcVSR + VecNo);
    }
    // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
    // So after building the 4 copies, we can replace the BUILD_UACC instruction
    // with a NOP.
    [[fallthrough]];
  }
  case PPC::KILL_PAIR: {
    // Degrade the pseudo to an unencoded NOP and drop its register operands.
    MI.setDesc(get(PPC::UNENCODED_NOP));
    MI.removeOperand(1);
    MI.removeOperand(0);
    return true;
  }
  case TargetOpcode::LOAD_STACK_GUARD: {
    // Lower to a load of the stack guard word relative to the thread
    // pointer (X13 on PPC64, R2 on PPC32).
    auto M = MBB.getParent()->getFunction().getParent();
    assert(
        (Subtarget.isTargetLinux() || M->getStackProtectorGuard() == "tls") &&
        "Only Linux target or tls mode are expected to contain "
        "LOAD_STACK_GUARD");
    // "tls" mode uses the module-specified offset; otherwise use the
    // default guard-slot offsets below.
    int64_t Offset;
    if (M->getStackProtectorGuard() == "tls")
      Offset = M->getStackProtectorGuardOffset();
    else
      Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
  }
  case PPC::PPCLdFixedAddr: {
    // Load one of the fixed, thread-pointer-relative glibc words
    // (hwcap / hwcap2 / cpuid), selected by the immediate operand.
    assert(Subtarget.getTargetTriple().isOSGlibc() &&
           "Only targets with Glibc expected to contain PPCLdFixedAddr");
    int64_t Offset = 0;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(PPC::LWZ));
    uint64_t FAType = MI.getOperand(1).getImm();
#undef PPC_LNX_FEATURE
#undef PPC_CPU
#define PPC_LNX_DEFINE_OFFSETS
#include "llvm/TargetParser/PPCTargetParser.def"
    bool IsLE = Subtarget.isLittleEndian();
    bool Is64 = Subtarget.isPPC64();
    // Pick the offset for the requested word by endianness and word size;
    // the PPC_*_OFFSET_* constants come from PPCTargetParser.def above.
    if (FAType == PPC_FAWORD_HWCAP) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_HWCAP2) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_CPUID) {
      if (IsLE)
        Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
    }
    assert(Offset && "Do not know the offset for this fixed addr load");
    MI.removeOperand(1);
    // NOTE(review): this listing appears to be missing a statement here
    // (original line 3155 was dropped by extraction) -- verify against the
    // upstream sources before relying on this listing.
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
#define PPC_TGT_PARSER_UNDEF_MACROS
#include "llvm/TargetParser/PPCTargetParser.def"
#undef PPC_TGT_PARSER_UNDEF_MACROS
  }
  case PPC::DFLOADf32:
  case PPC::DFLOADf64:
  case PPC::DFSTOREf32:
  case PPC::DFSTOREf64: {
    // D-form VSX memory pseudos: reg + immediate displacement.
    assert(Subtarget.hasP9Vector() &&
           "Invalid D-Form Pseudo-ops on Pre-P9 target.");
    assert(MI.getOperand(2).isReg() &&
           isAnImmediateOperand(MI.getOperand(1)) &&
           "D-form op must have register and immediate operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf32:
  case PPC::XFSTOREf32:
  case PPC::LIWAX:
  case PPC::LIWZX:
  case PPC::STIWX: {
    // X-form VSX memory pseudos: reg + reg addressing.
    assert(Subtarget.hasP8Vector() &&
           "Invalid X-Form Pseudo-ops on Pre-P8 target.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf64:
  case PPC::XFSTOREf64: {
    assert(Subtarget.hasVSX() &&
           "Invalid X-Form Pseudo-ops on target that has no VSX.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::SPILLTOVSR_LD: {
    // Reload of a SPILLTOVSRRC value: pick the VSX or GPR load depending on
    // where the register allocator placed the value.
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg)) {
      MI.setDesc(get(PPC::DFLOADf64));
      // Recurse to expand the DFLOADf64 pseudo as well.
      return expandPostRAPseudo(MI);
    }
    else
      MI.setDesc(get(PPC::LD));
    return true;
  }
  case PPC::SPILLTOVSR_ST: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::DFSTOREf64));
      return expandPostRAPseudo(MI);
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STD));
    }
    return true;
  }
  case PPC::SPILLTOVSR_LDX: {
    // Indexed (reg + reg) variants of the SPILLTOVSRRC reload/spill.
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg))
      MI.setDesc(get(PPC::LXSDX));
    else
      MI.setDesc(get(PPC::LDX));
    return true;
  }
  case PPC::SPILLTOVSR_STX: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::STXSDX));
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STDX));
    }
    return true;
  }

  // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
  case PPC::CFENCE:
  case PPC::CFENCE8: {
    // Expand the conditional fence into a compare against self (into CR7),
    // a control dependence on CR7, and an isync.
    auto Val = MI.getOperand(0).getReg();
    unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
    BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
    BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
        // NOTE(review): an operand line (original line 3243, likely a
        // predicate immediate) appears to have been dropped from this
        // builder chain by extraction -- verify against upstream.
        .addReg(PPC::CR7)
        .addImm(1);
    MI.setDesc(get(PPC::ISYNC));
    MI.removeOperand(0);
    return true;
  }
  }
  return false;
}
3253
3254// Essentially a compile-time implementation of a compare->isel sequence.
3255// It takes two constants to compare, along with the true/false registers
3256// and the comparison type (as a subreg to a CR field) and returns one
3257// of the true/false registers, depending on the comparison results.
3258static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3259 unsigned TrueReg, unsigned FalseReg,
3260 unsigned CRSubReg) {
3261 // Signed comparisons. The immediates are assumed to be sign-extended.
3262 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3263 switch (CRSubReg) {
3264 default: llvm_unreachable("Unknown integer comparison type.");
3265 case PPC::sub_lt:
3266 return Imm1 < Imm2 ? TrueReg : FalseReg;
3267 case PPC::sub_gt:
3268 return Imm1 > Imm2 ? TrueReg : FalseReg;
3269 case PPC::sub_eq:
3270 return Imm1 == Imm2 ? TrueReg : FalseReg;
3271 }
3272 }
3273 // Unsigned comparisons.
3274 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3275 switch (CRSubReg) {
3276 default: llvm_unreachable("Unknown integer comparison type.");
3277 case PPC::sub_lt:
3278 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3279 case PPC::sub_gt:
3280 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3281 case PPC::sub_eq:
3282 return Imm1 == Imm2 ? TrueReg : FalseReg;
3283 }
3284 }
3285 return PPC::NoRegister;
3286}
3287
3289 unsigned OpNo,
3290 int64_t Imm) const {
3291 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3292 // Replace the REG with the Immediate.
3293 Register InUseReg = MI.getOperand(OpNo).getReg();
3294 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3295
3296 // We need to make sure that the MI didn't have any implicit use
3297 // of this REG any more. We don't call MI.implicit_operands().empty() to
3298 // return early, since MI's MCID might be changed in calling context, as a
3299 // result its number of explicit operands may be changed, thus the begin of
3300 // implicit operand is changed.
3302 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
3303 if (UseOpIdx >= 0) {
3304 MachineOperand &MO = MI.getOperand(UseOpIdx);
3305 if (MO.isImplicit())
3306 // The operands must always be in the following order:
3307 // - explicit reg defs,
3308 // - other explicit operands (reg uses, immediates, etc.),
3309 // - implicit reg defs
3310 // - implicit reg uses
3311 // Therefore, removing the implicit operand won't change the explicit
3312 // operands layout.
3313 MI.removeOperand(UseOpIdx);
3314 }
3315}
3316
3317// Replace an instruction with one that materializes a constant (and sets
3318// CR0 if the original instruction was a record-form instruction).
3320 const LoadImmediateInfo &LII) const {
3321 // Remove existing operands.
3322 int OperandToKeep = LII.SetCR ? 1 : 0;
3323 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3324 MI.removeOperand(i);
3325
3326 // Replace the instruction.
3327 if (LII.SetCR) {
3328 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3329 // Set the immediate.
3330 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3331 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3332 return;
3333 }
3334 else
3335 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3336
3337 // Set the immediate.
3338 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3339 .addImm(LII.Imm);
3340}
3341
3343 bool &SeenIntermediateUse) const {
3344 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3345 "Should be called after register allocation.");
3347 MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
3348 It++;
3349 SeenIntermediateUse = false;
3350 for (; It != E; ++It) {
3351 if (It->modifiesRegister(Reg, TRI))
3352 return &*It;
3353 if (It->readsRegister(Reg, TRI))
3354 SeenIntermediateUse = true;
3355 }
3356 return nullptr;
3357}
3358
3361 const DebugLoc &DL, Register Reg,
3362 int64_t Imm) const {
3364 "Register should be in non-SSA form after RA");
3365 bool isPPC64 = Subtarget.isPPC64();
3366 // FIXME: Materialization here is not optimal.
3367 // For some special bit patterns we can use less instructions.
3368 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3369 if (isInt<16>(Imm)) {
3370 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3371 } else if (isInt<32>(Imm)) {
3372 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3373 .addImm(Imm >> 16);
3374 if (Imm & 0xFFFF)
3375 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3376 .addReg(Reg, RegState::Kill)
3377 .addImm(Imm & 0xFFFF);
3378 } else {
3379 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3380 "only supported in PPC64");
3381 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3382 if ((Imm >> 32) & 0xFFFF)
3383 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3384 .addReg(Reg, RegState::Kill)
3385 .addImm((Imm >> 32) & 0xFFFF);
3386 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3387 .addReg(Reg, RegState::Kill)
3388 .addImm(32)
3389 .addImm(31);
3390 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3391 .addReg(Reg, RegState::Kill)
3392 .addImm((Imm >> 16) & 0xFFFF);
3393 if (Imm & 0xFFFF)
3394 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3395 .addReg(Reg, RegState::Kill)
3396 .addImm(Imm & 0xFFFF);
3397 }
3398}
3399
3400MachineInstr *PPCInstrInfo::getForwardingDefMI(
3402 unsigned &OpNoForForwarding,
3403 bool &SeenIntermediateUse) const {
3404 OpNoForForwarding = ~0U;
3405 MachineInstr *DefMI = nullptr;
3406 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3408 // If we're in SSA, get the defs through the MRI. Otherwise, only look
3409 // within the basic block to see if the register is defined using an
3410 // LI/LI8/ADDI/ADDI8.
3411 if (MRI->isSSA()) {
3412 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3413 if (!MI.getOperand(i).isReg())
3414 continue;
3415 Register Reg = MI.getOperand(i).getReg();
3416 if (!Reg.isVirtual())
3417 continue;
3418 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3419 if (TrueReg.isVirtual()) {
3420 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3421 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3422 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3423 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3424 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3425 OpNoForForwarding = i;
3426 DefMI = DefMIForTrueReg;
3427 // The ADDI and LI operand maybe exist in one instruction at same
3428 // time. we prefer to fold LI operand as LI only has one Imm operand
3429 // and is more possible to be converted. So if current DefMI is
3430 // ADDI/ADDI8, we continue to find possible LI/LI8.
3431 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3432 break;
3433 }
3434 }
3435 }
3436 } else {
3437 // Looking back through the definition for each operand could be expensive,
3438 // so exit early if this isn't an instruction that either has an immediate
3439 // form or is already an immediate form that we can handle.
3440 ImmInstrInfo III;
3441 unsigned Opc = MI.getOpcode();
3442 bool ConvertibleImmForm =
3443 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3444 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3445 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3446 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3447 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3448 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3449 Opc == PPC::RLWINM8_rec;
3450 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3451 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3452 : false;
3453 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3454 return nullptr;
3455
3456 // Don't convert or %X, %Y, %Y since that's just a register move.
3457 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3458 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3459 return nullptr;
3460 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3461 MachineOperand &MO = MI.getOperand(i);
3462 SeenIntermediateUse = false;
3463 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3464 Register Reg = MI.getOperand(i).getReg();
3465 // If we see another use of this reg between the def and the MI,
3466 // we want to flag it so the def isn't deleted.
3467 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3468 if (DefMI) {
3469 // Is this register defined by some form of add-immediate (including
3470 // load-immediate) within this basic block?
3471 switch (DefMI->getOpcode()) {
3472 default:
3473 break;
3474 case PPC::LI:
3475 case PPC::LI8:
3476 case PPC::ADDItocL8:
3477 case PPC::ADDI:
3478 case PPC::ADDI8:
3479 OpNoForForwarding = i;
3480 return DefMI;
3481 }
3482 }
3483 }
3484 }
3485 }
3486 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3487}
3488
3489unsigned PPCInstrInfo::getSpillTarget() const {
3490 // With P10, we may need to spill paired vector registers or accumulator
3491 // registers. MMA implies paired vectors, so we can just check that.
3492 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3493 // P11 uses the P10 target.
3494 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3495 2 : Subtarget.hasP9Vector() ?
3496 1 : 0;
3497}
3498
3499ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3500 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3501}
3502
3503ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3504 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3505}
3506
3507// This opt tries to convert the following imm form to an index form to save an
3508// add for stack variables.
3509// Return false if no such pattern found.
3510//
3511// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3512// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3513// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3514//
3515// can be converted to:
3516//
3517// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3518// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3519//
3520// In order to eliminate ADD instr, make sure that:
3521// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3522// new ADDI instr and ADDI can only take int16 Imm.
3523// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3524// between ADDI and ADD instr since its original def in ADDI will be changed
3525// in new ADDI instr. And also there should be no new def for it between
3526// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3527// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3528// between ADD and Imm instr since ADD instr will be eliminated.
3529// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3530// moved to Index instr.
3532 MachineFunction *MF = MI.getParent()->getParent();
3534 bool PostRA = !MRI->isSSA();
3535 // Do this opt after PEI which is after RA. The reason is stack slot expansion
3536 // in PEI may expose such opportunities since in PEI, stack slot offsets to
3537 // frame base(OffsetAddi) are determined.
3538 if (!PostRA)
3539 return false;
3540 unsigned ToBeDeletedReg = 0;
3541 int64_t OffsetImm = 0;
3542 unsigned XFormOpcode = 0;
3543 ImmInstrInfo III;
3544
3545 // Check if Imm instr meets requirement.
3546 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3547 III))
3548 return false;
3549
3550 bool OtherIntermediateUse = false;
3551 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3552
3553 // Exit if there is other use between ADD and Imm instr or no def found.
3554 if (OtherIntermediateUse || !ADDMI)
3555 return false;
3556
3557 // Check if ADD instr meets requirement.
3558 if (!isADDInstrEligibleForFolding(*ADDMI))
3559 return false;
3560
3561 unsigned ScaleRegIdx = 0;
3562 int64_t OffsetAddi = 0;
3563 MachineInstr *ADDIMI = nullptr;
3564
3565 // Check if there is a valid ToBeChangedReg in ADDMI.
3566 // 1: It must be killed.
3567 // 2: Its definition must be a valid ADDIMI.
3568 // 3: It must satify int16 offset requirement.
3569 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3570 ScaleRegIdx = 2;
3571 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3572 ScaleRegIdx = 1;
3573 else
3574 return false;
3575
3576 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3577 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3578 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
3579 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3581 for (auto It = ++Start; It != End; It++)
3582 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3583 return true;
3584 return false;
3585 };
3586
3587 // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3588 // treated as special zero when ScaleReg is R0/X0 register.
3589 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3590 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3591 return false;
3592
3593 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3594 // and Imm Instr.
3595 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3596 return false;
3597
3598 // Now start to do the transformation.
3599 LLVM_DEBUG(dbgs() << "Replace instruction: "
3600 << "\n");
3601 LLVM_DEBUG(ADDIMI->dump());
3602 LLVM_DEBUG(ADDMI->dump());
3603 LLVM_DEBUG(MI.dump());
3604 LLVM_DEBUG(dbgs() << "with: "
3605 << "\n");
3606
3607 // Update ADDI instr.
3608 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3609
3610 // Update Imm instr.
3611 MI.setDesc(get(XFormOpcode));
3612 MI.getOperand(III.ImmOpNo)
3613 .ChangeToRegister(ScaleReg, false, false,
3614 ADDMI->getOperand(ScaleRegIdx).isKill());
3615
3616 MI.getOperand(III.OpNoForForwarding)
3617 .ChangeToRegister(ToBeChangedReg, false, false, true);
3618
3619 // Eliminate ADD instr.
3620 ADDMI->eraseFromParent();
3621
3622 LLVM_DEBUG(ADDIMI->dump());
3623 LLVM_DEBUG(MI.dump());
3624
3625 return true;
3626}
3627
3629 int64_t &Imm) const {
3630 unsigned Opc = ADDIMI.getOpcode();
3631
3632 // Exit if the instruction is not ADDI.
3633 if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3634 return false;
3635
3636 // The operand may not necessarily be an immediate - it could be a relocation.
3637 if (!ADDIMI.getOperand(2).isImm())
3638 return false;
3639
3640 Imm = ADDIMI.getOperand(2).getImm();
3641
3642 return true;
3643}
3644
3646 unsigned Opc = ADDMI.getOpcode();
3647
3648 // Exit if the instruction is not ADD.
3649 return Opc == PPC::ADD4 || Opc == PPC::ADD8;
3650}
3651
3653 unsigned &ToBeDeletedReg,
3654 unsigned &XFormOpcode,
3655 int64_t &OffsetImm,
3656 ImmInstrInfo &III) const {
3657 // Only handle load/store.
3658 if (!MI.mayLoadOrStore())
3659 return false;
3660
3661 unsigned Opc = MI.getOpcode();
3662
3663 XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
3664
3665 // Exit if instruction has no index form.
3666 if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3667 return false;
3668
3669 // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3670 if (!instrHasImmForm(XFormOpcode,
3671 PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
3672 return false;
3673
3674 if (!III.IsSummingOperands)
3675 return false;
3676
3677 MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
3678 MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
3679 // Only support imm operands, not relocation slots or others.
3680 if (!ImmOperand.isImm())
3681 return false;
3682
3683 assert(RegOperand.isReg() && "Instruction format is not right");
3684
3685 // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
3686 if (!RegOperand.isKill())
3687 return false;
3688
3689 ToBeDeletedReg = RegOperand.getReg();
3690 OffsetImm = ImmOperand.getImm();
3691
3692 return true;
3693}
3694
                                             MachineInstr *&ADDIMI,
                                             int64_t &OffsetAddi,
                                             int64_t OffsetImm) const {
  assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
  // The add's source operand must be killed here; otherwise it has later
  // uses and we cannot rewrite/delete the feeding instructions.
  MachineOperand &MO = ADDMI->getOperand(Index);

  if (!MO.isKill())
    return false;

  bool OtherIntermediateUse = false;

  // Locate the post-RA definition of MO's register, recording whether any
  // other instruction between that def and ADDMI also touches it.
  ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
  // Currently handle only one "add + Imminstr" pair case, exit if other
  // intermediate use for ToBeChangedReg found.
  // TODO: handle the cases where there are other "add + Imminstr" pairs
  // with same offset in Imminstr which is like:
  //
  // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
  // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
  // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
  // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
  // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
  //
  // can be converted to:
  //
  // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
  //                 (OffsetAddi + OffsetImm)
  // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
  // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)

  if (OtherIntermediateUse || !ADDIMI)
    return false;
  // Check if ADDI instr meets requirement.
  if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
    return false;

  // The folded displacement must still fit the ADDI's signed 16-bit
  // immediate field.
  if (isInt<16>(OffsetAddi + OffsetImm))
    return true;
  return false;
}
3736
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
// Returns true if MI was rewritten; defs/uses whose kill flags may now be
// stale are conservatively collected into RegsToUpdate, and *KilledDef is
// set when the feeding def becomes dead as a result.
                                   SmallSet<Register, 4> &RegsToUpdate,
                                   MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  // NOTE(review): the declaration of MRI (obtained from MF->getRegInfo())
  // is elided in this excerpt — confirm against the full file.
  bool PostRA = !MRI->isSSA();
  bool SeenIntermediateUse = true;
  unsigned ForwardingOperand = ~0U;
  // Find the def (LI/ADDI-like) that feeds one of MI's register operands and
  // is a candidate for being folded into MI as an immediate.
  MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
                                           SeenIntermediateUse);
  if (!DefMI)
    return false;
  assert(ForwardingOperand < MI.getNumOperands() &&
         "The forwarding operand needs to be valid at this point");
  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
  // DefMI can only be deleted if MI holds its last (killed) use.
  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  if (KilledDef && KillFwdDefMI)
    *KilledDef = DefMI;

  // Conservatively add defs from DefMI and defs/uses from MI to the set of
  // registers that need their kill flags updated.
  for (const MachineOperand &MO : DefMI->operands())
    if (MO.isReg() && MO.isDef())
      RegsToUpdate.insert(MO.getReg());
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg())
      RegsToUpdate.insert(MO.getReg());

  // If this is a imm instruction and its register operands is produced by ADDI,
  // put the imm into imm inst directly.
  if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
          PPC::INSTRUCTION_LIST_END &&
      transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
    return true;

  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? PPC::isVFRegister(MI.getOperand(0).getReg())
                     : false;
  bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by an add-immediate,
  // try to convert it.
  if (HasImmForm &&
      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
                                 KillFwdDefMI))
    return true;

  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by LI, convert it now.
  if (HasImmForm &&
      transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
    return true;

  // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
  // can be simpified to LI.
  if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
    return true;

  return false;
}
3801
3803 MachineInstr **ToErase) const {
3804 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3805 Register FoldingReg = MI.getOperand(1).getReg();
3806 if (!FoldingReg.isVirtual())
3807 return false;
3808 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3809 if (SrcMI->getOpcode() != PPC::RLWINM &&
3810 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3811 SrcMI->getOpcode() != PPC::RLWINM8 &&
3812 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3813 return false;
3814 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3815 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3816 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3817 "Invalid PPC::RLWINM Instruction!");
3818 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3819 uint64_t SHMI = MI.getOperand(2).getImm();
3820 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3821 uint64_t MBMI = MI.getOperand(3).getImm();
3822 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3823 uint64_t MEMI = MI.getOperand(4).getImm();
3824
3825 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3826 "Invalid PPC::RLWINM Instruction!");
3827 // If MBMI is bigger than MEMI, we always can not get run of ones.
3828 // RotatedSrcMask non-wrap:
3829 // 0........31|32........63
3830 // RotatedSrcMask: B---E B---E
3831 // MaskMI: -----------|--E B------
3832 // Result: ----- --- (Bad candidate)
3833 //
3834 // RotatedSrcMask wrap:
3835 // 0........31|32........63
3836 // RotatedSrcMask: --E B----|--E B----
3837 // MaskMI: -----------|--E B------
3838 // Result: --- -----|--- ----- (Bad candidate)
3839 //
3840 // One special case is RotatedSrcMask is a full set mask.
3841 // RotatedSrcMask full:
3842 // 0........31|32........63
3843 // RotatedSrcMask: ------EB---|-------EB---
3844 // MaskMI: -----------|--E B------
3845 // Result: -----------|--- ------- (Good candidate)
3846
3847 // Mark special case.
3848 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3849
3850 // For other MBMI > MEMI cases, just return.
3851 if ((MBMI > MEMI) && !SrcMaskFull)
3852 return false;
3853
3854 // Handle MBMI <= MEMI cases.
3855 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3856 // In MI, we only need low 32 bits of SrcMI, just consider about low 32
3857 // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
3858 // while in PowerPC ISA, lowerest bit is at index 63.
3859 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3860
3861 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3862 APInt FinalMask = RotatedSrcMask & MaskMI;
3863 uint32_t NewMB, NewME;
3864 bool Simplified = false;
3865
3866 // If final mask is 0, MI result should be 0 too.
3867 if (FinalMask.isZero()) {
3868 bool Is64Bit =
3869 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3870 Simplified = true;
3871 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3872 LLVM_DEBUG(MI.dump());
3873
3874 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3875 // Replace MI with "LI 0"
3876 MI.removeOperand(4);
3877 MI.removeOperand(3);
3878 MI.removeOperand(2);
3879 MI.getOperand(1).ChangeToImmediate(0);
3880 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3881 } else {
3882 // Replace MI with "ANDI_rec reg, 0"
3883 MI.removeOperand(4);
3884 MI.removeOperand(3);
3885 MI.getOperand(2).setImm(0);
3886 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3887 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3888 if (SrcMI->getOperand(1).isKill()) {
3889 MI.getOperand(1).setIsKill(true);
3890 SrcMI->getOperand(1).setIsKill(false);
3891 } else
3892 // About to replace MI.getOperand(1), clear its kill flag.
3893 MI.getOperand(1).setIsKill(false);
3894 }
3895
3896 LLVM_DEBUG(dbgs() << "With: ");
3897 LLVM_DEBUG(MI.dump());
3898
3899 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3900 NewMB <= NewME) ||
3901 SrcMaskFull) {
3902 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
3903 // than NewME. Otherwise we get a 64 bit value after folding, but MI
3904 // return a 32 bit value.
3905 Simplified = true;
3906 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3907 LLVM_DEBUG(MI.dump());
3908
3909 uint16_t NewSH = (SHSrc + SHMI) % 32;
3910 MI.getOperand(2).setImm(NewSH);
3911 // If SrcMI mask is full, no need to update MBMI and MEMI.
3912 if (!SrcMaskFull) {
3913 MI.getOperand(3).setImm(NewMB);
3914 MI.getOperand(4).setImm(NewME);
3915 }
3916 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3917 if (SrcMI->getOperand(1).isKill()) {
3918 MI.getOperand(1).setIsKill(true);
3919 SrcMI->getOperand(1).setIsKill(false);
3920 } else
3921 // About to replace MI.getOperand(1), clear its kill flag.
3922 MI.getOperand(1).setIsKill(false);
3923
3924 LLVM_DEBUG(dbgs() << "To: ");
3925 LLVM_DEBUG(MI.dump());
3926 }
3927 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
3928 !SrcMI->hasImplicitDef()) {
3929 // If FoldingReg has no non-debug use and it has no implicit def (it
3930 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
3931 // Otherwise keep it.
3932 *ToErase = SrcMI;
3933 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3934 LLVM_DEBUG(SrcMI->dump());
3935 }
3936 return Simplified;
3937}
3938
// Return true (and fill in III) if the reg+reg instruction Opc has an
// equivalent reg+imm form. IsVFReg tells whether operand 0 lives in the
// VF register class (matters for the Power9 X-form memory ops below), and
// PostRA selects the physical-register variants where they differ.
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
                                   ImmInstrInfo &III, bool PostRA) const {
  // The vast majority of the instructions would need their operand 2 replaced
  // with an immediate when switching to the reg+imm form. A marked exception
  // are the update form loads/stores for which a constant operand 2 would need
  // to turn into a displacement and move operand 1 to the operand 2 position.
  III.ImmOpNo = 2;
  III.OpNoForForwarding = 2;
  III.ImmWidth = 16;
  III.ImmMustBeMultipleOf = 1;
  III.TruncateImmTo = 0;
  III.IsSummingOperands = false;
  switch (Opc) {
  default: return false;
  // Simple arithmetic: operand 2 becomes the immediate.
  case PPC::ADD4:
  case PPC::ADD8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 1;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
    break;
  case PPC::ADDC:
  case PPC::ADDC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
    break;
  case PPC::ADDC_rec:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = PPC::ADDIC_rec;
    break;
  case PPC::SUBFC:
  case PPC::SUBFC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
    break;
  // Comparisons: signed vs. logical variants pick signed vs. unsigned imm.
  case PPC::CMPW:
  case PPC::CMPD:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
    break;
  case PPC::CMPLW:
  case PPC::CMPLD:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
    break;
  // Bitwise logical ops with unsigned 16-bit immediates.
  case PPC::AND_rec:
  case PPC::AND8_rec:
  case PPC::OR:
  case PPC::OR8:
  case PPC::XOR:
  case PPC::XOR8:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::AND_rec:
      III.ImmOpcode = PPC::ANDI_rec;
      break;
    case PPC::AND8_rec:
      III.ImmOpcode = PPC::ANDI8_rec;
      break;
    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
    }
    break;
  // 32-bit rotates/shifts: the rotate amount only uses the low 5 (rotate)
  // or 6 (shift) bits, so over-wide immediates are truncated rather than
  // rejected — except algebraic right shifts, which must be exact.
  case PPC::RLWNM:
  case PPC::RLWNM8:
  case PPC::RLWNM_rec:
  case PPC::RLWNM8_rec:
  case PPC::SLW:
  case PPC::SLW8:
  case PPC::SLW_rec:
  case PPC::SLW8_rec:
  case PPC::SRW:
  case PPC::SRW8:
  case PPC::SRW_rec:
  case PPC::SRW8_rec:
  case PPC::SRAW:
  case PPC::SRAW_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
        Opc == PPC::RLWNM8_rec)
      III.TruncateImmTo = 5;
    else
      III.TruncateImmTo = 6;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::RLWNM_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::RLWNM8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SLW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SLW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SRW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SRW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRAW:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI;
      break;
    case PPC::SRAW_rec:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI_rec;
      break;
    }
    break;
  // 64-bit rotates/shifts: same idea as above with 6/7-bit amounts.
  case PPC::RLDCL:
  case PPC::RLDCL_rec:
  case PPC::RLDCR:
  case PPC::RLDCR_rec:
  case PPC::SLD:
  case PPC::SLD_rec:
  case PPC::SRD:
  case PPC::SRD_rec:
  case PPC::SRAD:
  case PPC::SRAD_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
        Opc == PPC::RLDCR_rec)
      III.TruncateImmTo = 6;
    else
      III.TruncateImmTo = 7;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::RLDCL_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::RLDCR_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::SLD_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::SRD_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::SRAD:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI;
      break;
    case PPC::SRAD_rec:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI_rec;
      break;
    }
    break;
  // Loads and stores:
  // X-form -> D-form: operand 1 (the base that can be r0) becomes the
  // displacement slot, and the forwarded register sits in operand 2.
  case PPC::LBZX:
  case PPC::LBZX8:
  case PPC::LHZX:
  case PPC::LHZX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWZX:
  case PPC::LWZX8:
  case PPC::LWAX:
  case PPC::LDX:
  case PPC::LFSX:
  case PPC::LFDX:
  case PPC::STBX:
  case PPC::STBX8:
  case PPC::STHX:
  case PPC::STHX8:
  case PPC::STWX:
  case PPC::STWX8:
  case PPC::STDX:
  case PPC::STFSX:
  case PPC::STFDX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
    case PPC::LWAX:
      // DS-form: displacement must be a multiple of 4.
      III.ImmOpcode = PPC::LWA;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
    case PPC::STDX:
      III.ImmOpcode = PPC::STD;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
    }
    break;
  // Update-form loads/stores: operand slots shift by one because the
  // updated base register is also an output.
  case PPC::LBZUX:
  case PPC::LBZUX8:
  case PPC::LHZUX:
  case PPC::LHZUX8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LWZUX:
  case PPC::LWZUX8:
  case PPC::LDUX:
  case PPC::LFSUX:
  case PPC::LFDUX:
  case PPC::STBUX:
  case PPC::STBUX8:
  case PPC::STHUX:
  case PPC::STHUX8:
  case PPC::STWUX:
  case PPC::STWUX8:
  case PPC::STDUX:
  case PPC::STFSUX:
  case PPC::STFDUX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 2;
    III.ZeroIsSpecialNew = 3;
    III.IsCommutative = false;
    III.IsSummingOperands = true;
    III.ImmOpNo = 2;
    III.OpNoForForwarding = 3;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
    case PPC::LDUX:
      III.ImmOpcode = PPC::LDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
    case PPC::STDUX:
      III.ImmOpcode = PPC::STDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
    }
    break;
  // Power9 and up only. For some of these, the X-Form version has access to all
  // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
  // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
  // into or stored from is one of the VR registers.
  case PPC::LXVX:
  case PPC::LXSSPX:
  case PPC::LXSDX:
  case PPC::STXVX:
  case PPC::STXSSPX:
  case PPC::STXSDX:
  case PPC::XFLOADf32:
  case PPC::XFLOADf64:
  case PPC::XFSTOREf32:
  case PPC::XFSTOREf64:
    if (!Subtarget.hasP9Vector())
      return false;
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    III.ImmMustBeMultipleOf = 4;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LXVX:
      III.ImmOpcode = PPC::LXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::LXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSSP;
        else {
          III.ImmOpcode = PPC::LFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf32:
      III.ImmOpcode = PPC::DFLOADf32;
      break;
    case PPC::LXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSD;
        else {
          III.ImmOpcode = PPC::LFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf64:
      III.ImmOpcode = PPC::DFLOADf64;
      break;
    case PPC::STXVX:
      III.ImmOpcode = PPC::STXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::STXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSSP;
        else {
          III.ImmOpcode = PPC::STFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf32:
      III.ImmOpcode = PPC::DFSTOREf32;
      break;
    case PPC::STXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSD;
        else {
          III.ImmOpcode = PPC::STFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf64:
      III.ImmOpcode = PPC::DFSTOREf64;
      break;
    }
    break;
  }
  return true;
}
4359
// Utility function for swaping two arbitrary operands of an instruction.
// Operands are removed and re-added because MachineInstr has no in-place
// operand exchange for arbitrary indices.
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
  assert(Op1 != Op2 && "Cannot swap operand with itself.");

  unsigned MaxOp = std::max(Op1, Op2);
  unsigned MinOp = std::min(Op1, Op2);
  // Take copies before removal: removeOperand invalidates references.
  MachineOperand MOp1 = MI.getOperand(MinOp);
  MachineOperand MOp2 = MI.getOperand(MaxOp);
  // Remove the higher index first so the lower index stays valid.
  MI.removeOperand(std::max(Op1, Op2));
  MI.removeOperand(std::min(Op1, Op2));

  // If the operands we are swapping are the two at the end (the common case)
  // we can just remove both and add them in the opposite order.
  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
    MI.addOperand(MOp2);
    MI.addOperand(MOp1);
  } else {
    // Store all operands in a temporary vector, remove them and re-add in the
    // right order.
    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
    // NOTE(review): this loop underflows if MinOp == 0 (unsigned `i >= 0` is
    // always true); appears to assume callers never swap operand 0 — confirm.
    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }
    // MOp2 needs to be added next.
    MI.addOperand(MOp2);
    // Now add the rest.
    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
      if (i == MaxOp)
        MI.addOperand(MOp1);
      else {
        // MOps was filled back-to-front above, so popping restores order.
        MI.addOperand(MOps.back());
        MOps.pop_back();
      }
    }
  }
}
4398
4399// Check if the 'MI' that has the index OpNoForForwarding
4400// meets the requirement described in the ImmInstrInfo.
4401bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4402 const ImmInstrInfo &III,
4403 unsigned OpNoForForwarding
4404 ) const {
4405 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4406 // would not work pre-RA, we can only do the check post RA.
4407 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4408 if (MRI.isSSA())
4409 return false;
4410
4411 // Cannot do the transform if MI isn't summing the operands.
4412 if (!III.IsSummingOperands)
4413 return false;
4414
4415 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4416 if (!III.ZeroIsSpecialOrig)
4417 return false;
4418
4419 // We cannot do the transform if the operand we are trying to replace
4420 // isn't the same as the operand the instruction allows.
4421 if (OpNoForForwarding != III.OpNoForForwarding)
4422 return false;
4423
4424 // Check if the instruction we are trying to transform really has
4425 // the special zero register as its operand.
4426 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4427 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4428 return false;
4429
4430 // This machine instruction is convertible if it is,
4431 // 1. summing the operands.
4432 // 2. one of the operands is special zero register.
4433 // 3. the operand we are trying to replace is allowed by the MI.
4434 return true;
4435}
4436
4437// Check if the DefMI is the add inst and set the ImmMO and RegMO
4438// accordingly.
4439bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4440 const ImmInstrInfo &III,
4441 MachineOperand *&ImmMO,
4442 MachineOperand *&RegMO) const {
4443 unsigned Opc = DefMI.getOpcode();
4444 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4445 return false;
4446
4447 // Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL8
4448 // on AIX which is used for toc-data access. TODO: Follow up to see if it can
4449 // apply for AIX toc-data as well.
4450 if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
4451 return false;
4452
4453 assert(DefMI.getNumOperands() >= 3 &&
4454 "Add inst must have at least three operands");
4455 RegMO = &DefMI.getOperand(1);
4456 ImmMO = &DefMI.getOperand(2);
4457
4458 // Before RA, ADDI first operand could be a frame index.
4459 if (!RegMO->isReg())
4460 return false;
4461
4462 // This DefMI is elgible for forwarding if it is:
4463 // 1. add inst
4464 // 2. one of the operands is Imm/CPI/Global.
4465 return isAnImmediateOperand(*ImmMO);
4466}
4467
bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x   ->   z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
  // NOTE(review): the declaration of the iterator `It` (starting at MI) is
  // elided in this excerpt — confirm against the full file.
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  It++;
  for (; It != E; ++It) {
    // Any redefinition of Reg strictly between DefMI and MI blocks the
    // forwarding; kills and reads are merely reported to the caller.
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      IsFwdFeederRegKilled = true;
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}
4508
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL8) {
    // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
    // However, we know that, it is 16-bit width, and has the alignment of 4.
    // Check if the instruction met the requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
       III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load. A DForm load cannot be represented if it is a multiple of say 2.
    // XForm loads do not have this restriction.
    if (ImmMO.isGlobal()) {
      const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
      // NOTE(review): the alignment test guarding this return (the global's
      // pointer alignment vs. III.ImmMustBeMultipleOf) is elided in this
      // excerpt — confirm against the full file.
        return false;
    }

    return true;
  }

  if (ImmMO.isImm()) {
    // It is Imm, we need to check if the Imm fit the range.
    // Sign-extend to 64-bits.
    // DefMI may be folded with another imm form instruction, the result Imm is
    // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
    APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
    if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
      return false;
    if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
      return false;
    Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);

    // The displacement must respect the D/DS/DQ-form multiple-of constraint.
    if (Imm % III.ImmMustBeMultipleOf)
      return false;
    // Rotate/shift amounts only use the low TruncateImmTo bits.
    if (III.TruncateImmTo)
      Imm &= ((1 << III.TruncateImmTo) - 1);
  }
  else
    return false;

  // This ImmMO is forwarded if it meets the requriement describle
  // in ImmInstrInfo
  return true;
}
4560
4561bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4562 unsigned OpNoForForwarding,
4563 MachineInstr **KilledDef) const {
4564 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4565 !DefMI.getOperand(1).isImm())
4566 return false;
4567
4568 MachineFunction *MF = MI.getParent()->getParent();
4570 bool PostRA = !MRI->isSSA();
4571
4572 int64_t Immediate = DefMI.getOperand(1).getImm();
4573 // Sign-extend to 64-bits.
4574 int64_t SExtImm = SignExtend64<16>(Immediate);
4575
4576 bool ReplaceWithLI = false;
4577 bool Is64BitLI = false;
4578 int64_t NewImm = 0;
4579 bool SetCR = false;
4580 unsigned Opc = MI.getOpcode();
4581 switch (Opc) {
4582 default:
4583 return false;
4584
4585 // FIXME: Any branches conditional on such a comparison can be made
4586 // unconditional. At this time, this happens too infrequently to be worth
4587 // the implementation effort, but if that ever changes, we could convert
4588 // such a pattern here.
4589 case PPC::CMPWI:
4590 case PPC::CMPLWI:
4591 case PPC::CMPDI:
4592 case PPC::CMPLDI: {
4593 // Doing this post-RA would require dataflow analysis to reliably find uses
4594 // of the CR register set by the compare.
4595 // No need to fixup killed/dead flag since this transformation is only valid
4596 // before RA.
4597 if (PostRA)
4598 return false;
4599 // If a compare-immediate is fed by an immediate and is itself an input of
4600 // an ISEL (the most common case) into a COPY of the correct register.
4601 bool Changed = false;
4602 Register DefReg = MI.getOperand(0).getReg();
4603 int64_t Comparand = MI.getOperand(2).getImm();
4604 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4605 ? (Comparand | 0xFFFFFFFFFFFF0000)
4606 : Comparand;
4607
4608 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4609 unsigned UseOpc = CompareUseMI.getOpcode();
4610 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4611 continue;
4612 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4613 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4614 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4615 unsigned RegToCopy =
4616 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4617 if (RegToCopy == PPC::NoRegister)
4618 continue;
4619 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4620 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4621 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4622 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4623 CompareUseMI.removeOperand(3);
4624 CompareUseMI.removeOperand(2);
4625 continue;
4626 }
4627 LLVM_DEBUG(
4628 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4629 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4630 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4631 // Convert to copy and remove unneeded operands.
4632 CompareUseMI.setDesc(get(PPC::COPY));
4633 CompareUseMI.removeOperand(3);
4634 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4635 CmpIselsConverted++;
4636 Changed = true;
4637 LLVM_DEBUG(CompareUseMI.dump());
4638 }
4639 if (Changed)
4640 return true;
4641 // This may end up incremented multiple times since this function is called
4642 // during a fixed-point transformation, but it is only meant to indicate the
4643 // presence of this opportunity.
4644 MissedConvertibleImmediateInstrs++;
4645 return false;
4646 }
4647
4648 // Immediate forms - may simply be convertable to an LI.
4649 case PPC::ADDI:
4650 case PPC::ADDI8: {
4651 // Does the sum fit in a 16-bit signed field?
4652 int64_t Addend = MI.getOperand(2).getImm();
4653 if (isInt<16>(Addend + SExtImm)) {
4654 ReplaceWithLI = true;
4655 Is64BitLI = Opc == PPC::ADDI8;
4656 NewImm = Addend + SExtImm;
4657 break;
4658 }
4659 return false;
4660 }
4661 case PPC::SUBFIC:
4662 case PPC::SUBFIC8: {
4663 // Only transform this if the CARRY implicit operand is dead.
4664 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4665 return false;
4666 int64_t Minuend = MI.getOperand(2).getImm();
4667 if (isInt<16>(Minuend - SExtImm)) {
4668 ReplaceWithLI = true;
4669 Is64BitLI = Opc == PPC::SUBFIC8;
4670 NewImm = Minuend - SExtImm;
4671 break;
4672 }
4673 return false;
4674 }
4675 case PPC::RLDICL:
4676 case PPC::RLDICL_rec:
4677 case PPC::RLDICL_32:
4678 case PPC::RLDICL_32_64: {
4679 // Use APInt's rotate function.
4680 int64_t SH = MI.getOperand(2).getImm();
4681 int64_t MB = MI.getOperand(3).getImm();
4682 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4683 SExtImm, true);
4684 InVal = InVal.rotl(SH);
4685 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4686 InVal &= Mask;
4687 // Can't replace negative values with an LI as that will sign-extend
4688 // and not clear the left bits. If we're setting the CR bit, we will use
4689 // ANDI_rec which won't sign extend, so that's safe.
4690 if (isUInt<15>(InVal.getSExtValue()) ||
4691 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4692 ReplaceWithLI = true;
4693 Is64BitLI = Opc != PPC::RLDICL_32;
4694 NewImm = InVal.getSExtValue();
4695 SetCR = Opc == PPC::RLDICL_rec;
4696 break;
4697 }
4698 return false;
4699 }
4700 case PPC::RLWINM:
4701 case PPC::RLWINM8:
4702 case PPC::RLWINM_rec:
4703 case PPC::RLWINM8_rec: {
4704 int64_t SH = MI.getOperand(2).getImm();
4705 int64_t MB = MI.getOperand(3).getImm();
4706 int64_t ME = MI.getOperand(4).getImm();
4707 APInt InVal(32, SExtImm, true);
4708 InVal = InVal.rotl(SH);
4709 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4710 InVal &= Mask;
4711 // Can't replace negative values with an LI as that will sign-extend
4712 // and not clear the left bits. If we're setting the CR bit, we will use
4713 // ANDI_rec which won't sign extend, so that's safe.
4714 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4715 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4716 isUInt<16>(InVal.getSExtValue()));
4717 if (ValueFits) {
4718 ReplaceWithLI = true;
4719 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4720 NewImm = InVal.getSExtValue();
4721 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4722 break;
4723 }
4724 return false;
4725 }
4726 case PPC::ORI:
4727 case PPC::ORI8:
4728 case PPC::XORI:
4729 case PPC::XORI8: {
4730 int64_t LogicalImm = MI.getOperand(2).getImm();
4731 int64_t Result = 0;
4732 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4733 Result = LogicalImm | SExtImm;
4734 else
4735 Result = LogicalImm ^ SExtImm;
4736 if (isInt<16>(Result)) {
4737 ReplaceWithLI = true;
4738 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4739 NewImm = Result;
4740 break;
4741 }
4742 return false;
4743 }
4744 }
4745
4746 if (ReplaceWithLI) {
4747 // We need to be careful with CR-setting instructions we're replacing.
4748 if (SetCR) {
4749 // We don't know anything about uses when we're out of SSA, so only
4750 // replace if the new immediate will be reproduced.
4751 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4752 if (PostRA && ImmChanged)
4753 return false;
4754
4755 if (!PostRA) {
4756 // If the defining load-immediate has no other uses, we can just replace
4757 // the immediate with the new immediate.
4758 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4759 DefMI.getOperand(1).setImm(NewImm);
4760
4761 // If we're not using the GPR result of the CR-setting instruction, we
4762 // just need to and with zero/non-zero depending on the new immediate.
4763 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4764 if (NewImm) {
4765 assert(Immediate && "Transformation converted zero to non-zero?");
4766 NewImm = Immediate;
4767 }
4768 } else if (ImmChanged)
4769 return false;
4770 }
4771 }
4772
4773 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4774 LLVM_DEBUG(MI.dump());
4775 LLVM_DEBUG(dbgs() << "Fed by:\n");
4776 LLVM_DEBUG(DefMI.dump());
4778 LII.Imm = NewImm;
4779 LII.Is64Bit = Is64BitLI;
4780 LII.SetCR = SetCR;
4781 // If we're setting the CR, the original load-immediate must be kept (as an
4782 // operand to ANDI_rec/ANDI8_rec).
4783 if (KilledDef && SetCR)
4784 *KilledDef = nullptr;
4785 replaceInstrWithLI(MI, LII);
4786
4787 if (PostRA)
4788 recomputeLivenessFlags(*MI.getParent());
4789
4790 LLVM_DEBUG(dbgs() << "With:\n");
4791 LLVM_DEBUG(MI.dump());
4792 return true;
4793 }
4794 return false;
4795}
4796
4797bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4798 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4799 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4800 bool PostRA = !MRI->isSSA();
4801 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4802 // for post-ra.
4803 if (PostRA)
4804 return false;
4805
4806 // Only handle load/store.
4807 if (!MI.mayLoadOrStore())
4808 return false;
4809
4810 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4811
4812 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4813 "MI must have x-form opcode");
4814
4815 // get Imm Form info.
4816 ImmInstrInfo III;
4817 bool IsVFReg = MI.getOperand(0).isReg()
4818 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4819 : false;
4820
4821 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4822 return false;
4823
4824 if (!III.IsSummingOperands)
4825 return false;
4826
4827 if (OpNoForForwarding != III.OpNoForForwarding)
4828 return false;
4829
4830 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4831 if (!ImmOperandMI.isImm())
4832 return false;
4833
4834 // Check DefMI.
4835 MachineOperand *ImmMO = nullptr;
4836 MachineOperand *RegMO = nullptr;
4837 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4838 return false;
4839 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4840
4841 // Check Imm.
4842 // Set ImmBase from imm instruction as base and get new Imm inside
4843 // isImmElgibleForForwarding.
4844 int64_t ImmBase = ImmOperandMI.getImm();
4845 int64_t Imm = 0;
4846 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4847 return false;
4848
4849 // Do the transform
4850 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4851 LLVM_DEBUG(MI.dump());
4852 LLVM_DEBUG(dbgs() << "Fed by:\n");
4853 LLVM_DEBUG(DefMI.dump());
4854
4855 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4856 MI.getOperand(III.ImmOpNo).setImm(Imm);
4857
4858 LLVM_DEBUG(dbgs() << "With:\n");
4859 LLVM_DEBUG(MI.dump());
4860 return true;
4861}
4862
4863// If an X-Form instruction is fed by an add-immediate and one of its operands
4864// is the literal zero, attempt to forward the source of the add-immediate to
4865// the corresponding D-Form instruction with the displacement coming from
4866// the immediate being added.
// Returns true on success; MI is rewritten in place to III.ImmOpcode with the
// forwarded base register and the immediate/relocation from DefMI installed.
4867bool PPCInstrInfo::transformToImmFormFedByAdd(
4868 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4869 MachineInstr &DefMI, bool KillDefMI) const {
4870 // RegMO ImmMO
4871 // | |
4872 // x = addi reg, imm <----- DefMI
4873 // y = op 0 , x <----- MI
4874 // |
4875 // OpNoForForwarding
4876 // Check if the MI meet the requirement described in the III.
4877 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4878 return false;
4879
4880 // Check if the DefMI meet the requirement
4881 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4882 MachineOperand *ImmMO = nullptr;
4883 MachineOperand *RegMO = nullptr;
4884 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4885 return false;
4886 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4887
4888 // As we get the Imm operand now, we need to check if the ImmMO meet
4889 // the requirement described in the III. If yes set the Imm.
4890 int64_t Imm = 0;
4891 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4892 return false;
4893
4894 bool IsFwdFeederRegKilled = false;
4895 bool SeenIntermediateUse = false;
4896 // Check if the RegMO can be forwarded to MI.
4897 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4898 IsFwdFeederRegKilled, SeenIntermediateUse))
4899 return false;
4900
4901 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4902 bool PostRA = !MRI.isSSA();
4903
4904 // We know that, the MI and DefMI both meet the pattern, and
4905 // the Imm also meet the requirement with the new Imm-form.
4906 // It is safe to do the transformation now.
4907 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4908 LLVM_DEBUG(MI.dump());
4909 LLVM_DEBUG(dbgs() << "Fed by:\n");
4910 LLVM_DEBUG(DefMI.dump());
4911
4912 // Update the base reg first.
4913 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4914 false, false,
4915 RegMO->isKill());
4916
4917 // Then, update the imm.
4918 if (ImmMO->isImm()) {
4919 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4920 // directly.
// NOTE(review): the statement that performs this replacement (upstream line
// 4921, presumably replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm))
// is not visible in this copy -- confirm against upstream before relying on
// this listing.
4922 }
4923 else {
4924 // Otherwise, it is Constant Pool Index(CPI) or Global,
4925 // which is relocation in fact. We need to replace the special zero
4926 // register with ImmMO.
4927 // Before that, we need to fixup the target flags for imm.
4928 // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4929 if (DefMI.getOpcode() == PPC::ADDItocL8)
// NOTE(review): the body of this `if` (upstream line 4930, presumably setting
// a TOC-low target flag on ImmMO) is not visible in this copy.
4931
4932 // MI didn't have the interface such as MI.setOperand(i) though
4933 // it has MI.getOperand(i). To repalce the ZERO MachineOperand with
4934 // ImmMO, we need to remove ZERO operand and all the operands behind it,
4935 // and, add the ImmMO, then, move back all the operands behind ZERO.
// NOTE(review): the declaration of MOps (upstream line 4936, presumably
// SmallVector<MachineOperand, 2> MOps;) is not visible in this copy; the loop
// below pushes the trailing operands onto it in reverse order.
4937 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
4938 MOps.push_back(MI.getOperand(i));
4939 MI.removeOperand(i);
4940 }
4941
4942 // Remove the last MO in the list, which is ZERO operand in fact.
4943 MOps.pop_back();
4944 // Add the imm operand.
4945 MI.addOperand(*ImmMO);
4946 // Now add the rest back.
4947 for (auto &MO : MOps)
4948 MI.addOperand(MO);
4949 }
4950
4951 // Update the opcode.
4952 MI.setDesc(get(III.ImmOpcode));
4953
// Post-RA the operand/opcode rewrite invalidates kill/dead flags; recompute.
4954 if (PostRA)
4955 recomputeLivenessFlags(*MI.getParent());
4956 LLVM_DEBUG(dbgs() << "With:\n");
4957 LLVM_DEBUG(MI.dump());
4958
4959 return true;
4960}
4961
// Convert a reg+reg instruction whose constant operand is fed by LI/LI8 into
// the corresponding immediate-form instruction described by III. Returns true
// on success; MI is rewritten in place. Shifts get special handling below.
4962bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
4963 const ImmInstrInfo &III,
4964 unsigned ConstantOpNo,
4965 MachineInstr &DefMI) const {
4966 // DefMI must be LI or LI8.
4967 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4968 !DefMI.getOperand(1).isImm())
4969 return false;
4970
4971 // Get Imm operand and Sign-extend to 64-bits.
4972 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
4973
4974 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4975 bool PostRA = !MRI.isSSA();
4976 // Exit early if we can't convert this.
4977 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
4978 return false;
4979 if (Imm % III.ImmMustBeMultipleOf)
4980 return false;
4981 if (III.TruncateImmTo)
4982 Imm &= ((1 << III.TruncateImmTo) - 1);
// Range-check the immediate against the new opcode's field width.
4983 if (III.SignedImm) {
4984 APInt ActualValue(64, Imm, true);
4985 if (!ActualValue.isSignedIntN(III.ImmWidth))
4986 return false;
4987 } else {
4988 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
4989 if ((uint64_t)Imm > UnsignedMax)
4990 return false;
4991 }
4992
4993 // If we're post-RA, the instructions don't agree on whether register zero is
4994 // special, we can transform this as long as the register operand that will
4995 // end up in the location where zero is special isn't R0.
4996 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
4997 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
4998 III.ZeroIsSpecialNew + 1;
4999 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
5000 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5001 // If R0 is in the operand where zero is special for the new instruction,
5002 // it is unsafe to transform if the constant operand isn't that operand.
5003 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
5004 ConstantOpNo != III.ZeroIsSpecialNew)
5005 return false;
5006 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
5007 ConstantOpNo != PosForOrigZero)
5008 return false;
5009 }
5010
// Classify the shift opcodes that need the three-way treatment below.
5011 unsigned Opc = MI.getOpcode();
5012 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5013 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5014 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5015 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5016 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5017 Opc == PPC::SRD || Opc == PPC::SRD_rec;
5018 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5019 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5020 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5021 Opc == PPC::SRD_rec;
5022
5023 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5024 LLVM_DEBUG(MI.dump());
5025 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5026 LLVM_DEBUG(DefMI.dump());
5027 MI.setDesc(get(III.ImmOpcode));
5028 if (ConstantOpNo == III.OpNoForForwarding) {
5029 // Converting shifts to immediate form is a bit tricky since they may do
5030 // one of three things:
5031 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5032 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5033 // setting CR0)
5034 // 3. If the shift amount is in [1, OpSize), it's just a shift
5035 if (SpecialShift32 || SpecialShift64) {
// NOTE(review): the declaration of LII (upstream line 5036, presumably
// LoadImmediateInfo LII;) is not visible in this copy.
5037 LII.Imm = 0;
5038 LII.SetCR = SetCR;
5039 LII.Is64Bit = SpecialShift64;
5040 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
// Case 1 above: over-shift produces zero, so emit a load-immediate of 0.
5041 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5042 replaceInstrWithLI(MI, LII);
5043 // Shifts by zero don't change the value. If we don't need to set CR0,
5044 // just convert this to a COPY. Can't do this post-RA since we've already
5045 // cleaned up the copies.
5046 else if (!SetCR && ShAmt == 0 && !PostRA) {
5047 MI.removeOperand(2);
5048 MI.setDesc(get(PPC::COPY));
5049 } else {
5050 // The 32 bit and 64 bit instructions are quite different.
5051 if (SpecialShift32) {
5052 // Left shifts use (N, 0, 31-N).
5053 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5054 // use (0, 0, 31) if N == 0.
5055 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5056 uint64_t MB = RightShift ? ShAmt : 0;
5057 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
// NOTE(review): the statement installing SH as the immediate operand
// (upstream line 5058, presumably replaceInstrOperandWithImm(MI,
// ConstantOpNo, SH)) is not visible in this copy.
5059 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5060 .addImm(ME);
5061 } else {
5062 // Left shifts use (N, 63-N).
5063 // Right shifts use (64-N, N) if 0 < N < 64.
5064 // use (0, 0) if N == 0.
5065 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5066 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
// NOTE(review): the statement installing SH as the immediate operand
// (upstream line 5067) is not visible in this copy.
5068 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5069 }
5070 }
5071 } else
5072 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5073 }
5074 // Convert commutative instructions (switch the operands and convert the
5075 // desired one to an immediate.
5076 else if (III.IsCommutative) {
5077 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5078 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5079 } else
5080 llvm_unreachable("Should have exited early!");
5081
5082 // For instructions for which the constant register replaces a different
5083 // operand than where the immediate goes, we need to swap them.
5084 if (III.OpNoForForwarding != III.ImmOpNo)
// NOTE(review): the swap statement (upstream line 5085, presumably
// swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo)) is not visible in
// this copy.
5086
5087 // If the special R0/X0 register index are different for original instruction
5088 // and new instruction, we need to fix up the register class in new
5089 // instruction.
5090 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5091 if (III.ZeroIsSpecialNew) {
5092 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5093 // need to fix up register class.
5094 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5095 if (RegToModify.isVirtual()) {
5096 const TargetRegisterClass *NewRC =
5097 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5098 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5099 MRI.setRegClass(RegToModify, NewRC);
5100 }
5101 }
5102 }
5103
// Post-RA the rewrite invalidates kill/dead flags; recompute them.
5104 if (PostRA)
5105 recomputeLivenessFlags(*MI.getParent());
5106
5107 LLVM_DEBUG(dbgs() << "With: ");
5108 LLVM_DEBUG(MI.dump());
5109 LLVM_DEBUG(dbgs() << "\n");
5110 return true;
5111}
5112
// Widen a register class for VSX targets: VRRC values may live in the larger
// VSRC class when VSX is available; any other class is returned unchanged.
5113const TargetRegisterClass *
// NOTE(review): the line carrying this method's name (upstream line 5114,
// presumably PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {)
// is not visible in this copy.
5115 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5116 return &PPC::VSRCRegClass;
5117 return RC;
5118}
5119
// NOTE(review): the signature line (upstream line 5120, presumably
// unsigned PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {) is not
// visible in this copy. The body defers to the TableGen-generated mapping
// from an opcode to its record (CR0-setting) form.
5121 return PPC::getRecordFormOpcode(Opcode);
5122}
5123
5124static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5125 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5126 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5127 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5128 Opcode == PPC::LHZUX8);
5129}
5130
5131// This function checks for sign extension from 32 bits to 64 bits.
// Returns true only when the defining instruction of Reg provably produces a
// value whose bits 0-31 replicate bit 32 (i.e. already sign-extended).
5132static bool definedBySignExtendingOp(const unsigned Reg,
5133 const MachineRegisterInfo *MRI) {
// NOTE(review): the guard on upstream line 5134 (presumably
// if (!Register::isVirtualRegister(Reg))) is not visible in this copy; as
// shown, the return below would be unconditional. Confirm against upstream.
5135 return false;
5136
5137 MachineInstr *MI = MRI->getVRegDef(Reg);
5138 if (!MI)
5139 return false;
5140
5141 int Opcode = MI->getOpcode();
5142 const PPCInstrInfo *TII =
5143 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
// Opcodes tagged SExt32To64 in TableGen are sign-extending by definition.
5144 if (TII->isSExt32To64(Opcode))
5145 return true;
5146
5147 // The first def of LBZU/LHZU is sign extended.
5148 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5149 return true;
5150
5151 // RLDICL generates sign-extended output if it clears at least
5152 // 33 bits from the left (MSB).
5153 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5154 return true;
5155
5156 // If at least one bit from left in a lower word is masked out,
5157 // all of 0 to 32-th bits of the output are cleared.
5158 // Hence the output is already sign extended.
5159 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5160 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5161 MI->getOperand(3).getImm() > 0 &&
5162 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5163 return true;
5164
5165 // If the most significant bit of immediate in ANDIS is zero,
5166 // all of 0 to 32-th bits are cleared.
5167 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5168 uint16_t Imm = MI->getOperand(2).getImm();
5169 if ((Imm & 0x8000) == 0)
5170 return true;
5171 }
5172
5173 return false;
5174}
5175
5176// This function checks the machine instruction that defines the input register
5177// Reg. If that machine instruction always outputs a value that has only zeros
5178// in the higher 32 bits then this function will return true.
5179static bool definedByZeroExtendingOp(const unsigned Reg,
5180 const MachineRegisterInfo *MRI) {
// NOTE(review): the guard on upstream line 5181 (presumably
// if (!Register::isVirtualRegister(Reg))) is not visible in this copy; as
// shown, the return below would be unconditional. Confirm against upstream.
5182 return false;
5183
5184 MachineInstr *MI = MRI->getVRegDef(Reg);
5185 if (!MI)
5186 return false;
5187
5188 int Opcode = MI->getOpcode();
5189 const PPCInstrInfo *TII =
5190 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
// Opcodes tagged ZExt32To64 in TableGen are zero-extending by definition.
5191 if (TII->isZExt32To64(Opcode))
5192 return true;
5193
5194 // The first def of LBZU/LHZU/LWZU are zero extended.
5195 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5196 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5197 MI->getOperand(0).getReg() == Reg)
5198 return true;
5199
5200 // The 16-bit immediate is sign-extended in li/lis.
5201 // If the most significant bit is zero, all higher bits are zero.
5202 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5203 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5204 int64_t Imm = MI->getOperand(1).getImm();
5205 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5206 return true;
5207 }
5208
5209 // We have some variations of rotate-and-mask instructions
5210 // that clear higher 32-bits.
5211 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5212 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5213 Opcode == PPC::RLDICL_32_64) &&
5214 MI->getOperand(3).getImm() >= 32)
5215 return true;
5216
5217 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5218 MI->getOperand(3).getImm() >= 32 &&
5219 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5220 return true;
5221
5222 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5223 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5224 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5225 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5226 return true;
5227
5228 return false;
5229}
5230
5231// This function returns true if the input MachineInstr is a TOC save
5232// instruction.
// NOTE(review): the signature line (upstream line 5233, presumably
// bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {) is not
// visible in this copy. A TOC save is a store whose base register is the
// stack pointer and whose offset equals the ABI's TOC save slot.
5234 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5235 return false;
5236 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5237 unsigned StackOffset = MI.getOperand(1).getImm();
5238 Register StackReg = MI.getOperand(2).getReg();
5239 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5240 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5241 return true;
5242
5243 return false;
5244}
5245
5246// We limit the max depth to track incoming values of PHIs or binary ops
5247// (e.g. AND) to avoid excessive cost.
// A depth of 1 means we follow at most one level of PHI/binary-op fan-out.
5248const unsigned MAX_BINOP_DEPTH = 1;
5249
5250// This function will promote the instruction which defines the register `Reg`
5251// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
5252// used to check whether an instruction needs to be promoted or not is similar
5253// to the logic used to check whether or not a defined register is sign or zero
5254// extended within the function PPCInstrInfo::isSignOrZeroExtended.
5255// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
5256// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
5257// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
5258// than once. This is done to prevent exponential recursion.
// NOTE(review): the signature lines (upstream lines 5259-5260, presumably
// void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
//                                                   MachineRegisterInfo *MRI,)
// are not visible in this copy.
5261 unsigned BinOpDepth,
5262 LiveVariables *LV) const {
5263 if (!Reg.isVirtual())
5264 return;
5265
5266 MachineInstr *MI = MRI->getVRegDef(Reg);
5267 if (!MI)
5268 return;
5269
5270 unsigned Opcode = MI->getOpcode();
5271
// First, recurse through the instructions feeding this def so that the
// operands are promoted before we consider promoting this instruction.
5272 switch (Opcode) {
5273 case PPC::OR:
5274 case PPC::ISEL:
5275 case PPC::OR8:
5276 case PPC::PHI: {
5277 if (BinOpDepth >= MAX_BINOP_DEPTH)
5278 break;
5279 unsigned OperandEnd = 3, OperandStride = 1;
// PHIs alternate (reg, MBB) operand pairs, so step by two over all operands.
5280 if (Opcode == PPC::PHI) {
5281 OperandEnd = MI->getNumOperands();
5282 OperandStride = 2;
5283 }
5284
5285 for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
5286 assert(MI->getOperand(I).isReg() && "Operand must be register");
5287 promoteInstr32To64ForElimEXTSW(MI->getOperand(I).getReg(), MRI,
5288 BinOpDepth + 1, LV);
5289 }
5290
5291 break;
5292 }
5293 case PPC::COPY: {
5294 // Refers to the logic of the `case PPC::COPY` statement in the function
5295 // PPCInstrInfo::isSignOrZeroExtended().
5296
5297 Register SrcReg = MI->getOperand(1).getReg();
5298 // In both ELFv1 and v2 ABI, method parameters and the return value
5299 // are sign- or zero-extended.
5300 const MachineFunction *MF = MI->getMF();
5301 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5302 // If this is a copy from another register, we recursively promote the
5303 // source.
5304 promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5305 return;
5306 }
5307
5308 // From here on everything is SVR4ABI. COPY will be eliminated in the other
5309 // pass, we do not need promote the COPY pseudo opcode.
5310
5311 if (SrcReg != PPC::X3)
5312 // If this is a copy from another register, we recursively promote the
5313 // source.
5314 promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5315 return;
5316 }
5317 case PPC::ORI:
5318 case PPC::XORI:
5319 case PPC::ORIS:
5320 case PPC::XORIS:
5321 case PPC::ORI8:
5322 case PPC::XORI8:
5323 case PPC::ORIS8:
5324 case PPC::XORIS8:
5325 promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI, BinOpDepth,
5326 LV);
5327 break;
5328 case PPC::AND:
5329 case PPC::AND8:
5330 if (BinOpDepth >= MAX_BINOP_DEPTH)
5331 break;
5332
5333 promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI,
5334 BinOpDepth + 1, LV);
5335 promoteInstr32To64ForElimEXTSW(MI->getOperand(2).getReg(), MRI,
5336 BinOpDepth + 1, LV);
5337 break;
5338 }
5339
// Already a 64-bit def: nothing to promote.
5340 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
5341 if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
5342 return;
5343
5344 const PPCInstrInfo *TII =
5345 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5346
5347 // Map the 32bit to 64bit opcodes for instructions that are not signed or zero
5348 // extended themselves, but may have operands who's destination registers of
5349 // signed or zero extended instructions.
5350 std::unordered_map<unsigned, unsigned> OpcodeMap = {
5351 {PPC::OR, PPC::OR8}, {PPC::ISEL, PPC::ISEL8},
5352 {PPC::ORI, PPC::ORI8}, {PPC::XORI, PPC::XORI8},
5353 {PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
5354 {PPC::AND, PPC::AND8}};
5355
5356 int NewOpcode = -1;
5357 auto It = OpcodeMap.find(Opcode);
5358 if (It != OpcodeMap.end()) {
5359 // Set the new opcode to the mapped 64-bit version.
5360 NewOpcode = It->second;
5361 } else {
5362 if (!TII->isSExt32To64(Opcode))
5363 return;
5364
5365 // The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
5366 // map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
5367 // instruction with the same opcode.
5368 NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
5369 }
5370
5371 assert(NewOpcode != -1 &&
5372 "Must have a 64-bit opcode to map the 32-bit opcode!");
5373
5374 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
5375 const MCInstrDesc &MCID = TII->get(NewOpcode);
5376 const TargetRegisterClass *NewRC =
5377 TRI->getRegClass(MCID.operands()[0].RegClass);
5378
5379 Register SrcReg = MI->getOperand(0).getReg();
5380 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
5381
5382 // If the register class of the defined register in the 32-bit instruction
5383 // is the same as the register class of the defined register in the promoted
5384 // 64-bit instruction, we do not need to promote the instruction.
5385 if (NewRC == SrcRC)
5386 return;
5387
5388 DebugLoc DL = MI->getDebugLoc();
5389 auto MBB = MI->getParent();
5390
5391 // Since the pseudo-opcode of the instruction is promoted from 32-bit to
5392 // 64-bit, if the source reg class of the original instruction belongs to
5393 // PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
5394 // the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
5395 // respectively.
5396 DenseMap<unsigned, Register> PromoteRegs;
5397 for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5398 MachineOperand &Operand = MI->getOperand(i);
5399 if (!Operand.isReg())
5400 continue;
5401
5402 Register OperandReg = Operand.getReg();
5403 if (!OperandReg.isVirtual())
5404 continue;
5405
5406 const TargetRegisterClass *NewUsedRegRC =
5407 TRI->getRegClass(MCID.operands()[i].RegClass);
5408 const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
5409 if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
5410 OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
5411 // Promote the used 32-bit register to 64-bit register.
// IMPLICIT_DEF + INSERT_SUBREG widens the 32-bit value into the low 32 bits
// of a fresh 64-bit register (upper bits undefined).
5412 Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5413 Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5414 BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
5415 BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
5416 .addReg(TmpReg)
5417 .addReg(OperandReg)
5418 .addImm(PPC::sub_32);
5419 PromoteRegs[i] = DstTmpReg;
5420 }
5421 }
5422
5423 Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
5424
5425 BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
// NOTE(review): the declaration of Iter (upstream line 5426, presumably an
// instruction iterator initialized at MI) is not visible in this copy; the
// decrement below moves it to the just-inserted 64-bit instruction.
5427 --Iter;
5428 MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
// Transfer the use operands, substituting the promoted registers where the
// loop above created them.
5429 for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5430 if (PromoteRegs.find(i) != PromoteRegs.end())
5431 MIBuilder.addReg(PromoteRegs[i], RegState::Kill);
5432 else
5433 Iter->addOperand(MI->getOperand(i));
5434 }
5435
// Keep LiveVariables consistent for every virtual register we touched.
5436 for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
5437 MachineOperand &Operand = Iter->getOperand(i);
5438 if (!Operand.isReg())
5439 continue;
5440 Register OperandReg = Operand.getReg();
5441 if (!OperandReg.isVirtual())
5442 continue;
5443 LV->recomputeForSingleDefVirtReg(OperandReg);
5444 }
5445
5446 MI->eraseFromParent();
5447
5448 // A defined register may be used by other instructions that are 32-bit.
5449 // After the defined register is promoted to 64-bit for the promoted
5450 // instruction, we need to demote the 64-bit defined register back to a
5451 // 32-bit register
5452 BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
5453 .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
5454 LV->recomputeForSingleDefVirtReg(NewDefinedReg);
5455}
5456
5457// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5458// does not count all of the recursions. The parameter BinOpDepth is incremented
5459// only when isSignOrZeroExtended calls itself more than once. This is done to
5460// prevent expontential recursion. There is no parameter to track linear
5461// recursion.
5462std::pair<bool, bool>
5464 const unsigned BinOpDepth,
5465 const MachineRegisterInfo *MRI) const {
5467 return std::pair<bool, bool>(false, false);
5468
5469 MachineInstr *MI = MRI->getVRegDef(Reg);
5470 if (!MI)
5471 return std::pair<bool, bool>(false, false);
5472
5473 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5474 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5475
5476 // If we know the instruction always returns sign- and zero-extended result,
5477 // return here.
5478 if (IsSExt && IsZExt)
5479 return std::pair<bool, bool>(IsSExt, IsZExt);
5480
5481 switch (MI->getOpcode()) {
5482 case PPC::COPY: {
5483 Register SrcReg = MI->getOperand(1).getReg();
5484
5485 // In both ELFv1 and v2 ABI, method parameters and the return value
5486 // are sign- or zero-extended.
5487 const MachineFunction *MF = MI->getMF();
5488
5489 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5490 // If this is a copy from another register, we recursively check source.
5491 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5492 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5493 SrcExt.second || IsZExt);
5494 }
5495
5496 // From here on everything is SVR4ABI
5497 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5498 // We check the ZExt/SExt flags for a method parameter.
5499 if (MI->getParent()->getBasicBlock() ==
5500 &MF->getFunction().getEntryBlock()) {
5501 Register VReg = MI->getOperand(0).getReg();
5502 if (MF->getRegInfo().isLiveIn(VReg)) {
5503 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5504 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5505 return std::pair<bool, bool>(IsSExt, IsZExt);
5506 }
5507 }
5508
5509 if (SrcReg != PPC::X3) {
5510 // If this is a copy from another register, we recursively check source.
5511 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5512 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5513 SrcExt.second || IsZExt);
5514 }
5515
5516 // For a method return value, we check the ZExt/SExt flags in attribute.
5517 // We assume the following code sequence for method call.
5518 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5519 // BL8_NOP @func,...
5520 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5521 // %5 = COPY %x3; G8RC:%5
5522 const MachineBasicBlock *MBB = MI->getParent();
5523 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
5526 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5527 return IsExtendPair;
5528
5529 const MachineInstr &CallMI = *(--II);
5530 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5531 return IsExtendPair;
5532
5533 const Function *CalleeFn =
5534 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5535 if (!CalleeFn)
5536 return IsExtendPair;
5537 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5538 if (IntTy && IntTy->getBitWidth() <= 32) {
5539 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5540 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5541 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5542 return std::pair<bool, bool>(IsSExt, IsZExt);
5543 }
5544
5545 return IsExtendPair;
5546 }
5547
5548 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5549 // So, we track the operand register as we do for register copy.
5550 case PPC::ORI:
5551 case PPC::XORI:
5552 case PPC::ORI8:
5553 case PPC::XORI8: {
5554 Register SrcReg = MI->getOperand(1).getReg();
5555 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5556 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5557 SrcExt.second || IsZExt);
5558 }
5559
5560 // OR, XOR with shifted 16-bit immediate does not change the upper
5561 // 32 bits. So, we track the operand register for zero extension.
5562 // For sign extension when the MSB of the immediate is zero, we also
5563 // track the operand register since the upper 33 bits are unchanged.
5564 case PPC::ORIS:
5565 case PPC::XORIS:
5566 case PPC::ORIS8:
5567 case PPC::XORIS8: {
5568 Register SrcReg = MI->getOperand(1).getReg();
5569 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5570 uint16_t Imm = MI->getOperand(2).getImm();
5571 if (Imm & 0x8000)
5572 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5573 else
5574 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5575 SrcExt.second || IsZExt);
5576 }
5577
5578 // If all incoming values are sign-/zero-extended,
5579 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5580 case PPC::OR:
5581 case PPC::OR8:
5582 case PPC::ISEL:
5583 case PPC::PHI: {
5584 if (BinOpDepth >= MAX_BINOP_DEPTH)
5585 return std::pair<bool, bool>(false, false);
5586
5587 // The input registers for PHI are operand 1, 3, ...
5588 // The input registers for others are operand 1 and 2.
5589 unsigned OperandEnd = 3, OperandStride = 1;
5590 if (MI->getOpcode() == PPC::PHI) {
5591 OperandEnd = MI->getNumOperands();
5592 OperandStride = 2;
5593 }
5594
5595 IsSExt = true;
5596 IsZExt = true;
5597 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5598 if (!MI->getOperand(I).isReg())
5599 return std::pair<bool, bool>(false, false);
5600
5601 Register SrcReg = MI->getOperand(I).getReg();
5602 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5603 IsSExt &= SrcExt.first;
5604 IsZExt &= SrcExt.second;
5605 }
5606 return std::pair<bool, bool>(IsSExt, IsZExt);
5607 }
5608
5609 // If at least one of the incoming values of an AND is zero extended
5610 // then the output is also zero-extended. If both of the incoming values
5611 // are sign-extended then the output is also sign extended.
5612 case PPC::AND:
5613 case PPC::AND8: {
5614 if (BinOpDepth >= MAX_BINOP_DEPTH)
5615 return std::pair<bool, bool>(false, false);
5616
5617 Register SrcReg1 = MI->getOperand(1).getReg();
5618 Register SrcReg2 = MI->getOperand(2).getReg();
5619 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5620 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5621 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5622 Src1Ext.second || Src2Ext.second);
5623 }
5624
5625 default:
5626 break;
5627 }
5628 return std::pair<bool, bool>(IsSExt, IsZExt);
5629}
5630
5631bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5632 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5633}
5634
5635namespace {
5636class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5637 MachineInstr *Loop, *EndLoop, *LoopCount;
5638 MachineFunction *MF;
5639 const TargetInstrInfo *TII;
5640 int64_t TripCount;
5641
5642public:
5643 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5644 MachineInstr *LoopCount)
5645 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5646 MF(Loop->getParent()->getParent()),
5647 TII(MF->getSubtarget().getInstrInfo()) {
5648 // Inspect the Loop instruction up-front, as it may be deleted when we call
5649 // createTripCountGreaterCondition.
5650 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5651 TripCount = LoopCount->getOperand(1).getImm();
5652 else
5653 TripCount = -1;
5654 }
5655
5656 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5657 // Only ignore the terminator.
5658 return MI == EndLoop;
5659 }
5660
5661 std::optional<bool> createTripCountGreaterCondition(
5662 int TC, MachineBasicBlock &MBB,
5664 if (TripCount == -1) {
5665 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5666 // so we don't need to generate any thing here.
5667 Cond.push_back(MachineOperand::CreateImm(0));
5669 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5670 true));
5671 return {};
5672 }
5673
5674 return TripCount > TC;
5675 }
5676
5677 void setPreheader(MachineBasicBlock *NewPreheader) override {
5678 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5679 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5680 }
5681
5682 void adjustTripCount(int TripCountAdjust) override {
5683 // If the loop trip count is a compile-time value, then just change the
5684 // value.
5685 if (LoopCount->getOpcode() == PPC::LI8 ||
5686 LoopCount->getOpcode() == PPC::LI) {
5687 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5688 LoopCount->getOperand(1).setImm(TripCount);
5689 return;
5690 }
5691
5692 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5693 // so we don't need to generate any thing here.
5694 }
5695
5696 void disposed(LiveIntervals *LIS) override {
5697 if (LIS) {
5699 LIS->RemoveMachineInstrFromMaps(*LoopCount);
5700 }
5701 Loop->eraseFromParent();
5702 // Ensure the loop setup instruction is deleted too.
5703 LoopCount->eraseFromParent();
5704 }
5705};
5706} // namespace
5707
5708std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5710 // We really "analyze" only hardware loops right now.
5712 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5713 if (Preheader == LoopBB)
5714 Preheader = *std::next(LoopBB->pred_begin());
5715 MachineFunction *MF = Preheader->getParent();
5716
5717 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5719 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5720 Register LoopCountReg = LoopInst->getOperand(0).getReg();
5722 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5723 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5724 }
5725 }
5726 return nullptr;
5727}
5728
5730 MachineBasicBlock &PreHeader,
5731 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5732
5733 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5734
5735 // The loop set-up instruction should be in preheader
5736 for (auto &I : PreHeader.instrs())
5737 if (I.getOpcode() == LOOPi)
5738 return &I;
5739 return nullptr;
5740}
5741
5742// Return true if get the base operand, byte offset of an instruction and the
5743// memory width. Width is the size of memory that is being loaded/stored.
5745 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5746 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5747 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5748 return false;
5749
5750 // Handle only loads/stores with base register followed by immediate offset.
5751 if (!LdSt.getOperand(1).isImm() ||
5752 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5753 return false;
5754 if (!LdSt.getOperand(1).isImm() ||
5755 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5756 return false;
5757
5758 if (!LdSt.hasOneMemOperand())
5759 return false;
5760
5761 Width = (*LdSt.memoperands_begin())->getSize();
5762 Offset = LdSt.getOperand(1).getImm();
5763 BaseReg = &LdSt.getOperand(2);
5764 return true;
5765}
5766
5768 const MachineInstr &MIa, const MachineInstr &MIb) const {
5769 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5770 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5771
5774 return false;
5775
5776 // Retrieve the base register, offset from the base register and width. Width
5777 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5778 // base registers are identical, and the offset of a lower memory access +
5779 // the width doesn't overlap the offset of a higher memory access,
5780 // then the memory accesses are different.
5782 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5783 int64_t OffsetA = 0, OffsetB = 0;
5784 LocationSize WidthA = 0, WidthB = 0;
5785 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5786 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5787 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
5788 int LowOffset = std::min(OffsetA, OffsetB);
5789 int HighOffset = std::max(OffsetA, OffsetB);
5790 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
5791 if (LowWidth.hasValue() &&
5792 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5793 return true;
5794 }
5795 }
5796 return false;
5797}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static unsigned getSize(unsigned Kind)
void changeSign()
Definition: APFloat.h:1299
Class for arbitrary precision integers.
Definition: APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition: APInt.cpp:1115
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:353
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
A possibly irreducible generalization of a Loop.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition: Globals.cpp:130
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
Definition: DerivedTypes.h:42
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
bool hasValue() const
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
void setOpcode(unsigned Op)
Definition: MCInst.h:198
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
Definition: MCInstrDesc.h:565
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:269
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
uint16_t Constraints
Operand constraints (see OperandConstraint enum).
Definition: MCInstrDesc.h:100
bool isLookupPtrRegClass() const
Set if this operand is a pointer value and it requires a callback to look up its register class.
Definition: MCInstrDesc.h:104
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:91
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:956
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:397
iterator_range< mop_iterator > uses()
Returns a range that includes all operands which may be register uses.
Definition: MachineInstr.h:739
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:649
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:821
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:691
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:806
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:392
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automata that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
PPCInstrInfo(PPCSubtarget &STI)
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
static int getRecordFormOpcode(unsigned Opcode)
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
Definition: PPCInstrInfo.h:277
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:275
CombinerObjective getCombinerObjective(unsigned Pattern) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
void promoteInstr32To64ForElimEXTSW(const Register &Reg, MachineRegisterInfo *MRI, unsigned BinOpDepth, LiveVariables *LV) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:627
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Definition: PPCInstrInfo.h:510
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if get the base operand, byte offset of an instruction and the memory width.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:621
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const
getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode for a given imm form load/s...
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:147
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:220
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:139
bool isLittleEndian() const
Definition: PPCSubtarget.h:186
bool isTargetLinux() const
Definition: PPCSubtarget.h:217
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:160
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:213
void setGlibcHWCAPAccess(bool Val=true) const
void dump() const
Definition: Pass.cpp:136
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:731
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:153
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ MO_TOC_LO
Definition: PPC.h:183
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
static bool isVFRegister(unsigned Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:48
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
static unsigned getCRFromCRBit(unsigned SrcReg)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
@ REASSOC_XY_BCA
Definition: PPCInstrInfo.h:97
@ REASSOC_XY_BAC
Definition: PPCInstrInfo.h:98
@ REASSOC_XY_AMM_BMM
Definition: PPCInstrInfo.h:92
@ REASSOC_XMM_AMM_BMM
Definition: PPCInstrInfo.h:93
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
unsigned getKillRegState(bool B)
@ SOK_CRBitSpill
Definition: PPCInstrInfo.h:74
@ SOK_VSXVectorSpill
Definition: PPCInstrInfo.h:76
@ SOK_SpillToVSR
Definition: PPCInstrInfo.h:79
@ SOK_Int4Spill
Definition: PPCInstrInfo.h:69
@ SOK_PairedVecSpill
Definition: PPCInstrInfo.h:80
@ SOK_VectorFloat8Spill
Definition: PPCInstrInfo.h:77
@ SOK_UAccumulatorSpill
Definition: PPCInstrInfo.h:82
@ SOK_PairedG8Spill
Definition: PPCInstrInfo.h:85
@ SOK_VectorFloat4Spill
Definition: PPCInstrInfo.h:78
@ SOK_Float8Spill
Definition: PPCInstrInfo.h:71
@ SOK_Float4Spill
Definition: PPCInstrInfo.h:72
@ SOK_VRVectorSpill
Definition: PPCInstrInfo.h:75
@ SOK_WAccumulatorSpill
Definition: PPCInstrInfo.h:83
@ SOK_SPESpill
Definition: PPCInstrInfo.h:84
@ SOK_CRSpill
Definition: PPCInstrInfo.h:73
@ SOK_AccumulatorSpill
Definition: PPCInstrInfo.h:81
@ SOK_Int8Spill
Definition: PPCInstrInfo.h:70
@ SOK_LastOpcodeSpill
Definition: PPCInstrInfo.h:86
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t IsSummingOperands
Definition: PPCInstrInfo.h:56
uint64_t OpNoForForwarding
Definition: PPCInstrInfo.h:46
uint64_t ImmMustBeMultipleOf
Definition: PPCInstrInfo.h:36
uint64_t IsCommutative
Definition: PPCInstrInfo.h:44
uint64_t ZeroIsSpecialNew
Definition: PPCInstrInfo.h:42
uint64_t TruncateImmTo
Definition: PPCInstrInfo.h:54
uint64_t ZeroIsSpecialOrig
Definition: PPCInstrInfo.h:39
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.