// (Doxygen export header: LLVM 22.0.0git — PPCInstrInfo.cpp source listing.)
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
37#include "llvm/IR/Module.h"
38#include "llvm/MC/MCInst.h"
41#include "llvm/Support/Debug.h"
44
45using namespace llvm;
46
47#define DEBUG_TYPE "ppc-instr-info"
48
49#define GET_INSTRMAP_INFO
50#define GET_INSTRINFO_CTOR_DTOR
51#include "PPCGenInstrInfo.inc"
52
53STATISTIC(NumStoreSPILLVSRRCAsVec,
54 "Number of spillvsrrc spilled to stack as vec");
55STATISTIC(NumStoreSPILLVSRRCAsGpr,
56 "Number of spillvsrrc spilled to stack as gpr");
57STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
58STATISTIC(CmpIselsConverted,
59 "Number of ISELs that depend on comparison of constants converted");
60STATISTIC(MissedConvertibleImmediateInstrs,
61 "Number of compare-immediate instructions fed by constants");
62STATISTIC(NumRcRotatesConvertedToRcAnd,
63 "Number of record-form rotates converted to record-form andi");
64
65static cl::
66opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
67 cl::desc("Disable analysis for CTR loops"));
68
69static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
70cl::desc("Disable compare instruction optimization"), cl::Hidden);
71
72static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
73cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
75
76static cl::opt<bool>
77UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
78 cl::desc("Use the old (incorrect) instruction latency calculation"));
79
80static cl::opt<float>
81 FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
82 cl::desc("register pressure factor for the transformations."));
83
85 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
86 cl::desc("enable register pressure reduce in machine combiner pass."));
87
// Pin the vtable to this file. Providing one out-of-line member keeps the
// class's vtable (and type info) emitted in this translation unit only,
// instead of being duplicated in every TU that uses the class.
void PPCInstrInfo::anchor() {}
90
92 : PPCGenInstrInfo(STI, RI, PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
93 /* CatchRetOpcode */ -1,
94 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
95 Subtarget(STI), RI(STI.getTargetMachine()) {}
96
97/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
98/// this target when scheduling the DAG.
101 const ScheduleDAG *DAG) const {
102 unsigned Directive =
103 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
106 const InstrItineraryData *II =
107 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
108 return new ScoreboardHazardRecognizer(II, DAG);
109 }
110
112}
113
114/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
115/// to use for this target when scheduling the DAG.
118 const ScheduleDAG *DAG) const {
119 unsigned Directive =
120 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
121
122 // FIXME: Leaving this as-is until we have POWER9 scheduling info
124 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
125
126 // Most subtargets use a PPC970 recognizer.
129 assert(DAG->TII && "No InstrInfo?");
130
131 return new PPCHazardRecognizer970(*DAG);
132 }
133
134 return new ScoreboardHazardRecognizer(II, DAG);
135}
136
138 const MachineInstr &MI,
139 unsigned *PredCost) const {
140 if (!ItinData || UseOldLatencyCalc)
141 return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
142
143 // The default implementation of getInstrLatency calls getStageLatency, but
144 // getStageLatency does not do the right thing for us. While we have
145 // itinerary, most cores are fully pipelined, and so the itineraries only
146 // express the first part of the pipeline, not every stage. Instead, we need
147 // to use the listed output operand cycle number (using operand 0 here, which
148 // is an output).
149
150 unsigned Latency = 1;
151 unsigned DefClass = MI.getDesc().getSchedClass();
152 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
153 const MachineOperand &MO = MI.getOperand(i);
154 if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
155 continue;
156
157 std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
158 if (!Cycle)
159 continue;
160
161 Latency = std::max(Latency, *Cycle);
162 }
163
164 return Latency;
165}
166
167std::optional<unsigned> PPCInstrInfo::getOperandLatency(
168 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
169 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
170 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
171 ItinData, DefMI, DefIdx, UseMI, UseIdx);
172
173 if (!DefMI.getParent())
174 return Latency;
175
176 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
177 Register Reg = DefMO.getReg();
178
179 bool IsRegCR;
180 if (Reg.isVirtual()) {
181 const MachineRegisterInfo *MRI =
182 &DefMI.getParent()->getParent()->getRegInfo();
183 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
184 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
185 } else {
186 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
187 PPC::CRBITRCRegClass.contains(Reg);
188 }
189
190 if (UseMI.isBranch() && IsRegCR) {
191 if (!Latency)
192 Latency = getInstrLatency(ItinData, DefMI);
193
194 // On some cores, there is an additional delay between writing to a condition
195 // register, and using it from a branch.
196 unsigned Directive = Subtarget.getCPUDirective();
197 switch (Directive) {
198 default: break;
199 case PPC::DIR_7400:
200 case PPC::DIR_750:
201 case PPC::DIR_970:
202 case PPC::DIR_E5500:
203 case PPC::DIR_PWR4:
204 case PPC::DIR_PWR5:
205 case PPC::DIR_PWR5X:
206 case PPC::DIR_PWR6:
207 case PPC::DIR_PWR6X:
208 case PPC::DIR_PWR7:
209 case PPC::DIR_PWR8:
210 // FIXME: Is this needed for POWER9?
211 Latency = *Latency + 2;
212 break;
213 }
214 }
215
216 return Latency;
217}
218
220 uint32_t Flags) const {
221 MI.setFlags(Flags);
225}
226
227// This function does not list all associative and commutative operations, but
228// only those worth feeding through the machine combiner in an attempt to
229// reduce the critical path. Mostly, this means floating-point operations,
230// because they have high latencies(>=5) (compared to other operations, such as
231// and/or, which are also associative and commutative, but have low latencies).
233 bool Invert) const {
234 if (Invert)
235 return false;
236 switch (Inst.getOpcode()) {
237 // Floating point:
238 // FP Add:
239 case PPC::FADD:
240 case PPC::FADDS:
241 // FP Multiply:
242 case PPC::FMUL:
243 case PPC::FMULS:
244 // Altivec Add:
245 case PPC::VADDFP:
246 // VSX Add:
247 case PPC::XSADDDP:
248 case PPC::XVADDDP:
249 case PPC::XVADDSP:
250 case PPC::XSADDSP:
251 // VSX Multiply:
252 case PPC::XSMULDP:
253 case PPC::XVMULDP:
254 case PPC::XVMULSP:
255 case PPC::XSMULSP:
258 // Fixed point:
259 // Multiply:
260 case PPC::MULHD:
261 case PPC::MULLD:
262 case PPC::MULHW:
263 case PPC::MULLW:
264 return true;
265 default:
266 return false;
267 }
268}
269
// Column indices into each row of the FMAOpIdxInfo table below.
#define InfoArrayIdxFMAInst 0
#define InfoArrayIdxFAddInst 1
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
#define InfoArrayIdxFSubInst 5
// Array keeps info for FMA instructions:
// Index 0(InfoArrayIdxFMAInst): FMA instruction;
// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
//                                the second MUL operand index is this plus 1;
// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
//
// NOTE: getFMAOpIdxInfo() linearly searches column 0, so the FMA opcode must
// remain the first entry of every row.
static const uint16_t FMAOpIdxInfo[][6] = {
    // FIXME: Add more FMA instructions like XSNMADDADP and so on.
    {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
    {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
292
293// Check if an opcode is a FMA instruction. If it is, return the index in array
294// FMAOpIdxInfo. Otherwise, return -1.
295int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
296 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
297 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
298 return I;
299 return -1;
300}
301
302// On PowerPC target, we have two kinds of patterns related to FMA:
303// 1: Improve ILP.
304// Try to reassociate FMA chains like below:
305//
306// Pattern 1:
307// A = FADD X, Y (Leaf)
308// B = FMA A, M21, M22 (Prev)
309// C = FMA B, M31, M32 (Root)
310// -->
311// A = FMA X, M21, M22
312// B = FMA Y, M31, M32
313// C = FADD A, B
314//
315// Pattern 2:
316// A = FMA X, M11, M12 (Leaf)
317// B = FMA A, M21, M22 (Prev)
318// C = FMA B, M31, M32 (Root)
319// -->
320// A = FMUL M11, M12
321// B = FMA X, M21, M22
322// D = FMA A, M31, M32
323// C = FADD B, D
324//
325// breaking the dependency between A and B, allowing FMA to be executed in
326// parallel (or back-to-back in a pipeline) instead of depending on each other.
327//
328// 2: Reduce register pressure.
329// Try to reassociate FMA with FSUB and a constant like below:
330// C is a floating point const.
331//
332// Pattern 1:
333// A = FSUB X, Y (Leaf)
334// D = FMA B, C, A (Root)
335// -->
336// A = FMA B, Y, -C
337// D = FMA A, X, C
338//
339// Pattern 2:
340// A = FSUB X, Y (Leaf)
341// D = FMA B, A, C (Root)
342// -->
343// A = FMA B, Y, -C
344// D = FMA A, X, C
345//
346// Before the transformation, A must be assigned with different hardware
347// register with D. After the transformation, A and D must be assigned with
348// same hardware register due to TIE attribute of FMA instructions.
349//
352 bool DoRegPressureReduce) const {
354 const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
356
357 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
358 for (const auto &MO : Instr.explicit_operands())
359 if (!(MO.isReg() && MO.getReg().isVirtual()))
360 return false;
361 return true;
362 };
363
364 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
365 unsigned OpType) {
366 if (Instr.getOpcode() !=
367 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
368 return false;
369
370 // Instruction can be reassociated.
371 // fast math flags may prohibit reassociation.
372 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
373 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
374 return false;
375
376 // Instruction operands are virtual registers for reassociation.
377 if (!IsAllOpsVirtualReg(Instr))
378 return false;
379
380 // For register pressure reassociation, the FSub must have only one use as
381 // we want to delete the sub to save its def.
382 if (OpType == InfoArrayIdxFSubInst &&
383 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
384 return false;
385
386 return true;
387 };
388
389 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
390 int16_t &MulOpIdx, bool IsLeaf) {
391 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
392 if (Idx < 0)
393 return false;
394
395 // Instruction can be reassociated.
396 // fast math flags may prohibit reassociation.
397 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
398 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
399 return false;
400
401 // Instruction operands are virtual registers for reassociation.
402 if (!IsAllOpsVirtualReg(Instr))
403 return false;
404
405 MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
406 if (IsLeaf)
407 return true;
408
409 AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
410
411 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
412 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
413 // If 'add' operand's def is not in current block, don't do ILP related opt.
414 if (!MIAdd || MIAdd->getParent() != MBB)
415 return false;
416
417 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
418 // as this fma will be changed later.
419 return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
420 };
421
422 int16_t AddOpIdx = -1;
423 int16_t MulOpIdx = -1;
424
425 bool IsUsedOnceL = false;
426 bool IsUsedOnceR = false;
427 MachineInstr *MULInstrL = nullptr;
428 MachineInstr *MULInstrR = nullptr;
429
430 auto IsRPReductionCandidate = [&]() {
431 // Currently, we only support float and double.
432 // FIXME: add support for other types.
433 unsigned Opcode = Root.getOpcode();
434 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
435 return false;
436
437 // Root must be a valid FMA like instruction.
438 // Treat it as leaf as we don't care its add operand.
439 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
440 assert((MulOpIdx >= 0) && "mul operand index not right!");
441 Register MULRegL = TRI->lookThruSingleUseCopyChain(
442 Root.getOperand(MulOpIdx).getReg(), MRI);
443 Register MULRegR = TRI->lookThruSingleUseCopyChain(
444 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
445 if (!MULRegL && !MULRegR)
446 return false;
447
448 if (MULRegL && !MULRegR) {
449 MULRegR =
450 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
451 IsUsedOnceL = true;
452 } else if (!MULRegL && MULRegR) {
453 MULRegL =
454 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
455 IsUsedOnceR = true;
456 } else {
457 IsUsedOnceL = true;
458 IsUsedOnceR = true;
459 }
460
461 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
462 return false;
463
464 MULInstrL = MRI->getVRegDef(MULRegL);
465 MULInstrR = MRI->getVRegDef(MULRegR);
466 return true;
467 }
468 return false;
469 };
470
471 // Register pressure fma reassociation patterns.
472 if (DoRegPressureReduce && IsRPReductionCandidate()) {
473 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
474 // Register pressure pattern 1
475 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
476 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
477 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
479 return true;
480 }
481
482 // Register pressure pattern 2
483 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
484 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
485 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
487 return true;
488 }
489 }
490
491 // ILP fma reassociation patterns.
492 // Root must be a valid FMA like instruction.
493 AddOpIdx = -1;
494 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
495 return false;
496
497 assert((AddOpIdx >= 0) && "add operand index not right!");
498
499 Register RegB = Root.getOperand(AddOpIdx).getReg();
500 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
501
502 // Prev must be a valid FMA like instruction.
503 AddOpIdx = -1;
504 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
505 return false;
506
507 assert((AddOpIdx >= 0) && "add operand index not right!");
508
509 Register RegA = Prev->getOperand(AddOpIdx).getReg();
510 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
511 AddOpIdx = -1;
512 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
514 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
515 return true;
516 }
517 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
519 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
520 return true;
521 }
522 return false;
523}
524
526 MachineInstr &Root, unsigned &Pattern,
527 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
528 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
529
530 MachineFunction *MF = Root.getMF();
534
535 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
536 if (Idx < 0)
537 return;
538
539 uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
540
541 // For now we only need to fix up placeholder for register pressure reduce
542 // patterns.
543 Register ConstReg = 0;
544 switch (Pattern) {
546 ConstReg =
547 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
548 break;
550 ConstReg =
551 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
552 break;
553 default:
554 // Not register pressure reduce patterns.
555 return;
556 }
557
558 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
559 // Get const value from const pool.
560 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
561 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
562
563 // Get negative fp const.
564 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
565 F1.changeSign();
566 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
567 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
568
569 // Put negative fp const into constant pool.
570 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
571
572 MachineOperand *Placeholder = nullptr;
573 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
574 for (auto *Inst : InsInstrs) {
575 for (MachineOperand &Operand : Inst->explicit_operands()) {
576 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
577 if (Operand.getReg() == PPC::ZERO8) {
578 Placeholder = &Operand;
579 break;
580 }
581 }
582 }
583
584 assert(Placeholder && "Placeholder does not exist!");
585
586 // Generate instructions to load the const fp from constant pool.
587 // We only support PPC64 and medium code model.
588 Register LoadNewConst =
589 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
590
591 // Fill the placeholder with the new load from constant pool.
592 Placeholder->setReg(LoadNewConst);
593}
594
596 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
597
599 return false;
600
601 // Currently, we only enable register pressure reducing in machine combiner
602 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
603 // support.
604 //
605 // So we need following instructions to access a TOC entry:
606 //
607 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
608 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
609 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
610 //
611 // FIXME: add more supported targets, like Small and Large code model, PPC32,
612 // AIX.
613 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
614 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))
615 return false;
616
618 const MachineFunction *MF = MBB->getParent();
619 const MachineRegisterInfo *MRI = &MF->getRegInfo();
620
621 auto GetMBBPressure =
622 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
623 RegionPressure Pressure;
624 RegPressureTracker RPTracker(Pressure);
625
626 // Initialize the register pressure tracker.
627 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
628 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
629
630 for (const auto &MI : reverse(*MBB)) {
631 if (MI.isDebugValue() || MI.isDebugLabel())
632 continue;
633 RegisterOperands RegOpers;
634 RegOpers.collect(MI, *TRI, *MRI, false, false);
635 RPTracker.recedeSkipDebugValues();
636 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
637 RPTracker.recede(RegOpers);
638 }
639
640 // Close the RPTracker to finalize live ins.
641 RPTracker.closeRegion();
642
643 return RPTracker.getPressure().MaxSetPressure;
644 };
645
646 // For now we only care about float and double type fma.
647 unsigned VSSRCLimit =
648 RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);
649
650 // Only reduce register pressure when pressure is high.
651 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
652 (float)VSSRCLimit * FMARPFactor;
653}
654
656 // I has only one memory operand which is load from constant pool.
657 if (!I->hasOneMemOperand())
658 return false;
659
660 MachineMemOperand *Op = I->memoperands()[0];
661 return Op->isLoad() && Op->getPseudoValue() &&
662 Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
663}
664
665Register PPCInstrInfo::generateLoadForNewConst(
666 unsigned Idx, MachineInstr *MI, Type *Ty,
667 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
668 // Now we only support PPC64, Medium code model and P9 with vector.
669 // We have immutable pattern to access const pool. See function
670 // shouldReduceRegisterPressure.
671 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
673 "Target not supported!\n");
674
675 MachineFunction *MF = MI->getMF();
677
678 // Generate ADDIStocHA8
679 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
680 MachineInstrBuilder TOCOffset =
681 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
682 .addReg(PPC::X2)
684
685 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
686 "Only float and double are supported!");
687
688 unsigned LoadOpcode;
689 // Should be float type or double type.
690 if (Ty->isFloatTy())
691 LoadOpcode = PPC::DFLOADf32;
692 else
693 LoadOpcode = PPC::DFLOADf64;
694
695 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
696 Register VReg2 = MRI->createVirtualRegister(RC);
699 Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty));
700
701 // Generate Load from constant pool.
703 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
705 .addReg(VReg1, getKillRegState(true))
706 .addMemOperand(MMO);
707
708 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
709
710 // Insert the toc load instructions into InsInstrs.
711 InsInstrs.insert(InsInstrs.begin(), Load);
712 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
713 return VReg2;
714}
715
716// This function returns the const value in constant pool if the \p I is a load
717// from constant pool.
718const Constant *
720 MachineFunction *MF = I->getMF();
723 assert(I->mayLoad() && "Should be a load instruction.\n");
724 for (auto MO : I->uses()) {
725 if (!MO.isReg())
726 continue;
727 Register Reg = MO.getReg();
728 if (Reg == 0 || !Reg.isVirtual())
729 continue;
730 // Find the toc address.
731 MachineInstr *DefMI = MRI->getVRegDef(Reg);
732 for (auto MO2 : DefMI->uses())
733 if (MO2.isCPI())
734 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
735 }
736 return nullptr;
737}
738
751
754 bool DoRegPressureReduce) const {
755 // Using the machine combiner in this way is potentially expensive, so
756 // restrict to when aggressive optimizations are desired.
757 if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOptLevel::Aggressive)
758 return false;
759
760 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
761 return true;
762
764 DoRegPressureReduce);
765}
766
768 MachineInstr &Root, unsigned Pattern,
771 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
772 switch (Pattern) {
777 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
778 break;
779 default:
780 // Reassociate default patterns.
782 DelInstrs, InstrIdxForVirtReg);
783 break;
784 }
785}
786
787void PPCInstrInfo::reassociateFMA(
788 MachineInstr &Root, unsigned Pattern,
791 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
792 MachineFunction *MF = Root.getMF();
795 MachineOperand &OpC = Root.getOperand(0);
796 Register RegC = OpC.getReg();
797 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
798 MRI.constrainRegClass(RegC, RC);
799
800 unsigned FmaOp = Root.getOpcode();
801 int16_t Idx = getFMAOpIdxInfo(FmaOp);
802 assert(Idx >= 0 && "Root must be a FMA instruction");
803
804 bool IsILPReassociate =
807
809 uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
810
811 MachineInstr *Prev = nullptr;
812 MachineInstr *Leaf = nullptr;
813 switch (Pattern) {
814 default:
815 llvm_unreachable("not recognized pattern!");
818 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
819 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
820 break;
822 Register MULReg =
823 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
824 Leaf = MRI.getVRegDef(MULReg);
825 break;
826 }
828 Register MULReg = TRI->lookThruCopyLike(
829 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
830 Leaf = MRI.getVRegDef(MULReg);
831 break;
832 }
833 }
834
835 uint32_t IntersectedFlags = 0;
836 if (IsILPReassociate)
837 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
838 else
839 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
840
841 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
842 bool &KillFlag) {
843 Reg = Operand.getReg();
844 MRI.constrainRegClass(Reg, RC);
845 KillFlag = Operand.isKill();
846 };
847
848 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
849 Register &MulOp2, Register &AddOp,
850 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
851 bool &AddOpKillFlag) {
852 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
853 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
854 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
855 };
856
857 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
858 RegA21, RegB;
859 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
860 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
861 KillA11 = false, KillA21 = false, KillB = false;
862
863 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
864
865 if (IsILPReassociate)
866 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
867
869 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
870 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
871 } else if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) {
872 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
873 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
874 } else {
875 // Get FSUB instruction info.
876 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
877 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
878 }
879
880 // Create new virtual registers for the new results instead of
881 // recycling legacy ones because the MachineCombiner's computation of the
882 // critical path requires a new register definition rather than an existing
883 // one.
884 // For register pressure reassociation, we only need create one virtual
885 // register for the new fma.
886 Register NewVRA = MRI.createVirtualRegister(RC);
887 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
888
889 Register NewVRB = 0;
890 if (IsILPReassociate) {
891 NewVRB = MRI.createVirtualRegister(RC);
892 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
893 }
894
895 Register NewVRD = 0;
897 NewVRD = MRI.createVirtualRegister(RC);
898 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
899 }
900
901 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
902 Register RegMul1, bool KillRegMul1,
903 Register RegMul2, bool KillRegMul2) {
904 MI->getOperand(AddOpIdx).setReg(RegAdd);
905 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
906 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
907 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
908 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
909 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
910 };
911
912 MachineInstrBuilder NewARegPressure, NewCRegPressure;
913 switch (Pattern) {
914 default:
915 llvm_unreachable("not recognized pattern!");
917 // Create new instructions for insertion.
918 MachineInstrBuilder MINewB =
919 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
920 .addReg(RegX, getKillRegState(KillX))
921 .addReg(RegM21, getKillRegState(KillM21))
922 .addReg(RegM22, getKillRegState(KillM22));
923 MachineInstrBuilder MINewA =
924 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
925 .addReg(RegY, getKillRegState(KillY))
926 .addReg(RegM31, getKillRegState(KillM31))
927 .addReg(RegM32, getKillRegState(KillM32));
928 // If AddOpIdx is not 1, adjust the order.
929 if (AddOpIdx != 1) {
930 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
931 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
932 }
933
934 MachineInstrBuilder MINewC =
935 BuildMI(*MF, Root.getDebugLoc(),
937 .addReg(NewVRB, getKillRegState(true))
938 .addReg(NewVRA, getKillRegState(true));
939
940 // Update flags for newly created instructions.
941 setSpecialOperandAttr(*MINewA, IntersectedFlags);
942 setSpecialOperandAttr(*MINewB, IntersectedFlags);
943 setSpecialOperandAttr(*MINewC, IntersectedFlags);
944
945 // Record new instructions for insertion.
946 InsInstrs.push_back(MINewA);
947 InsInstrs.push_back(MINewB);
948 InsInstrs.push_back(MINewC);
949 break;
950 }
952 assert(NewVRD && "new FMA register not created!");
953 // Create new instructions for insertion.
954 MachineInstrBuilder MINewA =
955 BuildMI(*MF, Leaf->getDebugLoc(),
957 .addReg(RegM11, getKillRegState(KillM11))
958 .addReg(RegM12, getKillRegState(KillM12));
959 MachineInstrBuilder MINewB =
960 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
961 .addReg(RegX, getKillRegState(KillX))
962 .addReg(RegM21, getKillRegState(KillM21))
963 .addReg(RegM22, getKillRegState(KillM22));
964 MachineInstrBuilder MINewD =
965 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
966 .addReg(NewVRA, getKillRegState(true))
967 .addReg(RegM31, getKillRegState(KillM31))
968 .addReg(RegM32, getKillRegState(KillM32));
969 // If AddOpIdx is not 1, adjust the order.
970 if (AddOpIdx != 1) {
971 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
972 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
973 KillM32);
974 }
975
976 MachineInstrBuilder MINewC =
977 BuildMI(*MF, Root.getDebugLoc(),
979 .addReg(NewVRB, getKillRegState(true))
980 .addReg(NewVRD, getKillRegState(true));
981
982 // Update flags for newly created instructions.
983 setSpecialOperandAttr(*MINewA, IntersectedFlags);
984 setSpecialOperandAttr(*MINewB, IntersectedFlags);
985 setSpecialOperandAttr(*MINewD, IntersectedFlags);
986 setSpecialOperandAttr(*MINewC, IntersectedFlags);
987
988 // Record new instructions for insertion.
989 InsInstrs.push_back(MINewA);
990 InsInstrs.push_back(MINewB);
991 InsInstrs.push_back(MINewD);
992 InsInstrs.push_back(MINewC);
993 break;
994 }
997 Register VarReg;
998 bool KillVarReg = false;
1000 VarReg = RegM31;
1001 KillVarReg = KillM31;
1002 } else {
1003 VarReg = RegM32;
1004 KillVarReg = KillM32;
1005 }
1006 // We don't want to get negative const from memory pool too early, as the
1007 // created entry will not be deleted even if it has no users. Since all
1008 // operand of Leaf and Root are virtual register, we use zero register
1009 // here as a placeholder. When the InsInstrs is selected in
1010 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
1011 // with a virtual register which is a load from constant pool.
1012 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
1013 .addReg(RegB, getKillRegState(RegB))
1014 .addReg(RegY, getKillRegState(KillY))
1015 .addReg(PPC::ZERO8);
1016 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
1017 .addReg(NewVRA, getKillRegState(true))
1018 .addReg(RegX, getKillRegState(KillX))
1019 .addReg(VarReg, getKillRegState(KillVarReg));
1020 // For now, we only support xsmaddadp/xsmaddasp, their add operand are
1021 // both at index 1, no need to adjust.
1022 // FIXME: when add more fma instructions support, like fma/fmas, adjust
1023 // the operand index here.
1024 break;
1025 }
1026 }
1027
1028 if (!IsILPReassociate) {
1029 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
1030 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
1031
1032 InsInstrs.push_back(NewARegPressure);
1033 InsInstrs.push_back(NewCRegPressure);
1034 }
1035
1036 assert(!InsInstrs.empty() &&
1037 "Insertion instructions set should not be empty!");
1038
1039 // Record old instructions for deletion.
1040 DelInstrs.push_back(Leaf);
1041 if (IsILPReassociate)
1042 DelInstrs.push_back(Prev);
1043 DelInstrs.push_back(&Root);
1044}
1045
1046// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
1048 Register &SrcReg, Register &DstReg,
1049 unsigned &SubIdx) const {
1050 switch (MI.getOpcode()) {
1051 default: return false;
1052 case PPC::EXTSW:
1053 case PPC::EXTSW_32:
1054 case PPC::EXTSW_32_64:
1055 SrcReg = MI.getOperand(1).getReg();
1056 DstReg = MI.getOperand(0).getReg();
1057 SubIdx = PPC::sub_32;
1058 return true;
1059 }
1060}
1061
1063 int &FrameIndex) const {
1064 if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
1065 // Check for the operands added by addFrameReference (the immediate is the
1066 // offset which defaults to 0).
1067 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1068 MI.getOperand(2).isFI()) {
1069 FrameIndex = MI.getOperand(2).getIndex();
1070 return MI.getOperand(0).getReg();
1071 }
1072 }
1073 return 0;
1074}
1075
1076// For opcodes with the ReMaterializable flag set, this function is called to
1077// verify the instruction is really rematable.
1079 const MachineInstr &MI) const {
1080 switch (MI.getOpcode()) {
1081 default:
1082 // Let base implementaion decide.
1083 break;
1084 case PPC::LI:
1085 case PPC::LI8:
1086 case PPC::PLI:
1087 case PPC::PLI8:
1088 case PPC::LIS:
1089 case PPC::LIS8:
1090 case PPC::ADDIStocHA:
1091 case PPC::ADDIStocHA8:
1092 case PPC::ADDItocL:
1093 case PPC::ADDItocL8:
1094 case PPC::LOAD_STACK_GUARD:
1095 case PPC::PPCLdFixedAddr:
1096 case PPC::XXLXORz:
1097 case PPC::XXLXORspz:
1098 case PPC::XXLXORdpz:
1099 case PPC::XXLEQVOnes:
1100 case PPC::XXSPLTI32DX:
1101 case PPC::XXSPLTIW:
1102 case PPC::XXSPLTIDP:
1103 case PPC::V_SET0B:
1104 case PPC::V_SET0H:
1105 case PPC::V_SET0:
1106 case PPC::V_SETALLONESB:
1107 case PPC::V_SETALLONESH:
1108 case PPC::V_SETALLONES:
1109 case PPC::CRSET:
1110 case PPC::CRUNSET:
1111 case PPC::XXSETACCZ:
1112 case PPC::DMXXSETACCZ:
1113 return true;
1114 }
1116}
1117
1119 int &FrameIndex) const {
1120 if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
1121 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1122 MI.getOperand(2).isFI()) {
1123 FrameIndex = MI.getOperand(2).getIndex();
1124 return MI.getOperand(0).getReg();
1125 }
1126 }
1127 return 0;
1128}
1129
1131 unsigned OpIdx1,
1132 unsigned OpIdx2) const {
1133 MachineFunction &MF = *MI.getParent()->getParent();
1134
1135 // Normal instructions can be commuted the obvious way.
1136 if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
1137 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1138 // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
1139 // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
1140 // changing the relative order of the mask operands might change what happens
1141 // to the high-bits of the mask (and, thus, the result).
1142
1143 // Cannot commute if it has a non-zero rotate count.
1144 if (MI.getOperand(3).getImm() != 0)
1145 return nullptr;
1146
1147 // If we have a zero rotate count, we have:
1148 // M = mask(MB,ME)
1149 // Op0 = (Op1 & ~M) | (Op2 & M)
1150 // Change this to:
1151 // M = mask((ME+1)&31, (MB-1)&31)
1152 // Op0 = (Op2 & ~M) | (Op1 & M)
1153
1154 // Swap op1/op2
1155 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
1156 "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
1157 Register Reg0 = MI.getOperand(0).getReg();
1158 Register Reg1 = MI.getOperand(1).getReg();
1159 Register Reg2 = MI.getOperand(2).getReg();
1160 unsigned SubReg1 = MI.getOperand(1).getSubReg();
1161 unsigned SubReg2 = MI.getOperand(2).getSubReg();
1162 bool Reg1IsKill = MI.getOperand(1).isKill();
1163 bool Reg2IsKill = MI.getOperand(2).isKill();
1164 bool ChangeReg0 = false;
1165 // If machine instrs are no longer in two-address forms, update
1166 // destination register as well.
1167 if (Reg0 == Reg1) {
1168 // Must be two address instruction (i.e. op1 is tied to op0).
1169 assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
1170 "Expecting a two-address instruction!");
1171 assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
1172 Reg2IsKill = false;
1173 ChangeReg0 = true;
1174 }
1175
1176 // Masks.
1177 unsigned MB = MI.getOperand(4).getImm();
1178 unsigned ME = MI.getOperand(5).getImm();
1179
1180 // We can't commute a trivial mask (there is no way to represent an all-zero
1181 // mask).
1182 if (MB == 0 && ME == 31)
1183 return nullptr;
1184
1185 if (NewMI) {
1186 // Create a new instruction.
1187 Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
1188 bool Reg0IsDead = MI.getOperand(0).isDead();
1189 return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
1190 .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
1191 .addReg(Reg2, getKillRegState(Reg2IsKill))
1192 .addReg(Reg1, getKillRegState(Reg1IsKill))
1193 .addImm((ME + 1) & 31)
1194 .addImm((MB - 1) & 31);
1195 }
1196
1197 if (ChangeReg0) {
1198 MI.getOperand(0).setReg(Reg2);
1199 MI.getOperand(0).setSubReg(SubReg2);
1200 }
1201 MI.getOperand(2).setReg(Reg1);
1202 MI.getOperand(1).setReg(Reg2);
1203 MI.getOperand(2).setSubReg(SubReg1);
1204 MI.getOperand(1).setSubReg(SubReg2);
1205 MI.getOperand(2).setIsKill(Reg1IsKill);
1206 MI.getOperand(1).setIsKill(Reg2IsKill);
1207
1208 // Swap the mask around.
1209 MI.getOperand(4).setImm((ME + 1) & 31);
1210 MI.getOperand(5).setImm((MB - 1) & 31);
1211 return &MI;
1212}
1213
1215 unsigned &SrcOpIdx1,
1216 unsigned &SrcOpIdx2) const {
1217 // For VSX A-Type FMA instructions, it is the first two operands that can be
1218 // commuted, however, because the non-encoded tied input operand is listed
1219 // first, the operands to swap are actually the second and third.
1220
1221 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
1222 if (AltOpc == -1)
1223 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1224
1225 // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
1226 // and SrcOpIdx2.
1227 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
1228}
1229
1232 // This function is used for scheduling, and the nop wanted here is the type
1233 // that terminates dispatch groups on the POWER cores.
1234 unsigned Directive = Subtarget.getCPUDirective();
1235 unsigned Opcode;
1236 switch (Directive) {
1237 default: Opcode = PPC::NOP; break;
1238 case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
1239 case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
1240 case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
1241 // FIXME: Update when POWER9 scheduling model is ready.
1242 case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
1243 }
1244
1245 DebugLoc DL;
1246 BuildMI(MBB, MI, DL, get(Opcode));
1247}
1248
1249/// Return the noop instruction to use for a noop.
1251 MCInst Nop;
1252 Nop.setOpcode(PPC::NOP);
1253 return Nop;
1254}
1255
1256// Branch analysis.
1257// Note: If the condition register is set to CTR or CTR8 then this is a
1258// BDNZ (imm == 1) or BDZ (imm == 0) branch.
1261 MachineBasicBlock *&FBB,
1263 bool AllowModify) const {
1264 bool isPPC64 = Subtarget.isPPC64();
1265
1266 // If the block has no terminators, it just falls into the block after it.
1267 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1268 if (I == MBB.end())
1269 return false;
1270
1271 if (!isUnpredicatedTerminator(*I))
1272 return false;
1273
1274 if (AllowModify) {
1275 // If the BB ends with an unconditional branch to the fallthrough BB,
1276 // we eliminate the branch instruction.
1277 if (I->getOpcode() == PPC::B &&
1278 MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
1279 I->eraseFromParent();
1280
1281 // We update iterator after deleting the last branch.
1282 I = MBB.getLastNonDebugInstr();
1283 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
1284 return false;
1285 }
1286 }
1287
1288 // Get the last instruction in the block.
1289 MachineInstr &LastInst = *I;
1290
1291 // If there is only one terminator instruction, process it.
1292 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
1293 if (LastInst.getOpcode() == PPC::B) {
1294 if (!LastInst.getOperand(0).isMBB())
1295 return true;
1296 TBB = LastInst.getOperand(0).getMBB();
1297 return false;
1298 } else if (LastInst.getOpcode() == PPC::BCC) {
1299 if (!LastInst.getOperand(2).isMBB())
1300 return true;
1301 // Block ends with fall-through condbranch.
1302 TBB = LastInst.getOperand(2).getMBB();
1303 Cond.push_back(LastInst.getOperand(0));
1304 Cond.push_back(LastInst.getOperand(1));
1305 return false;
1306 } else if (LastInst.getOpcode() == PPC::BC) {
1307 if (!LastInst.getOperand(1).isMBB())
1308 return true;
1309 // Block ends with fall-through condbranch.
1310 TBB = LastInst.getOperand(1).getMBB();
1312 Cond.push_back(LastInst.getOperand(0));
1313 return false;
1314 } else if (LastInst.getOpcode() == PPC::BCn) {
1315 if (!LastInst.getOperand(1).isMBB())
1316 return true;
1317 // Block ends with fall-through condbranch.
1318 TBB = LastInst.getOperand(1).getMBB();
1320 Cond.push_back(LastInst.getOperand(0));
1321 return false;
1322 } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
1323 LastInst.getOpcode() == PPC::BDNZ) {
1324 if (!LastInst.getOperand(0).isMBB())
1325 return true;
1327 return true;
1328 TBB = LastInst.getOperand(0).getMBB();
1329 Cond.push_back(MachineOperand::CreateImm(1));
1330 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1331 true));
1332 return false;
1333 } else if (LastInst.getOpcode() == PPC::BDZ8 ||
1334 LastInst.getOpcode() == PPC::BDZ) {
1335 if (!LastInst.getOperand(0).isMBB())
1336 return true;
1338 return true;
1339 TBB = LastInst.getOperand(0).getMBB();
1340 Cond.push_back(MachineOperand::CreateImm(0));
1341 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1342 true));
1343 return false;
1344 }
1345
1346 // Otherwise, don't know what this is.
1347 return true;
1348 }
1349
1350 // Get the instruction before it if it's a terminator.
1351 MachineInstr &SecondLastInst = *I;
1352
1353 // If there are three terminators, we don't know what sort of block this is.
1354 if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
1355 return true;
1356
1357 // If the block ends with PPC::B and PPC:BCC, handle it.
1358 if (SecondLastInst.getOpcode() == PPC::BCC &&
1359 LastInst.getOpcode() == PPC::B) {
1360 if (!SecondLastInst.getOperand(2).isMBB() ||
1361 !LastInst.getOperand(0).isMBB())
1362 return true;
1363 TBB = SecondLastInst.getOperand(2).getMBB();
1364 Cond.push_back(SecondLastInst.getOperand(0));
1365 Cond.push_back(SecondLastInst.getOperand(1));
1366 FBB = LastInst.getOperand(0).getMBB();
1367 return false;
1368 } else if (SecondLastInst.getOpcode() == PPC::BC &&
1369 LastInst.getOpcode() == PPC::B) {
1370 if (!SecondLastInst.getOperand(1).isMBB() ||
1371 !LastInst.getOperand(0).isMBB())
1372 return true;
1373 TBB = SecondLastInst.getOperand(1).getMBB();
1375 Cond.push_back(SecondLastInst.getOperand(0));
1376 FBB = LastInst.getOperand(0).getMBB();
1377 return false;
1378 } else if (SecondLastInst.getOpcode() == PPC::BCn &&
1379 LastInst.getOpcode() == PPC::B) {
1380 if (!SecondLastInst.getOperand(1).isMBB() ||
1381 !LastInst.getOperand(0).isMBB())
1382 return true;
1383 TBB = SecondLastInst.getOperand(1).getMBB();
1385 Cond.push_back(SecondLastInst.getOperand(0));
1386 FBB = LastInst.getOperand(0).getMBB();
1387 return false;
1388 } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
1389 SecondLastInst.getOpcode() == PPC::BDNZ) &&
1390 LastInst.getOpcode() == PPC::B) {
1391 if (!SecondLastInst.getOperand(0).isMBB() ||
1392 !LastInst.getOperand(0).isMBB())
1393 return true;
1395 return true;
1396 TBB = SecondLastInst.getOperand(0).getMBB();
1397 Cond.push_back(MachineOperand::CreateImm(1));
1398 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1399 true));
1400 FBB = LastInst.getOperand(0).getMBB();
1401 return false;
1402 } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
1403 SecondLastInst.getOpcode() == PPC::BDZ) &&
1404 LastInst.getOpcode() == PPC::B) {
1405 if (!SecondLastInst.getOperand(0).isMBB() ||
1406 !LastInst.getOperand(0).isMBB())
1407 return true;
1409 return true;
1410 TBB = SecondLastInst.getOperand(0).getMBB();
1411 Cond.push_back(MachineOperand::CreateImm(0));
1412 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1413 true));
1414 FBB = LastInst.getOperand(0).getMBB();
1415 return false;
1416 }
1417
1418 // If the block ends with two PPC:Bs, handle it. The second one is not
1419 // executed, so remove it.
1420 if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
1421 if (!SecondLastInst.getOperand(0).isMBB())
1422 return true;
1423 TBB = SecondLastInst.getOperand(0).getMBB();
1424 I = LastInst;
1425 if (AllowModify)
1426 I->eraseFromParent();
1427 return false;
1428 }
1429
1430 // Otherwise, can't handle this.
1431 return true;
1432}
1433
1435 int *BytesRemoved) const {
1436 assert(!BytesRemoved && "code size not handled");
1437
1438 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1439 if (I == MBB.end())
1440 return 0;
1441
1442 if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
1443 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1444 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1445 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1446 return 0;
1447
1448 // Remove the branch.
1449 I->eraseFromParent();
1450
1451 I = MBB.end();
1452
1453 if (I == MBB.begin()) return 1;
1454 --I;
1455 if (I->getOpcode() != PPC::BCC &&
1456 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1457 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1458 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1459 return 1;
1460
1461 // Remove the branch.
1462 I->eraseFromParent();
1463 return 2;
1464}
1465
1468 MachineBasicBlock *FBB,
1470 const DebugLoc &DL,
1471 int *BytesAdded) const {
1472 // Shouldn't be a fall through.
1473 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1474 assert((Cond.size() == 2 || Cond.size() == 0) &&
1475 "PPC branch conditions have two components!");
1476 assert(!BytesAdded && "code size not handled");
1477
1478 bool isPPC64 = Subtarget.isPPC64();
1479
1480 // One-way branch.
1481 if (!FBB) {
1482 if (Cond.empty()) // Unconditional branch
1483 BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
1484 else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1485 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1486 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1487 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1488 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1489 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1490 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1491 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1492 else // Conditional branch
1493 BuildMI(&MBB, DL, get(PPC::BCC))
1494 .addImm(Cond[0].getImm())
1495 .add(Cond[1])
1496 .addMBB(TBB);
1497 return 1;
1498 }
1499
1500 // Two-way Conditional Branch.
1501 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1502 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1503 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1504 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1505 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1506 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1507 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1508 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1509 else
1510 BuildMI(&MBB, DL, get(PPC::BCC))
1511 .addImm(Cond[0].getImm())
1512 .add(Cond[1])
1513 .addMBB(TBB);
1514 BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
1515 return 2;
1516}
1517
1518// Select analysis.
1521 Register DstReg, Register TrueReg,
1522 Register FalseReg, int &CondCycles,
1523 int &TrueCycles, int &FalseCycles) const {
1524 if (!Subtarget.hasISEL())
1525 return false;
1526
1527 if (Cond.size() != 2)
1528 return false;
1529
1530 // If this is really a bdnz-like condition, then it cannot be turned into a
1531 // select.
1532 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1533 return false;
1534
1535 // If the conditional branch uses a physical register, then it cannot be
1536 // turned into a select.
1537 if (Cond[1].getReg().isPhysical())
1538 return false;
1539
1540 // Check register classes.
1541 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1542 const TargetRegisterClass *RC =
1543 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1544 if (!RC)
1545 return false;
1546
1547 // isel is for regular integer GPRs only.
1548 if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1549 !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1550 !PPC::G8RCRegClass.hasSubClassEq(RC) &&
1551 !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1552 return false;
1553
1554 // FIXME: These numbers are for the A2, how well they work for other cores is
1555 // an open question. On the A2, the isel instruction has a 2-cycle latency
1556 // but single-cycle throughput. These numbers are used in combination with
1557 // the MispredictPenalty setting from the active SchedMachineModel.
1558 CondCycles = 1;
1559 TrueCycles = 1;
1560 FalseCycles = 1;
1561
1562 return true;
1563}
1564
1567 const DebugLoc &dl, Register DestReg,
1569 Register FalseReg) const {
1570 assert(Cond.size() == 2 &&
1571 "PPC branch conditions have two components!");
1572
1573 // Get the register classes.
1574 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1575 const TargetRegisterClass *RC =
1576 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1577 assert(RC && "TrueReg and FalseReg must have overlapping register classes");
1578
1579 bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
1580 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
1581 assert((Is64Bit ||
1582 PPC::GPRCRegClass.hasSubClassEq(RC) ||
1583 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
1584 "isel is for regular integer GPRs only");
1585
1586 unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
1587 auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
1588
1589 unsigned SubIdx = 0;
1590 bool SwapOps = false;
1591 switch (SelectPred) {
1592 case PPC::PRED_EQ:
1593 case PPC::PRED_EQ_MINUS:
1594 case PPC::PRED_EQ_PLUS:
1595 SubIdx = PPC::sub_eq; SwapOps = false; break;
1596 case PPC::PRED_NE:
1597 case PPC::PRED_NE_MINUS:
1598 case PPC::PRED_NE_PLUS:
1599 SubIdx = PPC::sub_eq; SwapOps = true; break;
1600 case PPC::PRED_LT:
1601 case PPC::PRED_LT_MINUS:
1602 case PPC::PRED_LT_PLUS:
1603 SubIdx = PPC::sub_lt; SwapOps = false; break;
1604 case PPC::PRED_GE:
1605 case PPC::PRED_GE_MINUS:
1606 case PPC::PRED_GE_PLUS:
1607 SubIdx = PPC::sub_lt; SwapOps = true; break;
1608 case PPC::PRED_GT:
1609 case PPC::PRED_GT_MINUS:
1610 case PPC::PRED_GT_PLUS:
1611 SubIdx = PPC::sub_gt; SwapOps = false; break;
1612 case PPC::PRED_LE:
1613 case PPC::PRED_LE_MINUS:
1614 case PPC::PRED_LE_PLUS:
1615 SubIdx = PPC::sub_gt; SwapOps = true; break;
1616 case PPC::PRED_UN:
1617 case PPC::PRED_UN_MINUS:
1618 case PPC::PRED_UN_PLUS:
1619 SubIdx = PPC::sub_un; SwapOps = false; break;
1620 case PPC::PRED_NU:
1621 case PPC::PRED_NU_MINUS:
1622 case PPC::PRED_NU_PLUS:
1623 SubIdx = PPC::sub_un; SwapOps = true; break;
1624 case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
1625 case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
1626 }
1627
1628 Register FirstReg = SwapOps ? FalseReg : TrueReg,
1629 SecondReg = SwapOps ? TrueReg : FalseReg;
1630
1631 // The first input register of isel cannot be r0. If it is a member
1632 // of a register class that can be r0, then copy it first (the
1633 // register allocator should eliminate the copy).
1634 if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
1635 MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
1636 const TargetRegisterClass *FirstRC =
1637 MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
1638 &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
1639 Register OldFirstReg = FirstReg;
1640 FirstReg = MRI.createVirtualRegister(FirstRC);
1641 BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
1642 .addReg(OldFirstReg);
1643 }
1644
1645 BuildMI(MBB, MI, dl, get(OpCode), DestReg)
1646 .addReg(FirstReg).addReg(SecondReg)
1647 .addReg(Cond[1].getReg(), 0, SubIdx);
1648}
1649
1650static unsigned getCRBitValue(unsigned CRBit) {
1651 unsigned Ret = 4;
1652 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1653 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1654 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1655 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1656 Ret = 3;
1657 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1658 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1659 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1660 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1661 Ret = 2;
1662 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1663 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1664 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1665 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1666 Ret = 1;
1667 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1668 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1669 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1670 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1671 Ret = 0;
1672
1673 assert(Ret != 4 && "Invalid CR bit register");
1674 return Ret;
1675}
1676
1679 const DebugLoc &DL, Register DestReg,
1680 Register SrcReg, bool KillSrc,
1681 bool RenamableDest, bool RenamableSrc) const {
1682 // We can end up with self copies and similar things as a result of VSX copy
1683 // legalization. Promote them here.
1685 if (PPC::F8RCRegClass.contains(DestReg) &&
1686 PPC::VSRCRegClass.contains(SrcReg)) {
1687 MCRegister SuperReg =
1688 TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
1689
1690 if (VSXSelfCopyCrash && SrcReg == SuperReg)
1691 llvm_unreachable("nop VSX copy");
1692
1693 DestReg = SuperReg;
1694 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
1695 PPC::VSRCRegClass.contains(DestReg)) {
1696 MCRegister SuperReg =
1697 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
1698
1699 if (VSXSelfCopyCrash && DestReg == SuperReg)
1700 llvm_unreachable("nop VSX copy");
1701
1702 SrcReg = SuperReg;
1703 }
1704
1705 // Different class register copy
1706 if (PPC::CRBITRCRegClass.contains(SrcReg) &&
1707 PPC::GPRCRegClass.contains(DestReg)) {
1708 MCRegister CRReg = getCRFromCRBit(SrcReg);
1709 BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
1710 getKillRegState(KillSrc);
1711 // Rotate the CR bit in the CR fields to be the least significant bit and
1712 // then mask with 0x1 (MB = ME = 31).
1713 BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
1714 .addReg(DestReg, RegState::Kill)
1715 .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
1716 .addImm(31)
1717 .addImm(31);
1718 return;
1719 } else if (PPC::CRRCRegClass.contains(SrcReg) &&
1720 (PPC::G8RCRegClass.contains(DestReg) ||
1721 PPC::GPRCRegClass.contains(DestReg))) {
1722 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1723 unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1724 unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1725 unsigned CRNum = TRI->getEncodingValue(SrcReg);
1726 BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
1727 getKillRegState(KillSrc);
1728 if (CRNum == 7)
1729 return;
1730 // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
1731 BuildMI(MBB, I, DL, get(ShCode), DestReg)
1732 .addReg(DestReg, RegState::Kill)
1733 .addImm(CRNum * 4 + 4)
1734 .addImm(28)
1735 .addImm(31);
1736 return;
1737 } else if (PPC::G8RCRegClass.contains(SrcReg) &&
1738 PPC::VSFRCRegClass.contains(DestReg)) {
1739 assert(Subtarget.hasDirectMove() &&
1740 "Subtarget doesn't support directmove, don't know how to copy.");
1741 BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
1742 NumGPRtoVSRSpill++;
1743 getKillRegState(KillSrc);
1744 return;
1745 } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
1746 PPC::G8RCRegClass.contains(DestReg)) {
1747 assert(Subtarget.hasDirectMove() &&
1748 "Subtarget doesn't support directmove, don't know how to copy.");
1749 BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
1750 getKillRegState(KillSrc);
1751 return;
1752 } else if (PPC::SPERCRegClass.contains(SrcReg) &&
1753 PPC::GPRCRegClass.contains(DestReg)) {
1754 BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
1755 getKillRegState(KillSrc);
1756 return;
1757 } else if (PPC::GPRCRegClass.contains(SrcReg) &&
1758 PPC::SPERCRegClass.contains(DestReg)) {
1759 BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
1760 getKillRegState(KillSrc);
1761 return;
1762 } else if ((PPC::G8RCRegClass.contains(DestReg) ||
1763 PPC::GPRCRegClass.contains(DestReg)) &&
1764 SrcReg == PPC::CARRY) {
1765 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1766 BuildMI(MBB, I, DL, get(Is64Bit ? PPC::MFSPR8 : PPC::MFSPR), DestReg)
1767 .addImm(1)
1768 .addReg(PPC::CARRY, RegState::Implicit);
1769 return;
1770 } else if ((PPC::G8RCRegClass.contains(SrcReg) ||
1771 PPC::GPRCRegClass.contains(SrcReg)) &&
1772 DestReg == PPC::CARRY) {
1773 bool Is64Bit = PPC::G8RCRegClass.contains(SrcReg);
1774 BuildMI(MBB, I, DL, get(Is64Bit ? PPC::MTSPR8 : PPC::MTSPR))
1775 .addImm(1)
1776 .addReg(SrcReg)
1777 .addReg(PPC::CARRY, RegState::ImplicitDefine);
1778 return;
1779 }
1780
1781 unsigned Opc;
1782 if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
1783 Opc = PPC::OR;
1784 else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
1785 Opc = PPC::OR8;
1786 else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
1787 Opc = PPC::FMR;
1788 else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
1789 Opc = PPC::MCRF;
1790 else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
1791 Opc = PPC::VOR;
1792 else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
1793 // There are two different ways this can be done:
1794 // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1795 // issue in VSU pipeline 0.
1796 // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1797 // can go to either pipeline.
1798 // We'll always use xxlor here, because in practically all cases where
1799 // copies are generated, they are close enough to some use that the
1800 // lower-latency form is preferable.
1801 Opc = PPC::XXLOR;
1802 else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
1803 PPC::VSSRCRegClass.contains(DestReg, SrcReg))
1804 Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1805 else if (Subtarget.pairedVectorMemops() &&
1806 PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
1807 if (SrcReg > PPC::VSRp15)
1808 SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
1809 else
1810 SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
1811 if (DestReg > PPC::VSRp15)
1812 DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
1813 else
1814 DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
1815 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
1816 addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1817 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
1818 addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
1819 return;
1820 }
1821 else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
1822 Opc = PPC::CROR;
1823 else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
1824 Opc = PPC::EVOR;
1825 else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1826 PPC::UACCRCRegClass.contains(DestReg)) &&
1827 (PPC::ACCRCRegClass.contains(SrcReg) ||
1828 PPC::UACCRCRegClass.contains(SrcReg))) {
1829 // If primed, de-prime the source register, copy the individual registers
1830 // and prime the destination if needed. The vector subregisters are
1831 // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1832 // source is primed, we need to re-prime it after the copy as well.
1833 PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1834 bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1835 bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1836 MCRegister VSLSrcReg =
1837 PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1838 MCRegister VSLDestReg =
1839 PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1840 if (SrcPrimed)
1841 BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1842 for (unsigned Idx = 0; Idx < 4; Idx++)
1843 BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1844 .addReg(VSLSrcReg + Idx)
1845 .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
1846 if (DestPrimed)
1847 BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1848 if (SrcPrimed && !KillSrc)
1849 BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1850 return;
1851 } else if (PPC::G8pRCRegClass.contains(DestReg) &&
1852 PPC::G8pRCRegClass.contains(SrcReg)) {
1853 // TODO: Handle G8RC to G8pRC (and vice versa) copy.
1854 unsigned DestRegIdx = DestReg - PPC::G8p0;
1855 MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
1856 MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
1857 unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1858 MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
1859 MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
1860 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
1861 .addReg(SrcRegSub0)
1862 .addReg(SrcRegSub0, getKillRegState(KillSrc));
1863 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
1864 .addReg(SrcRegSub1)
1865 .addReg(SrcRegSub1, getKillRegState(KillSrc));
1866 return;
1867 } else if ((PPC::WACCRCRegClass.contains(DestReg) ||
1868 PPC::WACC_HIRCRegClass.contains(DestReg)) &&
1869 (PPC::WACCRCRegClass.contains(SrcReg) ||
1870 PPC::WACC_HIRCRegClass.contains(SrcReg))) {
1871
1872 Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512
1873 : PPC::DMXXEXTFDMR512_HI;
1874
1875 RegScavenger RS;
1876 RS.enterBasicBlockEnd(MBB);
1877 RS.backward(std::next(I));
1878
1879 Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
1880 /* RestoreAfter */ false, 0,
1881 /* AllowSpill */ false);
1882
1883 RS.setRegUsed(TmpReg1);
1884 Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
1885 /* RestoreAfter */ false, 0,
1886 /* AllowSpill */ false);
1887
1888 BuildMI(MBB, I, DL, get(Opc))
1889 .addReg(TmpReg1, RegState::Define)
1890 .addReg(TmpReg2, RegState::Define)
1891 .addReg(SrcReg, getKillRegState(KillSrc));
1892
1893 Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512
1894 : PPC::DMXXINSTDMR512_HI;
1895
1896 BuildMI(MBB, I, DL, get(Opc), DestReg)
1897 .addReg(TmpReg1, RegState::Kill)
1898 .addReg(TmpReg2, RegState::Kill);
1899
1900 return;
1901 } else if (PPC::DMRRCRegClass.contains(DestReg) &&
1902 PPC::DMRRCRegClass.contains(SrcReg)) {
1903
1904 BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg)
1905 .addReg(SrcReg, getKillRegState(KillSrc));
1906
1907 return;
1908
1909 } else
1910 llvm_unreachable("Impossible reg-to-reg copy");
1911
1912 const MCInstrDesc &MCID = get(Opc);
1913 if (MCID.getNumOperands() == 3)
1914 BuildMI(MBB, I, DL, MCID, DestReg)
1915 .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1916 else
1917 BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
1918}
1919
1920unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
1921 int OpcodeIndex = 0;
1922
1923 if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
1924 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1926 } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
1927 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1929 } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1931 } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1933 } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1935 } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1937 } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1939 } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1941 } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1943 } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1945 } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1947 } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
1949 } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
1950 assert(Subtarget.pairedVectorMemops() &&
1951 "Register unexpected when paired memops are disabled.");
1953 } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1954 assert(Subtarget.pairedVectorMemops() &&
1955 "Register unexpected when paired memops are disabled.");
1957 } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1958 assert(Subtarget.pairedVectorMemops() &&
1959 "Register unexpected when paired memops are disabled.");
1961 } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1962 assert(Subtarget.pairedVectorMemops() &&
1963 "Register unexpected when paired memops are disabled.");
1965 } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1967 } else if (PPC::DMRROWRCRegClass.hasSubClassEq(RC)) {
1968 llvm_unreachable("TODO: Implement spill DMRROW regclass!");
1969 } else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) {
1970 llvm_unreachable("TODO: Implement spill DMRROWp regclass!");
1971 } else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) {
1973 } else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) {
1975 } else {
1976 llvm_unreachable("Unknown regclass!");
1977 }
1978 return OpcodeIndex;
1979}
1980
1981unsigned
1983 ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1984 return OpcodesForSpill[getSpillIndex(RC)];
1985}
1986
1987unsigned
1989 ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1990 return OpcodesForSpill[getSpillIndex(RC)];
1991}
1992
1993void PPCInstrInfo::StoreRegToStackSlot(
1994 MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1995 const TargetRegisterClass *RC,
1996 SmallVectorImpl<MachineInstr *> &NewMIs) const {
1997 unsigned Opcode = getStoreOpcodeForSpill(RC);
1998 DebugLoc DL;
1999
2000 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2001 FuncInfo->setHasSpills();
2002
2004 BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
2005 FrameIdx));
2006
2007 if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
2008 PPC::CRBITRCRegClass.hasSubClassEq(RC))
2009 FuncInfo->setSpillsCR();
2010
2011 if (isXFormMemOp(Opcode))
2012 FuncInfo->setHasNonRISpills();
2013}
2014
2017 bool isKill, int FrameIdx, const TargetRegisterClass *RC) const {
2018 MachineFunction &MF = *MBB.getParent();
2020
2021 StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
2022
2023 for (MachineInstr *NewMI : NewMIs)
2024 MBB.insert(MI, NewMI);
2025
2026 const MachineFrameInfo &MFI = MF.getFrameInfo();
2030 MFI.getObjectAlign(FrameIdx));
2031 NewMIs.back()->addMemOperand(MF, MMO);
2032}
2033
2036 bool isKill, int FrameIdx, const TargetRegisterClass *RC, Register VReg,
2037 MachineInstr::MIFlag Flags) const {
2038 // We need to avoid a situation in which the value from a VRRC register is
2039 // spilled using an Altivec instruction and reloaded into a VSRC register
2040 // using a VSX instruction. The issue with this is that the VSX
2041 // load/store instructions swap the doublewords in the vector and the Altivec
2042 // ones don't. The register classes on the spill/reload may be different if
2043 // the register is defined using an Altivec instruction and is then used by a
2044 // VSX instruction.
2045 RC = updatedRC(RC);
2046 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC);
2047}
2048
2049void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
2050 unsigned DestReg, int FrameIdx,
2051 const TargetRegisterClass *RC,
2053 const {
2054 unsigned Opcode = getLoadOpcodeForSpill(RC);
2055 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
2056 FrameIdx));
2057}
2058
2061 int FrameIdx, const TargetRegisterClass *RC) const {
2062 MachineFunction &MF = *MBB.getParent();
2064 DebugLoc DL;
2065 if (MI != MBB.end()) DL = MI->getDebugLoc();
2066
2067 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2068
2069 for (MachineInstr *NewMI : NewMIs)
2070 MBB.insert(MI, NewMI);
2071
2072 const MachineFrameInfo &MFI = MF.getFrameInfo();
2076 MFI.getObjectAlign(FrameIdx));
2077 NewMIs.back()->addMemOperand(MF, MMO);
2078}
2079
2082 Register DestReg, int FrameIdx,
2083 const TargetRegisterClass *RC,
2084 Register VReg,
2085 MachineInstr::MIFlag Flags) const {
2086 // We need to avoid a situation in which the value from a VRRC register is
2087 // spilled using an Altivec instruction and reloaded into a VSRC register
2088 // using a VSX instruction. The issue with this is that the VSX
2089 // load/store instructions swap the doublewords in the vector and the Altivec
2090 // ones don't. The register classes on the spill/reload may be different if
2091 // the register is defined using an Altivec instruction and is then used by a
2092 // VSX instruction.
2093 RC = updatedRC(RC);
2094
2095 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC);
2096}
2097
2100 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2101 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2102 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2103 else
2104 // Leave the CR# the same, but invert the condition.
2106 return false;
2107}
2108
2109// For some instructions, it is legal to fold ZERO into the RA register field.
2110// This function performs that fold by replacing the operand with PPC::ZERO,
2111// it does not consider whether the load immediate zero is no longer in use.
2113 Register Reg) const {
2114 // A zero immediate should always be loaded with a single li.
2115 unsigned DefOpc = DefMI.getOpcode();
2116 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2117 return false;
2118 if (!DefMI.getOperand(1).isImm())
2119 return false;
2120 if (DefMI.getOperand(1).getImm() != 0)
2121 return false;
2122
2123 // Note that we cannot here invert the arguments of an isel in order to fold
2124 // a ZERO into what is presented as the second argument. All we have here
2125 // is the condition bit, and that might come from a CR-logical bit operation.
2126
2127 const MCInstrDesc &UseMCID = UseMI.getDesc();
2128
2129 // Only fold into real machine instructions.
2130 if (UseMCID.isPseudo())
2131 return false;
2132
2133 // We need to find which of the User's operands is to be folded, that will be
2134 // the operand that matches the given register ID.
2135 unsigned UseIdx;
2136 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2137 if (UseMI.getOperand(UseIdx).isReg() &&
2138 UseMI.getOperand(UseIdx).getReg() == Reg)
2139 break;
2140
2141 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2142 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2143
2144 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2145 // register (which might also be specified as a pointer class kind).
2146
2147 const MCOperandInfo &UseInfo = UseMCID.operands()[UseIdx];
2148 int16_t RegClass = getOpRegClassID(UseInfo);
2149 if (UseInfo.RegClass != PPC::GPRC_NOR0RegClassID &&
2150 UseInfo.RegClass != PPC::G8RC_NOX0RegClassID)
2151 return false;
2152
2153 // Make sure this is not tied to an output register (or otherwise
2154 // constrained). This is true for ST?UX registers, for example, which
2155 // are tied to their output registers.
2156 if (UseInfo.Constraints != 0)
2157 return false;
2158
2159 MCRegister ZeroReg =
2160 RegClass == PPC::G8RC_NOX0RegClassID ? PPC::ZERO8 : PPC::ZERO;
2161
2162 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2163 LLVM_DEBUG(UseMI.dump());
2164 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2165 LLVM_DEBUG(dbgs() << "Into: ");
2166 LLVM_DEBUG(UseMI.dump());
2167 return true;
2168}
2169
2170// Folds zero into instructions which have a load immediate zero as an operand
2171// but also recognize zero as immediate zero. If the definition of the load
2172// has no more users it is deleted.
2174 Register Reg, MachineRegisterInfo *MRI) const {
2175 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2176 if (MRI->use_nodbg_empty(Reg))
2177 DefMI.eraseFromParent();
2178 return Changed;
2179}
2180
2182 for (MachineInstr &MI : MBB)
2183 if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
2184 MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
2185 return true;
2186 return false;
2187}
2188
2189// We should make sure that, if we're going to predicate both sides of a
2190// condition (a diamond), that both sides don't define the counter register. We
2191// can predicate counter-decrement-based branches, but while that predicates
2192// the branching, it does not predicate the counter decrement. If we tried to
2193// merge the triangle into one predicated block, we'd decrement the counter
2194// twice.
2196 unsigned NumT, unsigned ExtraT,
2197 MachineBasicBlock &FMBB,
2198 unsigned NumF, unsigned ExtraF,
2199 BranchProbability Probability) const {
2200 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2201}
2202
2203
2205 // The predicated branches are identified by their type, not really by the
2206 // explicit presence of a predicate. Furthermore, some of them can be
2207 // predicated more than once. Because if conversion won't try to predicate
2208 // any instruction which already claims to be predicated (by returning true
2209 // here), always return false. In doing so, we let isPredicable() be the
2210 // final word on whether not the instruction can be (further) predicated.
2211
2212 return false;
2213}
2214
2216 const MachineBasicBlock *MBB,
2217 const MachineFunction &MF) const {
2218 switch (MI.getOpcode()) {
2219 default:
2220 break;
2221 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2222 // across them, since some FP operations may change content of FPSCR.
2223 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2224 case PPC::MFFS:
2225 case PPC::MTFSF:
2226 case PPC::FENCE:
2227 return true;
2228 }
2230}
2231
2233 ArrayRef<MachineOperand> Pred) const {
2234 unsigned OpC = MI.getOpcode();
2235 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2236 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2237 bool isPPC64 = Subtarget.isPPC64();
2238 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2239 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2240 // Need add Def and Use for CTR implicit operand.
2241 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2242 .addReg(Pred[1].getReg(), RegState::Implicit)
2244 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2245 MI.setDesc(get(PPC::BCLR));
2246 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2247 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2248 MI.setDesc(get(PPC::BCLRn));
2249 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2250 } else {
2251 MI.setDesc(get(PPC::BCCLR));
2252 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2253 .addImm(Pred[0].getImm())
2254 .add(Pred[1]);
2255 }
2256
2257 return true;
2258 } else if (OpC == PPC::B) {
2259 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2260 bool isPPC64 = Subtarget.isPPC64();
2261 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2262 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2263 // Need add Def and Use for CTR implicit operand.
2264 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2265 .addReg(Pred[1].getReg(), RegState::Implicit)
2267 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2268 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2269 MI.removeOperand(0);
2270
2271 MI.setDesc(get(PPC::BC));
2272 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2273 .add(Pred[1])
2274 .addMBB(MBB);
2275 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2276 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2277 MI.removeOperand(0);
2278
2279 MI.setDesc(get(PPC::BCn));
2280 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2281 .add(Pred[1])
2282 .addMBB(MBB);
2283 } else {
2284 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2285 MI.removeOperand(0);
2286
2287 MI.setDesc(get(PPC::BCC));
2288 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2289 .addImm(Pred[0].getImm())
2290 .add(Pred[1])
2291 .addMBB(MBB);
2292 }
2293
2294 return true;
2295 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2296 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2297 OpC == PPC::BCTRL8_RM) {
2298 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2299 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2300
2301 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2302 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2303 bool isPPC64 = Subtarget.isPPC64();
2304
2305 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2306 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2307 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2308 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2309 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2310 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2311 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2312 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2313 } else {
2314 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2315 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2316 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2317 .addImm(Pred[0].getImm())
2318 .add(Pred[1]);
2319 }
2320
2321 // Need add Def and Use for LR implicit operand.
2322 if (setLR)
2323 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2324 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2325 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2326 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2327 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2329
2330 return true;
2331 }
2332
2333 return false;
2334}
2335
2337 ArrayRef<MachineOperand> Pred2) const {
2338 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2339 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2340
2341 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2342 return false;
2343 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2344 return false;
2345
2346 // P1 can only subsume P2 if they test the same condition register.
2347 if (Pred1[1].getReg() != Pred2[1].getReg())
2348 return false;
2349
2350 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2351 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2352
2353 if (P1 == P2)
2354 return true;
2355
2356 // Does P1 subsume P2, e.g. GE subsumes GT.
2357 if (P1 == PPC::PRED_LE &&
2358 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2359 return true;
2360 if (P1 == PPC::PRED_GE &&
2361 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2362 return true;
2363
2364 return false;
2365}
2366
2368 std::vector<MachineOperand> &Pred,
2369 bool SkipDead) const {
2370 // Note: At the present time, the contents of Pred from this function is
2371 // unused by IfConversion. This implementation follows ARM by pushing the
2372 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2373 // predicate, instructions defining CTR or CTR8 are also included as
2374 // predicate-defining instructions.
2375
2376 const TargetRegisterClass *RCs[] =
2377 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2378 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2379
2380 bool Found = false;
2381 for (const MachineOperand &MO : MI.operands()) {
2382 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2383 const TargetRegisterClass *RC = RCs[c];
2384 if (MO.isReg()) {
2385 if (MO.isDef() && RC->contains(MO.getReg())) {
2386 Pred.push_back(MO);
2387 Found = true;
2388 }
2389 } else if (MO.isRegMask()) {
2390 for (MCPhysReg R : *RC)
2391 if (MO.clobbersPhysReg(R)) {
2392 Pred.push_back(MO);
2393 Found = true;
2394 }
2395 }
2396 }
2397 }
2398
2399 return Found;
2400}
2401
2403 Register &SrcReg2, int64_t &Mask,
2404 int64_t &Value) const {
2405 unsigned Opc = MI.getOpcode();
2406
2407 switch (Opc) {
2408 default: return false;
2409 case PPC::CMPWI:
2410 case PPC::CMPLWI:
2411 case PPC::CMPDI:
2412 case PPC::CMPLDI:
2413 SrcReg = MI.getOperand(1).getReg();
2414 SrcReg2 = 0;
2415 Value = MI.getOperand(2).getImm();
2416 Mask = 0xFFFF;
2417 return true;
2418 case PPC::CMPW:
2419 case PPC::CMPLW:
2420 case PPC::CMPD:
2421 case PPC::CMPLD:
2422 case PPC::FCMPUS:
2423 case PPC::FCMPUD:
2424 SrcReg = MI.getOperand(1).getReg();
2425 SrcReg2 = MI.getOperand(2).getReg();
2426 Value = 0;
2427 Mask = 0;
2428 return true;
2429 }
2430}
2431
2433 Register SrcReg2, int64_t Mask,
2434 int64_t Value,
2435 const MachineRegisterInfo *MRI) const {
2436 if (DisableCmpOpt)
2437 return false;
2438
2439 int OpC = CmpInstr.getOpcode();
2440 Register CRReg = CmpInstr.getOperand(0).getReg();
2441
2442 // FP record forms set CR1 based on the exception status bits, not a
2443 // comparison with zero.
2444 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2445 return false;
2446
2448 // The record forms set the condition register based on a signed comparison
2449 // with zero (so says the ISA manual). This is not as straightforward as it
2450 // seems, however, because this is always a 64-bit comparison on PPC64, even
2451 // for instructions that are 32-bit in nature (like slw for example).
2452 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2453 // for equality checks (as those don't depend on the sign). On PPC64,
2454 // we are restricted to equality for unsigned 64-bit comparisons and for
2455 // signed 32-bit comparisons the applicability is more restricted.
2456 bool isPPC64 = Subtarget.isPPC64();
2457 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2458 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2459 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2460
2461 // Look through copies unless that gets us to a physical register.
2462 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2463 if (ActualSrc.isVirtual())
2464 SrcReg = ActualSrc;
2465
2466 // Get the unique definition of SrcReg.
2467 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2468 if (!MI) return false;
2469
2470 bool equalityOnly = false;
2471 bool noSub = false;
2472 if (isPPC64) {
2473 if (is32BitSignedCompare) {
2474 // We can perform this optimization only if SrcReg is sign-extending.
2475 if (isSignExtended(SrcReg, MRI))
2476 noSub = true;
2477 else
2478 return false;
2479 } else if (is32BitUnsignedCompare) {
2480 // We can perform this optimization, equality only, if SrcReg is
2481 // zero-extending.
2482 if (isZeroExtended(SrcReg, MRI)) {
2483 noSub = true;
2484 equalityOnly = true;
2485 } else
2486 return false;
2487 } else
2488 equalityOnly = is64BitUnsignedCompare;
2489 } else
2490 equalityOnly = is32BitUnsignedCompare;
2491
2492 if (equalityOnly) {
2493 // We need to check the uses of the condition register in order to reject
2494 // non-equality comparisons.
2496 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2497 I != IE; ++I) {
2498 MachineInstr *UseMI = &*I;
2499 if (UseMI->getOpcode() == PPC::BCC) {
2500 PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
2501 unsigned PredCond = PPC::getPredicateCondition(Pred);
2502 // We ignore hint bits when checking for non-equality comparisons.
2503 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2504 return false;
2505 } else if (UseMI->getOpcode() == PPC::ISEL ||
2506 UseMI->getOpcode() == PPC::ISEL8) {
2507 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2508 if (SubIdx != PPC::sub_eq)
2509 return false;
2510 } else
2511 return false;
2512 }
2513 }
2514
2515 MachineBasicBlock::iterator I = CmpInstr;
2516
2517 // Scan forward to find the first use of the compare.
2518 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2519 ++I) {
2520 bool FoundUse = false;
2522 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2523 J != JE; ++J)
2524 if (&*J == &*I) {
2525 FoundUse = true;
2526 break;
2527 }
2528
2529 if (FoundUse)
2530 break;
2531 }
2532
2535
2536 // There are two possible candidates which can be changed to set CR[01].
2537 // One is MI, the other is a SUB instruction.
2538 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2539 MachineInstr *Sub = nullptr;
2540 if (SrcReg2 != 0)
2541 // MI is not a candidate for CMPrr.
2542 MI = nullptr;
2543 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2544 // same BB as the comparison. This is to allow the check below to avoid calls
2545 // (and other explicit clobbers); instead we should really check for these
2546 // more explicitly (in at least a few predecessors).
2547 else if (MI->getParent() != CmpInstr.getParent())
2548 return false;
2549 else if (Value != 0) {
2550 // The record-form instructions set CR bit based on signed comparison
2551 // against 0. We try to convert a compare against 1 or -1 into a compare
2552 // against 0 to exploit record-form instructions. For example, we change
2553 // the condition "greater than -1" into "greater than or equal to 0"
2554 // and "less than 1" into "less than or equal to 0".
2555
2556 // Since we optimize comparison based on a specific branch condition,
2557 // we don't optimize if condition code is used by more than once.
2558 if (equalityOnly || !MRI->hasOneUse(CRReg))
2559 return false;
2560
2561 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2562 if (UseMI->getOpcode() != PPC::BCC)
2563 return false;
2564
2565 PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
2566 unsigned PredCond = PPC::getPredicateCondition(Pred);
2567 unsigned PredHint = PPC::getPredicateHint(Pred);
2568 int16_t Immed = (int16_t)Value;
2569
2570 // When modifying the condition in the predicate, we propagate hint bits
2571 // from the original predicate to the new one.
2572 if (Immed == -1 && PredCond == PPC::PRED_GT)
2573 // We convert "greater than -1" into "greater than or equal to 0",
2574 // since we are assuming signed comparison by !equalityOnly
2575 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2576 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2577 // We convert "less than or equal to -1" into "less than 0".
2578 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2579 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2580 // We convert "less than 1" into "less than or equal to 0".
2581 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2582 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2583 // We convert "greater than or equal to 1" into "greater than 0".
2584 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2585 else
2586 return false;
2587
2588 // Convert the comparison and its user to a compare against zero with the
2589 // appropriate predicate on the branch. Zero comparison might provide
2590 // optimization opportunities post-RA (see optimization in
2591 // PPCPreEmitPeephole.cpp).
2592 UseMI->getOperand(0).setImm(Pred);
2593 CmpInstr.getOperand(2).setImm(0);
2594 }
2595
2596 // Search for Sub.
2597 --I;
2598
2599 // Get ready to iterate backward from CmpInstr.
2600 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2601
2602 for (; I != E && !noSub; --I) {
2603 const MachineInstr &Instr = *I;
2604 unsigned IOpC = Instr.getOpcode();
2605
2606 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2607 Instr.readsRegister(PPC::CR0, TRI)))
2608 // This instruction modifies or uses the record condition register after
2609 // the one we want to change. While we could do this transformation, it
2610 // would likely not be profitable. This transformation removes one
2611 // instruction, and so even forcing RA to generate one move probably
2612 // makes it unprofitable.
2613 return false;
2614
2615 // Check whether CmpInstr can be made redundant by the current instruction.
2616 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2617 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2618 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2619 ((Instr.getOperand(1).getReg() == SrcReg &&
2620 Instr.getOperand(2).getReg() == SrcReg2) ||
2621 (Instr.getOperand(1).getReg() == SrcReg2 &&
2622 Instr.getOperand(2).getReg() == SrcReg))) {
2623 Sub = &*I;
2624 break;
2625 }
2626
2627 if (I == B)
2628 // The 'and' is below the comparison instruction.
2629 return false;
2630 }
2631
2632 // Return false if no candidates exist.
2633 if (!MI && !Sub)
2634 return false;
2635
2636 // The single candidate is called MI.
2637 if (!MI) MI = Sub;
2638
2639 int NewOpC = -1;
2640 int MIOpC = MI->getOpcode();
2641 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2642 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2643 NewOpC = MIOpC;
2644 else {
2645 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2646 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2647 NewOpC = MIOpC;
2648 }
2649
2650 // FIXME: On the non-embedded POWER architectures, only some of the record
2651 // forms are fast, and we should use only the fast ones.
2652
2653 // The defining instruction has a record form (or is already a record
2654 // form). It is possible, however, that we'll need to reverse the condition
2655 // code of the users.
2656 if (NewOpC == -1)
2657 return false;
2658
2659 // This transformation should not be performed if `nsw` is missing and is not
2660 // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2661 // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2662 // CRReg can reflect if compared values are equal, this optz is still valid.
2663 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2664 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2665 return false;
2666
2667 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2668 // needs to be updated to be based on SUB. Push the condition code
2669 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2670 // condition code of these operands will be modified.
2671 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2672 // comparison against 0, which may modify predicate.
2673 bool ShouldSwap = false;
2674 if (Sub && Value == 0) {
2675 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2676 Sub->getOperand(2).getReg() == SrcReg;
2677
2678 // The operands to subf are the opposite of sub, so only in the fixed-point
2679 // case, invert the order.
2680 ShouldSwap = !ShouldSwap;
2681 }
2682
2683 if (ShouldSwap)
2685 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2686 I != IE; ++I) {
2687 MachineInstr *UseMI = &*I;
2688 if (UseMI->getOpcode() == PPC::BCC) {
2689 PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
2690 unsigned PredCond = PPC::getPredicateCondition(Pred);
2691 assert((!equalityOnly ||
2692 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2693 "Invalid predicate for equality-only optimization");
2694 (void)PredCond; // To suppress warning in release build.
2695 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2697 } else if (UseMI->getOpcode() == PPC::ISEL ||
2698 UseMI->getOpcode() == PPC::ISEL8) {
2699 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2700 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2701 "Invalid CR bit for equality-only optimization");
2702
2703 if (NewSubReg == PPC::sub_lt)
2704 NewSubReg = PPC::sub_gt;
2705 else if (NewSubReg == PPC::sub_gt)
2706 NewSubReg = PPC::sub_lt;
2707
2708 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2709 NewSubReg));
2710 } else // We need to abort on a user we don't understand.
2711 return false;
2712 }
2713 assert(!(Value != 0 && ShouldSwap) &&
2714 "Non-zero immediate support and ShouldSwap"
2715 "may conflict in updating predicate");
2716
2717 // Create a new virtual register to hold the value of the CR set by the
2718 // record-form instruction. If the instruction was not previously in
2719 // record form, then set the kill flag on the CR.
2720 CmpInstr.eraseFromParent();
2721
2723 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2724 get(TargetOpcode::COPY), CRReg)
2725 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2726
2727 // Even if CR0 register were dead before, it is alive now since the
2728 // instruction we just built uses it.
2729 MI->clearRegisterDeads(PPC::CR0);
2730
2731 if (MIOpC != NewOpC) {
2732 // We need to be careful here: we're replacing one instruction with
2733 // another, and we need to make sure that we get all of the right
2734 // implicit uses and defs. On the other hand, the caller may be holding
2735 // an iterator to this instruction, and so we can't delete it (this is
2736 // specifically the case if this is the instruction directly after the
2737 // compare).
2738
2739 // Rotates are expensive instructions. If we're emitting a record-form
2740 // rotate that can just be an andi/andis, we should just emit that.
2741 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2742 Register GPRRes = MI->getOperand(0).getReg();
2743 int64_t SH = MI->getOperand(2).getImm();
2744 int64_t MB = MI->getOperand(3).getImm();
2745 int64_t ME = MI->getOperand(4).getImm();
2746 // We can only do this if both the start and end of the mask are in the
2747 // same halfword.
2748 bool MBInLoHWord = MB >= 16;
2749 bool MEInLoHWord = ME >= 16;
2750 uint64_t Mask = ~0LLU;
2751
2752 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2753 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2754 // The mask value needs to shift right 16 if we're emitting andis.
2755 Mask >>= MBInLoHWord ? 0 : 16;
2756 NewOpC = MIOpC == PPC::RLWINM
2757 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2758 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2759 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2760 (ME - MB + 1 == SH) && (MB >= 16)) {
2761 // If we are rotating by the exact number of bits as are in the mask
2762 // and the mask is in the least significant bits of the register,
2763 // that's just an andis. (as long as the GPR result has no uses).
2764 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2765 Mask >>= 16;
2766 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2767 }
2768 // If we've set the mask, we can transform.
2769 if (Mask != ~0LLU) {
2770 MI->removeOperand(4);
2771 MI->removeOperand(3);
2772 MI->getOperand(2).setImm(Mask);
2773 NumRcRotatesConvertedToRcAnd++;
2774 }
2775 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
2776 int64_t MB = MI->getOperand(3).getImm();
2777 if (MB >= 48) {
2778 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2779 NewOpC = PPC::ANDI8_rec;
2780 MI->removeOperand(3);
2781 MI->getOperand(2).setImm(Mask);
2782 NumRcRotatesConvertedToRcAnd++;
2783 }
2784 }
2785
2786 const MCInstrDesc &NewDesc = get(NewOpC);
2787 MI->setDesc(NewDesc);
2788
2789 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2790 if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
2791 MI->addOperand(*MI->getParent()->getParent(),
2792 MachineOperand::CreateReg(ImpDef, true, true));
2793 }
2794 }
2795 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2796 if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
2797 MI->addOperand(*MI->getParent()->getParent(),
2798 MachineOperand::CreateReg(ImpUse, false, true));
2799 }
2800 }
2801 }
2802 assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2803 "Record-form instruction does not define cr0?");
2804
2805 // Modify the condition code of operands in OperandsToUpdate.
2806 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2807 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2808 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2809 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2810
2811 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2812 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2813
2814 return true;
2815}
2816
2819 if (MRI->isSSA())
2820 return false;
2821
2822 Register SrcReg, SrcReg2;
2823 int64_t CmpMask, CmpValue;
2824 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2825 return false;
2826
2827 // Try to optimize the comparison against 0.
2828 if (CmpValue || !CmpMask || SrcReg2)
2829 return false;
2830
2831 // The record forms set the condition register based on a signed comparison
2832 // with zero (see comments in optimizeCompareInstr). Since we can't do the
 2833 // equality checks in post-RA, we are more restricted on an unsigned
2834 // comparison.
2835 unsigned Opc = CmpMI.getOpcode();
2836 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2837 return false;
2838
2839 // The record forms are always based on a 64-bit comparison on PPC64
 2840 // (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2841 // comparison. Since we can't do the equality checks in post-RA, we bail out
2842 // the case.
2843 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2844 return false;
2845
2846 // CmpMI can't be deleted if it has implicit def.
2847 if (CmpMI.hasImplicitDef())
2848 return false;
2849
2850 bool SrcRegHasOtherUse = false;
2851 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2852 if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
2853 return false;
2854
2855 MachineOperand RegMO = CmpMI.getOperand(0);
2856 Register CRReg = RegMO.getReg();
2857 if (CRReg != PPC::CR0)
2858 return false;
2859
2860 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2861 bool SeenUseOfCRReg = false;
2862 bool IsCRRegKilled = false;
2863 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2864 SeenUseOfCRReg) ||
2865 SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
2866 return false;
2867
2868 int SrcMIOpc = SrcMI->getOpcode();
2869 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2870 if (NewOpC == -1)
2871 return false;
2872
2873 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2874 LLVM_DEBUG(SrcMI->dump());
2875
2876 const MCInstrDesc &NewDesc = get(NewOpC);
2877 SrcMI->setDesc(NewDesc);
2878 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2880 SrcMI->clearRegisterDeads(CRReg);
2881
2882 assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2883 "Record-form instruction does not define cr0?");
2884
2885 LLVM_DEBUG(dbgs() << "with: ");
2886 LLVM_DEBUG(SrcMI->dump());
2887 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2888 LLVM_DEBUG(CmpMI.dump());
2889 return true;
2890}
2891
2894 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2895 const TargetRegisterInfo *TRI) const {
2896 const MachineOperand *BaseOp;
2897 OffsetIsScalable = false;
2898 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2899 return false;
2900 BaseOps.push_back(BaseOp);
2901 return true;
2902}
2903
2904static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2905 const TargetRegisterInfo *TRI) {
2906 // If this is a volatile load/store, don't mess with it.
2907 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2908 return false;
2909
2910 if (LdSt.getOperand(2).isFI())
2911 return true;
2912
2913 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2914 // Can't cluster if the instruction modifies the base register
2915 // or it is update form. e.g. ld r2,3(r2)
2916 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2917 return false;
2918
2919 return true;
2920}
2921
2922// Only cluster instruction pair that have the same opcode, and they are
2923// clusterable according to PowerPC specification.
2924static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2925 const PPCSubtarget &Subtarget) {
2926 switch (FirstOpc) {
2927 default:
2928 return false;
2929 case PPC::STD:
2930 case PPC::STFD:
2931 case PPC::STXSD:
2932 case PPC::DFSTOREf64:
2933 return FirstOpc == SecondOpc;
2934 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2935 // 32bit and 64bit instruction selection. They are clusterable pair though
2936 // they are different opcode.
2937 case PPC::STW:
2938 case PPC::STW8:
2939 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2940 }
2941}
2942
2944 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2945 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2946 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2947 unsigned NumBytes) const {
2948
2949 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2950 const MachineOperand &BaseOp1 = *BaseOps1.front();
2951 const MachineOperand &BaseOp2 = *BaseOps2.front();
2952 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2953 "Only base registers and frame indices are supported.");
2954
2955 // ClusterSize means the number of memory operations that will have been
2956 // clustered if this hook returns true.
2957 // Don't cluster memory op if there are already two ops clustered at least.
2958 if (ClusterSize > 2)
2959 return false;
2960
2961 // Cluster the load/store only when they have the same base
2962 // register or FI.
2963 if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
2964 (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
2965 (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2966 return false;
2967
2968 // Check if the load/store are clusterable according to the PowerPC
2969 // specification.
2970 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2971 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2972 unsigned FirstOpc = FirstLdSt.getOpcode();
2973 unsigned SecondOpc = SecondLdSt.getOpcode();
2975 // Cluster the load/store only when they have the same opcode, and they are
2976 // clusterable opcode according to PowerPC specification.
2977 if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2978 return false;
2979
2980 // Can't cluster load/store that have ordered or volatile memory reference.
2981 if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
2982 !isLdStSafeToCluster(SecondLdSt, TRI))
2983 return false;
2984
2985 int64_t Offset1 = 0, Offset2 = 0;
2987 Width2 = LocationSize::precise(0);
2988 const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
2989 if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
2990 !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
2991 Width1 != Width2)
2992 return false;
2993
2994 assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2995 "getMemOperandWithOffsetWidth return incorrect base op");
2996 // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2997 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2998 return Offset1 + (int64_t)Width1.getValue() == Offset2;
2999}
3000
3001/// GetInstSize - Return the number of bytes of code the specified
3002/// instruction may be. This returns the maximum number of bytes.
3003///
3005 unsigned Opcode = MI.getOpcode();
3006
3007 if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
3008 const MachineFunction *MF = MI.getParent()->getParent();
3009 const char *AsmStr = MI.getOperand(0).getSymbolName();
3010 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
3011 } else if (Opcode == TargetOpcode::STACKMAP) {
3012 StackMapOpers Opers(&MI);
3013 return Opers.getNumPatchBytes();
3014 } else if (Opcode == TargetOpcode::PATCHPOINT) {
3015 PatchPointOpers Opers(&MI);
3016 return Opers.getNumPatchBytes();
3017 } else {
3018 return get(Opcode).getSize();
3019 }
3020}
3021
3022std::pair<unsigned, unsigned>
3024 // PPC always uses a direct mask.
3025 return std::make_pair(TF, 0u);
3026}
3027
3030 using namespace PPCII;
3031 static const std::pair<unsigned, const char *> TargetFlags[] = {
3032 {MO_PLT, "ppc-plt"},
3033 {MO_PIC_FLAG, "ppc-pic"},
3034 {MO_PCREL_FLAG, "ppc-pcrel"},
3035 {MO_GOT_FLAG, "ppc-got"},
3036 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
3037 {MO_TLSGD_FLAG, "ppc-tlsgd"},
3038 {MO_TPREL_FLAG, "ppc-tprel"},
3039 {MO_TLSLDM_FLAG, "ppc-tlsldm"},
3040 {MO_TLSLD_FLAG, "ppc-tlsld"},
3041 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
3042 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
3043 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
3044 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
3045 {MO_LO, "ppc-lo"},
3046 {MO_HA, "ppc-ha"},
3047 {MO_TPREL_LO, "ppc-tprel-lo"},
3048 {MO_TPREL_HA, "ppc-tprel-ha"},
3049 {MO_DTPREL_LO, "ppc-dtprel-lo"},
3050 {MO_TLSLD_LO, "ppc-tlsld-lo"},
3051 {MO_TOC_LO, "ppc-toc-lo"},
3052 {MO_TLS, "ppc-tls"},
3053 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
3054 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
3055 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
3056 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
3057 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3058 };
3059 return ArrayRef(TargetFlags);
3060}
3061
3062// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3063// The VSX versions have the advantage of a full 64-register target whereas
3064// the FP ones have the advantage of lower latency and higher throughput. So
3065// what we are after is using the faster instructions in low register pressure
3066// situations and using the larger register file in high register pressure
3067// situations.
3069 unsigned UpperOpcode, LowerOpcode;
3070 switch (MI.getOpcode()) {
3071 case PPC::DFLOADf32:
3072 UpperOpcode = PPC::LXSSP;
3073 LowerOpcode = PPC::LFS;
3074 break;
3075 case PPC::DFLOADf64:
3076 UpperOpcode = PPC::LXSD;
3077 LowerOpcode = PPC::LFD;
3078 break;
3079 case PPC::DFSTOREf32:
3080 UpperOpcode = PPC::STXSSP;
3081 LowerOpcode = PPC::STFS;
3082 break;
3083 case PPC::DFSTOREf64:
3084 UpperOpcode = PPC::STXSD;
3085 LowerOpcode = PPC::STFD;
3086 break;
3087 case PPC::XFLOADf32:
3088 UpperOpcode = PPC::LXSSPX;
3089 LowerOpcode = PPC::LFSX;
3090 break;
3091 case PPC::XFLOADf64:
3092 UpperOpcode = PPC::LXSDX;
3093 LowerOpcode = PPC::LFDX;
3094 break;
3095 case PPC::XFSTOREf32:
3096 UpperOpcode = PPC::STXSSPX;
3097 LowerOpcode = PPC::STFSX;
3098 break;
3099 case PPC::XFSTOREf64:
3100 UpperOpcode = PPC::STXSDX;
3101 LowerOpcode = PPC::STFDX;
3102 break;
3103 case PPC::LIWAX:
3104 UpperOpcode = PPC::LXSIWAX;
3105 LowerOpcode = PPC::LFIWAX;
3106 break;
3107 case PPC::LIWZX:
3108 UpperOpcode = PPC::LXSIWZX;
3109 LowerOpcode = PPC::LFIWZX;
3110 break;
3111 case PPC::STIWX:
3112 UpperOpcode = PPC::STXSIWX;
3113 LowerOpcode = PPC::STFIWX;
3114 break;
3115 default:
3116 llvm_unreachable("Unknown Operation!");
3117 }
3118
3119 Register TargetReg = MI.getOperand(0).getReg();
3120 unsigned Opcode;
3121 if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
3122 (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3123 Opcode = LowerOpcode;
3124 else
3125 Opcode = UpperOpcode;
3126 MI.setDesc(get(Opcode));
3127 return true;
3128}
3129
3130static bool isAnImmediateOperand(const MachineOperand &MO) {
3131 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3132}
3133
3135 auto &MBB = *MI.getParent();
3136 auto DL = MI.getDebugLoc();
3137
3138 switch (MI.getOpcode()) {
3139 case PPC::BUILD_UACC: {
3140 MCRegister ACC = MI.getOperand(0).getReg();
3141 MCRegister UACC = MI.getOperand(1).getReg();
3142 if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
3143 MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
3144 MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
3145 // FIXME: This can easily be improved to look up to the top of the MBB
3146 // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
3147 // we can just re-target any such XXLOR's to DstVSR + offset.
3148 for (int VecNo = 0; VecNo < 4; VecNo++)
3149 BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
3150 .addReg(SrcVSR + VecNo)
3151 .addReg(SrcVSR + VecNo);
3152 }
3153 // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
3154 // So after building the 4 copies, we can replace the BUILD_UACC instruction
3155 // with a NOP.
3156 [[fallthrough]];
3157 }
3158 case PPC::KILL_PAIR: {
3159 MI.setDesc(get(PPC::UNENCODED_NOP));
3160 MI.removeOperand(1);
3161 MI.removeOperand(0);
3162 return true;
3163 }
3164 case TargetOpcode::LOAD_STACK_GUARD: {
3165 auto M = MBB.getParent()->getFunction().getParent();
3166 assert(
3167 (Subtarget.isTargetLinux() || M->getStackProtectorGuard() == "tls") &&
3168 "Only Linux target or tls mode are expected to contain "
3169 "LOAD_STACK_GUARD");
3170 int64_t Offset;
3171 if (M->getStackProtectorGuard() == "tls")
3172 Offset = M->getStackProtectorGuardOffset();
3173 else
3174 Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
3175 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3176 MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
3177 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3178 .addImm(Offset)
3179 .addReg(Reg);
3180 return true;
3181 }
3182 case PPC::PPCLdFixedAddr: {
3183 assert(Subtarget.getTargetTriple().isOSGlibc() &&
3184 "Only targets with Glibc expected to contain PPCLdFixedAddr");
3185 int64_t Offset = 0;
3186 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3187 MI.setDesc(get(PPC::LWZ));
3188 uint64_t FAType = MI.getOperand(1).getImm();
3189#undef PPC_LNX_FEATURE
3190#undef PPC_CPU
3191#define PPC_LNX_DEFINE_OFFSETS
3192#include "llvm/TargetParser/PPCTargetParser.def"
3193 bool IsLE = Subtarget.isLittleEndian();
3194 bool Is64 = Subtarget.isPPC64();
3195 if (FAType == PPC_FAWORD_HWCAP) {
3196 if (IsLE)
3197 Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
3198 else
3199 Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
3200 } else if (FAType == PPC_FAWORD_HWCAP2) {
3201 if (IsLE)
3202 Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
3203 else
3204 Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
3205 } else if (FAType == PPC_FAWORD_CPUID) {
3206 if (IsLE)
3207 Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
3208 else
3209 Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
3210 }
3211 assert(Offset && "Do not know the offset for this fixed addr load");
3212 MI.removeOperand(1);
3213 Subtarget.getTargetMachine().setGlibcHWCAPAccess();
3214 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3215 .addImm(Offset)
3216 .addReg(Reg);
3217 return true;
3218#define PPC_TGT_PARSER_UNDEF_MACROS
3219#include "llvm/TargetParser/PPCTargetParser.def"
3220#undef PPC_TGT_PARSER_UNDEF_MACROS
3221 }
3222 case PPC::DFLOADf32:
3223 case PPC::DFLOADf64:
3224 case PPC::DFSTOREf32:
3225 case PPC::DFSTOREf64: {
3226 assert(Subtarget.hasP9Vector() &&
3227 "Invalid D-Form Pseudo-ops on Pre-P9 target.");
3228 assert(MI.getOperand(2).isReg() &&
3229 isAnImmediateOperand(MI.getOperand(1)) &&
3230 "D-form op must have register and immediate operands");
3231 return expandVSXMemPseudo(MI);
3232 }
3233 case PPC::XFLOADf32:
3234 case PPC::XFSTOREf32:
3235 case PPC::LIWAX:
3236 case PPC::LIWZX:
3237 case PPC::STIWX: {
3238 assert(Subtarget.hasP8Vector() &&
3239 "Invalid X-Form Pseudo-ops on Pre-P8 target.");
3240 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3241 "X-form op must have register and register operands");
3242 return expandVSXMemPseudo(MI);
3243 }
3244 case PPC::XFLOADf64:
3245 case PPC::XFSTOREf64: {
3246 assert(Subtarget.hasVSX() &&
3247 "Invalid X-Form Pseudo-ops on target that has no VSX.");
3248 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3249 "X-form op must have register and register operands");
3250 return expandVSXMemPseudo(MI);
3251 }
3252 case PPC::SPILLTOVSR_LD: {
3253 Register TargetReg = MI.getOperand(0).getReg();
3254 if (PPC::VSFRCRegClass.contains(TargetReg)) {
3255 MI.setDesc(get(PPC::DFLOADf64));
3256 return expandPostRAPseudo(MI);
3257 }
3258 else
3259 MI.setDesc(get(PPC::LD));
3260 return true;
3261 }
3262 case PPC::SPILLTOVSR_ST: {
3263 Register SrcReg = MI.getOperand(0).getReg();
3264 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3265 NumStoreSPILLVSRRCAsVec++;
3266 MI.setDesc(get(PPC::DFSTOREf64));
3267 return expandPostRAPseudo(MI);
3268 } else {
3269 NumStoreSPILLVSRRCAsGpr++;
3270 MI.setDesc(get(PPC::STD));
3271 }
3272 return true;
3273 }
3274 case PPC::SPILLTOVSR_LDX: {
3275 Register TargetReg = MI.getOperand(0).getReg();
3276 if (PPC::VSFRCRegClass.contains(TargetReg))
3277 MI.setDesc(get(PPC::LXSDX));
3278 else
3279 MI.setDesc(get(PPC::LDX));
3280 return true;
3281 }
3282 case PPC::SPILLTOVSR_STX: {
3283 Register SrcReg = MI.getOperand(0).getReg();
3284 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3285 NumStoreSPILLVSRRCAsVec++;
3286 MI.setDesc(get(PPC::STXSDX));
3287 } else {
3288 NumStoreSPILLVSRRCAsGpr++;
3289 MI.setDesc(get(PPC::STDX));
3290 }
3291 return true;
3292 }
3293
3294 // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
3295 case PPC::CFENCE:
3296 case PPC::CFENCE8: {
3297 auto Val = MI.getOperand(0).getReg();
3298 unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3299 BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
3300 BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
3302 .addReg(PPC::CR7)
3303 .addImm(1);
3304 MI.setDesc(get(PPC::ISYNC));
3305 MI.removeOperand(0);
3306 return true;
3307 }
3308 }
3309 return false;
3310}
3311
3312// Essentially a compile-time implementation of a compare->isel sequence.
3313// It takes two constants to compare, along with the true/false registers
3314// and the comparison type (as a subreg to a CR field) and returns one
3315// of the true/false registers, depending on the comparison results.
3316static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3317 unsigned TrueReg, unsigned FalseReg,
3318 unsigned CRSubReg) {
3319 // Signed comparisons. The immediates are assumed to be sign-extended.
3320 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3321 switch (CRSubReg) {
3322 default: llvm_unreachable("Unknown integer comparison type.");
3323 case PPC::sub_lt:
3324 return Imm1 < Imm2 ? TrueReg : FalseReg;
3325 case PPC::sub_gt:
3326 return Imm1 > Imm2 ? TrueReg : FalseReg;
3327 case PPC::sub_eq:
3328 return Imm1 == Imm2 ? TrueReg : FalseReg;
3329 }
3330 }
3331 // Unsigned comparisons.
3332 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3333 switch (CRSubReg) {
3334 default: llvm_unreachable("Unknown integer comparison type.");
3335 case PPC::sub_lt:
3336 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3337 case PPC::sub_gt:
3338 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3339 case PPC::sub_eq:
3340 return Imm1 == Imm2 ? TrueReg : FalseReg;
3341 }
3342 }
3343 return PPC::NoRegister;
3344}
3345
3347 unsigned OpNo,
3348 int64_t Imm) const {
3349 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3350 // Replace the REG with the Immediate.
3351 Register InUseReg = MI.getOperand(OpNo).getReg();
3352 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3353
3354 // We need to make sure that the MI didn't have any implicit use
3355 // of this REG any more. We don't call MI.implicit_operands().empty() to
3356 // return early, since MI's MCID might be changed in calling context, as a
3357 // result its number of explicit operands may be changed, thus the begin of
3358 // implicit operand is changed.
3360 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
3361 if (UseOpIdx >= 0) {
3362 MachineOperand &MO = MI.getOperand(UseOpIdx);
3363 if (MO.isImplicit())
3364 // The operands must always be in the following order:
3365 // - explicit reg defs,
3366 // - other explicit operands (reg uses, immediates, etc.),
3367 // - implicit reg defs
3368 // - implicit reg uses
3369 // Therefore, removing the implicit operand won't change the explicit
3370 // operands layout.
3371 MI.removeOperand(UseOpIdx);
3372 }
3373}
3374
3375// Replace an instruction with one that materializes a constant (and sets
3376// CR0 if the original instruction was a record-form instruction).
3378 const LoadImmediateInfo &LII) const {
3379 // Remove existing operands.
3380 int OperandToKeep = LII.SetCR ? 1 : 0;
3381 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3382 MI.removeOperand(i);
3383
3384 // Replace the instruction.
3385 if (LII.SetCR) {
3386 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3387 // Set the immediate.
3388 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3389 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3390 return;
3391 }
3392 else
3393 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3394
3395 // Set the immediate.
3396 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3397 .addImm(LII.Imm);
3398}
3399
3401 bool &SeenIntermediateUse) const {
3402 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3403 "Should be called after register allocation.");
3405 MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
3406 It++;
3407 SeenIntermediateUse = false;
3408 for (; It != E; ++It) {
3409 if (It->modifiesRegister(Reg, TRI))
3410 return &*It;
3411 if (It->readsRegister(Reg, TRI))
3412 SeenIntermediateUse = true;
3413 }
3414 return nullptr;
3415}
3416
3419 const DebugLoc &DL, Register Reg,
3420 int64_t Imm) const {
3421 assert(!MBB.getParent()->getRegInfo().isSSA() &&
3422 "Register should be in non-SSA form after RA");
3423 bool isPPC64 = Subtarget.isPPC64();
3424 // FIXME: Materialization here is not optimal.
3425 // For some special bit patterns we can use less instructions.
3426 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3427 if (isInt<16>(Imm)) {
3428 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3429 } else if (isInt<32>(Imm)) {
3430 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3431 .addImm(Imm >> 16);
3432 if (Imm & 0xFFFF)
3433 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3434 .addReg(Reg, RegState::Kill)
3435 .addImm(Imm & 0xFFFF);
3436 } else {
3437 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3438 "only supported in PPC64");
3439 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3440 if ((Imm >> 32) & 0xFFFF)
3441 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3442 .addReg(Reg, RegState::Kill)
3443 .addImm((Imm >> 32) & 0xFFFF);
3444 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3445 .addReg(Reg, RegState::Kill)
3446 .addImm(32)
3447 .addImm(31);
3448 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3449 .addReg(Reg, RegState::Kill)
3450 .addImm((Imm >> 16) & 0xFFFF);
3451 if (Imm & 0xFFFF)
3452 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3453 .addReg(Reg, RegState::Kill)
3454 .addImm(Imm & 0xFFFF);
3455 }
3456}
3457
3458MachineInstr *PPCInstrInfo::getForwardingDefMI(
3460 unsigned &OpNoForForwarding,
3461 bool &SeenIntermediateUse) const {
3462 OpNoForForwarding = ~0U;
3463 MachineInstr *DefMI = nullptr;
3464 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3466 // If we're in SSA, get the defs through the MRI. Otherwise, only look
3467 // within the basic block to see if the register is defined using an
3468 // LI/LI8/ADDI/ADDI8.
3469 if (MRI->isSSA()) {
3470 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3471 if (!MI.getOperand(i).isReg())
3472 continue;
3473 Register Reg = MI.getOperand(i).getReg();
3474 if (!Reg.isVirtual())
3475 continue;
3476 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3477 if (TrueReg.isVirtual()) {
3478 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3479 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3480 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3481 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3482 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3483 OpNoForForwarding = i;
3484 DefMI = DefMIForTrueReg;
3485 // The ADDI and LI operand maybe exist in one instruction at same
3486 // time. we prefer to fold LI operand as LI only has one Imm operand
3487 // and is more possible to be converted. So if current DefMI is
3488 // ADDI/ADDI8, we continue to find possible LI/LI8.
3489 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3490 break;
3491 }
3492 }
3493 }
3494 } else {
3495 // Looking back through the definition for each operand could be expensive,
3496 // so exit early if this isn't an instruction that either has an immediate
3497 // form or is already an immediate form that we can handle.
3498 ImmInstrInfo III;
3499 unsigned Opc = MI.getOpcode();
3500 bool ConvertibleImmForm =
3501 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3502 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3503 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3504 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3505 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3506 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3507 Opc == PPC::RLWINM8_rec;
3508 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3509 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3510 : false;
3511 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3512 return nullptr;
3513
3514 // Don't convert or %X, %Y, %Y since that's just a register move.
3515 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3516 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3517 return nullptr;
3518 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3519 MachineOperand &MO = MI.getOperand(i);
3520 SeenIntermediateUse = false;
3521 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3522 Register Reg = MI.getOperand(i).getReg();
3523 // If we see another use of this reg between the def and the MI,
3524 // we want to flag it so the def isn't deleted.
3525 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3526 if (DefMI) {
3527 // Is this register defined by some form of add-immediate (including
3528 // load-immediate) within this basic block?
3529 switch (DefMI->getOpcode()) {
3530 default:
3531 break;
3532 case PPC::LI:
3533 case PPC::LI8:
3534 case PPC::ADDItocL8:
3535 case PPC::ADDI:
3536 case PPC::ADDI8:
3537 OpNoForForwarding = i;
3538 return DefMI;
3539 }
3540 }
3541 }
3542 }
3543 }
3544 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3545}
3546
3547unsigned PPCInstrInfo::getSpillTarget() const {
3548 // With P10, we may need to spill paired vector registers or accumulator
3549 // registers. MMA implies paired vectors, so we can just check that.
3550 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3551 // P11 uses the P10 target.
3552 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3553 2 : Subtarget.hasP9Vector() ?
3554 1 : 0;
3555}
3556
3557ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3558 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3559}
3560
3561ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3562 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3563}
3564
3565// This opt tries to convert the following imm form to an index form to save an
3566// add for stack variables.
3567// Return false if no such pattern found.
3568//
3569// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3570// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3571// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3572//
3573// can be converted to:
3574//
3575// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3576// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3577//
3578// In order to eliminate ADD instr, make sure that:
3579// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3580// new ADDI instr and ADDI can only take int16 Imm.
3581// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3582// between ADDI and ADD instr since its original def in ADDI will be changed
3583// in new ADDI instr. And also there should be no new def for it between
3584// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3585// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3586// between ADD and Imm instr since ADD instr will be eliminated.
3587// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3588// moved to Index instr.
3590 MachineFunction *MF = MI.getParent()->getParent();
3592 bool PostRA = !MRI->isSSA();
3593 // Do this opt after PEI which is after RA. The reason is stack slot expansion
3594 // in PEI may expose such opportunities since in PEI, stack slot offsets to
3595 // frame base(OffsetAddi) are determined.
3596 if (!PostRA)
3597 return false;
3598 unsigned ToBeDeletedReg = 0;
3599 int64_t OffsetImm = 0;
3600 unsigned XFormOpcode = 0;
3601 ImmInstrInfo III;
3602
3603 // Check if Imm instr meets requirement.
3604 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3605 III))
3606 return false;
3607
3608 bool OtherIntermediateUse = false;
3609 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3610
3611 // Exit if there is other use between ADD and Imm instr or no def found.
3612 if (OtherIntermediateUse || !ADDMI)
3613 return false;
3614
3615 // Check if ADD instr meets requirement.
3616 if (!isADDInstrEligibleForFolding(*ADDMI))
3617 return false;
3618
3619 unsigned ScaleRegIdx = 0;
3620 int64_t OffsetAddi = 0;
3621 MachineInstr *ADDIMI = nullptr;
3622
3623 // Check if there is a valid ToBeChangedReg in ADDMI.
3624 // 1: It must be killed.
3625 // 2: Its definition must be a valid ADDIMI.
 3626 // 3: It must satisfy the int16 offset requirement.
3627 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3628 ScaleRegIdx = 2;
3629 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3630 ScaleRegIdx = 1;
3631 else
3632 return false;
3633
3634 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3635 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3636 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
3637 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3639 for (auto It = ++Start; It != End; It++)
3640 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3641 return true;
3642 return false;
3643 };
3644
3645 // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3646 // treated as special zero when ScaleReg is R0/X0 register.
3647 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3648 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3649 return false;
3650
3651 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3652 // and Imm Instr.
3653 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3654 return false;
3655
3656 // Now start to do the transformation.
3657 LLVM_DEBUG(dbgs() << "Replace instruction: "
3658 << "\n");
3659 LLVM_DEBUG(ADDIMI->dump());
3660 LLVM_DEBUG(ADDMI->dump());
3661 LLVM_DEBUG(MI.dump());
3662 LLVM_DEBUG(dbgs() << "with: "
3663 << "\n");
3664
3665 // Update ADDI instr.
3666 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3667
3668 // Update Imm instr.
3669 MI.setDesc(get(XFormOpcode));
3670 MI.getOperand(III.ImmOpNo)
3671 .ChangeToRegister(ScaleReg, false, false,
3672 ADDMI->getOperand(ScaleRegIdx).isKill());
3673
3674 MI.getOperand(III.OpNoForForwarding)
3675 .ChangeToRegister(ToBeChangedReg, false, false, true);
3676
3677 // Eliminate ADD instr.
3678 ADDMI->eraseFromParent();
3679
3680 LLVM_DEBUG(ADDIMI->dump());
3681 LLVM_DEBUG(MI.dump());
3682
3683 return true;
3684}
3685
                                                 int64_t &Imm) const {
  // Returns true iff ADDIMI is an ADDI/ADDI8 whose displacement operand is a
  // plain immediate (not a relocation); on success the immediate value is
  // passed back through Imm.
  unsigned Opc = ADDIMI.getOpcode();

  // Exit if the instruction is not ADDI.
  if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
    return false;

  // The operand may not necessarily be an immediate - it could be a relocation.
  if (!ADDIMI.getOperand(2).isImm())
    return false;

  Imm = ADDIMI.getOperand(2).getImm();

  return true;
}
3702
  // Returns true iff ADDMI is a plain GPR add (ADD4/ADD8) - the only add
  // forms this ADDI-folding optimization handles.
  unsigned Opc = ADDMI.getOpcode();

  // Exit if the instruction is not ADD.
  return Opc == PPC::ADD4 || Opc == PPC::ADD8;
}
3709
                                                unsigned &ToBeDeletedReg,
                                                unsigned &XFormOpcode,
                                                int64_t &OffsetImm,
                                                ImmInstrInfo &III) const {
  // Decides whether the memory instruction MI is a candidate for the
  // "ADDI + ADD + D-form memop -> ADDI + X-form memop" folding. On success:
  // XFormOpcode is the reg+reg (indexed) opcode, ToBeDeletedReg is the killed
  // base register, OffsetImm is the current displacement, and III describes
  // the imm-form properties of the X-form opcode.
  // Only handle load/store.
  if (!MI.mayLoadOrStore())
    return false;

  unsigned Opc = MI.getOpcode();

  // Map the D-form (reg+imm) opcode to its X-form (reg+reg) counterpart.
  XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);

  // Exit if instruction has no index form.
  if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
    return false;

  // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
  if (!instrHasImmForm(XFormOpcode,
                       PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
    return false;

  // The folding only makes sense when the memop adds base + displacement.
  if (!III.IsSummingOperands)
    return false;

  MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
  MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
  // Only support imm operands, not relocation slots or others.
  if (!ImmOperand.isImm())
    return false;

  assert(RegOperand.isReg() && "Instruction format is not right");

  // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
  if (!RegOperand.isKill())
    return false;

  ToBeDeletedReg = RegOperand.getReg();
  OffsetImm = ImmOperand.getImm();

  return true;
}
3752
                                         MachineInstr *&ADDIMI,
                                         int64_t &OffsetAddi,
                                         int64_t OffsetImm) const {
  // Checks whether operand Index of the ADD is a killed register whose
  // (post-RA) reaching definition is a foldable ADDI and whose combined
  // displacement (OffsetAddi + OffsetImm) still fits a signed 16-bit field.
  // ADDIMI and OffsetAddi are outputs.
  assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
  MachineOperand &MO = ADDMI->getOperand(Index);

  // The register must die at the ADD; otherwise its def cannot be rewritten.
  if (!MO.isKill())
    return false;

  bool OtherIntermediateUse = false;

  ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
  // Currently handle only one "add + Imminstr" pair case, exit if other
  // intermediate use for ToBeChangedReg found.
  // TODO: handle the cases where there are other "add + Imminstr" pairs
  // with same offset in Imminstr which is like:
  //
  // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
  // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
  // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
  // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
  // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
  //
  // can be converted to:
  //
  // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
  //                 (OffsetAddi + OffsetImm)
  // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
  // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)

  if (OtherIntermediateUse || !ADDIMI)
    return false;
  // Check if ADDI instr meets requirement.
  if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
    return false;

  // The folded displacement must still fit the D-form's signed 16 bits.
  if (isInt<16>(OffsetAddi + OffsetImm))
    return true;
  return false;
}
3794
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
                                          SmallSet<Register, 4> &RegsToUpdate,
                                          MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  bool PostRA = !MRI->isSSA();
  // NOTE(review): initialized to true so the result is conservative if
  // getForwardingDefMI does not touch it — confirm against that helper's
  // contract.
  bool SeenIntermediateUse = true;
  unsigned ForwardingOperand = ~0U;
  MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
                                           SeenIntermediateUse);
  if (!DefMI)
    return false;
  assert(ForwardingOperand < MI.getNumOperands() &&
         "The forwarding operand needs to be valid at this point");
  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  // Report DefMI back to the caller for deletion only when nothing else
  // reads its result before MI.
  if (KilledDef && KillFwdDefMI)
    *KilledDef = DefMI;

  // Conservatively add defs from DefMI and defs/uses from MI to the set of
  // registers that need their kill flags updated.
  for (const MachineOperand &MO : DefMI->operands())
    if (MO.isReg() && MO.isDef())
      RegsToUpdate.insert(MO.getReg());
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg())
      RegsToUpdate.insert(MO.getReg());

  // If this is a imm instruction and its register operands is produced by ADDI,
  // put the imm into imm inst directly.
  if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
          PPC::INSTRUCTION_LIST_END &&
      transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
    return true;

  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? PPC::isVFRegister(MI.getOperand(0).getReg())
                     : false;
  bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by an add-immediate,
  // try to convert it.
  if (HasImmForm &&
      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
                                 KillFwdDefMI))
    return true;

  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by LI, convert it now.
  if (HasImmForm &&
      transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
    return true;

  // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
  // can be simpified to LI.
  if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
    return true;

  return false;
}
3859
3861 MachineInstr **ToErase) const {
3862 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3863 Register FoldingReg = MI.getOperand(1).getReg();
3864 if (!FoldingReg.isVirtual())
3865 return false;
3866 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3867 if (SrcMI->getOpcode() != PPC::RLWINM &&
3868 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3869 SrcMI->getOpcode() != PPC::RLWINM8 &&
3870 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3871 return false;
3872 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3873 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3874 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3875 "Invalid PPC::RLWINM Instruction!");
3876 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3877 uint64_t SHMI = MI.getOperand(2).getImm();
3878 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3879 uint64_t MBMI = MI.getOperand(3).getImm();
3880 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3881 uint64_t MEMI = MI.getOperand(4).getImm();
3882
3883 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3884 "Invalid PPC::RLWINM Instruction!");
3885 // If MBMI is bigger than MEMI, we always can not get run of ones.
3886 // RotatedSrcMask non-wrap:
3887 // 0........31|32........63
3888 // RotatedSrcMask: B---E B---E
3889 // MaskMI: -----------|--E B------
3890 // Result: ----- --- (Bad candidate)
3891 //
3892 // RotatedSrcMask wrap:
3893 // 0........31|32........63
3894 // RotatedSrcMask: --E B----|--E B----
3895 // MaskMI: -----------|--E B------
3896 // Result: --- -----|--- ----- (Bad candidate)
3897 //
3898 // One special case is RotatedSrcMask is a full set mask.
3899 // RotatedSrcMask full:
3900 // 0........31|32........63
3901 // RotatedSrcMask: ------EB---|-------EB---
3902 // MaskMI: -----------|--E B------
3903 // Result: -----------|--- ------- (Good candidate)
3904
3905 // Mark special case.
3906 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3907
3908 // For other MBMI > MEMI cases, just return.
3909 if ((MBMI > MEMI) && !SrcMaskFull)
3910 return false;
3911
3912 // Handle MBMI <= MEMI cases.
3913 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3914 // In MI, we only need low 32 bits of SrcMI, just consider about low 32
3915 // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
3916 // while in PowerPC ISA, lowerest bit is at index 63.
3917 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3918
3919 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3920 APInt FinalMask = RotatedSrcMask & MaskMI;
3921 uint32_t NewMB, NewME;
3922 bool Simplified = false;
3923
3924 // If final mask is 0, MI result should be 0 too.
3925 if (FinalMask.isZero()) {
3926 bool Is64Bit =
3927 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3928 Simplified = true;
3929 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3930 LLVM_DEBUG(MI.dump());
3931
3932 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3933 // Replace MI with "LI 0"
3934 MI.removeOperand(4);
3935 MI.removeOperand(3);
3936 MI.removeOperand(2);
3937 MI.getOperand(1).ChangeToImmediate(0);
3938 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3939 } else {
3940 // Replace MI with "ANDI_rec reg, 0"
3941 MI.removeOperand(4);
3942 MI.removeOperand(3);
3943 MI.getOperand(2).setImm(0);
3944 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3945 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3946 if (SrcMI->getOperand(1).isKill()) {
3947 MI.getOperand(1).setIsKill(true);
3948 SrcMI->getOperand(1).setIsKill(false);
3949 } else
3950 // About to replace MI.getOperand(1), clear its kill flag.
3951 MI.getOperand(1).setIsKill(false);
3952 }
3953
3954 LLVM_DEBUG(dbgs() << "With: ");
3955 LLVM_DEBUG(MI.dump());
3956
3957 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3958 NewMB <= NewME) ||
3959 SrcMaskFull) {
3960 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
3961 // than NewME. Otherwise we get a 64 bit value after folding, but MI
3962 // return a 32 bit value.
3963 Simplified = true;
3964 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3965 LLVM_DEBUG(MI.dump());
3966
3967 uint16_t NewSH = (SHSrc + SHMI) % 32;
3968 MI.getOperand(2).setImm(NewSH);
3969 // If SrcMI mask is full, no need to update MBMI and MEMI.
3970 if (!SrcMaskFull) {
3971 MI.getOperand(3).setImm(NewMB);
3972 MI.getOperand(4).setImm(NewME);
3973 }
3974 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3975 if (SrcMI->getOperand(1).isKill()) {
3976 MI.getOperand(1).setIsKill(true);
3977 SrcMI->getOperand(1).setIsKill(false);
3978 } else
3979 // About to replace MI.getOperand(1), clear its kill flag.
3980 MI.getOperand(1).setIsKill(false);
3981
3982 LLVM_DEBUG(dbgs() << "To: ");
3983 LLVM_DEBUG(MI.dump());
3984 }
3985 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
3986 !SrcMI->hasImplicitDef()) {
3987 // If FoldingReg has no non-debug use and it has no implicit def (it
3988 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
3989 // Otherwise keep it.
3990 *ToErase = SrcMI;
3991 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3992 LLVM_DEBUG(SrcMI->dump());
3993 }
3994 return Simplified;
3995}
3996
// Returns true and fills III with the properties of the reg+imm (D-form)
// counterpart of the reg+reg opcode Opc, if one exists and is usable on this
// subtarget. IsVFReg says whether operand 0 is a VF register (selects between
// VSX and FP scalar opcodes post-RA); PostRA selects between pseudo-ops
// (pre-RA) and concrete opcodes (post-RA).
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
                                   ImmInstrInfo &III, bool PostRA) const {
  // The vast majority of the instructions would need their operand 2 replaced
  // with an immediate when switching to the reg+imm form. A marked exception
  // are the update form loads/stores for which a constant operand 2 would need
  // to turn into a displacement and move operand 1 to the operand 2 position.
  III.ImmOpNo = 2;
  III.OpNoForForwarding = 2;
  III.ImmWidth = 16;
  III.ImmMustBeMultipleOf = 1;
  III.TruncateImmTo = 0;
  III.IsSummingOperands = false;
  switch (Opc) {
  default: return false;
  case PPC::ADD4:
  case PPC::ADD8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 1;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
    break;
  case PPC::ADDC:
  case PPC::ADDC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
    break;
  case PPC::ADDC_rec:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = PPC::ADDIC_rec;
    break;
  case PPC::SUBFC:
  case PPC::SUBFC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
    break;
  case PPC::CMPW:
  case PPC::CMPD:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
    break;
  case PPC::CMPLW:
  case PPC::CMPLD:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
    break;
  case PPC::AND_rec:
  case PPC::AND8_rec:
  case PPC::OR:
  case PPC::OR8:
  case PPC::XOR:
  case PPC::XOR8:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::AND_rec:
      III.ImmOpcode = PPC::ANDI_rec;
      break;
    case PPC::AND8_rec:
      III.ImmOpcode = PPC::ANDI8_rec;
      break;
    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
    }
    break;
  // 32-bit rotates and shifts with a register shift amount.
  case PPC::RLWNM:
  case PPC::RLWNM8:
  case PPC::RLWNM_rec:
  case PPC::RLWNM8_rec:
  case PPC::SLW:
  case PPC::SLW8:
  case PPC::SLW_rec:
  case PPC::SLW8_rec:
  case PPC::SRW:
  case PPC::SRW8:
  case PPC::SRW_rec:
  case PPC::SRW8_rec:
  case PPC::SRAW:
  case PPC::SRAW_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
        Opc == PPC::RLWNM8_rec)
      III.TruncateImmTo = 5;
    else
      III.TruncateImmTo = 6;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::RLWNM_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::RLWNM8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SLW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SLW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SRW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SRW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRAW:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI;
      break;
    case PPC::SRAW_rec:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI_rec;
      break;
    }
    break;
  // 64-bit rotates and shifts with a register shift amount.
  case PPC::RLDCL:
  case PPC::RLDCL_rec:
  case PPC::RLDCR:
  case PPC::RLDCR_rec:
  case PPC::SLD:
  case PPC::SLD_rec:
  case PPC::SRD:
  case PPC::SRD_rec:
  case PPC::SRAD:
  case PPC::SRAD_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
        Opc == PPC::RLDCR_rec)
      III.TruncateImmTo = 6;
    else
      III.TruncateImmTo = 7;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::RLDCL_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::RLDCR_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::SLD_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::SRD_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::SRAD:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI;
      break;
    case PPC::SRAD_rec:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI_rec;
      break;
    }
    break;
  // Loads and stores:
  case PPC::LBZX:
  case PPC::LBZX8:
  case PPC::LHZX:
  case PPC::LHZX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWZX:
  case PPC::LWZX8:
  case PPC::LWAX:
  case PPC::LDX:
  case PPC::LFSX:
  case PPC::LFDX:
  case PPC::STBX:
  case PPC::STBX8:
  case PPC::STHX:
  case PPC::STHX8:
  case PPC::STWX:
  case PPC::STWX8:
  case PPC::STDX:
  case PPC::STFSX:
  case PPC::STFDX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
    case PPC::LWAX:
      III.ImmOpcode = PPC::LWA;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
    case PPC::STDX:
      III.ImmOpcode = PPC::STD;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
    }
    break;
  // Update-form loads/stores: the address register is operand 2 and the new
  // address is also produced, so the special-zero and forwarding slots shift.
  case PPC::LBZUX:
  case PPC::LBZUX8:
  case PPC::LHZUX:
  case PPC::LHZUX8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LWZUX:
  case PPC::LWZUX8:
  case PPC::LDUX:
  case PPC::LFSUX:
  case PPC::LFDUX:
  case PPC::STBUX:
  case PPC::STBUX8:
  case PPC::STHUX:
  case PPC::STHUX8:
  case PPC::STWUX:
  case PPC::STWUX8:
  case PPC::STDUX:
  case PPC::STFSUX:
  case PPC::STFDUX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 2;
    III.ZeroIsSpecialNew = 3;
    III.IsCommutative = false;
    III.IsSummingOperands = true;
    III.ImmOpNo = 2;
    III.OpNoForForwarding = 3;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
    case PPC::LDUX:
      III.ImmOpcode = PPC::LDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
    case PPC::STDUX:
      III.ImmOpcode = PPC::STDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
    }
    break;
  // Power9 and up only. For some of these, the X-Form version has access to all
  // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
  // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
  // into or stored from is one of the VR registers.
  case PPC::LXVX:
  case PPC::LXSSPX:
  case PPC::LXSDX:
  case PPC::STXVX:
  case PPC::STXSSPX:
  case PPC::STXSDX:
  case PPC::XFLOADf32:
  case PPC::XFLOADf64:
  case PPC::XFSTOREf32:
  case PPC::XFSTOREf64:
    if (!Subtarget.hasP9Vector())
      return false;
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    III.ImmMustBeMultipleOf = 4;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LXVX:
      III.ImmOpcode = PPC::LXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::LXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSSP;
        else {
          III.ImmOpcode = PPC::LFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf32:
      III.ImmOpcode = PPC::DFLOADf32;
      break;
    case PPC::LXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSD;
        else {
          III.ImmOpcode = PPC::LFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf64:
      III.ImmOpcode = PPC::DFLOADf64;
      break;
    case PPC::STXVX:
      III.ImmOpcode = PPC::STXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::STXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSSP;
        else {
          III.ImmOpcode = PPC::STFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf32:
      III.ImmOpcode = PPC::DFSTOREf32;
      break;
    case PPC::STXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSD;
        else {
          III.ImmOpcode = PPC::STFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf64:
      III.ImmOpcode = PPC::DFSTOREf64;
      break;
    }
    break;
  }
  return true;
}
4417
// Utility function for swaping two arbitrary operands of an instruction.
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
  assert(Op1 != Op2 && "Cannot swap operand with itself.");

  unsigned MaxOp = std::max(Op1, Op2);
  unsigned MinOp = std::min(Op1, Op2);
  // Copy both operands by value before removal: removeOperand shifts the
  // indices of all later operands.
  MachineOperand MOp1 = MI.getOperand(MinOp);
  MachineOperand MOp2 = MI.getOperand(MaxOp);
  // Remove the higher index first so the lower index stays valid.
  MI.removeOperand(std::max(Op1, Op2));
  MI.removeOperand(std::min(Op1, Op2));

  // If the operands we are swapping are the two at the end (the common case)
  // we can just remove both and add them in the opposite order.
  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
    MI.addOperand(MOp2);
    MI.addOperand(MOp1);
  } else {
    // Store all operands in a temporary vector, remove them and re-add in the
    // right order.
    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
    // NOTE(review): this unsigned loop assumes MinOp >= 1; with MinOp == 0 it
    // would wrap past zero — confirm callers never swap the def operand.
    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }
    // MOp2 needs to be added next.
    MI.addOperand(MOp2);
    // Now add the rest.
    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
      if (i == MaxOp)
        MI.addOperand(MOp1);
      else {
        // Re-add the saved trailing operands (they were pushed in reverse).
        MI.addOperand(MOps.back());
        MOps.pop_back();
      }
    }
  }
}
4456
4457// Check if the 'MI' that has the index OpNoForForwarding
4458// meets the requirement described in the ImmInstrInfo.
4459bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4460 const ImmInstrInfo &III,
4461 unsigned OpNoForForwarding
4462 ) const {
4463 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4464 // would not work pre-RA, we can only do the check post RA.
4465 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4466 if (MRI.isSSA())
4467 return false;
4468
4469 // Cannot do the transform if MI isn't summing the operands.
4470 if (!III.IsSummingOperands)
4471 return false;
4472
4473 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4474 if (!III.ZeroIsSpecialOrig)
4475 return false;
4476
4477 // We cannot do the transform if the operand we are trying to replace
4478 // isn't the same as the operand the instruction allows.
4479 if (OpNoForForwarding != III.OpNoForForwarding)
4480 return false;
4481
4482 // Check if the instruction we are trying to transform really has
4483 // the special zero register as its operand.
4484 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4485 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4486 return false;
4487
4488 // This machine instruction is convertible if it is,
4489 // 1. summing the operands.
4490 // 2. one of the operands is special zero register.
4491 // 3. the operand we are trying to replace is allowed by the MI.
4492 return true;
4493}
4494
4495// Check if the DefMI is the add inst and set the ImmMO and RegMO
4496// accordingly.
4497bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4498 const ImmInstrInfo &III,
4499 MachineOperand *&ImmMO,
4500 MachineOperand *&RegMO) const {
4501 unsigned Opc = DefMI.getOpcode();
4502 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4503 return false;
4504
4505 // Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL8
4506 // on AIX which is used for toc-data access. TODO: Follow up to see if it can
4507 // apply for AIX toc-data as well.
4508 if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
4509 return false;
4510
4511 assert(DefMI.getNumOperands() >= 3 &&
4512 "Add inst must have at least three operands");
4513 RegMO = &DefMI.getOperand(1);
4514 ImmMO = &DefMI.getOperand(2);
4515
4516 // Before RA, ADDI first operand could be a frame index.
4517 if (!RegMO->isReg())
4518 return false;
4519
4520 // This DefMI is elgible for forwarding if it is:
4521 // 1. add inst
4522 // 2. one of the operands is Imm/CPI/Global.
4523 return isAnImmediateOperand(*ImmMO);
4524}
4525
bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x -> z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  It++;
  for (; It != E; ++It) {
    // Any intervening redefinition of Reg (other than DefMI itself) blocks
    // the forwarding.
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      // Record kills on the path so the caller can fix up kill flags.
      IsFwdFeederRegKilled = true;
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}
4566
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  // Checks whether the immediate-like operand ImmMO of DefMI can legally be
  // folded (together with BaseImm) into an imm-form instruction described by
  // III; on success the final immediate is returned through Imm.
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL8) {
    // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
    // However, we know that, it is 16-bit width, and has the alignment of 4.
    // Check if the instruction met the requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
       III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load. A DForm load cannot be represented if it is a multiple of say 2.
    // XForm loads do not have this restriction.
    if (ImmMO.isGlobal()) {
      const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
      return false;
    }

    return true;
  }

  if (ImmMO.isImm()) {
    // It is Imm, we need to check if the Imm fit the range.
    // Sign-extend to 64-bits.
    // DefMI may be folded with another imm form instruction, the result Imm is
    // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
    APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
    if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
      return false;
    if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
      return false;
    Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);

    // The displacement must honor the opcode's alignment requirement.
    if (Imm % III.ImmMustBeMultipleOf)
      return false;
    if (III.TruncateImmTo)
      Imm &= ((1 << III.TruncateImmTo) - 1);
  }
  else
    return false;

  // This ImmMO is forwarded if it meets the requriement describle
  // in ImmInstrInfo
  return true;
}
4618
// Try to fold an instruction fed by a load-immediate (LI/LI8 DefMI) into a
// single load-immediate, or — for compare-immediates — rewrite ISEL users of
// the compare into plain copies. Returns true if MI was changed. KilledDef,
// when non-null, is cleared if DefMI must be kept alive (CR-setting case).
4619bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4620                                unsigned OpNoForForwarding,
4621                                MachineInstr **KilledDef) const {
4622  if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4623      !DefMI.getOperand(1).isImm())
4624    return false;
4625
4626  MachineFunction *MF = MI.getParent()->getParent();
4627  MachineRegisterInfo *MRI = &MF->getRegInfo();
4628  bool PostRA = !MRI->isSSA();
4629
4630  int64_t Immediate = DefMI.getOperand(1).getImm();
4631  // Sign-extend to 64-bits.
4632  int64_t SExtImm = SignExtend64<16>(Immediate);
4633
4634  bool ReplaceWithLI = false;
4635  bool Is64BitLI = false;
4636  int64_t NewImm = 0;
4637  bool SetCR = false;
4638  unsigned Opc = MI.getOpcode();
4639  switch (Opc) {
4640  default:
4641    return false;
4642
4643  // FIXME: Any branches conditional on such a comparison can be made
4644  // unconditional. At this time, this happens too infrequently to be worth
4645  // the implementation effort, but if that ever changes, we could convert
4646  // such a pattern here.
4647  case PPC::CMPWI:
4648  case PPC::CMPLWI:
4649  case PPC::CMPDI:
4650  case PPC::CMPLDI: {
4651    // Doing this post-RA would require dataflow analysis to reliably find uses
4652    // of the CR register set by the compare.
4653    // No need to fixup killed/dead flag since this transformation is only valid
4654    // before RA.
4655    if (PostRA)
4656      return false;
4657    // If a compare-immediate is fed by an immediate and is itself an input of
4658    // an ISEL (the most common case) into a COPY of the correct register.
4659    bool Changed = false;
4660    Register DefReg = MI.getOperand(0).getReg();
4661    int64_t Comparand = MI.getOperand(2).getImm();
    // Manual sign-extension of the 16-bit comparand to 64 bits so the
    // constant comparison in selectReg() sees the same value the hardware
    // would.
4662    int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4663                                ? (Comparand | 0xFFFFFFFFFFFF0000)
4664                                : Comparand;
4665
4666    for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4667      unsigned UseOpc = CompareUseMI.getOpcode();
4668      if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4669        continue;
4670      unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4671      Register TrueReg = CompareUseMI.getOperand(1).getReg();
4672      Register FalseReg = CompareUseMI.getOperand(2).getReg();
4673      unsigned RegToCopy =
4674          selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4675      if (RegToCopy == PPC::NoRegister)
4676        continue;
4677      // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4678      if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4679        CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4680        replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4681        CompareUseMI.removeOperand(3);
4682        CompareUseMI.removeOperand(2);
4683        continue;
4684      }
4685      LLVM_DEBUG(
4686          dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4687      LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4688      LLVM_DEBUG(dbgs() << "Is converted to:\n");
4689      // Convert to copy and remove unneeded operands.
4690      CompareUseMI.setDesc(get(PPC::COPY));
4691      CompareUseMI.removeOperand(3);
4692      CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4693      CmpIselsConverted++;
4694      Changed = true;
4695      LLVM_DEBUG(CompareUseMI.dump());
4696    }
4697    if (Changed)
4698      return true;
4699    // This may end up incremented multiple times since this function is called
4700    // during a fixed-point transformation, but it is only meant to indicate the
4701    // presence of this opportunity.
4702    MissedConvertibleImmediateInstrs++;
4703    return false;
4704  }
4705
4706  // Immediate forms - may simply be convertable to an LI.
4707  case PPC::ADDI:
4708  case PPC::ADDI8: {
4709    // Does the sum fit in a 16-bit signed field?
4710    int64_t Addend = MI.getOperand(2).getImm();
4711    if (isInt<16>(Addend + SExtImm)) {
4712      ReplaceWithLI = true;
4713      Is64BitLI = Opc == PPC::ADDI8;
4714      NewImm = Addend + SExtImm;
4715      break;
4716    }
4717    return false;
4718  }
4719  case PPC::SUBFIC:
4720  case PPC::SUBFIC8: {
4721    // Only transform this if the CARRY implicit operand is dead.
4722    if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4723      return false;
4724    int64_t Minuend = MI.getOperand(2).getImm();
4725    if (isInt<16>(Minuend - SExtImm)) {
4726      ReplaceWithLI = true;
4727      Is64BitLI = Opc == PPC::SUBFIC8;
4728      NewImm = Minuend - SExtImm;
4729      break;
4730    }
4731    return false;
4732  }
4733  case PPC::RLDICL:
4734  case PPC::RLDICL_rec:
4735  case PPC::RLDICL_32:
4736  case PPC::RLDICL_32_64: {
4737    // Use APInt's rotate function.
4738    int64_t SH = MI.getOperand(2).getImm();
4739    int64_t MB = MI.getOperand(3).getImm();
4740    APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4741                SExtImm, true);
4742    InVal = InVal.rotl(SH);
    // Build the rldicl clear mask: keep bits [0, 63-MB] (MB == 0 keeps all).
4743    uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4744    InVal &= Mask;
4745    // Can't replace negative values with an LI as that will sign-extend
4746    // and not clear the left bits. If we're setting the CR bit, we will use
4747    // ANDI_rec which won't sign extend, so that's safe.
4748    if (isUInt<15>(InVal.getSExtValue()) ||
4749        (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4750      ReplaceWithLI = true;
4751      Is64BitLI = Opc != PPC::RLDICL_32;
4752      NewImm = InVal.getSExtValue();
4753      SetCR = Opc == PPC::RLDICL_rec;
4754      break;
4755    }
4756    return false;
4757  }
4758  case PPC::RLWINM:
4759  case PPC::RLWINM8:
4760  case PPC::RLWINM_rec:
4761  case PPC::RLWINM8_rec: {
4762    int64_t SH = MI.getOperand(2).getImm();
4763    int64_t MB = MI.getOperand(3).getImm();
4764    int64_t ME = MI.getOperand(4).getImm();
4765    APInt InVal(32, SExtImm, true);
4766    InVal = InVal.rotl(SH);
    // MB/ME are big-endian (ISA) bit numbers; convert to APInt's LSB-first
    // numbering. The mask may wrap when MB > ME.
4767    APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4768    InVal &= Mask;
4769    // Can't replace negative values with an LI as that will sign-extend
4770    // and not clear the left bits. If we're setting the CR bit, we will use
4771    // ANDI_rec which won't sign extend, so that's safe.
4772    bool ValueFits = isUInt<15>(InVal.getSExtValue());
4773    ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4774                  isUInt<16>(InVal.getSExtValue()));
4775    if (ValueFits) {
4776      ReplaceWithLI = true;
4777      Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4778      NewImm = InVal.getSExtValue();
4779      SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4780      break;
4781    }
4782    return false;
4783  }
4784  case PPC::ORI:
4785  case PPC::ORI8:
4786  case PPC::XORI:
4787  case PPC::XORI8: {
4788    int64_t LogicalImm = MI.getOperand(2).getImm();
4789    int64_t Result = 0;
4790    if (Opc == PPC::ORI || Opc == PPC::ORI8)
4791      Result = LogicalImm | SExtImm;
4792    else
4793      Result = LogicalImm ^ SExtImm;
4794    if (isInt<16>(Result)) {
4795      ReplaceWithLI = true;
4796      Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4797      NewImm = Result;
4798      break;
4799    }
4800    return false;
4801  }
4802  }
4803
4804  if (ReplaceWithLI) {
4805    // We need to be careful with CR-setting instructions we're replacing.
4806    if (SetCR) {
4807      // We don't know anything about uses when we're out of SSA, so only
4808      // replace if the new immediate will be reproduced.
4809      bool ImmChanged = (SExtImm & NewImm) != NewImm;
4810      if (PostRA && ImmChanged)
4811        return false;
4812
4813      if (!PostRA) {
4814        // If the defining load-immediate has no other uses, we can just replace
4815        // the immediate with the new immediate.
4816        if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4817          DefMI.getOperand(1).setImm(NewImm);
4818
4819        // If we're not using the GPR result of the CR-setting instruction, we
4820        // just need to and with zero/non-zero depending on the new immediate.
4821        else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4822          if (NewImm) {
4823            assert(Immediate && "Transformation converted zero to non-zero?");
4824            NewImm = Immediate;
4825          }
4826        } else if (ImmChanged)
4827          return false;
4828      }
4829    }
4830
4831    LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4832    LLVM_DEBUG(MI.dump());
4833    LLVM_DEBUG(dbgs() << "Fed by:\n");
4834    LLVM_DEBUG(DefMI.dump());
4835    LoadImmediateInfo LII;
4836    LII.Imm = NewImm;
4837    LII.Is64Bit = Is64BitLI;
4838    LII.SetCR = SetCR;
4839    // If we're setting the CR, the original load-immediate must be kept (as an
4840    // operand to ANDI_rec/ANDI8_rec).
4841    if (KilledDef && SetCR)
4842      *KilledDef = nullptr;
4843    replaceInstrWithLI(MI, LII);
4844
4845    if (PostRA)
4846      recomputeLivenessFlags(*MI.getParent());
4847
4848    LLVM_DEBUG(dbgs() << "With:\n");
4849    LLVM_DEBUG(MI.dump());
4850    return true;
4851  }
4852  return false;
4853}
4854
4855bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4856 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4857 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4858 bool PostRA = !MRI->isSSA();
4859 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4860 // for post-ra.
4861 if (PostRA)
4862 return false;
4863
4864 // Only handle load/store.
4865 if (!MI.mayLoadOrStore())
4866 return false;
4867
4868 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4869
4870 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4871 "MI must have x-form opcode");
4872
4873 // get Imm Form info.
4874 ImmInstrInfo III;
4875 bool IsVFReg = MI.getOperand(0).isReg()
4876 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4877 : false;
4878
4879 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4880 return false;
4881
4882 if (!III.IsSummingOperands)
4883 return false;
4884
4885 if (OpNoForForwarding != III.OpNoForForwarding)
4886 return false;
4887
4888 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4889 if (!ImmOperandMI.isImm())
4890 return false;
4891
4892 // Check DefMI.
4893 MachineOperand *ImmMO = nullptr;
4894 MachineOperand *RegMO = nullptr;
4895 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4896 return false;
4897 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4898
4899 // Check Imm.
4900 // Set ImmBase from imm instruction as base and get new Imm inside
4901 // isImmElgibleForForwarding.
4902 int64_t ImmBase = ImmOperandMI.getImm();
4903 int64_t Imm = 0;
4904 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4905 return false;
4906
4907 // Do the transform
4908 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4909 LLVM_DEBUG(MI.dump());
4910 LLVM_DEBUG(dbgs() << "Fed by:\n");
4911 LLVM_DEBUG(DefMI.dump());
4912
4913 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4914 MI.getOperand(III.ImmOpNo).setImm(Imm);
4915
4916 LLVM_DEBUG(dbgs() << "With:\n");
4917 LLVM_DEBUG(MI.dump());
4918 return true;
4919}
4920
4921// If an X-Form instruction is fed by an add-immediate and one of its operands
4922// is the literal zero, attempt to forward the source of the add-immediate to
4923// the corresponding D-Form instruction with the displacement coming from
4924// the immediate being added.
4925bool PPCInstrInfo::transformToImmFormFedByAdd(
4926    MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4927    MachineInstr &DefMI, bool KillDefMI) const {
4928  //         RegMO ImmMO
4929  //           |    |
4930  // x = addi reg, imm  <----- DefMI
4931  // y = op    0 ,  x   <----- MI
4932  //                |
4933  //         OpNoForForwarding
4934  // Check if the MI meet the requirement described in the III.
4935  if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4936    return false;
4937
4938  // Check if the DefMI meet the requirement
4939  // described in the III. If yes, set the ImmMO and RegMO accordingly.
4940  MachineOperand *ImmMO = nullptr;
4941  MachineOperand *RegMO = nullptr;
4942  if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4943    return false;
4944  assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4945
4946  // As we get the Imm operand now, we need to check if the ImmMO meet
4947  // the requirement described in the III. If yes set the Imm.
4948  int64_t Imm = 0;
4949  if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4950    return false;
4951
4952  bool IsFwdFeederRegKilled = false;
4953  bool SeenIntermediateUse = false;
4954  // Check if the RegMO can be forwarded to MI.
4955  if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4956                                 IsFwdFeederRegKilled, SeenIntermediateUse))
4957    return false;
4958
4959  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4960  bool PostRA = !MRI.isSSA();
4961
4962  // We know that, the MI and DefMI both meet the pattern, and
4963  // the Imm also meet the requirement with the new Imm-form.
4964  // It is safe to do the transformation now.
4965  LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4966  LLVM_DEBUG(MI.dump());
4967  LLVM_DEBUG(dbgs() << "Fed by:\n");
4968  LLVM_DEBUG(DefMI.dump());
4969
4970  // Update the base reg first.
4971  MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4972                                                        false, false,
4973                                                        RegMO->isKill());
4974
4975  // Then, update the imm.
4976  if (ImmMO->isImm()) {
4977    // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4978    // directly.
    // NOTE(review): the call that writes Imm into the ZERO operand appears
    // to be elided in this excerpt; confirm against upstream sources.
4980  }
4981  else {
4982    // Otherwise, it is Constant Pool Index(CPI) or Global,
4983    // which is relocation in fact. We need to replace the special zero
4984    // register with ImmMO.
4985    // Before that, we need to fixup the target flags for imm.
4986    // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4987    if (DefMI.getOpcode() == PPC::ADDItocL8)
      // NOTE(review): the target-flag fixup statement appears to be elided
      // in this excerpt; confirm against upstream sources.
4989
4990    // MI didn't have the interface such as MI.setOperand(i) though
4991    // it has MI.getOperand(i). To replace the ZERO MachineOperand with
4992    // ImmMO, we need to remove ZERO operand and all the operands behind it,
4993    // and, add the ImmMO, then, move back all the operands behind ZERO.
    // NOTE(review): the declaration of the operand scratch list (MOps)
    // appears to be elided in this excerpt; confirm against upstream sources.
4995    for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
4996      MOps.push_back(MI.getOperand(i));
4997      MI.removeOperand(i);
4998    }
4999
5000    // Remove the last MO in the list, which is ZERO operand in fact.
5001    MOps.pop_back();
5002    // Add the imm operand.
5003    MI.addOperand(*ImmMO);
5004    // Now add the rest back.
5005    for (auto &MO : MOps)
5006      MI.addOperand(MO);
5007  }
5008
5009  // Update the opcode.
5010  MI.setDesc(get(III.ImmOpcode));
5011
5012  if (PostRA)
5013    recomputeLivenessFlags(*MI.getParent());
5014  LLVM_DEBUG(dbgs() << "With:\n");
5015  LLVM_DEBUG(MI.dump());
5016
5017  return true;
5018}
5019
// Rewrite a reg+reg instruction into its immediate (D-) form when one source
// register is defined by LI/LI8. Handles the special cases of shift opcodes
// (out-of-range amounts become LI 0; zero amounts become COPY pre-RA) and of
// instructions where the R0/X0-is-zero convention differs between the two
// forms. Returns true if MI was rewritten.
5020bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
5021                                             const ImmInstrInfo &III,
5022                                             unsigned ConstantOpNo,
5023                                             MachineInstr &DefMI) const {
5024  // DefMI must be LI or LI8.
5025  if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
5026      !DefMI.getOperand(1).isImm())
5027    return false;
5028
5029  // Get Imm operand and Sign-extend to 64-bits.
5030  int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
5031
5032  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5033  bool PostRA = !MRI.isSSA();
5034  // Exit early if we can't convert this.
5035  if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
5036    return false;
5037  if (Imm % III.ImmMustBeMultipleOf)
5038    return false;
5039  if (III.TruncateImmTo)
5040    Imm &= ((1 << III.TruncateImmTo) - 1);
5041  if (III.SignedImm) {
5042    APInt ActualValue(64, Imm, true);
5043    if (!ActualValue.isSignedIntN(III.ImmWidth))
5044      return false;
5045  } else {
5046    uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
5047    if ((uint64_t)Imm > UnsignedMax)
5048      return false;
5049  }
5050
5051  // If we're post-RA, the instructions don't agree on whether register zero is
5052  // special, we can transform this as long as the register operand that will
5053  // end up in the location where zero is special isn't R0.
5054  if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5055    unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
5056                              III.ZeroIsSpecialNew + 1;
5057    Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
5058    Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5059    // If R0 is in the operand where zero is special for the new instruction,
5060    // it is unsafe to transform if the constant operand isn't that operand.
5061    if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
5062        ConstantOpNo != III.ZeroIsSpecialNew)
5063      return false;
5064    if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
5065        ConstantOpNo != PosForOrigZero)
5066      return false;
5067  }
5068
5069  unsigned Opc = MI.getOpcode();
5070  bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5071                        Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5072                        Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5073                        Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5074  bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5075                        Opc == PPC::SRD || Opc == PPC::SRD_rec;
5076  bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5077               Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5078  bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5079                    Opc == PPC::SRD_rec;
5080
5081  LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5082  LLVM_DEBUG(MI.dump());
5083  LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5084  LLVM_DEBUG(DefMI.dump());
5085  MI.setDesc(get(III.ImmOpcode));
5086  if (ConstantOpNo == III.OpNoForForwarding) {
5087    // Converting shifts to immediate form is a bit tricky since they may do
5088    // one of three things:
5089    // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5090    // 2. If the shift amount is zero, the result is unchanged (save for maybe
5091    //    setting CR0)
5092    // 3. If the shift amount is in [1, OpSize), it's just a shift
5093    if (SpecialShift32 || SpecialShift64) {
5094      LoadImmediateInfo LII;
5095      LII.Imm = 0;
5096      LII.SetCR = SetCR;
5097      LII.Is64Bit = SpecialShift64;
5098      uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
5099      if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5100        replaceInstrWithLI(MI, LII);
5101      // Shifts by zero don't change the value. If we don't need to set CR0,
5102      // just convert this to a COPY. Can't do this post-RA since we've already
5103      // cleaned up the copies.
5104      else if (!SetCR && ShAmt == 0 && !PostRA) {
5105        MI.removeOperand(2);
5106        MI.setDesc(get(PPC::COPY));
5107      } else {
5108        // The 32 bit and 64 bit instructions are quite different.
5109        if (SpecialShift32) {
5110          // Left shifts use (N, 0, 31-N).
5111          // Right shifts use (32-N, N, 31) if 0 < N < 32.
5112          //           use (0, 0, 31)    if N == 0.
5113          uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5114          uint64_t MB = RightShift ? ShAmt : 0;
5115          uint64_t ME = RightShift ? 31 : 31 - ShAmt;
          // NOTE(review): the statement replacing the shift-amount operand
          // with SH appears to be elided in this excerpt; confirm upstream.
5117          MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5118              .addImm(ME);
5119        } else {
5120          // Left shifts use (N, 63-N).
5121          // Right shifts use (64-N, N) if 0 < N < 64.
5122          //           use (0, 0)    if N == 0.
5123          uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5124          uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
          // NOTE(review): the statement replacing the shift-amount operand
          // with SH appears to be elided in this excerpt; confirm upstream.
5126          MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5127        }
5128      }
5129    } else
5130      replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5131  }
5132  // Convert commutative instructions (switch the operands and convert the
5133  // desired one to an immediate.
5134  else if (III.IsCommutative) {
5135    replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5136    swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5137  } else
5138    llvm_unreachable("Should have exited early!");
5139
5140  // For instructions for which the constant register replaces a different
5141  // operand than where the immediate goes, we need to swap them.
5142  if (III.OpNoForForwarding != III.ImmOpNo)
    // NOTE(review): the operand-swap statement appears to be elided in this
    // excerpt; confirm against upstream sources.
5144
5145  // If the special R0/X0 register index are different for original instruction
5146  // and new instruction, we need to fix up the register class in new
5147  // instruction.
5148  if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5149    if (III.ZeroIsSpecialNew) {
5150      // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5151      // need to fix up register class.
5152      Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5153      if (RegToModify.isVirtual()) {
5154        const TargetRegisterClass *NewRC =
5155          MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5156          &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5157        MRI.setRegClass(RegToModify, NewRC);
5158      }
5159    }
5160  }
5161
5162  if (PostRA)
5163    recomputeLivenessFlags(*MI.getParent());
5164
5165  LLVM_DEBUG(dbgs() << "With: ");
5166  LLVM_DEBUG(MI.dump());
5167  LLVM_DEBUG(dbgs() << "\n");
5168  return true;
5169}
5170
// When VSX is available, values in the VRRC class can equally live in the
// wider VSRC class; return the widened class, otherwise the class unchanged.
// NOTE(review): the line carrying this function's name is elided in this
// excerpt (presumably PPCInstrInfo::updatedRC) — confirm upstream.
const TargetRegisterClass *
5173  if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5174    return &PPC::VSRCRegClass;
5175  return RC;
5176}
5177
// Thin wrapper over the TableGen-generated mapping from an opcode to its
// record (Rc=1, CR0-setting) form.
// NOTE(review): this function's signature line is elided in this excerpt —
// confirm upstream.
5179  return PPC::getRecordFormOpcode(Opcode);
5180}
5181
5182static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5183 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5184 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5185 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5186 Opcode == PPC::LHZUX8);
5187}
5188
5189// This function checks for sign extension from 32 bits to 64 bits.
5190static bool definedBySignExtendingOp(const unsigned Reg,
5191                                     const MachineRegisterInfo *MRI) {
  // NOTE(review): the guard condition preceding this return appears to be
  // elided in this excerpt (presumably rejecting non-virtual registers);
  // confirm against upstream sources.
5193    return false;
5194
5195  MachineInstr *MI = MRI->getVRegDef(Reg);
5196  if (!MI)
5197    return false;
5198
5199  int Opcode = MI->getOpcode();
5200  const PPCInstrInfo *TII =
5201      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
  // Opcodes flagged SExt32To64 in TableGen are known sign-extending.
5202  if (TII->isSExt32To64(Opcode))
5203    return true;
5204
5205  // The first def of LBZU/LHZU is sign extended.
5206  if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5207    return true;
5208
5209  // RLDICL generates sign-extended output if it clears at least
5210  // 33 bits from the left (MSB).
5211  if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5212    return true;
5213
5214  // If at least one bit from left in a lower word is masked out,
5215  // all of 0 to 32-th bits of the output are cleared.
5216  // Hence the output is already sign extended.
5217  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5218       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5219      MI->getOperand(3).getImm() > 0 &&
5220      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5221    return true;
5222
5223  // If the most significant bit of immediate in ANDIS is zero,
5224  // all of 0 to 32-th bits are cleared.
5225  if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5226    uint16_t Imm = MI->getOperand(2).getImm();
5227    if ((Imm & 0x8000) == 0)
5228      return true;
5229  }
5230
5231  return false;
5232}
5233
5234// This function checks the machine instruction that defines the input register
5235// Reg. If that machine instruction always outputs a value that has only zeros
5236// in the higher 32 bits then this function will return true.
5237static bool definedByZeroExtendingOp(const unsigned Reg,
5238                                     const MachineRegisterInfo *MRI) {
  // NOTE(review): the guard condition preceding this return appears to be
  // elided in this excerpt (presumably rejecting non-virtual registers);
  // confirm against upstream sources.
5240    return false;
5241
5242  MachineInstr *MI = MRI->getVRegDef(Reg);
5243  if (!MI)
5244    return false;
5245
5246  int Opcode = MI->getOpcode();
5247  const PPCInstrInfo *TII =
5248      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
  // Opcodes flagged ZExt32To64 in TableGen are known zero-extending.
5249  if (TII->isZExt32To64(Opcode))
5250    return true;
5251
5252  // The first def of LBZU/LHZU/LWZU are zero extended.
5253  if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5254       Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5255      MI->getOperand(0).getReg() == Reg)
5256    return true;
5257
5258  // The 16-bit immediate is sign-extended in li/lis.
5259  // If the most significant bit is zero, all higher bits are zero.
5260  if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5261      Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5262    int64_t Imm = MI->getOperand(1).getImm();
5263    if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5264      return true;
5265  }
5266
5267  // We have some variations of rotate-and-mask instructions
5268  // that clear higher 32-bits.
5269  if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5270       Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5271       Opcode == PPC::RLDICL_32_64) &&
5272      MI->getOperand(3).getImm() >= 32)
5273    return true;
5274
5275  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5276      MI->getOperand(3).getImm() >= 32 &&
5277      MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5278    return true;
5279
5280  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5281       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5282       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5283      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5284    return true;
5285
5286  return false;
5287}
5288
5289// This function returns true if the input MachineInstr is a TOC save
5290// instruction.
// NOTE(review): this function's signature line is elided in this excerpt
// (presumably PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const) —
// confirm upstream.
5292  if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5293    return false;
5294  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5295  unsigned StackOffset = MI.getOperand(1).getImm();
5296  Register StackReg = MI.getOperand(2).getReg();
  // A TOC save is a store at the ABI-defined TOC-save slot relative to the
  // stack pointer (X1 on 64-bit, R1 on 32-bit).
5297  Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5298  if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5299    return true;
5300
5301  return false;
5302}
5303
5304// We limit the max depth to track incoming values of PHIs or binary ops
5305// (e.g. AND) to avoid excessive cost.
// Shared recursion cap used by promoteInstr32To64ForElimEXTSW (and the
// sign/zero-extension analysis below) when walking through PHIs/binary ops.
5306const unsigned MAX_BINOP_DEPTH = 1;
5307
5308// This function will promote the instruction which defines the register `Reg`
5309// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
5310// used to check whether an instruction needs to be promoted or not is similar
5311// to the logic used to check whether or not a defined register is sign or zero
5312// extended within the function PPCInstrInfo::isSignOrZeroExtended.
5313// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
5314// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
5315// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
5316// than once. This is done to prevent exponential recursion.
// NOTE(review): the signature lines of this method are elided in this
// excerpt (presumably PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const
// Register &Reg, MachineRegisterInfo *MRI, ...)) — confirm upstream.
5319                                                unsigned BinOpDepth,
5320                                                LiveVariables *LV) const {
5321  if (!Reg.isVirtual())
5322    return;
5323
5324  MachineInstr *MI = MRI->getVRegDef(Reg);
5325  if (!MI)
5326    return;
5327
5328  unsigned Opcode = MI->getOpcode();
5329
5330  switch (Opcode) {
5331  case PPC::OR:
5332  case PPC::ISEL:
5333  case PPC::OR8:
5334  case PPC::PHI: {
5335    if (BinOpDepth >= MAX_BINOP_DEPTH)
5336      break;
    // PHIs carry (value, block) operand pairs; ORs/ISELs have plain register
    // uses at operands 1 and 2 — hence the differing end/stride.
5337    unsigned OperandEnd = 3, OperandStride = 1;
5338    if (Opcode == PPC::PHI) {
5339      OperandEnd = MI->getNumOperands();
5340      OperandStride = 2;
5341    }
5342
5343    for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
5344      assert(MI->getOperand(I).isReg() && "Operand must be register");
5345      promoteInstr32To64ForElimEXTSW(MI->getOperand(I).getReg(), MRI,
5346                                     BinOpDepth + 1, LV);
5347    }
5348
5349    break;
5350  }
5351  case PPC::COPY: {
5352    // Refers to the logic of the `case PPC::COPY` statement in the function
5353    // PPCInstrInfo::isSignOrZeroExtended().
5354
5355    Register SrcReg = MI->getOperand(1).getReg();
5356    // In both ELFv1 and v2 ABI, method parameters and the return value
5357    // are sign- or zero-extended.
5358    const MachineFunction *MF = MI->getMF();
5359    if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5360      // If this is a copy from another register, we recursively promote the
5361      // source.
5362      promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5363      return;
5364    }
5365
5366    // From here on everything is SVR4ABI. COPY will be eliminated in the other
5367    // pass, we do not need promote the COPY pseudo opcode.
5368
5369    if (SrcReg != PPC::X3)
5370      // If this is a copy from another register, we recursively promote the
5371      // source.
5372      promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5373    return;
5374  }
5375  case PPC::ORI:
5376  case PPC::XORI:
5377  case PPC::ORIS:
5378  case PPC::XORIS:
5379  case PPC::ORI8:
5380  case PPC::XORI8:
5381  case PPC::ORIS8:
5382  case PPC::XORIS8:
5383    promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI, BinOpDepth,
5384                                   LV);
5385    break;
5386  case PPC::AND:
5387  case PPC::AND8:
5388    if (BinOpDepth >= MAX_BINOP_DEPTH)
5389      break;
5390
5391    promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI,
5392                                   BinOpDepth + 1, LV);
5393    promoteInstr32To64ForElimEXTSW(MI->getOperand(2).getReg(), MRI,
5394                                   BinOpDepth + 1, LV);
5395    break;
5396  }
5397
  // Already a 64-bit def — nothing to promote.
5398  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
5399  if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
5400    return;
5401
5402  const PPCInstrInfo *TII =
5403      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5404
5405  // Map the 32bit to 64bit opcodes for instructions that are not signed or zero
5406  // extended themselves, but may have operands who's destination registers of
5407  // signed or zero extended instructions.
5408  std::unordered_map<unsigned, unsigned> OpcodeMap = {
5409      {PPC::OR, PPC::OR8},     {PPC::ISEL, PPC::ISEL8},
5410      {PPC::ORI, PPC::ORI8},   {PPC::XORI, PPC::XORI8},
5411      {PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
5412      {PPC::AND, PPC::AND8}};
5413
5414  int NewOpcode = -1;
5415  auto It = OpcodeMap.find(Opcode);
5416  if (It != OpcodeMap.end()) {
5417    // Set the new opcode to the mapped 64-bit version.
5418    NewOpcode = It->second;
5419  } else {
5420    if (!TII->isSExt32To64(Opcode))
5421      return;
5422
5423    // The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
5424    // map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
5425    // instruction with the same opcode.
5426    NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
5427  }
5428
5429  assert(NewOpcode != -1 &&
5430         "Must have a 64-bit opcode to map the 32-bit opcode!");
5431
5432  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
5433  const MCInstrDesc &MCID = TII->get(NewOpcode);
5434  const TargetRegisterClass *NewRC =
5435      TRI->getRegClass(MCID.operands()[0].RegClass);
5436
5437  Register SrcReg = MI->getOperand(0).getReg();
5438  const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
5439
5440  // If the register class of the defined register in the 32-bit instruction
5441  // is the same as the register class of the defined register in the promoted
5442  // 64-bit instruction, we do not need to promote the instruction.
5443  if (NewRC == SrcRC)
5444    return;
5445
5446  DebugLoc DL = MI->getDebugLoc();
5447  auto MBB = MI->getParent();
5448
5449  // Since the pseudo-opcode of the instruction is promoted from 32-bit to
5450  // 64-bit, if the source reg class of the original instruction belongs to
5451  // PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
5452  // the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
5453  // respectively.
5454  DenseMap<unsigned, Register> PromoteRegs;
5455  for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5456    MachineOperand &Operand = MI->getOperand(i);
5457    if (!Operand.isReg())
5458      continue;
5459
5460    Register OperandReg = Operand.getReg();
5461    if (!OperandReg.isVirtual())
5462      continue;
5463
5464    const TargetRegisterClass *NewUsedRegRC =
5465        TRI->getRegClass(MCID.operands()[i].RegClass);
5466    const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
5467    if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
5468                                  OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
5469      // Promote the used 32-bit register to 64-bit register.
      // IMPLICIT_DEF + INSERT_SUBREG builds a 64-bit value whose low 32 bits
      // are the original operand (upper bits undefined).
5470      Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5471      Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5472      BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
5473      BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
5474          .addReg(TmpReg)
5475          .addReg(OperandReg)
5476          .addImm(PPC::sub_32);
5477      PromoteRegs[i] = DstTmpReg;
5478    }
5479  }
5480
5481  Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
5482
5483  BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
  // NOTE(review): the declaration/initialization of Iter (presumably from
  // MI's iterator) appears to be elided in this excerpt; confirm upstream.
5485  --Iter;
5486  MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
5487  for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5488    if (auto It = PromoteRegs.find(i); It != PromoteRegs.end())
5489      MIBuilder.addReg(It->second, RegState::Kill);
5490    else
5491      Iter->addOperand(MI->getOperand(i));
5492  }
5493
5494  for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
5495    MachineOperand &Operand = Iter->getOperand(i);
5496    if (!Operand.isReg())
5497      continue;
5498    Register OperandReg = Operand.getReg();
5499    if (!OperandReg.isVirtual())
5500      continue;
5501    LV->recomputeForSingleDefVirtReg(OperandReg);
5502  }
5503
5504  MI->eraseFromParent();
5505
5506  // A defined register may be used by other instructions that are 32-bit.
5507  // After the defined register is promoted to 64-bit for the promoted
5508  // instruction, we need to demote the 64-bit defined register back to a
5509  // 32-bit register
5510  BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
5511      .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
5512  LV->recomputeForSingleDefVirtReg(NewDefinedReg);
5513}
5514
// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is incremented
// only when isSignOrZeroExtended calls itself more than once. This is done to
// prevent exponential recursion. There is no parameter to track linear
// recursion.
5520std::pair<bool, bool>
5522 const unsigned BinOpDepth,
5523 const MachineRegisterInfo *MRI) const {
5525 return std::pair<bool, bool>(false, false);
5526
5527 MachineInstr *MI = MRI->getVRegDef(Reg);
5528 if (!MI)
5529 return std::pair<bool, bool>(false, false);
5530
5531 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5532 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5533
5534 // If we know the instruction always returns sign- and zero-extended result,
5535 // return here.
5536 if (IsSExt && IsZExt)
5537 return std::pair<bool, bool>(IsSExt, IsZExt);
5538
5539 switch (MI->getOpcode()) {
5540 case PPC::COPY: {
5541 Register SrcReg = MI->getOperand(1).getReg();
5542
5543 // In both ELFv1 and v2 ABI, method parameters and the return value
5544 // are sign- or zero-extended.
5545 const MachineFunction *MF = MI->getMF();
5546
5547 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5548 // If this is a copy from another register, we recursively check source.
5549 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5550 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5551 SrcExt.second || IsZExt);
5552 }
5553
5554 // From here on everything is SVR4ABI
5555 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5556 // We check the ZExt/SExt flags for a method parameter.
5557 if (MI->getParent()->getBasicBlock() ==
5558 &MF->getFunction().getEntryBlock()) {
5559 Register VReg = MI->getOperand(0).getReg();
5560 if (MF->getRegInfo().isLiveIn(VReg)) {
5561 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5562 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5563 return std::pair<bool, bool>(IsSExt, IsZExt);
5564 }
5565 }
5566
5567 if (SrcReg != PPC::X3) {
5568 // If this is a copy from another register, we recursively check source.
5569 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5570 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5571 SrcExt.second || IsZExt);
5572 }
5573
5574 // For a method return value, we check the ZExt/SExt flags in attribute.
5575 // We assume the following code sequence for method call.
5576 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5577 // BL8_NOP @func,...
5578 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5579 // %5 = COPY %x3; G8RC:%5
5580 const MachineBasicBlock *MBB = MI->getParent();
5581 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
5584 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5585 return IsExtendPair;
5586
5587 const MachineInstr &CallMI = *(--II);
5588 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5589 return IsExtendPair;
5590
5591 const Function *CalleeFn =
5593 if (!CalleeFn)
5594 return IsExtendPair;
5595 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5596 if (IntTy && IntTy->getBitWidth() <= 32) {
5597 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5598 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5599 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5600 return std::pair<bool, bool>(IsSExt, IsZExt);
5601 }
5602
5603 return IsExtendPair;
5604 }
5605
5606 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5607 // So, we track the operand register as we do for register copy.
5608 case PPC::ORI:
5609 case PPC::XORI:
5610 case PPC::ORI8:
5611 case PPC::XORI8: {
5612 Register SrcReg = MI->getOperand(1).getReg();
5613 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5614 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5615 SrcExt.second || IsZExt);
5616 }
5617
5618 // OR, XOR with shifted 16-bit immediate does not change the upper
5619 // 32 bits. So, we track the operand register for zero extension.
5620 // For sign extension when the MSB of the immediate is zero, we also
5621 // track the operand register since the upper 33 bits are unchanged.
5622 case PPC::ORIS:
5623 case PPC::XORIS:
5624 case PPC::ORIS8:
5625 case PPC::XORIS8: {
5626 Register SrcReg = MI->getOperand(1).getReg();
5627 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5628 uint16_t Imm = MI->getOperand(2).getImm();
5629 if (Imm & 0x8000)
5630 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5631 else
5632 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5633 SrcExt.second || IsZExt);
5634 }
5635
5636 // If all incoming values are sign-/zero-extended,
5637 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5638 case PPC::OR:
5639 case PPC::OR8:
5640 case PPC::ISEL:
5641 case PPC::PHI: {
5642 if (BinOpDepth >= MAX_BINOP_DEPTH)
5643 return std::pair<bool, bool>(false, false);
5644
5645 // The input registers for PHI are operand 1, 3, ...
5646 // The input registers for others are operand 1 and 2.
5647 unsigned OperandEnd = 3, OperandStride = 1;
5648 if (MI->getOpcode() == PPC::PHI) {
5649 OperandEnd = MI->getNumOperands();
5650 OperandStride = 2;
5651 }
5652
5653 IsSExt = true;
5654 IsZExt = true;
5655 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5656 if (!MI->getOperand(I).isReg())
5657 return std::pair<bool, bool>(false, false);
5658
5659 Register SrcReg = MI->getOperand(I).getReg();
5660 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5661 IsSExt &= SrcExt.first;
5662 IsZExt &= SrcExt.second;
5663 }
5664 return std::pair<bool, bool>(IsSExt, IsZExt);
5665 }
5666
5667 // If at least one of the incoming values of an AND is zero extended
5668 // then the output is also zero-extended. If both of the incoming values
5669 // are sign-extended then the output is also sign extended.
5670 case PPC::AND:
5671 case PPC::AND8: {
5672 if (BinOpDepth >= MAX_BINOP_DEPTH)
5673 return std::pair<bool, bool>(false, false);
5674
5675 Register SrcReg1 = MI->getOperand(1).getReg();
5676 Register SrcReg2 = MI->getOperand(2).getReg();
5677 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5678 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5679 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5680 Src1Ext.second || Src2Ext.second);
5681 }
5682
5683 default:
5684 break;
5685 }
5686 return std::pair<bool, bool>(IsSExt, IsZExt);
5687}
5688
5689bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5690 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5691}
5692
5693namespace {
5694class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5695 MachineInstr *Loop, *EndLoop, *LoopCount;
5696 MachineFunction *MF;
5697 const TargetInstrInfo *TII;
5698 int64_t TripCount;
5699
5700public:
5701 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5702 MachineInstr *LoopCount)
5703 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5704 MF(Loop->getParent()->getParent()),
5705 TII(MF->getSubtarget().getInstrInfo()) {
5706 // Inspect the Loop instruction up-front, as it may be deleted when we call
5707 // createTripCountGreaterCondition.
5708 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5709 TripCount = LoopCount->getOperand(1).getImm();
5710 else
5711 TripCount = -1;
5712 }
5713
5714 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5715 // Only ignore the terminator.
5716 return MI == EndLoop;
5717 }
5718
5719 std::optional<bool> createTripCountGreaterCondition(
5720 int TC, MachineBasicBlock &MBB,
5721 SmallVectorImpl<MachineOperand> &Cond) override {
5722 if (TripCount == -1) {
5723 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5724 // so we don't need to generate any thing here.
5725 Cond.push_back(MachineOperand::CreateImm(0));
5727 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5728 true));
5729 return {};
5730 }
5731
5732 return TripCount > TC;
5733 }
5734
5735 void setPreheader(MachineBasicBlock *NewPreheader) override {
5736 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5737 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5738 }
5739
5740 void adjustTripCount(int TripCountAdjust) override {
5741 // If the loop trip count is a compile-time value, then just change the
5742 // value.
5743 if (LoopCount->getOpcode() == PPC::LI8 ||
5744 LoopCount->getOpcode() == PPC::LI) {
5745 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5746 LoopCount->getOperand(1).setImm(TripCount);
5747 return;
5748 }
5749
5750 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5751 // so we don't need to generate any thing here.
5752 }
5753
5754 void disposed(LiveIntervals *LIS) override {
5755 if (LIS) {
5756 LIS->RemoveMachineInstrFromMaps(*Loop);
5757 LIS->RemoveMachineInstrFromMaps(*LoopCount);
5758 }
5759 Loop->eraseFromParent();
5760 // Ensure the loop setup instruction is deleted too.
5761 LoopCount->eraseFromParent();
5762 }
5763};
5764} // namespace
5765
5766std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5768 // We really "analyze" only hardware loops right now.
5770 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5771 if (Preheader == LoopBB)
5772 Preheader = *std::next(LoopBB->pred_begin());
5773 MachineFunction *MF = Preheader->getParent();
5774
5775 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5777 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5778 Register LoopCountReg = LoopInst->getOperand(0).getReg();
5780 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5781 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5782 }
5783 }
5784 return nullptr;
5785}
5786
5788 MachineBasicBlock &PreHeader,
5789 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5790
5791 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5792
5793 // The loop set-up instruction should be in preheader
5794 for (auto &I : PreHeader.instrs())
5795 if (I.getOpcode() == LOOPi)
5796 return &I;
5797 return nullptr;
5798}
5799
// Returns true on success and fills in the base operand, the byte offset and
// the memory width of the instruction. Width is the size of the memory that is
// being loaded/stored.
5803 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5804 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5805 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5806 return false;
5807
5808 // Handle only loads/stores with base register followed by immediate offset.
5809 if (!LdSt.getOperand(1).isImm() ||
5810 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5811 return false;
5812 if (!LdSt.getOperand(1).isImm() ||
5813 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5814 return false;
5815
5816 if (!LdSt.hasOneMemOperand())
5817 return false;
5818
5819 Width = (*LdSt.memoperands_begin())->getSize();
5820 Offset = LdSt.getOperand(1).getImm();
5821 BaseReg = &LdSt.getOperand(2);
5822 return true;
5823}
5824
5826 const MachineInstr &MIa, const MachineInstr &MIb) const {
5827 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5828 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5829
5832 return false;
5833
5834 // Retrieve the base register, offset from the base register and width. Width
5835 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5836 // base registers are identical, and the offset of a lower memory access +
5837 // the width doesn't overlap the offset of a higher memory access,
5838 // then the memory accesses are different.
5840 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5841 int64_t OffsetA = 0, OffsetB = 0;
5843 WidthB = LocationSize::precise(0);
5844 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5845 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5846 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
5847 int LowOffset = std::min(OffsetA, OffsetB);
5848 int HighOffset = std::max(OffsetA, OffsetB);
5849 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
5850 if (LowWidth.hasValue() &&
5851 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5852 return true;
5853 }
5854 }
5855 return false;
5856}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Function Alias Analysis false
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool isPhysical(const MachineOperand &MO)
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
void changeSign()
Definition APFloat.h:1279
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition APInt.cpp:1141
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
const BasicBlock & getEntryBlock() const
Definition Function.h:807
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LLVM_ABI void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void setOpcode(unsigned Op)
Definition MCInst.h:201
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:87
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineInstrBundleIterator< const MachineInstr, true > const_reverse_iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
LLVM_ABI void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
LLVM_ABI bool isLiveIn(Register Reg) const
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automata that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReMaterializableImpl(const MachineInstr &MI) const override
PPCInstrInfo(const PPCSubtarget &STI)
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static int getRecordFormOpcode(unsigned Opcode)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
CombinerObjective getCombinerObjective(unsigned Pattern) const override
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
void promoteInstr32To64ForElimEXTSW(const Register &Reg, MachineRegisterInfo *MRI, unsigned BinOpDepth, LiveVariables *LV) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if get the base operand, byte offset of an instruction and the memory width.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
bool isSVR4ABI() const
const PPCTargetMachine & getTargetMachine() const
void dump() const
Definition Pass.cpp:146
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void closeRegion()
Finalize the region boundaries and record live ins and live outs.
LLVM_ABI void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
LLVM_ABI void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
LLVM_ABI void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
const TargetInstrInfo * TII
Target instruction information.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
PPCII - This namespace holds all of the PowerPC target-specific per-instruction flags.
@ MO_TOC_LO
Definition PPC.h:185
Define some predicates that are used for node matching.
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
static bool isVFRegister(MCRegister Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
template class LLVM_TEMPLATE_ABI opt< bool >
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
static unsigned getCRFromCRBit(unsigned SrcReg)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
@ REASSOC_XY_BCA
@ REASSOC_XY_BAC
@ REASSOC_XY_AMM_BMM
@ REASSOC_XMM_AMM_BMM
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
@ SOK_CRBitSpill
@ SOK_VSXVectorSpill
@ SOK_SpillToVSR
@ SOK_Int4Spill
@ SOK_PairedVecSpill
@ SOK_VectorFloat8Spill
@ SOK_UAccumulatorSpill
@ SOK_PairedG8Spill
@ SOK_DMRSpill
@ SOK_VectorFloat4Spill
@ SOK_Float8Spill
@ SOK_Float4Spill
@ SOK_VRVectorSpill
@ SOK_WAccumulatorSpill
@ SOK_SPESpill
@ SOK_CRSpill
@ SOK_AccumulatorSpill
@ SOK_Int8Spill
@ SOK_LastOpcodeSpill
@ SOK_DMRpSpill
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t IsSummingOperands
uint64_t OpNoForForwarding
uint64_t ImmMustBeMultipleOf
uint64_t ZeroIsSpecialNew
uint64_t ZeroIsSpecialOrig
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.