using namespace llvm;

using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx;

#define DEBUG_TYPE "peephole-opt"
static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
                                cl::desc("Aggressive extension optimization"));

static cl::opt<bool>
    DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                    cl::desc("Disable the peephole optimizer"));

static cl::opt<bool>
    DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
                      cl::desc("Disable advanced copy optimization"));

static cl::opt<bool> DisableNAPhysCopyOpt(
    "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
    cl::desc("Disable non-allocatable physical register copy optimization"));

static cl::opt<unsigned>
    RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10),
                    cl::desc("Limit the length of PHI chains to lookup"));

static cl::opt<unsigned> MaxRecurrenceChain(
    "recurrence-chain-limit", cl::Hidden, cl::init(3),
    cl::desc("Maximum length of recurrence chain when evaluating the benefit "
             "of commuting operands"));
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
class ValueTrackerResult;
class RecurrenceInstr;

/// Interface to query instructions amenable to copy rewriting.
class Rewriter {
protected:
  MachineInstr &CopyLike;
  int CurrentSrcIdx = 0; ///< The index of the source being rewritten.

public:
  Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
  virtual ~Rewriter() = default;

  /// Get the next rewritable source (Src) and the register it defines (Dst).
  /// Returns false when no source is left to rewrite.
  virtual bool getNextRewritableSource(RegSubRegPair &Src,
                                       RegSubRegPair &Dst) = 0;

  /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
  /// Returns true when the rewriting was possible.
  virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
};
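// The concrete rewriters below (CopyRewriter, UncoalescableRewriter,
// InsertSubregRewriter, ExtractSubregRewriter, RegSequenceRewriter) each
// adapt one copy-like opcode to this two-call protocol: repeatedly ask
// getNextRewritableSource() for a (Src, Dst) pair, search for a better
// source for Dst, then commit it with RewriteCurrentSource().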
/// Rewriter for plain COPY instructions.
class CopyRewriter : public Rewriter {
public:
  CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
    assert(MI.isCopy() && "Expected copy instruction");
  }
  ~CopyRewriter() override = default;

  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // A COPY has exactly one rewritable source: operand 1.
    if (++CurrentSrcIdx > 1)
      return false;
    const MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
    Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
    // What we track are the alternative sources of the definition.
    const MachineOperand &MODef = CopyLike.getOperand(0);
    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
    MOSrc.setReg(NewReg);
    MOSrc.setSubReg(NewSubReg);
    return true;
  }
};
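// Illustrative walk-through (hypothetical MIR, made-up register classes):
// for a copy such as
//   %1:gpr32 = COPY %0.sub_lo:gpr64
// getNextRewritableSource reports Src = (%0, sub_lo) and Dst = (%1, 0)
// exactly once, and a successful rewrite patches operand 1 of the COPY in
// place.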
/// Helper class to rewrite uncoalescable copy-like instructions into new
/// COPY (coalescer-friendly) instructions.
class UncoalescableRewriter : public Rewriter {
  int NumDefs; ///< Number of defs in the copy-like instruction.

public:
  UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
    NumDefs = MI.getDesc().getNumDefs();
  }

  /// Every definition must be checked to rewrite an uncoalescable copy-like
  /// instruction, so hand each non-dead definition back one at a time.
  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // Find the next non-dead definition and continue from there.
    if (CurrentSrcIdx == NumDefs)
      return false;

    while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
      ++CurrentSrcIdx;
      if (CurrentSrcIdx == NumDefs)
        return false;
    }

    // What we track are the alternative sources of the definition.
    Src = RegSubRegPair(0, 0);
    const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());

    CurrentSrcIdx++;
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    // The sources of an uncoalescable copy are rewritten by emitting new
    // COPYs elsewhere, never in place.
    return false;
  }
};
/// Specialized rewriter for INSERT_SUBREG instructions.
class InsertSubregRewriter : public Rewriter {
public:
  InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
    assert(MI.isInsertSubreg() && "Invalid instruction");
  }

  /// CopyLike has the form: dst = INSERT_SUBREG Src1, Src2.src2SubIdx,
  /// subIdx. Src1 has the same register class as dst, so there is nothing
  /// to rewrite there; only Src2 may not be coalescer friendly.
  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // If we already got the only source we can rewrite, we are done.
    if (CurrentSrcIdx == 2)
      return false;
    CurrentSrcIdx = 2;
    const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
    Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
    const MachineOperand &MODef = CopyLike.getOperand(0);
    // We want to track something that is compatible with the
    // partial definition.
    if (MODef.getSubReg())
      // Bail if we have to compose sub-register indices.
      return false;
    Dst = RegSubRegPair(MODef.getReg(),
                        (unsigned)CopyLike.getOperand(3).getImm());
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    if (CurrentSrcIdx != 2)
      return false;
    // We are rewriting the inserted register.
    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    MO.setReg(NewReg);
    MO.setSubReg(NewSubReg);
    return true;
  }
};
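// Illustrative walk-through (hypothetical MIR): for
//   %2:gpr64 = INSERT_SUBREG %0:gpr64, %1:gpr32, %subreg.sub_lo
// the only rewritable source is operand 2, so the rewriter reports
// Src = (%1, 0) and Dst = (%2, sub_lo): any value proven equal to %1 must
// be compatible with the sub_lo portion of %2.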
/// Specialized rewriter for EXTRACT_SUBREG instructions.
class ExtractSubregRewriter : public Rewriter {
  const TargetInstrInfo &TII;

public:
  ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
      : Rewriter(MI), TII(TII) {
    assert(MI.isExtractSubreg() && "Invalid instruction");
  }

  /// CopyLike has the form: dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
  /// There is only one rewritable source: Src.subIdx, which defines
  /// dst.dstSubIdx.
  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // If we already got the only source we can rewrite, we are done.
    if (CurrentSrcIdx == 1)
      return false;
    CurrentSrcIdx = 1;
    const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
    // If we have to compose sub-register indices, bail out.
    if (MOExtractedReg.getSubReg())
      return false;

    Src = RegSubRegPair(MOExtractedReg.getReg(),
                        CopyLike.getOperand(2).getImm());

    // We want to track something that is compatible with the definition.
    const MachineOperand &MODef = CopyLike.getOperand(0);
    Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    // The only source we can rewrite is the input of the extract.
    if (CurrentSrcIdx != 1)
      return false;

    CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);

    // If we find a source that does not require extracting anything,
    // rewrite the whole operation as a COPY.
    if (!NewSubReg) {
      // Move the current index to an invalid position: we do not want any
      // further tracking of this instruction.
      CurrentSrcIdx = -1;
      // Get rid of the sub-register index.
      CopyLike.removeOperand(2);
      // Morph the operation into a COPY.
      CopyLike.setDesc(TII.get(TargetOpcode::COPY));
      return true;
    }
    CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
    return true;
  }
};
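// Note the degenerate case above: when the tracked value is found in a full
// register (NewSubReg == 0) there is nothing left to extract, so e.g.
// (hypothetical MIR)
//   %1:gpr32 = EXTRACT_SUBREG %0:gpr64, %subreg.sub_lo
// is morphed into
//   %1:gpr32 = COPY %2:gpr32
// once %2 is known to already hold the sub_lo value.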
/// Specialized rewriter for REG_SEQUENCE instructions.
class RegSequenceRewriter : public Rewriter {
public:
  RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
    assert(MI.isRegSequence() && "Invalid instruction");
  }

  /// CopyLike has the form:
  /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
  /// Each call returns a different source, walking all the available ones.
  bool getNextRewritableSource(RegSubRegPair &Src,
                               RegSubRegPair &Dst) override {
    // If this is the first call, move to the first argument.
    if (CurrentSrcIdx == 0) {
      CurrentSrcIdx = 1;
    } else {
      // Otherwise, move to the next argument and check that it is valid.
      CurrentSrcIdx += 2;
      if (static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
        return false;
    }
    const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
    Src.Reg = MOInsertedReg.getReg();
    Src.SubReg = MOInsertedReg.getSubReg();

    // We want to track something that is compatible with the related
    // partial definition.
    Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();

    const MachineOperand &MODef = CopyLike.getOperand(0);
    Dst.Reg = MODef.getReg();
    assert(MODef.getSubReg() == 0 && "cannot have subregister def in SSA");
    return true;
  }

  bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
    // We cannot rewrite out-of-bounds operands.
    // Moreover, rewritable sources are at odd positions.
    if ((CurrentSrcIdx & 1) != 1 ||
        static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
      return false;

    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    MO.setReg(NewReg);
    MO.setSubReg(NewSubReg);
    return true;
  }
};
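// Illustrative operand layout (hypothetical MIR): in
//   %3:gpr64 = REG_SEQUENCE %0:gpr32, %subreg.sub_lo, %1:gpr32, %subreg.sub_hi
// the rewritable sources sit at odd operand indices (1, 3, ...), each
// followed by the sub-register index it defines, which is why the walk
// above advances CurrentSrcIdx by two.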
class PeepholeOptimizer : private MachineFunction::Delegate {
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineDominatorTree *DT = nullptr; // Machine dominator tree
  MachineLoopInfo *MLI = nullptr;

public:
  PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
      : DT(DT), MLI(MLI) {}

  bool run(MachineFunction &MF);

  /// Track Def -> Use info used for rewriting copies.
  using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;

  /// Sequence of instructions that formulate a recurrence cycle.
  using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;

private:
  bool optimizeCmpInstr(MachineInstr &MI);
  bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
                        SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeSelect(MachineInstr &MI,
                      SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeCondBranch(MachineInstr &MI);
  bool optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter);
  bool optimizeCoalescableCopy(MachineInstr &MI);
  bool optimizeUncoalescableCopy(MachineInstr &MI,
                                 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
  bool optimizeRecurrence(MachineInstr &PHI);
  bool findNextSource(const TargetRegisterClass *DefRC, unsigned DefSubReg,
                      RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
  bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                       DenseMap<Register, MachineInstr *> &ImmDefMIs);
  bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                     DenseMap<Register, MachineInstr *> &ImmDefMIs,
                     bool &Deleted);

  /// Finds recurrence cycles, but only ones formulated around a def operand
  /// and a use operand that are tied. If there is a use operand commutable
  /// with the tied use operand, find the recurrence cycle along that operand
  /// as well.
  bool findTargetRecurrence(Register Reg,
                            const SmallSet<Register, 2> &TargetReg,
                            RecurrenceCycle &RC);

  /// If copy instruction \p MI is a virtual register copy or a copy of a
  /// constant physical register to a virtual register, track it in
  /// CopySrcMIs. If this virtual register was previously seen as a copy,
  /// replace the uses of this copy with the previously seen copy's
  /// destination register.
  bool foldRedundantCopy(MachineInstr &MI);

  /// Is \p Reg a non-allocatable physical register?
  bool isNAPhysCopy(Register Reg);

  /// If copy instruction \p MI is a non-allocatable virtual<->physical
  /// register copy, track it in the \p NAPhysToVirtMIs map, and fold away a
  /// later copy back to the same physical register when it is provably
  /// redundant.
  bool
  foldRedundantNAPhysCopy(MachineInstr &MI,
                          DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);

  bool isLoadFoldable(MachineInstr &MI,
                      SmallSet<Register, 16> &FoldAsLoadDefCandidates);

  /// Check whether \p MI is a copy-like instruction that the register
  /// coalescer is able to deal with.
  static bool isCoalescableCopy(const MachineInstr &MI) {
    // SubregToRegs are not interesting, because they are already register
    // coalescer friendly.
    return MI.isCopy() ||
           (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() ||
                                   MI.isExtractSubreg()));
  }

  /// Check whether \p MI is a copy-like instruction that is unable to be
  /// coalesced by the register coalescer.
  static bool isUncoalescableCopy(const MachineInstr &MI) {
    return MI.isBitcast() ||
           (!DisableAdvCopyOpt &&
            (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
             MI.isExtractSubregLike()));
  }

  MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def,
                              RewriteMapTy &RewriteMap);

  /// Copies to virtual registers keyed by source register. Never holds a
  /// physical register that would require def tracking.
  DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;

  // MachineFunction::Delegate implementation; used to keep CopySrcMIs
  // consistent while instructions are inserted, removed, or re-described.
  void MF_HandleInsertion(MachineInstr &MI) override {}

  bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) {
    if (!MI.isCopy())
      return false;

    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSubReg = MI.getOperand(1).getSubReg();
    if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
      return false;

    SrcPair = RegSubRegPair(SrcReg, SrcSubReg);
    return true;
  }

  // If a COPY instruction is to be deleted or changed, remove it from
  // CopySrcMIs as well.
  void deleteChangedCopy(MachineInstr &MI) {
    RegSubRegPair SrcPair;
    if (!getCopySrc(MI, SrcPair))
      return;

    auto It = CopySrcMIs.find(SrcPair);
    if (It != CopySrcMIs.end() && It->second == &MI)
      CopySrcMIs.erase(It);
  }

  void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); }

  void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override {
    deleteChangedCopy(MI);
  }
};
class PeepholeOptimizerLegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification

  PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineLoopInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    if (Aggressive) {
      AU.addRequired<MachineDominatorTreeWrapperPass>();
      AU.addPreserved<MachineDominatorTreeWrapperPass>();
    }
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setIsSSA();
  }
};
/// Helper class to hold instructions that are inside recurrence cycles.
/// The recurrence cycle is formulated around the def operand and the tied
/// use operand of an instruction; CommutePair records the operand swap
/// needed to line the two up, if any.
class RecurrenceInstr {
public:
  using IndexPair = std::pair<unsigned, unsigned>;

  RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
  RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
      : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}

  MachineInstr *getMI() const { return MI; }
  std::optional<IndexPair> getCommutePair() const { return CommutePair; }

private:
  MachineInstr *MI;
  std::optional<IndexPair> CommutePair;
};
/// Helper class to hold the result of a value-tracking query: the set of
/// (register, sub-register) sources and the instruction that uses them.
class ValueTrackerResult {
private:
  /// The results of the value trackers.
  SmallVector<RegSubRegPair, 2> RegSrcs;

  /// Instruction using the sources in RegSrcs.
  const MachineInstr *Inst = nullptr;

public:
  ValueTrackerResult() = default;

  ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); }

  bool isValid() const { return getNumSources() > 0; }

  void setInst(const MachineInstr *I) { Inst = I; }
  const MachineInstr *getInst() const { return Inst; }

  void addSource(Register SrcReg, unsigned SrcSubReg) {
    RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
  }

  void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
    assert(Idx < getNumSources() && "Reg pair source out of index");
    RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
  }

  int getNumSources() const { return RegSrcs.size(); }

  RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; }

  Register getSrcReg(int Idx) const {
    assert(Idx < getNumSources() && "Reg source out of index");
    return RegSrcs[Idx].Reg;
  }

  unsigned getSrcSubReg(int Idx) const {
    assert(Idx < getNumSources() && "SubReg source out of index");
    return RegSrcs[Idx].SubReg;
  }

  bool operator==(const ValueTrackerResult &Other) const {
    if (Other.getInst() != getInst())
      return false;

    if (Other.getNumSources() != getNumSources())
      return false;

    for (int i = 0, e = Other.getNumSources(); i != e; ++i)
      if (Other.getSrcReg(i) != getSrcReg(i) ||
          Other.getSrcSubReg(i) != getSrcSubReg(i))
        return false;
    return true;
  }
};
/// Helper class to track the possible sources of a value defined by a
/// (chain of) copy-related instructions. Given a definition, it follows the
/// use-def chain to find successive suitable sources that could rewrite the
/// definition into "def = COPY src".
class ValueTracker {
private:
  /// The current point into the use-def chain.
  const MachineInstr *Def = nullptr;

  /// The index of the definition in Def.
  unsigned DefIdx = 0;

  /// The sub-register index of the definition.
  unsigned DefSubReg;

  /// The register where the value can be found.
  Register Reg;

  /// MachineRegisterInfo used to perform tracking.
  const MachineRegisterInfo &MRI;

  /// Optional TargetInstrInfo used to look for more complex patterns.
  const TargetInstrInfo *TII;

  /// Dispatcher to the right underlying implementation of getNextSource.
  ValueTrackerResult getNextSourceImpl();

  /// Specialized version of getNextSource for Copy instructions.
  ValueTrackerResult getNextSourceFromCopy();

  /// Specialized version of getNextSource for Bitcast instructions.
  ValueTrackerResult getNextSourceFromBitcast();

  /// Specialized version of getNextSource for RegSequence instructions.
  ValueTrackerResult getNextSourceFromRegSequence();

  /// Specialized version of getNextSource for InsertSubreg instructions.
  ValueTrackerResult getNextSourceFromInsertSubreg();

  /// Specialized version of getNextSource for ExtractSubreg instructions.
  ValueTrackerResult getNextSourceFromExtractSubreg();

  /// Specialized version of getNextSource for SubregToReg instructions.
  ValueTrackerResult getNextSourceFromSubregToReg();

  /// Specialized version of getNextSource for PHI instructions.
  ValueTrackerResult getNextSourceFromPHI();

public:
  /// Create a ValueTracker for the value defined by \p Reg (sub-register
  /// index \p DefSubReg). \p TII is optional; without it, tracking stops at
  /// the generic opcodes and does not look through target "-Like" ones.
  ValueTracker(Register Reg, unsigned DefSubReg,
               const MachineRegisterInfo &MRI,
               const TargetInstrInfo *TII = nullptr)
      : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
    if (!Reg.isPhysical()) {
      Def = MRI.getVRegDef(Reg);
      DefIdx = MRI.def_begin(Reg).getOperandNo();
    }
  }

  /// Following the use-def chain, get the next available source for the
  /// tracked value. \return an invalid result when the tracking cannot go
  /// further.
  ValueTrackerResult getNextSource();
};
char PeepholeOptimizerLegacy::ID = 0;

char &llvm::PeepholeOptimizerLegacyID = PeepholeOptimizerLegacy::ID;

INITIALIZE_PASS_BEGIN(PeepholeOptimizerLegacy, DEBUG_TYPE,
                      "Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(PeepholeOptimizerLegacy, DEBUG_TYPE,
                    "Peephole Optimizations", false, false)
/// If the instruction is a coalescable extension (it reads a single register
/// and writes a single register without modifying the source), and the
/// source value is preserved as a sub-register of the result, then replace
/// all reachable uses of the source with the sub-register of the result.
bool PeepholeOptimizer::optimizeExtInstr(
    MachineInstr &MI, MachineBasicBlock &MBB,
    SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  Register SrcReg, DstReg;
  unsigned SubIdx;
  if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
    return false;

  if (DstReg.isPhysical() || SrcReg.isPhysical())
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx =
      TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;

  // The source has other uses. See if we can replace them with uses of the
  // result of the extension.
  SmallPtrSet<MachineBasicBlock *, 4> ReachedBBs;
  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    ReachedBBs.insert(UI.getParent());

  // Uses that are in the same BB as the extension.
  SmallVector<MachineOperand *, 8> Uses;
  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand *, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI == &MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // Replacing a SUBREG_TO_REG use would drop the implicit zero-extension
    // guarantee it asserts about the upper bits, so skip those uses.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == &MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in
      // order to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend the live
      // range of the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    Uses.append(ExtendedUses.begin(), ExtendedUses.end());

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    // Look for PHI uses of the extended result: we don't want to extend the
    // liveness of a PHI input, as a PHI use is expected to kill its sources.
    SmallPtrSet<MachineBasicBlock *, 4> PHIBBs;
    for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
      if (UI.isPHI())
        PHIBBs.insert(UI.getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (MachineOperand *UseMO : Uses) {
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      // Sub-register defs are illegal in machine SSA, so copy into a
      // register of the use's class instead of generating one.
      if (UseSrcSubIdx)
        RC = MRI->getRegClass(UseMI->getOperand(0).getReg());

      Register NewVR = MRI->createVirtualRegister(RC);
      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVR)
          .addReg(DstReg, 0, SubIdx);
      if (UseSrcSubIdx)
        UseMO->setSubReg(0);

      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
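// Illustrative effect (hypothetical MIR): given
//   %1:gpr64 = SEXT %0:gpr32
//   ...
//   use of %0
// the later use of %0 is redirected to a fresh copy of the low part of the
// extension result,
//   %2:gpr32 = COPY %1.sub_32
//   use of %2
// so the already-computed extension is reused instead of keeping both
// values alive independently.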
/// If the instruction is a compare and the previous instruction it is
/// comparing against already sets (or could be modified to set) the same
/// flags, remove the comparison and use the existing flags.
bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
  // If this instruction is a comparison against zero and isn't comparing a
  // physical register, we can try to optimize it.
  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
      SrcReg.isPhysical() || SrcReg2.isPhysical())
    return false;

  // Attempt to optimize the comparison instruction.
  LLVM_DEBUG(dbgs() << "Attempting to optimize compare: " << MI);
  if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue,
                                MRI)) {
    LLVM_DEBUG(dbgs() << "  -> Successfully optimized compare!\n");
    return true;
  }

  return false;
}
/// Optimize a select instruction.
bool PeepholeOptimizer::optimizeSelect(
    MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  unsigned TrueOp = 0;
  unsigned FalseOp = 0;
  bool Optimizable = false;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
    return false;
  if (!Optimizable)
    return false;
  if (!TII->optimizeSelect(MI, LocalMIs))
    return false;
  LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
  MI.eraseFromParent();
  return true;
}
/// Check if a simpler conditional branch can be generated.
bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
  return TII->optimizeCondBranch(MI);
}
/// Try to find the next source that shares the same register file for the
/// value defined by \p RegSubReg. When it returns true, \p RewriteMap can be
/// queried (via getNewSource) for the replacement source.
bool PeepholeOptimizer::findNextSource(const TargetRegisterClass *DefRC,
                                       unsigned DefSubReg,
                                       RegSubRegPair RegSubReg,
                                       RewriteMapTy &RewriteMap) {
  // Do not try to find a new source for a physical register: so far we do
  // not have any motivating example for doing that.
  Register Reg = RegSubReg.Reg;
  SmallVector<RegSubRegPair, 4> SrcToLook;
  RegSubRegPair CurSrcPair = RegSubReg;
  SrcToLook.push_back(CurSrcPair);

  unsigned PHICount = 0;
  do {
    CurSrcPair = SrcToLook.pop_back_val();
    // As explained above, do not handle physical registers.
    if (CurSrcPair.Reg.isPhysical())
      return false;

    ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);

    // Follow the chain of copies until we find a more suitable source, a
    // PHI, or have to abort.
    while (true) {
      ValueTrackerResult Res = ValTracker.getNextSource();
      // Abort at the end of a chain (without finding a suitable source).
      if (!Res.isValid())
        return false;

      // Insert the Def -> Use entry for the recently found source.
      auto [InsertPt, WasInserted] = RewriteMap.try_emplace(CurSrcPair, Res);
      if (!WasInserted) {
        const ValueTrackerResult &CurSrcRes = InsertPt->second;
        assert(CurSrcRes == Res && "ValueTrackerResult found must match");
        // An existing entry with multiple sources is a PHI cycle we must
        // avoid. Otherwise it's an entry with a valid next source we
        // already found.
        if (CurSrcRes.getNumSources() > 1) {
          LLVM_DEBUG(dbgs()
                     << "findNextSource: found PHI cycle, aborting...\n");
          return false;
        }
        break;
      }

      // A ValueTrackerResult usually has one source unless it comes from a
      // PHI instruction. Queue the found PHI edges for further lookup.
      unsigned NumSrcs = Res.getNumSources();
      if (NumSrcs > 1) {
        PHICount++;
        if (PHICount >= RewritePHILimit) {
          LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
          return false;
        }

        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcToLook.push_back(Res.getSrc(i));
        break;
      }

      CurSrcPair = Res.getSrc(0);
      // Do not extend the live-ranges of physical registers as they add
      // constraints to the register allocator.
      if (CurSrcPair.Reg.isPhysical())
        return false;

      // Keep following the chain if the value isn't any better yet.
      const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
      if (!TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
                                     CurSrcPair.SubReg))
        continue;

      // We currently cannot deal with subreg operands on PHI instructions
      // (see insertPHI()).
      if (PHICount > 0 && CurSrcPair.SubReg != 0)
        continue;

      // We found a suitable source, and are done with this chain.
      break;
    }
  } while (!SrcToLook.empty());

  // If we did not find a more suitable source, there is nothing to optimize.
  return CurSrcPair.Reg != Reg;
}
/// Insert a PHI instruction with incoming edges \p SrcRegs that are
/// guaranteed to have the same register class. This is necessary whenever we
/// successfully traverse a PHI instruction and find suitable sources coming
/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
/// suitable for use in a new COPY instruction.
static MachineInstr &insertPHI(MachineRegisterInfo &MRI,
                               const TargetInstrInfo &TII,
                               const SmallVectorImpl<RegSubRegPair> &SrcRegs,
                               MachineInstr &OrigPHI) {
  assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");

  const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
  // NewRC is only correct if no subregisters are involved. findNextSource()
  // should have rejected those cases already.
  assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
  Register NewVR = MRI.createVirtualRegister(NewRC);
  MachineBasicBlock *MBB = OrigPHI.getParent();
  MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
                                    TII.get(TargetOpcode::PHI), NewVR);

  unsigned MBBOpIdx = 2;
  for (const RegSubRegPair &RegPair : SrcRegs) {
    MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
    MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
    // Since we're extending the lifetime of RegPair.Reg, clear the kill
    // flags to accommodate that.
    MRI.clearKillFlags(RegPair.Reg);
    MBBOpIdx += 2;
  }

  return *MIB;
}
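// Illustrative result (hypothetical MIR): if the tracked PHI
//   %3:gpr32 = PHI %1:gpr32, %bb.1, %2:gpr32, %bb.2
// has rewritten sources %4 and %5, insertPHI materializes
//   %6:gpr32 = PHI %4:gpr32, %bb.1, %5:gpr32, %bb.2
// right before the original PHI, and %6 becomes the new source.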
/// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find the new
/// source to use for rewriting. If \p HandleMultipleSources is true and
/// multiple sources for a given \p Def are found along the way, we found a
/// PHI instruction that needs to be rewritten.
static RegSubRegPair
getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
             RegSubRegPair Def,
             const PeepholeOptimizer::RewriteMapTy &RewriteMap,
             bool HandleMultipleSources = true) {
  RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
  while (true) {
    ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
    // If there is no entry in the map, LookupSrc is the new source.
    if (!Res.isValid())
      return LookupSrc;

    // There's only one source for this definition, keep searching...
    unsigned NumSrcs = Res.getNumSources();
    if (NumSrcs == 1) {
      LookupSrc.Reg = Res.getSrcReg(0);
      LookupSrc.SubReg = Res.getSrcSubReg(0);
      continue;
    }

    // TODO: remove once multiple sources with coalescable copies are
    // supported.
    if (!HandleMultipleSources)
      break;

    // Multiple sources: rewrite each incoming edge, then insert a PHI.
    SmallVector<RegSubRegPair, 4> NewPHISrcs;
    for (unsigned i = 0; i < NumSrcs; ++i) {
      RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
      NewPHISrcs.push_back(
          getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
    }

    // Build the new PHI node and return its def.
    MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
    MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
    LLVM_DEBUG(dbgs() << "-- getNewSource\n");
    LLVM_DEBUG(dbgs() << "   Replacing: " << OrigPHI);
    LLVM_DEBUG(dbgs() << "        With: " << NewPHI);
    const MachineOperand &MOPHIDef = NewPHI.getOperand(0);
    return RegSubRegPair(MOPHIDef.getReg(), MOPHIDef.getSubReg());
  }

  return RegSubRegPair(0, 0);
}
bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) {
  bool Changed = false;
  // Rewrite each rewritable source.
  RegSubRegPair Dst;
  RegSubRegPair TrackPair;
  while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
    if (Dst.Reg.isPhysical()) {
      // Do not try to find a new source for a physical register.
      continue;
    }

    const TargetRegisterClass *DefRC = MRI->getRegClass(Dst.Reg);

    // Keep track of PHI nodes and their incoming edges when looking for
    // sources.
    RewriteMapTy RewriteMap;
    // Try to find a more suitable source. If we failed to do so, or got the
    // actual source back, move on to the next source.
    if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
      continue;

    // Get the new source to rewrite. TODO: only enable handling of multiple
    // sources (PHIs) once we have a motivating example and test cases.
    RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
                                        /*HandleMultipleSources=*/false);
    assert(TrackPair.Reg != NewSrc.Reg &&
           "should not rewrite source to original value");
    if (!NewSrc.Reg)
      continue;

    // Constrain the register class of the new source so the sub-register
    // index is valid on it.
    const TargetRegisterClass *RC = MRI->getRegClass(NewSrc.Reg);
    const TargetRegisterClass *WithSubRC =
        TRI->getSubClassWithSubReg(RC, NewSrc.SubReg);
    if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
      continue;

    // Rewrite source.
    if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
      // We may have extended the live-range of NewSrc, account for that.
      MRI->clearKillFlags(NewSrc.Reg);
      Changed = true;
    }
  }

  NumRewrittenCopies += Changed;
  return Changed;
}
/// Optimize generic copy instructions to avoid cross register bank copies.
/// The optimization looks through a chain of copies and tries to find a
/// source that has a compatible register class (i.e., one sharing the same
/// register bank). New copies issued by this optimization are register
/// allocator friendly. This optimization does not remove any copy, as that
/// may over-constrain the register allocator; it only replaces operands
/// when possible.
bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
  assert(isCoalescableCopy(MI) && "Invalid argument");
  assert(MI.getDesc().getNumDefs() == 1 &&
         "Coalescer can understand multiple defs?!");
  const MachineOperand &MODef = MI.getOperand(0);
  // Do not rewrite physical definitions.
  if (MODef.getReg().isPhysical())
    return false;

  switch (MI.getOpcode()) {
  case TargetOpcode::COPY:
    return optimizeCoalescableCopyImpl(CopyRewriter(MI));
  case TargetOpcode::INSERT_SUBREG:
    return optimizeCoalescableCopyImpl(InsertSubregRewriter(MI));
  case TargetOpcode::EXTRACT_SUBREG:
    return optimizeCoalescableCopyImpl(ExtractSubregRewriter(MI, *TII));
  case TargetOpcode::REG_SEQUENCE:
    return optimizeCoalescableCopyImpl(RegSequenceRewriter(MI));
  default:
    // Handle uncoalescable copy-like instructions.
    if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
        MI.isExtractSubregLike())
      return optimizeCoalescableCopyImpl(UncoalescableRewriter(MI));
    return false;
  }
}
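// Illustrative cross-bank rewrite (hypothetical register banks): given
//   %1:fpr = COPY %0:gpr   ; cross-bank copy
//   ...
//   %2:gpr = COPY %1:fpr   ; cross-bank copy back
// the second copy is rewritten to use the original GPR source,
//   %2:gpr = COPY %0:gpr
// so the register coalescer has a chance to remove it entirely.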
/// Rewrite the source found through \p Def, using \p RewriteMap, and create
/// a new COPY instruction. Right now this is only used to handle
/// uncoalescable copies, since they are copy-like instructions that are not
/// recognized by the register allocator.
MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
                                               RegSubRegPair Def,
                                               RewriteMapTy &RewriteMap) {
  assert(!Def.Reg.isPhysical() && "We do not rewrite physical registers");

  // Find the new source to use in the COPY rewrite.
  RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);

  // Insert the COPY.
  const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
  Register NewVReg = MRI->createVirtualRegister(DefRC);

  // Constrain the class of the new source so the sub-register index is
  // valid on it.
  const TargetRegisterClass *NewSrcRC = MRI->getRegClass(NewSrc.Reg);
  const TargetRegisterClass *WithSubRC =
      TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.SubReg);
  if (!MRI->constrainRegClass(NewSrc.Reg, WithSubRC))
    llvm_unreachable("expected to be able to constrain the register class");

  MachineInstr *NewCopy =
      BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
              TII->get(TargetOpcode::COPY), NewVReg)
          .addReg(NewSrc.Reg, 0, NewSrc.SubReg);

  if (Def.SubReg) {
    NewCopy->getOperand(0).setSubReg(Def.SubReg);
    NewCopy->getOperand(0).setIsUndef();
  }

  LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
  LLVM_DEBUG(dbgs() << "   Replacing: " << CopyLike);
  LLVM_DEBUG(dbgs() << "        With: " << *NewCopy);
  MRI->replaceRegWith(Def.Reg, NewVReg);
  MRI->clearKillFlags(NewVReg);

  // We extended the lifetime of NewSrc.Reg, clear the kill flags to account
  // for that.
  MRI->clearKillFlags(NewSrc.Reg);

  return *NewCopy;
}
/// Optimize copy-like instructions to create register-coalescer-friendly
/// COPYs. The optimization tries to kill off \p MI by looking through a
/// chain of copies for a source with a compatible register class; if one is
/// found for every definition, \p MI is replaced by generic COPYs.
bool PeepholeOptimizer::optimizeUncoalescableCopy(
    MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  assert(isUncoalescableCopy(MI) && "Invalid argument");
  UncoalescableRewriter CpyRewriter(MI);

  // Rewrite each rewritable source by generating new COPYs. This works
  // differently from optimizeCoalescableCopy since it first makes sure that
  // all definitions can be rewritten.
  RewriteMapTy RewriteMap;
  RegSubRegPair Src;
  RegSubRegPair Def;
  SmallVector<RegSubRegPair, 4> RewritePairs;
  while (CpyRewriter.getNextRewritableSource(Src, Def)) {
    // If a physical register is here, this is probably for a good reason.
    // Do not rewrite that.
    if (Def.Reg.isPhysical())
      return false;

    const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);

    // If we do not know how to rewrite this definition, there is no point
    // in trying to kill this instruction.
    if (!findNextSource(DefRC, Def.SubReg, Def, RewriteMap))
      return false;

    RewritePairs.push_back(Def);
  }

  // The change is possible for all defs, do it.
  for (const RegSubRegPair &Def : RewritePairs) {
    // Rewrite the "copy" in a way the register coalescer understands.
    MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
    LocalMIs.insert(&NewCopy);
  }

  // MI is now dead.
  LLVM_DEBUG(dbgs() << "Deleting uncoalescable copy: " << MI);
  MI.eraseFromParent();
  ++NumUncoalescableCopies;
  return true;
}
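// Illustrative outcome (hypothetical MIR, made-up target opcode): for an
// instruction the coalescer cannot parse, e.g. an insert-like
//   %2:gpr64 = TARGET_INSERT %0:gpr64, %1:gpr32
// whose value is proven to also live in %3, the pass emits
//   %4:gpr64 = COPY %3
// redirects all uses of %2 to %4, and erases the TARGET_INSERT.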
/// Check whether \p MI is a candidate for folding into a later instruction.
/// We only fold loads to virtual registers whose defined value has a single
/// non-debug user.
bool PeepholeOptimizer::isLoadFoldable(
    MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
  if (!MI.canFoldAsLoad() || !MI.mayLoad())
    return false;
  const MCInstrDesc &MCID = MI.getDesc();
  if (MCID.getNumDefs() != 1)
    return false;

  Register Reg = MI.getOperand(0).getReg();
  // To reduce compile time, we check MRI->hasOneNonDBGUser when inserting
  // loads. It should be re-checked when processing uses of the load, since
  // uses can be removed during peephole.
  if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
      MRI->hasOneNonDBGUser(Reg)) {
    FoldAsLoadDefCandidates.insert(Reg);
    return true;
  }
  return false;
}
bool PeepholeOptimizer::isMoveImmediate(
    MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
    DenseMap<Register, MachineInstr *> &ImmDefMIs) {
  const MCInstrDesc &MCID = MI.getDesc();
  if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
    return false;
  Register Reg = MI.getOperand(0).getReg();
  if (!Reg.isVirtual())
    return false;

  int64_t ImmVal;
  if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
    return false;

  ImmDefMIs.insert(std::make_pair(Reg, &MI));
  ImmDefRegs.insert(Reg);
  return true;
}
/// Try folding register operands that are defined by move-immediate
/// instructions, i.e. a trivial constant folding optimization, if and only
/// if the def and use are in the same BB.
bool PeepholeOptimizer::foldImmediate(
    MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
    DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
  Deleted = false;
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    if (ImmDefRegs.count(Reg) == 0)
      continue;
    DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
    assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
    if (TII->foldImmediate(MI, *II->second, Reg, MRI)) {
      ++NumImmFold;
      // foldImmediate can delete ImmDefMI if MI was its only user. If
      // ImmDefMI is not deleted, and we happened to produce an identical MI,
      // we can delete MI and replace its users.
      if (MRI->getVRegDef(Reg) &&
          MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
        Register DstReg = MI.getOperand(0).getReg();
        if (DstReg.isVirtual() &&
            MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
          MRI->replaceRegWith(DstReg, Reg);
          MI.eraseFromParent();
          Deleted = true;
        }
      }
      return true;
    }
  }
  return false;
}
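// Illustrative fold (hypothetical MIR and opcodes): with
//   %0:gpr32 = MOVi 42
//   %2:gpr32 = ADDrr %1:gpr32, %0:gpr32
// a target whose TII::foldImmediate hook supports it rewrites the ADD into
//   %2:gpr32 = ADDri %1:gpr32, 42
// and, if %0 has no remaining users, the move-immediate disappears too.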
bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI) {
  assert(MI.isCopy() && "expected a COPY machine instruction");

  RegSubRegPair SrcPair;
  if (!getCopySrc(MI, SrcPair))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  if (!DstReg.isVirtual())
    return false;

  if (CopySrcMIs.insert(std::make_pair(SrcPair, &MI)).second) {
    // First copy of this register seen.
    return false;
  }

  MachineInstr *PrevCopy = CopySrcMIs.find(SrcPair)->second;

  assert(SrcPair.SubReg == PrevCopy->getOperand(1).getSubReg() &&
         "Unexpected mismatching subreg!");

  Register PrevDstReg = PrevCopy->getOperand(0).getReg();

  // Only replace if the copy register classes are the same.
  // TODO: with multiple copies to different register classes, we may want
  // to track multiple copies of the same source register.
  if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
    return false;

  MRI->replaceRegWith(DstReg, PrevDstReg);

  // The lifetime of the previous copy has been extended.
  MRI->clearKillFlags(PrevDstReg);
  return true;
}
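// Illustrative redundancy (hypothetical MIR): given two copies of the same
// source in one block,
//   %1:gpr32 = COPY %0:gpr32
//   ...
//   %2:gpr32 = COPY %0:gpr32
// all uses of %2 are redirected to %1, provided the destination register
// classes match.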
bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
  return Reg.isPhysical() && !MRI->isAllocatable(Reg);
}
bool PeepholeOptimizer::foldRedundantNAPhysCopy(
    MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
  assert(MI.isCopy() && "expected a COPY machine instruction");

  if (DisableNAPhysCopyOpt)
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  if (isNAPhysCopy(SrcReg) && DstReg.isVirtual()) {
    // %vreg = COPY $physreg
    // Track the latest such copy; an intervening redefinition of $physreg
    // invalidates it (see the main loop in run()).
    NAPhysToVirtMIs.insert({SrcReg, &MI});
    return false;
  }

  if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
    return false;

  // $physreg = COPY %vreg
  auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
  if (PrevCopy == NAPhysToVirtMIs.end()) {
    // We can't remove the copy: there was an intervening clobber of the
    // non-allocatable physical register after the copy to virtual.
    LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
                      << MI);
    return false;
  }

  Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
  if (PrevDstReg == SrcReg) {
    // The physical register's state has not changed since the previous
    // phys->virt copy, so copying the value back is a no-op.
    ++NumNAPhysCopies;
    return true;
  }

  // A different virtual register received a copy of this physical register,
  // and we only track one such copy. Avoid getting confused by this new
  // definition and drop the tracked copy.
  LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
  NAPhysToVirtMIs.erase(PrevCopy);
  return false;
}
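// Illustrative pattern (hypothetical non-allocatable physreg $na): given
//   %0:gpr64 = COPY $na
//   ...                  ; no intervening redefinition of $na
//   $na = COPY %0:gpr64
// the second copy writes back the value $na still holds, so it is erased.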
/// \brief Returns true if \p MO is a virtual register operand.
static bool isVirtualRegisterOperand(MachineOperand &MO) {
  return MO.isReg() && MO.getReg().isVirtual();
}

bool PeepholeOptimizer::findTargetRecurrence(
    Register Reg, const SmallSet<Register, 2> &TargetRegs,
    RecurrenceCycle &RC) {
  // A recurrence is found if Reg is in TargetRegs.
  if (TargetRegs.count(Reg))
    return true;

  // Give up if the recurrence chain length is longer than the limit.
  if (RC.size() >= MaxRecurrenceChain)
    return false;

  // Recurrence chains can only be formed by values with a single
  // non-debug use.
  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
  unsigned Idx = MI.findRegisterUseOperandIdx(Reg, nullptr);

  // Only interested in recurrences whose instructions have only one def,
  // which is a virtual register.
  if (MI.getDesc().getNumDefs() != 1)
    return false;

  MachineOperand &DefOp = MI.getOperand(0);
  if (!isVirtualRegisterOperand(DefOp))
    return false;

  // Check if the def operand of MI is tied to a use operand. We are only
  // interested in the case where every instruction in the recurrence chain
  // has its def operand tied to one of its use operands.
  unsigned TiedUseIdx;
  if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
    return false;

  if (Idx == TiedUseIdx) {
    RC.push_back(RecurrenceInstr(&MI));
    return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
  }

  // If Idx is not TiedUseIdx, check whether Idx is commutable with
  // TiedUseIdx.
  unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
  if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
    RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
    return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
  }

  return false;
}
/// PHI instructions are eventually lowered to copy instructions. If a PHI is
/// in a loop header, a recurrence may be formulated around the source and
/// destination of the PHI. Commuting operands of the instructions in the
/// recurrence may then enable coalescing of the copy generated from the PHI.
bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
  SmallSet<Register, 2> TargetRegs;
  for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
    MachineOperand &MO = PHI.getOperand(Idx);
    assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
    TargetRegs.insert(MO.getReg());
  }

  bool Changed = false;
  RecurrenceCycle RC;
  if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
    // Commute operands of instructions in RC if necessary so that the copy
    // to be generated from the PHI can be coalesced.
    LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
    for (auto &RI : RC) {
      LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
      auto CP = RI.getCommutePair();
      if (CP) {
        Changed = true;
        TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
                                (*CP).second);
        LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
      }
    }
  }

  return Changed;
}
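// Illustrative recurrence (hypothetical two-address MIR) in a loop header:
//   %a = PHI %init, %bb.0, %c, %bb.1
//   %b = ADD %x, %a        ; def tied to the first use operand
//   %c = ADD %b, %y
// Commuting the operands of the first ADD ties %a to the def, so the copies
// inserted when the PHI is lowered can be coalesced away.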
PreservedAnalyses
PeepholeOptimizerPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  MFPropsModifier _(*this, MF);
  auto *DT =
      Aggressive ? &MFAM.getResult<MachineDominatorTreeAnalysis>(MF) : nullptr;
  auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
  PeepholeOptimizer Impl(DT, MLI);
  bool Changed = Impl.run(MF);
  if (!Changed)
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserve<MachineDominatorTreeAnalysis>();
  PA.preserve<MachineLoopAnalysis>();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

bool PeepholeOptimizerLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  auto *DT = Aggressive
                 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
                 : nullptr;
  auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  PeepholeOptimizer Impl(DT, MLI);
  return Impl.run(MF);
}
bool PeepholeOptimizer::run(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  MF.setDelegate(this);

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    bool SeenMoveImm = false;

    // During this forward scan, at some point it needs to answer the
    // question "given a pointer to an MI in the current BB, is it located
    // before or after the current instruction". To do this, the following
    // set keeps track of the MIs already seen during the scan; if an MI is
    // not in the set, it is assumed to be located after. Newly created MIs
    // have to be inserted into the set as well.
    SmallPtrSet<MachineInstr *, 16> LocalMIs;
    SmallSet<Register, 4> ImmDefRegs;
    DenseMap<Register, MachineInstr *> ImmDefMIs;
    SmallSet<Register, 16> FoldAsLoadDefCandidates;

    // Track when a non-allocatable physical register is copied to a virtual
    // register so that useless moves can be removed. $physreg is the map
    // index; MI is the last valid "%vreg = COPY $physreg" without any
    // intervening re-definition of $physreg.
    DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;

    CopySrcMIs.clear();

    bool IsLoopHeader = MLI->isLoopHeader(&MBB);

    for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
         MII != MIE;) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug instructions. They should not affect this peephole
      // optimization.
      if (MI->isDebugInstr())
        continue;

      if (MI->isPosition())
        continue;

      if (IsLoopHeader && MI->isPHI()) {
        if (optimizeRecurrence(*MI)) {
          Changed = true;
          continue;
        }
      }

      if (!MI->isCopy()) {
        for (const MachineOperand &MO : MI->operands()) {
          // Visit all operands: definitions can be implicit or explicit.
          if (MO.isReg()) {
            Register Reg = MO.getReg();
            if (MO.isDef() && isNAPhysCopy(Reg)) {
              const auto &Def = NAPhysToVirtMIs.find(Reg);
              if (Def != NAPhysToVirtMIs.end()) {
                // A new definition of the non-allocatable physical register
                // invalidates previous copies.
                LLVM_DEBUG(dbgs()
                           << "NAPhysCopy: invalidating because of " << *MI);
                NAPhysToVirtMIs.erase(Def);
              }
            }
          } else if (MO.isRegMask()) {
            const uint32_t *RegMask = MO.getRegMask();
            for (auto &RegMI : NAPhysToVirtMIs) {
              Register Def = RegMI.first;
              if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
                LLVM_DEBUG(dbgs()
                           << "NAPhysCopy: invalidating because of " << *MI);
                NAPhysToVirtMIs.erase(Def);
              }
            }
          }
        }
      }

      if (MI->isImplicitDef() || MI->isKill())
        continue;

      if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
        // Blow away all knowledge about non-allocatable physical registers,
        // since we don't know what's correct anymore.
        // FIXME: handle explicit asm clobbers.
        LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
                          << *MI);
        NAPhysToVirtMIs.clear();
      }

      if ((isUncoalescableCopy(*MI) &&
           optimizeUncoalescableCopy(*MI, LocalMIs)) ||
          (MI->isCompare() && optimizeCmpInstr(*MI)) ||
          (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
        Changed = true;
        continue;
      }

      if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
        // MI is just rewritten.
        Changed = true;
        continue;
      }

      if (MI->isCopy() && (foldRedundantCopy(*MI) ||
                           foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
        LocalMIs.erase(MI);
        LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
        MI->eraseFromParent();
        Changed = true;
        continue;
      }

      if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI and
        // before the already incremented MII. Adjust MII so that the next
        // iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm) {
          bool Deleted;
          Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
          if (Deleted) {
            LocalMIs.erase(MI);
            continue;
          }
        }
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {

        // We keep visiting operands even after a successful fold, which
        // allows folding multiple loads into a single instruction.
        const MCInstrDesc &MIDesc = MI->getDesc();
        for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          Register FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it;
            // we need it for markUsesInDebugValueAsUndef().
            Register FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            if (MachineInstr *FoldMI =
                    TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg,
                                           DefMI)) {
              // Update LocalMIs since we replaced MI with FoldMI and
              // deleted DefMI.
              LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
              LLVM_DEBUG(dbgs() << "     With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              if (MI->shouldUpdateAdditionalCallInfo())
                MI->getMF()->moveAdditionalCallInfo(MI, FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);

              // MI is replaced with FoldMI, so we can keep trying to fold.
              Changed = true;
              MI = FoldMI;
            }
          }
        }
      }

      // If we run into an instruction we can't fold across, discard the
      // load candidates. Note: we might be able to fold *into* this
      // instruction, so this needs to be after the folding logic.
      if (MI->isLoadFoldBarrier()) {
        LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
        FoldAsLoadDefCandidates.clear();
      }
    }
  }

  MF.resetDelegate(this);
  return Changed;
}
ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
  assert(Def->isCopy() && "Invalid definition");
  // Copy instructions are supposed to be: Def = Src.
  // There may be implicit uses preventing the copy from being moved across
  // some target-specific register definitions.
  assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
         "Invalid number of operands");
  assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
  assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");

  const MachineOperand &Src = Def->getOperand(1);
  if (Src.isUndef())
    return ValueTrackerResult();

  Register SrcReg = Src.getReg();
  unsigned SubReg = Src.getSubReg();
  if (DefSubReg) {
    const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
    SubReg = TRI->composeSubRegIndices(SubReg, DefSubReg);
    if (SrcReg.isVirtual()) {
      // Check that the composed subregister exists on the source's class.
      const TargetRegisterClass *RegRC = MRI.getRegClass(SrcReg);
      const TargetRegisterClass *SrcWithSubRC =
          TRI->getSubClassWithSubReg(RegRC, SubReg);
      if (RegRC != SrcWithSubRC)
        return ValueTrackerResult();
    } else if (!TRI->getSubReg(SrcReg, SubReg)) {
      return ValueTrackerResult();
    }
  }

  return ValueTrackerResult(SrcReg, SubReg);
}
ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
  assert(Def->isBitcast() && "Invalid definition");

  // Bail if there are effects that a plain copy will not expose.
  if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
    return ValueTrackerResult();

  // Bitcasts with more than one def are not supported.
  if (Def->getDesc().getNumDefs() != 1)
    return ValueTrackerResult();

  assert(!Def->getOperand(DefIdx).getSubReg() && "no subregister defs in SSA");

  // Find the unique register source of the bitcast.
  unsigned SrcIdx = Def->getNumOperands();
  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
       ++OpIdx) {
    const MachineOperand &MO = Def->getOperand(OpIdx);
    if (!MO.isReg() || !MO.getReg())
      continue;
    // Ignore dead implicit defs.
    if (MO.isImplicit() && MO.isDead())
      continue;
    assert(!MO.isDef() && "We should have skipped all the definitions by now");
    if (SrcIdx != EndOpIdx)
      // Multiple sources?
      return ValueTrackerResult();
    SrcIdx = OpIdx;
  }

  // In some rare cases, Def has no input and SrcIdx is out of bounds;
  // getOperand(SrcIdx) would fail below.
  if (SrcIdx >= Def->getNumOperands())
    return ValueTrackerResult();

  // Stop when any user of the bitcast is a SUBREG_TO_REG: replacing the
  // bitcast with a copy would break the guarantee SUBREG_TO_REG makes about
  // the upper bits.
  const MachineOperand &DefOp = Def->getOperand(DefIdx);
  for (const MachineInstr &UseMI :
       MRI.use_nodbg_instructions(DefOp.getReg())) {
    if (UseMI.isSubregToReg())
      return ValueTrackerResult();
  }

  const MachineOperand &Src = Def->getOperand(SrcIdx);
  if (Src.isUndef())
    return ValueTrackerResult();
  return ValueTrackerResult(Src.getReg(), Src.getSubReg());
}
ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
  assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
         "Invalid definition");

  assert(!Def->getOperand(DefIdx).getSubReg() && "illegal subregister def");

  if (!TII)
    // We could handle the REG_SEQUENCE here, but we do not want to
    // duplicate the code from the generic TII.
    return ValueTrackerResult();

  SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs;
  if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
    return ValueTrackerResult();

  // We are looking at: Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
  // Check if one of the operands exactly defines the subreg we track.
  for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
    if (RegSeqInput.SubIdx == DefSubReg)
      return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);

    const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
    LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
    LaneBitmask ThisOpRegMask =
        TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);

    // Check that the tracked lanes are a subset of this operand's lanes.
    if ((DefMask & ThisOpRegMask) != DefMask)
      continue;

    unsigned ReverseDefCompose =
        TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
    if (!ReverseDefCompose)
      continue;

    unsigned ComposedDefInSrcReg1 =
        TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);

    // Check that the composed subregister exists on the source's class.
    const TargetRegisterClass *SrcRC = MRI.getRegClass(RegSeqInput.Reg);
    const TargetRegisterClass *SrcWithSubRC =
        TRI->getSubClassWithSubReg(SrcRC, ComposedDefInSrcReg1);
    if (SrcRC != SrcWithSubRC)
      return ValueTrackerResult();

    return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
  }

  // If the subreg we are tracking is super-defined by another subreg, we
  // could follow this value. However, this would require composing the
  // subregs, and we do not do that for now.
  return ValueTrackerResult();
}
ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
  assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
         "Invalid definition");

  assert(!Def->getOperand(DefIdx).getSubReg() && "no subreg defs in SSA");

  if (!TII)
    // We could handle the INSERT_SUBREG here, but we do not want to
    // duplicate the code from the generic TII.
    return ValueTrackerResult();

  RegSubRegPair BaseReg;
  RegSubRegPairAndIdx InsertedReg;
  if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
    return ValueTrackerResult();

  // We are looking at: Def = INSERT_SUBREG v0, v1, sub1. There are two
  // cases:
  // #1 DefSubReg == sub1: the value is available in v1.
  if (InsertedReg.SubIdx == DefSubReg) {
    return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
  }
  // #2 Otherwise, if the tracked subreg is not partially defined by the
  // inserted element, we can look through the main register (v0).
  const MachineOperand &MODef = Def->getOperand(DefIdx);
  // If the result register (Def) and the base register (v0) do not have the
  // same register class, or if we would have to compose subregisters, bail.
  if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
      BaseReg.SubReg)
    return ValueTrackerResult();

  // Check that the inserted lanes do not overlap the lanes we track.
  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  if ((TRI->getSubRegIndexLaneMask(DefSubReg) &
       TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx))
          .any())
    return ValueTrackerResult();

  // At this point, the value is available in v0 via the same subreg we used
  // for Def.
  return ValueTrackerResult(BaseReg.Reg, DefSubReg);
}
ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
  assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) &&
         "Invalid definition");
  // We are looking at: Def = EXTRACT_SUBREG v0, sub0.

  // Bail if we would have to compose sub-register indices.
  if (DefSubReg)
    return ValueTrackerResult();

  if (!TII)
    // We could handle the EXTRACT_SUBREG here, but we do not want to
    // duplicate the code from the generic TII.
    return ValueTrackerResult();

  RegSubRegPairAndIdx ExtractSubregInputReg;
  if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
    return ValueTrackerResult();

  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
  if (ExtractSubregInputReg.SubReg)
    return ValueTrackerResult();
  // Otherwise, the value is available in v0.sub0.
  return ValueTrackerResult(ExtractSubregInputReg.Reg,
                            ExtractSubregInputReg.SubIdx);
}
ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
  assert(Def->isSubregToReg() && "Invalid definition");
  // We are looking at: Def = SUBREG_TO_REG Imm, v0, sub0.

  // Bail if we have to compose sub registers: if DefSubReg != sub0, we
  // would have to check that all the bits we track are included in sub0
  // and, if so, determine the right subreg in v0.
  if (DefSubReg != Def->getOperand(3).getImm())
    return ValueTrackerResult();
  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
  if (Def->getOperand(2).getSubReg())
    return ValueTrackerResult();

  return ValueTrackerResult(Def->getOperand(2).getReg(),
                            Def->getOperand(3).getImm());
}
/// Explore each PHI incoming operand and return its sources.
ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
  assert(Def->isPHI() && "Invalid definition");
  ValueTrackerResult Res;

  // If we look for a different subreg, bail as we do not support composing
  // subregs yet.
  if (Def->getOperand(0).getSubReg() != DefSubReg)
    return ValueTrackerResult();

  // Return all register sources for PHI instructions.
  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
    const MachineOperand &MO = Def->getOperand(i);
    assert(MO.isReg() && "Invalid PHI instruction");
    // We have no code to deal with undef operands. They shouldn't happen in
    // normal programs anyway.
    if (MO.isUndef())
      return ValueTrackerResult();
    Res.addSource(MO.getReg(), MO.getSubReg());
  }

  return Res;
}
ValueTrackerResult ValueTracker::getNextSourceImpl() {
  assert(Def && "This method needs a valid definition");

  assert(((Def->getOperand(DefIdx).isDef() &&
           (DefIdx < Def->getDesc().getNumDefs() ||
            Def->getDesc().isVariadic())) ||
          Def->getOperand(DefIdx).isImplicit()) &&
         "Invalid DefIdx");
  if (Def->isCopy())
    return getNextSourceFromCopy();
  if (Def->isBitcast())
    return getNextSourceFromBitcast();
  // All the remaining cases involve "complex" instructions.
  // Bail if we did not ask for the advanced tracking.
  if (DisableAdvCopyOpt)
    return ValueTrackerResult();
  if (Def->isRegSequence() || Def->isRegSequenceLike())
    return getNextSourceFromRegSequence();
  if (Def->isInsertSubreg() || Def->isInsertSubregLike())
    return getNextSourceFromInsertSubreg();
  if (Def->isExtractSubreg() || Def->isExtractSubregLike())
    return getNextSourceFromExtractSubreg();
  if (Def->isSubregToReg())
    return getNextSourceFromSubregToReg();
  if (Def->isPHI())
    return getNextSourceFromPHI();
  return ValueTrackerResult();
}
ValueTrackerResult ValueTracker::getNextSource() {
  // If we reached a point where we cannot move up in the use-def chain,
  // there is nothing we can get.
  if (!Def)
    return ValueTrackerResult();

  ValueTrackerResult Res = getNextSourceImpl();
  if (Res.isValid()) {
    // Update definition, definition index, and subregister for the next
    // call of getNextSource.
    bool OneRegSrc = Res.getNumSources() == 1;
    if (OneRegSrc)
      Reg = Res.getSrcReg(0);
    // Record the instruction containing the found sources before moving up
    // in the use-def chain.
    Res.setInst(Def);

    // If we can still move up in the use-def chain, move to the next
    // definition.
    if (!Reg.isPhysical() && OneRegSrc) {
      MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
      if (DI != MRI.def_end()) {
        Def = DI->getParent();
        DefIdx = DI.getOperandNo();
        DefSubReg = Res.getSrcSubReg(0);
      } else {
        Def = nullptr;
      }
      return Res;
    }
  }
  // If we end up here, we will not be able to find another source for the
  // next iteration. Make sure any new call to getNextSource bails out early
  // by cutting the use-def chain.
  Def = nullptr;
  return Res;
}
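// Typical use of the tracker (sketch, mirroring findNextSource above):
//   ValueTracker ValTracker(Reg, SubReg, *MRI, TII);
//   ValueTrackerResult Res = ValTracker.getNextSource();
//   while (Res.isValid()) { /* inspect Res */ Res = ValTracker.getNextSource(); }
// Each call climbs the use-def chain one copy-like instruction at a time
// until no further source can be proven to hold the same value.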