103#define DEBUG_TYPE "peephole-opt"
107 cl::desc(
"Aggressive extension optimization"));
111 cl::desc(
"Disable the peephole optimizer"));
118 cl::desc(
"Disable advanced copy optimization"));
122 cl::desc(
"Disable non-allocatable physical register copy optimization"));
128 cl::desc(
"Limit the length of PHI chains to lookup"));
134 cl::desc(
"Maximum length of recurrence chain when evaluating the benefit "
135 "of commuting operands"));
137STATISTIC(NumReuse,
"Number of extension results reused");
139STATISTIC(NumImmFold,
"Number of move immediate folded");
142STATISTIC(NumUncoalescableCopies,
"Number of uncoalescable copies optimized");
143STATISTIC(NumRewrittenCopies,
"Number of copies rewritten");
144STATISTIC(NumNAPhysCopies,
"Number of non-allocatable physical copies removed");
148class ValueTrackerResult;
149class RecurrenceInstr;
155 int CurrentSrcIdx = 0;
158 virtual ~Rewriter() =
default;
190 virtual bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg) = 0;
194class CopyRewriter :
public Rewriter {
197 assert(
MI.isCopy() &&
"Expected copy instruction");
199 ~CopyRewriter()
override =
default;
203 if (++CurrentSrcIdx > 1)
207 const MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
210 const MachineOperand &MODef = CopyLike.getOperand(0);
215 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
216 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
225class UncoalescableRewriter :
public Rewriter {
229 UncoalescableRewriter(MachineInstr &
MI) :
Rewriter(
MI) {
230 NumDefs =
MI.getDesc().getNumDefs();
240 if (CurrentSrcIdx == NumDefs)
243 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
245 if (CurrentSrcIdx == NumDefs)
251 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
258 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
264class InsertSubregRewriter :
public Rewriter {
267 assert(
MI.isInsertSubreg() &&
"Invalid instruction");
284 if (CurrentSrcIdx == 2)
288 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
290 const MachineOperand &MODef = CopyLike.getOperand(0);
298 (
unsigned)CopyLike.getOperand(3).getImm());
302 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
303 if (CurrentSrcIdx != 2)
306 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
314class ExtractSubregRewriter :
public Rewriter {
315 const TargetInstrInfo &TII;
318 ExtractSubregRewriter(MachineInstr &
MI,
const TargetInstrInfo &TII)
320 assert(
MI.isExtractSubreg() &&
"Invalid instruction");
331 if (CurrentSrcIdx == 1)
335 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
344 const MachineOperand &MODef = CopyLike.getOperand(0);
349 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
351 if (CurrentSrcIdx != 1)
354 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
365 CopyLike.removeOperand(2);
367 CopyLike.setDesc(TII.get(TargetOpcode::COPY));
370 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
376class RegSequenceRewriter :
public Rewriter {
379 assert(
MI.isRegSequence() &&
"Invalid instruction");
403 if (
static_cast<unsigned>(CurrentSrcIdx) >= CopyLike.getNumOperands())
406 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
407 Src.Reg = MOInsertedReg.
getReg();
412 Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
414 const MachineOperand &MODef = CopyLike.getOperand(0);
416 assert(MODef.
getSubReg() == 0 &&
"cannot have subregister def in SSA");
420 bool RewriteCurrentSource(
Register NewReg,
unsigned NewSubReg)
override {
421 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
429 const TargetInstrInfo *TII =
nullptr;
430 const TargetRegisterInfo *TRI =
nullptr;
431 MachineRegisterInfo *MRI =
nullptr;
432 MachineDominatorTree *DT =
nullptr;
433 MachineLoopInfo *MLI =
nullptr;
436 PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI)
437 : DT(DT), MLI(MLI) {}
439 bool run(MachineFunction &MF);
441 using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
444 using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
447 bool optimizeCmpInstr(MachineInstr &
MI);
448 bool optimizeExtInstr(MachineInstr &
MI, MachineBasicBlock &
MBB,
449 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
450 bool optimizeSelect(MachineInstr &
MI,
451 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
452 bool optimizeCondBranch(MachineInstr &
MI);
454 bool optimizeCoalescableCopyImpl(
Rewriter &&CpyRewriter);
455 bool optimizeCoalescableCopy(MachineInstr &
MI);
456 bool optimizeUncoalescableCopy(MachineInstr &
MI,
457 SmallPtrSetImpl<MachineInstr *> &LocalMIs);
458 bool optimizeRecurrence(MachineInstr &
PHI);
459 bool findNextSource(
const TargetRegisterClass *DefRC,
unsigned DefSubReg,
461 bool isMoveImmediate(MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
462 DenseMap<Register, MachineInstr *> &ImmDefMIs);
463 bool foldImmediate(MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
464 DenseMap<Register, MachineInstr *> &ImmDefMIs,
472 const SmallSet<Register, 2> &TargetReg,
473 RecurrenceCycle &RC);
480 bool foldRedundantCopy(MachineInstr &
MI);
491 foldRedundantNAPhysCopy(MachineInstr &
MI,
492 DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
494 bool isLoadFoldable(MachineInstr &
MI,
495 SmallSet<Register, 16> &FoldAsLoadDefCandidates);
499 static bool isCoalescableCopy(
const MachineInstr &
MI) {
502 return MI.isCopy() ||
504 MI.isExtractSubreg()));
509 static bool isUncoalescableCopy(
const MachineInstr &
MI) {
511 MI.isInsertSubregLike() ||
512 MI.isExtractSubregLike()));
515 MachineInstr &rewriteSource(MachineInstr &CopyLike,
RegSubRegPair Def,
516 RewriteMapTy &RewriteMap);
520 DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
523 void MF_HandleInsertion(MachineInstr &
MI)
override {}
530 unsigned SrcSubReg =
MI.getOperand(1).getSubReg();
531 if (!SrcReg.
isVirtual() && !MRI->isConstantPhysReg(SrcReg))
540 void deleteChangedCopy(MachineInstr &
MI) {
542 if (!getCopySrc(
MI, SrcPair))
545 auto It = CopySrcMIs.find(SrcPair);
546 if (It != CopySrcMIs.end() && It->second == &
MI)
547 CopySrcMIs.erase(It);
550 void MF_HandleRemoval(MachineInstr &
MI)
override { deleteChangedCopy(
MI); }
552 void MF_HandleChangeDesc(MachineInstr &
MI,
const MCInstrDesc &TID)
override {
553 deleteChangedCopy(
MI);
561 PeepholeOptimizerLegacy() : MachineFunctionPass(ID) {}
563 bool runOnMachineFunction(MachineFunction &MF)
override;
565 void getAnalysisUsage(AnalysisUsage &AU)
const override {
576 MachineFunctionProperties getRequiredProperties()
const override {
577 return MachineFunctionProperties().setIsSSA();
587class RecurrenceInstr {
589 using IndexPair = std::pair<unsigned, unsigned>;
591 RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
592 RecurrenceInstr(MachineInstr *MI,
unsigned Idx1,
unsigned Idx2)
593 : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
595 MachineInstr *getMI()
const {
return MI; }
596 std::optional<IndexPair> getCommutePair()
const {
return CommutePair; }
600 std::optional<IndexPair> CommutePair;
606class ValueTrackerResult {
612 const MachineInstr *Inst =
nullptr;
615 ValueTrackerResult() =
default;
619 bool isValid()
const {
return getNumSources() > 0; }
621 void setInst(
const MachineInstr *
I) { Inst =
I; }
622 const MachineInstr *getInst()
const {
return Inst; }
629 void addSource(
Register SrcReg,
unsigned SrcSubReg) {
633 void setSource(
int Idx,
Register SrcReg,
unsigned SrcSubReg) {
634 assert(Idx < getNumSources() &&
"Reg pair source out of index");
638 int getNumSources()
const {
return RegSrcs.size(); }
643 assert(Idx < getNumSources() &&
"Reg source out of index");
644 return RegSrcs[Idx].Reg;
647 unsigned getSrcSubReg(
int Idx)
const {
648 assert(Idx < getNumSources() &&
"SubReg source out of index");
649 return RegSrcs[Idx].SubReg;
653 if (
Other.getInst() != getInst())
656 if (
Other.getNumSources() != getNumSources())
659 for (
int i = 0, e =
Other.getNumSources(); i != e; ++i)
660 if (
Other.getSrcReg(i) != getSrcReg(i) ||
661 Other.getSrcSubReg(i) != getSrcSubReg(i))
686 const MachineInstr *Def =
nullptr;
698 const MachineRegisterInfo &MRI;
701 const TargetInstrInfo *TII;
704 ValueTrackerResult getNextSourceImpl();
707 ValueTrackerResult getNextSourceFromCopy();
710 ValueTrackerResult getNextSourceFromBitcast();
713 ValueTrackerResult getNextSourceFromRegSequence();
716 ValueTrackerResult getNextSourceFromInsertSubreg();
719 ValueTrackerResult getNextSourceFromExtractSubreg();
722 ValueTrackerResult getNextSourceFromSubregToReg();
725 ValueTrackerResult getNextSourceFromPHI();
737 ValueTracker(
Register Reg,
unsigned DefSubReg,
const MachineRegisterInfo &MRI,
738 const TargetInstrInfo *TII =
nullptr)
739 : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
740 if (!Reg.isPhysical()) {
741 Def = MRI.getVRegDef(Reg);
742 DefIdx = MRI.def_begin(Reg).getOperandNo();
751 ValueTrackerResult getNextSource();
756char PeepholeOptimizerLegacy::ID = 0;
761 "Peephole Optimizations",
false,
false)
775bool PeepholeOptimizer::optimizeExtInstr(
780 if (!
TII->isCoalescableExtInstr(
MI, SrcReg, DstReg, SubIdx))
786 if (
MRI->hasOneNonDBGUse(SrcReg))
793 DstRC =
TRI->getSubClassWithSubReg(DstRC, SubIdx);
803 TRI->getSubClassWithSubReg(
MRI->getRegClass(SrcReg), SubIdx) !=
nullptr;
809 ReachedBBs.insert(UI.getParent());
817 bool ExtendLife =
true;
819 MachineInstr *UseMI = UseMO.getParent();
823 if (UseMI->isPHI()) {
829 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
849 if (
UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
853 if (UseMBB == &
MBB) {
855 if (!LocalMIs.count(
UseMI))
856 Uses.push_back(&UseMO);
857 }
else if (ReachedBBs.count(UseMBB)) {
860 Uses.push_back(&UseMO);
864 ExtendedUses.push_back(&UseMO);
873 if (ExtendLife && !ExtendedUses.empty())
875 Uses.append(ExtendedUses.begin(), ExtendedUses.end());
880 SmallPtrSet<MachineBasicBlock *, 4> PHIBBs;
885 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
887 PHIBBs.insert(UI.getParent());
889 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
890 for (MachineOperand *UseMO : Uses) {
891 MachineInstr *UseMI = UseMO->getParent();
892 MachineBasicBlock *UseMBB = UseMI->getParent();
893 if (PHIBBs.count(UseMBB))
898 MRI->clearKillFlags(DstReg);
899 MRI->constrainRegClass(DstReg, DstRC);
917 RC = MRI->getRegClass(UseMI->getOperand(0).getReg());
919 Register NewVR = MRI->createVirtualRegister(RC);
920 BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
921 TII->get(TargetOpcode::COPY), NewVR)
922 .addReg(DstReg, {}, SubIdx);
926 UseMO->setReg(NewVR);
943 int64_t CmpMask, CmpValue;
950 if (
TII->optimizeCompareInstr(
MI, SrcReg, SrcReg2, CmpMask, CmpValue,
MRI)) {
960bool PeepholeOptimizer::optimizeSelect(
961 MachineInstr &
MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
962 assert(
MI.isSelect() &&
"Should only be called when MI->isSelect() is true");
963 if (!
TII->optimizeSelect(
MI, LocalMIs))
966 MI.eraseFromParent();
972bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &
MI) {
973 return TII->optimizeCondBranch(
MI);
989bool PeepholeOptimizer::findNextSource(
const TargetRegisterClass *DefRC,
992 RewriteMapTy &RewriteMap) {
1001 unsigned PHICount = 0;
1008 ValueTracker ValTracker(CurSrcPair.
Reg, CurSrcPair.
SubReg, *
MRI,
TII);
1013 ValueTrackerResult Res = ValTracker.getNextSource();
1019 auto [InsertPt, WasInserted] = RewriteMap.try_emplace(CurSrcPair, Res);
1022 const ValueTrackerResult &CurSrcRes = InsertPt->second;
1024 assert(CurSrcRes == Res &&
"ValueTrackerResult found must match");
1027 if (CurSrcRes.getNumSources() > 1) {
1029 <<
"findNextSource: found PHI cycle, aborting...\n");
1037 unsigned NumSrcs = Res.getNumSources();
1045 for (
unsigned i = 0; i < NumSrcs; ++i)
1050 CurSrcPair = Res.getSrc(0);
1059 const TargetRegisterClass *SrcRC =
MRI->getRegClass(CurSrcPair.
Reg);
1060 if (!
TRI->shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
1066 if (PHICount > 0 && CurSrcPair.
SubReg != 0)
1072 }
while (!SrcToLook.
empty());
1075 return CurSrcPair.
Reg !=
Reg;
1087 assert(!SrcRegs.
empty() &&
"No sources to create a PHI instruction?");
1092 assert(SrcRegs[0].
SubReg == 0 &&
"should not have subreg operand");
1093 Register NewVR =
MRI.createVirtualRegister(NewRC);
1096 TII.get(TargetOpcode::PHI), NewVR);
1098 unsigned MBBOpIdx = 2;
1100 MIB.
addReg(RegPair.Reg, {}, RegPair.SubReg);
1105 MRI.clearKillFlags(RegPair.Reg);
1121 const PeepholeOptimizer::RewriteMapTy &RewriteMap,
1122 bool HandleMultipleSources =
true) {
1125 ValueTrackerResult Res = RewriteMap.
lookup(LookupSrc);
1131 unsigned NumSrcs = Res.getNumSources();
1133 LookupSrc.
Reg = Res.getSrcReg(0);
1134 LookupSrc.
SubReg = Res.getSrcSubReg(0);
1139 if (!HandleMultipleSources)
1145 for (
unsigned i = 0; i < NumSrcs; ++i) {
1146 RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
1164bool PeepholeOptimizer::optimizeCoalescableCopyImpl(
Rewriter &&CpyRewriter) {
1170 while (CpyRewriter.getNextRewritableSource(TrackPair, Dst)) {
1171 if (Dst.Reg.isPhysical()) {
1179 const TargetRegisterClass *DefRC =
MRI->getRegClass(Dst.Reg);
1182 RewriteMapTy RewriteMap;
1185 if (!findNextSource(DefRC, Dst.SubReg, TrackPair, RewriteMap))
1193 "should not rewrite source to original value");
1201 const TargetRegisterClass *RC =
MRI->getRegClass(NewSrc.
Reg);
1202 const TargetRegisterClass *WithSubRC =
1203 TRI->getSubClassWithSubReg(RC, NewSrc.
SubReg);
1204 if (!
MRI->constrainRegClass(NewSrc.
Reg, WithSubRC))
1210 if (CpyRewriter.RewriteCurrentSource(NewSrc.
Reg, NewSrc.
SubReg)) {
1212 MRI->clearKillFlags(NewSrc.
Reg);
1222 NumRewrittenCopies +=
Changed;
1237bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &
MI) {
1238 assert(isCoalescableCopy(
MI) &&
"Invalid argument");
1239 assert(
MI.getDesc().getNumDefs() == 1 &&
1240 "Coalescer can understand multiple defs?!");
1241 const MachineOperand &MODef =
MI.getOperand(0);
1246 switch (
MI.getOpcode()) {
1247 case TargetOpcode::COPY:
1248 return optimizeCoalescableCopyImpl(CopyRewriter(
MI));
1249 case TargetOpcode::INSERT_SUBREG:
1250 return optimizeCoalescableCopyImpl(InsertSubregRewriter(
MI));
1251 case TargetOpcode::EXTRACT_SUBREG:
1252 return optimizeCoalescableCopyImpl(ExtractSubregRewriter(
MI, *
TII));
1253 case TargetOpcode::REG_SEQUENCE:
1254 return optimizeCoalescableCopyImpl(RegSequenceRewriter(
MI));
1257 if (
MI.isBitcast() ||
MI.isRegSequenceLike() ||
MI.isInsertSubregLike() ||
1258 MI.isExtractSubregLike())
1259 return optimizeCoalescableCopyImpl(UncoalescableRewriter(
MI));
1269MachineInstr &PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
1271 RewriteMapTy &RewriteMap) {
1272 assert(!
Def.Reg.isPhysical() &&
"We do not rewrite physical registers");
1278 const TargetRegisterClass *DefRC =
MRI->getRegClass(
Def.Reg);
1279 Register NewVReg =
MRI->createVirtualRegister(DefRC);
1282 const TargetRegisterClass *NewSrcRC =
MRI->getRegClass(NewSrc.
Reg);
1283 const TargetRegisterClass *WithSubRC =
1284 TRI->getSubClassWithSubReg(NewSrcRC, NewSrc.
SubReg);
1289 if (!
MRI->constrainRegClass(NewSrc.
Reg, WithSubRC))
1293 MachineInstr *NewCopy =
1295 TII->get(TargetOpcode::COPY), NewVReg)
1306 MRI->replaceRegWith(
Def.Reg, NewVReg);
1307 MRI->clearKillFlags(NewVReg);
1311 MRI->clearKillFlags(NewSrc.
Reg);
1327bool PeepholeOptimizer::optimizeUncoalescableCopy(
1328 MachineInstr &
MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
1329 assert(isUncoalescableCopy(
MI) &&
"Invalid argument");
1330 UncoalescableRewriter CpyRewriter(
MI);
1335 RewriteMapTy RewriteMap;
1339 while (CpyRewriter.getNextRewritableSource(Src, Def)) {
1342 if (
Def.Reg.isPhysical())
1348 const TargetRegisterClass *DefRC =
MRI->getRegClass(
Def.Reg);
1352 if (!findNextSource(DefRC,
Def.SubReg, Def, RewriteMap))
1361 MachineInstr &NewCopy = rewriteSource(
MI, Def, RewriteMap);
1362 LocalMIs.
insert(&NewCopy);
1367 MI.eraseFromParent();
1368 ++NumUncoalescableCopies;
1375bool PeepholeOptimizer::isLoadFoldable(
1376 MachineInstr &
MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
1377 if (!
MI.canFoldAsLoad() || !
MI.mayLoad())
1379 const MCInstrDesc &MCID =
MI.getDesc();
1388 MRI->hasOneNonDBGUser(
Reg)) {
1395bool PeepholeOptimizer::isMoveImmediate(
1396 MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
1397 DenseMap<Register, MachineInstr *> &ImmDefMIs) {
1398 const MCInstrDesc &MCID =
MI.getDesc();
1399 if (MCID.
getNumDefs() != 1 || !
MI.getOperand(0).isReg())
1406 if (!
MI.isMoveImmediate() && !
TII->getConstValDefinedInReg(
MI,
Reg, ImmVal))
1417bool PeepholeOptimizer::foldImmediate(
1418 MachineInstr &
MI, SmallSet<Register, 4> &ImmDefRegs,
1419 DenseMap<Register, MachineInstr *> &ImmDefMIs,
bool &
Deleted) {
1421 for (
unsigned i = 0, e =
MI.getDesc().getNumOperands(); i != e; ++i) {
1422 MachineOperand &MO =
MI.getOperand(i);
1430 DenseMap<Register, MachineInstr *>::iterator
II = ImmDefMIs.
find(
Reg);
1431 assert(
II != ImmDefMIs.
end() &&
"couldn't find immediate definition");
1437 if (
MRI->getVRegDef(
Reg) &&
1441 MRI->getRegClass(DstReg) ==
MRI->getRegClass(
Reg)) {
1442 MRI->replaceRegWith(DstReg,
Reg);
1443 MI.eraseFromParent();
1467bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &
MI) {
1468 assert(
MI.isCopy() &&
"expected a COPY machine instruction");
1471 if (!getCopySrc(
MI, SrcPair))
1478 if (CopySrcMIs.
insert(std::make_pair(SrcPair, &
MI)).second) {
1483 MachineInstr *PrevCopy = CopySrcMIs.
find(SrcPair)->second;
1486 "Unexpected mismatching subreg!");
1494 if (
MRI->getRegClass(DstReg) !=
MRI->getRegClass(PrevDstReg))
1497 MRI->replaceRegWith(DstReg, PrevDstReg);
1500 MRI->clearKillFlags(PrevDstReg);
1504bool PeepholeOptimizer::isNAPhysCopy(
Register Reg) {
1508bool PeepholeOptimizer::foldRedundantNAPhysCopy(
1509 MachineInstr &
MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
1510 assert(
MI.isCopy() &&
"expected a COPY machine instruction");
1517 if (isNAPhysCopy(SrcReg) && DstReg.
isVirtual()) {
1521 NAPhysToVirtMIs.
insert({SrcReg, &
MI});
1525 if (!(SrcReg.
isVirtual() && isNAPhysCopy(DstReg)))
1529 auto PrevCopy = NAPhysToVirtMIs.
find(DstReg);
1530 if (PrevCopy == NAPhysToVirtMIs.
end()) {
1533 LLVM_DEBUG(
dbgs() <<
"NAPhysCopy: intervening clobber forbids erasing "
1539 if (PrevDstReg == SrcReg) {
1552 NAPhysToVirtMIs.
erase(PrevCopy);
1561bool PeepholeOptimizer::findTargetRecurrence(
1562 Register Reg,
const SmallSet<Register, 2> &TargetRegs,
1563 RecurrenceCycle &RC) {
1573 if (!
MRI->hasOneNonDBGUse(
Reg))
1580 MachineInstr &
MI = *(
MRI->use_instr_nodbg_begin(
Reg));
1581 unsigned Idx =
MI.findRegisterUseOperandIdx(
Reg,
nullptr);
1585 if (
MI.getDesc().getNumDefs() != 1)
1588 MachineOperand &DefOp =
MI.getOperand(0);
1595 unsigned TiedUseIdx;
1596 if (!
MI.isRegTiedToUseOperand(0, &TiedUseIdx))
1599 if (Idx == TiedUseIdx) {
1600 RC.push_back(RecurrenceInstr(&
MI));
1601 return findTargetRecurrence(DefOp.
getReg(), TargetRegs, RC);
1605 if (
TII->findCommutedOpIndices(
MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
1606 RC.push_back(RecurrenceInstr(&
MI, Idx, CommIdx));
1607 return findTargetRecurrence(DefOp.
getReg(), TargetRegs, RC);
1632bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &
PHI) {
1633 SmallSet<Register, 2> TargetRegs;
1634 for (
unsigned Idx = 1; Idx <
PHI.getNumOperands(); Idx += 2) {
1635 MachineOperand &MO =
PHI.getOperand(Idx);
1642 if (findTargetRecurrence(
PHI.getOperand(0).getReg(), TargetRegs, RC)) {
1646 for (
auto &RI : RC) {
1648 auto CP = RI.getCommutePair();
1651 TII->commuteInstruction(*(RI.getMI()),
false, (*CP).first,
1668 PeepholeOptimizer Impl(DT, MLI);
1680bool PeepholeOptimizerLegacy::runOnMachineFunction(
MachineFunction &MF) {
1684 ? &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()
1686 auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1687 PeepholeOptimizer Impl(DT, MLI);
1688 return Impl.run(MF);
1693 LLVM_DEBUG(
dbgs() <<
"********** PEEPHOLE OPTIMIZER **********\n");
1707 bool SeenMoveImm =
false;
1740 if (
MI->isDebugInstr())
1743 if (
MI->isPosition())
1746 if (IsLoopHeader &&
MI->isPHI()) {
1747 if (optimizeRecurrence(*
MI)) {
1753 if (!
MI->isCopy()) {
1754 for (
const MachineOperand &MO :
MI->operands()) {
1758 if (MO.
isDef() && isNAPhysCopy(
Reg)) {
1760 if (Def != NAPhysToVirtMIs.
end()) {
1764 <<
"NAPhysCopy: invalidating because of " << *
MI);
1765 NAPhysToVirtMIs.
erase(Def);
1770 for (
auto &RegMI : NAPhysToVirtMIs) {
1774 <<
"NAPhysCopy: invalidating because of " << *
MI);
1775 NAPhysToVirtMIs.erase(Def);
1782 if (
MI->isImplicitDef() ||
MI->isKill())
1785 if (
MI->isInlineAsm() ||
MI->hasUnmodeledSideEffects()) {
1792 NAPhysToVirtMIs.clear();
1795 if ((isUncoalescableCopy(*
MI) &&
1796 optimizeUncoalescableCopy(*
MI, LocalMIs)) ||
1797 (
MI->isCompare() && optimizeCmpInstr(*
MI)) ||
1798 (
MI->isSelect() && optimizeSelect(*
MI, LocalMIs))) {
1805 if (
MI->isConditionalBranch() && optimizeCondBranch(*
MI)) {
1810 if (isCoalescableCopy(*
MI) && optimizeCoalescableCopy(*
MI)) {
1816 if (
MI->isCopy() && (foldRedundantCopy(*
MI) ||
1817 foldRedundantNAPhysCopy(*
MI, NAPhysToVirtMIs))) {
1820 MI->eraseFromParent();
1825 if (isMoveImmediate(*
MI, ImmDefRegs, ImmDefMIs)) {
1847 if (!isLoadFoldable(*
MI, FoldAsLoadDefCandidates) &&
1848 !FoldAsLoadDefCandidates.
empty()) {
1855 const MCInstrDesc &MIDesc =
MI->getDesc();
1856 for (
unsigned i = MIDesc.
getNumDefs(); i !=
MI->getNumOperands(); ++i) {
1857 const MachineOperand &MOp =
MI->getOperand(i);
1861 if (FoldAsLoadDefCandidates.
count(FoldAsLoadDefReg)) {
1866 Register FoldedReg = FoldAsLoadDefReg;
1867 MachineInstr *
DefMI =
nullptr;
1868 if (MachineInstr *FoldMI =
1869 TII->optimizeLoadInstr(*
MI,
MRI, FoldAsLoadDefReg,
DefMI)) {
1878 if (
MI->shouldUpdateAdditionalCallInfo())
1879 MI->getMF()->moveAdditionalCallInfo(
MI, FoldMI);
1880 MI->eraseFromParent();
1882 MRI->markUsesInDebugValueAsUndef(FoldedReg);
1883 FoldAsLoadDefCandidates.
erase(FoldedReg);
1897 if (
MI->isLoadFoldBarrier()) {
1899 FoldAsLoadDefCandidates.
clear();
1904 MF.resetDelegate(
this);
1908ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
1909 assert(
Def->isCopy() &&
"Invalid definition");
1914 assert(
Def->getNumOperands() -
Def->getNumImplicitOperands() == 2 &&
1915 "Invalid number of operands");
1916 assert(!
Def->hasImplicitDef() &&
"Only implicit uses are allowed");
1917 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subregister defs in SSA");
1920 const MachineOperand &Src =
Def->getOperand(1);
1922 return ValueTrackerResult();
1925 unsigned SubReg = Src.getSubReg();
1927 const TargetRegisterInfo *
TRI =
MRI.getTargetRegisterInfo();
1932 const TargetRegisterClass *RegRC =
MRI.getRegClass(SrcReg);
1933 if (!
TRI->isSubRegValidForRegClass(RegRC,
SubReg))
1934 return ValueTrackerResult();
1937 return ValueTrackerResult();
1941 return ValueTrackerResult(SrcReg,
SubReg);
1944ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
1945 assert(
Def->isBitcast() &&
"Invalid definition");
1948 if (
Def->mayRaiseFPException() ||
Def->hasUnmodeledSideEffects())
1949 return ValueTrackerResult();
1952 if (
Def->getDesc().getNumDefs() != 1)
1953 return ValueTrackerResult();
1955 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subregister defs in SSA");
1957 unsigned SrcIdx =
Def->getNumOperands();
1958 for (
unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx;
OpIdx != EndOpIdx;
1960 const MachineOperand &MO =
Def->getOperand(
OpIdx);
1966 assert(!MO.
isDef() &&
"We should have skipped all the definitions by now");
1967 if (SrcIdx != EndOpIdx)
1969 return ValueTrackerResult();
1975 if (SrcIdx >=
Def->getNumOperands())
1976 return ValueTrackerResult();
1978 const MachineOperand &DefOp =
Def->getOperand(DefIdx);
1982 for (
const MachineInstr &
UseMI :
MRI.use_nodbg_instructions(DefOp.
getReg())) {
1983 if (
UseMI.isSubregToReg())
1984 return ValueTrackerResult();
1987 const MachineOperand &Src =
Def->getOperand(SrcIdx);
1989 return ValueTrackerResult();
1990 return ValueTrackerResult(Src.getReg(), Src.getSubReg());
1993ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
1994 assert((
Def->isRegSequence() ||
Def->isRegSequenceLike()) &&
1995 "Invalid definition");
1997 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"illegal subregister def");
2000 if (!
TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
2001 return ValueTrackerResult();
2009 if (RegSeqInput.SubIdx == DefSubReg)
2010 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
2013 const TargetRegisterInfo *
TRI =
MRI.getTargetRegisterInfo();
2018 LaneBitmask DefMask =
TRI->getSubRegIndexLaneMask(DefSubReg);
2019 LaneBitmask ThisOpRegMask =
TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
2025 if ((DefMask & ThisOpRegMask) != DefMask)
2028 unsigned ReverseDefCompose =
2029 TRI->reverseComposeSubRegIndices(RegSeqInput.SubIdx, DefSubReg);
2030 if (!ReverseDefCompose)
2033 unsigned ComposedDefInSrcReg1 =
2034 TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
2040 const TargetRegisterClass *SrcRC =
MRI.getRegClass(RegSeqInput.Reg);
2041 if (!
TRI->isSubRegValidForRegClass(SrcRC, ComposedDefInSrcReg1))
2042 return ValueTrackerResult();
2044 return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
2050 return ValueTrackerResult();
2053ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
2054 assert((
Def->isInsertSubreg() ||
Def->isInsertSubregLike()) &&
2055 "Invalid definition");
2056 assert(!
Def->getOperand(DefIdx).getSubReg() &&
"no subreg defs in SSA");
2060 if (!
TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
2061 return ValueTrackerResult();
2070 if (InsertedReg.
SubIdx == DefSubReg) {
2071 return ValueTrackerResult(InsertedReg.
Reg, InsertedReg.
SubReg);
2076 const MachineOperand &MODef =
Def->getOperand(DefIdx);
2082 return ValueTrackerResult();
2086 const TargetRegisterInfo *
TRI =
MRI.getTargetRegisterInfo();
2087 if ((
TRI->getSubRegIndexLaneMask(DefSubReg) &
2088 TRI->getSubRegIndexLaneMask(InsertedReg.
SubIdx))
2090 return ValueTrackerResult();
2093 return ValueTrackerResult(
BaseReg.Reg, DefSubReg);
2096ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
2097 assert((
Def->isExtractSubreg() ||
Def->isExtractSubregLike()) &&
2098 "Invalid definition");
2105 return ValueTrackerResult();
2108 if (!
TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
2109 return ValueTrackerResult();
2113 if (ExtractSubregInputReg.
SubReg)
2114 return ValueTrackerResult();
2116 return ValueTrackerResult(ExtractSubregInputReg.
Reg,
2117 ExtractSubregInputReg.
SubIdx);
2120ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
2121 assert(
Def->isSubregToReg() &&
"Invalid definition");
2129 if (DefSubReg !=
Def->getOperand(2).getImm())
2130 return ValueTrackerResult();
2133 if (
Def->getOperand(1).getSubReg())
2134 return ValueTrackerResult();
2136 return ValueTrackerResult(
Def->getOperand(1).getReg(),
2137 Def->getOperand(2).getImm());
2141ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
2142 assert(
Def->isPHI() &&
"Invalid definition");
2143 ValueTrackerResult Res;
2146 for (
unsigned i = 1, e =
Def->getNumOperands(); i < e; i += 2) {
2147 const MachineOperand &MO =
Def->getOperand(i);
2152 return ValueTrackerResult();
2159ValueTrackerResult ValueTracker::getNextSourceImpl() {
2160 assert(Def &&
"This method needs a valid definition");
2162 assert(((
Def->getOperand(DefIdx).isDef() &&
2163 (DefIdx < Def->
getDesc().getNumDefs() ||
2164 Def->getDesc().isVariadic())) ||
2165 Def->getOperand(DefIdx).isImplicit()) &&
2168 return getNextSourceFromCopy();
2169 if (
Def->isBitcast())
2170 return getNextSourceFromBitcast();
2174 return ValueTrackerResult();
2175 if (
Def->isRegSequence() ||
Def->isRegSequenceLike())
2176 return getNextSourceFromRegSequence();
2177 if (
Def->isInsertSubreg() ||
Def->isInsertSubregLike())
2178 return getNextSourceFromInsertSubreg();
2179 if (
Def->isExtractSubreg() ||
Def->isExtractSubregLike())
2180 return getNextSourceFromExtractSubreg();
2181 if (
Def->isSubregToReg())
2182 return getNextSourceFromSubregToReg();
2184 return getNextSourceFromPHI();
2185 return ValueTrackerResult();
2188ValueTrackerResult ValueTracker::getNextSource() {
2192 return ValueTrackerResult();
2194 ValueTrackerResult Res = getNextSourceImpl();
2195 if (Res.isValid()) {
2199 bool OneRegSrc = Res.getNumSources() == 1;
2201 Reg = Res.getSrcReg(0);
2210 if (DI !=
MRI.def_end()) {
2213 DefSubReg = Res.getSrcSubReg(0);
unsigned const MachineRegisterInfo * MRI
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static cl::opt< unsigned > RewritePHILimit("rewrite-phi-limit", cl::Hidden, cl::init(10), cl::desc("Limit the length of PHI chains to lookup"))
static cl::opt< bool > DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer"))
static cl::opt< unsigned > MaxRecurrenceChain("recurrence-chain-limit", cl::Hidden, cl::init(3), cl::desc("Maximum length of recurrence chain when evaluating the benefit " "of commuting operands"))
static cl::opt< bool > DisableNAPhysCopyOpt("disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable non-allocatable physical register copy optimization"))
static bool isVirtualRegisterOperand(MachineOperand &MO)
\bried Returns true if MO is a virtual register operand.
static MachineInstr & insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const SmallVectorImpl< RegSubRegPair > &SrcRegs, MachineInstr &OrigPHI)
Insert a PHI instruction with incoming edges SrcRegs that are guaranteed to have the same register cl...
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static cl::opt< bool > DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization"))
Specifiy whether or not the value tracking looks through complex instructions.
TargetInstrInfo::RegSubRegPairAndIdx RegSubRegPairAndIdx
static RegSubRegPair getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, RegSubRegPair Def, const PeepholeOptimizer::RewriteMapTy &RewriteMap, bool HandleMultipleSources=true)
Given a Def.Reg and Def.SubReg pair, use RewriteMap to find the new source to use for rewrite.
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Virtual Register Rewriter
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
bool isLoopHeader(const BlockT *BB) const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
An RAII based helper class to modify MachineFunctionProperties when running pass.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void setDelegate(Delegate *delegate)
Set the delegate.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const uint32_t * getRegMask() const
getRegMask - Returns a bit mask of registers preserved by this RegMask operand.
unsigned getOperandNo() const
getOperandNo - Return the operand # of this MachineOperand in its MachineInstr.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_iterator< false, true, false, true, false > def_iterator
def_iterator/def_begin/def_end - Walk all defs of the specified register.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MCInstrDesc const & getDesc(MCInstrInfo const &MCII, MCInst const &MCI)
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI char & PeepholeOptimizerLegacyID
PeepholeOptimizer - This pass performs peephole optimizations - like extension and comparison elimina...
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
A pair composed of a pair of a register and a sub-register index, and another sub-register index.
A pair composed of a register and a sub-register index.