58#define DEBUG_TYPE "machinelicm"
62 cl::desc(
"MachineLICM should avoid speculation"),
67 cl::desc(
"MachineLICM should hoist even cheap instructions"),
83 cl::desc(
"Do not hoist instructions if target"
84 "block is N times hotter than the source."),
91 cl::desc(
"Disable hoisting instructions to"
95 "disable the feature"),
97 "enable the feature when using profile data"),
99 "enable the feature with/wo profile data")));
102 "Number of machine instructions hoisted out of loops");
104 "Number of instructions hoisted in low reg pressure situation");
106 "Number of high latency instructions hoisted");
108 "Number of hoisted machine instructions CSEed");
110 "Number of machine instructions hoisted out of loops post regalloc");
112 "Number of stores of const phys reg hoisted out of loops");
114 "Number of instructions not hoisted due to block frequency");
117 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
119 class MachineLICMImpl {
120 const TargetInstrInfo *TII =
nullptr;
121 const TargetLoweringBase *TLI =
nullptr;
122 const TargetRegisterInfo *TRI =
nullptr;
123 const MachineFrameInfo *MFI =
nullptr;
124 MachineRegisterInfo *MRI =
nullptr;
125 TargetSchedModel SchedModel;
126 bool PreRegAlloc =
false;
127 bool HasProfileData =
false;
133 MachineBlockFrequencyInfo *MBFI =
nullptr;
134 MachineLoopInfo *MLI =
nullptr;
135 MachineDomTreeUpdater *MDTU =
nullptr;
138 bool Changed =
false;
139 bool FirstInLoop =
false;
143 SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
146 DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
148 bool isExitBlock(MachineLoop *CurLoop,
const MachineBasicBlock *
MBB) {
149 auto [It,
Inserted] = ExitBlockMap.try_emplace(CurLoop);
151 SmallVector<MachineBasicBlock *, 8> ExitBlocks;
153 It->second = std::move(ExitBlocks);
159 SmallDenseSet<Register> RegSeen;
160 SmallVector<unsigned, 8> RegPressure;
164 SmallVector<unsigned, 8> RegLimit;
170 DenseMap<MachineBasicBlock *,
171 DenseMap<unsigned, std::vector<MachineInstr *>>>
183 unsigned SpeculationState = SpeculateUnknown;
186 MachineLICMImpl(
bool PreRegAlloc,
Pass *LegacyPass,
188 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
189 assert((LegacyPass || MFAM) &&
"LegacyPass or MFAM must be provided");
190 assert(!(LegacyPass && MFAM) &&
191 "LegacyPass and MFAM cannot be provided at the same time");
194 bool run(MachineFunction &MF);
196 void releaseMemory() {
202 ExitBlockMap.clear();
207 struct CandidateInfo {
212 CandidateInfo(MachineInstr *mi,
Register def,
int fi)
213 : MI(mi), Def(def), FI(fi) {}
216 void HoistRegionPostRA(MachineLoop *CurLoop);
218 void HoistPostRA(MachineInstr *
MI,
Register Def, MachineLoop *CurLoop);
220 void ProcessMI(MachineInstr *
MI, BitVector &RUDefs, BitVector &RUClobbers,
221 SmallDenseSet<int> &StoredFIs,
222 SmallVectorImpl<CandidateInfo> &Candidates,
223 MachineLoop *CurLoop);
225 void AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop);
227 bool IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop);
229 bool IsLoopInvariantInst(MachineInstr &
I, MachineLoop *CurLoop);
231 bool HasLoopPHIUse(
const MachineInstr *
MI, MachineLoop *CurLoop);
233 bool HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
Register Reg,
234 MachineLoop *CurLoop)
const;
236 bool IsCheapInstruction(MachineInstr &
MI)
const;
238 bool CanCauseHighRegPressure(
const SmallDenseMap<unsigned, int> &
Cost,
241 void UpdateBackTraceRegPressure(
const MachineInstr *
MI);
243 bool IsProfitableToHoist(MachineInstr &
MI, MachineLoop *CurLoop);
245 bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
247 void EnterScope(MachineBasicBlock *
MBB);
249 void ExitScope(MachineBasicBlock *
MBB);
251 void ExitScopeIfDone(
253 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
254 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
258 void InitRegPressure(MachineBasicBlock *BB);
260 SmallDenseMap<unsigned, int> calcRegisterCost(
const MachineInstr *
MI,
262 bool ConsiderUnseenAsDef);
264 void UpdateRegPressure(
const MachineInstr *
MI,
265 bool ConsiderUnseenAsDef =
false);
267 MachineInstr *ExtractHoistableLoad(MachineInstr *
MI, MachineLoop *CurLoop);
269 MachineInstr *LookForDuplicate(
const MachineInstr *
MI,
270 std::vector<MachineInstr *> &PrevMIs);
273 EliminateCSE(MachineInstr *
MI,
274 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI);
276 bool MayCSE(MachineInstr *
MI);
278 unsigned Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
279 MachineLoop *CurLoop);
281 void InitCSEMap(MachineBasicBlock *BB);
283 void InitializeLoadsHoistableLoops();
285 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
286 MachineBasicBlock *TgtBlock);
287 MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);
294 MachineLICMBase(
char &
ID,
bool PreRegAlloc)
295 : MachineFunctionPass(
ID), PreRegAlloc(PreRegAlloc) {}
297 bool runOnMachineFunction(MachineFunction &MF)
override;
299 void getAnalysisUsage(AnalysisUsage &AU)
const override {
302 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
310 class MachineLICM :
public MachineLICMBase {
313 MachineLICM() : MachineLICMBase(ID,
false) {}
316 class EarlyMachineLICM :
public MachineLICMBase {
319 EarlyMachineLICM() : MachineLICMBase(ID,
true) {}
325char EarlyMachineLICM::ID;
331 "Machine Loop Invariant Code Motion",
false,
false)
340 "Early Machine Loop Invariant Code Motion",
false,
false)
346 "Early Machine Loop Invariant Code Motion",
false,
false)
349 if (skipFunction(MF.getFunction()))
352 MachineLICMImpl Impl(PreRegAlloc,
this,
nullptr);
356#define GET_RESULT(RESULT, GETTER, INFIX) \
358 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \
359 : &MFAM->getResult<RESULT##Analysis>(MF))
368 MachineDomTreeUpdater::UpdateStrategy::Lazy);
372 ?
GET_RESULT(MachineBlockFrequency, getMBFI, Info)
377 TII = ST.getInstrInfo();
378 TLI = ST.getTargetLowering();
379 TRI = ST.getRegisterInfo();
382 SchedModel.
init(&ST);
394 unsigned NumRPS =
TRI->getNumRegPressureSets();
395 RegPressure.resize(NumRPS);
398 for (
unsigned i = 0, e = NumRPS; i != e; ++i)
399 RegLimit[i] =
TRI->getRegPressureSetLimit(MF, i);
403 InitializeLoadsHoistableLoops();
406 while (!Worklist.
empty()) {
410 HoistRegionPostRA(CurLoop);
416 HoistOutOfLoop(
N, CurLoop);
432 if (
MI->memoperands_empty())
435 if (!
MemOp->isStore() || !
MemOp->getPseudoValue())
439 if (
Value->getFrameIndex() == FI)
480 const unsigned NumRegs =
TRI.getNumRegs();
481 const unsigned MaskWords = (NumRegs + 31) / 32;
482 for (
unsigned K = 0; K < MaskWords; ++K) {
484 for (
unsigned Bit = 0; Bit < 32; ++Bit) {
485 const unsigned PhysReg = (K * 32) + Bit;
486 if (PhysReg == NumRegs)
489 if (PhysReg && !((Word >> Bit) & 1)) {
490 for (MCRegUnit Unit :
TRI.regunits(PhysReg))
491 RUsFromRegsNotInMask.
set(
static_cast<unsigned>(Unit));
496 RUs |= RUsFromRegsNotInMask;
501void MachineLICMImpl::ProcessMI(MachineInstr *
MI, BitVector &RUDefs,
502 BitVector &RUClobbers,
503 SmallDenseSet<int> &StoredFIs,
504 SmallVectorImpl<CandidateInfo> &Candidates,
505 MachineLoop *CurLoop) {
506 bool RuledOut =
false;
507 bool HasNonInvariantUse =
false;
509 for (
const MachineOperand &MO :
MI->operands()) {
512 int FI = MO.getIndex();
513 if (!StoredFIs.
count(FI) &&
517 HasNonInvariantUse =
true;
523 if (MO.isRegMask()) {
536 if (!HasNonInvariantUse) {
537 for (MCRegUnit Unit :
TRI->regunits(
Reg)) {
540 if (RUDefs.
test(
static_cast<unsigned>(Unit)) ||
541 RUClobbers.
test(
static_cast<unsigned>(Unit))) {
542 HasNonInvariantUse =
true;
561 for (MCRegUnit Unit :
TRI->regunits(
Reg)) {
562 if (RUDefs.
test(
static_cast<unsigned>(Unit))) {
563 RUClobbers.
set(
static_cast<unsigned>(Unit));
565 }
else if (RUClobbers.
test(
static_cast<unsigned>(Unit))) {
571 RUDefs.
set(
static_cast<unsigned>(Unit));
577 if (Def && !RuledOut) {
578 int FI = std::numeric_limits<int>::min();
579 if ((!HasNonInvariantUse && IsLICMCandidate(*
MI, CurLoop)) ||
587void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop) {
588 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
592 unsigned NumRegUnits =
TRI->getNumRegUnits();
593 BitVector RUDefs(NumRegUnits);
594 BitVector RUClobbers(NumRegUnits);
597 SmallDenseSet<int> StoredFIs;
601 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
605 if (
ML &&
ML->getHeader()->isEHPad())
continue;
610 for (
const auto &LI : BB->liveins()) {
611 for (MCRegUnit Unit :
TRI->regunits(LI.PhysReg))
612 RUDefs.
set(
static_cast<unsigned>(Unit));
616 if (
const uint32_t *Mask = BB->getBeginClobberMask(
TRI))
621 const MachineFunction &MF = *BB->getParent();
625 for (MCRegUnit Unit :
TRI->regunits(
Reg))
626 RUClobbers.
set(
static_cast<unsigned>(Unit));
628 for (MCRegUnit Unit :
TRI->regunits(
Reg))
629 RUClobbers.
set(
static_cast<unsigned>(Unit));
632 SpeculationState = SpeculateUnknown;
633 for (MachineInstr &
MI : *BB)
634 ProcessMI(&
MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
638 BitVector TermRUs(NumRegUnits);
640 if (TI != Preheader->
end()) {
641 for (
const MachineOperand &MO : TI->operands()) {
647 for (MCRegUnit Unit :
TRI->regunits(
Reg))
648 TermRUs.set(
static_cast<unsigned>(Unit));
660 for (CandidateInfo &Candidate : Candidates) {
661 if (Candidate.FI != std::numeric_limits<int>::min() &&
662 StoredFIs.
count(Candidate.FI))
667 for (MCRegUnit Unit :
TRI->regunits(Def)) {
668 if (RUClobbers.
test(
static_cast<unsigned>(Unit)) ||
669 TermRUs.test(
static_cast<unsigned>(Unit))) {
678 MachineInstr *
MI = Candidate.MI;
679 for (
const MachineOperand &MO :
MI->all_uses()) {
682 for (MCRegUnit Unit :
TRI->regunits(MO.getReg())) {
683 if (RUDefs.
test(
static_cast<unsigned>(Unit)) ||
684 RUClobbers.
test(
static_cast<unsigned>(Unit))) {
697 HoistPostRA(
MI, Candidate.Def, CurLoop);
703void MachineLICMImpl::AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop) {
704 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
705 if (!BB->isLiveIn(
Reg))
707 for (MachineInstr &
MI : *BB) {
708 for (MachineOperand &MO :
MI.all_uses()) {
711 if (
TRI->regsOverlap(
Reg, MO.getReg()))
720void MachineLICMImpl::HoistPostRA(MachineInstr *
MI,
Register Def,
721 MachineLoop *CurLoop) {
731 MachineBasicBlock *
MBB =
MI->getParent();
737 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
743 AddToLiveIns(Def, CurLoop);
751bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
752 MachineLoop *CurLoop) {
753 if (SpeculationState != SpeculateUnknown)
754 return SpeculationState == SpeculateFalse;
758 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
760 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
762 SpeculationState = SpeculateTrue;
767 SpeculationState = SpeculateFalse;
771void MachineLICMImpl::EnterScope(MachineBasicBlock *
MBB) {
778void MachineLICMImpl::ExitScope(MachineBasicBlock *
MBB) {
786void MachineLICMImpl::ExitScopeIfDone(
788 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
789 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
790 if (OpenChildren[Node])
794 ExitScope(
Node->getBlock());
797 if (!Parent || --OpenChildren[Parent] != 0)
808 MachineLoop *CurLoop) {
809 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
815 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
816 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
820 while (!WorkList.
empty()) {
822 assert(Node &&
"Null dominator tree node?");
823 MachineBasicBlock *BB =
Node->getBlock();
828 if (
ML &&
ML->getHeader()->isEHPad())
841 OpenChildren[
Node] = 0;
848 size_t WorkListStart = WorkList.
size();
850 ParentMap[Child] =
Node;
853 std::reverse(WorkList.
begin() + WorkListStart, WorkList.
end());
854 OpenChildren[
Node] = WorkList.
size() - WorkListStart;
863 InitRegPressure(Preheader);
867 MachineBasicBlock *
MBB =
Node->getBlock();
872 SpeculationState = SpeculateUnknown;
874 unsigned HoistRes = HoistResult::NotHoisted;
875 HoistRes = Hoist(&
MI, Preheader, CurLoop);
876 if (HoistRes & HoistResult::NotHoisted) {
880 for (MachineLoop *L = MLI->
getLoopFor(
MI.getParent()); L != CurLoop;
881 L =
L->getParentLoop())
884 while (!InnerLoopWorkList.
empty()) {
885 MachineLoop *InnerLoop = InnerLoopWorkList.
pop_back_val();
887 if (InnerLoopPreheader) {
888 HoistRes = Hoist(&
MI, InnerLoopPreheader, InnerLoop);
889 if (HoistRes & HoistResult::Hoisted)
895 if (HoistRes & HoistResult::ErasedMI)
898 UpdateRegPressure(&
MI);
902 ExitScopeIfDone(Node, OpenChildren, ParentMap);
913void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
921 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
927 for (
const MachineInstr &
MI : *BB)
928 UpdateRegPressure(&
MI,
true);
932void MachineLICMImpl::UpdateRegPressure(
const MachineInstr *
MI,
933 bool ConsiderUnseenAsDef) {
934 auto Cost = calcRegisterCost(
MI,
true, ConsiderUnseenAsDef);
935 for (
const auto &[Class, Weight] :
Cost) {
936 if (
static_cast<int>(RegPressure[Class]) < -Weight)
949SmallDenseMap<unsigned, int>
950MachineLICMImpl::calcRegisterCost(
const MachineInstr *
MI,
bool ConsiderSeen,
951 bool ConsiderUnseenAsDef) {
952 SmallDenseMap<unsigned, int>
Cost;
953 if (
MI->isImplicitDef())
955 for (
unsigned i = 0, e =
MI->getDesc().getNumOperands(); i != e; ++i) {
956 const MachineOperand &MO =
MI->getOperand(i);
964 bool isNew = ConsiderSeen ? RegSeen.
insert(
Reg).second :
false;
965 const TargetRegisterClass *RC =
MRI->getRegClass(
Reg);
967 RegClassWeight
W =
TRI->getRegClassWeight(RC);
970 RCCost =
W.RegWeight;
973 if (isNew && !isKill && ConsiderUnseenAsDef)
975 RCCost =
W.RegWeight;
976 else if (!isNew && isKill)
977 RCCost = -
W.RegWeight;
981 const int *PS =
TRI->getRegClassPressureSets(RC);
982 for (; *PS != -1; ++PS)
991 assert(
MI.mayLoad() &&
"Expected MI that loads!");
995 if (
MI.memoperands_empty())
1000 if (PSV->isGOT() || PSV->isConstantPool())
1017 bool FoundCallerPresReg =
false;
1018 if (!
MI.mayStore() ||
MI.hasUnmodeledSideEffects() ||
1019 (
MI.getNumOperands() == 0))
1028 if (
Reg.isVirtual())
1030 if (
Reg.isVirtual())
1032 if (!
TRI->isCallerPreservedPhysReg(
Reg.asMCReg(), *
MI.getMF()))
1035 FoundCallerPresReg =
true;
1036 }
else if (!MO.
isImm()) {
1040 return FoundCallerPresReg;
1058 Register CopySrcReg =
MI.getOperand(1).getReg();
1062 if (!
TRI->isCallerPreservedPhysReg(CopySrcReg.
asMCReg(), *MF))
1065 Register CopyDstReg =
MI.getOperand(0).getReg();
1078bool MachineLICMImpl::IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop) {
1080 bool DontMoveAcrossStore = !
HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1081 if ((!
I.isSafeToMove(DontMoveAcrossStore)) &&
1094 !IsGuaranteedToExecute(
I.getParent(), CurLoop)) {
1103 if (
I.isConvergent())
1106 if (!
TII->shouldHoist(
I, CurLoop))
1113bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &
I,
1114 MachineLoop *CurLoop) {
1115 if (!IsLICMCandidate(
I, CurLoop)) {
1116 LLVM_DEBUG(
dbgs() <<
"LICM: Instruction not a LICM candidate\n");
1124bool MachineLICMImpl::HasLoopPHIUse(
const MachineInstr *
MI,
1125 MachineLoop *CurLoop) {
1128 MI = Work.pop_back_val();
1129 for (
const MachineOperand &MO :
MI->all_defs()) {
1133 for (MachineInstr &
UseMI :
MRI->use_instructions(
Reg)) {
1135 if (
UseMI.isPHI()) {
1149 Work.push_back(&
UseMI);
1152 }
while (!Work.empty());
1158bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
1160 MachineLoop *CurLoop)
const {
1161 if (
MRI->use_nodbg_empty(
Reg))
1164 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(
Reg)) {
1165 if (
UseMI.isCopyLike())
1169 for (
unsigned i = 0, e =
UseMI.getNumOperands(); i != e; ++i) {
1170 const MachineOperand &MO =
UseMI.getOperand(i);
1177 if (
TII->hasHighOperandLatency(SchedModel,
MRI,
MI, DefIdx,
UseMI, i))
1190bool MachineLICMImpl::IsCheapInstruction(MachineInstr &
MI)
const {
1194 bool isCheap =
false;
1195 unsigned NumDefs =
MI.getDesc().getNumDefs();
1196 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
1197 MachineOperand &DefMO =
MI.getOperand(i);
1205 if (!
TII->hasLowDefLatency(SchedModel,
MI, i))
1215bool MachineLICMImpl::CanCauseHighRegPressure(
1216 const SmallDenseMap<unsigned, int> &
Cost,
bool CheapInstr) {
1217 for (
const auto &[Class, Weight] :
Cost) {
1221 int Limit = RegLimit[
Class];
1228 for (
const auto &RP : BackTrace)
1229 if (
static_cast<int>(RP[Class]) + Weight >= Limit)
1239void MachineLICMImpl::UpdateBackTraceRegPressure(
const MachineInstr *
MI) {
1242 auto Cost = calcRegisterCost(
MI,
false,
1246 for (
auto &RP : BackTrace)
1247 for (
const auto &[Class, Weight] :
Cost)
1253bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &
MI,
1254 MachineLoop *CurLoop) {
1255 if (
MI.isImplicitDef())
1273 bool CheapInstr = IsCheapInstruction(
MI);
1274 bool CreatesCopy = HasLoopPHIUse(&
MI, CurLoop);
1277 if (CheapInstr && CreatesCopy) {
1284 if (
TII->isTriviallyReMaterializable(
MI))
1289 for (
unsigned i = 0, e =
MI.getDesc().getNumOperands(); i != e; ++i) {
1290 const MachineOperand &MO =
MI.getOperand(i);
1296 if (MO.
isDef() && HasHighOperandLatency(
MI, i,
Reg, CurLoop)) {
1309 auto Cost = calcRegisterCost(&
MI,
false,
1314 if (!CanCauseHighRegPressure(
Cost, CheapInstr)) {
1330 (!IsGuaranteedToExecute(
MI.getParent(), CurLoop) && !MayCSE(&
MI))) {
1338 if (
MI.isCopy() ||
MI.isRegSequence()) {
1342 [
this](
const MachineOperand &UseOp) {
1343 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1344 MRI->isConstantPhysReg(UseOp.getReg());
1346 IsLoopInvariantInst(
MI, CurLoop) &&
1347 any_of(
MRI->use_nodbg_instructions(DefReg),
1348 [&CurLoop,
this, DefReg,
1350 if (!CurLoop->contains(&UseMI))
1357 if (CanCauseHighRegPressure(Cost, false) &&
1358 !CurLoop->isLoopInvariant(UseMI, DefReg))
1368 if (!
TII->isTriviallyReMaterializable(
MI) &&
1369 !
MI.isDereferenceableInvariantLoad()) {
1380MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *
MI,
1381 MachineLoop *CurLoop) {
1383 if (
MI->canFoldAsLoad())
1389 if (!
MI->isDereferenceableInvariantLoad())
1393 unsigned LoadRegIndex;
1395 TII->getOpcodeAfterMemoryUnfold(
MI->getOpcode(),
1399 if (NewOpc == 0)
return nullptr;
1400 const MCInstrDesc &MID =
TII->get(NewOpc);
1401 MachineFunction &MF = *
MI->getMF();
1402 const TargetRegisterClass *RC =
TII->getRegClass(MID, LoadRegIndex);
1406 SmallVector<MachineInstr *, 2> NewMIs;
1412 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1415 "Unfolded a load into multiple instructions!");
1416 MachineBasicBlock *
MBB =
MI->getParent();
1422 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
1423 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
1424 NewMIs[0]->eraseFromParent();
1425 NewMIs[1]->eraseFromParent();
1430 UpdateRegPressure(NewMIs[1]);
1435 if (
MI->shouldUpdateAdditionalCallInfo())
1438 MI->eraseFromParent();
1445void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1446 for (MachineInstr &
MI : *BB)
1452void MachineLICMImpl::InitializeLoadsHoistableLoops() {
1458 while (!Worklist.empty()) {
1459 auto *
L = Worklist.pop_back_val();
1460 AllowedToHoistLoads[
L] =
true;
1472 for (
auto *Loop :
reverse(LoopsInPreOrder)) {
1473 for (
auto *
MBB : Loop->blocks()) {
1475 if (!AllowedToHoistLoads[Loop])
1477 for (
auto &
MI : *
MBB) {
1478 if (!
MI.isLoadFoldBarrier() && !
MI.mayStore() && !
MI.isCall() &&
1479 !(
MI.mayLoad() &&
MI.hasOrderedMemoryRef()))
1481 for (MachineLoop *L = Loop;
L !=
nullptr;
L =
L->getParentLoop())
1482 AllowedToHoistLoads[
L] =
false;
1492MachineLICMImpl::LookForDuplicate(
const MachineInstr *
MI,
1493 std::vector<MachineInstr *> &PrevMIs) {
1494 for (MachineInstr *PrevMI : PrevMIs)
1495 if (
TII->produceSameValue(*
MI, *PrevMI, (PreRegAlloc ?
MRI :
nullptr)))
1505bool MachineLICMImpl::EliminateCSE(
1507 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1510 if (
MI->isImplicitDef())
1515 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1518 if (MachineInstr *Dup = LookForDuplicate(
MI, CI->second)) {
1523 SmallVector<unsigned, 2> Defs;
1524 for (
unsigned i = 0, e =
MI->getNumOperands(); i != e; ++i) {
1525 const MachineOperand &MO =
MI->getOperand(i);
1529 MO.
getReg() == Dup->getOperand(i).getReg()) &&
1530 "Instructions with different phys regs are not identical!");
1537 for (
unsigned i = 0, e = Defs.
size(); i != e; ++i) {
1538 unsigned Idx = Defs[i];
1540 Register DupReg = Dup->getOperand(Idx).getReg();
1543 if (!
MRI->constrainRegClass(DupReg,
MRI->getRegClass(
Reg))) {
1545 for (
unsigned j = 0;
j != i; ++
j)
1546 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1551 for (
unsigned Idx : Defs) {
1553 Register DupReg = Dup->getOperand(Idx).getReg();
1554 MRI->replaceRegWith(
Reg, DupReg);
1555 MRI->clearKillFlags(DupReg);
1557 if (!
MRI->use_nodbg_empty(DupReg))
1558 Dup->getOperand(Idx).setIsDead(
false);
1561 MI->eraseFromParent();
1570bool MachineLICMImpl::MayCSE(MachineInstr *
MI) {
1571 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1574 unsigned Opcode =
MI->getOpcode();
1575 for (
auto &Map : CSEMap) {
1578 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1579 Map.second.find(Opcode);
1582 if (CI ==
Map.second.end() ||
MI->isImplicitDef())
1584 if (LookForDuplicate(
MI, CI->second) !=
nullptr)
1595unsigned MachineLICMImpl::Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
1596 MachineLoop *CurLoop) {
1597 MachineBasicBlock *SrcBlock =
MI->getParent();
1602 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1603 ++NumNotHoistedDueToHotness;
1604 return HoistResult::NotHoisted;
1607 bool HasExtractHoistableLoad =
false;
1608 if (!IsLoopInvariantInst(*
MI, CurLoop) ||
1609 !IsProfitableToHoist(*
MI, CurLoop)) {
1611 MI = ExtractHoistableLoad(
MI, CurLoop);
1613 return HoistResult::NotHoisted;
1614 HasExtractHoistableLoad =
true;
1625 dbgs() <<
"Hoisting " << *
MI;
1626 if (
MI->getParent()->getBasicBlock())
1636 InitCSEMap(Preheader);
1637 FirstInLoop =
false;
1641 unsigned Opcode =
MI->getOpcode();
1642 bool HasCSEDone =
false;
1643 for (
auto &Map : CSEMap) {
1646 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1647 Map.second.find(Opcode);
1648 if (CI !=
Map.second.end()) {
1649 if (EliminateCSE(
MI, CI)) {
1664 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
1668 UpdateBackTraceRegPressure(
MI);
1673 for (MachineOperand &MO :
MI->all_defs())
1677 CSEMap[Preheader][Opcode].push_back(
MI);
1683 if (HasCSEDone || HasExtractHoistableLoad)
1684 return HoistResult::Hoisted | HoistResult::ErasedMI;
1685 return HoistResult::Hoisted;
1689MachineBasicBlock *MachineLICMImpl::getOrCreatePreheader(MachineLoop *CurLoop) {
1698 MachineBasicBlock *NewPreheader = Pred->SplitCriticalEdge(
1699 CurLoop->
getHeader(), LegacyPass, MFAM,
nullptr, MDTU);
1702 return NewPreheader;
1710bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1711 MachineBasicBlock *TgtBlock) {
1720 double Ratio = (double)DstBF / SrcBF;
1726template <
typename DerivedT,
bool PreRegAlloc>
1729 bool Changed = MachineLICMImpl(PreRegAlloc,
nullptr, &MFAM).run(MF);