58#define DEBUG_TYPE "machinelicm"
62 cl::desc(
"MachineLICM should avoid speculation"),
67 cl::desc(
"MachineLICM should hoist even cheap instructions"),
83 cl::desc(
"Do not hoist instructions if target"
84 "block is N times hotter than the source."),
91 cl::desc(
"Disable hoisting instructions to"
95 "disable the feature"),
97 "enable the feature when using profile data"),
99 "enable the feature with/wo profile data")));
102 "Number of machine instructions hoisted out of loops");
104 "Number of instructions hoisted in low reg pressure situation");
106 "Number of high latency instructions hoisted");
108 "Number of hoisted machine instructions CSEed");
110 "Number of machine instructions hoisted out of loops post regalloc");
112 "Number of stores of const phys reg hoisted out of loops");
114 "Number of instructions not hoisted due to block frequency");
117 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
119 class MachineLICMImpl {
120 const TargetInstrInfo *TII =
nullptr;
121 const TargetLoweringBase *TLI =
nullptr;
122 const TargetRegisterInfo *TRI =
nullptr;
123 const MachineFrameInfo *MFI =
nullptr;
124 MachineRegisterInfo *MRI =
nullptr;
125 TargetSchedModel SchedModel;
126 bool PreRegAlloc =
false;
127 bool HasProfileData =
false;
133 MachineBlockFrequencyInfo *MBFI =
nullptr;
134 MachineLoopInfo *MLI =
nullptr;
135 MachineDomTreeUpdater *MDTU =
nullptr;
138 bool Changed =
false;
139 bool FirstInLoop =
false;
143 SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
146 DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
148 bool isExitBlock(MachineLoop *CurLoop,
const MachineBasicBlock *
MBB) {
149 auto [It,
Inserted] = ExitBlockMap.try_emplace(CurLoop);
151 SmallVector<MachineBasicBlock *, 8> ExitBlocks;
153 It->second = std::move(ExitBlocks);
159 SmallDenseSet<Register> RegSeen;
160 SmallVector<unsigned, 8> RegPressure;
164 SmallVector<unsigned, 8> RegLimit;
170 DenseMap<MachineBasicBlock *,
171 DenseMap<unsigned, std::vector<MachineInstr *>>>
183 unsigned SpeculationState = SpeculateUnknown;
186 MachineLICMImpl(
bool PreRegAlloc,
Pass *LegacyPass,
188 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
189 assert((LegacyPass || MFAM) &&
"LegacyPass or MFAM must be provided");
190 assert(!(LegacyPass && MFAM) &&
191 "LegacyPass and MFAM cannot be provided at the same time");
194 bool run(MachineFunction &MF);
196 void releaseMemory() {
202 ExitBlockMap.clear();
207 struct CandidateInfo {
212 CandidateInfo(MachineInstr *mi,
Register def,
int fi)
213 : MI(mi), Def(def), FI(fi) {}
216 void HoistRegionPostRA(MachineLoop *CurLoop);
218 void HoistPostRA(MachineInstr *
MI,
Register Def, MachineLoop *CurLoop);
220 void ProcessMI(MachineInstr *
MI, BitVector &RUDefs, BitVector &RUClobbers,
221 SmallDenseSet<int> &StoredFIs,
222 SmallVectorImpl<CandidateInfo> &Candidates,
223 MachineLoop *CurLoop);
225 void AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop);
227 bool IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop);
229 bool IsLoopInvariantInst(MachineInstr &
I, MachineLoop *CurLoop);
231 bool HasLoopPHIUse(
const MachineInstr *
MI, MachineLoop *CurLoop);
233 bool HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
Register Reg,
234 MachineLoop *CurLoop)
const;
236 bool IsCheapInstruction(MachineInstr &
MI)
const;
238 bool CanCauseHighRegPressure(
const SmallDenseMap<unsigned, int> &
Cost,
241 void UpdateBackTraceRegPressure(
const MachineInstr *
MI);
243 bool IsProfitableToHoist(MachineInstr &
MI, MachineLoop *CurLoop);
245 bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
247 void EnterScope(MachineBasicBlock *
MBB);
249 void ExitScope(MachineBasicBlock *
MBB);
251 void ExitScopeIfDone(
253 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
254 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
258 void InitRegPressure(MachineBasicBlock *BB);
260 SmallDenseMap<unsigned, int> calcRegisterCost(
const MachineInstr *
MI,
262 bool ConsiderUnseenAsDef);
264 void UpdateRegPressure(
const MachineInstr *
MI,
265 bool ConsiderUnseenAsDef =
false);
267 MachineInstr *ExtractHoistableLoad(MachineInstr *
MI, MachineLoop *CurLoop);
269 MachineInstr *LookForDuplicate(
const MachineInstr *
MI,
270 std::vector<MachineInstr *> &PrevMIs);
273 EliminateCSE(MachineInstr *
MI,
274 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI);
276 bool MayCSE(MachineInstr *
MI);
278 unsigned Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
279 MachineLoop *CurLoop);
281 void InitCSEMap(MachineBasicBlock *BB);
283 void InitializeLoadsHoistableLoops();
285 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
286 MachineBasicBlock *TgtBlock);
287 MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);
294 MachineLICMBase(
char &
ID,
bool PreRegAlloc)
295 : MachineFunctionPass(
ID), PreRegAlloc(PreRegAlloc) {}
297 bool runOnMachineFunction(MachineFunction &MF)
override;
299 void getAnalysisUsage(AnalysisUsage &AU)
const override {
302 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
310 class MachineLICM :
public MachineLICMBase {
313 MachineLICM() : MachineLICMBase(ID,
false) {
318 class EarlyMachineLICM :
public MachineLICMBase {
321 EarlyMachineLICM() : MachineLICMBase(ID,
true) {
329char EarlyMachineLICM::ID;
335 "Machine Loop Invariant Code Motion",
false,
false)
344 "Early Machine Loop Invariant Code Motion",
false,
false)
350 "Early Machine Loop Invariant Code Motion",
false,
false)
353 if (skipFunction(MF.getFunction()))
356 MachineLICMImpl Impl(PreRegAlloc,
this,
nullptr);
360#define GET_RESULT(RESULT, GETTER, INFIX) \
362 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \
363 : &MFAM->getResult<RESULT##Analysis>(MF))
372 MachineDomTreeUpdater::UpdateStrategy::Lazy);
381 TII = ST.getInstrInfo();
382 TLI = ST.getTargetLowering();
383 TRI = ST.getRegisterInfo();
386 SchedModel.
init(&ST);
398 unsigned NumRPS =
TRI->getNumRegPressureSets();
399 RegPressure.resize(NumRPS);
402 for (
unsigned i = 0, e = NumRPS; i != e; ++i)
403 RegLimit[i] =
TRI->getRegPressureSetLimit(MF, i);
407 InitializeLoadsHoistableLoops();
410 while (!Worklist.
empty()) {
414 HoistRegionPostRA(CurLoop);
420 HoistOutOfLoop(
N, CurLoop);
436 if (
MI->memoperands_empty())
439 if (!
MemOp->isStore() || !
MemOp->getPseudoValue())
443 if (
Value->getFrameIndex() == FI)
484 const unsigned NumRegs =
TRI.getNumRegs();
485 const unsigned MaskWords = (NumRegs + 31) / 32;
486 for (
unsigned K = 0; K < MaskWords; ++K) {
488 for (
unsigned Bit = 0; Bit < 32; ++Bit) {
489 const unsigned PhysReg = (K * 32) + Bit;
490 if (PhysReg == NumRegs)
493 if (PhysReg && !((Word >> Bit) & 1)) {
495 RUsFromRegsNotInMask.
set(Unit);
500 RUs |= RUsFromRegsNotInMask;
505void MachineLICMImpl::ProcessMI(MachineInstr *
MI, BitVector &RUDefs,
506 BitVector &RUClobbers,
507 SmallDenseSet<int> &StoredFIs,
508 SmallVectorImpl<CandidateInfo> &Candidates,
509 MachineLoop *CurLoop) {
510 bool RuledOut =
false;
511 bool HasNonInvariantUse =
false;
513 for (
const MachineOperand &MO :
MI->operands()) {
516 int FI = MO.getIndex();
517 if (!StoredFIs.
count(FI) &&
521 HasNonInvariantUse =
true;
527 if (MO.isRegMask()) {
540 if (!HasNonInvariantUse) {
544 if (RUDefs.
test(Unit) || RUClobbers.
test(Unit)) {
545 HasNonInvariantUse =
true;
565 if (RUDefs.
test(Unit)) {
566 RUClobbers.
set(Unit);
568 }
else if (RUClobbers.
test(Unit)) {
580 if (Def && !RuledOut) {
581 int FI = std::numeric_limits<int>::min();
582 if ((!HasNonInvariantUse && IsLICMCandidate(*
MI, CurLoop)) ||
590void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop) {
591 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
595 unsigned NumRegUnits =
TRI->getNumRegUnits();
596 BitVector RUDefs(NumRegUnits);
597 BitVector RUClobbers(NumRegUnits);
600 SmallDenseSet<int> StoredFIs;
604 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
608 if (
ML &&
ML->getHeader()->isEHPad())
continue;
613 for (
const auto &LI : BB->liveins()) {
619 if (
const uint32_t *Mask = BB->getBeginClobberMask(
TRI))
624 const MachineFunction &MF = *BB->getParent();
629 RUClobbers.
set(Unit);
632 RUClobbers.
set(Unit);
635 SpeculationState = SpeculateUnknown;
636 for (MachineInstr &
MI : *BB)
637 ProcessMI(&
MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
641 BitVector TermRUs(NumRegUnits);
643 if (TI != Preheader->
end()) {
644 for (
const MachineOperand &MO : TI->operands()) {
663 for (CandidateInfo &Candidate : Candidates) {
664 if (Candidate.FI != std::numeric_limits<int>::min() &&
665 StoredFIs.
count(Candidate.FI))
671 if (RUClobbers.
test(Unit) || TermRUs.test(Unit)) {
680 MachineInstr *
MI = Candidate.MI;
681 for (
const MachineOperand &MO :
MI->all_uses()) {
685 if (RUDefs.
test(Unit) || RUClobbers.
test(Unit)) {
698 HoistPostRA(
MI, Candidate.Def, CurLoop);
704void MachineLICMImpl::AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop) {
705 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
706 if (!BB->isLiveIn(
Reg))
708 for (MachineInstr &
MI : *BB) {
709 for (MachineOperand &MO :
MI.all_uses()) {
712 if (
TRI->regsOverlap(
Reg, MO.getReg()))
721void MachineLICMImpl::HoistPostRA(MachineInstr *
MI,
Register Def,
722 MachineLoop *CurLoop) {
732 MachineBasicBlock *
MBB =
MI->getParent();
738 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
744 AddToLiveIns(Def, CurLoop);
752bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
753 MachineLoop *CurLoop) {
754 if (SpeculationState != SpeculateUnknown)
755 return SpeculationState == SpeculateFalse;
759 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
761 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
763 SpeculationState = SpeculateTrue;
768 SpeculationState = SpeculateFalse;
772void MachineLICMImpl::EnterScope(MachineBasicBlock *
MBB) {
779void MachineLICMImpl::ExitScope(MachineBasicBlock *
MBB) {
787void MachineLICMImpl::ExitScopeIfDone(
789 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
790 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
791 if (OpenChildren[Node])
795 ExitScope(
Node->getBlock());
798 if (!Parent || --OpenChildren[Parent] != 0)
809 MachineLoop *CurLoop) {
810 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
816 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
817 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
821 while (!WorkList.
empty()) {
823 assert(Node &&
"Null dominator tree node?");
824 MachineBasicBlock *BB =
Node->getBlock();
829 if (
ML &&
ML->getHeader()->isEHPad())
837 unsigned NumChildren =
Node->getNumChildren();
845 OpenChildren[
Node] = NumChildren;
851 ParentMap[Child] =
Node;
863 InitRegPressure(Preheader);
867 MachineBasicBlock *
MBB =
Node->getBlock();
872 SpeculationState = SpeculateUnknown;
874 unsigned HoistRes = HoistResult::NotHoisted;
875 HoistRes = Hoist(&
MI, Preheader, CurLoop);
876 if (HoistRes & HoistResult::NotHoisted) {
880 for (MachineLoop *L = MLI->
getLoopFor(
MI.getParent()); L != CurLoop;
881 L =
L->getParentLoop())
884 while (!InnerLoopWorkList.
empty()) {
885 MachineLoop *InnerLoop = InnerLoopWorkList.
pop_back_val();
887 if (InnerLoopPreheader) {
888 HoistRes = Hoist(&
MI, InnerLoopPreheader, InnerLoop);
889 if (HoistRes & HoistResult::Hoisted)
895 if (HoistRes & HoistResult::ErasedMI)
898 UpdateRegPressure(&
MI);
902 ExitScopeIfDone(Node, OpenChildren, ParentMap);
913void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
921 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
927 for (
const MachineInstr &
MI : *BB)
928 UpdateRegPressure(&
MI,
true);
932void MachineLICMImpl::UpdateRegPressure(
const MachineInstr *
MI,
933 bool ConsiderUnseenAsDef) {
934 auto Cost = calcRegisterCost(
MI,
true, ConsiderUnseenAsDef);
935 for (
const auto &RPIdAndCost :
Cost) {
936 unsigned Class = RPIdAndCost.first;
937 if (
static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
950SmallDenseMap<unsigned, int>
951MachineLICMImpl::calcRegisterCost(
const MachineInstr *
MI,
bool ConsiderSeen,
952 bool ConsiderUnseenAsDef) {
953 SmallDenseMap<unsigned, int>
Cost;
954 if (
MI->isImplicitDef())
956 for (
unsigned i = 0, e =
MI->getDesc().getNumOperands(); i != e; ++i) {
957 const MachineOperand &MO =
MI->getOperand(i);
965 bool isNew = ConsiderSeen ? RegSeen.
insert(
Reg).second :
false;
966 const TargetRegisterClass *RC =
MRI->getRegClass(
Reg);
968 RegClassWeight
W =
TRI->getRegClassWeight(RC);
971 RCCost =
W.RegWeight;
974 if (isNew && !isKill && ConsiderUnseenAsDef)
976 RCCost =
W.RegWeight;
977 else if (!isNew && isKill)
978 RCCost = -
W.RegWeight;
982 const int *PS =
TRI->getRegClassPressureSets(RC);
983 for (; *PS != -1; ++PS)
992 assert(
MI.mayLoad() &&
"Expected MI that loads!");
996 if (
MI.memoperands_empty())
1001 if (PSV->isGOT() || PSV->isConstantPool())
1018 bool FoundCallerPresReg =
false;
1019 if (!
MI.mayStore() ||
MI.hasUnmodeledSideEffects() ||
1020 (
MI.getNumOperands() == 0))
1029 if (
Reg.isVirtual())
1031 if (
Reg.isVirtual())
1033 if (!
TRI->isCallerPreservedPhysReg(
Reg.asMCReg(), *
MI.getMF()))
1036 FoundCallerPresReg =
true;
1037 }
else if (!MO.
isImm()) {
1041 return FoundCallerPresReg;
1059 Register CopySrcReg =
MI.getOperand(1).getReg();
1063 if (!
TRI->isCallerPreservedPhysReg(CopySrcReg.
asMCReg(), *MF))
1066 Register CopyDstReg =
MI.getOperand(0).getReg();
1079bool MachineLICMImpl::IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop) {
1081 bool DontMoveAcrossStore = !
HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1082 if ((!
I.isSafeToMove(DontMoveAcrossStore)) &&
1095 !IsGuaranteedToExecute(
I.getParent(), CurLoop)) {
1104 if (
I.isConvergent())
1107 if (!
TII->shouldHoist(
I, CurLoop))
1114bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &
I,
1115 MachineLoop *CurLoop) {
1116 if (!IsLICMCandidate(
I, CurLoop)) {
1117 LLVM_DEBUG(
dbgs() <<
"LICM: Instruction not a LICM candidate\n");
1125bool MachineLICMImpl::HasLoopPHIUse(
const MachineInstr *
MI,
1126 MachineLoop *CurLoop) {
1129 MI = Work.pop_back_val();
1130 for (
const MachineOperand &MO :
MI->all_defs()) {
1134 for (MachineInstr &
UseMI :
MRI->use_instructions(
Reg)) {
1136 if (
UseMI.isPHI()) {
1150 Work.push_back(&
UseMI);
1153 }
while (!Work.empty());
1159bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
1161 MachineLoop *CurLoop)
const {
1162 if (
MRI->use_nodbg_empty(
Reg))
1165 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(
Reg)) {
1166 if (
UseMI.isCopyLike())
1170 for (
unsigned i = 0, e =
UseMI.getNumOperands(); i != e; ++i) {
1171 const MachineOperand &MO =
UseMI.getOperand(i);
1178 if (
TII->hasHighOperandLatency(SchedModel,
MRI,
MI, DefIdx,
UseMI, i))
1191bool MachineLICMImpl::IsCheapInstruction(MachineInstr &
MI)
const {
1195 bool isCheap =
false;
1196 unsigned NumDefs =
MI.getDesc().getNumDefs();
1197 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
1198 MachineOperand &DefMO =
MI.getOperand(i);
1206 if (!
TII->hasLowDefLatency(SchedModel,
MI, i))
1216bool MachineLICMImpl::CanCauseHighRegPressure(
1217 const SmallDenseMap<unsigned, int> &
Cost,
bool CheapInstr) {
1218 for (
const auto &RPIdAndCost :
Cost) {
1219 if (RPIdAndCost.second <= 0)
1222 unsigned Class = RPIdAndCost.first;
1223 int Limit = RegLimit[
Class];
1230 for (
const auto &RP : BackTrace)
1231 if (
static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1241void MachineLICMImpl::UpdateBackTraceRegPressure(
const MachineInstr *
MI) {
1244 auto Cost = calcRegisterCost(
MI,
false,
1248 for (
auto &RP : BackTrace)
1249 for (
const auto &RPIdAndCost :
Cost)
1250 RP[RPIdAndCost.first] += RPIdAndCost.second;
1255bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &
MI,
1256 MachineLoop *CurLoop) {
1257 if (
MI.isImplicitDef())
1275 bool CheapInstr = IsCheapInstruction(
MI);
1276 bool CreatesCopy = HasLoopPHIUse(&
MI, CurLoop);
1279 if (CheapInstr && CreatesCopy) {
1286 if (
TII->isTriviallyReMaterializable(
MI))
1291 for (
unsigned i = 0, e =
MI.getDesc().getNumOperands(); i != e; ++i) {
1292 const MachineOperand &MO =
MI.getOperand(i);
1298 if (MO.
isDef() && HasHighOperandLatency(
MI, i,
Reg, CurLoop)) {
1311 auto Cost = calcRegisterCost(&
MI,
false,
1316 if (!CanCauseHighRegPressure(
Cost, CheapInstr)) {
1332 (!IsGuaranteedToExecute(
MI.getParent(), CurLoop) && !MayCSE(&
MI))) {
1340 if (
MI.isCopy() ||
MI.isRegSequence()) {
1344 [
this](
const MachineOperand &UseOp) {
1345 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1346 MRI->isConstantPhysReg(UseOp.getReg());
1348 IsLoopInvariantInst(
MI, CurLoop) &&
1349 any_of(
MRI->use_nodbg_instructions(DefReg),
1350 [&CurLoop,
this, DefReg,
1352 if (!CurLoop->contains(&UseMI))
1359 if (CanCauseHighRegPressure(Cost, false) &&
1360 !CurLoop->isLoopInvariant(UseMI, DefReg))
1370 if (!
TII->isTriviallyReMaterializable(
MI) &&
1371 !
MI.isDereferenceableInvariantLoad()) {
1382MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *
MI,
1383 MachineLoop *CurLoop) {
1385 if (
MI->canFoldAsLoad())
1391 if (!
MI->isDereferenceableInvariantLoad())
1395 unsigned LoadRegIndex;
1397 TII->getOpcodeAfterMemoryUnfold(
MI->getOpcode(),
1401 if (NewOpc == 0)
return nullptr;
1402 const MCInstrDesc &MID =
TII->get(NewOpc);
1403 MachineFunction &MF = *
MI->getMF();
1404 const TargetRegisterClass *RC =
TII->getRegClass(MID, LoadRegIndex,
TRI);
1408 SmallVector<MachineInstr *, 2> NewMIs;
1414 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1417 "Unfolded a load into multiple instructions!");
1418 MachineBasicBlock *
MBB =
MI->getParent();
1424 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
1425 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
1426 NewMIs[0]->eraseFromParent();
1427 NewMIs[1]->eraseFromParent();
1432 UpdateRegPressure(NewMIs[1]);
1437 if (
MI->shouldUpdateAdditionalCallInfo())
1440 MI->eraseFromParent();
1447void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1448 for (MachineInstr &
MI : *BB)
1454void MachineLICMImpl::InitializeLoadsHoistableLoops() {
1460 while (!Worklist.empty()) {
1461 auto *
L = Worklist.pop_back_val();
1462 AllowedToHoistLoads[
L] =
true;
1474 for (
auto *Loop :
reverse(LoopsInPreOrder)) {
1475 for (
auto *
MBB : Loop->blocks()) {
1477 if (!AllowedToHoistLoads[Loop])
1479 for (
auto &
MI : *
MBB) {
1480 if (!
MI.isLoadFoldBarrier() && !
MI.mayStore() && !
MI.isCall() &&
1481 !(
MI.mayLoad() &&
MI.hasOrderedMemoryRef()))
1483 for (MachineLoop *L = Loop;
L !=
nullptr;
L =
L->getParentLoop())
1484 AllowedToHoistLoads[
L] =
false;
1494MachineLICMImpl::LookForDuplicate(
const MachineInstr *
MI,
1495 std::vector<MachineInstr *> &PrevMIs) {
1496 for (MachineInstr *PrevMI : PrevMIs)
1497 if (
TII->produceSameValue(*
MI, *PrevMI, (PreRegAlloc ?
MRI :
nullptr)))
1507bool MachineLICMImpl::EliminateCSE(
1509 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1512 if (
MI->isImplicitDef())
1517 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1520 if (MachineInstr *Dup = LookForDuplicate(
MI, CI->second)) {
1525 SmallVector<unsigned, 2> Defs;
1526 for (
unsigned i = 0, e =
MI->getNumOperands(); i != e; ++i) {
1527 const MachineOperand &MO =
MI->getOperand(i);
1531 MO.
getReg() == Dup->getOperand(i).getReg()) &&
1532 "Instructions with different phys regs are not identical!");
1539 for (
unsigned i = 0, e = Defs.
size(); i != e; ++i) {
1540 unsigned Idx = Defs[i];
1542 Register DupReg = Dup->getOperand(Idx).getReg();
1545 if (!
MRI->constrainRegClass(DupReg,
MRI->getRegClass(
Reg))) {
1547 for (
unsigned j = 0;
j != i; ++
j)
1548 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1553 for (
unsigned Idx : Defs) {
1555 Register DupReg = Dup->getOperand(Idx).getReg();
1556 MRI->replaceRegWith(
Reg, DupReg);
1557 MRI->clearKillFlags(DupReg);
1559 if (!
MRI->use_nodbg_empty(DupReg))
1560 Dup->getOperand(Idx).setIsDead(
false);
1563 MI->eraseFromParent();
1572bool MachineLICMImpl::MayCSE(MachineInstr *
MI) {
1573 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1576 unsigned Opcode =
MI->getOpcode();
1577 for (
auto &Map : CSEMap) {
1580 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1581 Map.second.find(Opcode);
1584 if (CI ==
Map.second.end() ||
MI->isImplicitDef())
1586 if (LookForDuplicate(
MI, CI->second) !=
nullptr)
1597unsigned MachineLICMImpl::Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
1598 MachineLoop *CurLoop) {
1599 MachineBasicBlock *SrcBlock =
MI->getParent();
1604 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1605 ++NumNotHoistedDueToHotness;
1606 return HoistResult::NotHoisted;
1609 bool HasExtractHoistableLoad =
false;
1610 if (!IsLoopInvariantInst(*
MI, CurLoop) ||
1611 !IsProfitableToHoist(*
MI, CurLoop)) {
1613 MI = ExtractHoistableLoad(
MI, CurLoop);
1615 return HoistResult::NotHoisted;
1616 HasExtractHoistableLoad =
true;
1627 dbgs() <<
"Hoisting " << *
MI;
1628 if (
MI->getParent()->getBasicBlock())
1638 InitCSEMap(Preheader);
1639 FirstInLoop =
false;
1643 unsigned Opcode =
MI->getOpcode();
1644 bool HasCSEDone =
false;
1645 for (
auto &Map : CSEMap) {
1648 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1649 Map.second.find(Opcode);
1650 if (CI !=
Map.second.end()) {
1651 if (EliminateCSE(
MI, CI)) {
1666 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
1670 UpdateBackTraceRegPressure(
MI);
1675 for (MachineOperand &MO :
MI->all_defs())
1679 CSEMap[Preheader][Opcode].push_back(
MI);
1685 if (HasCSEDone || HasExtractHoistableLoad)
1686 return HoistResult::Hoisted | HoistResult::ErasedMI;
1687 return HoistResult::Hoisted;
1691MachineBasicBlock *MachineLICMImpl::getOrCreatePreheader(MachineLoop *CurLoop) {
1700 MachineBasicBlock *NewPreheader = Pred->SplitCriticalEdge(
1701 CurLoop->
getHeader(), LegacyPass, MFAM,
nullptr, MDTU);
1704 return NewPreheader;
1712bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1713 MachineBasicBlock *TgtBlock) {
1722 double Ratio = (double)DstBF / SrcBF;
1728template <
typename DerivedT,
bool PreRegAlloc>
1731 bool Changed = MachineLICMImpl(PreRegAlloc,
nullptr, &MFAM).run(MF);