58#define DEBUG_TYPE "machinelicm"
62 cl::desc(
"MachineLICM should avoid speculation"),
67 cl::desc(
"MachineLICM should hoist even cheap instructions"),
83 cl::desc(
"Do not hoist instructions if target"
84 "block is N times hotter than the source."),
91 cl::desc(
"Disable hoisting instructions to"
95 "disable the feature"),
97 "enable the feature when using profile data"),
99 "enable the feature with/wo profile data")));
102 "Number of machine instructions hoisted out of loops");
104 "Number of instructions hoisted in low reg pressure situation");
106 "Number of high latency instructions hoisted");
108 "Number of hoisted machine instructions CSEed");
110 "Number of machine instructions hoisted out of loops post regalloc");
112 "Number of stores of const phys reg hoisted out of loops");
114 "Number of instructions not hoisted due to block frequency");
117 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
119 class MachineLICMImpl {
120 const TargetInstrInfo *TII =
nullptr;
121 const TargetLoweringBase *TLI =
nullptr;
122 const TargetRegisterInfo *TRI =
nullptr;
123 const MachineFrameInfo *MFI =
nullptr;
124 MachineRegisterInfo *MRI =
nullptr;
125 TargetSchedModel SchedModel;
126 bool PreRegAlloc =
false;
127 bool HasProfileData =
false;
133 MachineBlockFrequencyInfo *MBFI =
nullptr;
134 MachineLoopInfo *MLI =
nullptr;
135 MachineDomTreeUpdater *MDTU =
nullptr;
138 bool Changed =
false;
139 bool FirstInLoop =
false;
143 SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
146 DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
148 bool isExitBlock(MachineLoop *CurLoop,
const MachineBasicBlock *
MBB) {
149 auto [It,
Inserted] = ExitBlockMap.try_emplace(CurLoop);
151 SmallVector<MachineBasicBlock *, 8> ExitBlocks;
153 It->second = std::move(ExitBlocks);
159 SmallDenseSet<Register> RegSeen;
160 SmallVector<unsigned, 8> RegPressure;
164 SmallVector<unsigned, 8> RegLimit;
170 DenseMap<MachineBasicBlock *,
171 DenseMap<unsigned, std::vector<MachineInstr *>>>
183 unsigned SpeculationState = SpeculateUnknown;
186 MachineLICMImpl(
bool PreRegAlloc,
Pass *LegacyPass,
188 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
189 assert((LegacyPass || MFAM) &&
"LegacyPass or MFAM must be provided");
190 assert(!(LegacyPass && MFAM) &&
191 "LegacyPass and MFAM cannot be provided at the same time");
194 bool run(MachineFunction &MF);
196 void releaseMemory() {
202 ExitBlockMap.clear();
207 struct CandidateInfo {
212 CandidateInfo(MachineInstr *mi,
Register def,
int fi)
213 : MI(mi), Def(def), FI(fi) {}
216 void HoistRegionPostRA(MachineLoop *CurLoop);
218 void HoistPostRA(MachineInstr *
MI,
Register Def, MachineLoop *CurLoop);
220 void ProcessMI(MachineInstr *
MI, BitVector &RUDefs, BitVector &RUClobbers,
221 SmallDenseSet<int> &StoredFIs,
222 SmallVectorImpl<CandidateInfo> &Candidates,
223 MachineLoop *CurLoop);
225 void AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop);
227 bool IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop);
229 bool IsLoopInvariantInst(MachineInstr &
I, MachineLoop *CurLoop);
231 bool HasLoopPHIUse(
const MachineInstr *
MI, MachineLoop *CurLoop);
233 bool HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
Register Reg,
234 MachineLoop *CurLoop)
const;
236 bool IsCheapInstruction(MachineInstr &
MI)
const;
238 bool CanCauseHighRegPressure(
const SmallDenseMap<unsigned, int> &
Cost,
241 void UpdateBackTraceRegPressure(
const MachineInstr *
MI);
243 bool IsProfitableToHoist(MachineInstr &
MI, MachineLoop *CurLoop);
245 bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
247 void EnterScope(MachineBasicBlock *
MBB);
249 void ExitScope(MachineBasicBlock *
MBB);
251 void ExitScopeIfDone(
253 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
254 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
258 void InitRegPressure(MachineBasicBlock *BB);
260 SmallDenseMap<unsigned, int> calcRegisterCost(
const MachineInstr *
MI,
262 bool ConsiderUnseenAsDef);
264 void UpdateRegPressure(
const MachineInstr *
MI,
265 bool ConsiderUnseenAsDef =
false);
267 MachineInstr *ExtractHoistableLoad(MachineInstr *
MI, MachineLoop *CurLoop);
269 MachineInstr *LookForDuplicate(
const MachineInstr *
MI,
270 std::vector<MachineInstr *> &PrevMIs);
273 EliminateCSE(MachineInstr *
MI,
274 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI);
276 bool MayCSE(MachineInstr *
MI);
278 unsigned Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
279 MachineLoop *CurLoop);
281 void InitCSEMap(MachineBasicBlock *BB);
283 void InitializeLoadsHoistableLoops();
285 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
286 MachineBasicBlock *TgtBlock);
287 MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);
294 MachineLICMBase(
char &
ID,
bool PreRegAlloc)
295 : MachineFunctionPass(
ID), PreRegAlloc(PreRegAlloc) {}
297 bool runOnMachineFunction(MachineFunction &MF)
override;
299 void getAnalysisUsage(AnalysisUsage &AU)
const override {
302 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
310 class MachineLICM :
public MachineLICMBase {
313 MachineLICM() : MachineLICMBase(ID,
false) {
318 class EarlyMachineLICM :
public MachineLICMBase {
321 EarlyMachineLICM() : MachineLICMBase(ID,
true) {
329char EarlyMachineLICM::ID;
335 "Machine Loop Invariant Code Motion",
false,
false)
344 "Early Machine Loop Invariant Code Motion",
false,
false)
350 "Early Machine Loop Invariant Code Motion",
false,
false)
353 if (skipFunction(MF.getFunction()))
356 MachineLICMImpl Impl(PreRegAlloc,
this,
nullptr);
360#define GET_RESULT(RESULT, GETTER, INFIX) \
362 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \
363 : &MFAM->getResult<RESULT##Analysis>(MF))
372 MachineDomTreeUpdater::UpdateStrategy::Lazy);
381 TII = ST.getInstrInfo();
382 TLI = ST.getTargetLowering();
383 TRI = ST.getRegisterInfo();
386 SchedModel.
init(&ST);
398 unsigned NumRPS =
TRI->getNumRegPressureSets();
399 RegPressure.resize(NumRPS);
402 for (
unsigned i = 0, e = NumRPS; i != e; ++i)
403 RegLimit[i] =
TRI->getRegPressureSetLimit(MF, i);
407 InitializeLoadsHoistableLoops();
410 while (!Worklist.
empty()) {
414 HoistRegionPostRA(CurLoop);
420 HoistOutOfLoop(
N, CurLoop);
436 if (
MI->memoperands_empty())
439 if (!
MemOp->isStore() || !
MemOp->getPseudoValue())
443 if (
Value->getFrameIndex() == FI)
484 const unsigned NumRegs =
TRI.getNumRegs();
485 const unsigned MaskWords = (NumRegs + 31) / 32;
486 for (
unsigned K = 0; K < MaskWords; ++K) {
488 for (
unsigned Bit = 0; Bit < 32; ++Bit) {
489 const unsigned PhysReg = (K * 32) + Bit;
490 if (PhysReg == NumRegs)
493 if (PhysReg && !((Word >> Bit) & 1)) {
494 for (MCRegUnit Unit :
TRI.regunits(PhysReg))
495 RUsFromRegsNotInMask.
set(
static_cast<unsigned>(Unit));
500 RUs |= RUsFromRegsNotInMask;
505void MachineLICMImpl::ProcessMI(MachineInstr *
MI, BitVector &RUDefs,
506 BitVector &RUClobbers,
507 SmallDenseSet<int> &StoredFIs,
508 SmallVectorImpl<CandidateInfo> &Candidates,
509 MachineLoop *CurLoop) {
510 bool RuledOut =
false;
511 bool HasNonInvariantUse =
false;
513 for (
const MachineOperand &MO :
MI->operands()) {
516 int FI = MO.getIndex();
517 if (!StoredFIs.
count(FI) &&
521 HasNonInvariantUse =
true;
527 if (MO.isRegMask()) {
540 if (!HasNonInvariantUse) {
541 for (MCRegUnit Unit :
TRI->regunits(
Reg)) {
544 if (RUDefs.
test(
static_cast<unsigned>(Unit)) ||
545 RUClobbers.
test(
static_cast<unsigned>(Unit))) {
546 HasNonInvariantUse =
true;
565 for (MCRegUnit Unit :
TRI->regunits(
Reg)) {
566 if (RUDefs.
test(
static_cast<unsigned>(Unit))) {
567 RUClobbers.
set(
static_cast<unsigned>(Unit));
569 }
else if (RUClobbers.
test(
static_cast<unsigned>(Unit))) {
575 RUDefs.
set(
static_cast<unsigned>(Unit));
581 if (Def && !RuledOut) {
582 int FI = std::numeric_limits<int>::min();
583 if ((!HasNonInvariantUse && IsLICMCandidate(*
MI, CurLoop)) ||
591void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop) {
592 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
596 unsigned NumRegUnits =
TRI->getNumRegUnits();
597 BitVector RUDefs(NumRegUnits);
598 BitVector RUClobbers(NumRegUnits);
601 SmallDenseSet<int> StoredFIs;
605 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
609 if (
ML &&
ML->getHeader()->isEHPad())
continue;
614 for (
const auto &LI : BB->liveins()) {
615 for (MCRegUnit Unit :
TRI->regunits(LI.PhysReg))
616 RUDefs.
set(
static_cast<unsigned>(Unit));
620 if (
const uint32_t *Mask = BB->getBeginClobberMask(
TRI))
625 const MachineFunction &MF = *BB->getParent();
629 for (MCRegUnit Unit :
TRI->regunits(
Reg))
630 RUClobbers.
set(
static_cast<unsigned>(Unit));
632 for (MCRegUnit Unit :
TRI->regunits(
Reg))
633 RUClobbers.
set(
static_cast<unsigned>(Unit));
636 SpeculationState = SpeculateUnknown;
637 for (MachineInstr &
MI : *BB)
638 ProcessMI(&
MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
642 BitVector TermRUs(NumRegUnits);
644 if (TI != Preheader->
end()) {
645 for (
const MachineOperand &MO : TI->operands()) {
651 for (MCRegUnit Unit :
TRI->regunits(
Reg))
652 TermRUs.set(
static_cast<unsigned>(Unit));
664 for (CandidateInfo &Candidate : Candidates) {
665 if (Candidate.FI != std::numeric_limits<int>::min() &&
666 StoredFIs.
count(Candidate.FI))
671 for (MCRegUnit Unit :
TRI->regunits(Def)) {
672 if (RUClobbers.
test(
static_cast<unsigned>(Unit)) ||
673 TermRUs.test(
static_cast<unsigned>(Unit))) {
682 MachineInstr *
MI = Candidate.MI;
683 for (
const MachineOperand &MO :
MI->all_uses()) {
686 for (MCRegUnit Unit :
TRI->regunits(MO.getReg())) {
687 if (RUDefs.
test(
static_cast<unsigned>(Unit)) ||
688 RUClobbers.
test(
static_cast<unsigned>(Unit))) {
701 HoistPostRA(
MI, Candidate.Def, CurLoop);
707void MachineLICMImpl::AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop) {
708 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
709 if (!BB->isLiveIn(
Reg))
711 for (MachineInstr &
MI : *BB) {
712 for (MachineOperand &MO :
MI.all_uses()) {
715 if (
TRI->regsOverlap(
Reg, MO.getReg()))
724void MachineLICMImpl::HoistPostRA(MachineInstr *
MI,
Register Def,
725 MachineLoop *CurLoop) {
735 MachineBasicBlock *
MBB =
MI->getParent();
741 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
747 AddToLiveIns(Def, CurLoop);
755bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
756 MachineLoop *CurLoop) {
757 if (SpeculationState != SpeculateUnknown)
758 return SpeculationState == SpeculateFalse;
762 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
764 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
766 SpeculationState = SpeculateTrue;
771 SpeculationState = SpeculateFalse;
775void MachineLICMImpl::EnterScope(MachineBasicBlock *
MBB) {
782void MachineLICMImpl::ExitScope(MachineBasicBlock *
MBB) {
790void MachineLICMImpl::ExitScopeIfDone(
792 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
793 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
794 if (OpenChildren[Node])
798 ExitScope(
Node->getBlock());
801 if (!Parent || --OpenChildren[Parent] != 0)
812 MachineLoop *CurLoop) {
813 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
819 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
820 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
824 while (!WorkList.
empty()) {
826 assert(Node &&
"Null dominator tree node?");
827 MachineBasicBlock *BB =
Node->getBlock();
832 if (
ML &&
ML->getHeader()->isEHPad())
845 OpenChildren[
Node] = 0;
852 size_t WorkListStart = WorkList.
size();
854 ParentMap[Child] =
Node;
857 std::reverse(WorkList.
begin() + WorkListStart, WorkList.
end());
858 OpenChildren[
Node] = WorkList.
size() - WorkListStart;
867 InitRegPressure(Preheader);
871 MachineBasicBlock *
MBB =
Node->getBlock();
876 SpeculationState = SpeculateUnknown;
878 unsigned HoistRes = HoistResult::NotHoisted;
879 HoistRes = Hoist(&
MI, Preheader, CurLoop);
880 if (HoistRes & HoistResult::NotHoisted) {
884 for (MachineLoop *L = MLI->
getLoopFor(
MI.getParent()); L != CurLoop;
885 L =
L->getParentLoop())
888 while (!InnerLoopWorkList.
empty()) {
889 MachineLoop *InnerLoop = InnerLoopWorkList.
pop_back_val();
891 if (InnerLoopPreheader) {
892 HoistRes = Hoist(&
MI, InnerLoopPreheader, InnerLoop);
893 if (HoistRes & HoistResult::Hoisted)
899 if (HoistRes & HoistResult::ErasedMI)
902 UpdateRegPressure(&
MI);
906 ExitScopeIfDone(Node, OpenChildren, ParentMap);
917void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
925 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
931 for (
const MachineInstr &
MI : *BB)
932 UpdateRegPressure(&
MI,
true);
936void MachineLICMImpl::UpdateRegPressure(
const MachineInstr *
MI,
937 bool ConsiderUnseenAsDef) {
938 auto Cost = calcRegisterCost(
MI,
true, ConsiderUnseenAsDef);
939 for (
const auto &[Class, Weight] :
Cost) {
940 if (
static_cast<int>(RegPressure[Class]) < -Weight)
953SmallDenseMap<unsigned, int>
954MachineLICMImpl::calcRegisterCost(
const MachineInstr *
MI,
bool ConsiderSeen,
955 bool ConsiderUnseenAsDef) {
956 SmallDenseMap<unsigned, int>
Cost;
957 if (
MI->isImplicitDef())
959 for (
unsigned i = 0, e =
MI->getDesc().getNumOperands(); i != e; ++i) {
960 const MachineOperand &MO =
MI->getOperand(i);
968 bool isNew = ConsiderSeen ? RegSeen.
insert(
Reg).second :
false;
969 const TargetRegisterClass *RC =
MRI->getRegClass(
Reg);
971 RegClassWeight
W =
TRI->getRegClassWeight(RC);
974 RCCost =
W.RegWeight;
977 if (isNew && !isKill && ConsiderUnseenAsDef)
979 RCCost =
W.RegWeight;
980 else if (!isNew && isKill)
981 RCCost = -
W.RegWeight;
985 const int *PS =
TRI->getRegClassPressureSets(RC);
986 for (; *PS != -1; ++PS)
995 assert(
MI.mayLoad() &&
"Expected MI that loads!");
999 if (
MI.memoperands_empty())
1004 if (PSV->isGOT() || PSV->isConstantPool())
1021 bool FoundCallerPresReg =
false;
1022 if (!
MI.mayStore() ||
MI.hasUnmodeledSideEffects() ||
1023 (
MI.getNumOperands() == 0))
1032 if (
Reg.isVirtual())
1034 if (
Reg.isVirtual())
1036 if (!
TRI->isCallerPreservedPhysReg(
Reg.asMCReg(), *
MI.getMF()))
1039 FoundCallerPresReg =
true;
1040 }
else if (!MO.
isImm()) {
1044 return FoundCallerPresReg;
1062 Register CopySrcReg =
MI.getOperand(1).getReg();
1066 if (!
TRI->isCallerPreservedPhysReg(CopySrcReg.
asMCReg(), *MF))
1069 Register CopyDstReg =
MI.getOperand(0).getReg();
1082bool MachineLICMImpl::IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop) {
1084 bool DontMoveAcrossStore = !
HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1085 if ((!
I.isSafeToMove(DontMoveAcrossStore)) &&
1098 !IsGuaranteedToExecute(
I.getParent(), CurLoop)) {
1107 if (
I.isConvergent())
1110 if (!
TII->shouldHoist(
I, CurLoop))
1117bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &
I,
1118 MachineLoop *CurLoop) {
1119 if (!IsLICMCandidate(
I, CurLoop)) {
1120 LLVM_DEBUG(
dbgs() <<
"LICM: Instruction not a LICM candidate\n");
1128bool MachineLICMImpl::HasLoopPHIUse(
const MachineInstr *
MI,
1129 MachineLoop *CurLoop) {
1132 MI = Work.pop_back_val();
1133 for (
const MachineOperand &MO :
MI->all_defs()) {
1137 for (MachineInstr &
UseMI :
MRI->use_instructions(
Reg)) {
1139 if (
UseMI.isPHI()) {
1153 Work.push_back(&
UseMI);
1156 }
while (!Work.empty());
1162bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
1164 MachineLoop *CurLoop)
const {
1165 if (
MRI->use_nodbg_empty(
Reg))
1168 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(
Reg)) {
1169 if (
UseMI.isCopyLike())
1173 for (
unsigned i = 0, e =
UseMI.getNumOperands(); i != e; ++i) {
1174 const MachineOperand &MO =
UseMI.getOperand(i);
1181 if (
TII->hasHighOperandLatency(SchedModel,
MRI,
MI, DefIdx,
UseMI, i))
1194bool MachineLICMImpl::IsCheapInstruction(MachineInstr &
MI)
const {
1198 bool isCheap =
false;
1199 unsigned NumDefs =
MI.getDesc().getNumDefs();
1200 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
1201 MachineOperand &DefMO =
MI.getOperand(i);
1209 if (!
TII->hasLowDefLatency(SchedModel,
MI, i))
1219bool MachineLICMImpl::CanCauseHighRegPressure(
1220 const SmallDenseMap<unsigned, int> &
Cost,
bool CheapInstr) {
1221 for (
const auto &[Class, Weight] :
Cost) {
1225 int Limit = RegLimit[
Class];
1232 for (
const auto &RP : BackTrace)
1233 if (
static_cast<int>(RP[Class]) + Weight >= Limit)
1243void MachineLICMImpl::UpdateBackTraceRegPressure(
const MachineInstr *
MI) {
1246 auto Cost = calcRegisterCost(
MI,
false,
1250 for (
auto &RP : BackTrace)
1251 for (
const auto &[Class, Weight] :
Cost)
1257bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &
MI,
1258 MachineLoop *CurLoop) {
1259 if (
MI.isImplicitDef())
1277 bool CheapInstr = IsCheapInstruction(
MI);
1278 bool CreatesCopy = HasLoopPHIUse(&
MI, CurLoop);
1281 if (CheapInstr && CreatesCopy) {
1288 if (
TII->isTriviallyReMaterializable(
MI))
1293 for (
unsigned i = 0, e =
MI.getDesc().getNumOperands(); i != e; ++i) {
1294 const MachineOperand &MO =
MI.getOperand(i);
1300 if (MO.
isDef() && HasHighOperandLatency(
MI, i,
Reg, CurLoop)) {
1313 auto Cost = calcRegisterCost(&
MI,
false,
1318 if (!CanCauseHighRegPressure(
Cost, CheapInstr)) {
1334 (!IsGuaranteedToExecute(
MI.getParent(), CurLoop) && !MayCSE(&
MI))) {
1342 if (
MI.isCopy() ||
MI.isRegSequence()) {
1346 [
this](
const MachineOperand &UseOp) {
1347 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1348 MRI->isConstantPhysReg(UseOp.getReg());
1350 IsLoopInvariantInst(
MI, CurLoop) &&
1351 any_of(
MRI->use_nodbg_instructions(DefReg),
1352 [&CurLoop,
this, DefReg,
1354 if (!CurLoop->contains(&UseMI))
1361 if (CanCauseHighRegPressure(Cost, false) &&
1362 !CurLoop->isLoopInvariant(UseMI, DefReg))
1372 if (!
TII->isTriviallyReMaterializable(
MI) &&
1373 !
MI.isDereferenceableInvariantLoad()) {
1384MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *
MI,
1385 MachineLoop *CurLoop) {
1387 if (
MI->canFoldAsLoad())
1393 if (!
MI->isDereferenceableInvariantLoad())
1397 unsigned LoadRegIndex;
1399 TII->getOpcodeAfterMemoryUnfold(
MI->getOpcode(),
1403 if (NewOpc == 0)
return nullptr;
1404 const MCInstrDesc &MID =
TII->get(NewOpc);
1405 MachineFunction &MF = *
MI->getMF();
1406 const TargetRegisterClass *RC =
TII->getRegClass(MID, LoadRegIndex);
1410 SmallVector<MachineInstr *, 2> NewMIs;
1416 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1419 "Unfolded a load into multiple instructions!");
1420 MachineBasicBlock *
MBB =
MI->getParent();
1426 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
1427 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
1428 NewMIs[0]->eraseFromParent();
1429 NewMIs[1]->eraseFromParent();
1434 UpdateRegPressure(NewMIs[1]);
1439 if (
MI->shouldUpdateAdditionalCallInfo())
1442 MI->eraseFromParent();
1449void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1450 for (MachineInstr &
MI : *BB)
1456void MachineLICMImpl::InitializeLoadsHoistableLoops() {
1462 while (!Worklist.empty()) {
1463 auto *
L = Worklist.pop_back_val();
1464 AllowedToHoistLoads[
L] =
true;
1476 for (
auto *Loop :
reverse(LoopsInPreOrder)) {
1477 for (
auto *
MBB : Loop->blocks()) {
1479 if (!AllowedToHoistLoads[Loop])
1481 for (
auto &
MI : *
MBB) {
1482 if (!
MI.isLoadFoldBarrier() && !
MI.mayStore() && !
MI.isCall() &&
1483 !(
MI.mayLoad() &&
MI.hasOrderedMemoryRef()))
1485 for (MachineLoop *L = Loop;
L !=
nullptr;
L =
L->getParentLoop())
1486 AllowedToHoistLoads[
L] =
false;
1496MachineLICMImpl::LookForDuplicate(
const MachineInstr *
MI,
1497 std::vector<MachineInstr *> &PrevMIs) {
1498 for (MachineInstr *PrevMI : PrevMIs)
1499 if (
TII->produceSameValue(*
MI, *PrevMI, (PreRegAlloc ?
MRI :
nullptr)))
1509bool MachineLICMImpl::EliminateCSE(
1511 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1514 if (
MI->isImplicitDef())
1519 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1522 if (MachineInstr *Dup = LookForDuplicate(
MI, CI->second)) {
1527 SmallVector<unsigned, 2> Defs;
1528 for (
unsigned i = 0, e =
MI->getNumOperands(); i != e; ++i) {
1529 const MachineOperand &MO =
MI->getOperand(i);
1533 MO.
getReg() == Dup->getOperand(i).getReg()) &&
1534 "Instructions with different phys regs are not identical!");
1541 for (
unsigned i = 0, e = Defs.
size(); i != e; ++i) {
1542 unsigned Idx = Defs[i];
1544 Register DupReg = Dup->getOperand(Idx).getReg();
1547 if (!
MRI->constrainRegClass(DupReg,
MRI->getRegClass(
Reg))) {
1549 for (
unsigned j = 0;
j != i; ++
j)
1550 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1555 for (
unsigned Idx : Defs) {
1557 Register DupReg = Dup->getOperand(Idx).getReg();
1558 MRI->replaceRegWith(
Reg, DupReg);
1559 MRI->clearKillFlags(DupReg);
1561 if (!
MRI->use_nodbg_empty(DupReg))
1562 Dup->getOperand(Idx).setIsDead(
false);
1565 MI->eraseFromParent();
1574bool MachineLICMImpl::MayCSE(MachineInstr *
MI) {
1575 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1578 unsigned Opcode =
MI->getOpcode();
1579 for (
auto &Map : CSEMap) {
1582 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1583 Map.second.find(Opcode);
1586 if (CI ==
Map.second.end() ||
MI->isImplicitDef())
1588 if (LookForDuplicate(
MI, CI->second) !=
nullptr)
1599unsigned MachineLICMImpl::Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
1600 MachineLoop *CurLoop) {
1601 MachineBasicBlock *SrcBlock =
MI->getParent();
1606 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1607 ++NumNotHoistedDueToHotness;
1608 return HoistResult::NotHoisted;
1611 bool HasExtractHoistableLoad =
false;
1612 if (!IsLoopInvariantInst(*
MI, CurLoop) ||
1613 !IsProfitableToHoist(*
MI, CurLoop)) {
1615 MI = ExtractHoistableLoad(
MI, CurLoop);
1617 return HoistResult::NotHoisted;
1618 HasExtractHoistableLoad =
true;
1629 dbgs() <<
"Hoisting " << *
MI;
1630 if (
MI->getParent()->getBasicBlock())
1640 InitCSEMap(Preheader);
1641 FirstInLoop =
false;
1645 unsigned Opcode =
MI->getOpcode();
1646 bool HasCSEDone =
false;
1647 for (
auto &Map : CSEMap) {
1650 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1651 Map.second.find(Opcode);
1652 if (CI !=
Map.second.end()) {
1653 if (EliminateCSE(
MI, CI)) {
1668 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
1672 UpdateBackTraceRegPressure(
MI);
1677 for (MachineOperand &MO :
MI->all_defs())
1681 CSEMap[Preheader][Opcode].push_back(
MI);
1687 if (HasCSEDone || HasExtractHoistableLoad)
1688 return HoistResult::Hoisted | HoistResult::ErasedMI;
1689 return HoistResult::Hoisted;
1693MachineBasicBlock *MachineLICMImpl::getOrCreatePreheader(MachineLoop *CurLoop) {
1702 MachineBasicBlock *NewPreheader = Pred->SplitCriticalEdge(
1703 CurLoop->
getHeader(), LegacyPass, MFAM,
nullptr, MDTU);
1706 return NewPreheader;
1714bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1715 MachineBasicBlock *TgtBlock) {
1724 double Ratio = (double)DstBF / SrcBF;
1730template <
typename DerivedT,
bool PreRegAlloc>
1733 bool Changed = MachineLICMImpl(PreRegAlloc,
nullptr, &MFAM).run(MF);