LLVM 20.0.0git
MachineLICM.cpp
Go to the documentation of this file.
1//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs loop invariant code motion on machine instructions. We
10// attempt to remove as much code from the body of a loop as possible.
11//
12// This pass is not intended to be a replacement or a complete alternative
13// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
14// constructs that are not exposed before lowering and instruction selection.
15//
16//===----------------------------------------------------------------------===//
17
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/Statistic.h"
43#include "llvm/IR/DebugLoc.h"
45#include "llvm/MC/MCInstrDesc.h"
46#include "llvm/MC/MCRegister.h"
48#include "llvm/Pass.h"
51#include "llvm/Support/Debug.h"
53#include <algorithm>
54#include <cassert>
55#include <limits>
56#include <vector>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "machinelicm"
61
62static cl::opt<bool>
63AvoidSpeculation("avoid-speculation",
64 cl::desc("MachineLICM should avoid speculation"),
65 cl::init(true), cl::Hidden);
66
67static cl::opt<bool>
68HoistCheapInsts("hoist-cheap-insts",
69 cl::desc("MachineLICM should hoist even cheap instructions"),
70 cl::init(false), cl::Hidden);
71
72static cl::opt<bool>
73HoistConstStores("hoist-const-stores",
74 cl::desc("Hoist invariant stores"),
75 cl::init(true), cl::Hidden);
76
77static cl::opt<bool> HoistConstLoads("hoist-const-loads",
78 cl::desc("Hoist invariant loads"),
79 cl::init(true), cl::Hidden);
80
81// The default threshold of 100 (i.e. if target block is 100 times hotter)
82// is based on empirical data on a single target and is subject to tuning.
84BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
85 cl::desc("Do not hoist instructions if target"
86 "block is N times hotter than the source."),
87 cl::init(100), cl::Hidden);
88
89enum class UseBFI { None, PGO, All };
90
91static cl::opt<UseBFI>
92DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
93 cl::desc("Disable hoisting instructions to"
94 " hotter blocks"),
97 "disable the feature"),
99 "enable the feature when using profile data"),
100 clEnumValN(UseBFI::All, "all",
101 "enable the feature with/wo profile data")));
102
103STATISTIC(NumHoisted,
104 "Number of machine instructions hoisted out of loops");
105STATISTIC(NumLowRP,
106 "Number of instructions hoisted in low reg pressure situation");
107STATISTIC(NumHighLatency,
108 "Number of high latency instructions hoisted");
109STATISTIC(NumCSEed,
110 "Number of hoisted machine instructions CSEed");
111STATISTIC(NumPostRAHoisted,
112 "Number of machine instructions hoisted out of loops post regalloc");
113STATISTIC(NumStoreConst,
114 "Number of stores of const phys reg hoisted out of loops");
115STATISTIC(NumNotHoistedDueToHotness,
116 "Number of instructions not hoisted due to block frequency");
117
118namespace {
119 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
120
121 class MachineLICMImpl {
122 const TargetInstrInfo *TII = nullptr;
123 const TargetLoweringBase *TLI = nullptr;
124 const TargetRegisterInfo *TRI = nullptr;
125 const MachineFrameInfo *MFI = nullptr;
126 MachineRegisterInfo *MRI = nullptr;
127 RegisterClassInfo RegClassInfo;
128 TargetSchedModel SchedModel;
129 bool PreRegAlloc = false;
130 bool HasProfileData = false;
131 Pass *LegacyPass;
133
134 // Various analyses that we use...
135 AliasAnalysis *AA = nullptr; // Alias analysis info.
136 MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequncy info
137 MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
138 MachineDomTreeUpdater *MDTU = nullptr; // Wraps current dominator tree
139
140 // State that is updated as we process loops
141 bool Changed = false; // True if a loop is changed.
142 bool FirstInLoop = false; // True if it's the first LICM in the loop.
143
144 // Holds information about whether it is allowed to move load instructions
145 // out of the loop
146 SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
147
148 // Exit blocks of each Loop.
150
151 bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {
152 auto [It, Inserted] = ExitBlockMap.try_emplace(CurLoop);
153 if (Inserted) {
155 CurLoop->getExitBlocks(ExitBlocks);
156 It->second = std::move(ExitBlocks);
157 }
158 return is_contained(It->second, MBB);
159 }
160
161 // Track 'estimated' register pressure.
164
165 // Register pressure "limit" per register pressure set. If the pressure
166 // is higher than the limit, then it's considered high.
168
169 // Register pressure on path leading from loop preheader to current BB.
171
172 // For each opcode per preheader, keep a list of potential CSE instructions.
175 CSEMap;
176
177 enum {
178 SpeculateFalse = 0,
179 SpeculateTrue = 1,
180 SpeculateUnknown = 2
181 };
182
183 // If a MBB does not dominate loop exiting blocks then it may not safe
184 // to hoist loads from this block.
185 // Tri-state: 0 - false, 1 - true, 2 - unknown
186 unsigned SpeculationState = SpeculateUnknown;
187
188 public:
189 MachineLICMImpl(bool PreRegAlloc, Pass *LegacyPass,
191 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
192 assert((LegacyPass || MFAM) && "LegacyPass or MFAM must be provided");
193 assert(!(LegacyPass && MFAM) &&
194 "LegacyPass and MFAM cannot be provided at the same time");
195 }
196
197 bool run(MachineFunction &MF);
198
199 void releaseMemory() {
200 RegSeen.clear();
201 RegPressure.clear();
202 RegLimit.clear();
203 BackTrace.clear();
204 CSEMap.clear();
205 ExitBlockMap.clear();
206 }
207
208 private:
209 /// Keep track of information about hoisting candidates.
210 struct CandidateInfo {
212 unsigned Def;
213 int FI;
214
215 CandidateInfo(MachineInstr *mi, unsigned def, int fi)
216 : MI(mi), Def(def), FI(fi) {}
217 };
218
219 void HoistRegionPostRA(MachineLoop *CurLoop,
220 MachineBasicBlock *CurPreheader);
221
222 void HoistPostRA(MachineInstr *MI, unsigned Def, MachineLoop *CurLoop,
223 MachineBasicBlock *CurPreheader);
224
225 void ProcessMI(MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers,
226 SmallDenseSet<int> &StoredFIs,
228 MachineLoop *CurLoop);
229
230 void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);
231
232 bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);
233
234 bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);
235
236 bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);
237
238 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg,
239 MachineLoop *CurLoop) const;
240
241 bool IsCheapInstruction(MachineInstr &MI) const;
242
243 bool CanCauseHighRegPressure(const SmallDenseMap<unsigned, int> &Cost,
244 bool Cheap);
245
246 void UpdateBackTraceRegPressure(const MachineInstr *MI);
247
248 bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop);
249
250 bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
251
252 bool isTriviallyReMaterializable(const MachineInstr &MI) const;
253
254 void EnterScope(MachineBasicBlock *MBB);
255
256 void ExitScope(MachineBasicBlock *MBB);
257
258 void ExitScopeIfDone(
262
263 void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop,
264 MachineBasicBlock *CurPreheader);
265
266 void InitRegPressure(MachineBasicBlock *BB);
267
268 SmallDenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
269 bool ConsiderSeen,
270 bool ConsiderUnseenAsDef);
271
272 void UpdateRegPressure(const MachineInstr *MI,
273 bool ConsiderUnseenAsDef = false);
274
275 MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop);
276
277 MachineInstr *LookForDuplicate(const MachineInstr *MI,
278 std::vector<MachineInstr *> &PrevMIs);
279
280 bool
281 EliminateCSE(MachineInstr *MI,
282 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
283
284 bool MayCSE(MachineInstr *MI);
285
286 unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
287 MachineLoop *CurLoop);
288
289 void InitCSEMap(MachineBasicBlock *BB);
290
291 void InitializeLoadsHoistableLoops();
292
293 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
294 MachineBasicBlock *TgtBlock);
295 MachineBasicBlock *getCurPreheader(MachineLoop *CurLoop,
296 MachineBasicBlock *CurPreheader);
297 };
298
299 class MachineLICMBase : public MachineFunctionPass {
300 bool PreRegAlloc;
301
302 public:
303 MachineLICMBase(char &ID, bool PreRegAlloc)
304 : MachineFunctionPass(ID), PreRegAlloc(PreRegAlloc) {}
305
306 bool runOnMachineFunction(MachineFunction &MF) override;
307
308 void getAnalysisUsage(AnalysisUsage &AU) const override {
310 if (DisableHoistingToHotterBlocks != UseBFI::None)
316 }
317 };
318
319 class MachineLICM : public MachineLICMBase {
320 public:
321 static char ID;
322 MachineLICM() : MachineLICMBase(ID, false) {
324 }
325 };
326
327 class EarlyMachineLICM : public MachineLICMBase {
328 public:
329 static char ID;
330 EarlyMachineLICM() : MachineLICMBase(ID, true) {
332 }
333 };
334
335} // end anonymous namespace
336
337char MachineLICM::ID;
338char EarlyMachineLICM::ID;
339
340char &llvm::MachineLICMID = MachineLICM::ID;
341char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
342
344 "Machine Loop Invariant Code Motion", false, false)
350 "Machine Loop Invariant Code Motion", false, false)
351
352INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
353 "Early Machine Loop Invariant Code Motion", false, false)
358INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
359 "Early Machine Loop Invariant Code Motion", false, false)
360
361bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
362 if (skipFunction(MF.getFunction()))
363 return false;
364
365 MachineLICMImpl Impl(PreRegAlloc, this, nullptr);
366 return Impl.run(MF);
367}
368
369#define GET_RESULT(RESULT, GETTER, INFIX) \
370 ((LegacyPass) \
371 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \
372 : &MFAM->getResult<RESULT##Analysis>(MF))
373
374bool MachineLICMImpl::run(MachineFunction &MF) {
375 AA = MFAM != nullptr
377 .getManager()
378 .getResult<AAManager>(MF.getFunction())
381 MachineDomTreeUpdater::UpdateStrategy::Lazy);
382 MDTU = &DTU;
383 MLI = GET_RESULT(MachineLoop, getLI, Info);
385 ? GET_RESULT(MachineBlockFrequency, getMBFI, Info)
386 : nullptr;
387
388 Changed = FirstInLoop = false;
389 const TargetSubtargetInfo &ST = MF.getSubtarget();
390 TII = ST.getInstrInfo();
391 TLI = ST.getTargetLowering();
392 TRI = ST.getRegisterInfo();
393 MFI = &MF.getFrameInfo();
394 MRI = &MF.getRegInfo();
395 SchedModel.init(&ST);
396 RegClassInfo.runOnMachineFunction(MF);
397
398 HasProfileData = MF.getFunction().hasProfileData();
399
400 if (PreRegAlloc)
401 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
402 else
403 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
404 LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
405
406 if (PreRegAlloc) {
407 // Estimate register pressure during pre-regalloc pass.
408 unsigned NumRPS = TRI->getNumRegPressureSets();
409 RegPressure.resize(NumRPS);
410 std::fill(RegPressure.begin(), RegPressure.end(), 0);
411 RegLimit.resize(NumRPS);
412 for (unsigned i = 0, e = NumRPS; i != e; ++i)
413 RegLimit[i] = RegClassInfo.getRegPressureSetLimit(i);
414 }
415
416 if (HoistConstLoads)
417 InitializeLoadsHoistableLoops();
418
419 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
420 while (!Worklist.empty()) {
421 MachineLoop *CurLoop = Worklist.pop_back_val();
422 MachineBasicBlock *CurPreheader = nullptr;
423
424 if (!PreRegAlloc)
425 HoistRegionPostRA(CurLoop, CurPreheader);
426 else {
427 // CSEMap is initialized for loop header when the first instruction is
428 // being hoisted.
429 MachineDomTreeNode *N = MDTU->getDomTree().getNode(CurLoop->getHeader());
430 FirstInLoop = true;
431 HoistOutOfLoop(N, CurLoop, CurPreheader);
432 CSEMap.clear();
433 }
434 }
435 releaseMemory();
436 return Changed;
437}
438
439/// Return true if instruction stores to the specified frame.
440static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
441 // Check mayStore before memory operands so that e.g. DBG_VALUEs will return
442 // true since they have no memory operands.
443 if (!MI->mayStore())
444 return false;
445 // If we lost memory operands, conservatively assume that the instruction
446 // writes to all slots.
447 if (MI->memoperands_empty())
448 return true;
449 for (const MachineMemOperand *MemOp : MI->memoperands()) {
450 if (!MemOp->isStore() || !MemOp->getPseudoValue())
451 continue;
453 dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
454 if (Value->getFrameIndex() == FI)
455 return true;
456 }
457 }
458 return false;
459}
460
462 BitVector &RUs,
463 const uint32_t *Mask) {
464 // FIXME: This intentionally works in reverse due to some issues with the
465 // Register Units infrastructure.
466 //
467 // This is used to apply callee-saved-register masks to the clobbered regunits
468 // mask.
469 //
470 // The right way to approach this is to start with a BitVector full of ones,
471 // then reset all the bits of the regunits of each register that is set in the
472 // mask (registers preserved), then OR the resulting bits with the Clobbers
473 // mask. This correctly prioritizes the saved registers, so if a RU is shared
474 // between a register that is preserved, and one that is NOT preserved, that
475 // RU will not be set in the output vector (the clobbers).
476 //
477 // What we have to do for now is the opposite: we have to assume that the
478 // regunits of all registers that are NOT preserved are clobbered, even if
479 // those regunits are preserved by another register. So if a RU is shared
480 // like described previously, that RU will be set.
481 //
482 // This is to work around an issue which appears in AArch64, but isn't
483 // exclusive to that target: AArch64's Qn registers (128 bits) have Dn
484 // register (lower 64 bits). A few Dn registers are preserved by some calling
485 // conventions, but Qn and Dn share exactly the same reg units.
486 //
487 // If we do this the right way, Qn will be marked as NOT clobbered even though
488 // its upper 64 bits are NOT preserved. The conservative approach handles this
489 // correctly at the cost of some missed optimizations on other targets.
490 //
491 // This is caused by how RegUnits are handled within TableGen. Ideally, Qn
492 // should have an extra RegUnit to model the "unknown" bits not covered by the
493 // subregs.
494 BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());
495 const unsigned NumRegs = TRI.getNumRegs();
496 const unsigned MaskWords = (NumRegs + 31) / 32;
497 for (unsigned K = 0; K < MaskWords; ++K) {
498 const uint32_t Word = Mask[K];
499 for (unsigned Bit = 0; Bit < 32; ++Bit) {
500 const unsigned PhysReg = (K * 32) + Bit;
501 if (PhysReg == NumRegs)
502 break;
503
504 if (PhysReg && !((Word >> Bit) & 1)) {
505 for (MCRegUnitIterator RUI(PhysReg, &TRI); RUI.isValid(); ++RUI)
506 RUsFromRegsNotInMask.set(*RUI);
507 }
508 }
509 }
510
511 RUs |= RUsFromRegsNotInMask;
512}
513
514/// Examine the instruction for potential LICM candidate. Also
515/// gather register def and frame object update information.
516void MachineLICMImpl::ProcessMI(MachineInstr *MI, BitVector &RUDefs,
517 BitVector &RUClobbers,
518 SmallDenseSet<int> &StoredFIs,
520 MachineLoop *CurLoop) {
521 bool RuledOut = false;
522 bool HasNonInvariantUse = false;
523 unsigned Def = 0;
524 for (const MachineOperand &MO : MI->operands()) {
525 if (MO.isFI()) {
526 // Remember if the instruction stores to the frame index.
527 int FI = MO.getIndex();
528 if (!StoredFIs.count(FI) &&
529 MFI->isSpillSlotObjectIndex(FI) &&
531 StoredFIs.insert(FI);
532 HasNonInvariantUse = true;
533 continue;
534 }
535
536 // We can't hoist an instruction defining a physreg that is clobbered in
537 // the loop.
538 if (MO.isRegMask()) {
539 applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, MO.getRegMask());
540 continue;
541 }
542
543 if (!MO.isReg())
544 continue;
545 Register Reg = MO.getReg();
546 if (!Reg)
547 continue;
548 assert(Reg.isPhysical() && "Not expecting virtual register!");
549
550 if (!MO.isDef()) {
551 if (!HasNonInvariantUse) {
552 for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
553 // If it's using a non-loop-invariant register, then it's obviously
554 // not safe to hoist.
555 if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {
556 HasNonInvariantUse = true;
557 break;
558 }
559 }
560 }
561 continue;
562 }
563
564 if (MO.isImplicit()) {
565 for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
566 RUClobbers.set(*RUI);
567 if (!MO.isDead())
568 // Non-dead implicit def? This cannot be hoisted.
569 RuledOut = true;
570 // No need to check if a dead implicit def is also defined by
571 // another instruction.
572 continue;
573 }
574
575 // FIXME: For now, avoid instructions with multiple defs, unless
576 // it's a dead implicit def.
577 if (Def)
578 RuledOut = true;
579 else
580 Def = Reg;
581
582 // If we have already seen another instruction that defines the same
583 // register, then this is not safe. Two defs is indicated by setting a
584 // PhysRegClobbers bit.
585 for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
586 if (RUDefs.test(*RUI)) {
587 RUClobbers.set(*RUI);
588 RuledOut = true;
589 } else if (RUClobbers.test(*RUI)) {
590 // MI defined register is seen defined by another instruction in
591 // the loop, it cannot be a LICM candidate.
592 RuledOut = true;
593 }
594
595 RUDefs.set(*RUI);
596 }
597 }
598
599 // Only consider reloads for now and remats which do not have register
600 // operands. FIXME: Consider unfold load folding instructions.
601 if (Def && !RuledOut) {
602 int FI = std::numeric_limits<int>::min();
603 if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||
605 Candidates.push_back(CandidateInfo(MI, Def, FI));
606 }
607}
608
609/// Walk the specified region of the CFG and hoist loop invariants out to the
610/// preheader.
611void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop,
612 MachineBasicBlock *CurPreheader) {
613 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
614 if (!Preheader)
615 return;
616
617 unsigned NumRegUnits = TRI->getNumRegUnits();
618 BitVector RUDefs(NumRegUnits); // RUs defined once in the loop.
619 BitVector RUClobbers(NumRegUnits); // RUs defined more than once.
620
622 SmallDenseSet<int> StoredFIs;
623
624 // Walk the entire region, count number of defs for each register, and
625 // collect potential LICM candidates.
626 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
627 // If the header of the loop containing this basic block is a landing pad,
628 // then don't try to hoist instructions out of this loop.
629 const MachineLoop *ML = MLI->getLoopFor(BB);
630 if (ML && ML->getHeader()->isEHPad()) continue;
631
632 // Conservatively treat live-in's as an external def.
633 // FIXME: That means a reload that're reused in successor block(s) will not
634 // be LICM'ed.
635 for (const auto &LI : BB->liveins()) {
636 for (MCRegUnitIterator RUI(LI.PhysReg, TRI); RUI.isValid(); ++RUI)
637 RUDefs.set(*RUI);
638 }
639
640 // Funclet entry blocks will clobber all registers
641 if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
642 applyBitsNotInRegMaskToRegUnitsMask(*TRI, RUClobbers, Mask);
643
644 SpeculationState = SpeculateUnknown;
645 for (MachineInstr &MI : *BB)
646 ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
647 }
648
649 // Gather the registers read / clobbered by the terminator.
650 BitVector TermRUs(NumRegUnits);
652 if (TI != Preheader->end()) {
653 for (const MachineOperand &MO : TI->operands()) {
654 if (!MO.isReg())
655 continue;
656 Register Reg = MO.getReg();
657 if (!Reg)
658 continue;
659 for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
660 TermRUs.set(*RUI);
661 }
662 }
663
664 // Now evaluate whether the potential candidates qualify.
665 // 1. Check if the candidate defined register is defined by another
666 // instruction in the loop.
667 // 2. If the candidate is a load from stack slot (always true for now),
668 // check if the slot is stored anywhere in the loop.
669 // 3. Make sure candidate def should not clobber
670 // registers read by the terminator. Similarly its def should not be
671 // clobbered by the terminator.
672 for (CandidateInfo &Candidate : Candidates) {
673 if (Candidate.FI != std::numeric_limits<int>::min() &&
674 StoredFIs.count(Candidate.FI))
675 continue;
676
677 unsigned Def = Candidate.Def;
678 bool Safe = true;
679 for (MCRegUnitIterator RUI(Def, TRI); RUI.isValid(); ++RUI) {
680 if (RUClobbers.test(*RUI) || TermRUs.test(*RUI)) {
681 Safe = false;
682 break;
683 }
684 }
685
686 if (!Safe)
687 continue;
688
689 MachineInstr *MI = Candidate.MI;
690 for (const MachineOperand &MO : MI->all_uses()) {
691 if (!MO.getReg())
692 continue;
693 for (MCRegUnitIterator RUI(MO.getReg(), TRI); RUI.isValid(); ++RUI) {
694 if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {
695 // If it's using a non-loop-invariant register, then it's obviously
696 // not safe to hoist.
697 Safe = false;
698 break;
699 }
700 }
701
702 if (!Safe)
703 break;
704 }
705
706 if (Safe)
707 HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);
708 }
709}
710
711/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
712/// sure it is not killed by any instructions in the loop.
713void MachineLICMImpl::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) {
714 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
715 if (!BB->isLiveIn(Reg))
716 BB->addLiveIn(Reg);
717 for (MachineInstr &MI : *BB) {
718 for (MachineOperand &MO : MI.all_uses()) {
719 if (!MO.getReg())
720 continue;
721 if (TRI->regsOverlap(Reg, MO.getReg()))
722 MO.setIsKill(false);
723 }
724 }
725 }
726}
727
728/// When an instruction is found to only use loop invariant operands that is
729/// safe to hoist, this instruction is called to do the dirty work.
730void MachineLICMImpl::HoistPostRA(MachineInstr *MI, unsigned Def,
731 MachineLoop *CurLoop,
732 MachineBasicBlock *CurPreheader) {
733 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
734
735 // Now move the instructions to the predecessor, inserting it before any
736 // terminator instructions.
737 LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
738 << " from " << printMBBReference(*MI->getParent()) << ": "
739 << *MI);
740
741 // Splice the instruction to the preheader.
742 MachineBasicBlock *MBB = MI->getParent();
743 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
744
745 // Since we are moving the instruction out of its basic block, we do not
746 // retain its debug location. Doing so would degrade the debugging
747 // experience and adversely affect the accuracy of profiling information.
748 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
749 MI->setDebugLoc(DebugLoc());
750
751 // Add register to livein list to all the BBs in the current loop since a
752 // loop invariant must be kept live throughout the whole loop. This is
753 // important to ensure later passes do not scavenge the def register.
754 AddToLiveIns(Def, CurLoop);
755
756 ++NumPostRAHoisted;
757 Changed = true;
758}
759
760/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
761/// may not be safe to hoist.
762bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
763 MachineLoop *CurLoop) {
764 if (SpeculationState != SpeculateUnknown)
765 return SpeculationState == SpeculateFalse;
766
767 if (BB != CurLoop->getHeader()) {
768 // Check loop exiting blocks.
769 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
770 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
771 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
772 if (!MDTU->getDomTree().dominates(BB, CurrentLoopExitingBlock)) {
773 SpeculationState = SpeculateTrue;
774 return false;
775 }
776 }
777
778 SpeculationState = SpeculateFalse;
779 return true;
780}
781
782/// Check if \p MI is trivially remateralizable and if it does not have any
783/// virtual register uses. Even though rematerializable RA might not actually
784/// rematerialize it in this scenario. In that case we do not want to hoist such
785/// instruction out of the loop in a belief RA will sink it back if needed.
786bool MachineLICMImpl::isTriviallyReMaterializable(
787 const MachineInstr &MI) const {
788 if (!TII->isTriviallyReMaterializable(MI))
789 return false;
790
791 for (const MachineOperand &MO : MI.all_uses()) {
792 if (MO.getReg().isVirtual())
793 return false;
794 }
795
796 return true;
797}
798
799void MachineLICMImpl::EnterScope(MachineBasicBlock *MBB) {
800 LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
801
802 // Remember livein register pressure.
803 BackTrace.push_back(RegPressure);
804}
805
806void MachineLICMImpl::ExitScope(MachineBasicBlock *MBB) {
807 LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
808 BackTrace.pop_back();
809}
810
811/// Destroy scope for the MBB that corresponds to the given dominator tree node
812/// if its a leaf or all of its children are done. Walk up the dominator tree to
813/// destroy ancestors which are now done.
814void MachineLICMImpl::ExitScopeIfDone(
818 if (OpenChildren[Node])
819 return;
820
821 for(;;) {
822 ExitScope(Node->getBlock());
823 // Now traverse upwards to pop ancestors whose offsprings are all done.
824 MachineDomTreeNode *Parent = ParentMap.lookup(Node);
825 if (!Parent || --OpenChildren[Parent] != 0)
826 break;
827 Node = Parent;
828 }
829}
830
831/// Walk the specified loop in the CFG (defined by all blocks dominated by the
832/// specified header block, and that are in the current loop) in depth first
833/// order w.r.t the DominatorTree. This allows us to visit definitions before
834/// uses, allowing us to hoist a loop body in one pass without iteration.
835void MachineLICMImpl::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
836 MachineLoop *CurLoop,
837 MachineBasicBlock *CurPreheader) {
838 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);
839 if (!Preheader)
840 return;
841
846
847 // Perform a DFS walk to determine the order of visit.
848 WorkList.push_back(HeaderN);
849 while (!WorkList.empty()) {
851 assert(Node && "Null dominator tree node?");
852 MachineBasicBlock *BB = Node->getBlock();
853
854 // If the header of the loop containing this basic block is a landing pad,
855 // then don't try to hoist instructions out of this loop.
856 const MachineLoop *ML = MLI->getLoopFor(BB);
857 if (ML && ML->getHeader()->isEHPad())
858 continue;
859
860 // If this subregion is not in the top level loop at all, exit.
861 if (!CurLoop->contains(BB))
862 continue;
863
864 Scopes.push_back(Node);
865 unsigned NumChildren = Node->getNumChildren();
866
867 // Don't hoist things out of a large switch statement. This often causes
868 // code to be hoisted that wasn't going to be executed, and increases
869 // register pressure in a situation where it's likely to matter.
870 if (BB->succ_size() >= 25)
871 NumChildren = 0;
872
873 OpenChildren[Node] = NumChildren;
874 if (NumChildren) {
875 // Add children in reverse order as then the next popped worklist node is
876 // the first child of this node. This means we ultimately traverse the
877 // DOM tree in exactly the same order as if we'd recursed.
878 for (MachineDomTreeNode *Child : reverse(Node->children())) {
879 ParentMap[Child] = Node;
880 WorkList.push_back(Child);
881 }
882 }
883 }
884
885 if (Scopes.size() == 0)
886 return;
887
888 // Compute registers which are livein into the loop headers.
889 RegSeen.clear();
890 BackTrace.clear();
891 InitRegPressure(Preheader);
892
893 // Now perform LICM.
894 for (MachineDomTreeNode *Node : Scopes) {
895 MachineBasicBlock *MBB = Node->getBlock();
896
897 EnterScope(MBB);
898
899 // Process the block
900 SpeculationState = SpeculateUnknown;
902 unsigned HoistRes = HoistResult::NotHoisted;
903 HoistRes = Hoist(&MI, Preheader, CurLoop);
904 if (HoistRes & HoistResult::NotHoisted) {
905 // We have failed to hoist MI to outermost loop's preheader. If MI is in
906 // a subloop, try to hoist it to subloop's preheader.
907 SmallVector<MachineLoop *> InnerLoopWorkList;
908 for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop;
909 L = L->getParentLoop())
910 InnerLoopWorkList.push_back(L);
911
912 while (!InnerLoopWorkList.empty()) {
913 MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();
914 MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
915 if (InnerLoopPreheader) {
916 HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);
917 if (HoistRes & HoistResult::Hoisted)
918 break;
919 }
920 }
921 }
922
923 if (HoistRes & HoistResult::ErasedMI)
924 continue;
925
926 UpdateRegPressure(&MI);
927 }
928
929 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
930 ExitScopeIfDone(Node, OpenChildren, ParentMap);
931 }
932}
933
935 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
936}
937
938/// Find all virtual register references that are liveout of the preheader to
939/// initialize the starting "register pressure". Note this does not count live
940/// through (livein but not used) registers.
941void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
942 std::fill(RegPressure.begin(), RegPressure.end(), 0);
943
944 // If the preheader has only a single predecessor and it ends with a
945 // fallthrough or an unconditional branch, then scan its predecessor for live
946 // defs as well. This happens whenever the preheader is created by splitting
947 // the critical edge from the loop predecessor to the loop header.
948 if (BB->pred_size() == 1) {
949 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
951 if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
952 InitRegPressure(*BB->pred_begin());
953 }
954
955 for (const MachineInstr &MI : *BB)
956 UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
957}
958
959/// Update estimate of register pressure after the specified instruction.
960void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,
961 bool ConsiderUnseenAsDef) {
962 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
963 for (const auto &RPIdAndCost : Cost) {
964 unsigned Class = RPIdAndCost.first;
965 if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
966 RegPressure[Class] = 0;
967 else
968 RegPressure[Class] += RPIdAndCost.second;
969 }
970}
971
972/// Calculate the additional register pressure that the registers used in MI
973/// cause.
974///
975/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
976/// figure out which usages are live-ins.
977/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
979MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
980 bool ConsiderUnseenAsDef) {
982 if (MI->isImplicitDef())
983 return Cost;
984 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
985 const MachineOperand &MO = MI->getOperand(i);
986 if (!MO.isReg() || MO.isImplicit())
987 continue;
988 Register Reg = MO.getReg();
989 if (!Reg.isVirtual())
990 continue;
991
992 // FIXME: It seems bad to use RegSeen only for some of these calculations.
993 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
994 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
995
996 RegClassWeight W = TRI->getRegClassWeight(RC);
997 int RCCost = 0;
998 if (MO.isDef())
999 RCCost = W.RegWeight;
1000 else {
1001 bool isKill = isOperandKill(MO, MRI);
1002 if (isNew && !isKill && ConsiderUnseenAsDef)
1003 // Haven't seen this, it must be a livein.
1004 RCCost = W.RegWeight;
1005 else if (!isNew && isKill)
1006 RCCost = -W.RegWeight;
1007 }
1008 if (RCCost == 0)
1009 continue;
1010 const int *PS = TRI->getRegClassPressureSets(RC);
1011 for (; *PS != -1; ++PS)
1012 Cost[*PS] += RCCost;
1013 }
1014 return Cost;
1015}
1016
1017/// Return true if this machine instruction loads from global offset table or
1018/// constant pool.
1020 assert(MI.mayLoad() && "Expected MI that loads!");
1021
1022 // If we lost memory operands, conservatively assume that the instruction
1023 // reads from everything..
1024 if (MI.memoperands_empty())
1025 return true;
1026
1027 for (MachineMemOperand *MemOp : MI.memoperands())
1028 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
1029 if (PSV->isGOT() || PSV->isConstantPool())
1030 return true;
1031
1032 return false;
1033}
1034
1035// This function iterates through all the operands of the input store MI and
1036// checks that each register operand statisfies isCallerPreservedPhysReg.
1037// This means, the value being stored and the address where it is being stored
1038// is constant throughout the body of the function (not including prologue and
1039// epilogue). When called with an MI that isn't a store, it returns false.
1040// A future improvement can be to check if the store registers are constant
1041// throughout the loop rather than throughout the funtion.
1043 const TargetRegisterInfo *TRI,
1044 const MachineRegisterInfo *MRI) {
1045
1046 bool FoundCallerPresReg = false;
1047 if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
1048 (MI.getNumOperands() == 0))
1049 return false;
1050
1051 // Check that all register operands are caller-preserved physical registers.
1052 for (const MachineOperand &MO : MI.operands()) {
1053 if (MO.isReg()) {
1054 Register Reg = MO.getReg();
1055 // If operand is a virtual register, check if it comes from a copy of a
1056 // physical register.
1057 if (Reg.isVirtual())
1058 Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
1059 if (Reg.isVirtual())
1060 return false;
1061 if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
1062 return false;
1063 else
1064 FoundCallerPresReg = true;
1065 } else if (!MO.isImm()) {
1066 return false;
1067 }
1068 }
1069 return FoundCallerPresReg;
1070}
1071
1072// Return true if the input MI is a copy instruction that feeds an invariant
1073// store instruction. This means that the src of the copy has to satisfy
1074// isCallerPreservedPhysReg and atleast one of it's users should satisfy
1075// isInvariantStore.
1077 const MachineRegisterInfo *MRI,
1078 const TargetRegisterInfo *TRI) {
1079
1080 // FIXME: If targets would like to look through instructions that aren't
1081 // pure copies, this can be updated to a query.
1082 if (!MI.isCopy())
1083 return false;
1084
1085 const MachineFunction *MF = MI.getMF();
1086 // Check that we are copying a constant physical register.
1087 Register CopySrcReg = MI.getOperand(1).getReg();
1088 if (CopySrcReg.isVirtual())
1089 return false;
1090
1091 if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
1092 return false;
1093
1094 Register CopyDstReg = MI.getOperand(0).getReg();
1095 // Check if any of the uses of the copy are invariant stores.
1096 assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
1097
1098 for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
1099 if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
1100 return true;
1101 }
1102 return false;
1103}
1104
1105/// Returns true if the instruction may be a suitable candidate for LICM.
1106/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
1107bool MachineLICMImpl::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
1108 // Check if it's safe to move the instruction.
1109 bool DontMoveAcrossStore = !HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1110 if ((!I.isSafeToMove(DontMoveAcrossStore)) &&
1112 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
1113 return false;
1114 }
1115
1116 // If it is a load then check if it is guaranteed to execute by making sure
1117 // that it dominates all exiting blocks. If it doesn't, then there is a path
1118 // out of the loop which does not execute this load, so we can't hoist it.
1119 // Loads from constant memory are safe to speculate, for example indexed load
1120 // from a jump table.
1121 // Stores and side effects are already checked by isSafeToMove.
1122 if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
1123 !IsGuaranteedToExecute(I.getParent(), CurLoop)) {
1124 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
1125 return false;
1126 }
1127
1128 // Convergent attribute has been used on operations that involve inter-thread
1129 // communication which results are implicitly affected by the enclosing
1130 // control flows. It is not safe to hoist or sink such operations across
1131 // control flow.
1132 if (I.isConvergent())
1133 return false;
1134
1135 if (!TII->shouldHoist(I, CurLoop))
1136 return false;
1137
1138 return true;
1139}
1140
1141/// Returns true if the instruction is loop invariant.
1142bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &I,
1143 MachineLoop *CurLoop) {
1144 if (!IsLICMCandidate(I, CurLoop)) {
1145 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
1146 return false;
1147 }
1148 return CurLoop->isLoopInvariant(I);
1149}
1150
1151/// Return true if the specified instruction is used by a phi node and hoisting
1152/// it could cause a copy to be inserted.
1153bool MachineLICMImpl::HasLoopPHIUse(const MachineInstr *MI,
1154 MachineLoop *CurLoop) {
1156 do {
1157 MI = Work.pop_back_val();
1158 for (const MachineOperand &MO : MI->all_defs()) {
1159 Register Reg = MO.getReg();
1160 if (!Reg.isVirtual())
1161 continue;
1162 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1163 // A PHI may cause a copy to be inserted.
1164 if (UseMI.isPHI()) {
1165 // A PHI inside the loop causes a copy because the live range of Reg is
1166 // extended across the PHI.
1167 if (CurLoop->contains(&UseMI))
1168 return true;
1169 // A PHI in an exit block can cause a copy to be inserted if the PHI
1170 // has multiple predecessors in the loop with different values.
1171 // For now, approximate by rejecting all exit blocks.
1172 if (isExitBlock(CurLoop, UseMI.getParent()))
1173 return true;
1174 continue;
1175 }
1176 // Look past copies as well.
1177 if (UseMI.isCopy() && CurLoop->contains(&UseMI))
1178 Work.push_back(&UseMI);
1179 }
1180 }
1181 } while (!Work.empty());
1182 return false;
1183}
1184
1185/// Compute operand latency between a def of 'Reg' and an use in the current
1186/// loop, return true if the target considered it high.
1187bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1188 Register Reg,
1189 MachineLoop *CurLoop) const {
1190 if (MRI->use_nodbg_empty(Reg))
1191 return false;
1192
1193 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1194 if (UseMI.isCopyLike())
1195 continue;
1196 if (!CurLoop->contains(UseMI.getParent()))
1197 continue;
1198 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1199 const MachineOperand &MO = UseMI.getOperand(i);
1200 if (!MO.isReg() || !MO.isUse())
1201 continue;
1202 Register MOReg = MO.getReg();
1203 if (MOReg != Reg)
1204 continue;
1205
1206 if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1207 return true;
1208 }
1209
1210 // Only look at the first in loop use.
1211 break;
1212 }
1213
1214 return false;
1215}
1216
1217/// Return true if the instruction is marked "cheap" or the operand latency
1218/// between its def and a use is one or less.
1219bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
1220 if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
1221 return true;
1222
1223 bool isCheap = false;
1224 unsigned NumDefs = MI.getDesc().getNumDefs();
1225 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1226 MachineOperand &DefMO = MI.getOperand(i);
1227 if (!DefMO.isReg() || !DefMO.isDef())
1228 continue;
1229 --NumDefs;
1230 Register Reg = DefMO.getReg();
1231 if (Reg.isPhysical())
1232 continue;
1233
1234 if (!TII->hasLowDefLatency(SchedModel, MI, i))
1235 return false;
1236 isCheap = true;
1237 }
1238
1239 return isCheap;
1240}
1241
1242/// Visit BBs from header to current BB, check if hoisting an instruction of the
1243/// given cost matrix can cause high register pressure.
1244bool MachineLICMImpl::CanCauseHighRegPressure(
1245 const SmallDenseMap<unsigned, int> &Cost, bool CheapInstr) {
1246 for (const auto &RPIdAndCost : Cost) {
1247 if (RPIdAndCost.second <= 0)
1248 continue;
1249
1250 unsigned Class = RPIdAndCost.first;
1251 int Limit = RegLimit[Class];
1252
1253 // Don't hoist cheap instructions if they would increase register pressure,
1254 // even if we're under the limit.
1255 if (CheapInstr && !HoistCheapInsts)
1256 return true;
1257
1258 for (const auto &RP : BackTrace)
1259 if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1260 return true;
1261 }
1262
1263 return false;
1264}
1265
1266/// Traverse the back trace from header to the current block and update their
1267/// register pressures to reflect the effect of hoisting MI from the current
1268/// block to the preheader.
1269void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1270 // First compute the 'cost' of the instruction, i.e. its contribution
1271 // to register pressure.
1272 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1273 /*ConsiderUnseenAsDef=*/false);
1274
1275 // Update register pressure of blocks from loop header to current block.
1276 for (auto &RP : BackTrace)
1277 for (const auto &RPIdAndCost : Cost)
1278 RP[RPIdAndCost.first] += RPIdAndCost.second;
1279}
1280
1281/// Return true if it is potentially profitable to hoist the given loop
1282/// invariant.
1283bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &MI,
1284 MachineLoop *CurLoop) {
1285 if (MI.isImplicitDef())
1286 return true;
1287
1288 // Besides removing computation from the loop, hoisting an instruction has
1289 // these effects:
1290 //
1291 // - The value defined by the instruction becomes live across the entire
1292 // loop. This increases register pressure in the loop.
1293 //
1294 // - If the value is used by a PHI in the loop, a copy will be required for
1295 // lowering the PHI after extending the live range.
1296 //
1297 // - When hoisting the last use of a value in the loop, that value no longer
1298 // needs to be live in the loop. This lowers register pressure in the loop.
1299
1301 return true;
1302
1303 bool CheapInstr = IsCheapInstruction(MI);
1304 bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);
1305
1306 // Don't hoist a cheap instruction if it would create a copy in the loop.
1307 if (CheapInstr && CreatesCopy) {
1308 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1309 return false;
1310 }
1311
1312 // Rematerializable instructions should always be hoisted providing the
1313 // register allocator can just pull them down again when needed.
1314 if (isTriviallyReMaterializable(MI))
1315 return true;
1316
1317 // FIXME: If there are long latency loop-invariant instructions inside the
1318 // loop at this point, why didn't the optimizer's LICM hoist them?
1319 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1320 const MachineOperand &MO = MI.getOperand(i);
1321 if (!MO.isReg() || MO.isImplicit())
1322 continue;
1323 Register Reg = MO.getReg();
1324 if (!Reg.isVirtual())
1325 continue;
1326 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {
1327 LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
1328 ++NumHighLatency;
1329 return true;
1330 }
1331 }
1332
1333 // Estimate register pressure to determine whether to LICM the instruction.
1334 // In low register pressure situation, we can be more aggressive about
1335 // hoisting. Also, favors hoisting long latency instructions even in
1336 // moderately high pressure situation.
1337 // Cheap instructions will only be hoisted if they don't increase register
1338 // pressure at all.
1339 auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
1340 /*ConsiderUnseenAsDef=*/false);
1341
1342 // Visit BBs from header to current BB, if hoisting this doesn't cause
1343 // high register pressure, then it's safe to proceed.
1344 if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1345 LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
1346 ++NumLowRP;
1347 return true;
1348 }
1349
1350 // Don't risk increasing register pressure if it would create copies.
1351 if (CreatesCopy) {
1352 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1353 return false;
1354 }
1355
1356 // Do not "speculate" in high register pressure situation. If an
1357 // instruction is not guaranteed to be executed in the loop, it's best to be
1358 // conservative.
1359 if (AvoidSpeculation &&
1360 (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {
1361 LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
1362 return false;
1363 }
1364
1365 // If we have a COPY with other uses in the loop, hoist to allow the users to
1366 // also be hoisted.
1367 // TODO: Handle all isCopyLike?
1368 if (MI.isCopy() || MI.isRegSequence()) {
1369 Register DefReg = MI.getOperand(0).getReg();
1370 if (DefReg.isVirtual() &&
1371 all_of(MI.uses(),
1372 [this](const MachineOperand &UseOp) {
1373 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1374 MRI->isConstantPhysReg(UseOp.getReg());
1375 }) &&
1376 IsLoopInvariantInst(MI, CurLoop) &&
1377 any_of(MRI->use_nodbg_instructions(DefReg),
1378 [&CurLoop, this, DefReg,
1379 Cost = std::move(Cost)](MachineInstr &UseMI) {
1380 if (!CurLoop->contains(&UseMI))
1381 return false;
1382
1383 // COPY is a cheap instruction, but if moving it won't cause
1384 // high RP we're fine to hoist it even if the user can't be
1385 // hoisted later Otherwise we want to check the user if it's
1386 // hoistable
1387 if (CanCauseHighRegPressure(Cost, false) &&
1388 !CurLoop->isLoopInvariant(UseMI, DefReg))
1389 return false;
1390
1391 return true;
1392 }))
1393 return true;
1394 }
1395
1396 // High register pressure situation, only hoist if the instruction is going
1397 // to be remat'ed.
1398 if (!isTriviallyReMaterializable(MI) &&
1399 !MI.isDereferenceableInvariantLoad()) {
1400 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1401 return false;
1402 }
1403
1404 return true;
1405}
1406
1407/// Unfold a load from the given machineinstr if the load itself could be
1408/// hoisted. Return the unfolded and hoistable load, or null if the load
1409/// couldn't be unfolded or if it wouldn't be hoistable.
1410MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI,
1411 MachineLoop *CurLoop) {
1412 // Don't unfold simple loads.
1413 if (MI->canFoldAsLoad())
1414 return nullptr;
1415
1416 // If not, we may be able to unfold a load and hoist that.
1417 // First test whether the instruction is loading from an amenable
1418 // memory location.
1419 if (!MI->isDereferenceableInvariantLoad())
1420 return nullptr;
1421
1422 // Next determine the register class for a temporary register.
1423 unsigned LoadRegIndex;
1424 unsigned NewOpc =
1425 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1426 /*UnfoldLoad=*/true,
1427 /*UnfoldStore=*/false,
1428 &LoadRegIndex);
1429 if (NewOpc == 0) return nullptr;
1430 const MCInstrDesc &MID = TII->get(NewOpc);
1431 MachineFunction &MF = *MI->getMF();
1432 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
1433 // Ok, we're unfolding. Create a temporary register and do the unfold.
1434 Register Reg = MRI->createVirtualRegister(RC);
1435
1437 bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
1438 /*UnfoldLoad=*/true,
1439 /*UnfoldStore=*/false, NewMIs);
1440 (void)Success;
1441 assert(Success &&
1442 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1443 "succeeded!");
1444 assert(NewMIs.size() == 2 &&
1445 "Unfolded a load into multiple instructions!");
1446 MachineBasicBlock *MBB = MI->getParent();
1448 MBB->insert(Pos, NewMIs[0]);
1449 MBB->insert(Pos, NewMIs[1]);
1450 // If unfolding produced a load that wasn't loop-invariant or profitable to
1451 // hoist, discard the new instructions and bail.
1452 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
1453 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
1454 NewMIs[0]->eraseFromParent();
1455 NewMIs[1]->eraseFromParent();
1456 return nullptr;
1457 }
1458
1459 // Update register pressure for the unfolded instruction.
1460 UpdateRegPressure(NewMIs[1]);
1461
1462 // Otherwise we successfully unfolded a load that we can hoist.
1463
1464 // Update the call site info.
1465 if (MI->shouldUpdateCallSiteInfo())
1467
1468 MI->eraseFromParent();
1469 return NewMIs[0];
1470}
1471
1472/// Initialize the CSE map with instructions that are in the current loop
1473/// preheader that may become duplicates of instructions that are hoisted
1474/// out of the loop.
1475void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1476 for (MachineInstr &MI : *BB)
1477 CSEMap[BB][MI.getOpcode()].push_back(&MI);
1478}
1479
/// Initialize AllowedToHoistLoads with information about whether invariant
/// loads can be moved outside a given loop
void MachineLICMImpl::InitializeLoadsHoistableLoops() {
  // Seed the worklist with the top-level loops; sub-loops are appended below.
  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  SmallVector<MachineLoop *, 8> LoopsInPreOrder;

  // Mark all loops as hoistable initially and prepare a list of loops in
  // pre-order DFS.
  while (!Worklist.empty()) {
    auto *L = Worklist.pop_back_val();
    AllowedToHoistLoads[L] = true;
    LoopsInPreOrder.push_back(L);
    Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
                    L->getSubLoops().end());
  }

  // Going from the innermost to outermost loops, check if a loop has
  // instructions preventing invariant load hoisting. If such instruction is
  // found, mark this loop and its parent as non-hoistable and continue
  // investigating the next loop.
  // Visiting in a reversed pre-ordered DFS manner
  // allows us to not process all the instructions of the outer loop if the
  // inner loop is proved to be non-load-hoistable.
  for (auto *Loop : reverse(LoopsInPreOrder)) {
    for (auto *MBB : Loop->blocks()) {
      // If this loop has already been marked as non-hoistable, skip it.
      if (!AllowedToHoistLoads[Loop])
        continue;
      for (auto &MI : *MBB) {
        // Barriers to load hoisting: load-fold barriers, stores, calls, and
        // ordered (e.g. volatile/atomic) loads.
        if (!MI.isLoadFoldBarrier() && !MI.mayStore() && !MI.isCall() &&
            !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
          continue;
        // Poison this loop and every ancestor, since the offending
        // instruction is inside all of them.
        for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
          AllowedToHoistLoads[L] = false;
        break;
      }
    }
  }
}
1519
1520/// Find an instruction amount PrevMIs that is a duplicate of MI.
1521/// Return this instruction if it's found.
1523MachineLICMImpl::LookForDuplicate(const MachineInstr *MI,
1524 std::vector<MachineInstr *> &PrevMIs) {
1525 for (MachineInstr *PrevMI : PrevMIs)
1526 if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1527 return PrevMI;
1528
1529 return nullptr;
1530}
1531
1532/// Given a LICM'ed instruction, look for an instruction on the preheader that
1533/// computes the same value. If it's found, do a RAU on with the definition of
1534/// the existing instruction rather than hoisting the instruction to the
1535/// preheader.
1536bool MachineLICMImpl::EliminateCSE(
1538 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1539 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1540 // the undef property onto uses.
1541 if (MI->isImplicitDef())
1542 return false;
1543
1544 // Do not CSE normal loads because between them could be store instructions
1545 // that change the loaded value
1546 if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
1547 return false;
1548
1549 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1550 LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1551
1552 // Replace virtual registers defined by MI by their counterparts defined
1553 // by Dup.
1555 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1556 const MachineOperand &MO = MI->getOperand(i);
1557
1558 // Physical registers may not differ here.
1559 assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() ||
1560 MO.getReg() == Dup->getOperand(i).getReg()) &&
1561 "Instructions with different phys regs are not identical!");
1562
1563 if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical())
1564 Defs.push_back(i);
1565 }
1566
1568 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1569 unsigned Idx = Defs[i];
1570 Register Reg = MI->getOperand(Idx).getReg();
1571 Register DupReg = Dup->getOperand(Idx).getReg();
1572 OrigRCs.push_back(MRI->getRegClass(DupReg));
1573
1574 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1575 // Restore old RCs if more than one defs.
1576 for (unsigned j = 0; j != i; ++j)
1577 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1578 return false;
1579 }
1580 }
1581
1582 for (unsigned Idx : Defs) {
1583 Register Reg = MI->getOperand(Idx).getReg();
1584 Register DupReg = Dup->getOperand(Idx).getReg();
1585 MRI->replaceRegWith(Reg, DupReg);
1586 MRI->clearKillFlags(DupReg);
1587 // Clear Dup dead flag if any, we reuse it for Reg.
1588 if (!MRI->use_nodbg_empty(DupReg))
1589 Dup->getOperand(Idx).setIsDead(false);
1590 }
1591
1592 MI->eraseFromParent();
1593 ++NumCSEed;
1594 return true;
1595 }
1596 return false;
1597}
1598
1599/// Return true if the given instruction will be CSE'd if it's hoisted out of
1600/// the loop.
1601bool MachineLICMImpl::MayCSE(MachineInstr *MI) {
1602 if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
1603 return false;
1604
1605 unsigned Opcode = MI->getOpcode();
1606 for (auto &Map : CSEMap) {
1607 // Check this CSEMap's preheader dominates MI's basic block.
1608 if (MDTU->getDomTree().dominates(Map.first, MI->getParent())) {
1610 Map.second.find(Opcode);
1611 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1612 // the undef property onto uses.
1613 if (CI == Map.second.end() || MI->isImplicitDef())
1614 continue;
1615 if (LookForDuplicate(MI, CI->second) != nullptr)
1616 return true;
1617 }
1618 }
1619
1620 return false;
1621}
1622
1623/// When an instruction is found to use only loop invariant operands
1624/// that are safe to hoist, this instruction is called to do the dirty work.
1625/// It returns true if the instruction is hoisted.
1626unsigned MachineLICMImpl::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
1627 MachineLoop *CurLoop) {
1628 MachineBasicBlock *SrcBlock = MI->getParent();
1629
1630 // Disable the instruction hoisting due to block hotness
1632 (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
1633 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1634 ++NumNotHoistedDueToHotness;
1635 return HoistResult::NotHoisted;
1636 }
1637 // First check whether we should hoist this instruction.
1638 bool HasExtractHoistableLoad = false;
1639 if (!IsLoopInvariantInst(*MI, CurLoop) ||
1640 !IsProfitableToHoist(*MI, CurLoop)) {
1641 // If not, try unfolding a hoistable load.
1642 MI = ExtractHoistableLoad(MI, CurLoop);
1643 if (!MI)
1644 return HoistResult::NotHoisted;
1645 HasExtractHoistableLoad = true;
1646 }
1647
1648 // If we have hoisted an instruction that may store, it can only be a constant
1649 // store.
1650 if (MI->mayStore())
1651 NumStoreConst++;
1652
1653 // Now move the instructions to the predecessor, inserting it before any
1654 // terminator instructions.
1655 LLVM_DEBUG({
1656 dbgs() << "Hoisting " << *MI;
1657 if (MI->getParent()->getBasicBlock())
1658 dbgs() << " from " << printMBBReference(*MI->getParent());
1659 if (Preheader->getBasicBlock())
1660 dbgs() << " to " << printMBBReference(*Preheader);
1661 dbgs() << "\n";
1662 });
1663
1664 // If this is the first instruction being hoisted to the preheader,
1665 // initialize the CSE map with potential common expressions.
1666 if (FirstInLoop) {
1667 InitCSEMap(Preheader);
1668 FirstInLoop = false;
1669 }
1670
1671 // Look for opportunity to CSE the hoisted instruction.
1672 unsigned Opcode = MI->getOpcode();
1673 bool HasCSEDone = false;
1674 for (auto &Map : CSEMap) {
1675 // Check this CSEMap's preheader dominates MI's basic block.
1676 if (MDTU->getDomTree().dominates(Map.first, MI->getParent())) {
1678 Map.second.find(Opcode);
1679 if (CI != Map.second.end()) {
1680 if (EliminateCSE(MI, CI)) {
1681 HasCSEDone = true;
1682 break;
1683 }
1684 }
1685 }
1686 }
1687
1688 if (!HasCSEDone) {
1689 // Otherwise, splice the instruction to the preheader.
1690 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1691
1692 // Since we are moving the instruction out of its basic block, we do not
1693 // retain its debug location. Doing so would degrade the debugging
1694 // experience and adversely affect the accuracy of profiling information.
1695 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
1696 MI->setDebugLoc(DebugLoc());
1697
1698 // Update register pressure for BBs from header to this block.
1699 UpdateBackTraceRegPressure(MI);
1700
1701 // Clear the kill flags of any register this instruction defines,
1702 // since they may need to be live throughout the entire loop
1703 // rather than just live for part of it.
1704 for (MachineOperand &MO : MI->all_defs())
1705 if (!MO.isDead())
1706 MRI->clearKillFlags(MO.getReg());
1707
1708 CSEMap[Preheader][Opcode].push_back(MI);
1709 }
1710
1711 ++NumHoisted;
1712 Changed = true;
1713
1714 if (HasCSEDone || HasExtractHoistableLoad)
1715 return HoistResult::Hoisted | HoistResult::ErasedMI;
1716 return HoistResult::Hoisted;
1717}
1718
1719/// Get the preheader for the current loop, splitting a critical edge if needed.
1721MachineLICMImpl::getCurPreheader(MachineLoop *CurLoop,
1722 MachineBasicBlock *CurPreheader) {
1723 // Determine the block to which to hoist instructions. If we can't find a
1724 // suitable loop predecessor, we can't do any hoisting.
1725
1726 // If we've tried to get a preheader and failed, don't try again.
1727 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1728 return nullptr;
1729
1730 if (!CurPreheader) {
1731 CurPreheader = CurLoop->getLoopPreheader();
1732 if (!CurPreheader) {
1733 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1734 if (!Pred) {
1735 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1736 return nullptr;
1737 }
1738
1739 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), LegacyPass,
1740 MFAM, nullptr, MDTU);
1741 if (!CurPreheader) {
1742 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1743 return nullptr;
1744 }
1745 }
1746 }
1747 return CurPreheader;
1748}
1749
1750/// Is the target basic block at least "BlockFrequencyRatioThreshold"
1751/// times hotter than the source basic block.
1752bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1753 MachineBasicBlock *TgtBlock) {
1754 // Parse source and target basic block frequency from MBFI
1755 uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
1756 uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
1757
1758 // Disable the hoisting if source block frequency is zero
1759 if (!SrcBF)
1760 return true;
1761
1762 double Ratio = (double)DstBF / SrcBF;
1763
1764 // Compare the block frequency ratio with the threshold
1765 return Ratio > BlockFrequencyRatioThreshold;
1766}
1767
1768template <typename DerivedT, bool PreRegAlloc>
1771 bool Changed = MachineLICMImpl(PreRegAlloc, nullptr, &MFAM).run(MF);
1772 if (!Changed)
1773 return PreservedAnalyses::all();
1775 PA.preserve<MachineLoopAnalysis>();
1776 return PA;
1777}
1778
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder & UseMI
MachineBasicBlock & MBB
basic Basic Alias true
This file implements the BitVector class.
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:390
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:686
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &ExitBlocks)
Return true if the specified block is in the list.
Definition: LCSSA.cpp:69
#define I(x, y, z)
Definition: MD5.cpp:58
Machine Loop Invariant Code false early machinelicm
static cl::opt< bool > HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden)
#define GET_RESULT(RESULT, GETTER, INFIX)
static cl::opt< UseBFI > DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", cl::desc("Disable hoisting instructions to" " hotter blocks"), cl::init(UseBFI::PGO), cl::Hidden, cl::values(clEnumValN(UseBFI::None, "none", "disable the feature"), clEnumValN(UseBFI::PGO, "pgo", "enable the feature when using profile data"), clEnumValN(UseBFI::All, "all", "enable the feature with/wo profile data")))
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)
Return true if this machine instruction loads from global offset table or constant pool.
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI)
static cl::opt< bool > HoistConstLoads("hoist-const-loads", cl::desc("Hoist invariant loads"), cl::init(true), cl::Hidden)
UseBFI
Definition: MachineLICM.cpp:89
Machine Loop Invariant Code Motion
static cl::opt< bool > AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden)
static bool InstructionStoresToFI(const MachineInstr *MI, int FI)
Return true if instruction stores to the specified frame.
static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI)
static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI, BitVector &RUs, const uint32_t *Mask)
#define DEBUG_TYPE
Definition: MachineLICM.cpp:60
static cl::opt< bool > HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden)
static bool isInvariantStore(const MachineInstr &MI, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI)
static cl::opt< unsigned > BlockFrequencyRatioThreshold("block-freq-ratio-threshold", cl::desc("Do not hoist instructions if target" "block is N times hotter than the source."), cl::init(100), cl::Hidden)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file describes how to lower LLVM code to machine code.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition: DenseMap.h:226
Base class for the actual dominator tree node.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
A specialized PseudoSourceValue for holding FixedStack values, which must include a frame index.
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:329
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
iterator_range< block_iterator > blocks() const
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
bool isValid() const
Returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
unsigned pred_size() const
MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P, std::vector< SparseBitVector<> > *LiveInSets=nullptr, MachineDomTreeUpdater *MDTU=nullptr)
Split the critical edge from this block to the given successor block, and return the newly created bl...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineInstr *A, const MachineInstr *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void eraseCallSiteInfo(const MachineInstr *MI)
Following functions update call site info.
Representation of each machine instruction.
Definition: MachineInstr.h:69
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Analysis pass that exposes the MachineLoopInfo for a machine function.
bool isLoopInvariant(MachineInstr &I, const Register ExcludeReg=0) const
Returns true if the instruction is loop invariant.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Special value supplied for machine level alias analysis.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:298
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
LLVM Value Representation.
Definition: Value.h:74
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:95
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:711
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializeMachineLICMPass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
char & MachineLICMID
This pass performs loop invariant code motion on machine instructions.
void initializeEarlyMachineLICMPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
Each TargetRegisterClass has a per register weight, and weight limit which must be less than the limi...