51#define DEBUG_TYPE "machine-cse"
53STATISTIC(NumCoalesces,
"Number of copies coalesced");
54STATISTIC(NumCSEs,
"Number of common subexpression eliminated");
55STATISTIC(NumPREs,
"Number of partial redundant expression"
56 " transformed to fully redundant");
58 "Number of physreg referencing common subexpr eliminated");
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes,
"Number of copies coalesced after commuting");
66 cl::desc(
"Threshold for the size of CSUses"));
100 .
set(MachineFunctionProperties::Property::IsSSA);
118 unsigned LookAheadLimit = 0;
134 PhysDefVector &PhysDefs,
bool &PhysUseDef)
const;
137 PhysDefVector &PhysDefs,
bool &NonLocal)
const;
160char MachineCSE::ID = 0;
165 "Machine Common Subexpression Elimination",
false,
false)
177 bool Changed =
false;
179 if (!MO.isReg() || !MO.isUse())
182 if (!Reg.isVirtual())
184 bool OnlyOneUse =
MRI->hasOneNonDBGUse(Reg);
207 if (!
MRI->constrainRegAttrs(SrcReg, Reg))
214 MRI->clearKillFlags(SrcReg);
231bool MachineCSE::isPhysDefTriviallyDead(
234 unsigned LookAheadLeft = LookAheadLimit;
235 while (LookAheadLeft) {
243 bool SeenDef =
false;
245 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
247 if (!MO.isReg() || !MO.getReg())
249 if (!
TRI->regsOverlap(MO.getReg(), Reg))
280 return TRI.isCallerPreservedPhysReg(Reg, MF) ||
TII.isIgnorableUse(MO) ||
281 (
MRI.reservedRegsFrozen() &&
MRI.isConstantPhysReg(Reg));
291 PhysDefVector &PhysDefs,
292 bool &PhysUseDef)
const {
295 if (!MO.isReg() || MO.isDef())
329 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
333 for (
unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
338 return !PhysRefs.
empty();
343 PhysDefVector &PhysDefs,
344 bool &NonLocal)
const {
351 bool CrossMBB =
false;
356 for (
unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
357 if (
MRI->isAllocatable(PhysDefs[i].second) ||
358 MRI->isReserved(PhysDefs[i].second))
368 unsigned LookAheadLeft = LookAheadLimit;
369 while (LookAheadLeft) {
371 while (
I !=
E &&
I != EE &&
I->isDebugInstr())
375 assert(CrossMBB &&
"Reaching end-of-MBB without finding MI?");
409 if (
MI->isPosition() ||
MI->isPHI() ||
MI->isImplicitDef() ||
MI->isKill() ||
410 MI->isInlineAsm() ||
MI->isDebugInstr())
414 if (
MI->isCopyLike())
418 if (
MI->mayStore() ||
MI->isCall() ||
MI->isTerminator() ||
419 MI->mayRaiseFPException() ||
MI->hasUnmodeledSideEffects())
426 if (!
MI->isDereferenceableInvariantLoad())
435 if (
MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
450 bool MayIncreasePressure =
true;
452 MayIncreasePressure =
false;
460 MayIncreasePressure =
true;
464 if (!MayIncreasePressure)
467 MayIncreasePressure =
true;
472 if (!MayIncreasePressure)
return true;
485 bool HasVRegUse =
false;
493 bool HasNonCopyUse =
false;
496 if (!
MI.isCopyLike()) {
497 HasNonCopyUse =
true;
509 HasPHI |=
UseMI.isPHI();
510 if (
UseMI.getParent() ==
MI->getParent())
519 ScopeType *
Scope =
new ScopeType(VNT);
526 assert(SI != ScopeMap.end());
532 bool Changed =
false;
538 if (!isCSECandidate(&
MI))
541 bool FoundCSE = VNT.count(&
MI);
544 if (PerformTrivialCopyPropagation(&
MI,
MBB)) {
552 FoundCSE = VNT.count(&
MI);
557 bool Commuted =
false;
558 if (!FoundCSE &&
MI.isCommutable()) {
561 FoundCSE = VNT.count(NewMI);
564 NewMI->eraseFromParent();
566 }
else if (!FoundCSE)
568 (void)
TII->commuteInstruction(
MI);
575 bool CrossMBBPhysDef =
false;
577 PhysDefVector PhysDefs;
578 bool PhysUseDef =
false;
580 hasLivePhysRegDefUses(&
MI,
MBB, PhysRefs, PhysDefs, PhysUseDef)) {
589 unsigned CSVN = VNT.lookup(&
MI);
591 if (PhysRegDefsReach(CSMI, &
MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
597 VNT.insert(&
MI, CurrVN++);
603 unsigned CSVN = VNT.lookup(&
MI);
606 LLVM_DEBUG(
dbgs() <<
"*** Found a common subexpression: " << *CSMI);
617 if (
MI.isConvergent() &&
MI.getParent() != CSMI->
getParent()) {
618 LLVM_DEBUG(
dbgs() <<
"*** Convergent MI and subexpression exist in "
619 "different BBs, avoid CSE!\n");
620 VNT.insert(&
MI, CurrVN++);
627 unsigned NumDefs =
MI.getNumDefs();
629 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
646 if (OldReg == NewReg) {
652 "Do not CSE physical register defs!");
654 if (!isProfitableToCSE(NewReg, OldReg, CSMI->
getParent(), &
MI)) {
663 if (!
MRI->constrainRegAttrs(NewReg, OldReg)) {
665 dbgs() <<
"*** Not the same register constraints, avoid CSE!\n");
670 CSEPairs.
push_back(std::make_pair(OldReg, NewReg));
676 for (
const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
677 unsigned OldReg = CSEPair.first;
678 unsigned NewReg = CSEPair.second;
681 assert(Def !=
nullptr &&
"CSEd register has no unique definition?");
682 Def->clearRegisterDeads(NewReg);
684 MRI->replaceRegWith(OldReg, NewReg);
685 MRI->clearKillFlags(NewReg);
690 for (
unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
692 for (
const auto &PhysDef : PhysDefs)
693 if (!
MI.getOperand(PhysDef.first).isDead())
708 for (
auto ImplicitDef : ImplicitDefs)
710 ImplicitDef,
true,
TRI))
715 for (
auto ImplicitDef : ImplicitDefs)
716 MRI->clearKillFlags(ImplicitDef);
719 if (CrossMBBPhysDef) {
722 while (!PhysDefs.empty()) {
723 auto LiveIn = PhysDefs.pop_back_val();
730 MI.eraseFromParent();
732 if (!PhysRefs.
empty())
738 VNT.insert(&
MI, CurrVN++);
742 ImplicitDefsToUpdate.clear();
743 ImplicitDefs.clear();
755 if (OpenChildren[
Node])
759 ExitScope(
Node->getBlock());
763 unsigned Left = --OpenChildren[Parent];
766 ExitScope(Parent->getBlock());
783 OpenChildren[
Node] =
Node->getNumChildren();
785 }
while (!WorkList.
empty());
788 bool Changed =
false;
792 Changed |= ProcessBlockCSE(
MBB);
794 ExitScopeIfDone(
Node, OpenChildren);
805 if (!isCSECandidate(
MI) ||
806 MI->isNotDuplicable() ||
809 MI->getNumDefs() != 1 ||
810 MI->getNumExplicitDefs() != 1)
827 bool Changed =
false;
830 if (!isPRECandidate(&
MI, PhysRefs))
833 if (!PREMap.count(&
MI)) {
838 auto MBB1 = PREMap[&
MI];
841 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
843 if (!CMBB->isLegalToHoistInto())
846 if (!isProfitableToHoistInto(CMBB,
MBB, MBB1))
853 if (BB !=
nullptr && BB1 !=
nullptr &&
863 if (
MI.isConvergent() && CMBB !=
MBB)
869 PhysDefVector PhysDefs;
870 if (!PhysRefs.
empty() &&
871 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &
MI, PhysRefs,
876 "First operand of instr with one explicit def must be this def");
879 if (!isProfitableToCSE(NewReg, VReg, CMBB, &
MI))
882 TII->duplicate(*CMBB, CMBB->getFirstTerminator(),
MI);
910 bool Changed =
false;
917 Changed |= ProcessBlockPRE(DT,
MBB);
919 }
while (!BBs.
empty());
931 "CandidateBB should dominate MBB1");
943 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
944 DT = &getAnalysis<MachineDominatorTree>();
945 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
946 LookAheadLimit =
TII->getMachineCSELookAheadLimit();
947 bool ChangedPRE, ChangedCSE;
948 ChangedPRE = PerformSimplePRE(DT);
950 return ChangedPRE || ChangedCSE;
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Machine Common Subexpression Elimination
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Base class for the actual dominator tree node.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
pred_iterator pred_begin()
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
MachineDomTreeNode * getRootNode() const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
void initializeMachineCSEPass(PassRegistry &)
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...