50#define DEBUG_TYPE "machine-cse"
52STATISTIC(NumCoalesces,
"Number of copies coalesced");
53STATISTIC(NumCSEs,
"Number of common subexpression eliminated");
54STATISTIC(NumPREs,
"Number of partial redundant expression"
55 " transformed to fully redundant");
57 "Number of physreg referencing common subexpr eliminated");
59 "Number of cross-MBB physreg referencing CS eliminated");
60STATISTIC(NumCommutes,
"Number of copies coalesced after commuting");
65 cl::desc(
"Threshold for the size of CSUses"));
69 cl::desc(
"Override the profitability heuristics for Machine CSE"));
103 .
set(MachineFunctionProperties::Property::IsSSA);
121 unsigned LookAheadLimit = 0;
137 PhysDefVector &PhysDefs,
bool &PhysUseDef)
const;
140 PhysDefVector &PhysDefs,
bool &NonLocal)
const;
163char MachineCSE::ID = 0;
168 "Machine Common Subexpression Elimination",
false,
false)
180 bool Changed =
false;
183 if (!Reg.isVirtual())
185 bool OnlyOneUse =
MRI->hasOneNonDBGUse(Reg);
208 if (!
MRI->constrainRegAttrs(SrcReg, Reg))
215 MRI->clearKillFlags(SrcReg);
232bool MachineCSE::isPhysDefTriviallyDead(
235 unsigned LookAheadLeft = LookAheadLimit;
236 while (LookAheadLeft) {
244 bool SeenDef =
false;
246 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
248 if (!MO.isReg() || !MO.getReg())
250 if (!
TRI->regsOverlap(MO.getReg(), Reg))
281 return TRI.isCallerPreservedPhysReg(Reg, MF) ||
TII.isIgnorableUse(MO) ||
282 (
MRI.reservedRegsFrozen() &&
MRI.isConstantPhysReg(Reg));
292 PhysDefVector &PhysDefs,
293 bool &PhysUseDef)
const {
328 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
332 for (
unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
337 return !PhysRefs.
empty();
342 PhysDefVector &PhysDefs,
343 bool &NonLocal)
const {
350 bool CrossMBB =
false;
355 for (
unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
356 if (
MRI->isAllocatable(PhysDefs[i].second) ||
357 MRI->isReserved(PhysDefs[i].second))
367 unsigned LookAheadLeft = LookAheadLimit;
368 while (LookAheadLeft) {
370 while (
I != E &&
I != EE &&
I->isDebugInstr())
374 assert(CrossMBB &&
"Reaching end-of-MBB without finding MI?");
408 if (
MI->isPosition() ||
MI->isPHI() ||
MI->isImplicitDef() ||
MI->isKill() ||
409 MI->isInlineAsm() ||
MI->isDebugInstr() ||
MI->isJumpTableDebugInfo())
413 if (
MI->isCopyLike())
417 if (
MI->mayStore() ||
MI->isCall() ||
MI->isTerminator() ||
418 MI->mayRaiseFPException() ||
MI->hasUnmodeledSideEffects())
425 if (!
MI->isDereferenceableInvariantLoad())
434 if (
MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
452 bool MayIncreasePressure =
true;
454 MayIncreasePressure =
false;
462 MayIncreasePressure =
true;
466 if (!MayIncreasePressure)
469 MayIncreasePressure =
true;
474 if (!MayIncreasePressure)
return true;
487 bool HasVRegUse =
false;
495 bool HasNonCopyUse =
false;
498 if (!
MI.isCopyLike()) {
499 HasNonCopyUse =
true;
511 HasPHI |=
UseMI.isPHI();
512 if (
UseMI.getParent() ==
MI->getParent())
521 ScopeType *
Scope =
new ScopeType(VNT);
528 assert(SI != ScopeMap.end());
534 bool Changed =
false;
540 if (!isCSECandidate(&
MI))
543 bool FoundCSE = VNT.count(&
MI);
546 if (PerformTrivialCopyPropagation(&
MI,
MBB)) {
554 FoundCSE = VNT.count(&
MI);
559 bool Commuted =
false;
560 if (!FoundCSE &&
MI.isCommutable()) {
563 FoundCSE = VNT.count(NewMI);
566 NewMI->eraseFromParent();
568 }
else if (!FoundCSE)
570 (void)
TII->commuteInstruction(
MI);
577 bool CrossMBBPhysDef =
false;
579 PhysDefVector PhysDefs;
580 bool PhysUseDef =
false;
582 hasLivePhysRegDefUses(&
MI,
MBB, PhysRefs, PhysDefs, PhysUseDef)) {
591 unsigned CSVN = VNT.lookup(&
MI);
593 if (PhysRegDefsReach(CSMI, &
MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
605 unsigned CSVN = VNT.lookup(&
MI);
608 LLVM_DEBUG(
dbgs() <<
"*** Found a common subexpression: " << *CSMI);
619 if (
MI.isConvergent() &&
MI.getParent() != CSMI->
getParent()) {
620 LLVM_DEBUG(
dbgs() <<
"*** Convergent MI and subexpression exist in "
621 "different BBs, avoid CSE!\n");
622 VNT.insert(&
MI, CurrVN++);
629 unsigned NumDefs =
MI.getNumDefs();
631 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
648 if (OldReg == NewReg) {
654 "Do not CSE physical register defs!");
656 if (!isProfitableToCSE(NewReg, OldReg, CSMI->
getParent(), &
MI)) {
665 if (!
MRI->constrainRegAttrs(NewReg, OldReg)) {
667 dbgs() <<
"*** Not the same register constraints, avoid CSE!\n");
672 CSEPairs.
push_back(std::make_pair(OldReg, NewReg));
678 for (
const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
679 unsigned OldReg = CSEPair.first;
680 unsigned NewReg = CSEPair.second;
683 assert(Def !=
nullptr &&
"CSEd register has no unique definition?");
684 Def->clearRegisterDeads(NewReg);
686 MRI->replaceRegWith(OldReg, NewReg);
687 MRI->clearKillFlags(NewReg);
692 for (
unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
694 for (
const auto &PhysDef : PhysDefs)
695 if (!
MI.getOperand(PhysDef.first).isDead())
710 for (
auto ImplicitDef : ImplicitDefs)
712 ImplicitDef,
TRI,
true))
717 for (
auto ImplicitDef : ImplicitDefs)
718 MRI->clearKillFlags(ImplicitDef);
721 if (CrossMBBPhysDef) {
724 while (!PhysDefs.empty()) {
725 auto LiveIn = PhysDefs.pop_back_val();
732 MI.eraseFromParent();
734 if (!PhysRefs.
empty())
740 VNT.insert(&
MI, CurrVN++);
744 ImplicitDefsToUpdate.clear();
745 ImplicitDefs.clear();
757 if (OpenChildren[
Node])
761 ExitScope(
Node->getBlock());
765 unsigned Left = --OpenChildren[Parent];
768 ExitScope(Parent->getBlock());
785 OpenChildren[
Node] =
Node->getNumChildren();
787 }
while (!WorkList.
empty());
790 bool Changed =
false;
794 Changed |= ProcessBlockCSE(
MBB);
796 ExitScopeIfDone(
Node, OpenChildren);
807 if (!isCSECandidate(
MI) ||
808 MI->isNotDuplicable() ||
811 MI->getNumDefs() != 1 ||
812 MI->getNumExplicitDefs() != 1)
829 bool Changed =
false;
832 if (!isPRECandidate(&
MI, PhysRefs))
835 if (!PREMap.count(&
MI)) {
840 auto MBB1 = PREMap[&
MI];
843 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
845 if (!CMBB->isLegalToHoistInto())
848 if (!isProfitableToHoistInto(CMBB,
MBB, MBB1))
855 if (BB !=
nullptr && BB1 !=
nullptr &&
865 if (
MI.isConvergent() && CMBB !=
MBB)
871 PhysDefVector PhysDefs;
872 if (!PhysRefs.
empty() &&
873 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &
MI, PhysRefs,
878 "First operand of instr with one explicit def must be this def");
881 if (!isProfitableToCSE(NewReg, VReg, CMBB, &
MI))
884 TII->duplicate(*CMBB, CMBB->getFirstTerminator(),
MI);
912 bool Changed =
false;
919 Changed |= ProcessBlockPRE(DT,
MBB);
921 }
while (!BBs.
empty());
933 "CandidateBB should dominate MBB1");
945 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
946 DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
947 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
948 LookAheadLimit =
TII->getMachineCSELookAheadLimit();
949 bool ChangedPRE, ChangedCSE;
950 ChangedPRE = PerformSimplePRE(DT);
952 return ChangedPRE || ChangedCSE;
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
This file defines the BumpPtrAllocator interface.
COFF::MachineTypes Machine
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Machine Common Subexpression Elimination
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Base class for the actual dominator tree node.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
pred_iterator pred_begin()
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
MachineDomTreeNode * getRootNode() const
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
void initializeMachineCSEPass(PassRegistry &)
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...