51#define DEBUG_TYPE "machine-cse"
53STATISTIC(NumCoalesces,
"Number of copies coalesced");
54STATISTIC(NumCSEs,
"Number of common subexpression eliminated");
55STATISTIC(NumPREs,
"Number of partial redundant expression"
56 " transformed to fully redundant");
58 "Number of physreg referencing common subexpr eliminated");
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes,
"Number of copies coalesced after commuting");
66 cl::desc(
"Threshold for the size of CSUses"));
70 cl::desc(
"Override the profitability heuristics for Machine CSE"));
83 : DT(DT), MBFI(MBFI) {}
96 unsigned LookAheadLimit = 0;
111 PhysDefVector &PhysDefs,
bool &PhysUseDef)
const;
114 PhysDefVector &PhysDefs,
bool &NonLocal)
const;
132 void releaseMemory();
157 MachineFunctionProperties::Property::IsSSA);
162char MachineCSELegacy::ID = 0;
167 "Machine Common Subexpression Elimination",
false,
false)
176bool MachineCSEImpl::PerformTrivialCopyPropagation(
MachineInstr *
MI,
178 bool Changed =
false;
181 if (!Reg.isVirtual())
183 bool OnlyOneUse =
MRI->hasOneNonDBGUse(Reg);
206 if (!
MRI->constrainRegAttrs(SrcReg, Reg))
213 MRI->clearKillFlags(SrcReg);
230bool MachineCSEImpl::isPhysDefTriviallyDead(
233 unsigned LookAheadLeft = LookAheadLimit;
234 while (LookAheadLeft) {
242 bool SeenDef =
false;
244 if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
246 if (!MO.isReg() || !MO.getReg())
248 if (!
TRI->regsOverlap(MO.getReg(), Reg))
279 return TRI.isCallerPreservedPhysReg(Reg, MF) ||
TII.isIgnorableUse(MO) ||
280 (
MRI.reservedRegsFrozen() &&
MRI.isConstantPhysReg(Reg));
290 PhysDefVector &PhysDefs,
291 bool &PhysUseDef)
const {
326 PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
330 for (
unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
335 return !PhysRefs.
empty();
340 PhysDefVector &PhysDefs,
341 bool &NonLocal)
const {
348 bool CrossMBB =
false;
353 for (
unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
354 if (
MRI->isAllocatable(PhysDefs[i].second) ||
355 MRI->isReserved(PhysDefs[i].second))
365 unsigned LookAheadLeft = LookAheadLimit;
366 while (LookAheadLeft) {
368 while (
I != E &&
I != EE &&
I->isDebugInstr())
372 assert(CrossMBB &&
"Reaching end-of-MBB without finding MI?");
406 if (
MI->isPosition() ||
MI->isPHI() ||
MI->isImplicitDef() ||
MI->isKill() ||
407 MI->isInlineAsm() ||
MI->isDebugInstr() ||
MI->isJumpTableDebugInfo() ||
412 if (
MI->isCopyLike())
416 if (
MI->mayStore() ||
MI->isCall() ||
MI->isTerminator() ||
417 MI->mayRaiseFPException() ||
MI->hasUnmodeledSideEffects())
424 if (!
MI->isDereferenceableInvariantLoad())
433 if (
MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
452 bool MayIncreasePressure =
true;
454 MayIncreasePressure =
false;
462 MayIncreasePressure =
true;
466 if (!MayIncreasePressure)
469 MayIncreasePressure =
true;
474 if (!MayIncreasePressure)
return true;
487 bool HasVRegUse =
false;
495 bool HasNonCopyUse =
false;
498 if (!
MI.isCopyLike()) {
499 HasNonCopyUse =
true;
511 HasPHI |=
UseMI.isPHI();
512 if (
UseMI.getParent() ==
MI->getParent())
521 ScopeType *
Scope =
new ScopeType(VNT);
528 assert(SI != ScopeMap.end());
534 bool Changed =
false;
540 if (!isCSECandidate(&
MI))
543 bool FoundCSE = VNT.count(&
MI);
546 if (PerformTrivialCopyPropagation(&
MI,
MBB)) {
554 FoundCSE = VNT.count(&
MI);
559 bool Commuted =
false;
560 if (!FoundCSE &&
MI.isCommutable()) {
563 FoundCSE = VNT.count(NewMI);
566 NewMI->eraseFromParent();
568 }
else if (!FoundCSE)
570 (void)
TII->commuteInstruction(
MI);
577 bool CrossMBBPhysDef =
false;
579 PhysDefVector PhysDefs;
580 bool PhysUseDef =
false;
582 hasLivePhysRegDefUses(&
MI,
MBB, PhysRefs, PhysDefs, PhysUseDef)) {
591 unsigned CSVN = VNT.lookup(&
MI);
593 if (PhysRegDefsReach(CSMI, &
MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
605 unsigned CSVN = VNT.lookup(&
MI);
608 LLVM_DEBUG(
dbgs() <<
"*** Found a common subexpression: " << *CSMI);
619 if (
MI.isConvergent() &&
MI.getParent() != CSMI->
getParent()) {
620 LLVM_DEBUG(
dbgs() <<
"*** Convergent MI and subexpression exist in "
621 "different BBs, avoid CSE!\n");
622 VNT.insert(&
MI, CurrVN++);
629 unsigned NumDefs =
MI.getNumDefs();
631 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
648 if (OldReg == NewReg) {
654 "Do not CSE physical register defs!");
656 if (!isProfitableToCSE(NewReg, OldReg, CSMI->
getParent(), &
MI)) {
665 if (!
MRI->constrainRegAttrs(NewReg, OldReg)) {
667 dbgs() <<
"*** Not the same register constraints, avoid CSE!\n");
672 CSEPairs.
push_back(std::make_pair(OldReg, NewReg));
678 for (
const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
679 unsigned OldReg = CSEPair.first;
680 unsigned NewReg = CSEPair.second;
683 assert(Def !=
nullptr &&
"CSEd register has no unique definition?");
684 Def->clearRegisterDeads(NewReg);
686 MRI->replaceRegWith(OldReg, NewReg);
687 MRI->clearKillFlags(NewReg);
692 for (
unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
694 for (
const auto &PhysDef : PhysDefs)
695 if (!
MI.getOperand(PhysDef.first).isDead())
710 for (
auto ImplicitDef : ImplicitDefs)
712 ImplicitDef,
TRI,
true))
717 for (
auto ImplicitDef : ImplicitDefs)
718 MRI->clearKillFlags(ImplicitDef);
721 if (CrossMBBPhysDef) {
724 while (!PhysDefs.empty()) {
725 auto LiveIn = PhysDefs.pop_back_val();
732 MI.eraseFromParent();
734 if (!PhysRefs.
empty())
740 VNT.insert(&
MI, CurrVN++);
744 ImplicitDefsToUpdate.clear();
745 ImplicitDefs.clear();
754void MachineCSEImpl::ExitScopeIfDone(
757 if (OpenChildren[
Node])
761 ExitScope(
Node->getBlock());
765 unsigned Left = --OpenChildren[Parent];
768 ExitScope(Parent->getBlock());
785 OpenChildren[
Node] =
Node->getNumChildren();
787 }
while (!WorkList.
empty());
790 bool Changed =
false;
794 Changed |= ProcessBlockCSE(
MBB);
796 ExitScopeIfDone(
Node, OpenChildren);
807 if (!isCSECandidate(
MI) ||
808 MI->isNotDuplicable() ||
811 MI->getNumDefs() != 1 ||
812 MI->getNumExplicitDefs() != 1)
829 bool Changed =
false;
832 if (!isPRECandidate(&
MI, PhysRefs))
835 if (!PREMap.count(&
MI)) {
840 auto MBB1 = PREMap[&
MI];
843 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
845 if (!CMBB->isLegalToHoistInto())
848 if (!isProfitableToHoistInto(CMBB,
MBB, MBB1))
855 if (BB !=
nullptr && BB1 !=
nullptr &&
865 if (
MI.isConvergent() && CMBB !=
MBB)
871 PhysDefVector PhysDefs;
872 if (!PhysRefs.
empty() &&
873 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &
MI, PhysRefs,
878 "First operand of instr with one explicit def must be this def");
881 if (!isProfitableToCSE(NewReg, VReg, CMBB, &
MI))
884 TII->duplicate(*CMBB, CMBB->getFirstTerminator(),
MI);
912 bool Changed =
false;
919 Changed |= ProcessBlockPRE(DT,
MBB);
921 }
while (!BBs.
empty());
933 "CandidateBB should dominate MBB1");
934 return MBFI->getBlockFreq(CandidateBB) <=
935 MBFI->getBlockFreq(
MBB) + MBFI->getBlockFreq(MBB1);
938void MachineCSEImpl::releaseMemory() {
948 LookAheadLimit =
TII->getMachineCSELookAheadLimit();
949 bool ChangedPRE, ChangedCSE;
950 ChangedPRE = PerformSimplePRE(DT);
953 return ChangedPRE || ChangedCSE;
963 MachineCSEImpl Impl(&MDT, &MBFI);
964 bool Changed = Impl.run(MF);
981 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
983 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
984 MachineCSEImpl Impl(&MDT, &MBFI);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
This file defines the BumpPtrAllocator interface.
COFF::MachineTypes Machine
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, const MachineOperand &MO, const MachineFunction &MF, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII)
Machine Common Subexpression Elimination
static cl::opt< int > CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), cl::desc("Threshold for the size of CSUses"))
static cl::opt< bool > AggressiveMachineCSE("aggressive-machine-cse", cl::Hidden, cl::init(false), cl::desc("Override the profitability heuristics for Machine CSE"))
unsigned const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
Base class for the actual dominator tree node.
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
bool isAsCheapAsAMove(const MachineInstr &MI) const override
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
An RAII based helper class to modify MachineFunctionProperties when running pass.
unsigned pred_size() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
pred_iterator pred_begin()
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
void changeDebugValuesDefReg(Register Reg)
Find all DBG_VALUEs that point to the register def in this instruction and point them to Reg instead.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
RecyclingAllocator - This class wraps an Allocator, adding the functionality of recycling deleted obj...
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
ScopedHashTableScope< K, V, KInfo, AllocatorTy > ScopeTy
ScopeTy - This is a helpful typedef that allows clients to get easy access to the name of the scope f...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Scope
Defines the scope in which this symbol should be visible: Default – Visible in the public interface o...
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void initializeMachineCSELegacyPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
char & MachineLoopInfoID
MachineLoopInfo - This pass is a loop analysis pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
BumpPtrAllocatorImpl BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
char & MachineCSELegacyID
MachineCSE - This pass performs global CSE on machine instructions.
bool isPotentiallyReachable(const Instruction *From, const Instruction *To, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet=nullptr, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether instruction 'To' is reachable from 'From', without passing through any blocks in Ex...
Special DenseMapInfo traits to compare MachineInstr* by value of the instruction rather than by point...