48#define DEBUG_TYPE "tailduplication"
51STATISTIC(NumTailDups,
"Number of tail duplicated blocks");
53 "Number of instructions added due to tail duplication");
55 "Number of instructions removed due to tail duplication");
56STATISTIC(NumDeadBlocks,
"Number of dead blocks removed");
66 "tail-dup-indirect-size",
67 cl::desc(
"Maximum instructions to consider tail duplicating blocks that "
68 "end with indirect branches."),
cl::init(20),
73 cl::desc(
"Maximum predecessors (maximum successors at the "
74 "same time) to consider tail duplicating blocks."),
79 cl::desc(
"Maximum successors (maximum predecessors at the "
80 "same time) to consider tail duplicating blocks."),
85 cl::desc(
"Verify sanity of PHI instructions during taildup"),
95 bool LayoutModeIn,
unsigned TailDupSizeIn) {
104 TailDupSize = TailDupSizeIn;
106 assert(MBPI !=
nullptr &&
"Machine Branch Probability Info required");
108 LayoutMode = LayoutModeIn;
109 this->PreRegAlloc = PreRegAlloc;
122 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2) {
124 if (PHIBB == PredBB) {
132 dbgs() <<
" missing input from predecessor "
138 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2) {
140 if (CheckExtra && !Preds.count(PHIBB)) {
143 dbgs() <<
" extra input from predecessor "
179 if (!tailDuplicate(IsSimple,
MBB, ForcedLayoutPred,
180 TDBBs,
Copies, CandidatePtr))
193 updateSuccessorsPHIs(
MBB, isDead, TDBBs, Succs);
197 NumTailDupRemoved +=
MBB->
size();
198 removeDeadBlock(
MBB, RemovalCallback);
203 if (!SSAUpdateVRs.empty()) {
204 for (
unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
205 unsigned VReg = SSAUpdateVRs[i];
219 SSAUpdateVals.find(VReg);
220 for (std::pair<MachineBasicBlock *, Register> &J : LI->second) {
243 for (
auto *UseMO : DebugUses) {
250 SSAUpdateVRs.clear();
251 SSAUpdateVals.clear();
256 for (
unsigned i = 0, e =
Copies.size(); i != e; ++i) {
260 Register Dst = Copy->getOperand(0).getReg();
261 Register Src = Copy->getOperand(1).getReg();
266 Copy->eraseFromParent();
271 NumAddedPHIs += NewPHIs.
size();
274 *DuplicatedPreds = std::move(TDBBs);
283 bool MadeChange =
false;
312 if (
UseMI.isDebugValue())
314 if (
UseMI.getParent() != BB)
321 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2)
322 if (
MI->getOperand(i + 1).getMBB() == SrcBB)
332 for (
const auto &
MI : BB) {
335 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
337 UsedByPhi->
insert(SrcReg);
346 SSAUpdateVals.find(OrigReg);
347 if (LI != SSAUpdateVals.end())
348 LI->second.push_back(std::make_pair(BB, NewReg));
351 Vals.push_back(std::make_pair(BB, NewReg));
352 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
353 SSAUpdateVRs.push_back(OrigReg);
359void TailDuplicator::processPHI(
366 assert(SrcOpIdx &&
"Unable to find matching PHI source?");
367 Register SrcReg =
MI->getOperand(SrcOpIdx).getReg();
368 unsigned SrcSubReg =
MI->getOperand(SrcOpIdx).getSubReg();
377 addSSAUpdateEntry(DefReg, NewDef, PredBB);
383 MI->removeOperand(SrcOpIdx + 1);
384 MI->removeOperand(SrcOpIdx);
386 MI->eraseFromParent();
387 else if (
MI->getNumOperands() == 1)
388 MI->setDesc(TII->
get(TargetOpcode::IMPLICIT_DEF));
393void TailDuplicator::duplicateInstruction(
398 if (
MI->isCFIInstruction()) {
400 TII->
get(TargetOpcode::CFI_INSTRUCTION))
412 if (!
Reg.isVirtual())
420 addSSAUpdateEntry(Reg, NewReg, PredBB);
422 auto VI = LocalVRMap.
find(Reg);
423 if (VI != LocalVRMap.
end()) {
430 if (
VI->second.SubReg != 0) {
465 TII->
get(TargetOpcode::COPY), NewReg)
466 .
addReg(
VI->second.Reg, 0,
VI->second.SubReg);
467 LocalVRMap.
erase(VI);
487void TailDuplicator::updateSuccessorsPHIs(
497 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
499 if (MO.
getMBB() == FromBB) {
512 for (
unsigned i =
MI.getNumOperands() - 2; i !=
Idx; i -= 2) {
514 if (MO.
getMBB() == FromBB) {
515 MI.removeOperand(i + 1);
526 SSAUpdateVals.find(Reg);
527 if (LI != SSAUpdateVals.end()) {
529 for (
const std::pair<MachineBasicBlock *, Register> &J : LI->second) {
540 MI.getOperand(
Idx).setReg(SrcReg);
541 MI.getOperand(
Idx + 1).setMBB(SrcBB);
544 MIB.addReg(SrcReg).addMBB(SrcBB);
551 MI.getOperand(
Idx).setReg(Reg);
552 MI.getOperand(
Idx + 1).setMBB(SrcBB);
555 MIB.addReg(Reg).addMBB(SrcBB);
560 MI.removeOperand(
Idx + 1);
561 MI.removeOperand(
Idx);
591 unsigned MaxDuplicateCount;
594 if (TailDupSize == 0)
597 MaxDuplicateCount = TailDupSize;
599 MaxDuplicateCount = 1;
607 if (TII->
analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
617 bool HasIndirectbr =
false;
621 if (HasIndirectbr && PreRegAlloc)
633 if (
MI.isNotDuplicable() &&
635 !
MI.isCFIInstruction()))
640 if (
MI.isConvergent())
646 if (PreRegAlloc &&
MI.isReturn())
652 if (PreRegAlloc &&
MI.isCall())
660 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
665 else if (!
MI.isPHI() && !
MI.isMetaInstruction())
682 for (
auto &
I : *SB) {
693 if (HasIndirectbr && PreRegAlloc)
702 return canCompletelyDuplicateBB(TailBB);
712 if (
I == TailBB->
end())
714 return I->isUnconditionalBranch();
736 if (!PredCond.
empty())
742bool TailDuplicator::duplicateSimpleBB(
748 bool Changed =
false;
762 LLVM_DEBUG(
dbgs() <<
"\nTail-duplicating into PredBB: " << *PredBB
763 <<
"From simple Succ: " << *TailBB);
769 if (PredCond.
empty())
779 if (PredFBB == TailBB)
781 if (PredTBB == TailBB)
785 if (PredTBB == PredFBB) {
791 if (PredFBB == NextBB)
793 if (PredTBB == NextBB && PredFBB ==
nullptr)
824 if (!PredCond.
empty())
862 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi);
867 bool Changed =
false;
875 assert(TailBB != PredBB &&
876 "Single-block loop should have been rejected earlier!");
885 bool IsLayoutSuccessor =
false;
886 if (ForcedLayoutPred)
887 IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
889 IsLayoutSuccessor =
true;
890 if (IsLayoutSuccessor)
894 LLVM_DEBUG(
dbgs() <<
"\nTail-duplicating into PredBB: " << *PredBB
895 <<
"From Succ: " << *TailBB);
909 processPHI(&
MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi,
true);
913 duplicateInstruction(&
MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
916 appendCopies(PredBB, CopyInfos,
Copies);
918 NumTailDupAdded += TailBB->
size() - 1;
923 "TailDuplicate called on block with multiple successors!");
928 if (ShouldUpdateTerminators)
948 !TII->
analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
950 (!PriorTBB || PriorTBB == TailBB) &&
954 <<
"From MBB: " << *TailBB);
967 while (
I != TailBB->
end() &&
I->isPHI()) {
971 processPHI(
MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
976 while (
I != TailBB->
end()) {
980 assert(!
MI->isBundle() &&
"Not expecting bundles before regalloc!");
981 duplicateInstruction(
MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
982 MI->eraseFromParent();
984 appendCopies(PrevBB, CopyInfos,
Copies);
995 if (ShouldUpdateTerminators)
1001 LLVM_DEBUG(
dbgs() <<
"Abort merging blocks, the predecessor still "
1002 "contains terminator instructions");
1005 return RemovedBranches;
1007 Changed |= RemovedBranches;
1046 processPHI(&
MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi,
false);
1048 appendCopies(PredBB, CopyInfos,
Copies);
1061 for (
auto &CI : CopyInfos) {
1063 .
addReg(CI.second.Reg, 0, CI.second.SubReg);
1070void TailDuplicator::removeDeadBlock(
1079 if (
MI.shouldUpdateCallSiteInfo())
1082 if (RemovalCallback)
1083 (*RemovalCallback)(
MBB);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static unsigned InstrCount
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static cl::opt< unsigned > TailDuplicateSize("tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden)
static cl::opt< unsigned > TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden)
static cl::opt< unsigned > TailDupPredSize("tail-dup-pred-size", cl::desc("Maximum predecessors (maximum successors at the " "same time) to consider tail duplicating blocks."), cl::init(16), cl::Hidden)
static cl::opt< unsigned > TailDupSuccSize("tail-dup-succ-size", cl::desc("Maximum successors (maximum predecessors at the " "same time) to consider tail duplicating blocks."), cl::init(16), cl::Hidden)
static cl::opt< bool > TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), cl::init(false), cl::Hidden)
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra)
static bool bothUsedInPHI(const MachineBasicBlock &A, const SmallPtrSet< MachineBasicBlock *, 8 > &SuccsB)
static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB)
static void getRegsUsedByPHIs(const MachineBasicBlock &BB, DenseSet< Register > *UsedByPhi)
static cl::opt< unsigned > TailDupIndirectBranchSize("tail-dup-indirect-size", cl::desc("Maximum instructions to consider tail duplicating blocks that " "end with indirect branches."), cl::init(20), cl::Hidden)
static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI)
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool hasEHPadSuccessor() const
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
succ_iterator succ_begin()
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
bool hasAddressTaken() const
Test whether this block is used as something other than the target of a terminator,...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
bool mayHaveInlineAsmBr() const
Returns true if this block may have an INLINEASM_BR (overestimate, by checking if any of the successo...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
void eraseCallSiteInfo(const MachineInstr *MI)
Following functions update call site info.
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool isDebugInstr() const
unsigned getNumOperands() const
Retuns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isDebugValue() const
const MachineOperand & getOperand(unsigned i) const
bool isIndirectBranch(QueryType Type=AnyInBundle) const
Return true if this is an indirect branch, such as a branch through a register.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
void Initialize(Register V)
Initialize - Reset this object to get ready for a new set of SSA updates.
Register GetValueInMiddleOfBlock(MachineBasicBlock *BB, bool ExistingValueOnly=false)
GetValueInMiddleOfBlock - Construct SSA form, materializing a value that is live in the middle of the...
void RewriteUse(MachineOperand &U)
RewriteUse - Rewrite a use of the symbolic value.
void AddAvailableValue(MachineBasicBlock *BB, Register V)
AddAvailableValue - Indicate that a rewritten value is available at the end of the specified block wi...
Analysis providing profile information.
Wrapper class representing virtual and physical registers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateBlocks()
Look for small blocks that are unconditionally branched to and do not fall through.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
const Triple & getTargetTriple() const
unsigned composeSubRegIndices(unsigned a, unsigned b) const
Return the subregister index you get from composing two subregister indices.
virtual const TargetRegisterClass * getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const
Return a subclass of the specified register class A so that each register in it has a sub-register of...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
A pair composed of a register and a sub-register index.