48#define DEBUG_TYPE "tailduplication"
51STATISTIC(NumTailDups,
"Number of tail duplicated blocks");
53 "Number of instructions added due to tail duplication");
55 "Number of instructions removed due to tail duplication");
56STATISTIC(NumDeadBlocks,
"Number of dead blocks removed");
66 "tail-dup-indirect-size",
67 cl::desc(
"Maximum instructions to consider tail duplicating blocks that "
68 "end with indirect branches."),
cl::init(20),
73 cl::desc(
"Maximum predecessors (maximum successors at the "
74 "same time) to consider tail duplicating blocks."),
79 cl::desc(
"Maximum successors (maximum predecessors at the "
80 "same time) to consider tail duplicating blocks."),
85 cl::desc(
"Verify sanity of PHI instructions during taildup"),
95 bool LayoutModeIn,
unsigned TailDupSizeIn) {
103 TailDupSize = TailDupSizeIn;
105 assert(MBPI !=
nullptr &&
"Machine Branch Probability Info required");
107 LayoutMode = LayoutModeIn;
108 this->PreRegAlloc = PreRegAlloc;
121 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2) {
123 if (PHIBB == PredBB) {
131 dbgs() <<
" missing input from predecessor "
137 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2) {
139 if (CheckExtra && !Preds.count(PHIBB)) {
142 dbgs() <<
" extra input from predecessor "
178 if (!tailDuplicate(IsSimple,
MBB, ForcedLayoutPred,
179 TDBBs,
Copies, CandidatePtr))
192 updateSuccessorsPHIs(
MBB, isDead, TDBBs, Succs);
196 NumTailDupRemoved +=
MBB->
size();
197 removeDeadBlock(
MBB, RemovalCallback);
202 if (!SSAUpdateVRs.empty()) {
203 for (
unsigned VReg : SSAUpdateVRs) {
217 SSAUpdateVals.find(VReg);
218 for (std::pair<MachineBasicBlock *, Register> &J : LI->second) {
241 for (
auto *UseMO : DebugUses) {
248 SSAUpdateVRs.
clear();
249 SSAUpdateVals.clear();
257 Register Dst = Copy->getOperand(0).getReg();
258 Register Src = Copy->getOperand(1).getReg();
263 Copy->eraseFromParent();
268 NumAddedPHIs += NewPHIs.
size();
271 *DuplicatedPreds = std::move(TDBBs);
280 bool MadeChange =
false;
309 if (
UseMI.isDebugValue())
311 if (
UseMI.getParent() != BB)
318 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2)
319 if (
MI->getOperand(i + 1).getMBB() == SrcBB)
329 for (
const auto &
MI : BB) {
332 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
334 UsedByPhi->
insert(SrcReg);
343 SSAUpdateVals.find(OrigReg);
344 if (LI != SSAUpdateVals.end())
345 LI->second.push_back(std::make_pair(BB, NewReg));
348 Vals.push_back(std::make_pair(BB, NewReg));
349 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
350 SSAUpdateVRs.push_back(OrigReg);
356void TailDuplicator::processPHI(
363 assert(SrcOpIdx &&
"Unable to find matching PHI source?");
364 Register SrcReg =
MI->getOperand(SrcOpIdx).getReg();
365 unsigned SrcSubReg =
MI->getOperand(SrcOpIdx).getSubReg();
374 addSSAUpdateEntry(DefReg, NewDef, PredBB);
380 MI->removeOperand(SrcOpIdx + 1);
381 MI->removeOperand(SrcOpIdx);
383 MI->eraseFromParent();
384 else if (
MI->getNumOperands() == 1)
385 MI->setDesc(TII->
get(TargetOpcode::IMPLICIT_DEF));
390void TailDuplicator::duplicateInstruction(
395 if (
MI->isCFIInstruction()) {
397 TII->
get(TargetOpcode::CFI_INSTRUCTION))
409 if (!
Reg.isVirtual())
417 addSSAUpdateEntry(Reg, NewReg, PredBB);
419 auto VI = LocalVRMap.
find(Reg);
420 if (VI != LocalVRMap.
end()) {
427 if (
VI->second.SubReg != 0) {
462 TII->
get(TargetOpcode::COPY), NewReg)
463 .
addReg(
VI->second.Reg, 0,
VI->second.SubReg);
464 LocalVRMap.
erase(VI);
484void TailDuplicator::updateSuccessorsPHIs(
494 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
496 if (MO.
getMBB() == FromBB) {
509 for (
unsigned i =
MI.getNumOperands() - 2; i !=
Idx; i -= 2) {
511 if (MO.
getMBB() == FromBB) {
512 MI.removeOperand(i + 1);
523 SSAUpdateVals.find(Reg);
524 if (LI != SSAUpdateVals.end()) {
526 for (
const std::pair<MachineBasicBlock *, Register> &J : LI->second) {
537 MI.getOperand(
Idx).setReg(SrcReg);
538 MI.getOperand(
Idx + 1).setMBB(SrcBB);
541 MIB.addReg(SrcReg).addMBB(SrcBB);
548 MI.getOperand(
Idx).setReg(Reg);
549 MI.getOperand(
Idx + 1).setMBB(SrcBB);
552 MIB.addReg(Reg).addMBB(SrcBB);
557 MI.removeOperand(
Idx + 1);
558 MI.removeOperand(
Idx);
588 unsigned MaxDuplicateCount;
591 if (TailDupSize == 0)
594 MaxDuplicateCount = TailDupSize;
596 MaxDuplicateCount = 1;
604 if (TII->
analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
614 bool HasIndirectbr =
false;
618 if (HasIndirectbr && PreRegAlloc)
630 if (
MI.isNotDuplicable() &&
632 !
MI.isCFIInstruction()))
637 if (
MI.isConvergent())
643 if (PreRegAlloc &&
MI.isReturn())
649 if (PreRegAlloc &&
MI.isCall())
657 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
662 else if (!
MI.isPHI() && !
MI.isMetaInstruction())
679 for (
auto &
I : *SB) {
690 if (HasIndirectbr && PreRegAlloc)
699 return canCompletelyDuplicateBB(TailBB);
709 if (
I == TailBB->
end())
711 return I->isUnconditionalBranch();
733 if (!PredCond.
empty())
739bool TailDuplicator::duplicateSimpleBB(
745 bool Changed =
false;
759 LLVM_DEBUG(
dbgs() <<
"\nTail-duplicating into PredBB: " << *PredBB
760 <<
"From simple Succ: " << *TailBB);
766 if (PredCond.
empty())
776 if (PredFBB == TailBB)
778 if (PredTBB == TailBB)
782 if (PredTBB == PredFBB) {
788 if (PredFBB == NextBB)
790 if (PredTBB == NextBB && PredFBB ==
nullptr)
821 if (!PredCond.
empty())
859 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi);
864 bool Changed =
false;
872 assert(TailBB != PredBB &&
873 "Single-block loop should have been rejected earlier!");
882 bool IsLayoutSuccessor =
false;
883 if (ForcedLayoutPred)
884 IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
886 IsLayoutSuccessor =
true;
887 if (IsLayoutSuccessor)
891 LLVM_DEBUG(
dbgs() <<
"\nTail-duplicating into PredBB: " << *PredBB
892 <<
"From Succ: " << *TailBB);
906 processPHI(&
MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi,
true);
910 duplicateInstruction(&
MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
913 appendCopies(PredBB, CopyInfos,
Copies);
915 NumTailDupAdded += TailBB->
size() - 1;
920 "TailDuplicate called on block with multiple successors!");
925 if (ShouldUpdateTerminators)
945 !TII->
analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
947 (!PriorTBB || PriorTBB == TailBB) &&
951 <<
"From MBB: " << *TailBB);
964 while (
I != TailBB->
end() &&
I->isPHI()) {
968 processPHI(
MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
973 while (
I != TailBB->
end()) {
977 assert(!
MI->isBundle() &&
"Not expecting bundles before regalloc!");
978 duplicateInstruction(
MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
979 MI->eraseFromParent();
981 appendCopies(PrevBB, CopyInfos,
Copies);
992 if (ShouldUpdateTerminators)
998 LLVM_DEBUG(
dbgs() <<
"Abort merging blocks, the predecessor still "
999 "contains terminator instructions");
1002 return RemovedBranches;
1004 Changed |= RemovedBranches;
1043 processPHI(&
MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi,
false);
1045 appendCopies(PredBB, CopyInfos,
Copies);
1058 for (
auto &CI : CopyInfos) {
1060 .
addReg(CI.second.Reg, 0, CI.second.SubReg);
1067void TailDuplicator::removeDeadBlock(
1076 if (
MI.shouldUpdateCallSiteInfo())
1079 if (RemovalCallback)
1080 (*RemovalCallback)(
MBB);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static unsigned InstrCount
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static cl::opt< unsigned > TailDuplicateSize("tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden)
static cl::opt< unsigned > TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden)
static cl::opt< unsigned > TailDupPredSize("tail-dup-pred-size", cl::desc("Maximum predecessors (maximum successors at the " "same time) to consider tail duplicating blocks."), cl::init(16), cl::Hidden)
static cl::opt< unsigned > TailDupSuccSize("tail-dup-succ-size", cl::desc("Maximum successors (maximum predecessors at the " "same time) to consider tail duplicating blocks."), cl::init(16), cl::Hidden)
static cl::opt< bool > TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), cl::init(false), cl::Hidden)
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra)
static bool bothUsedInPHI(const MachineBasicBlock &A, const SmallPtrSet< MachineBasicBlock *, 8 > &SuccsB)
static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB)
static void getRegsUsedByPHIs(const MachineBasicBlock &BB, DenseSet< Register > *UsedByPhi)
static cl::opt< unsigned > TailDupIndirectBranchSize("tail-dup-indirect-size", cl::desc("Maximum instructions to consider tail duplicating blocks that " "end with indirect branches."), cl::init(20), cl::Hidden)
static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI)
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool hasEHPadSuccessor() const
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
succ_iterator succ_begin()
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
bool hasAddressTaken() const
Test whether this block is used as something other than the target of a terminator,...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
bool mayHaveInlineAsmBr() const
Returns true if this block may have an INLINEASM_BR (overestimate, by checking if any of the successo...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void eraseCallSiteInfo(const MachineInstr *MI)
Following functions update call site info.
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isDebugValue() const
const MachineOperand & getOperand(unsigned i) const
bool isIndirectBranch(QueryType Type=AnyInBundle) const
Return true if this is an indirect branch, such as a branch through a register.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
void Initialize(Register V)
Initialize - Reset this object to get ready for a new set of SSA updates.
Register GetValueInMiddleOfBlock(MachineBasicBlock *BB, bool ExistingValueOnly=false)
GetValueInMiddleOfBlock - Construct SSA form, materializing a value that is live in the middle of the...
void RewriteUse(MachineOperand &U)
RewriteUse - Rewrite a use of the symbolic value.
void AddAvailableValue(MachineBasicBlock *BB, Register V)
AddAvailableValue - Indicate that a rewritten value is available at the end of the specified block wi...
Analysis providing profile information.
Wrapper class representing virtual and physical registers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateBlocks()
Look for small blocks that are unconditionally branched to and do not fall through.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
const Triple & getTargetTriple() const
unsigned composeSubRegIndices(unsigned a, unsigned b) const
Return the subregister index you get from composing two subregister indices.
virtual const TargetRegisterClass * getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const
Return a subclass of the specified register class A so that each register in it has a sub-register of...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
A pair composed of a register and a sub-register index.