Go to the documentation of this file.
48 #define DEBUG_TYPE "tailduplication"
50 STATISTIC(NumTails,
"Number of tails duplicated");
51 STATISTIC(NumTailDups,
"Number of tail duplicated blocks");
53 "Number of instructions added due to tail duplication");
55 "Number of instructions removed due to tail duplication");
56 STATISTIC(NumDeadBlocks,
"Number of dead blocks removed");
57 STATISTIC(NumAddedPHIs,
"Number of phis added");
66 "tail-dup-indirect-size",
67 cl::desc(
"Maximum instructions to consider tail duplicating blocks that "
68 "end with indirect branches."),
cl::init(20),
73 cl::desc(
"Verify sanity of PHI instructions during taildup"),
83 bool LayoutModeIn,
unsigned TailDupSizeIn) {
92 TailDupSize = TailDupSizeIn;
94 assert(MBPI !=
nullptr &&
"Machine Branch Probability Info required");
96 LayoutMode = LayoutModeIn;
97 this->PreRegAlloc = PreRegAlloc;
110 for (
unsigned i = 1,
e =
MI->getNumOperands();
i !=
e;
i += 2) {
112 if (PHIBB == PredBB) {
120 dbgs() <<
" missing input from predecessor "
126 for (
unsigned i = 1,
e =
MI->getNumOperands();
i !=
e;
i += 2) {
128 if (CheckExtra && !Preds.count(PHIBB)) {
131 dbgs() <<
" extra input from predecessor "
167 if (!tailDuplicate(IsSimple,
MBB, ForcedLayoutPred,
168 TDBBs,
Copies, CandidatePtr))
181 updateSuccessorsPHIs(
MBB, isDead, TDBBs, Succs);
185 NumTailDupRemoved +=
MBB->
size();
186 removeDeadBlock(
MBB, RemovalCallback);
191 if (!SSAUpdateVRs.empty()) {
192 for (
unsigned i = 0,
e = SSAUpdateVRs.size();
i !=
e; ++
i) {
193 unsigned VReg = SSAUpdateVRs[
i];
207 SSAUpdateVals.find(VReg);
208 for (std::pair<MachineBasicBlock *, Register> &J : LI->second) {
224 DebugUses.push_back(&UseMO);
231 for (
auto *UseMO : DebugUses) {
238 SSAUpdateVRs.clear();
239 SSAUpdateVals.clear();
244 for (
unsigned i = 0,
e =
Copies.size();
i !=
e; ++
i) {
248 Register Dst = Copy->getOperand(0).getReg();
249 Register Src = Copy->getOperand(1).getReg();
254 Copy->eraseFromParent();
259 NumAddedPHIs += NewPHIs.size();
271 bool MadeChange =
false;
300 if (
UseMI.isDebugValue())
309 for (
unsigned i = 1,
e =
MI->getNumOperands();
i !=
e;
i += 2)
310 if (
MI->getOperand(
i + 1).getMBB() == SrcBB)
320 for (
const auto &
MI :
BB) {
323 for (
unsigned i = 1,
e =
MI.getNumOperands();
i !=
e;
i += 2) {
325 UsedByPhi->
insert(SrcReg);
334 SSAUpdateVals.find(OrigReg);
335 if (LI != SSAUpdateVals.end())
336 LI->second.push_back(std::make_pair(
BB, NewReg));
339 Vals.push_back(std::make_pair(
BB, NewReg));
340 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
341 SSAUpdateVRs.push_back(OrigReg);
347 void TailDuplicator::processPHI(
354 assert(SrcOpIdx &&
"Unable to find matching PHI source?");
355 Register SrcReg =
MI->getOperand(SrcOpIdx).getReg();
356 unsigned SrcSubReg =
MI->getOperand(SrcOpIdx).getSubReg();
365 addSSAUpdateEntry(DefReg, NewDef, PredBB);
371 MI->removeOperand(SrcOpIdx + 1);
372 MI->removeOperand(SrcOpIdx);
373 if (
MI->getNumOperands() == 1)
374 MI->eraseFromParent();
379 void TailDuplicator::duplicateInstruction(
384 if (
MI->isCFIInstruction()) {
386 TII->
get(TargetOpcode::CFI_INSTRUCTION))
406 addSSAUpdateEntry(
Reg, NewReg, PredBB);
409 if (
VI != LocalVRMap.
end()) {
416 if (
VI->second.SubReg != 0) {
443 auto *NewRC =
MI->getRegClassConstraint(
i, TII, TRI);
444 if (NewRC ==
nullptr)
448 TII->
get(TargetOpcode::COPY), NewReg)
449 .
addReg(
VI->second.Reg, 0,
VI->second.SubReg);
470 void TailDuplicator::updateSuccessorsPHIs(
480 for (
unsigned i = 1,
e =
MI.getNumOperands();
i !=
e;
i += 2) {
482 if (MO.
getMBB() == FromBB) {
495 for (
unsigned i =
MI.getNumOperands() - 2;
i != Idx;
i -= 2) {
497 if (MO.
getMBB() == FromBB) {
498 MI.removeOperand(
i + 1);
509 SSAUpdateVals.find(
Reg);
510 if (LI != SSAUpdateVals.end()) {
512 for (
const std::pair<MachineBasicBlock *, Register> &J : LI->second) {
523 MI.getOperand(Idx).setReg(SrcReg);
524 MI.getOperand(Idx + 1).setMBB(SrcBB);
527 MIB.addReg(SrcReg).addMBB(SrcBB);
534 MI.getOperand(Idx).setReg(
Reg);
535 MI.getOperand(Idx + 1).setMBB(SrcBB);
538 MIB.addReg(
Reg).addMBB(SrcBB);
543 MI.removeOperand(Idx + 1);
544 MI.removeOperand(Idx);
566 unsigned MaxDuplicateCount;
569 if (TailDupSize == 0)
572 MaxDuplicateCount = TailDupSize;
574 MaxDuplicateCount = 1;
582 if (TII->
analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
592 bool HasIndirectbr =
false;
596 if (HasIndirectbr && PreRegAlloc)
608 if (
MI.isNotDuplicable() &&
609 (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
610 !
MI.isCFIInstruction()))
615 if (
MI.isConvergent())
621 if (PreRegAlloc &&
MI.isReturn())
627 if (PreRegAlloc &&
MI.isCall())
640 else if (!
MI.isPHI() && !
MI.isMetaInstruction())
656 for (
auto SB : TailBB.successors()) {
657 for (
auto &
I : *SB) {
668 if (HasIndirectbr && PreRegAlloc)
677 return canCompletelyDuplicateBB(TailBB);
687 if (
I == TailBB->
end())
689 return I->isUnconditionalBranch();
695 if (SuccsB.
count(
BB) && !
BB->empty() &&
BB->begin()->isPHI())
711 if (!PredCond.empty())
717 bool TailDuplicator::duplicateSimpleBB(
724 bool Changed =
false;
738 LLVM_DEBUG(
dbgs() <<
"\nTail-duplicating into PredBB: " << *PredBB
739 <<
"From simple Succ: " << *TailBB);
745 if (PredCond.empty())
755 if (PredFBB == TailBB)
757 if (PredTBB == TailBB)
761 if (PredTBB == PredFBB) {
767 if (PredFBB == NextBB)
769 if (PredTBB == NextBB && PredFBB ==
nullptr)
785 TDBBs.push_back(PredBB);
800 if (!PredCond.empty())
829 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi,
Copies);
834 bool Changed =
false;
837 Preds.
insert(CandidatePtr->begin(), CandidatePtr->end());
842 assert(TailBB != PredBB &&
843 "Single-block loop should have been rejected earlier!");
852 bool IsLayoutSuccessor =
false;
853 if (ForcedLayoutPred)
854 IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
856 IsLayoutSuccessor =
true;
857 if (IsLayoutSuccessor)
861 LLVM_DEBUG(
dbgs() <<
"\nTail-duplicating into PredBB: " << *PredBB
862 <<
"From Succ: " << *TailBB);
864 TDBBs.push_back(PredBB);
876 processPHI(&
MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi,
true);
880 duplicateInstruction(&
MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
883 appendCopies(PredBB, CopyInfos,
Copies);
885 NumTailDupAdded += TailBB->
size() - 1;
890 "TailDuplicate called on block with multiple successors!");
895 if (ShouldUpdateTerminators)
915 !TII->
analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
917 (!PriorTBB || PriorTBB == TailBB) &&
921 <<
"From MBB: " << *TailBB);
934 while (
I != TailBB->
end() &&
I->isPHI()) {
938 processPHI(
MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
943 while (
I != TailBB->
end()) {
947 assert(!
MI->isBundle() &&
"Not expecting bundles before regalloc!");
948 duplicateInstruction(
MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
949 MI->eraseFromParent();
951 appendCopies(PrevBB, CopyInfos,
Copies);
962 if (ShouldUpdateTerminators)
965 TDBBs.push_back(PrevBB);
968 LLVM_DEBUG(
dbgs() <<
"Abort merging blocks, the predecessor still "
969 "contains terminator instructions");
972 return RemovedBranches;
974 Changed |= RemovedBranches;
1011 while (
I != TailBB->
end() &&
I->isPHI()) {
1015 processPHI(
MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi,
false);
1017 appendCopies(PredBB, CopyInfos,
Copies);
1030 for (
auto &CI : CopyInfos) {
1032 .
addReg(CI.second.Reg, 0, CI.second.SubReg);
1039 void TailDuplicator::removeDeadBlock(
1048 if (
MI.shouldUpdateCallSiteInfo())
1051 if (RemovalCallback)
1052 (*RemovalCallback)(
MBB);
bool isDebugValue() const
unsigned succ_size() const
pred_iterator pred_begin()
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder & UseMI
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool isIndirectBranch(QueryType Type=AnyInBundle) const
Return true if this is an indirect branch, such as a branch through a register.
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual const TargetInstrInfo * getInstrInfo() const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
void setIsKill(bool Val=true)
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Reg
All possible values of the reg field in the ModR/M byte.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool erase(const KeyT &Val)
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
static unsigned InstrCount
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
iterator_range< use_iterator > use_operands(Register Reg) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI)
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
A pair composed of a register and a sub-register index.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
unsigned pred_size() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB)
(vector float) vec_cmpeq(*A, *B) C
const MachineOperand & getOperand(unsigned i) const
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void setSubReg(unsigned subReg)
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
Describe properties that are true of each instruction in the target description file.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< bool > TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), cl::init(false), cl::Hidden)
STATISTIC(NumFunctions, "Total number of functions")
virtual const TargetRegisterClass * getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const
Return a subclass of the specified register class A so that each register in it has a sub-register of...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Analysis providing profile information.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra)
An efficient, type-erasing, non-owning reference to a callable.
MachineModuleInfo & getMMI() const
Implements a dense probed hash-table based set.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool mayHaveInlineAsmBr() const
Returns true if this block may have an INLINEASM_BR (overestimate, by checking if any of the successo...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
static bool bothUsedInPHI(const MachineBasicBlock &A, const SmallPtrSet< MachineBasicBlock *, 8 > &SuccsB)
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
initializer< Ty > init(const Ty &Val)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
iterator find(const_arg_type_t< KeyT > Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
succ_iterator succ_begin()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
iterator_range< pred_iterator > predecessors()
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool insert(const value_type &X)
Insert a new element into the SetVector.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MachineBasicBlock * getMBB() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator_range< succ_iterator > successors()
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
bool hasEHPadSuccessor() const
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
unsigned composeSubRegIndices(unsigned a, unsigned b) const
Return the subregister index you get from composing two subregister indices.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
self_iterator getIterator()
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const MachineBasicBlock * getParent() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
unsigned getSubReg() const
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Function & getFunction()
Return the LLVM function that this machine code represents.
static void getRegsUsedByPHIs(const MachineBasicBlock &BB, DenseSet< Register > *UsedByPhi)
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
static cl::opt< unsigned > TailDuplicateSize("tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden)
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void RewriteUse(MachineOperand &U)
RewriteUse - Rewrite a use of the symbolic value.
Register GetValueInMiddleOfBlock(MachineBasicBlock *BB, bool ExistingValueOnly=false)
GetValueInMiddleOfBlock - Construct SSA form, materializing a value that is live in the middle of the...
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned getNumOperands() const
Retuns the total number of operands.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
A SetVector that performs no allocations if smaller than a certain size.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
void AddAvailableValue(MachineBasicBlock *BB, Register V)
AddAvailableValue - Indicate that a rewritten value is available at the end of the specified block wi...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
void Initialize(Register V)
Initialize - Reset this object to get ready for a new set of SSA updates.
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
void eraseCallSiteInfo(const MachineInstr *MI)
Following functions update call site info.
static cl::opt< unsigned > TailDupIndirectBranchSize("tail-dup-indirect-size", cl::desc("Maximum instructions to consider tail duplicating blocks that " "end with indirect branches."), cl::init(20), cl::Hidden)
static cl::opt< unsigned > TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden)
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
bool tailDuplicateBlocks()
Look for small blocks that are unconditionally branched to and do not fall through.