48#define DEBUG_TYPE "tailduplication"
51STATISTIC(NumTailDups,
"Number of tail duplicated blocks");
53 "Number of instructions added due to tail duplication");
55 "Number of instructions removed due to tail duplication");
56STATISTIC(NumDeadBlocks,
"Number of dead blocks removed");
66 "tail-dup-indirect-size",
67 cl::desc(
"Maximum instructions to consider tail duplicating blocks that "
68 "end with indirect branches."),
cl::init(20),
73 cl::desc(
"Verify sanity of PHI instructions during taildup"),
83 bool LayoutModeIn,
unsigned TailDupSizeIn) {
92 TailDupSize = TailDupSizeIn;
94 assert(MBPI !=
nullptr &&
"Machine Branch Probability Info required");
96 LayoutMode = LayoutModeIn;
97 this->PreRegAlloc = PreRegAlloc;
110 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2) {
112 if (PHIBB == PredBB) {
120 dbgs() <<
" missing input from predecessor "
126 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2) {
128 if (CheckExtra && !Preds.count(PHIBB)) {
131 dbgs() <<
" extra input from predecessor "
167 if (!tailDuplicate(IsSimple,
MBB, ForcedLayoutPred,
168 TDBBs,
Copies, CandidatePtr))
181 updateSuccessorsPHIs(
MBB, isDead, TDBBs, Succs);
185 NumTailDupRemoved +=
MBB->
size();
186 removeDeadBlock(
MBB, RemovalCallback);
191 if (!SSAUpdateVRs.empty()) {
192 for (
unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
193 unsigned VReg = SSAUpdateVRs[i];
207 SSAUpdateVals.find(VReg);
208 for (std::pair<MachineBasicBlock *, Register> &J : LI->second) {
231 for (
auto *UseMO : DebugUses) {
238 SSAUpdateVRs.clear();
239 SSAUpdateVals.clear();
244 for (
unsigned i = 0, e =
Copies.size(); i != e; ++i) {
248 Register Dst = Copy->getOperand(0).getReg();
249 Register Src = Copy->getOperand(1).getReg();
254 Copy->eraseFromParent();
259 NumAddedPHIs += NewPHIs.
size();
262 *DuplicatedPreds = std::move(TDBBs);
271 bool MadeChange =
false;
300 if (
UseMI.isDebugValue())
302 if (
UseMI.getParent() != BB)
309 for (
unsigned i = 1, e =
MI->getNumOperands(); i != e; i += 2)
310 if (
MI->getOperand(i + 1).getMBB() == SrcBB)
320 for (
const auto &
MI : BB) {
323 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
325 UsedByPhi->
insert(SrcReg);
334 SSAUpdateVals.find(OrigReg);
335 if (LI != SSAUpdateVals.end())
336 LI->second.push_back(std::make_pair(BB, NewReg));
339 Vals.push_back(std::make_pair(BB, NewReg));
340 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
341 SSAUpdateVRs.push_back(OrigReg);
347void TailDuplicator::processPHI(
354 assert(SrcOpIdx &&
"Unable to find matching PHI source?");
355 Register SrcReg =
MI->getOperand(SrcOpIdx).getReg();
356 unsigned SrcSubReg =
MI->getOperand(SrcOpIdx).getSubReg();
365 addSSAUpdateEntry(DefReg, NewDef, PredBB);
371 MI->removeOperand(SrcOpIdx + 1);
372 MI->removeOperand(SrcOpIdx);
374 MI->eraseFromParent();
375 else if (
MI->getNumOperands() == 1)
376 MI->setDesc(TII->
get(TargetOpcode::IMPLICIT_DEF));
381void TailDuplicator::duplicateInstruction(
386 if (
MI->isCFIInstruction()) {
388 TII->
get(TargetOpcode::CFI_INSTRUCTION))
400 if (!
Reg.isVirtual())
408 addSSAUpdateEntry(Reg, NewReg, PredBB);
410 auto VI = LocalVRMap.
find(Reg);
411 if (VI != LocalVRMap.
end()) {
418 if (
VI->second.SubReg != 0) {
453 TII->
get(TargetOpcode::COPY), NewReg)
454 .
addReg(
VI->second.Reg, 0,
VI->second.SubReg);
455 LocalVRMap.
erase(VI);
475void TailDuplicator::updateSuccessorsPHIs(
485 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; i += 2) {
487 if (MO.
getMBB() == FromBB) {
500 for (
unsigned i =
MI.getNumOperands() - 2; i !=
Idx; i -= 2) {
502 if (MO.
getMBB() == FromBB) {
503 MI.removeOperand(i + 1);
514 SSAUpdateVals.find(Reg);
515 if (LI != SSAUpdateVals.end()) {
517 for (
const std::pair<MachineBasicBlock *, Register> &J : LI->second) {
528 MI.getOperand(
Idx).setReg(SrcReg);
529 MI.getOperand(
Idx + 1).setMBB(SrcBB);
532 MIB.addReg(SrcReg).addMBB(SrcBB);
539 MI.getOperand(
Idx).setReg(Reg);
540 MI.getOperand(
Idx + 1).setMBB(SrcBB);
543 MIB.addReg(Reg).addMBB(SrcBB);
548 MI.removeOperand(
Idx + 1);
549 MI.removeOperand(
Idx);
571 unsigned MaxDuplicateCount;
574 if (TailDupSize == 0)
577 MaxDuplicateCount = TailDupSize;
579 MaxDuplicateCount = 1;
587 if (TII->
analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
597 bool HasIndirectbr =
false;
601 if (HasIndirectbr && PreRegAlloc)
613 if (
MI.isNotDuplicable() &&
615 !
MI.isCFIInstruction()))
620 if (
MI.isConvergent())
626 if (PreRegAlloc &&
MI.isReturn())
632 if (PreRegAlloc &&
MI.isCall())
640 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
645 else if (!
MI.isPHI() && !
MI.isMetaInstruction())
662 for (
auto &
I : *SB) {
673 if (HasIndirectbr && PreRegAlloc)
682 return canCompletelyDuplicateBB(TailBB);
692 if (
I == TailBB->
end())
694 return I->isUnconditionalBranch();
716 if (!PredCond.
empty())
722bool TailDuplicator::duplicateSimpleBB(
728 bool Changed =
false;
742 LLVM_DEBUG(
dbgs() <<
"\nTail-duplicating into PredBB: " << *PredBB
743 <<
"From simple Succ: " << *TailBB);
749 if (PredCond.
empty())
759 if (PredFBB == TailBB)
761 if (PredTBB == TailBB)
765 if (PredTBB == PredFBB) {
771 if (PredFBB == NextBB)
773 if (PredTBB == NextBB && PredFBB ==
nullptr)
804 if (!PredCond.
empty())
842 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi);
847 bool Changed =
false;
855 assert(TailBB != PredBB &&
856 "Single-block loop should have been rejected earlier!");
865 bool IsLayoutSuccessor =
false;
866 if (ForcedLayoutPred)
867 IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
869 IsLayoutSuccessor =
true;
870 if (IsLayoutSuccessor)
874 LLVM_DEBUG(
dbgs() <<
"\nTail-duplicating into PredBB: " << *PredBB
875 <<
"From Succ: " << *TailBB);
889 processPHI(&
MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi,
true);
893 duplicateInstruction(&
MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
896 appendCopies(PredBB, CopyInfos,
Copies);
898 NumTailDupAdded += TailBB->
size() - 1;
903 "TailDuplicate called on block with multiple successors!");
908 if (ShouldUpdateTerminators)
928 !TII->
analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
930 (!PriorTBB || PriorTBB == TailBB) &&
934 <<
"From MBB: " << *TailBB);
947 while (
I != TailBB->
end() &&
I->isPHI()) {
951 processPHI(
MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
956 while (
I != TailBB->
end()) {
960 assert(!
MI->isBundle() &&
"Not expecting bundles before regalloc!");
961 duplicateInstruction(
MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
962 MI->eraseFromParent();
964 appendCopies(PrevBB, CopyInfos,
Copies);
975 if (ShouldUpdateTerminators)
981 LLVM_DEBUG(
dbgs() <<
"Abort merging blocks, the predecessor still "
982 "contains terminator instructions");
985 return RemovedBranches;
987 Changed |= RemovedBranches;
1026 processPHI(&
MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi,
false);
1028 appendCopies(PredBB, CopyInfos,
Copies);
1041 for (
auto &CI : CopyInfos) {
1043 .
addReg(CI.second.Reg, 0, CI.second.SubReg);
1050void TailDuplicator::removeDeadBlock(
1059 if (
MI.shouldUpdateCallSiteInfo())
1062 if (RemovalCallback)
1063 (*RemovalCallback)(
MBB);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static unsigned InstrCount
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static cl::opt< unsigned > TailDuplicateSize("tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden)
static cl::opt< unsigned > TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden)
static cl::opt< bool > TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), cl::init(false), cl::Hidden)
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra)
static bool bothUsedInPHI(const MachineBasicBlock &A, const SmallPtrSet< MachineBasicBlock *, 8 > &SuccsB)
static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB)
static void getRegsUsedByPHIs(const MachineBasicBlock &BB, DenseSet< Register > *UsedByPhi)
static cl::opt< unsigned > TailDupIndirectBranchSize("tail-dup-indirect-size", cl::desc("Maximum instructions to consider tail duplicating blocks that " "end with indirect branches."), cl::init(20), cl::Hidden)
static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI)
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool hasEHPadSuccessor() const
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
succ_iterator succ_begin()
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
bool hasAddressTaken() const
Test whether this block is used as something other than the target of a terminator,...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
bool mayHaveInlineAsmBr() const
Returns true if this block may have an INLINEASM_BR (overestimate, by checking if any of the successo...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
void eraseCallSiteInfo(const MachineInstr *MI)
Following functions update call site info.
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isDebugValue() const
const MachineOperand & getOperand(unsigned i) const
bool isIndirectBranch(QueryType Type=AnyInBundle) const
Return true if this is an indirect branch, such as a branch through a register.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
void Initialize(Register V)
Initialize - Reset this object to get ready for a new set of SSA updates.
Register GetValueInMiddleOfBlock(MachineBasicBlock *BB, bool ExistingValueOnly=false)
GetValueInMiddleOfBlock - Construct SSA form, materializing a value that is live in the middle of the...
void RewriteUse(MachineOperand &U)
RewriteUse - Rewrite a use of the symbolic value.
void AddAvailableValue(MachineBasicBlock *BB, Register V)
AddAvailableValue - Indicate that a rewritten value is available at the end of the specified block wi...
Analysis providing profile information.
Wrapper class representing virtual and physical registers.
bool insert(const value_type &X)
Insert a new element into the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void initMF(MachineFunction &MF, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPI, MBFIWrapper *MBFI, ProfileSummaryInfo *PSI, bool LayoutMode, unsigned TailDupSize=0)
Prepare to run on a specific machine function.
bool tailDuplicateBlocks()
Look for small blocks that are unconditionally branched to and do not fall through.
bool tailDuplicateAndUpdate(bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl< MachineBasicBlock * > *DuplicatedPreds=nullptr, function_ref< void(MachineBasicBlock *)> *RemovalCallback=nullptr, SmallVectorImpl< MachineBasicBlock * > *CandidatePtr=nullptr)
Tail duplicate a single basic block into its predecessors, and then clean up.
static bool isSimpleBB(MachineBasicBlock *TailBB)
True if this BB has only one unconditional jump.
bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB)
Returns true if TailBB can successfully be duplicated into PredBB.
bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB)
Determine if it is profitable to duplicate this block.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
virtual MachineInstr & duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const
Clones instruction or the whole instruction bundle Orig and insert into MBB before InsertBefore.
const Triple & getTargetTriple() const
unsigned composeSubRegIndices(unsigned a, unsigned b) const
Return the subregister index you get from composing two subregister indices.
virtual const TargetRegisterClass * getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const
Return a subclass of the specified register class A so that each register in it has a sub-register of...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, or DriverKit).
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
A pair composed of a register and a sub-register index.