69#define DEBUG_TYPE "mergeicmps"
77 :
GEP(
GEP), LoadI(LoadI), BaseId(BaseId), Offset(std::move(Offset)) {}
79 BCEAtom(
const BCEAtom &) =
delete;
80 BCEAtom &operator=(
const BCEAtom &) =
delete;
82 BCEAtom(BCEAtom &&that) =
default;
83 BCEAtom &operator=(BCEAtom &&that) {
89 Offset = std::move(that.Offset);
104 return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
115class BaseIdentifier {
121 const auto Insertion = BaseToIndex.try_emplace(
Base, Order);
122 if (Insertion.second)
124 return Insertion.first->second;
135BCEAtom visitICmpLoadOperand(
Value *
const Val, BaseIdentifier &BaseId) {
136 auto *
const LoadI = dyn_cast<LoadInst>(Val);
140 if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
145 if (!LoadI->isSimple()) {
150 if (
Addr->getType()->getPointerAddressSpace() != 0) {
154 const auto &
DL = LoadI->getDataLayout();
164 auto *
GEP = dyn_cast<GetElementPtrInst>(
Addr);
167 if (
GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
173 Base =
GEP->getPointerOperand();
190 BCECmp(BCEAtom L, BCEAtom R,
int SizeBits,
const ICmpInst *CmpI)
205 BCECmpBlock(BCECmp Cmp,
BasicBlock *BB, InstructionSet BlockInsts)
208 const BCEAtom &Lhs()
const {
return Cmp.Lhs; }
209 const BCEAtom &Rhs()
const {
return Cmp.Rhs; }
210 int SizeBits()
const {
return Cmp.SizeBits; }
213 bool doesOtherWork()
const;
233 InstructionSet BlockInsts;
235 bool RequireSplit =
false;
237 unsigned OrigOrder = 0;
243bool BCECmpBlock::canSinkBCECmpInst(
const Instruction *Inst,
248 auto MayClobber = [&](
LoadInst *LI) {
254 if (MayClobber(
Cmp.Lhs.LoadI) || MayClobber(
Cmp.Rhs.LoadI))
260 const Instruction *OpI = dyn_cast<Instruction>(Op);
261 return OpI && BlockInsts.contains(OpI);
268 if (BlockInsts.count(&Inst))
270 assert(canSinkBCECmpInst(&Inst, AA) &&
"Split unsplittable block");
283 if (!BlockInsts.count(&Inst)) {
284 if (!canSinkBCECmpInst(&Inst, AA))
291bool BCECmpBlock::doesOtherWork()
const {
297 if (!BlockInsts.count(&Inst))
305std::optional<BCECmp> visitICmp(
const ICmpInst *
const CmpI,
307 BaseIdentifier &BaseId) {
320 << (ExpectedPredicate == ICmpInst::ICMP_EQ ?
"eq" :
"ne")
322 auto Lhs = visitICmpLoadOperand(CmpI->
getOperand(0), BaseId);
325 auto Rhs = visitICmpLoadOperand(CmpI->
getOperand(1), BaseId);
329 return BCECmp(std::move(Lhs), std::move(Rhs),
335std::optional<BCECmpBlock> visitCmpBlock(
Value *
const Val,
338 BaseIdentifier &BaseId) {
341 auto *
const BranchI = dyn_cast<BranchInst>(
Block->getTerminator());
347 if (BranchI->isUnconditional()) {
353 ExpectedPredicate = ICmpInst::ICMP_EQ;
357 const auto *
const Const = cast<ConstantInt>(Val);
359 if (!
Const->isZero())
362 assert(BranchI->getNumSuccessors() == 2 &&
"expecting a cond branch");
363 BasicBlock *
const FalseBlock = BranchI->getSuccessor(1);
364 Cond = BranchI->getCondition();
366 FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
369 auto *CmpI = dyn_cast<ICmpInst>(
Cond);
374 std::optional<BCECmp>
Result = visitICmp(CmpI, ExpectedPredicate, BaseId);
383 BlockInsts.insert(
Result->Rhs.GEP);
384 return BCECmpBlock(std::move(*Result),
Block, BlockInsts);
387static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
388 BCECmpBlock &&Comparison) {
390 <<
"': Found cmp of " <<
Comparison.SizeBits()
391 <<
" bits between " <<
Comparison.Lhs().BaseId <<
" + "
397 Comparisons.push_back(std::move(Comparison));
403 using ContiguousBlocks = std::vector<BCECmpBlock>;
405 BCECmpChain(
const std::vector<BasicBlock *> &
Blocks,
PHINode &Phi,
411 bool atLeastOneMerged()
const {
412 return any_of(MergedBlocks_,
419 std::vector<ContiguousBlocks> MergedBlocks_;
424static bool areContiguous(
const BCECmpBlock &
First,
const BCECmpBlock &Second) {
425 return First.Lhs().BaseId == Second.Lhs().BaseId &&
426 First.Rhs().BaseId == Second.Rhs().BaseId &&
427 First.Lhs().Offset +
First.SizeBits() / 8 == Second.Lhs().Offset &&
428 First.Rhs().Offset +
First.SizeBits() / 8 == Second.Rhs().Offset;
431static unsigned getMinOrigOrder(
const BCECmpChain::ContiguousBlocks &
Blocks) {
432 unsigned MinOrigOrder = std::numeric_limits<unsigned>::max();
434 MinOrigOrder = std::min(MinOrigOrder,
Block.OrigOrder);
440static std::vector<BCECmpChain::ContiguousBlocks>
441mergeBlocks(std::vector<BCECmpBlock> &&
Blocks) {
442 std::vector<BCECmpChain::ContiguousBlocks> MergedBlocks;
446 [](
const BCECmpBlock &LhsBlock,
const BCECmpBlock &RhsBlock) {
447 return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
448 std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
451 BCECmpChain::ContiguousBlocks *LastMergedBlock =
nullptr;
453 if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(),
Block)) {
454 MergedBlocks.emplace_back();
455 LastMergedBlock = &MergedBlocks.back();
458 << LastMergedBlock->back().BB->getName() <<
"\n");
460 LastMergedBlock->push_back(std::move(
Block));
465 llvm::sort(MergedBlocks, [](
const BCECmpChain::ContiguousBlocks &LhsBlocks,
466 const BCECmpChain::ContiguousBlocks &RhsBlocks) {
467 return getMinOrigOrder(LhsBlocks) < getMinOrigOrder(RhsBlocks);
473BCECmpChain::BCECmpChain(
const std::vector<BasicBlock *> &
Blocks,
PHINode &Phi,
476 assert(!
Blocks.empty() &&
"a chain should have at least one block");
478 std::vector<BCECmpBlock> Comparisons;
479 BaseIdentifier BaseId;
482 std::optional<BCECmpBlock>
Comparison = visitCmpBlock(
485 LLVM_DEBUG(
dbgs() <<
"chain with invalid BCECmpBlock, no merge.\n");
490 <<
"' does extra work besides compare\n");
491 if (Comparisons.empty()) {
507 <<
"Split initial block '" <<
Comparison->BB->getName()
508 <<
"' that does extra work besides compare\n");
510 enqueueBlock(Comparisons, std::move(*Comparison));
513 <<
"ignoring initial block '" <<
Comparison->BB->getName()
514 <<
"' that does extra work besides compare\n");
543 enqueueBlock(Comparisons, std::move(*Comparison));
547 if (Comparisons.empty()) {
548 LLVM_DEBUG(
dbgs() <<
"chain with no BCE basic blocks, no merge\n");
551 EntryBlock_ = Comparisons[0].BB;
552 MergedBlocks_ = mergeBlocks(std::move(Comparisons));
559class MergedBlockName {
565 :
Name(makeName(Comparisons)) {}
572 if (Comparisons.
size() == 1)
573 return Comparisons[0].BB->getName();
574 const int size = std::accumulate(Comparisons.
begin(), Comparisons.
end(), 0,
575 [](
int i,
const BCECmpBlock &Cmp) {
576 return i + Cmp.BB->getName().size();
587 Scratch.
append(str.begin(), str.end());
589 append(Comparisons[0].BB->getName());
590 for (
int I = 1, E = Comparisons.
size();
I < E; ++
I) {
597 return Scratch.
str();
608 assert(!Comparisons.
empty() &&
"merging zero comparisons");
610 const BCECmpBlock &FirstCmp = Comparisons[0];
615 NextCmpBlock->
getParent(), InsertBefore);
619 if (FirstCmp.Lhs().GEP)
620 Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
622 Lhs = FirstCmp.Lhs().LoadI->getPointerOperand();
623 if (FirstCmp.Rhs().GEP)
624 Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
626 Rhs = FirstCmp.Rhs().LoadI->getPointerOperand();
628 Value *IsEqual =
nullptr;
636 Comparisons, [](
const BCECmpBlock &
B) {
return B.RequireSplit; });
637 if (ToSplit != Comparisons.
end()) {
639 ToSplit->split(BB, AA);
642 if (Comparisons.
size() == 1) {
645 Instruction *
const LhsLoad = Builder.Insert(FirstCmp.Lhs().LoadI->clone());
646 Instruction *
const RhsLoad = Builder.Insert(FirstCmp.Rhs().LoadI->clone());
650 IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
652 const unsigned TotalSizeBits = std::accumulate(
653 Comparisons.
begin(), Comparisons.
end(), 0u,
654 [](
int Size,
const BCECmpBlock &
C) { return Size + C.SizeBits(); });
661 const auto &
DL =
Phi.getDataLayout();
664 ConstantInt::get(Builder.getIntNTy(SizeTBits), TotalSizeBits / 8),
666 IsEqual = Builder.CreateICmpEQ(
667 MemCmpCall, ConstantInt::get(Builder.getIntNTy(IntBits), 0));
672 if (NextCmpBlock == PhiBB) {
674 Builder.CreateBr(PhiBB);
675 Phi.addIncoming(IsEqual, BB);
679 Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
681 DTU.
applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
682 {DominatorTree::Insert, BB, PhiBB}});
689 assert(atLeastOneMerged() &&
"simplifying trivial BCECmpChain");
690 LLVM_DEBUG(
dbgs() <<
"Simplifying comparison chain starting at block "
691 << EntryBlock_->getName() <<
"\n");
698 InsertBefore = NextCmpBlock = mergeComparisons(
699 Blocks, InsertBefore, NextCmpBlock, Phi_, TLI, AA, DTU);
710 DTU.
applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_},
711 {DominatorTree::Insert, Pred, NextCmpBlock}});
716 const bool ChainEntryIsFnEntry = EntryBlock_->isEntryBlock();
717 if (ChainEntryIsFnEntry && DTU.
hasDomTree()) {
719 << EntryBlock_->getName() <<
" to "
720 << NextCmpBlock->
getName() <<
"\n");
722 DTU.
applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}});
724 EntryBlock_ =
nullptr;
728 for (
const auto &
Blocks : MergedBlocks_) {
737 MergedBlocks_.clear();
741std::vector<BasicBlock *> getOrderedBlocks(
PHINode &Phi,
745 std::vector<BasicBlock *>
Blocks(NumBlocks);
746 assert(LastBlock &&
"invalid last block");
748 for (
int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
753 <<
" has its address taken\n");
756 Blocks[BlockIndex] = CurBlock;
758 if (!SinglePredecessor) {
761 <<
" has two or more predecessors\n");
764 if (
Phi.getBasicBlockIndex(SinglePredecessor) < 0) {
767 <<
" does not link back to the phi\n");
770 CurBlock = SinglePredecessor;
779 if (
Phi.getNumIncomingValues() <= 1) {
800 for (
unsigned I = 0;
I <
Phi.getNumIncomingValues(); ++
I) {
801 if (isa<ConstantInt>(
Phi.getIncomingValue(
I)))
continue;
807 if (!isa<ICmpInst>(
Phi.getIncomingValue(
I)) ||
808 cast<ICmpInst>(
Phi.getIncomingValue(
I))->getParent() !=
809 Phi.getIncomingBlock(
I)) {
817 <<
"skip: non-constant value not from cmp or not from last block.\n");
820 LastBlock =
Phi.getIncomingBlock(
I);
833 getOrderedBlocks(Phi, LastBlock,
Phi.getNumIncomingValues());
834 if (
Blocks.empty())
return false;
835 BCECmpChain CmpChain(
Blocks, Phi, AA);
837 if (!CmpChain.atLeastOneMerged()) {
842 return CmpChain.simplify(TLI, AA, DTU);
856 if (!TLI.
has(LibFunc_memcmp))
860 DomTreeUpdater::UpdateStrategy::Eager);
862 bool MadeChange =
false;
866 if (
auto *
const Phi = dyn_cast<PHINode>(&*BB.
begin()))
867 MadeChange |= processPhi(*Phi, TLI, AA, DTU);
883 const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
884 const auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
887 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
888 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
889 return runImpl(
F, TLI,
TTI, AA, DTWP ? &DTWP->getDomTree() :
nullptr);
904char MergeICmpsLegacyPass::ID = 0;
906 "Merge contiguous icmps into a memcmp",
false,
false)
921 const bool MadeChanges =
runImpl(
F, TLI,
TTI, AA, DT);
static MachineBasicBlock * split(MachineBasicBlock::iterator I)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DenseMap< Block *, BlockRelaxAux > Blocks
static bool runImpl(Function &F, const TargetLowering &TLI)
This is the interface for a simple mod/ref and alias analysis over globals.
Merge contiguous icmps into a memcmp
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallString class.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
A container for analyses that lazily runs them and caches their results.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getPredicate() const
Return the predicate for this instruction.
static ConstantInt * getFalse(LLVMContext &Context)
This class represents an Operation in the Expression.
Analysis pass which computes a DominatorTree.
DomTreeNodeBase< NodeT > * setNewRoot(NodeT *BB)
Add a new node to the forward dominator tree and make it a new root.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
bool hasDomTree() const
Returns true if it holds a DomTreeT.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void moveBeforePreserving(Instruction *MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
Implements a dense probed hash-table based set with some number of buckets stored inline.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
constexpr bool empty() const
empty - Check if the string is empty.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
unsigned getSizeTSize(const Module &M) const
Returns the size of the size_t type in bits.
unsigned getIntSize() const
Get size of a C-level int or unsigned int, in bits.
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
NodeAddr< PhiNode * > Phi
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool operator<(int64_t V1, const APSInt &V2)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void initializeMergeICmpsLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
pred_iterator pred_begin(BasicBlock *BB)
Value * emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the memcmp function.
auto reverse(ContainerTy &&C)
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Pass * createMergeICmpsLegacyPass()
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool pred_empty(const BasicBlock *BB)
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
Implement std::hash so that hash_code can be used in STL containers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)