//===- MergeICmps.cpp - Merge integer comparison chains ------------------===//
#define DEBUG_TYPE "mergeicmps"
// A BCE atom ("Binary Compare Expression atom"): an integer load at a
// constant offset from a base pointer.
struct BCEAtom {
  BCEAtom() = default;
  BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
      : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}

  BCEAtom(const BCEAtom &) = delete;
  BCEAtom &operator=(const BCEAtom &) = delete;

  BCEAtom(BCEAtom &&that) = default;
  BCEAtom &operator=(BCEAtom &&that) {
    LoadI = that.LoadI;
    GEP = that.GEP;
    BaseId = that.BaseId;
    Offset = std::move(that.Offset);
    return *this;
  }

  // Atoms are ordered by (BaseId, Offset) so that chains can be sorted
  // deterministically, without relying on pointer values.
  bool operator<(const BCEAtom &O) const {
    return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
  }

  GetElementPtrInst *GEP = nullptr;
  LoadInst *LoadI = nullptr;
  unsigned BaseId = 0;
  APInt Offset;
};
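For orientation, here is the kind of source pattern these atoms describe, as a small hypothetical example (types and names invented for illustration): each operand of an == is one atom, a load at a constant offset from a base.

// Hypothetical chain that the pass targets: three member compares over
// contiguous offsets, each side of each == being one BCE atom.
struct S { int a; int b; int c; };

bool eq(const S &lhs, const S &rhs) {
  // lhs.a is the atom (base = &lhs, offset = 0); rhs.a is (&rhs, 0); etc.
  return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c;
}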
// Assigns increasing ids to values in the order in which they are seen.
class BaseIdentifier {
public:
  int getBaseId(const Value *Base) {
    const auto Insertion = BaseToIndex.try_emplace(Base, Order);
    if (Insertion.second)
      ++Order;
    return Insertion.first->second;
  }

private:
  unsigned Order = 1;
  DenseMap<const Value *, int> BaseToIndex;
};
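A minimal standalone analogue of this id-assignment idiom, with std::unordered_map::try_emplace standing in for DenseMap (class and names hypothetical):

#include <unordered_map>

class BaseIds {
  unsigned Order = 1;
  std::unordered_map<const void *, int> BaseToIndex;

public:
  int getBaseId(const void *Base) {
    // try_emplace only inserts when the key is new; .second reports which.
    const auto Insertion = BaseToIndex.try_emplace(Base, Order);
    if (Insertion.second)
      ++Order; // a fresh base consumed this id; advance for the next one
    return Insertion.first->second;
  }
};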
// If Val is a load at a constant offset from a base address, and neither the
// load nor the address has users outside the block, returns the atom.
BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
  auto *const LoadI = dyn_cast<LoadInst>(Val);
  if (!LoadI)
    return {};
  if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
    LLVM_DEBUG(dbgs() << "used outside of block\n");
    return {};
  }
  // Do not optimize atomic loads to non-atomic memcmp.
  if (!LoadI->isSimple()) {
    LLVM_DEBUG(dbgs() << "volatile or atomic\n");
    return {};
  }
  Value *Addr = LoadI->getOperand(0);
  if (Addr->getType()->getPointerAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
    return {};
  }
  const auto &DL = LoadI->getModule()->getDataLayout();
  if (!isDereferenceablePointer(Addr, LoadI->getType(), DL)) {
    // We need to be able to do the comparisons in any order, so the memory
    // must be unconditionally dereferenceable.
    LLVM_DEBUG(dbgs() << "not dereferenceable\n");
    return {};
  }

  APInt Offset = APInt(DL.getIndexTypeSizeInBits(Addr->getType()), 0);
  Value *Base = Addr;
  auto *GEP = dyn_cast<GetElementPtrInst>(Addr);
  if (GEP) {
    if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
      LLVM_DEBUG(dbgs() << "used outside of block\n");
      return {};
    }
    if (!GEP->accumulateConstantOffset(DL, Offset))
      return {};
    Base = GEP->getPointerOperand();
  }
  return BCEAtom(GEP, LoadI, BaseId.getBaseId(Base), Offset);
}
// A comparison between two BCE atoms.
struct BCECmp {
  BCEAtom Lhs;
  BCEAtom Rhs;
  int SizeBits;
  const ICmpInst *CmpI;

  BCECmp(BCEAtom L, BCEAtom R, int SizeBits, const ICmpInst *CmpI)
      : Lhs(std::move(L)), Rhs(std::move(R)), SizeBits(SizeBits), CmpI(CmpI) {
    if (Rhs < Lhs)
      std::swap(Rhs, Lhs); // canonicalize so that Lhs <= Rhs
  }
};
// A basic block whose only work is a comparison between two BCE atoms.
class BCECmpBlock {
public:
  typedef SmallDenseSet<const Instruction *, 8> InstructionSet;

  BCECmpBlock(BCECmp Cmp, BasicBlock *BB, InstructionSet BlockInsts)
      : BB(BB), BlockInsts(std::move(BlockInsts)), Cmp(std::move(Cmp)) {}

  const BCEAtom &Lhs() const { return Cmp.Lhs; }
  const BCEAtom &Rhs() const { return Cmp.Rhs; }
  int SizeBits() const { return Cmp.SizeBits; }

  // Returns true if the block does any work besides the comparison.
  bool doesOtherWork() const;

  // Returns true if the non-BCE-cmp instructions can be separated from the
  // BCE-cmp instructions in this block.
  bool canSplit(AliasAnalysis &AA) const;

  // Moves the non-BCE-cmp instructions to the given parent block.
  void split(BasicBlock *NewParent, AliasAnalysis &AA) const;

  // The basic block where this comparison happens.
  BasicBlock *BB;
  // The instructions relevant to the BCE comparison.
  InstructionSet BlockInsts;
  // Set when the block does extra work that must be hoisted out before
  // merging.
  bool RequireSplit = false;
  // Original position of this block in the chain, before any reordering.
  unsigned OrigOrder = 0;

private:
  BCECmp Cmp;

  bool canSinkBCECmpInst(const Instruction *, AliasAnalysis &) const;
};
bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
                                    AliasAnalysis &AA) const {
  // If this instruction may clobber the loads, bail for now.
  if (Inst->mayWriteToMemory()) {
    auto MayClobber = [&](LoadInst *LI) {
      // If a potentially clobbering instruction comes before the load, we can
      // still safely sink the load.
      return (Inst->getParent() != LI->getParent() ||
              !Inst->comesBefore(LI)) &&
             isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
    };
    if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
      return false;
  }
  // Make sure this instruction does not use any of the BCE cmp block
  // instructions as an operand.
  return none_of(Inst->operands(), [&](const Value *Op) {
    const Instruction *OpI = dyn_cast<Instruction>(Op);
    return OpI && BlockInsts.contains(OpI);
  });
}
void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
  SmallVector<Instruction *, 4> OtherInsts;
  for (Instruction &Inst : *BB) {
    if (BlockInsts.count(&Inst))
      continue;
    assert(canSinkBCECmpInst(&Inst, AA) && "Split unsplittable block");
    // This is a non-BCE-cmp-block instruction; it can be separated from the
    // BCE-cmp-block instructions.
    OtherInsts.push_back(&Inst);
  }
  // Do the actual splitting.
  for (Instruction *Inst : reverse(OtherInsts))
    Inst->moveBefore(&*NewParent->begin());
}
bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
  for (Instruction &Inst : *BB) {
    if (!BlockInsts.count(&Inst)) {
      if (!canSinkBCECmpInst(&Inst, AA))
        return false;
    }
  }
  return true;
}
bool BCECmpBlock::doesOtherWork() const {
  // Note: the GEPs and/or loads are not necessarily in the same block.
  for (const Instruction &Inst : *BB) {
    if (!BlockInsts.count(&Inst))
      return true;
  }
  return false;
}
// Visits the given icmp. If both operands are valid BCE atoms, returns the
// comparison.
Optional<BCECmp> visitICmp(const ICmpInst *const CmpI,
                           const ICmpInst::Predicate ExpectedPredicate,
                           BaseIdentifier &BaseId) {
  if (!CmpI->hasOneUse())
    return None;
  if (CmpI->getPredicate() != ExpectedPredicate)
    return None;
  auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
  if (!Lhs.BaseId)
    return None;
  auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
  if (!Rhs.BaseId)
    return None;
  const auto &DL = CmpI->getModule()->getDataLayout();
  return BCECmp(std::move(Lhs), std::move(Rhs),
                DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()), CmpI);
}
// Visits the given comparison block. If this is a comparison between two
// valid BCE atoms, returns the comparison.
Optional<BCECmpBlock> visitCmpBlock(Value *const Val, BasicBlock *const Block,
                                    const BasicBlock *const PhiBlock,
                                    BaseIdentifier &BaseId) {
  auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
  if (!BranchI)
    return None;

  Value *Cond;
  ICmpInst::Predicate ExpectedPredicate;
  if (BranchI->isUnconditional()) {
    // The last link in the chain: the incoming value is the comparison result
    // itself.
    Cond = Val;
    ExpectedPredicate = ICmpInst::ICMP_EQ;
  } else {
    // A chained comparison: the incoming value must be the false constant.
    const auto *const Const = cast<ConstantInt>(Val);
    if (!Const->isZero())
      return None;
    assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
    BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
    Cond = BranchI->getCondition();
    ExpectedPredicate =
        FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
  }

  auto *CmpI = dyn_cast<ICmpInst>(Cond);
  if (!CmpI)
    return None;

  Optional<BCECmp> Result = visitICmp(CmpI, ExpectedPredicate, BaseId);
  if (!Result)
    return None;

  BCECmpBlock::InstructionSet BlockInsts(
      {Result->Lhs.LoadI, Result->Rhs.LoadI, Result->CmpI, BranchI});
  if (Result->Lhs.GEP)
    BlockInsts.insert(Result->Lhs.GEP);
  if (Result->Rhs.GEP)
    BlockInsts.insert(Result->Rhs.GEP);
  return BCECmpBlock(std::move(*Result), Block, BlockInsts);
}
static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
                                BCECmpBlock &&Comparison) {
  LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
                    << "': Found cmp of " << Comparison.SizeBits()
                    << " bits between " << Comparison.Lhs().BaseId << " + "
                    << Comparison.Lhs().Offset << " and "
                    << Comparison.Rhs().BaseId << " + "
                    << Comparison.Rhs().Offset << "\n");
  Comparison.OrigOrder = Comparisons.size();
  Comparisons.push_back(std::move(Comparison));
}
// A chain of comparisons.
class BCECmpChain {
public:
  using ContiguousBlocks = std::vector<BCECmpBlock>;

  BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
              AliasAnalysis &AA);

  bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
                DomTreeUpdater &DTU);

  bool atLeastOneMerged() const {
    return any_of(MergedBlocks_,
                  [](const auto &Blocks) { return Blocks.size() > 1; });
  }

private:
  PHINode &Phi_;
  // All blocks in the chain, grouped by contiguity.
  std::vector<ContiguousBlocks> MergedBlocks_;
  // The original entry block (before sorting).
  BasicBlock *EntryBlock_;
};
static bool areContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) {
  return First.Lhs().BaseId == Second.Lhs().BaseId &&
         First.Rhs().BaseId == Second.Rhs().BaseId &&
         First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
         First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
}
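A worked instance of the contiguity rule with hypothetical numbers: a first compare of 32 bits at byte 0 ends at byte 0 + 32/8 = 4, which is exactly where the second compare must start on both sides.

#include <cassert>

int main() {
  const int FirstSizeBits = 32;  // the first compare loads 4 bytes
  const int FirstLhsOffset = 0;  // ... at byte 0 of its base
  const int SecondLhsOffset = 4; // the next compare starts at byte 4
  // Second begins exactly where First ends, so the loads are contiguous:
  assert(FirstLhsOffset + FirstSizeBits / 8 == SecondLhsOffset);
}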
static unsigned getMinOrigOrder(const BCECmpChain::ContiguousBlocks &Blocks) {
  unsigned MinOrigOrder = std::numeric_limits<unsigned>::max();
  for (const BCECmpBlock &Block : Blocks)
    MinOrigOrder = std::min(MinOrigOrder, Block.OrigOrder);
  return MinOrigOrder;
}
/// Given a chain of comparison blocks, groups the blocks into contiguous
/// ranges that can be merged together into a single comparison.
static std::vector<BCECmpChain::ContiguousBlocks>
mergeBlocks(std::vector<BCECmpBlock> &&Blocks) {
  std::vector<BCECmpChain::ContiguousBlocks> MergedBlocks;

  // Sort to detect continuous offsets.
  llvm::sort(Blocks,
             [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
               return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
                      std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
             });

  BCECmpChain::ContiguousBlocks *LastMergedBlock = nullptr;
  for (BCECmpBlock &Block : Blocks) {
    if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) {
      MergedBlocks.emplace_back();
      LastMergedBlock = &MergedBlocks.back();
    } else {
      LLVM_DEBUG(dbgs() << "Merging block " << Block.BB->getName() << " into "
                        << LastMergedBlock->back().BB->getName() << "\n");
    }
    LastMergedBlock->push_back(std::move(Block));
  }

  // While reordering is allowed for merging, do not reorder unmerged
  // comparisons: that could introduce a branch on poison.
  llvm::sort(MergedBlocks, [](const BCECmpChain::ContiguousBlocks &LhsBlocks,
                              const BCECmpChain::ContiguousBlocks &RhsBlocks) {
    return getMinOrigOrder(LhsBlocks) < getMinOrigOrder(RhsBlocks);
  });

  return MergedBlocks;
}
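The same sort-then-group idiom reduced to plain integers (a standalone sketch, not part of the pass): sorting makes mergeable neighbors adjacent, and one pass then collects runs of contiguous values.

#include <algorithm>
#include <vector>

std::vector<std::vector<int>> groupContiguous(std::vector<int> Vals) {
  std::sort(Vals.begin(), Vals.end()); // neighbors-to-merge become adjacent
  std::vector<std::vector<int>> Groups;
  std::vector<int> *Last = nullptr;
  for (int V : Vals) {
    if (!Last || Last->back() + 1 != V) { // not contiguous: start a new group
      Groups.emplace_back();
      Last = &Groups.back();
    }
    Last->push_back(V);
  }
  return Groups; // e.g. {3,1,7,2,8} -> {{1,2,3},{7,8}}
}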
BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
                         AliasAnalysis &AA)
    : Phi_(Phi) {
  assert(!Blocks.empty() && "a chain should have at least one block");
  // Now look inside blocks to check for BCE comparisons.
  std::vector<BCECmpBlock> Comparisons;
  BaseIdentifier BaseId;
  for (BasicBlock *const Block : Blocks) {
    assert(Block && "invalid block");
    Optional<BCECmpBlock> Comparison = visitCmpBlock(
        Phi.getIncomingValueForBlock(Block), Block, Phi.getParent(), BaseId);
    if (!Comparison) {
      LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
      return;
    }
    if (Comparison->doesOtherWork()) {
      LLVM_DEBUG(dbgs() << "block '" << Comparison->BB->getName()
                        << "' does extra work besides compare\n");
      if (Comparisons.empty()) {
        // This is the initial block: if its extra work can be hoisted to the
        // predecessor, split it; otherwise drop it from the chain.
        if (Comparison->canSplit(AA)) {
          LLVM_DEBUG(dbgs()
                     << "Split initial block '" << Comparison->BB->getName()
                     << "' that does extra work besides compare\n");
          Comparison->RequireSplit = true;
          enqueueBlock(Comparisons, std::move(*Comparison));
        } else {
          LLVM_DEBUG(dbgs()
                     << "ignoring initial block '" << Comparison->BB->getName()
                     << "' that does extra work besides compare\n");
        }
        continue;
      }
      // A block in the middle of the chain does other work: give up on the
      // whole chain.
      return;
    }
    enqueueBlock(Comparisons, std::move(*Comparison));
  }

  // It is possible we have no suitable comparison to merge.
  if (Comparisons.empty()) {
    LLVM_DEBUG(dbgs() << "chain with no BCE basic blocks, no merge\n");
    return;
  }
  EntryBlock_ = Comparisons[0].BB;
  MergedBlocks_ = mergeBlocks(std::move(Comparisons));
}
// Builds the name of a set of merged basic blocks, optimized for the common
// case of unnamed blocks.
class MergedBlockName {
  // Storage for the uncommon case of several named blocks.
  SmallString<16> Scratch;

public:
  explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)
      : Name(makeName(Comparisons)) {}
  const StringRef Name;

private:
  StringRef makeName(ArrayRef<BCECmpBlock> Comparisons) {
    assert(!Comparisons.empty() && "no basic block");
    // Fast path: only one block, or no names at all.
    if (Comparisons.size() == 1)
      return Comparisons[0].BB->getName();
    const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
                                     [](int i, const BCECmpBlock &Cmp) {
                                       return i + Cmp.BB->getName().size();
                                     });
    if (size == 0)
      return StringRef("", 0);

    // Slow path: at least two blocks, at least one with a name. We need
    // `size` bytes for the names plus up to `Comparisons.size() - 1` bytes
    // for '+' separators.
    Scratch.clear();
    Scratch.reserve(size + Comparisons.size() - 1);
    const auto append = [this](StringRef str) {
      Scratch.append(str.begin(), str.end());
    };
    append(Comparisons[0].BB->getName());
    for (int I = 1, E = Comparisons.size(); I < E; ++I) {
      const BasicBlock *const BB = Comparisons[I].BB;
      if (!BB->getName().empty()) {
        append("+");
        append(BB->getName());
      }
    }
    return Scratch.str();
  }
};
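The naming scheme in isolation (a standalone sketch with std::string in place of SmallString): a single block keeps its name, and several named blocks are joined with '+', so blocks named "x", "" and "y" merge into "x+y".

#include <string>
#include <vector>

std::string joinBlockNames(const std::vector<std::string> &Names) {
  if (Names.size() == 1)
    return Names[0]; // fast path: a single block keeps its own name
  std::string Scratch = Names.empty() ? "" : Names[0];
  for (size_t I = 1, E = Names.size(); I < E; ++I)
    if (!Names[I].empty()) {
      Scratch += '+'; // separator between named blocks
      Scratch += Names[I];
    }
  return Scratch; // {"x", "", "y"} -> "x+y"
}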
// Merges the given contiguous comparison blocks into one memcmp block.
static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
                                    BasicBlock *const InsertBefore,
                                    BasicBlock *const NextCmpBlock,
                                    PHINode &Phi, const TargetLibraryInfo &TLI,
                                    AliasAnalysis &AA, DomTreeUpdater &DTU) {
  assert(!Comparisons.empty() && "merging zero comparisons");
  LLVMContext &Context = NextCmpBlock->getContext();
  const BCECmpBlock &FirstCmp = Comparisons[0];

  // Create a new cmp block before the next cmp block.
  BasicBlock *const BB =
      BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
                         NextCmpBlock->getParent(), InsertBefore);
  IRBuilder<> Builder(BB);
  // Add the GEPs from the first BCECmpBlock.
  Value *Lhs, *Rhs;
  if (FirstCmp.Lhs().GEP)
    Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
  else
    Lhs = FirstCmp.Lhs().LoadI->getPointerOperand();
  if (FirstCmp.Rhs().GEP)
    Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
  else
    Rhs = FirstCmp.Rhs().LoadI->getPointerOperand();

  Value *IsEqual = nullptr;
  LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
                    << BB->getName() << "\n");

  // If one block requires splitting, do it now, i.e. just before we know we
  // will collapse the chain: the hoisted instructions can then be executed
  // before any of the instructions in the chain.
  const auto ToSplit = llvm::find_if(
      Comparisons, [](const BCECmpBlock &B) { return B.RequireSplit; });
  if (ToSplit != Comparisons.end()) {
    LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
    ToSplit->split(BB, AA);
  }

  if (Comparisons.size() == 1) {
    // There is nothing to merge: just rebuild the single comparison.
    Value *const LhsLoad =
        Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs);
    Value *const RhsLoad =
        Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs);
    IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
  } else {
    const unsigned TotalSizeBits = std::accumulate(
        Comparisons.begin(), Comparisons.end(), 0u,
        [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });

    // Create memcmp() == 0.
    const auto &DL = Phi.getModule()->getDataLayout();
    Value *const MemCmpCall = emitMemCmp(
        Lhs, Rhs,
        ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8),
        Builder, DL, &TLI);
    IsEqual = Builder.CreateICmpEQ(
        MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
  }

  BasicBlock *const PhiBB = Phi.getParent();
  // Add a branch to the next cmp block.
  if (NextCmpBlock == PhiBB) {
    // Continue to phi, passing it the comparison result.
    Builder.CreateBr(PhiBB);
    Phi.addIncoming(IsEqual, BB);
    DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}});
  } else {
    // Continue to the next block if equal, exit to phi else.
    Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
    Phi.addIncoming(ConstantInt::getFalse(Context), BB);
    DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
                      {DominatorTree::Insert, BB, PhiBB}});
  }
  return BB;
}
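At the source level, the merge amounts to the following (hypothetical types; Pair has no padding, so byte equality coincides with member equality):

#include <cstring>

struct Pair { int a; int b; }; // 8 contiguous bytes, no padding

// Before: a chain of two 32-bit equal-compares over contiguous offsets.
bool chainEq(const Pair &x, const Pair &y) {
  return x.a == y.a && x.b == y.b;
}

// After: one memcmp over TotalSizeBits / 8 == 8 bytes, compared against 0.
bool mergedEq(const Pair &x, const Pair &y) {
  return std::memcmp(&x, &y, sizeof(Pair)) == 0;
}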
bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
                           DomTreeUpdater &DTU) {
  assert(atLeastOneMerged() && "simplifying trivial BCECmpChain");
  LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
                    << EntryBlock_->getName() << "\n");

  // Effectively merge blocks. We go in the reverse direction from the phi
  // block so that the next block is always available to branch to.
  BasicBlock *InsertBefore = EntryBlock_;
  BasicBlock *NextCmpBlock = Phi_.getParent();
  for (const auto &Blocks : reverse(MergedBlocks_)) {
    InsertBefore = NextCmpBlock = mergeComparisons(
        Blocks, InsertBefore, NextCmpBlock, Phi_, TLI, AA, DTU);
  }

  // Replace the original cmp chain with the new cmp chain by pointing all
  // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
  // blocks in the old chain unreachable.
  while (!pred_empty(EntryBlock_)) {
    BasicBlock *const Pred = *pred_begin(EntryBlock_);
    LLVM_DEBUG(dbgs() << "Updating jump into old chain from "
                      << Pred->getName() << "\n");
    Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
    DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_},
                      {DominatorTree::Insert, Pred, NextCmpBlock}});
  }

  // If the old cmp chain was the function entry, we need to update the
  // function entry.
  const bool ChainEntryIsFnEntry = EntryBlock_->isEntryBlock();
  if (ChainEntryIsFnEntry && DTU.hasDomTree()) {
    LLVM_DEBUG(dbgs() << "Changing function entry from "
                      << EntryBlock_->getName() << " to "
                      << NextCmpBlock->getName() << "\n");
    DTU.getDomTree().setNewRoot(NextCmpBlock);
    DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}});
  }
  EntryBlock_ = nullptr;

  // Delete merged blocks. This also removes incoming values in the phi.
  SmallVector<BasicBlock *, 16> DeadBlocks;
  for (const auto &Blocks : MergedBlocks_) {
    for (const BCECmpBlock &Block : Blocks) {
      LLVM_DEBUG(dbgs() << "Deleting merged block " << Block.BB->getName()
                        << "\n");
      DeadBlocks.push_back(Block.BB);
    }
  }
  DeleteDeadBlocks(DeadBlocks, &DTU);

  MergedBlocks_.clear();
  return true;
}
// Walks up from the last block of the chain to collect the blocks in order.
std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
                                           BasicBlock *const LastBlock,
                                           int NumBlocks) {
  std::vector<BasicBlock *> Blocks(NumBlocks);
  assert(LastBlock && "invalid last block");
  BasicBlock *CurBlock = LastBlock;
  for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
    if (CurBlock->hasAddressTaken()) {
      // Somebody is jumping to the block through an address: all bets are off.
      LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
                        << " has its address taken\n");
      return {};
    }
    Blocks[BlockIndex] = CurBlock;
    auto *SinglePredecessor = CurBlock->getSinglePredecessor();
    if (!SinglePredecessor) {
      LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
                        << " has two or more predecessors\n");
      return {};
    }
    if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) {
      LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
                        << " does not link back to the phi\n");
      return {};
    }
    CurBlock = SinglePredecessor;
  }
  Blocks[0] = CurBlock;
  return Blocks;
}
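The backward walk in miniature (a standalone sketch; Node stands in for BasicBlock): recover a linear chain by repeatedly taking the unique predecessor, giving up when a link is not unique.

#include <vector>

struct Node { Node *SinglePred = nullptr; }; // stand-in for a basic block

std::vector<Node *> orderChain(Node *Last, int NumBlocks) {
  std::vector<Node *> Blocks(NumBlocks);
  Node *Cur = Last;
  for (int I = NumBlocks - 1; I > 0; --I) {
    Blocks[I] = Cur;
    if (!Cur->SinglePred)
      return {}; // more than one predecessor: not a simple chain
    Cur = Cur->SinglePred; // walk one link up the chain
  }
  Blocks[0] = Cur;
  return Blocks;
}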
bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
                DomTreeUpdater &DTU) {
  // All incoming values must be constants except one, which must be an icmp
  // defined in its own incoming block: the last block of the chain.
  BasicBlock *LastBlock = nullptr;
  for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) {
    if (isa<ConstantInt>(Phi.getIncomingValue(I)))
      continue;
    if (LastBlock || !isa<ICmpInst>(Phi.getIncomingValue(I)) ||
        cast<ICmpInst>(Phi.getIncomingValue(I))->getParent() !=
            Phi.getIncomingBlock(I)) {
      LLVM_DEBUG(
          dbgs()
          << "skip: non-constant value not from cmp or not from last block.\n");
      return false;
    }
    LastBlock = Phi.getIncomingBlock(I);
  }
  if (!LastBlock) {
    // There is no non-constant block.
    return false;
  }

  const auto Blocks =
      getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
  if (Blocks.empty())
    return false;
  BCECmpChain CmpChain(Blocks, Phi, AA);

  if (!CmpChain.atLeastOneMerged()) {
    LLVM_DEBUG(dbgs() << "skip: nothing merged\n");
    return false;
  }

  return CmpChain.simplify(TLI, AA, DTU);
}
static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
                    const TargetTransformInfo &TTI, AliasAnalysis &AA,
                    DominatorTree *DT) {
  // We only try merging comparisons if the target wants to expand memcmp
  // later.
  if (!TTI.enableMemCmpExpansion(F.hasOptSize(), true))
    return false;

  // If we don't have memcmp available we can't emit calls to it.
  if (!TLI.has(LibFunc_memcmp))
    return false;

  DomTreeUpdater DTU(DT, /*PostDominatorTree=*/nullptr,
                     DomTreeUpdater::UpdateStrategy::Eager);

  bool MadeChange = false;

  // The entry block cannot end a comparison chain, so skip it.
  for (BasicBlock &BB : llvm::drop_begin(F)) {
    // A phi is always first in a basic block.
    if (auto *const Phi = dyn_cast<PHINode>(&*BB.begin()))
      MadeChange |= processPhi(*Phi, TLI, AA, DTU);
  }

  return MadeChange;
}
class MergeICmpsLegacyPass : public FunctionPass {
public:
  static char ID;

  MergeICmpsLegacyPass() : FunctionPass(ID) {
    initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    // MergeICmps does not need the DominatorTree, but we update it if it's
    // already available.
    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr);
  }
};
894 "Merge contiguous icmps into a memcmp",
false,
false)