34using namespace PatternMatch;
36#define DEBUG_TYPE "aggressive-instcombine"
38STATISTIC(NumAnyOrAllBitsSet,
"Number of any/all-bits-set patterns folded");
40 "Number of guarded rotates transformed into funnel shifts");
42 "Number of guarded funnel shifts transformed into funnel shifts");
43STATISTIC(NumPopCountRecognized,
"Number of popcount idioms recognized");
47 cl::desc(
"Max number of instructions to scan for aggressive instcombine."));
53 if (
I.getOpcode() != Instruction::PHI ||
I.getNumOperands() != 2)
77 return Intrinsic::fshl;
87 return Intrinsic::fshr;
99 unsigned FunnelOp = 0, GuardOp = 1;
101 Value *ShVal0, *ShVal1, *ShAmt;
104 (IID == Intrinsic::fshl && ShVal0 != P1) ||
105 (IID == Intrinsic::fshr && ShVal1 != P1)) {
108 (IID == Intrinsic::fshl && ShVal0 != P0) ||
109 (IID == Intrinsic::fshr && ShVal1 != P0))
111 assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
112 "Pattern must match funnel shift left or right");
139 if (ShVal0 == ShVal1)
142 ++NumGuardedFunnelShifts;
146 bool IsFshl = IID == Intrinsic::fshl;
147 if (ShVal0 != ShVal1) {
149 ShVal1 =
Builder.CreateFreeze(ShVal1);
151 ShVal0 =
Builder.CreateFreeze(ShVal0);
215 const APInt *BitIndex =
nullptr;
221 MOps.
Root = Candidate;
229 return MOps.
Root == Candidate;
243 bool MatchAllBitsSet;
245 MatchAllBitsSet =
true;
247 MatchAllBitsSet =
false;
251 MaskOps MOps(
I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
252 if (MatchAllBitsSet) {
268 I.replaceAllUsesWith(Zext);
269 ++NumAnyOrAllBitsSet;
285 if (
I.getOpcode() != Instruction::LShr)
288 Type *Ty =
I.getType();
294 if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
303 Value *Op0 =
I.getOperand(0);
304 Value *Op1 =
I.getOperand(1);
320 Value *Root, *SubOp1;
328 I.getModule(), Intrinsic::ctpop,
I.getType());
329 I.replaceAllUsesWith(
Builder.CreateCall(Func, {Root}));
330 ++NumPopCountRecognized;
351 const APInt *MinC, *MaxC;
361 if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
364 Type *IntTy =
I.getType();
365 Type *FpTy = In->getType();
368 if (
auto *VecTy = dyn_cast<VectorType>(IntTy))
369 SatTy = VectorType::get(SatTy, VecTy->getElementCount());
390 if (SatCost >= MinMaxCost)
397 I.replaceAllUsesWith(
Builder.CreateSExt(Sat, IntTy));
407 auto *Call = dyn_cast<CallInst>(&
I);
411 Module *M = Call->getModule();
416 if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
425 Type *Ty = Call->getType();
426 Value *
Arg = Call->getArgOperand(0);
431 Builder.setFastMathFlags(Call->getFastMathFlags());
435 I.replaceAllUsesWith(NewSqrt);
452 if (Length < InputBits || Length > InputBits * 2)
456 unsigned Matched = 0;
458 for (
unsigned i = 0; i <
Length; i++) {
460 if (Element >= InputBits)
467 if ((((
Mul << Element) & Mask.getZExtValue()) >> Shift) == i)
471 return Matched == InputBits;
534 if (!
GEP || !
GEP->isInBounds() ||
GEP->getNumIndices() != 2)
537 if (!
GEP->getSourceElementType()->isArrayTy())
540 uint64_t ArraySize =
GEP->getSourceElementType()->getArrayNumElements();
541 if (ArraySize != 32 && ArraySize != 64)
556 Value *Idx2 = std::next(
GEP->idx_begin())->get();
568 if (InputBits != 32 && InputBits != 64)
572 if (InputBits -
Log2_32(InputBits) != ShiftConst &&
573 InputBits -
Log2_32(InputBits) - 1 != ShiftConst)
576 if (!
isCTTZTable(*ConstData, MulConst, ShiftConst, InputBits))
580 bool DefinedForZero = ZeroTableElem == InputBits;
585 auto Cttz =
B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
586 Value *ZExtOrTrunc =
nullptr;
588 if (DefinedForZero) {
589 ZExtOrTrunc =
B.CreateZExtOrTrunc(Cttz, AccessType);
600 ZExtOrTrunc =
B.CreateZExtOrTrunc(
Select, AccessType);
626 Value *ShAmt2 =
nullptr;
650 LI1 = dyn_cast<LoadInst>(L1);
652 LoadInst *LI2 = dyn_cast<LoadInst>(L2);
664 bool IsBigEndian =
DL.isBigEndian();
682 if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
692 if (!Start->comesBefore(End)) {
699 unsigned NumScanned = 0;
701 make_range(Start->getIterator(), End->getIterator())) {
709 bool Reverse =
false;
710 if (Offset2.
slt(Offset1)) {
742 uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
745 if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
755 LOps.
LoadSize = LoadSize1 + LoadSize2;
773 if (isa<VectorType>(
I.getType()))
789 unsigned AS = LI1->getPointerAddressSpace();
792 AS, LI1->getAlign(), &
Fast);
793 if (!Allowed || !
Fast)
797 Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand());
803 Value *Load1Ptr = LI1->getPointerOperand();
806 NewLoad =
Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(),
807 LI1->isVolatile(),
"");
813 Value *NewOp = NewLoad;
822 I.replaceAllUsesWith(NewOp);
833 bool MadeChange =
false;
873 bool MadeChange =
false;
876 MadeChange |= TIC.
run(
F);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
amdgpu AMDGPU Register Bank Select
static bool tryToRecognizePopCount(Instruction &I)
static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, DominatorTree &DT, AliasAnalysis &AA)
This is the entry point for all transforms.
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
static bool tryToRecognizeTableBasedCttz(Instruction &I)
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA)
This is the entry point for folds that could be implemented in regular InstCombine,...
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul, uint64_t Shift, uint64_t InputBits)
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool runImpl(Function &F, const TargetLowering &TLI)
This is the interface for a simple mod/ref and alias analysis over globals.
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
bool slt(const APInt &RHS) const
Signed less than comparison.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Represents analyses that only rely on functions' control flow.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
An array constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
uint64_t getElementAsInteger(unsigned i) const
If this is a sequential container of integers (of any size), return the specified element in the low ...
unsigned getNumElements() const
Return the number of elements in the array or vector.
This is the shared class of boolean and integer constants.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
const BasicBlock * getParent() const
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
Representation for a specific memory location.
MemoryLocation getWithNewSize(LocationSize NewSize) const
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
A Module instance is used to store all the information related to an LLVM module.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
The instances of the Type class are immutable: once they are created, they are never changed.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr) const
Accumulate the constant offset this value has compared to a base pointer.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
void takeName(Value *V)
Transfer the name from V to this value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, OpTy > m_ZExtOrSelf(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
CastClass_match< OpTy, Instruction::FPToSI > m_FPToSI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
@ And
Bitwise or logical AND of integers.
constexpr unsigned BitWidth
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and the bit indexes (Mask) nee...
MaskOps(unsigned BitWidth, bool MatchAnds)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.