using namespace PatternMatch;

#define DEBUG_TYPE "aggressive-instcombine"

STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
STATISTIC(NumGuardedRotates,
          "Number of guarded rotates transformed into funnel shifts");
STATISTIC(NumGuardedFunnelShifts,
          "Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");

static cl::opt<unsigned> MaxInstrsToScan(
    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
    cl::desc("Max number of instructions to scan for aggressive instcombine."));
// In foldGuardedFunnelShift():
  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
    return false;
  // ...
  // Inside the matchFunnelShift lambda: match
  // (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt)) and its fshr mirror.
  unsigned Width = V->getType()->getScalarSizeInBits();
  // ...
  return Intrinsic::fshl;
  // ...
  return Intrinsic::fshr;
  // ...
  unsigned FunnelOp = 0, GuardOp = 1;
  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
  Value *ShVal0, *ShVal1, *ShAmt;
  Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
  if (IID == Intrinsic::not_intrinsic ||
      (IID == Intrinsic::fshl && ShVal0 != P1) ||
      (IID == Intrinsic::fshr && ShVal1 != P1)) {
    IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
    if (IID == Intrinsic::not_intrinsic ||
        (IID == Intrinsic::fshl && ShVal0 != P0) ||
        (IID == Intrinsic::fshr && ShVal1 != P0))
      return false;
    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
           "Pattern must match funnel shift left or right");
    std::swap(FunnelOp, GuardOp);
  }
  // ...
  if (ShVal0 == ShVal1)
    ++NumGuardedRotates;
  else
    ++NumGuardedFunnelShifts;
  // ...
  bool IsFshl = IID == Intrinsic::fshl;
  if (ShVal0 != ShVal1) {
    if (IsFshl && !isGuaranteedNotToBePoison(ShVal1))
      ShVal1 = Builder.CreateFreeze(ShVal1);
    else if (!IsFshl && !isGuaranteedNotToBePoison(ShVal0))
      ShVal0 = Builder.CreateFreeze(ShVal0);
  }
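
// Illustration (not part of this file; names are ours): the guarded-rotate
// idiom this fold targets. Hand-written rotates are usually branch-guarded
// because `X << 32` is undefined in C/C++; llvm.fshl/llvm.fshr are defined
// for a zero shift amount, so the phi over the guard branch collapses into a
// single intrinsic call.
#include <cstdint>
uint32_t rotl32(uint32_t X, uint32_t Shift) {
  // Matched as phi(X, (X << Shift) | (X >> (32 - Shift))) guarded by
  // `Shift != 0`; folded to @llvm.fshl.i32(X, X, Shift).
  return Shift == 0 ? X : (X << Shift) | (X >> (32 - Shift));
}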
// In matchAndOrChain(): a leaf is either a shift-right of the common source
// value or the bare source itself (testing bit 0).
  Value *Candidate;
  const APInt *BitIndex = nullptr;
  if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
    Candidate = V;
  // Initialize the result source operand on the first leaf.
  if (!MOps.Root)
    MOps.Root = Candidate;
  // ...
  // Every leaf of the chain must trace back to the same source value.
  return MOps.Root == Candidate;
// In foldAnyOrAllBitsSet(): decide which flavor of chain we are matching.
  bool MatchAllBitsSet;
  if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_Value())))
    MatchAllBitsSet = true;
  else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
    MatchAllBitsSet = false;
  else
    return false;

  MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
  if (MatchAllBitsSet) {
    if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps))
      return false;
  }
  // ...
  I.replaceAllUsesWith(Zext);
  ++NumAnyOrAllBitsSet;
  return true;
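
// Illustration (not part of this file; names are ours): bit-test chains that
// reduce to a single masked compare. Each operand tests one bit of the same
// source value.
#include <cstdint>
bool anyBitsSet(uint32_t X) {
  // Tests bits 1, 4, and 8; folded to (X & 0x112) != 0.
  return (((X >> 1) | (X >> 4) | (X >> 8)) & 1) != 0;
}
bool allBitsSet(uint32_t X) {
  // Tests bits 1 and 4 together; folded to (X & 0x12) == 0x12.
  return (((X >> 1) & (X >> 4)) & 1) != 0;
}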
// In tryToRecognizePopCount():
  if (I.getOpcode() != Instruction::LShr)
    return false;

  Type *Ty = I.getType();
  if (!Ty->isIntOrIntVectorTy())
    return false;

  // Only handle byte-multiple widths in (8, 128].
  unsigned Len = Ty->getScalarSizeInBits();
  if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
    return false;
  // ...
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  // ...
  // Matching "(X * 0x01010101...) >> (Len - 8)" and the SWAR steps below it.
  Value *Root, *SubOp1;
  // ...
  Function *Func = Intrinsic::getDeclaration(
      I.getModule(), Intrinsic::ctpop, I.getType());
  I.replaceAllUsesWith(Builder.CreateCall(Func, {Root}));
  ++NumPopCountRecognized;
  return true;
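
// Illustration (not part of this file; names are ours): the classic SWAR
// popcount idiom of the shape matched above, replaced by @llvm.ctpop.i64.
#include <cstdint>
int popcount64(uint64_t X) {
  X -= (X >> 1) & 0x5555555555555555ULL;                                // 2-bit sums
  X = (X & 0x3333333333333333ULL) + ((X >> 2) & 0x3333333333333333ULL); // 4-bit sums
  X = (X + (X >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                           // byte sums
  return (X * 0x0101010101010101ULL) >> 56;                             // horizontal add
}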
// In tryToFPToSat():
  const APInt *MinC, *MaxC;
  // ...
  // The min/max constants must describe a signed saturation range such as
  // [-128, 127]: MinC + 1 is a power of 2 and -MaxC == MinC + 1.
  if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
    return false;

  Type *IntTy = I.getType();
  Type *FpTy = In->getType();
  Type *SatTy =
      IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
  if (auto *VecTy = dyn_cast<VectorType>(IntTy))
    SatTy = VectorType::get(SatTy, VecTy->getElementCount());
  // ...
  if (SatCost >= MinMaxCost)
    return false;
  // ...
  I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
  return true;
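
// Illustration (not part of this file; names are ours): a clamped
// float-to-int conversion of the shape smin(smax(fptosi(x), C1), C2). With
// the bounds -128 and 127 the clamp saturates an i8, so the min/max pair
// becomes @llvm.fptosi.sat.i8.f32 followed by a sext to the original type.
#include <cstdint>
int32_t clampToI8(float F) {
  int32_t V = (int32_t)F;   // fptosi
  V = V < -128 ? -128 : V;  // smax(V, -128)
  V = V > 127 ? 127 : V;    // smin(V, 127)
  return V;
}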
// In foldSqrt():
  auto *Call = dyn_cast<CallInst>(&I);
  if (!Call)
    return false;

  Module *M = Call->getModule();
  LibFunc Func;
  if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
    return false;
  if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
    return false;
  // ...
  Type *Ty = Call->getType();
  Value *Arg = Call->getArgOperand(0);
  if (TTI.haveFastSqrt(Ty) &&
      (Call->hasNoNaNs() ||
       CannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI))) {
    // ...
    Builder.setFastMathFlags(Call->getFastMathFlags());
    // ...
    I.replaceAllUsesWith(NewSqrt);
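
// Illustration (not part of this file; names are ours): a sqrtf() libcall
// with a provably non-negative argument. The libcall may set errno for
// negative inputs, so the fold requires either the nnan flag or an argument
// that cannot be ordered-less-than-zero before switching to the intrinsic,
// which typically lowers to a single hardware instruction.
#include <cmath>
float fastHypot(float X, float Y) {
  // X*X + Y*Y is non-negative or NaN, so sqrtf is replaced by llvm.sqrt.f32.
  return sqrtf(X * X + Y * Y);
}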
// In isCTTZTable():
  if (Length < InputBits || Length > InputBits * 2)
    return false;

  APInt Mask = APInt::getBitsSetFrom(InputBits, Shift);
  unsigned Matched = 0;

  for (unsigned i = 0; i < Length; i++) {
    uint64_t Element = Table.getElementAsInteger(i);
    if (Element >= InputBits)
      continue;
    // An entry may fail the check if it is never accessed, so count matches
    // rather than requiring every entry to decode.
    if ((((Mul << Element) & Mask.getZExtValue()) >> Shift) == i)
      Matched++;
  }

  return Matched == InputBits;
// In tryToRecognizeTableBasedCttz(): the load must index a 32- or 64-entry
// constant array through an inbounds GEP.
  if (!GEP || !GEP->isInBounds() || GEP->getNumIndices() != 2)
    return false;

  if (!GEP->getSourceElementType()->isArrayTy())
    return false;

  uint64_t ArraySize = GEP->getSourceElementType()->getArrayNumElements();
  if (ArraySize != 32 && ArraySize != 64)
    return false;
  // ...
  Value *Idx2 = std::next(GEP->idx_begin())->get();
  // ...
  if (InputBits != 32 && InputBits != 64)
    return false;

  // The shift must extract the top Log2(InputBits) or Log2(InputBits) + 1
  // bits of the multiply.
  if (InputBits - Log2_32(InputBits) != ShiftConst &&
      InputBits - Log2_32(InputBits) - 1 != ShiftConst)
    return false;

  if (!isCTTZTable(*ConstData, MulConst, ShiftConst, InputBits))
    return false;

  auto ZeroTableElem = ConstData->getElementAsInteger(0);
  bool DefinedForZero = ZeroTableElem == InputBits;
  // ...
  auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
  Value *ZExtOrTrunc = nullptr;

  if (DefinedForZero) {
    ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
  } else {
    // cttz(0) is poison when ZeroIsPoison is set, so select the table's
    // zero element when the input is zero.
    // ...
    ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
  }
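
// Illustration (not part of this file; names are ours): the de Bruijn table
// idiom recognized above. For 32-bit inputs, ShiftConst must be
// 32 - Log2(32) = 27 (or 26), and isCTTZTable verifies that every reachable
// table entry decodes back to its bit index.
#include <cstdint>
static const unsigned DeBruijnTable[32] = {
    0,  1,  28, 2,  29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4,  8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6,  11, 5,  10, 9};
unsigned cttz32(uint32_t V) {
  // (V & -V) isolates the lowest set bit; the multiply hashes it into the
  // top 5 bits. Since the zero entry (0) differs from cttz(0) == 32, the
  // transform emits @llvm.cttz.i32(V, true) guarded by a select on V == 0.
  return DeBruijnTable[((V & -V) * 0x077CB531u) >> 27];
}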
// In foldLoadsRecursive():
  const APInt *ShAmt2 = nullptr;
  // ...
  LI1 = dyn_cast<LoadInst>(L1);
  LoadInst *LI2 = dyn_cast<LoadInst>(L2);
  // ...
  bool IsBigEndian = DL.isBigEndian();
  // ...
  // The loads must share a base pointer and have equal sizes.
  if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
    return false;
  // ...
  if (!Start->comesBefore(End)) {
    std::swap(Start, End);
    // ...
  }
  unsigned NumScanned = 0;
  // ...
  // Keep the load with the lower offset as the first one.
  if (Offset2.slt(Offset1)) {
    std::swap(LI1, LI2);
    // ...
  }
  // ...
  // Big-endian targets swap which half supplies the low bits.
  uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
  // ...
  if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
    return false;
  // ...
  LOps.LoadSize = LoadSize1 + LoadSize2;

// In foldConsecutiveLoads():
  if (isa<VectorType>(I.getType()))
    return false;
  // ...
  unsigned AS = LI1->getPointerAddressSpace();
  bool Fast = false;
  bool Allowed = TTI.allowsMisalignedMemoryAccesses(
      I.getContext(), LOps.LoadSize, AS, LI1->getAlign(), &Fast);
  if (!Allowed || !Fast)
    return false;

  Value *Load1Ptr = LI1->getPointerOperand();
  // ...
  NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(),
                                      LI1->isVolatile(), "");
  // ...
  Value *NewOp = NewLoad;
  // ...
  I.replaceAllUsesWith(NewOp);
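
// Illustration (not part of this file; names are ours): adjacent narrow
// loads combined into one wide load. On a little-endian target the byte-wise
// construction below is exactly a 32-bit load, so the or/shift tree collapses
// into a single i32 load (when misaligned accesses are allowed and fast).
#include <cstdint>
uint32_t readLE32(const uint8_t *P) {
  return (uint32_t)P[0] | ((uint32_t)P[1] << 8) | ((uint32_t)P[2] << 16) |
         ((uint32_t)P[3] << 24);
}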
static std::pair<APInt, APInt>
getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
  unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
  std::optional<APInt> Stride;
  APInt ModOffset(BW, 0);
  // Compute the minimum GEP stride: the greatest common divisor of the
  // consecutive GEP index scales (cf. Bezout's identity).
  while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
    MapVector<Value *, APInt> VarOffsets;
    if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
      break;

    for (auto [V, Scale] : VarOffsets) {
      // Only keep a power-of-two factor for non-inbounds GEPs.
      if (!GEP->isInBounds())
        Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());

      if (!Stride)
        Stride = Scale;
      else
        Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
    }

    PtrOp = GEP->getPointerOperand();
  }

  // The pointer must arrive back at a global variable.
  if (!isa<GlobalVariable>(PtrOp) || !Stride)
    return {APInt(BW, 1), APInt(BW, 0)};

  // With signed GEP indices, normalize the offset into [0, Stride).
  ModOffset = ModOffset.srem(*Stride);
  if (ModOffset.isNegative())
    ModOffset += *Stride;

  return {*Stride, ModOffset};
}
// In foldPatternedLoads():
  auto *LI = dyn_cast<LoadInst>(&I);
  if (!LI || LI->isVolatile())
    return false;

  // We can only fold the load if it is from a constant global with a
  // definitive initializer.
  auto *PtrOp = LI->getPointerOperand();
  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return false;

  // Bail on initializers larger than 4K to avoid scanning too many offsets.
  Constant *C = GV->getInitializer();
  uint64_t GVSize = DL.getTypeAllocSize(C->getType());
  if (!GVSize || 4096 < GVSize)
    return false;

  Type *LoadTy = LI->getType();
  unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
  auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);

  // Any valid offset is a multiple of both the GEP stride and the load
  // alignment, so it suffices to step by the larger of the two.
  if (auto LA = LI->getAlign();
      LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
    ConstOffset = APInt(BW, 0);
    Stride = APInt(BW, LA.value());
  }

  Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
  if (!Ca)
    return false;

  unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
  for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
    if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
      return false;

  I.replaceAllUsesWith(Ca);
  return true;
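
// Illustration (not part of this file; names are ours): a load from a
// constant patterned array. Every 2-byte-aligned in-bounds offset of Pattern
// holds the same i16 value, so the load folds to the constant 0xAA55
// regardless of the runtime index.
#include <cstdint>
static const uint16_t Pattern[8] = {0xAA55, 0xAA55, 0xAA55, 0xAA55,
                                    0xAA55, 0xAA55, 0xAA55, 0xAA55};
uint16_t readPattern(unsigned I) {
  return Pattern[I % 8]; // all valid loads yield 0xAA55
}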
// In foldUnusualPatterns():
  bool MadeChange = false;
  // ...

// In runImpl():
  bool MadeChange = false;
  // ...
  MadeChange |= TIC.run(F);
static bool tryToRecognizePopCount(Instruction &I)
static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of the cast.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, DominatorTree &DT, AliasAnalysis &AA)
This is the entry point for all transforms.
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' instructions looking for shift ops of a common source value.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool tryToRecognizeTableBasedCttz(Instruction &I)
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavior by branching around the funnel-shift/rotation when the shift amount is 0.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA)
This is the entry point for folds that could be implemented in regular InstCombine, but they are separated because they are not expected to occur frequently and/or have more than a constant-length pattern match.
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
static std::pair< APInt, APInt > getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL)
static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul, uint64_t Shift, uint64_t InputBits)
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL)
If C is a constant patterned array and all valid loaded results for the given alignment are equal to some constant, fold the load to that constant.
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer, simple instructions.
This is the interface for LLVM's primary stateless and local alias analysis.
This is the interface for a simple mod/ref and alias analysis over globals.
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
bool slt(const APInt &RHS) const
Signed less than comparison.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Represents analyses that only rely on functions' control flow.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
An array constant whose element type is a simple 1/2/4/8-byte integer or float/double, and whose elements are just simple data values.
uint64_t getElementAsInteger(unsigned i) const
If this is a sequential container of integers (of any size), return the specified element in the low bits of a uint64_t.
unsigned getNumElements() const
Return the number of elements in the array or vector.
This is the shared class of boolean and integer constants.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the program.
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not belong to a module.
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
const BasicBlock * getParent() const
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
This class implements a map that also provides access to all stored values in a deterministic order.
Representation for a specific memory location.
MemoryLocation getWithNewSize(LocationSize NewSize) const
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
A Module instance is used to store all the information related to an LLVM module.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
The instances of the Type class are immutable: once they are created, they are never changed.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isIntegerTy() const
True if this is an instance of IntegerType.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr) const
Accumulate the constant offset this value has compared to a base pointer.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
void takeName(Value *V)
Transfer the name from V to this value.
APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, OpTy > m_ZExtOrSelf(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
CastClass_match< OpTy, Instruction::FPToSI > m_FPToSI(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value, returning the original object being addressed.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead instructions.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on the target and can legitimately be emitted in the current Module.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
@ And
Bitwise or logical AND of integers.
constexpr unsigned BitWidth
bool CannotBeOrderedLessThanZero(const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI)
Return true if we can prove that the specified FP value is either NaN or never less than -0.0.
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load, load), while recursively building up the single wider load.
This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and the bit indexes (Mask) needed by a masked compare.
MaskOps(unsigned BitWidth, bool MatchAnds)
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.