Go to the documentation of this file.
79 #define DEBUG_TYPE "argpromotion"
81 STATISTIC(NumArgumentsPromoted,
"Number of pointer arguments promoted");
82 STATISTIC(NumArgumentsDead,
"Number of dead pointer args eliminated");
94 using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
99 Value *Ptr,
Type *ResElemTy, int64_t Offset) {
103 APInt OrigOffset(
DL.getIndexTypeSizeInBits(Ptr->
getType()), Offset);
106 if (OrigOffset == 0 && OrigElemTy == ResElemTy)
110 APInt TmpOffset = OrigOffset;
111 Type *TmpTy = OrigElemTy;
113 DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
114 if (TmpOffset == 0) {
116 while (TmpTy != ResElemTy) {
122 isa<StructType>(TmpTy) ? 32 : OrigOffset.
getBitWidth()));
130 if (OrigOffset != 0 || TmpTy == ResElemTy) {
131 Ptr = IRB.
CreateGEP(OrigElemTy, Ptr, Indices);
138 if (OrigOffset != 0) {
155 std::vector<Type *> Params;
167 if (!ArgsToPromote.count(&*
I)) {
169 Params.push_back(
I->getType());
171 }
else if (
I->use_empty()) {
175 const auto &ArgParts = ArgsToPromote.find(&*
I)->second;
176 for (
const auto &Pair : ArgParts) {
177 Params.push_back(Pair.second.Ty);
180 ++NumArgumentsPromoted;
198 F->setSubprogram(
nullptr);
200 LLVM_DEBUG(
dbgs() <<
"ARG PROMOTION: Promoting to:" << *NF <<
"\n"
204 for (
auto *
I : Params)
205 if (
auto *VT = dyn_cast<llvm::VectorType>(
I))
207 LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize());
216 F->getParent()->getFunctionList().insert(
F->getIterator(), NF);
223 while (!
F->use_empty()) {
224 CallBase &CB = cast<CallBase>(*
F->user_back());
234 ++
I, ++AI, ++ArgNo) {
235 if (!ArgsToPromote.count(&*
I)) {
238 }
else if (!
I->use_empty()) {
240 const auto &ArgParts = ArgsToPromote.find(&*
I)->second;
241 for (
const auto &Pair : ArgParts) {
245 Pair.second.Alignment, V->
getName() +
".val");
246 if (Pair.second.MustExecInstr) {
247 LI->
setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
249 {LLVMContext::MD_range, LLVMContext::MD_nonnull,
250 LLVMContext::MD_dereferenceable,
251 LLVMContext::MD_dereferenceable_or_null,
252 LLVMContext::MD_align, LLVMContext::MD_noundef});
261 for (; AI != CB.
arg_end(); ++AI, ++ArgNo) {
270 if (
InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
272 Args, OpBundles,
"", &CB);
275 NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
282 NewCS->
copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
312 if (!ArgsToPromote.count(&
Arg)) {
315 Arg.replaceAllUsesWith(&*I2);
335 "Only arguments with a pointer type are promotable");
341 for (
const auto &Pair : ArgsToPromote.find(&
Arg)->second) {
342 int64_t Offset = Pair.first;
343 const ArgPart &Part = Pair.second;
349 Part.Ty,
nullptr,
Arg.getName() +
"." +
Twine(Offset) +
".allc");
354 OffsetToAlloca.
insert({Offset, NewAlloca});
357 auto GetAlloca = [&](
Value *Ptr) {
358 APInt Offset(
DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
359 Ptr = Ptr->stripAndAccumulateConstantOffsets(
DL, Offset,
361 assert(Ptr == &
Arg &&
"Not constant offset from arg?");
362 return OffsetToAlloca.
lookup(Offset.getSExtValue());
371 while (!Worklist.empty()) {
373 if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
374 DeadInsts.push_back(cast<Instruction>(V));
379 if (
auto *LI = dyn_cast<LoadInst>(V)) {
380 Value *Ptr = LI->getPointerOperand();
385 if (
auto *
SI = dyn_cast<StoreInst>(V)) {
386 assert(!
SI->isVolatile() &&
"Volatile operations can't be promoted.");
387 Value *Ptr =
SI->getPointerOperand();
397 I->eraseFromParent();
401 for (
const auto &Pair : OffsetToAlloca) {
403 "By design, only promotable allocas should be produced.");
404 Allocas.push_back(Pair.second);
409 <<
" alloca(s) are promotable by Mem2Reg\n");
411 if (!Allocas.empty()) {
430 APInt Bytes(64, NeededDerefBytes);
439 CallBase &CB = cast<CallBase>(*U);
440 return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
441 NeededAlign, Bytes, DL);
448 unsigned MaxElements,
bool IsRecursive,
451 if (
Arg->use_empty())
468 Align NeededAlign(1);
475 bool AreStoresAllowed =
Arg->getParamByValType() &&
Arg->getParamAlign();
480 auto HandleEndUser = [&](
auto *
I,
Type *Ty,
486 Value *Ptr =
I->getPointerOperand();
493 if (Offset.getSignificantBits() >= 64)
498 if (Size.isScalable())
503 if (IsRecursive && Ty->isPointerTy())
506 int64_t Off = Offset.getSExtValue();
508 Off, ArgPart{Ty,
I->getAlign(), GuaranteedToExecute ?
I :
nullptr});
509 ArgPart &Part = Pair.first->second;
510 bool OffsetNotSeenBefore = Pair.second;
514 if (MaxElements > 0 && ArgParts.
size() > MaxElements) {
516 <<
"more than " << MaxElements <<
" parts\n");
524 <<
"accessed as both " << *Part.Ty <<
" and " << *Ty
525 <<
" at offset " << Off <<
"\n");
535 if (!GuaranteedToExecute &&
536 (OffsetNotSeenBefore || Part.Alignment <
I->getAlign())) {
545 NeededDerefBytes =
std::max(NeededDerefBytes, Off + Size.getFixedValue());
546 NeededAlign =
std::max(NeededAlign,
I->getAlign());
549 Part.Alignment =
std::max(Part.Alignment,
I->getAlign());
556 if (
LoadInst *LI = dyn_cast<LoadInst>(&
I))
557 Res = HandleEndUser(LI, LI->getType(),
true);
559 Res = HandleEndUser(
SI,
SI->getValueOperand()->getType(),
573 auto AppendUses = [&](
const Value *V) {
575 if (Visited.
insert(&U).second)
576 Worklist.push_back(&U);
579 while (!Worklist.empty()) {
581 Value *V = U->getUser();
582 if (isa<BitCastInst>(V)) {
587 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(V)) {
588 if (!
GEP->hasAllConstantIndices())
594 if (
auto *LI = dyn_cast<LoadInst>(V)) {
595 if (!*HandleEndUser(LI, LI->getType(),
false))
602 auto *
SI = dyn_cast<StoreInst>(V);
603 if (AreStoresAllowed &&
SI &&
605 if (!*HandleEndUser(
SI,
SI->getValueOperand()->getType(),
615 <<
"unknown user " << *V <<
"\n");
619 if (NeededDerefBytes || NeededAlign > 1) {
624 <<
"not dereferenceable or aligned\n");
629 if (ArgParts.
empty())
635 [](
const auto &A,
const auto &
B) {
return A.first <
B.first; });
638 int64_t Offset = ArgPartsVec[0].first;
639 for (
const auto &Pair : ArgPartsVec) {
640 if (Pair.first < Offset)
643 Offset = Pair.first +
DL.getTypeStoreSize(Pair.second.Ty);
649 if (AreStoresAllowed)
691 CallBase *CB = dyn_cast<CallBase>(U.getUser());
695 const Function *Caller = CB->getCaller();
696 const Function *Callee = CB->getCalledFunction();
697 return TTI.areTypesABICompatible(Caller, Callee, Types);
706 unsigned MaxElements,
bool IsRecursive) {
710 if (
F->hasFnAttribute(Attribute::Naked))
714 if (!
F->hasLocalLinkage())
727 if (
F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
733 if (
I.getType()->isPointerTy())
734 PointerArgs.push_back(&
I);
735 if (PointerArgs.empty())
741 for (
Use &U :
F->uses()) {
742 CallBase *CB = dyn_cast<CallBase>(U.getUser());
744 if (CB ==
nullptr || !CB->
isCallee(&U) ||
759 if (
BB.getTerminatingMustTailCall())
769 for (
Argument *PtrArg : PointerArgs) {
772 if (PtrArg->hasStructRetAttr()) {
773 unsigned ArgNo = PtrArg->getArgNo();
774 F->removeParamAttr(ArgNo, Attribute::StructRet);
775 F->addParamAttr(ArgNo, Attribute::NoAlias);
776 for (
Use &U :
F->uses()) {
777 CallBase &CB = cast<CallBase>(*U.getUser());
786 if (
findArgParts(PtrArg,
DL, AAR, MaxElements, IsRecursive, ArgParts)) {
788 for (
const auto &Pair : ArgParts)
789 Types.push_back(Pair.second.Ty);
798 if (ArgsToPromote.
empty())
808 bool Changed =
false, LocalChange;
817 bool IsRecursive =
C.size() > 1;
830 C.getOuterRefSCC().replaceNodeFunction(
N, *NewF);
836 for (
auto *U : NewF->
users()) {
837 auto *UserF = cast<CallBase>(U)->getFunction();
842 Changed |= LocalChange;
843 }
while (LocalChange);
A set of analyses that are preserved following a run of a transformation pass.
This class represents an incoming formal argument to a Function.
A manager for alias analyses.
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Analysis pass providing the TargetTransformInfo.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
This is an optimization pass for GlobalISel generic memory operations.
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
A parsed version of the target data layout string, together with methods for querying it.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
const BasicBlockListType & getBasicBlockList() const
Get the underlying elements of the Function...
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
void invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Invalidate cached analyses for an IR unit.
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute >> Attrs)
Create an AttributeList with the specified parameters in it.
FunctionAnalysisManager FAM
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getBitWidth() const
Return the number of bits in the APInt.
AttributeList getAttributes() const
Return the parameter attributes for this call.
static unsigned getPointerOperandIndex()
FunctionType * getFunctionType() const
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
AttributeSet getFnAttrs() const
The function attributes are returned.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr) const
Accumulate the constant offset this value has compared to a base pointer.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appropriate.
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_NODISCARD T pop_back_val()
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Type * getNonOpaquePointerElementType() const
Only use this method in code that is not reachable with opaque pointers, or part of deprecated method...
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Removes the attribute from the given argument.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
An SCC of the call graph.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
(vector float) vec_cmpeq(*A, *B) C
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature.
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
void clear(IRUnitT &IR, llvm::StringRef Name)
Clear any cached analysis results for a single unit of IR.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
iterator_range< use_iterator > uses()
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
LLVM_NODISCARD detail::scope_exit< typename std::decay< Callable >::type > make_scope_exit(Callable &&F)
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
STATISTIC(NumFunctions, "Total number of functions")
auto predecessors(MachineBasicBlock *BB)
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the Function Src to this one.
void setName(const Twine &Name)
Change the name of the value.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This struct is a compact representation of a valid (non-zero power of two) alignment.
CallingConv::ID getCallingConv() const
Function * getCaller()
Helper to get the caller (the parent function).
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
An instruction for storing to memory.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A function analysis which provides an AssumptionCache.
void preserve()
Mark an analysis as preserved.
void copyMetadata(const GlobalObject *Src, unsigned Offset)
Copy metadata from Src, adjusting offsets by Offset.
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, const MemoryLocation &Loc, const ModRefInfo Mode)
Check if it is possible for the execution of the specified instructions to mod(according to the mode)...
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StandardInstrumentations SI(Debug, VerifyEach)
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
A node in the call graph.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
Represents analyses that only rely on functions' control flow.
const Function * getFunction() const
Return the function this instruction belongs to.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
@ Mod
The access may modify the value stored in memory.
Common base class shared among various IRBuilders.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
void updateMinLegalVectorWidthAttr(Function &Fn, uint64_t Width)
Update min-legal-vector-width if it is in Attribute and less than Width.
StringRef getName() const
Return a constant reference to the value's name.
An instruction for reading from memory.
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty LLVM_NODISCARD bool empty() const
amdgpu Simplify well known AMD library false FunctionCallee Callee
Support structure for SCC passes to communicate updates to the call graph back to the CGSCC pass manager infrastructure.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one of its successors (including the next instruction that follows within a basic block).
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::size unsigned size() const
void sort(IteratorTy Start, IteratorTy End)
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
void setAlignment(Align Align)
Analysis pass which computes a DominatorTree.
bool isOpaquePointerTy() const
True if this is an instance of an opaque PointerType.
void preserveSet()
Mark an analysis set as preserved.
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc)
Check if it is possible for execution of the specified basic block to modify the location Loc.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
A container for analyses that lazily runs them and caches their results.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
void takeName(Value *V)
Transfer the name from V to this value.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
an instruction to allocate memory on the stack
iterator_range< idf_ext_iterator< T, SetTy > > inverse_depth_first_ext(const T &G, SetTy &S)
A lazily constructed view of the call graph of a module.
static unsigned getPointerOperandIndex()
AttributeSet getParamAttrs(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
A proxy from a FunctionAnalysisManager to an SCC.
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
Type * getReturnType() const
LLVM Value Representation.
iterator_range< user_iterator > users()
bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
Representation for a specific memory location.
void setCallingConv(CallingConv::ID CC)
Class to represent function types.
A Use represents the edge between a Value definition and its users.
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.