LLVM 19.0.0git
ReplaceWithVeclib.cpp
Go to the documentation of this file.
1//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Replaces calls to LLVM Intrinsics with matching calls to functions from a
10// vector library (e.g. libmvec, SVML) using the TargetLibraryInfo interface.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/CodeGen/Passes.h"
25#include "llvm/IR/IRBuilder.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "replace-with-veclib"
35// Bumped in replaceWithCallToVeclib after each successful replacement.
36STATISTIC(NumCallsReplaced,
37 "Number of calls to intrinsics that have been replaced.");
38// Bumped in getTLIFunction whenever it creates a new function in the module.
39STATISTIC(NumTLIFuncDeclAdded,
40 "Number of vector library function declarations added.");
41// Bumped in getTLIFunction after appendToCompilerUsed on a new function.
42STATISTIC(NumFuncUsedAdded,
43 "Number of functions added to `llvm.compiler.used`");
44
45/// Returns the Function named \p TLIName in Module \p M, creating it with
46/// external linkage and type \p VectorFTy when the module lacks one. A new
47/// Function copies the attributes of \p ScalarFunc when that is not null.
49 const StringRef TLIName,
50 Function *ScalarFunc = nullptr) {
51 Function *TLIFunc = M->getFunction(TLIName);
52 if (!TLIFunc) {
53 TLIFunc =
54 Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
55 if (ScalarFunc)
56 TLIFunc->copyAttributesFrom(ScalarFunc);
57
58 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
59 << TLIName << "` of type `" << *(TLIFunc->getType())
60 << "` to module.\n");
61
62 ++NumTLIFuncDeclAdded;
63 // Add the freshly created function to llvm.compiler.used, similar to how
64 // it is done in InjectTLIMappings.
65 appendToCompilerUsed(*M, {TLIFunc});
66 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
67 << "` to `@llvm.compiler.used`.\n");
68 ++NumFuncUsedAdded;
69 }
70 return TLIFunc;
71}
72
73/// Replaces all uses of the intrinsic call \p II with a new call to \p
74/// TLIVecFunc, the corresponding function from the vector library.
76 Function *TLIVecFunc) {
78 SmallVector<Value *> Args(II->args());
79 if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
80 auto *MaskTy =
81 VectorType::get(Type::getInt1Ty(II->getContext()), Info.Shape.VF);
82 Args.insert(Args.begin() + OptMaskpos.value(),
84 }
85
86 // Carry the operand bundles of II over to the new call.
88 II->getOperandBundlesAsDefs(OpBundles);
89
90 auto *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
91 II->replaceAllUsesWith(Replacement);
92 // Copy II's fast math flags when the new call is an FP math operation.
93 if (isa<FPMathOperator>(Replacement))
94 Replacement->copyFastMathFlags(II);
95}
96
97/// Returns true when \p II, a call to a vectorized intrinsic, has been
98/// replaced with a call to a suitable function taking vector arguments,
99/// chosen from the mappings available in \p TLI.
101 IntrinsicInst *II) {
102 assert(II != nullptr && "Intrinsic cannot be null");
103 // At the moment VFABI assumes the return type is always widened unless it is
104 // a void type.
105 auto *VTy = dyn_cast<VectorType>(II->getType());
106 ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
107 // Compute the argument types of the corresponding scalar call and check that
108 // all vector operands match the previously found EC.
109 SmallVector<Type *, 8> ScalarArgTypes;
110 Intrinsic::ID IID = II->getIntrinsicID();
111 for (auto Arg : enumerate(II->args())) {
112 auto *ArgTy = Arg.value()->getType();
113 if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
114 ScalarArgTypes.push_back(ArgTy);
115 } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
116 ScalarArgTypes.push_back(VectorArgTy->getElementType());
117 // EC is still zero when the return type is not a vector; take it from the
118 // first vector argument. Any vector argument with another EC is rejected.
119 if (EC.isZero())
120 EC = VectorArgTy->getElementCount();
121 else if (EC != VectorArgTy->getElementCount())
122 return false;
123 } else
124 // Exit when it is supposed to be a vector argument but it isn't.
125 return false;
126 }
127
128 // Try to reconstruct the name for the scalar version of the instruction,
129 // using scalar argument types.
130 std::string ScalarName =
132 ? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
133 : Intrinsic::getName(IID).str();
134
135 // Try to find the mapping for the scalar version of this intrinsic and the
136 // exact vector width of the call operands in the TargetLibraryInfo. First,
137 // check with a non-masked variant, and if that fails try with a masked one.
138 const VecDesc *VD =
139 TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ false);
140 if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ true)))
141 return false;
142
143 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `" << ScalarName
144 << "` and vector width " << EC << " to: `"
145 << VD->getVectorFnName() << "`.\n");
146
147 // Replace the call to the intrinsic with a call to the vector library
148 // function.
149 Type *ScalarRetTy = II->getType()->getScalarType();
150 FunctionType *ScalarFTy =
151 FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
152 const std::string MangledName = VD->getVectorFunctionABIVariantString();
153 auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
154 if (!OptInfo)
155 return false;
156
157 // There is no guarantee that the vectorized instructions followed the VFABI
158 // specification when they were created, so an extra check is needed to make
159 // sure that the operands of the vector function obtained via VFABI match
160 // the operands of the original vector instruction.
161 for (auto &VFParam : OptInfo->Shape.Parameters) {
162 if (VFParam.ParamKind == VFParamKind::GlobalPredicate)
163 continue;
164
165 // tryDemangleForVFABI must return a valid ParamPos; otherwise it could be
166 // a bug in the VFABI parser.
167 assert(VFParam.ParamPos < II->arg_size() && "ParamPos has invalid range");
168 Type *OrigTy = II->getArgOperand(VFParam.ParamPos)->getType();
169 if (OrigTy->isVectorTy() != (VFParam.ParamKind == VFParamKind::Vector)) {
170 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Will not replace: " << ScalarName
171 << ". Wrong type at index " << VFParam.ParamPos << ": "
172 << *OrigTy << "\n");
173 return false;
174 }
175 }
176
177 FunctionType *VectorFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy);
178 if (!VectorFTy)
179 return false;
180
181 Function *TLIFunc =
182 getTLIFunction(II->getModule(), VectorFTy, VD->getVectorFnName(),
183 II->getCalledFunction());
184 replaceWithTLIFunction(II, *OptInfo, TLIFunc);
185 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName
186 << "` with call to `" << TLIFunc->getName() << "`.\n");
187 ++NumCallsReplaced;
188 return true;
189}
190
191static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
192 SmallVector<Instruction *> ReplacedCalls;
193 for (auto &I : instructions(F)) {
194 // Process only intrinsic calls that return void or a vector.
195 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
196 if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
197 continue;
198
199 if (replaceWithCallToVeclib(TLI, II))
200 ReplacedCalls.push_back(&I);
201 }
202 }
203 // Erase any intrinsic calls that were replaced with vector library calls.
204 for (auto *I : ReplacedCalls)
205 I->eraseFromParent();
206 return !ReplacedCalls.empty();
207}
208
209////////////////////////////////////////////////////////////////////////////////
210// New pass manager implementation.
211////////////////////////////////////////////////////////////////////////////////
215 auto Changed = runImpl(TLI, F);
216 if (Changed) {
217 LLVM_DEBUG(dbgs() << "Intrinsic calls replaced with vector libraries: "
218 << NumCallsReplaced << "\n");
219
227 return PA;
228 }
229
230 // The pass did not replace any calls, hence it preserves all analyses.
231 return PreservedAnalyses::all();
232}
233
234////////////////////////////////////////////////////////////////////////////////
235// Legacy PM Implementation.
236////////////////////////////////////////////////////////////////////////////////
238 const TargetLibraryInfo &TLI =
239 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
240 return runImpl(TLI, F);
241}
242
244 AU.setPreservesCFG();
251}
252
253////////////////////////////////////////////////////////////////////////////////
254// Legacy Pass manager initialization
255////////////////////////////////////////////////////////////////////////////////
257
259 "Replace intrinsics with calls to vector library", false,
260 false)
263 "Replace intrinsics with calls to vector library", false,
264 false)
265
267 return new ReplaceWithVeclibLegacy();
268}
AMDGPU promote alloca to vector or false DEBUG_TYPE to vector
Expand Atomic instructions
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static bool runImpl(Function &F, const TargetLowering &TLI)
expand Expand reduction intrinsics
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
Function * getTLIFunction(Module *M, FunctionType *VectorFTy, const StringRef TLIName, Function *ScalarFunc=nullptr)
Returns a vector Function that it adds to the Module M.
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, IntrinsicInst *II)
Returns true when successfully replaced II, which is a call to a vectorized intrinsic,...
static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info, Function *TLIVecFunc)
Replace the intrinsic call II to TLIVecFunc, which is the corresponding function from the vector libr...
Replace intrinsics with calls to vector library
static bool runImpl(const TargetLibraryInfo &TLI, Function &F)
#define DEBUG_TYPE
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
An analysis that produces DemandedBits for a function.
Definition: DemandedBits.h:103
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:165
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:810
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
Legacy wrapper pass to provide the GlobalsAAResult object.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2410
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This analysis provides dependence information for the memory accesses of a loop.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
OptimizationRemarkEmitter legacy analysis pass.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Analysis pass that exposes the ScalarEvolution for a function.
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:223
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
const VecDesc * getVectorMappingInfo(StringRef F, const ElementCount &VF, bool Masked) const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
static IntegerType * getInt1Ty(LLVMContext &C)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Provides info so a possible vectorization of a function can be computed.
std::string getVectorFunctionABIVariantString() const
Returns a vector function ABI variant string of the form: ZGV<isa><mask><vlen><vparams><scalarname>(<...
StringRef getVectorFnName() const
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1042
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
Definition: Function.cpp:1473
std::optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const FunctionType *FTy)
Function to construct a VFInfo out of a mangled name in the following format:
FunctionType * createFunctionType(const VFInfo &Info, const FunctionType *ScalarFTy)
Constructs a FunctionType by applying vector function information to the type of a matching scalar fu...
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
FunctionPass * createReplaceWithVeclibLegacyPass()
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Holds the VFShape for a specific scalar to vector function mapping.