LLVM 20.0.0git
ReplaceWithVeclib.cpp
Go to the documentation of this file.
1//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Replaces calls to LLVM Intrinsics with matching calls to functions from a
10// vector library (e.g., libmvec, SVML) using the TargetLibraryInfo interface.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringRef.h"
23#include "llvm/CodeGen/Passes.h"
25#include "llvm/IR/IRBuilder.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "replace-with-veclib"
35
36STATISTIC(NumCallsReplaced,
37 "Number of calls to intrinsics that have been replaced.");
38
39STATISTIC(NumTLIFuncDeclAdded,
40 "Number of vector library function declarations added.");
41
42STATISTIC(NumFuncUsedAdded,
43 "Number of functions added to `llvm.compiler.used`");
44
45/// Returns the vector Function named \p TLIName, creating it and adding it to
46/// the Module \p M if it is not already there. When \p ScalarFunc is not null,
47/// its attributes are copied to the newly created Function.
49 const StringRef TLIName,
50 Function *ScalarFunc = nullptr) {
51 Function *TLIFunc = M->getFunction(TLIName);
52 if (!TLIFunc) {
53 TLIFunc =
54 Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
55 if (ScalarFunc)
56 TLIFunc->copyAttributesFrom(ScalarFunc);
57
58 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
59 << TLIName << "` of type `" << *(TLIFunc->getType())
60 << "` to module.\n");
61
62 ++NumTLIFuncDeclAdded;
63 // Add the freshly created function to llvm.compiler.used, similar to how it
64 // is done in InjectTLIMappings.
65 appendToCompilerUsed(*M, {TLIFunc});
66 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
67 << "` to `@llvm.compiler.used`.\n");
68 ++NumFuncUsedAdded;
69 }
70 return TLIFunc;
71}
72
73/// Replace the intrinsic call \p II with a call to \p TLIVecFunc, which is the
74/// corresponding function from the vector library.
76 Function *TLIVecFunc) {
78 SmallVector<Value *> Args(II->args());
79 if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
80 auto *MaskTy =
81 VectorType::get(Type::getInt1Ty(II->getContext()), Info.Shape.VF);
82 Args.insert(Args.begin() + OptMaskpos.value(),
84 }
85
86 // Preserve the operand bundles.
88 II->getOperandBundlesAsDefs(OpBundles);
89
90 auto *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
91 II->replaceAllUsesWith(Replacement);
92 // Preserve fast math flags for FP math.
93 if (isa<FPMathOperator>(Replacement))
94 Replacement->copyFastMathFlags(II);
95}
96
97/// Returns true when it successfully replaces \p II, which is a call to a
98/// vectorized intrinsic, with a suitable function taking vector arguments,
99/// based on available mappings in the \p TLI.
101 IntrinsicInst *II) {
102 assert(II != nullptr && "Intrinsic cannot be null");
103 Intrinsic::ID IID = II->getIntrinsicID();
104 Type *RetTy = II->getType();
105 Type *ScalarRetTy = RetTy->getScalarType();
106 // At the moment VFABI assumes the return type is always widened unless it is
107 // a void type.
108 auto *VTy = dyn_cast<VectorType>(RetTy);
109 ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
110
111 // OloadTys collects types used in scalar intrinsic overload name.
112 SmallVector<Type *, 3> OloadTys;
113 if (!RetTy->isVoidTy() &&
114 isVectorIntrinsicWithOverloadTypeAtArg(IID, -1, /*TTI=*/nullptr))
115 OloadTys.push_back(ScalarRetTy);
116
117 // Compute the argument types of the corresponding scalar call and check that
118 // all vector operands match the previously found EC.
119 SmallVector<Type *, 8> ScalarArgTypes;
120 for (auto Arg : enumerate(II->args())) {
121 auto *ArgTy = Arg.value()->getType();
122 bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index(),
123 /*TTI=*/nullptr);
124 if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index(), /*TTI=*/nullptr)) {
125 ScalarArgTypes.push_back(ArgTy);
126 if (IsOloadTy)
127 OloadTys.push_back(ArgTy);
128 } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
129 auto *ScalarArgTy = VectorArgTy->getElementType();
130 ScalarArgTypes.push_back(ScalarArgTy);
131 if (IsOloadTy)
132 OloadTys.push_back(ScalarArgTy);
133 // When return type is void, set EC to the first vector argument, and
134 // disallow vector arguments with different ECs.
135 if (EC.isZero())
136 EC = VectorArgTy->getElementCount();
137 else if (EC != VectorArgTy->getElementCount())
138 return false;
139 } else
140 // Exit when it is supposed to be a vector argument but it isn't.
141 return false;
142 }
143
144 // Try to reconstruct the name for the scalar version of the instruction,
145 // using scalar argument types.
146 std::string ScalarName =
148 ? Intrinsic::getName(IID, OloadTys, II->getModule())
149 : Intrinsic::getName(IID).str();
150
151 // Try to find the mapping for the scalar version of this intrinsic and the
152 // exact vector width of the call operands in the TargetLibraryInfo. First,
153 // check with a non-masked variant, and if that fails try with a masked one.
154 const VecDesc *VD =
155 TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ false);
156 if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ true)))
157 return false;
158
159 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `" << ScalarName
160 << "` and vector width " << EC << " to: `"
161 << VD->getVectorFnName() << "`.\n");
162
163 // Replace the call to the intrinsic with a call to the vector library
164 // function.
165 FunctionType *ScalarFTy =
166 FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
167 const std::string MangledName = VD->getVectorFunctionABIVariantString();
168 auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
169 if (!OptInfo)
170 return false;
171
172 // There is no guarantee that the vectorized instructions followed the VFABI
173 // specification when being created, this is why we need to add extra check to
174 // make sure that the operands of the vector function obtained via VFABI match
175 // the operands of the original vector instruction.
176 for (auto &VFParam : OptInfo->Shape.Parameters) {
177 if (VFParam.ParamKind == VFParamKind::GlobalPredicate)
178 continue;
179
180 // tryDemangleForVFABI must return valid ParamPos, otherwise it could be
181 // a bug in the VFABI parser.
182 assert(VFParam.ParamPos < II->arg_size() && "ParamPos has invalid range");
183 Type *OrigTy = II->getArgOperand(VFParam.ParamPos)->getType();
184 if (OrigTy->isVectorTy() != (VFParam.ParamKind == VFParamKind::Vector)) {
185 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Will not replace: " << ScalarName
186 << ". Wrong type at index " << VFParam.ParamPos << ": "
187 << *OrigTy << "\n");
188 return false;
189 }
190 }
191
192 FunctionType *VectorFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy);
193 if (!VectorFTy)
194 return false;
195
196 Function *TLIFunc =
197 getTLIFunction(II->getModule(), VectorFTy, VD->getVectorFnName(),
198 II->getCalledFunction());
199 replaceWithTLIFunction(II, *OptInfo, TLIFunc);
200 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName
201 << "` with call to `" << TLIFunc->getName() << "`.\n");
202 ++NumCallsReplaced;
203 return true;
204}
205
/// Scans every instruction in \p F and tries to replace each intrinsic call
/// that returns void or a vector with a call to a vector library function,
/// using the mappings available in \p TLI (see replaceWithCallToVeclib).
///
/// \param TLI Passed through to replaceWithCallToVeclib for every candidate.
/// \param F   The function whose instructions are scanned and rewritten.
/// \returns true if at least one intrinsic call was replaced and erased,
///          false if \p F was left untouched.
206static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
// Replaced intrinsic calls are collected here and erased only after the walk
// below has finished, rather than while instructions(F) is being iterated.
207 SmallVector<Instruction *> ReplacedCalls;
208 for (auto &I : instructions(F)) {
209 // Process only intrinsic calls that return void or a vector.
210 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
// NOTE(review): this guard looks redundant for a value that already cast to
// IntrinsicInst -- confirm before removing.
211 if (II->getIntrinsicID() == Intrinsic::not_intrinsic)
212 continue;
213 if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
214 continue;
215
// A true result means the replacement call has been created and the uses of
// the intrinsic call redirected to it; the old call is queued for removal.
216 if (replaceWithCallToVeclib(TLI, II))
217 ReplacedCalls.push_back(&I);
218 }
219 }
220 // Erase any intrinsic calls that were replaced with vector library calls.
221 for (auto *I : ReplacedCalls)
222 I->eraseFromParent();
// Report "changed" exactly when something was replaced.
223 return !ReplacedCalls.empty();
224}
224}
225
226////////////////////////////////////////////////////////////////////////////////
227// New pass manager implementation.
228////////////////////////////////////////////////////////////////////////////////
232 auto Changed = runImpl(TLI, F);
233 if (Changed) {
234 LLVM_DEBUG(dbgs() << "Intrinsic calls replaced with vector libraries: "
235 << NumCallsReplaced << "\n");
236
244 return PA;
245 }
246
247 // The pass did not replace any calls, hence it preserves all analyses.
248 return PreservedAnalyses::all();
249}
250
251////////////////////////////////////////////////////////////////////////////////
252// Legacy PM Implementation.
253////////////////////////////////////////////////////////////////////////////////
255 const TargetLibraryInfo &TLI =
256 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
257 return runImpl(TLI, F);
258}
259
261 AU.setPreservesCFG();
268}
269
270////////////////////////////////////////////////////////////////////////////////
271// Legacy Pass manager initialization
272////////////////////////////////////////////////////////////////////////////////
274
276 "Replace intrinsics with calls to vector library", false,
277 false)
280 "Replace intrinsics with calls to vector library", false,
281 false)
282
284 return new ReplaceWithVeclibLegacy();
285}
AMDGPU promote alloca to vector or false DEBUG_TYPE to vector
Expand Atomic instructions
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
return RetTy
#define LLVM_DEBUG(...)
Definition: Debug.h:106
static bool runImpl(Function &F, const TargetLowering &TLI)
expand Expand reduction intrinsics
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
Function * getTLIFunction(Module *M, FunctionType *VectorFTy, const StringRef TLIName, Function *ScalarFunc=nullptr)
Returns a vector Function that it adds to the Module M.
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, IntrinsicInst *II)
Returns true when successfully replaced II, which is a call to a vectorized intrinsic,...
static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info, Function *TLIVecFunc)
Replace the intrinsic call II to TLIVecFunc, which is the corresponding function from the vector libr...
Replace intrinsics with calls to vector library
static bool runImpl(const TargetLibraryInfo &TLI, Function &F)
#define DEBUG_TYPE
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
An analysis that produces DemandedBits for a function.
Definition: DemandedBits.h:103
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:860
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
Legacy wrapper pass to provide the GlobalsAAResult object.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This analysis provides dependence information for the memory accesses of a loop.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
OptimizationRemarkEmitter legacy analysis pass.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Analysis pass that exposes the ScalarEvolution for a function.
bool empty() const
Definition: SmallVector.h:81
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:229
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
const VecDesc * getVectorMappingInfo(StringRef F, const ElementCount &VF, bool Masked) const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
static IntegerType * getInt1Ty(LLVMContext &C)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
Provides info so a possible vectorization of a function can be computed.
std::string getVectorFunctionABIVariantString() const
Returns a vector function ABI variant string on the form: ZGV<isa><mask><vlen><vparams><scalarname>(<...
StringRef getVectorFnName() const
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Intrinsics.cpp:46
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
Definition: Intrinsics.cpp:606
std::optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const FunctionType *FTy)
Function to construct a VFInfo out of a mangled name in the following format:
FunctionType * createFunctionType(const VFInfo &Info, const FunctionType *ScalarFTy)
Constructs a FunctionType by applying vector function information to the type of a matching scalar fu...
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
FunctionPass * createReplaceWithVeclibLegacyPass()
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Holds the VFShape for a specific scalar to vector function mapping.