LLVM  13.0.0git
ReplaceWithVeclib.cpp
Go to the documentation of this file.
1 //=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls ====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
10 // with vector operands) with matching calls to functions from a vector
11 // library (e.g., libmvec, SVML) according to TargetLibraryInfo.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/Passes.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/InstIterator.h"
26 #include "llvm/IR/IntrinsicInst.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "replace-with-veclib"
32 
33 STATISTIC(NumCallsReplaced,
34  "Number of calls to intrinsics that have been replaced.");
35 
36 STATISTIC(NumTLIFuncDeclAdded,
37  "Number of vector library function declarations added.");
38 
39 STATISTIC(NumFuncUsedAdded,
40  "Number of functions added to `llvm.compiler.used`");
41 
42 static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
43  Module *M = CI.getModule();
44 
45  Function *OldFunc = CI.getCalledFunction();
46 
47  // Check if the vector library function is already declared in this module,
48  // otherwise insert it.
49  Function *TLIFunc = M->getFunction(TLIName);
50  if (!TLIFunc) {
51  TLIFunc = Function::Create(OldFunc->getFunctionType(),
52  Function::ExternalLinkage, TLIName, *M);
53  TLIFunc->copyAttributesFrom(OldFunc);
54 
55  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
56  << TLIName << "` of type `" << *(TLIFunc->getType())
57  << "` to module.\n");
58 
59  ++NumTLIFuncDeclAdded;
60 
61  // Add the freshly created function to llvm.compiler.used,
62  // similar to as it is done in InjectTLIMappings
63  appendToCompilerUsed(*M, {TLIFunc});
64 
65  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
66  << "` to `@llvm.compiler.used`.\n");
67  ++NumFuncUsedAdded;
68  }
69 
70  // Replace the call to the vector intrinsic with a call
71  // to the corresponding function from the vector library.
74  // Preserve the operand bundles.
76  CI.getOperandBundlesAsDefs(OpBundles);
77  CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
78  assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
79  "Expecting function types to be identical");
80  CI.replaceAllUsesWith(Replacement);
81  if (isa<FPMathOperator>(Replacement)) {
82  // Preserve fast math flags for FP math.
83  Replacement->copyFastMathFlags(&CI);
84  }
85 
86  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
87  << OldFunc->getName() << "` with call to `" << TLIName
88  << "`.\n");
89  ++NumCallsReplaced;
90  return true;
91 }
92 
94  CallInst &CI) {
95  if (!CI.getCalledFunction()) {
96  return false;
97  }
98 
99  auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
100  if (IntrinsicID == Intrinsic::not_intrinsic) {
101  // Replacement is only performed for intrinsic functions
102  return false;
103  }
104 
105  // Convert vector arguments to scalar type and check that
106  // all vector operands have identical vector width.
108  SmallVector<Type *> ScalarTypes;
109  for (auto Arg : enumerate(CI.arg_operands())) {
110  auto *ArgType = Arg.value()->getType();
111  // Vector calls to intrinsics can still have
112  // scalar operands for specific arguments.
113  if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) {
114  ScalarTypes.push_back(ArgType);
115  } else {
116  // The argument in this place should be a vector if
117  // this is a call to a vector intrinsic.
118  auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
119  if (!VectorArgTy) {
120  // The argument is not a vector, do not perform
121  // the replacement.
122  return false;
123  }
124  ElementCount NumElements = VectorArgTy->getElementCount();
125  if (NumElements.isScalable()) {
126  // The current implementation does not support
127  // scalable vectors.
128  return false;
129  }
130  if (VF.isNonZero() && VF != NumElements) {
131  // The different arguments differ in vector size.
132  return false;
133  } else {
134  VF = NumElements;
135  }
136  ScalarTypes.push_back(VectorArgTy->getElementType());
137  }
138  }
139 
140  // Try to reconstruct the name for the scalar version of this
141  // intrinsic using the intrinsic ID and the argument types
142  // converted to scalar above.
143  std::string ScalarName;
144  if (Intrinsic::isOverloaded(IntrinsicID)) {
145  ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes);
146  } else {
147  ScalarName = Intrinsic::getName(IntrinsicID).str();
148  }
149 
150  if (!TLI.isFunctionVectorizable(ScalarName)) {
151  // The TargetLibraryInfo does not contain a vectorized version of
152  // the scalar function.
153  return false;
154  }
155 
156  // Try to find the mapping for the scalar version of this intrinsic
157  // and the exact vector width of the call operands in the
158  // TargetLibraryInfo.
159  const std::string TLIName =
160  std::string(TLI.getVectorizedFunction(ScalarName, VF));
161 
162  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
163  << ScalarName << "` and vector width " << VF << ".\n");
164 
165  if (!TLIName.empty()) {
166  // Found the correct mapping in the TargetLibraryInfo,
167  // replace the call to the intrinsic with a call to
168  // the vector library function.
169  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
170  << "`.\n");
171  return replaceWithTLIFunction(CI, TLIName);
172  }
173 
174  return false;
175 }
176 
177 static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
178  bool Changed = false;
179  SmallVector<CallInst *> ReplacedCalls;
180  for (auto &I : instructions(F)) {
181  if (auto *CI = dyn_cast<CallInst>(&I)) {
182  if (replaceWithCallToVeclib(TLI, *CI)) {
183  ReplacedCalls.push_back(CI);
184  Changed = true;
185  }
186  }
187  }
188  // Erase the calls to the intrinsics that have been replaced
189  // with calls to the vector library.
190  for (auto *CI : ReplacedCalls) {
191  CI->eraseFromParent();
192  }
193  return Changed;
194 }
195 
196 ////////////////////////////////////////////////////////////////////////////////
197 // New pass manager implementation.
198 ////////////////////////////////////////////////////////////////////////////////
202  auto Changed = runImpl(TLI, F);
203  if (Changed) {
205  PA.preserveSet<CFGAnalyses>();
211  return PA;
212  } else {
213  // The pass did not replace any calls, hence it preserves all analyses.
214  return PreservedAnalyses::all();
215  }
216 }
217 
218 ////////////////////////////////////////////////////////////////////////////////
219 // Legacy PM Implementation.
220 ////////////////////////////////////////////////////////////////////////////////
222  const TargetLibraryInfo &TLI =
223  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
224  return runImpl(TLI, F);
225 }
226 
228  AU.setPreservesCFG();
237 }
238 
239 ////////////////////////////////////////////////////////////////////////////////
240 // Legacy Pass manager initialization
241 ////////////////////////////////////////////////////////////////////////////////
243 
245  "Replace intrinsics with calls to vector library", false,
246  false)
249  "Replace intrinsics with calls to vector library", false,
250  false)
251 
253  return new ReplaceWithVeclibLegacy();
254 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::Intrinsic::isOverloaded
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
Definition: Function.cpp:1306
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2105
llvm::LoopAccessLegacyAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:717
llvm
Definition: AllocatorList.h:23
llvm::CallBase::getOperandBundlesAsDefs
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Definition: Instructions.cpp:361
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:66
IntrinsicInst.h
llvm::ElementCount
Definition: TypeSize.h:386
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
InstIterator.h
llvm::Function
Definition: Function.h:61
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
Statistic.h
replaceWithCallToVeclib
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, CallInst &CI)
Definition: ReplaceWithVeclib.cpp:93
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1905
llvm::Intrinsic::getName
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:840
llvm::IRBuilder<>
llvm::TargetLibraryInfo::isFunctionVectorizable
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:326
OptimizationRemarkEmitter.h
GlobalsModRef.h
llvm::ReplaceWithVeclibLegacy::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: ReplaceWithVeclib.cpp:227
llvm::LoopAccessAnalysis
This analysis provides dependence information for the memory accesses of a loop.
Definition: LoopAccessAnalysis.h:759
to
Should compile to
Definition: README.txt:449
llvm::DemandedBitsAnalysis
An analysis that produces DemandedBits for a function.
Definition: DemandedBits.h:123
llvm::ReplaceWithVeclibLegacy::ID
static char ID
Definition: ReplaceWithVeclib.h:27
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
STLExtras.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: ReplaceWithVeclib.cpp:31
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
replaceWithTLIFunction
static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName)
Definition: ReplaceWithVeclib.cpp:42
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1396
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
runImpl
static bool runImpl(const TargetLibraryInfo &TLI, Function &F)
Definition: ReplaceWithVeclib.cpp:177
llvm::createReplaceWithVeclibLegacyPass
FunctionPass * createReplaceWithVeclibLegacyPass()
Definition: ReplaceWithVeclib.cpp:252
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::appendToCompilerUsed
void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
Definition: ModuleUtils.cpp:110
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::Function::copyAttributesFrom
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
Definition: Function.cpp:681
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2135
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::LinearPolySize< ElementCount >::getFixed
static ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
Passes.h
VectorUtils.h
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE, "Replace intrinsics with calls to vector library", false, false) INITIALIZE_PASS_END(ReplaceWithVeclibLegacy
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
intrinsics
expand Expand reduction intrinsics
Definition: ExpandReductions.cpp:200
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::hasVectorInstrinsicScalarOpd
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:99
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:137
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::Function::getIntrinsicID
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:205
library
Replace intrinsics with calls to vector library
Definition: ReplaceWithVeclib.cpp:249
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:517
llvm::ReplaceWithVeclibLegacy
Definition: ReplaceWithVeclib.h:26
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
Definition: Instruction.cpp:218
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:294
DemandedBits.h
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::ReplaceWithVeclibLegacy::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Definition: ReplaceWithVeclib.cpp:221
llvm::DemandedBitsWrapperPass
Definition: DemandedBits.h:102
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:175
llvm::GlobalValue::ExternalLinkage
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
ReplaceWithVeclib.h
ModuleUtils.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1281
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:199
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
llvm::ReplaceWithVeclib::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: ReplaceWithVeclib.cpp:199
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
llvm::UnivariateLinearPolyBase::isNonZero
bool isNonZero() const
Definition: TypeSize.h:230
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::CallBase::arg_operands
iterator_range< User::op_iterator > arg_operands()
Definition: InstrTypes.h:1333
llvm::OptimizationRemarkEmitterAnalysis
Definition: OptimizationRemarkEmitter.h:164
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:438
llvm::IRBuilderBase::CreateCall
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2352
llvm::TargetLibraryInfo::getVectorizedFunction
StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const
Definition: TargetLibraryInfo.h:332