LLVM 20.0.0git
PartiallyInlineLibCalls.cpp
Go to the documentation of this file.
1//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass tries to partially inline the fast path of well-known library
10// functions, such as using square-root instructions for cases where sqrt()
11// does not need to set errno.
12//
13//===----------------------------------------------------------------------===//
14
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
26#include <optional>
27
28using namespace llvm;
29
30#define DEBUG_TYPE "partially-inline-libcalls"
31
// Debug counter for this pass. optimizeSQRT consults it through
// DebugCounter::shouldExecute(PILCounter) and leaves the call untouched when
// the counter says not to execute.
32DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
33 "Controls transformations in partially-inline-libcalls");
34
// Partially inlines one sqrt libcall.
//
// The original `Call` stays in CurrBB and is marked as not accessing memory;
// a clone of it is inserted into a new block named "call.sqrt". CurrBB's
// terminator branches on an FP compare: when the compare is true control goes
// straight to the split-off tail, otherwise it goes through the cloned
// libcall first. A phi at the top of the tail merges the two results and
// replaces every use of the original call.
//
// Returns false without touching the IR when the call already only reads
// memory or when the debug counter declines the transformation. Otherwise
// returns true and sets `BB` to the iterator of the split-off tail block.
35static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
36 BasicBlock &CurrBB, Function::iterator &BB,
// NOTE(review): listing lines 37-38 are absent from this extract, so the rest
// of the parameter list and the opening brace are not visible. The call site
// passes three further arguments (TTI, a DomTreeUpdater pointer, ORE) —
// confirm against the real file.
39 // There is no need to change the IR, since backend will emit sqrt
40 // instruction if the call has already been marked read-only.
41 if (Call->onlyReadsMemory())
42 return false;
43
44 if (!DebugCounter::shouldExecute(PILCounter))
45 return false;
46
47 // Do the following transformation:
48 //
49 // (before)
50 // dst = sqrt(src)
51 //
52 // (after)
53 // v0 = sqrt_noreadmem(src) # native sqrt instruction.
54 // [if (v0 is a NaN) || if (src < 0)]
55 // v1 = sqrt(src) # library call.
56 // dst = phi(v0, v1)
57 //
58
59 Type *Ty = Call->getType();
60 IRBuilder<> Builder(Call->getNextNode());
61
62 // Split CurrBB right after the call, create a 'then' block (that branches
63 // back to split-off tail of CurrBB) into which we'll insert a libcall.
// NOTE(review): listing line 64 is absent from this extract. It presumably
// declares LibCallTerm as the result of the block-splitting helper whose
// remaining arguments follow — confirm. The condition passed here is a
// placeholder `true`; the real condition is installed further down.
65 Builder.getTrue(), Call->getNextNode(), /*Unreachable=*/false,
66 /*BranchWeights*/ nullptr, DTU);
67
68 auto *CurrBBTerm = cast<BranchInst>(CurrBB.getTerminator());
69 // We want an 'else' block though, not a 'then' block.
// CurrBBTerm is already a BranchInst*, so the cast below is a no-op.
70 cast<BranchInst>(CurrBBTerm)->swapSuccessors();
71
72 // Create phi that will merge results of either sqrt and replace all uses.
73 BasicBlock *JoinBB = LibCallTerm->getSuccessor(0);
74 JoinBB->setName(CurrBB.getName() + ".split");
75 Builder.SetInsertPoint(JoinBB, JoinBB->begin());
76 PHINode *Phi = Builder.CreatePHI(Ty, 2);
// Uses are redirected before the phi has any incoming values; the incoming
// edges (including the one carrying Call itself) are added at the end.
77 Call->replaceAllUsesWith(Phi);
78
79 // Finally, insert the libcall into 'else' block.
80 BasicBlock *LibCallBB = LibCallTerm->getParent();
81 LibCallBB->setName("call.sqrt");
82 Builder.SetInsertPoint(LibCallTerm);
// The clone is taken before the original is marked memory(none) below, so the
// fallback call keeps the original call's attributes.
83 Instruction *LibCall = Call->clone();
84 Builder.Insert(LibCall);
85
86 // Add memory(none) attribute, so that the backend can use a native sqrt
87 // instruction for this call.
88 Call->setDoesNotAccessMemory();
89
90 // Insert a FP compare instruction and use it as the CurrBB branch condition.
91 Builder.SetInsertPoint(CurrBBTerm);
// NOTE(review): listing line 92 is absent from this extract. It presumably
// declares FCmp and picks between the two compares below based on a target
// query — confirm. The first compare tests that the fast result is ordered
// (not NaN); the second tests that the input operand is >= 0.0.
93 ? Builder.CreateFCmpORD(Call, Call)
94 : Builder.CreateFCmpOGE(Call->getOperand(0),
95 ConstantFP::get(Ty, 0.0));
96 CurrBBTerm->setCondition(FCmp);
97
98 // Add phi operands.
99 Phi->addIncoming(Call, &CurrBB);
100 Phi->addIncoming(LibCall, LibCallBB);
101
// Hand the tail block back through the by-reference iterator so the caller's
// block loop continues from there.
102 BB = JoinBB->getIterator();
103 return true;
104}
105
108 DominatorTree *DT,
// NOTE(review): listing lines 106-107 and 109 are absent from this extract, so
// the start of the signature, the last parameter and the opening brace are
// not visible here.
//
// Driver for the pass: walks every basic block of F looking for direct calls
// to the sqrt/sqrtf library functions and hands eligible ones to
// optimizeSQRT. Returns true if at least one call was transformed.
110 std::optional<DomTreeUpdater> DTU;
// Only build a DomTreeUpdater when a dominator tree was supplied.
111 if (DT)
112 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
113
114 bool Changed = false;
115
116 Function::iterator CurrBB;
117 for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
// BB is advanced up front; optimizeSQRT takes it by reference and repoints it
// at the split-off tail block when it transforms a call.
118 CurrBB = BB++;
119
120 for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
121 II != IE; ++II) {
122 CallInst *Call = dyn_cast<CallInst>(&*II);
123 Function *CalledFunc;
124
// Skip anything that is not a call with a statically known callee.
125 if (!Call || !(CalledFunc = Call->getCalledFunction()))
126 continue;
127
128 if (Call->isNoBuiltin() || Call->isStrictFP())
129 continue;
130
131 if (Call->isMustTailCall())
132 continue;
133
134 // Skip if function either has local linkage or is not a known library
135 // function.
136 LibFunc LF;
137 if (CalledFunc->hasLocalLinkage() ||
138 !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF))
139 continue;
140
141 switch (LF) {
142 case LibFunc_sqrtf:
143 case LibFunc_sqrt:
// `break` leaves the switch and falls through to the "changed" handling
// below; `continue` moves on to the next instruction in the block.
144 if (TTI->haveFastSqrt(Call->getType()) &&
145 optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
146 DTU ? &*DTU : nullptr, ORE))
147 break;
148 continue;
149 default:
150 continue;
151 }
152
// A successful transform split CurrBB, so stop scanning this block; the outer
// loop resumes from BB, which optimizeSQRT has updated.
153 Changed = true;
154 break;
155 }
156 }
157
158 return Changed;
159}
160
// New pass manager entry point for the pass.
// NOTE(review): listing lines 161-162, 165-166 and 169-170 are absent from
// this extract, so the function header and the declarations of DT, ORE and PA
// are not visible here — confirm against the real file.
163 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
164 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
// When nothing was transformed, report every analysis as preserved.
167 if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT, &ORE))
168 return PreservedAnalyses::all();
171 return PA;
172}
173
174namespace {
// Legacy pass manager wrapper: collects the analyses it needs and forwards to
// runPartiallyInlineLibCalls.
175class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
176public:
177 static char ID;
178
179 PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) {
// NOTE(review): listing lines 180-181 (the constructor body) are absent from
// this extract.
182 }
183
184 void getAnalysisUsage(AnalysisUsage &AU) const override {
// NOTE(review): listing lines 185-188 are absent from this extract; they
// presumably declare the required/preserved analyses — confirm.
189 FunctionPass::getAnalysisUsage(AU);
190 }
191
192 bool runOnFunction(Function &F) override {
193 if (skipFunction(F))
194 return false;
195
196 TargetLibraryInfo *TLI =
197 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
198 const TargetTransformInfo *TTI =
199 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
// The dominator tree is optional: it is used only if already available, and
// DT stays null otherwise.
200 DominatorTree *DT = nullptr;
201 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
202 DT = &DTWP->getDomTree();
203 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
204 return runPartiallyInlineLibCalls(F, TLI, TTI, DT, ORE);
205 }
206};
207}
208
// Definition of the legacy pass's static ID, followed by its registration
// under the command-line name "partially-inline-libcalls".
209char PartiallyInlineLibCallsLegacyPass::ID = 0;
210INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
211 "partially-inline-libcalls",
212 "Partially inline calls to library functions", false,
213 false)
// NOTE(review): listing lines 214-217 are absent from this extract; they
// presumably list the pass's analysis dependencies — confirm.
218INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
219 "partially-inline-libcalls",
220 "Partially inline calls to library functions", false, false)
221
// Factory body: returns a newly allocated legacy pass instance.
// NOTE(review): listing line 222 (the function signature) is absent from this
// extract.
223 return new PartiallyInlineLibCallsLegacyPass();
224}
Lower uses of LDS variables from non kernel functions
always inline
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
static bool runOnFunction(Function &F, bool PostInlining)
#define F(x, y, z)
Definition: MD5.cpp:55
uint64_t IntrinsicInst * II
static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, DominatorTree *DT, OptimizationRemarkEmitter *ORE)
static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, BasicBlock &CurrBB, Function::iterator &BB, const TargetTransformInfo *TTI, DomTreeUpdater *DTU, OptimizationRemarkEmitter *ORE)
partially inline libcalls
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
Replace intrinsics with calls to vector library
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:429
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.h:239
This class represents a function call, abstracting a target machine's calling convention.
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
BasicBlockListType::iterator iterator
Definition: Function.h:68
bool hasLocalLinkage() const
Definition: GlobalValue.h:529
Value * CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2340
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:485
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2435
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:164
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
Value * CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2320
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a comparison against a constant FP zero value.
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry &)
FunctionPass * createPartiallyInlineLibCallsPass()
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.