LLVM  12.0.0git
PartiallyInlineLibCalls.cpp
Go to the documentation of this file.
1 //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass tries to partially inline the fast path of well-known library
10 // functions, such as using square-root instructions for cases where sqrt()
11 // does not need to set errno.
12 //
13 //===----------------------------------------------------------------------===//
14 
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/InitializePasses.h"
21 #include "llvm/Transforms/Scalar.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "partially-inline-libcalls"
27 
28 DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
29  "Controls transformations in partially-inline-libcalls");
30 
31 static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
32  BasicBlock &CurrBB, Function::iterator &BB,
33  const TargetTransformInfo *TTI) {
34  // There is no need to change the IR, since backend will emit sqrt
35  // instruction if the call has already been marked read-only.
36  if (Call->onlyReadsMemory())
37  return false;
38 
39  if (!DebugCounter::shouldExecute(PILCounter))
40  return false;
41 
42  // Do the following transformation:
43  //
44  // (before)
45  // dst = sqrt(src)
46  //
47  // (after)
48  // v0 = sqrt_noreadmem(src) # native sqrt instruction.
49  // [if (v0 is a NaN) || if (src < 0)]
50  // v1 = sqrt(src) # library call.
51  // dst = phi(v0, v1)
52  //
53 
54  // Move all instructions following Call to newly created block JoinBB.
55  // Create phi and replace all uses.
56  BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode());
57  IRBuilder<> Builder(JoinBB, JoinBB->begin());
58  Type *Ty = Call->getType();
59  PHINode *Phi = Builder.CreatePHI(Ty, 2);
60  Call->replaceAllUsesWith(Phi);
61 
62  // Create basic block LibCallBB and insert a call to library function sqrt.
63  BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
64  CurrBB.getParent(), JoinBB);
65  Builder.SetInsertPoint(LibCallBB);
66  Instruction *LibCall = Call->clone();
67  Builder.Insert(LibCall);
68  Builder.CreateBr(JoinBB);
69 
70  // Add attribute "readnone" so that backend can use a native sqrt instruction
71  // for this call. Insert a FP compare instruction and a conditional branch
72  // at the end of CurrBB.
73  Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
74  CurrBB.getTerminator()->eraseFromParent();
75  Builder.SetInsertPoint(&CurrBB);
77  ? Builder.CreateFCmpORD(Call, Call)
78  : Builder.CreateFCmpOGE(Call->getOperand(0),
79  ConstantFP::get(Ty, 0.0));
80  Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
81 
82  // Add phi operands.
83  Phi->addIncoming(Call, &CurrBB);
84  Phi->addIncoming(LibCall, LibCallBB);
85 
86  BB = JoinBB->getIterator();
87  return true;
88 }
89 
91  const TargetTransformInfo *TTI) {
92  bool Changed = false;
93 
94  Function::iterator CurrBB;
95  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
96  CurrBB = BB++;
97 
98  for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
99  II != IE; ++II) {
100  CallInst *Call = dyn_cast<CallInst>(&*II);
101  Function *CalledFunc;
102 
103  if (!Call || !(CalledFunc = Call->getCalledFunction()))
104  continue;
105 
106  if (Call->isNoBuiltin())
107  continue;
108 
109  // Skip if function either has local linkage or is not a known library
110  // function.
111  LibFunc LF;
112  if (CalledFunc->hasLocalLinkage() ||
113  !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF))
114  continue;
115 
116  switch (LF) {
117  case LibFunc_sqrtf:
118  case LibFunc_sqrt:
119  if (TTI->haveFastSqrt(Call->getType()) &&
120  optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI))
121  break;
122  continue;
123  default:
124  continue;
125  }
126 
127  Changed = true;
128  break;
129  }
130  }
131 
132  return Changed;
133 }
134 
137  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
138  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
139  if (!runPartiallyInlineLibCalls(F, &TLI, &TTI))
140  return PreservedAnalyses::all();
141  return PreservedAnalyses::none();
142 }
143 
144 namespace {
145 class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
146 public:
147  static char ID;
148 
149  PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) {
152  }
153 
154  void getAnalysisUsage(AnalysisUsage &AU) const override {
158  }
159 
160  bool runOnFunction(Function &F) override {
161  if (skipFunction(F))
162  return false;
163 
164  TargetLibraryInfo *TLI =
165  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
166  const TargetTransformInfo *TTI =
167  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
168  return runPartiallyInlineLibCalls(F, TLI, TTI);
169  }
170 };
171 }
172 
174 INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
175  "partially-inline-libcalls",
176  "Partially inline calls to library functions", false,
177  false)
180 INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
181  "partially-inline-libcalls",
182  "Partially inline calls to library functions", false, false)
183 
185  return new PartiallyInlineLibCallsLegacyPass();
186 }
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:785
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This class represents a function call, abstracting a target machine's calling convention.
Analysis pass providing the TargetTransformInfo.
F(f)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
partially inline Partially calls to library functions
AnalysisUsage & addRequired()
INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass, "partially-inline-libcalls", "Partially inline calls to library functions", false, false) INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass
This file provides an implementation of debug counters.
DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform", "Controls transformations in partially-inline-libcalls")
FunctionPass * createPartiallyInlineLibCallsPass()
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:93
bool has(LibFunc F) const
Tests whether a library function is available.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
static bool runOnFunction(Function &F, bool PostInlining)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI)
amdgpu inline
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
self_iterator getIterator()
Definition: ilist_node.h:81
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:74
assume Assume Builder
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Iterator for intrusive lists based on ilist_node.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Provides information about what library functions are available for the current target.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:914
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Analysis pass providing the TargetLibraryInfo.
partially inline libcalls
LLVM Value Representation.
Definition: Value.h:75
amdgpu Simplify well known AMD library calls
static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, BasicBlock &CurrBB, Function::iterator &BB, const TargetTransformInfo *TTI)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a compariso...
A container for analyses that lazily runs them and caches their results.
This pass exposes codegen information to IR-level passes.
void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry &)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)