Bug Summary

File: lib/Target/AMDGPU/AMDGPULibCalls.cpp
Warning: line 1649, column 13
2nd function call argument is an uninitialized value
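
This message comes from the analyzer's call-argument checks: a call was found whose second argument reads a variable that is never written along the path the analyzer explored. A minimal sketch of the pattern (hypothetical code, not the trace from this file):

  #include <cmath>

  double demo(bool ok) {
    double x;                 // declared but not initialized
    if (ok)
      x = 0.5;
    return std::pow(2.0, x);  // when !ok, the 2nd argument is uninitialized
  }

In the annotated listing below, the flagged call site is at source line 1649.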

Annotated Source Code

1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief This file does AMD library function optimizations.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "amdgpu-simplifylib"
16
17#include "AMDGPU.h"
18#include "AMDGPULibFunc.h"
19#include "llvm/Analysis/AliasAnalysis.h"
20#include "llvm/Analysis/Loads.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/IR/Constants.h"
24#include "llvm/IR/DerivedTypes.h"
25#include "llvm/IR/Instructions.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/IR/Module.h"
30#include "llvm/IR/ValueSymbolTable.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33#include "llvm/Target/TargetOptions.h"
34#include <vector>
35#include <cmath>
36
37using namespace llvm;
38
39static cl::opt<bool> EnablePreLink("amdgpu-prelink",
40 cl::desc("Enable pre-link mode optimizations"),
41 cl::init(false),
42 cl::Hidden);
43
44static cl::list<std::string> UseNative("amdgpu-use-native",
45 cl::desc("Comma separated list of functions to replace with native, or all"),
46 cl::CommaSeparated, cl::ValueOptional,
47 cl::Hidden);
48
49#define MATH_PI     3.14159265358979323846264338327950288419716939937511
50#define MATH_E      2.71828182845904523536028747135266249775724709369996
51#define MATH_SQRT2  1.41421356237309504880168872420969807856967187537695
52
53#define MATH_LOG2E  1.4426950408889634073599246810018921374266459541529859
54#define MATH_LOG10E 0.4342944819032518276511289189166050822943970058036665
55// Value of log2(10)
56#define MATH_LOG2_10 3.3219280948873623478703194294893901758648313930245806
57// Value of 1 / log2(10)
58#define MATH_RLOG2_10 0.3010299956639811952137388947244930267681898814621085
59// Value of 1 / M_LOG2E_F = 1 / log2(e)
60#define MATH_RLOG2_E 0.6931471805599453094172321214581765680755001343602552
61
62namespace llvm {
63
64class AMDGPULibCalls {
65private:
66
67 typedef llvm::AMDGPULibFunc FuncInfo;
68
69 // -fuse-native.
70 bool AllNative = false;
71
72 bool useNativeFunc(const StringRef F) const;
73
74 // Return a pointer (pointer expr) to the function if a function definition with
75 // "FuncName" exists. It may create a new function prototype in pre-link mode.
76 Constant *getFunction(Module *M, const FuncInfo& fInfo);
77
78 // Replace a normal function with its native version.
79 bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
80
81 bool parseFunctionName(const StringRef& FMangledName,
82 FuncInfo *FInfo=nullptr /*out*/);
83
84 bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
85
86 /* Specialized optimizations */
87
88 // recip (half or native)
89 bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
90
91 // divide (half or native)
92 bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
93
94 // pow/powr/pown
95 bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
96
97 // rootn
98 bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
99
100 // fma/mad
101 bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
102
103 // -fuse-native for sincos
104 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
105
106 // Evaluate calls if the calls' arguments are constants.
107 bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
108 double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
109 bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
110
111 // exp
112 bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
113
114 // exp2
115 bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
116
117 // exp10
118 bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
119
120 // log
121 bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
122
123 // log2
124 bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
125
126 // log10
127 bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
128
129 // sqrt
130 bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
131
132 // sin/cos
133 bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
134
135 // __read_pipe/__write_pipe
136 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
137
138 // Get insertion point at entry.
139 BasicBlock::iterator getEntryIns(CallInst * UI);
140 // Insert an Alloc instruction.
141 AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
142 // Get a scalar native builtin single-argument FP function.
143 Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);
144
145protected:
146 CallInst *CI;
147
148 bool isUnsafeMath(const CallInst *CI) const;
149
150 void replaceCall(Value *With) {
151 CI->replaceAllUsesWith(With);
152 CI->eraseFromParent();
153 }
154
155public:
156 bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
157
158 void initNativeFuncs();
159
160 // Replace a normal math function call with its native version.
161 bool useNative(CallInst *CI);
162};
163
164} // end llvm namespace
165
166namespace {
167
168 class AMDGPUSimplifyLibCalls : public FunctionPass {
169
170 AMDGPULibCalls Simplifier;
171
172 const TargetOptions Options;
173
174 public:
175 static char ID; // Pass identification
176
177 AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
178 : FunctionPass(ID), Options(Opt) {
179 initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
180 }
181
182 void getAnalysisUsage(AnalysisUsage &AU) const override {
183 AU.addRequired<AAResultsWrapperPass>();
184 }
185
186 bool runOnFunction(Function &M) override;
187 };
188
189 class AMDGPUUseNativeCalls : public FunctionPass {
190
191 AMDGPULibCalls Simplifier;
192
193 public:
194 static char ID; // Pass identification
195
196 AMDGPUUseNativeCalls() : FunctionPass(ID) {
197 initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
198 Simplifier.initNativeFuncs();
199 }
200
201 bool runOnFunction(Function &F) override;
202 };
203
204} // end anonymous namespace.
205
206char AMDGPUSimplifyLibCalls::ID = 0;
207char AMDGPUUseNativeCalls::ID = 0;
208
209INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
210                      "Simplify well-known AMD library calls", false, false)
211INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
212INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
213                    "Simplify well-known AMD library calls", false, false)
214
215INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
216                "Replace builtin math calls with that native versions.",
217                false, false)
218
219template <typename IRB>
220CallInst *CreateCallEx(IRB &B, Value *Callee, Value *Arg, const Twine &Name="")
221{
222 CallInst *R = B.CreateCall(Callee, Arg, Name);
223 if (Function* F = dyn_cast<Function>(Callee))
224 R->setCallingConv(F->getCallingConv());
225 return R;
226}
227
228template <typename IRB>
229CallInst *CreateCallEx2(IRB &B, Value *Callee, Value *Arg1, Value *Arg2,
230 const Twine &Name="") {
231 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
232 if (Function* F = dyn_cast<Function>(Callee))
233 R->setCallingConv(F->getCallingConv());
234 return R;
235}
236
237// Data structures for table-driven optimizations.
238// FuncTbl works for both f32 and f64 functions with 1 input argument
239
240struct TableEntry {
241 double result;
242 double input;
243};
244
245/* a list of {result, input} */
246static const TableEntry tbl_acos[] = {
247 {MATH_PI/2.0, 0.0},
248 {MATH_PI/2.0, -0.0},
249 {0.0, 1.0},
250 {MATH_PI, -1.0}
251};
252static const TableEntry tbl_acosh[] = {
253 {0.0, 1.0}
254};
255static const TableEntry tbl_acospi[] = {
256 {0.5, 0.0},
257 {0.5, -0.0},
258 {0.0, 1.0},
259 {1.0, -1.0}
260};
261static const TableEntry tbl_asin[] = {
262 {0.0, 0.0},
263 {-0.0, -0.0},
264 {MATH_PI/2.0, 1.0},
265 {-MATH_PI/2.0, -1.0}
266};
267static const TableEntry tbl_asinh[] = {
268 {0.0, 0.0},
269 {-0.0, -0.0}
270};
271static const TableEntry tbl_asinpi[] = {
272 {0.0, 0.0},
273 {-0.0, -0.0},
274 {0.5, 1.0},
275 {-0.5, -1.0}
276};
277static const TableEntry tbl_atan[] = {
278 {0.0, 0.0},
279 {-0.0, -0.0},
280 {MATH_PI/4.0, 1.0},
281 {-MATH_PI/4.0, -1.0}
282};
283static const TableEntry tbl_atanh[] = {
284 {0.0, 0.0},
285 {-0.0, -0.0}
286};
287static const TableEntry tbl_atanpi[] = {
288 {0.0, 0.0},
289 {-0.0, -0.0},
290 {0.25, 1.0},
291 {-0.25, -1.0}
292};
293static const TableEntry tbl_cbrt[] = {
294 {0.0, 0.0},
295 {-0.0, -0.0},
296 {1.0, 1.0},
297 {-1.0, -1.0},
298};
299static const TableEntry tbl_cos[] = {
300 {1.0, 0.0},
301 {1.0, -0.0}
302};
303static const TableEntry tbl_cosh[] = {
304 {1.0, 0.0},
305 {1.0, -0.0}
306};
307static const TableEntry tbl_cospi[] = {
308 {1.0, 0.0},
309 {1.0, -0.0}
310};
311static const TableEntry tbl_erfc[] = {
312 {1.0, 0.0},
313 {1.0, -0.0}
314};
315static const TableEntry tbl_erf[] = {
316 {0.0, 0.0},
317 {-0.0, -0.0}
318};
319static const TableEntry tbl_exp[] = {
320 {1.0, 0.0},
321 {1.0, -0.0},
322 {MATH_E, 1.0}
323};
324static const TableEntry tbl_exp2[] = {
325 {1.0, 0.0},
326 {1.0, -0.0},
327 {2.0, 1.0}
328};
329static const TableEntry tbl_exp10[] = {
330 {1.0, 0.0},
331 {1.0, -0.0},
332 {10.0, 1.0}
333};
334static const TableEntry tbl_expm1[] = {
335 {0.0, 0.0},
336 {-0.0, -0.0}
337};
338static const TableEntry tbl_log[] = {
339 {0.0, 1.0},
340 {1.0, MATH_E}
341};
342static const TableEntry tbl_log2[] = {
343 {0.0, 1.0},
344 {1.0, 2.0}
345};
346static const TableEntry tbl_log10[] = {
347 {0.0, 1.0},
348 {1.0, 10.0}
349};
350static const TableEntry tbl_rsqrt[] = {
351 {1.0, 1.0},
352 {1.0/MATH_SQRT2, 2.0}
353};
354static const TableEntry tbl_sin[] = {
355 {0.0, 0.0},
356 {-0.0, -0.0}
357};
358static const TableEntry tbl_sinh[] = {
359 {0.0, 0.0},
360 {-0.0, -0.0}
361};
362static const TableEntry tbl_sinpi[] = {
363 {0.0, 0.0},
364 {-0.0, -0.0}
365};
366static const TableEntry tbl_sqrt[] = {
367 {0.0, 0.0},
368 {1.0, 1.0},
369 {MATH_SQRT2, 2.0}
370};
371static const TableEntry tbl_tan[] = {
372 {0.0, 0.0},
373 {-0.0, -0.0}
374};
375static const TableEntry tbl_tanh[] = {
376 {0.0, 0.0},
377 {-0.0, -0.0}
378};
379static const TableEntry tbl_tanpi[] = {
380 {0.0, 0.0},
381 {-0.0, -0.0}
382};
383static const TableEntry tbl_tgamma[] = {
384 {1.0, 1.0},
385 {1.0, 2.0},
386 {2.0, 3.0},
387 {6.0, 4.0}
388};
389
390static bool HasNative(AMDGPULibFunc::EFuncId id) {
391 switch(id) {
392 case AMDGPULibFunc::EI_DIVIDE:
393 case AMDGPULibFunc::EI_COS:
394 case AMDGPULibFunc::EI_EXP:
395 case AMDGPULibFunc::EI_EXP2:
396 case AMDGPULibFunc::EI_EXP10:
397 case AMDGPULibFunc::EI_LOG:
398 case AMDGPULibFunc::EI_LOG2:
399 case AMDGPULibFunc::EI_LOG10:
400 case AMDGPULibFunc::EI_POWR:
401 case AMDGPULibFunc::EI_RECIP:
402 case AMDGPULibFunc::EI_RSQRT:
403 case AMDGPULibFunc::EI_SIN:
404 case AMDGPULibFunc::EI_SINCOS:
405 case AMDGPULibFunc::EI_SQRT:
406 case AMDGPULibFunc::EI_TAN:
407 return true;
408 default:;
409 }
410 return false;
411}
412
413struct TableRef {
414 size_t size;
415 const TableEntry *table; // variable size: from 0 to (size - 1)
416
417 TableRef() : size(0), table(nullptr) {}
418
419 template <size_t N>
420 TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
421};
422
423static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
424 switch(id) {
425 case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos);
426 case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh);
427 case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi);
428 case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin);
429 case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh);
430 case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi);
431 case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan);
432 case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh);
433 case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi);
434 case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt);
435 case AMDGPULibFunc::EI_NCOS:
436 case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
437 case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh);
438 case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi);
439 case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc);
440 case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
441 case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
442 case AMDGPULibFunc::EI_NEXP2:
443 case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2);
444 case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10);
445 case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1);
446 case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
447 case AMDGPULibFunc::EI_NLOG2:
448 case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2);
449 case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10);
450 case AMDGPULibFunc::EI_NRSQRT:
451 case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt);
452 case AMDGPULibFunc::EI_NSIN:
453 case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
454 case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh);
455 case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi);
456 case AMDGPULibFunc::EI_NSQRT:
457 case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt);
458 case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
459 case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh);
460 case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi);
461 case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma);
462 default:;
463 }
464 return TableRef();
465}
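
getOptTable() maps a function id to its {result, input} table; TDOFold() further below then replaces a call whose constant argument exactly matches a table input with the paired result. A self-contained sketch of that lookup (names here are illustrative, not the pass's API):

  struct Entry { double result, input; };

  static const Entry AcosTable[] = {
    {0.0, 1.0},                      // acos(1.0) == 0.0
  };

  // Returns true and sets Res when Arg has an exact table match;
  // the caller then replaces the call with the constant Res.
  static bool foldFromTable(const Entry *Tbl, int Size, double Arg,
                            double &Res) {
    for (int I = 0; I < Size; ++I)
      if (Arg == Tbl[I].input) {     // exact match, like isExactlyValue()
        Res = Tbl[I].result;
        return true;
      }
    return false;                    // no match: leave the call untouched
  }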
466
467static inline int getVecSize(const AMDGPULibFunc& FInfo) {
468 return FInfo.getLeads()[0].VectorSize;
469}
470
471static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
472 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
473}
474
475Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
476 // If we are doing PreLinkOpt, the function is external. So it is safe to
477 // use getOrInsertFunction() at this stage.
478
479 return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
480 : AMDGPULibFunc::getFunction(M, fInfo);
481}
482
483bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
484 FuncInfo *FInfo) {
485 return AMDGPULibFunc::parse(FMangledName, *FInfo);
486}
487
488bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
489 if (auto Op = dyn_cast<FPMathOperator>(CI))
490 if (Op->isFast())
491 return true;
492 const Function *F = CI->getParent()->getParent();
493 Attribute Attr = F->getFnAttribute("unsafe-fp-math");
494 return Attr.getValueAsString() == "true";
495}
496
497bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
498 return AllNative ||
499 std::find(UseNative.begin(), UseNative.end(), F) != UseNative.end();
500}
501
502void AMDGPULibCalls::initNativeFuncs() {
503 AllNative = useNativeFunc("all") ||
504 (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
505 UseNative.begin()->empty());
506}
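
AllNative thus becomes true for an explicit "all" as well as for a bare, valueless occurrence of the flag. Assuming an opt binary in which these passes are registered, an invocation might look like this (hypothetical command lines, not taken from this report):

  opt -amdgpu-usenative -amdgpu-use-native=sin,cos in.bc -o out.bc   # sin/cos only
  opt -amdgpu-usenative -amdgpu-use-native in.bc -o out.bc           # everything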
507
508bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
509 bool native_sin = useNativeFunc("sin");
510 bool native_cos = useNativeFunc("cos");
511
512 if (native_sin && native_cos) {
513 Module *M = aCI->getModule();
514 Value *opr0 = aCI->getArgOperand(0);
515
516 AMDGPULibFunc nf;
517 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
518 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
519
520 nf.setPrefix(AMDGPULibFunc::NATIVE);
521 nf.setId(AMDGPULibFunc::EI_SIN);
522 Constant *sinExpr = getFunction(M, nf);
523
524 nf.setPrefix(AMDGPULibFunc::NATIVE);
525 nf.setId(AMDGPULibFunc::EI_COS);
526 Constant *cosExpr = getFunction(M, nf);
527 if (sinExpr && cosExpr) {
528 Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
529 Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
530 new StoreInst(cosval, aCI->getArgOperand(1), aCI);
531
532      DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
533                      << " with native version of sin/cos");
534
535 replaceCall(sinval);
536 return true;
537 }
538 }
539 return false;
540}
541
542bool AMDGPULibCalls::useNative(CallInst *aCI) {
543 CI = aCI;
544 Function *Callee = aCI->getCalledFunction();
545
546 FuncInfo FInfo;
547 if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
548 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
549 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
550 !(AllNative || useNativeFunc(FInfo.getName()))) {
551 return false;
552 }
553
554 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
555 return sincosUseNative(aCI, FInfo);
556
557 FInfo.setPrefix(AMDGPULibFunc::NATIVE);
558 Constant *F = getFunction(aCI->getModule(), FInfo);
559 if (!F)
560 return false;
561
562 aCI->setCalledFunction(F);
563  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
564                  << " with native version");
565 return true;
566}
567
568// Clang emits a call to __read_pipe_2 or __read_pipe_4 for the OpenCL
569// read_pipe builtin, with type size and alignment arguments appended; the 2
570// or 4 is the original number of arguments. The library has an optimized
571// version of __read_pipe_2/__read_pipe_4 for the case where the type size
572// and alignment are the same power-of-2 value. This function transforms
573// __read_pipe_2 into __read_pipe_2_N, where N is the type's size in bytes
574// (N = 1, 2, 4, 8, ..., 128); likewise for __read_pipe_4 and write_pipe_2/4.
575bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
576 FuncInfo &FInfo) {
577 auto *Callee = CI->getCalledFunction();
578 if (!Callee->isDeclaration())
579 return false;
580
581  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
582 auto *M = Callee->getParent();
583 auto &Ctx = M->getContext();
584 std::string Name = Callee->getName();
585 auto NumArg = CI->getNumArgOperands();
586 if (NumArg != 4 && NumArg != 6)
587 return false;
588 auto *PacketSize = CI->getArgOperand(NumArg - 2);
589 auto *PacketAlign = CI->getArgOperand(NumArg - 1);
590 if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
591 return false;
592 unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
593 unsigned Align = cast<ConstantInt>(PacketAlign)->getZExtValue();
594 if (Size != Align || !isPowerOf2_32(Size))
595 return false;
596
597 Type *PtrElemTy;
598 if (Size <= 8)
599 PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
600 else
601 PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
602 unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
603 auto PtrArg = CI->getArgOperand(PtrArgLoc);
604 unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
605 auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
606
607 SmallVector<llvm::Type *, 6> ArgTys;
608 for (unsigned I = 0; I != PtrArgLoc; ++I)
609 ArgTys.push_back(CI->getArgOperand(I)->getType());
610 ArgTys.push_back(PtrTy);
611
612 Name = Name + "_" + std::to_string(Size);
613 auto *FTy = FunctionType::get(Callee->getReturnType(),
614 ArrayRef<Type *>(ArgTys), false);
615 AMDGPULibFunc NewLibFunc(Name, FTy);
616 auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
617 if (!F)
618 return false;
619
620 auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
621 SmallVector<Value *, 6> Args;
622 for (unsigned I = 0; I != PtrArgLoc; ++I)
623 Args.push_back(CI->getArgOperand(I));
624 Args.push_back(BCast);
625
626 auto *NCI = B.CreateCall(F, Args);
627 NCI->setAttributes(CI->getAttributes());
628 CI->replaceAllUsesWith(NCI);
629 CI->dropAllReferences();
630 CI->eraseFromParent();
631
632 return true;
633}
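
A schematic before/after for the transform above, with a hypothetical 4-byte packet whose alignment is also 4 (both constants, so the fold applies): the trailing size/align arguments are dropped, the name gains the _4 suffix, and the packet pointer is recast to a 4-byte element type:

  // before: generic entry point, size == align == 4
  __read_pipe_2(p, (void *)ptr, /*size=*/4, /*align=*/4);
  // after:  size-specialized entry point
  __read_pipe_2_4(p, (int *)ptr);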
634
635// This function returns false if there is no change; otherwise it returns true.
636bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
637 this->CI = CI;
638 Function *Callee = CI->getCalledFunction();
639
640 // Ignore indirect calls.
641 if (Callee == 0) return false;
642
643 FuncInfo FInfo;
644 if (!parseFunctionName(Callee->getName(), &FInfo))
645 return false;
646
647 // Further check the number of arguments to see if they match.
648 if (CI->getNumArgOperands() != FInfo.getNumArgs())
649 return false;
650
651 BasicBlock *BB = CI->getParent();
652 LLVMContext &Context = CI->getParent()->getContext();
653 IRBuilder<> B(Context);
654
655 // Set the builder to the instruction after the call.
656 B.SetInsertPoint(BB, CI->getIterator());
657
658 // Copy fast flags from the original call.
659 if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
660 B.setFastMathFlags(FPOp->getFastMathFlags());
661
662 if (TDOFold(CI, FInfo))
663 return true;
664
665 // Under unsafe-math, evaluate calls if possible.
666 // According to Brian Sumner, we can do this for all f32 function calls
667 // using host's double function calls.
668 if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
669 return true;
670
671  // Specialized optimizations for each function call.
672 switch (FInfo.getId()) {
673 case AMDGPULibFunc::EI_RECIP:
674 // skip vector function
675    assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
676             FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
677            "recip must be an either native or half function");
678 return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
679
680 case AMDGPULibFunc::EI_DIVIDE:
681 // skip vector function
682    assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
683             FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
684            "divide must be an either native or half function");
685 return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
686
687 case AMDGPULibFunc::EI_POW:
688 case AMDGPULibFunc::EI_POWR:
689 case AMDGPULibFunc::EI_POWN:
690 return fold_pow(CI, B, FInfo);
691
692 case AMDGPULibFunc::EI_ROOTN:
693 // skip vector function
694 return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
695
696 case AMDGPULibFunc::EI_FMA:
697 case AMDGPULibFunc::EI_MAD:
698 case AMDGPULibFunc::EI_NFMA:
699 // skip vector function
700 return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
701
702 case AMDGPULibFunc::EI_SQRT:
703 return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
704 case AMDGPULibFunc::EI_COS:
705 case AMDGPULibFunc::EI_SIN:
706 if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
707 getArgType(FInfo) == AMDGPULibFunc::F64)
708 && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
709 return fold_sincos(CI, B, AA);
710
711 break;
712 case AMDGPULibFunc::EI_READ_PIPE_2:
713 case AMDGPULibFunc::EI_READ_PIPE_4:
714 case AMDGPULibFunc::EI_WRITE_PIPE_2:
715 case AMDGPULibFunc::EI_WRITE_PIPE_4:
716 return fold_read_write_pipe(CI, B, FInfo);
717
718 default:
719 break;
720 }
721
722 return false;
723}
724
725bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
726 // Table-Driven optimization
727 const TableRef tr = getOptTable(FInfo.getId());
728 if (tr.size==0)
729 return false;
730
731 int const sz = (int)tr.size;
732 const TableEntry * const ftbl = tr.table;
733 Value *opr0 = CI->getArgOperand(0);
734
735 if (getVecSize(FInfo) > 1) {
736 if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
737 SmallVector<double, 0> DVal;
738 for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
739 ConstantFP *eltval = dyn_cast<ConstantFP>(
740 CV->getElementAsConstant((unsigned)eltNo));
741        assert(eltval && "Non-FP arguments in math function!");
742 bool found = false;
743 for (int i=0; i < sz; ++i) {
744 if (eltval->isExactlyValue(ftbl[i].input)) {
745 DVal.push_back(ftbl[i].result);
746 found = true;
747 break;
748 }
749 }
750 if (!found) {
751          // This vector constant is not handled yet.
752 return false;
753 }
754 }
755 LLVMContext &context = CI->getParent()->getParent()->getContext();
756 Constant *nval;
757 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
758 SmallVector<float, 0> FVal;
759 for (unsigned i = 0; i < DVal.size(); ++i) {
760 FVal.push_back((float)DVal[i]);
761 }
762 ArrayRef<float> tmp(FVal);
763 nval = ConstantDataVector::get(context, tmp);
764 } else { // F64
765 ArrayRef<double> tmp(DVal);
766 nval = ConstantDataVector::get(context, tmp);
767 }
768      DEBUG(errs() << "AMDIC: " << *CI
769                   << " ---> " << *nval << "\n");
770 replaceCall(nval);
771 return true;
772 }
773 } else {
774 // Scalar version
775 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
776 for (int i = 0; i < sz; ++i) {
777 if (CF->isExactlyValue(ftbl[i].input)) {
778 Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
779          DEBUG(errs() << "AMDIC: " << *CI
780                       << " ---> " << *nval << "\n");
781 replaceCall(nval);
782 return true;
783 }
784 }
785 }
786 }
787
788 return false;
789}
790
791bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
792 Module *M = CI->getModule();
793 if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
794 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
795 !HasNative(FInfo.getId()))
796 return false;
797
798 AMDGPULibFunc nf = FInfo;
799 nf.setPrefix(AMDGPULibFunc::NATIVE);
800 if (Constant *FPExpr = getFunction(M, nf)) {
801    DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
802
803 CI->setCalledFunction(FPExpr);
804
805    DEBUG(dbgs() << *CI << '\n');
806
807 return true;
808 }
809 return false;
810}
811
812// [native_]half_recip(c) ==> 1.0/c
813bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
814 const FuncInfo &FInfo) {
815 Value *opr0 = CI->getArgOperand(0);
816 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
817 // Just create a normal div. Later, InstCombine will be able
818 // to compute the divide into a constant (avoid check float infinity
819 // or subnormal at this point).
820 Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
821 opr0,
822 "recip2div");
823    DEBUG(errs() << "AMDIC: " << *CI
824                 << " ---> " << *nval << "\n");
825 replaceCall(nval);
826 return true;
827 }
828 return false;
829}
830
831// [native_]half_divide(x, c) ==> x/c
832bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
833 const FuncInfo &FInfo) {
834 Value *opr0 = CI->getArgOperand(0);
835 Value *opr1 = CI->getArgOperand(1);
836 ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
837 ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
838
839 if ((CF0 && CF1) || // both are constants
840 (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
841 // CF1 is constant && f32 divide
842 {
843 Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
844 opr1, "__div2recip");
845 Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
846 replaceCall(nval);
847 return true;
848 }
849 return false;
850}
851
852namespace llvm {
853static double log2(double V) {
854#if _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L
855 return ::log2(V);
856#else
857 return log(V) / 0.693147180559945309417;
858#endif
859}
860}
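
The fallback branch uses the change-of-base identity; the magic constant is ln 2, so the quotient is exactly the base-2 logarithm:

  \[ \log_2 V = \frac{\ln V}{\ln 2} \approx \frac{\ln V}{0.693147180559945309417} \]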
861
862bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
863 const FuncInfo &FInfo) {
864  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
865          FInfo.getId() == AMDGPULibFunc::EI_POWR ||
866          FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
867         "fold_pow: encounter a wrong function call");
868
869 Value *opr0, *opr1;
870 ConstantFP *CF;
871 ConstantInt *CINT;
872 ConstantAggregateZero *CZero;
873 Type *eltType;
874
875 opr0 = CI->getArgOperand(0);
876 opr1 = CI->getArgOperand(1);
877 CZero = dyn_cast<ConstantAggregateZero>(opr1);
878 if (getVecSize(FInfo) == 1) {
879 eltType = opr0->getType();
880 CF = dyn_cast<ConstantFP>(opr1);
881 CINT = dyn_cast<ConstantInt>(opr1);
882 } else {
883 VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
884    assert(VTy && "Oprand of vector function should be of vectortype");
885 eltType = VTy->getElementType();
886 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
887
888    // For now, only handle vector constants whose elements all have the same value.
889 CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
890 CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
891 }
892
893  // No unsafe math, no constant argument: do nothing.
894 if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
895 return false;
896
897 // 0x1111111 means that we don't do anything for this call.
898 int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
899
900 if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
901 // pow/powr/pown(x, 0) == 1
902    DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
903 Constant *cnval = ConstantFP::get(eltType, 1.0);
904 if (getVecSize(FInfo) > 1) {
905 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
906 }
907 replaceCall(cnval);
908 return true;
909 }
910 if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
911 // pow/powr/pown(x, 1.0) = x
912    DEBUG(errs() << "AMDIC: " << *CI
913                 << " ---> " << *opr0 << "\n");
914 replaceCall(opr0);
915 return true;
916 }
917 if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
918 // pow/powr/pown(x, 2.0) = x*x
919    DEBUG(errs() << "AMDIC: " << *CI
920                 << " ---> " << *opr0 << " * " << *opr0 << "\n");
921 Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
922 replaceCall(nval);
923 return true;
924 }
925 if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
926 // pow/powr/pown(x, -1.0) = 1.0/x
927    DEBUG(errs() << "AMDIC: " << *CI
928                 << " ---> 1 / " << *opr0 << "\n");
929 Constant *cnval = ConstantFP::get(eltType, 1.0);
930 if (getVecSize(FInfo) > 1) {
931 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
932 }
933 Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
934 replaceCall(nval);
935 return true;
936 }
937
938 Module *M = CI->getModule();
939 if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
940 // pow[r](x, [-]0.5) = sqrt(x)
941 bool issqrt = CF->isExactlyValue(0.5);
942 if (Constant *FPExpr = getFunction(M,
943 AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
944 : AMDGPULibFunc::EI_RSQRT, FInfo))) {
945      DEBUG(errs() << "AMDIC: " << *CI << " ---> "
946                   << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
947 Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
948 : "__pow2rsqrt");
949 replaceCall(nval);
950 return true;
951 }
952 }
953
954 if (!isUnsafeMath(CI))
955 return false;
956
957 // Unsafe Math optimization
958
959 // Remember that ci_opr1 is set if opr1 is integral
960 if (CF) {
961 double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
962 ? (double)CF->getValueAPF().convertToFloat()
963 : CF->getValueAPF().convertToDouble();
964 int ival = (int)dval;
965 if ((double)ival == dval) {
966 ci_opr1 = ival;
967 } else
968 ci_opr1 = 0x11111111;
969 }
970
971 // pow/powr/pown(x, c) = [1/](x*x*..x); where
972 // trunc(c) == c && the number of x == c && |c| <= 12
973 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
974 if (abs_opr1 <= 12) {
975 Constant *cnval;
976 Value *nval;
977 if (abs_opr1 == 0) {
978 cnval = ConstantFP::get(eltType, 1.0);
979 if (getVecSize(FInfo) > 1) {
980 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
981 }
982 nval = cnval;
983 } else {
984 Value *valx2 = nullptr;
985 nval = nullptr;
986 while (abs_opr1 > 0) {
987 valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
988 if (abs_opr1 & 1) {
989 nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
990 }
991 abs_opr1 >>= 1;
992 }
993 }
994
995 if (ci_opr1 < 0) {
996 cnval = ConstantFP::get(eltType, 1.0);
997 if (getVecSize(FInfo) > 1) {
998 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
999 }
1000 nval = B.CreateFDiv(cnval, nval, "__1powprod");
1001 }
1002    DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1003                 << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 << ")\n");
1004 replaceCall(nval);
1005 return true;
1006 }
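
The valx2/nval loop above is binary exponentiation (square-and-multiply): each iteration squares the running power of x and folds it into the product when the low bit of the remaining exponent is set, so |c| <= 12 costs at most a few multiplies instead of |c| - 1. The same scheme on plain doubles, as a host-side sketch (not the pass's code):

  static double ipow(double X, unsigned N) {
    double Prod = 1.0;    // the pass avoids this initial 1.0 by starting
    double Pow2 = X;      // the product at the first contributing power
    while (N > 0) {
      if (N & 1)
        Prod *= Pow2;     // fold X^(2^k) into the result
      Pow2 *= Pow2;       // X -> X^2 -> X^4 -> ...
      N >>= 1;
    }
    return Prod;          // for negative c the pass emits 1.0 / Prod
  }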
1007
1008 // powr ---> exp2(y * log2(x))
1009 // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
1010 Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
1011 FInfo));
1012 if (!ExpExpr)
1013 return false;
1014
1015 bool needlog = false;
1016 bool needabs = false;
1017 bool needcopysign = false;
1018 Constant *cnval = nullptr;
1019 if (getVecSize(FInfo) == 1) {
1020 CF = dyn_cast<ConstantFP>(opr0);
1021
1022 if (CF) {
1023 double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1024 ? (double)CF->getValueAPF().convertToFloat()
1025 : CF->getValueAPF().convertToDouble();
1026
1027 V = log2(std::abs(V));
1028 cnval = ConstantFP::get(eltType, V);
1029 needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
1030 CF->isNegative();
1031 } else {
1032 needlog = true;
1033 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1034 (!CF || CF->isNegative());
1035 }
1036 } else {
1037 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
1038
1039 if (!CDV) {
1040 needlog = true;
1041 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
1042 } else {
1043      assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
1044              "Wrong vector size detected");
1045
1046 SmallVector<double, 0> DVal;
1047 for (int i=0; i < getVecSize(FInfo); ++i) {
1048 double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1049 ? (double)CDV->getElementAsFloat(i)
1050 : CDV->getElementAsDouble(i);
1051 if (V < 0.0) needcopysign = true;
1052 V = log2(std::abs(V));
1053 DVal.push_back(V);
1054 }
1055 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1056 SmallVector<float, 0> FVal;
1057 for (unsigned i=0; i < DVal.size(); ++i) {
1058 FVal.push_back((float)DVal[i]);
1059 }
1060 ArrayRef<float> tmp(FVal);
1061 cnval = ConstantDataVector::get(M->getContext(), tmp);
1062 } else {
1063 ArrayRef<double> tmp(DVal);
1064 cnval = ConstantDataVector::get(M->getContext(), tmp);
1065 }
1066 }
1067 }
1068
1069 if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1070 // We cannot handle corner cases for a general pow() function, give up
1071 // unless y is a constant integral value. Then proceed as if it were pown.
1072 if (getVecSize(FInfo) == 1) {
1073 if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
1074 double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1075 ? (double)CF->getValueAPF().convertToFloat()
1076 : CF->getValueAPF().convertToDouble();
1077 if (y != (double)(int64_t)y)
1078 return false;
1079 } else
1080 return false;
1081 } else {
1082 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
1083 for (int i=0; i < getVecSize(FInfo); ++i) {
1084 double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1085 ? (double)CDV->getElementAsFloat(i)
1086 : CDV->getElementAsDouble(i);
1087 if (y != (double)(int64_t)y)
1088 return false;
1089 }
1090 } else
1091 return false;
1092 }
1093 }
1094
1095 Value *nval;
1096 if (needabs) {
1097 Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
1098 FInfo));
1099 if (!AbsExpr)
1100 return false;
1101 nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
1102 } else {
1103 nval = cnval ? cnval : opr0;
1104 }
1105 if (needlog) {
1106 Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
1107 FInfo));
1108 if (!LogExpr)
1109 return false;
1110 nval = CreateCallEx(B,LogExpr, nval, "__log2");
1111 }
1112
1113 if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1114 // convert int(32) to fp(f32 or f64)
1115 opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1116 }
1117 nval = B.CreateFMul(opr1, nval, "__ylogx");
1118 nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1119
1120 if (needcopysign) {
1121 Value *opr_n;
1122 Type* rTy = opr0->getType();
1123 Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
1124 Type *nTy = nTyS;
1125 if (const VectorType *vTy = dyn_cast<VectorType>(rTy))
1126 nTy = VectorType::get(nTyS, vTy->getNumElements());
1127 unsigned size = nTy->getScalarSizeInBits();
1128 opr_n = CI->getArgOperand(1);
1129 if (opr_n->getType()->isIntegerTy())
1130 opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
1131 else
1132 opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1133
1134 Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1135 sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1136 nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1137 nval = B.CreateBitCast(nval, opr0->getType());
1138 }
1139
1140  DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1141               << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1142 replaceCall(nval);
1143
1144 return true;
1145}
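
The tail of fold_pow rests on a standard identity, plus a bitwise sign fix-up for pow/pown with negative x and an integral exponent:

  \[ \mathrm{powr}(x, y) = 2^{\,y \log_2 x}, \qquad \mathrm{pown}(x, n) = (-1)^{[x<0]\,(n \bmod 2)} \cdot 2^{\,n \log_2 |x|} \]

The sign term is realized without a branch: the exponent's low bit is shifted into the sign-bit position, AND-ed with the bit pattern of x (so it survives only when x is negative), and OR-ed into the bit pattern of the exp2 result.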
1146
1147bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
1148 const FuncInfo &FInfo) {
1149 Value *opr0 = CI->getArgOperand(0);
1150 Value *opr1 = CI->getArgOperand(1);
1151
1152 ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
1153 if (!CINT) {
1154 return false;
1155 }
1156 int ci_opr1 = (int)CINT->getSExtValue();
1157 if (ci_opr1 == 1) { // rootn(x, 1) = x
1158    DEBUG(errs() << "AMDIC: " << *CI
1159                 << " ---> " << *opr0 << "\n");
1160 replaceCall(opr0);
1161 return true;
1162 }
1163 if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
1164 std::vector<const Type*> ParamsTys;
1165 ParamsTys.push_back(opr0->getType());
1166 Module *M = CI->getModule();
1167 if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
1168 FInfo))) {
1169      DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
1170 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
1171 replaceCall(nval);
1172 return true;
1173 }
1174 } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1175 Module *M = CI->getModule();
1176 if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
1177 FInfo))) {
1178      DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
1179 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1180 replaceCall(nval);
1181 return true;
1182 }
1183 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1184    DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
1185 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1186 opr0,
1187 "__rootn2div");
1188 replaceCall(nval);
1189 return true;
1190 } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
1191 std::vector<const Type*> ParamsTys;
1192 ParamsTys.push_back(opr0->getType());
1193 Module *M = CI->getModule();
1194 if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
1195 FInfo))) {
1196      DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0 << ")\n");
1197 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
1198 replaceCall(nval);
1199 return true;
1200 }
1201 }
1202 return false;
1203}
1204
1205bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
1206 const FuncInfo &FInfo) {
1207 Value *opr0 = CI->getArgOperand(0);
1208 Value *opr1 = CI->getArgOperand(1);
1209 Value *opr2 = CI->getArgOperand(2);
1210
1211 ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
1212 ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
1213 if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
1214 // fma/mad(a, b, c) = c if a=0 || b=0
1215    DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
1216 replaceCall(opr2);
1217 return true;
1218 }
1219 if (CF0 && CF0->isExactlyValue(1.0f)) {
1220 // fma/mad(a, b, c) = b+c if a=1
1221    DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1222                 << *opr1 << " + " << *opr2 << "\n");
1223 Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
1224 replaceCall(nval);
1225 return true;
1226 }
1227 if (CF1 && CF1->isExactlyValue(1.0f)) {
1228 // fma/mad(a, b, c) = a+c if b=1
1229    DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1230                 << *opr0 << " + " << *opr2 << "\n");
1231 Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
1232 replaceCall(nval);
1233 return true;
1234 }
1235 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
1236 if (CF->isZero()) {
1237 // fma/mad(a, b, c) = a*b if c=0
1238      DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1239                   << *opr0 << " * " << *opr1 << "\n");
1240 Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
1241 replaceCall(nval);
1242 return true;
1243 }
1244 }
1245
1246 return false;
1247}
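
The algebraic identities used by fold_fma_mad can be spot-checked in plain C++ with std::fma; a minimal standalone sketch (illustrative only, not part of the pass; fma applies a single rounding, so these comparisons hold exactly for the chosen inputs):

#include <cassert>
#include <cmath>

int main() {
  double a = 2.5, b = -3.0, c = 0.75;
  assert(std::fma(0.0, b, c) == c);      // fma/mad(0, b, c) == c
  assert(std::fma(a, 0.0, c) == c);      // fma/mad(a, 0, c) == c
  assert(std::fma(1.0, b, c) == b + c);  // fma/mad(1, b, c) == b + c
  assert(std::fma(a, 1.0, c) == a + c);  // fma/mad(a, 1, c) == a + c
  assert(std::fma(a, b, 0.0) == a * b);  // fma/mad(a, b, 0) == a * b
  return 0;
}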
1248
1249// Get a scalar native builtin single-argument FP function
1250Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
1251 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1252 return nullptr;
1253 FuncInfo nf = FInfo;
1254 nf.setPrefix(AMDGPULibFunc::NATIVE);
1255 return getFunction(M, nf);
1256}
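// Expository note: setPrefix(NATIVE) requests the native_* flavor of the same
// builtin (e.g. sqrt -> native_sqrt). OpenCL defines the native_* math
// functions for 32-bit floats only, which is why the F64 check above bails
// out before the rewrite is attempted.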
1257
1258// fold sqrt -> native_sqrt (x)
1259bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
1260 const FuncInfo &FInfo) {
1261 if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1262 (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
1263 if (Constant *FPExpr = getNativeFunction(
1264 CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1265 Value *opr0 = CI->getArgOperand(0);
1266 DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1267 << "sqrt(" << *opr0 << ")\n");
1268 Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1269 replaceCall(nval);
1270 return true;
1271 }
1272 }
1273 return false;
1274}
1275
1276// fold sin, cos -> sincos.
1277bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
1278 AliasAnalysis *AA) {
1279 AMDGPULibFunc fInfo;
1280 if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
1281 return false;
1282
1283 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1284 fInfo.getId() == AMDGPULibFunc::EI_COS);
1285 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1286
1287 Value *CArgVal = CI->getArgOperand(0);
1288 BasicBlock * const CBB = CI->getParent();
1289
1290 int const MaxScan = 30;
1291
1292 { // fold in load value.
1293 LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
1294 if (LI && LI->getParent() == CBB) {
1295 BasicBlock::iterator BBI = LI->getIterator();
1296 Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
1297 if (AvailableVal) {
1298 CArgVal->replaceAllUsesWith(AvailableVal);
1299 if (CArgVal->getNumUses() == 0)
1300 LI->eraseFromParent();
1301 CArgVal = CI->getArgOperand(0);
1302 }
1303 }
1304 }
1305
1306 Module *M = CI->getModule();
1307 fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
1308 std::string const PairName = fInfo.mangle();
1309
1310 CallInst *UI = nullptr;
1311 for (User* U : CArgVal->users()) {
1312 CallInst *XI = dyn_cast_or_null<CallInst>(U);
1313 if (!XI || XI == CI || XI->getParent() != CBB)
1314 continue;
1315
1316 Function *UCallee = XI->getCalledFunction();
1317 if (!UCallee || !UCallee->getName().equals(PairName))
1318 continue;
1319
1320 BasicBlock::iterator BBI = CI->getIterator();
1321 if (BBI == CI->getParent()->begin())
1322 break;
1323 --BBI;
1324 for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
1325 if (cast<Instruction>(BBI) == XI) {
1326 UI = XI;
1327 break;
1328 }
1329 }
1330 if (UI) break;
1331 }
1332
1333 if (!UI) return false;
1334
1335 // Merge the sin and cos.
1336
1337 // For OpenCL 2.0 we have only the generic implementation of the
1338 // sincos function.
1339 AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
1340 const AMDGPUAS AS = AMDGPU::getAMDGPUAS(*M);
1341 nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AS.FLAT_ADDRESS);
1342 Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
1343 if (!Fsincos) return false;
1344
1345 BasicBlock::iterator ItOld = B.GetInsertPoint();
1346 AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
1347 B.SetInsertPoint(UI);
1348
1349 Value *P = Alloc;
1350 Type *PTy = Fsincos->getFunctionType()->getParamType(1);
1351 // The AllocaInst allocates the memory in the private address space. This
1352 // needs to be cast (addrspacecast) to the address space of the cos pointer
1353 // type: generic in OpenCL 2.0, private in 1.2.
1354 if (PTy->getPointerAddressSpace() != AS.PRIVATE_ADDRESS)
1355 P = B.CreateAddrSpaceCast(Alloc, PTy);
1356 CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
1357
1358 DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI
1359 << ") with " << *Call << "\n");
1360
1361 if (!isSin) { // CI->cos, UI->sin
1362 B.SetInsertPoint(&*ItOld);
1363 UI->replaceAllUsesWith(&*Call);
1364 Instruction *Reload = B.CreateLoad(Alloc);
1365 CI->replaceAllUsesWith(Reload);
1366 UI->eraseFromParent();
1367 CI->eraseFromParent();
1368 } else { // CI->sin, UI->cos
1369 Instruction *Reload = B.CreateLoad(Alloc);
1370 UI->replaceAllUsesWith(Reload);
1371 CI->replaceAllUsesWith(Call);
1372 UI->eraseFromParent();
1373 CI->eraseFromParent();
1374 }
1375 return true;
1376}
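
At the source level, the rewrite above turns a sin/cos pair on the same argument into one sincos call whose second result is returned through the entry-block alloca. A rough before/after (illustrative pseudocode, not taken from a test case):

//   before:                      after:
//     s = sin(x);                  tmp = the "__sincos_" alloca
//     c = cos(x);                  s = sincos(x, (generic float*)&tmp);
//                                  c = load from tmp;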
1377
1378// Get insertion point at entry.
1379BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
1380 Function * Func = UI->getParent()->getParent();
1381 BasicBlock * BB = &Func->getEntryBlock();
1382 assert(BB && "Entry block not found!");
1383 BasicBlock::iterator ItNew = BB->begin();
1384 return ItNew;
1385}
1386
1387// Insert an AllocaInst at the beginning of the function entry block.
1388AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
1389 const char *prefix) {
1390 BasicBlock::iterator ItNew = getEntryIns(UI);
1391 Function *UCallee = UI->getCalledFunction();
1392 Type *RetType = UCallee->getReturnType();
1393 B.SetInsertPoint(&*ItNew);
1394 AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
1395 std::string(prefix) + UI->getName());
1396 Alloc->setAlignment(UCallee->getParent()->getDataLayout()
1397 .getTypeAllocSize(RetType));
1398 return Alloc;
1399}
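// Note: getEntryIns places the alloca at the top of the entry block, so it is
// a static alloca in LLVM terms; the explicit alignment above is set to the
// return type's alloc size taken from the module's DataLayout.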
1400
1401bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
1402 double& Res0, double& Res1,
1403 Constant *copr0, Constant *copr1,
1404 Constant *copr2) {
1405 // By default, opr0/opr1/opr2 hold values of float/double type.
1406 // If they are not float/double, each function has to handle its
1407 // operands separately.
1408 double opr0=0.0, opr1=0.0, opr2=0.0;
1409 ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1410 ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1411 ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
1412 if (fpopr0) {
1413 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1414 ? fpopr0->getValueAPF().convertToDouble()
1415 : (double)fpopr0->getValueAPF().convertToFloat();
1416 }
1417
1418 if (fpopr1) {
1419 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1420 ? fpopr1->getValueAPF().convertToDouble()
1421 : (double)fpopr1->getValueAPF().convertToFloat();
1422 }
1423
1424 if (fpopr2) {
1425 opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1426 ? fpopr2->getValueAPF().convertToDouble()
1427 : (double)fpopr2->getValueAPF().convertToFloat();
1428 }
1429
1430 switch (FInfo.getId()) {
1431 default : return false;
1432
1433 case AMDGPULibFunc::EI_ACOS:
1434 Res0 = acos(opr0);
1435 return true;
1436
1437 case AMDGPULibFunc::EI_ACOSH:
1438 // acosh(x) == log(x + sqrt(x*x - 1))
1439 Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1440 return true;
1441
1442 case AMDGPULibFunc::EI_ACOSPI:
1443 Res0 = acos(opr0) / MATH_PI3.14159265358979323846264338327950288419716939937511;
1444 return true;
1445
1446 case AMDGPULibFunc::EI_ASIN:
1447 Res0 = asin(opr0);
1448 return true;
1449
1450 case AMDGPULibFunc::EI_ASINH:
1451 // asinh(x) == log(x + sqrt(x*x + 1))
1452 Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1453 return true;
1454
1455 case AMDGPULibFunc::EI_ASINPI:
1456 Res0 = asin(opr0) / MATH_PI3.14159265358979323846264338327950288419716939937511;
1457 return true;
1458
1459 case AMDGPULibFunc::EI_ATAN:
1460 Res0 = atan(opr0);
1461 return true;
1462
1463 case AMDGPULibFunc::EI_ATANH:
1464 // atanh(x) == (log(x+1) - log(x-1))/2;
1465 Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1466 return true;
1467
1468 case AMDGPULibFunc::EI_ATANPI:
1469 Res0 = atan(opr0) / MATH_PI3.14159265358979323846264338327950288419716939937511;
1470 return true;
1471
1472 case AMDGPULibFunc::EI_CBRT:
1473 Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1474 return true;
1475
1476 case AMDGPULibFunc::EI_COS:
1477 Res0 = cos(opr0);
1478 return true;
1479
1480 case AMDGPULibFunc::EI_COSH:
1481 Res0 = cosh(opr0);
1482 return true;
1483
1484 case AMDGPULibFunc::EI_COSPI:
1485 Res0 = cos(MATH_PI3.14159265358979323846264338327950288419716939937511 * opr0);
1486 return true;
1487
1488 case AMDGPULibFunc::EI_EXP:
1489 Res0 = exp(opr0);
1490 return true;
1491
1492 case AMDGPULibFunc::EI_EXP2:
1493 Res0 = pow(2.0, opr0);
1494 return true;
1495
1496 case AMDGPULibFunc::EI_EXP10:
1497 Res0 = pow(10.0, opr0);
1498 return true;
1499
1500 case AMDGPULibFunc::EI_EXPM1:
1501 Res0 = exp(opr0) - 1.0;
1502 return true;
1503
1504 case AMDGPULibFunc::EI_LOG:
1505 Res0 = log(opr0);
1506 return true;
1507
1508 case AMDGPULibFunc::EI_LOG2:
1509 Res0 = log(opr0) / log(2.0);
1510 return true;
1511
1512 case AMDGPULibFunc::EI_LOG10:
1513 Res0 = log(opr0) / log(10.0);
1514 return true;
1515
1516 case AMDGPULibFunc::EI_RSQRT:
1517 Res0 = 1.0 / sqrt(opr0);
1518 return true;
1519
1520 case AMDGPULibFunc::EI_SIN:
1521 Res0 = sin(opr0);
1522 return true;
1523
1524 case AMDGPULibFunc::EI_SINH:
1525 Res0 = sinh(opr0);
1526 return true;
1527
1528 case AMDGPULibFunc::EI_SINPI:
1529 Res0 = sin(MATH_PI3.14159265358979323846264338327950288419716939937511 * opr0);
1530 return true;
1531
1532 case AMDGPULibFunc::EI_SQRT:
1533 Res0 = sqrt(opr0);
1534 return true;
1535
1536 case AMDGPULibFunc::EI_TAN:
1537 Res0 = tan(opr0);
1538 return true;
1539
1540 case AMDGPULibFunc::EI_TANH:
1541 Res0 = tanh(opr0);
1542 return true;
1543
1544 case AMDGPULibFunc::EI_TANPI:
1545 Res0 = tan(MATH_PI3.14159265358979323846264338327950288419716939937511 * opr0);
1546 return true;
1547
1548 case AMDGPULibFunc::EI_RECIP:
1549 Res0 = 1.0 / opr0;
1550 return true;
1551
1552 // two-arg functions
1553 case AMDGPULibFunc::EI_DIVIDE:
1554 Res0 = opr0 / opr1;
1555 return true;
1556
1557 case AMDGPULibFunc::EI_POW:
1558 case AMDGPULibFunc::EI_POWR:
1559 Res0 = pow(opr0, opr1);
1560 return true;
1561
1562 case AMDGPULibFunc::EI_POWN: {
1563 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1564 double val = (double)iopr1->getSExtValue();
1565 Res0 = pow(opr0, val);
1566 return true;
1567 }
1568 return false;
1569 }
1570
1571 case AMDGPULibFunc::EI_ROOTN: {
1572 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1573 double val = (double)iopr1->getSExtValue();
1574 Res0 = pow(opr0, 1.0 / val);
1575 return true;
1576 }
1577 return false;
1578 }
1579
1580 // with ptr arg
1581 case AMDGPULibFunc::EI_SINCOS:
1582 Res0 = sin(opr0);
1583 Res1 = cos(opr0);
1584 return true;
1585
1586 // three-arg functions
1587 case AMDGPULibFunc::EI_FMA:
1588 case AMDGPULibFunc::EI_MAD:
1589 Res0 = opr0 * opr1 + opr2;
1590 return true;
1591 }
1592
1593 return false;
1594}
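
The closed forms used above can be spot-checked against <cmath>; a minimal standalone sketch (illustrative only, not part of the pass):

#include <cassert>
#include <cmath>

int main() {
  double x = 1.5;
  // EI_ACOSH: acosh(x) == log(x + sqrt(x*x - 1))
  assert(std::abs(std::acosh(x) - std::log(x + std::sqrt(x * x - 1.0))) < 1e-12);
  // EI_ASINH: asinh(x) == log(x + sqrt(x*x + 1))
  assert(std::abs(std::asinh(x) - std::log(x + std::sqrt(x * x + 1.0))) < 1e-12);
  // EI_CBRT: cbrt(x) == pow(x, 1/3) for x >= 0
  assert(std::abs(std::cbrt(x) - std::pow(x, 1.0 / 3.0)) < 1e-12);
  return 0;
}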
1595
1596bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
1597 int numArgs = (int)aCI->getNumArgOperands();
1598 if (numArgs > 3)
1. Assuming 'numArgs' is <= 3
2. Taking false branch
1599 return false;
1600
1601 Constant *copr0 = nullptr;
1602 Constant *copr1 = nullptr;
1603 Constant *copr2 = nullptr;
1604 if (numArgs > 0) {
3. Assuming 'numArgs' is <= 0
4. Taking false branch
1605 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1606 return false;
1607 }
1608
1609 if (numArgs > 1) {
5. Taking false branch
1610 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1611 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1612 return false;
1613 }
1614 }
1615
1616 if (numArgs > 2) {
6. Taking false branch
1617 if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
1618 return false;
1619 }
1620
1621 // At this point, all arguments to aCI are constants; for sincos the
// pointer result argument may be non-constant (allowed above).
1622
1623 // max vector size is 16, and sincos will generate two results.
1624 double DVal0[16], DVal1[16];
1625 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
7. Assuming the condition is false
1626 if (getVecSize(FInfo) == 1) {
8. Assuming the condition is false
9. Taking false branch
1627 if (!evaluateScalarMathFunc(FInfo, DVal0[0],
1628 DVal1[0], copr0, copr1, copr2)) {
1629 return false;
1630 }
1631 } else {
1632 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1633 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1634 ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
1635 for (int i=0; i < getVecSize(FInfo); ++i) {
10. Assuming the condition is false
11. Loop condition is false. Execution continues on line 1646
1636 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1637 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1638 Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
1639 if (!evaluateScalarMathFunc(FInfo, DVal0[i],
1640 DVal1[i], celt0, celt1, celt2)) {
1641 return false;
1642 }
1643 }
1644 }
1645
1646 LLVMContext &context = CI->getParent()->getParent()->getContext();
1647 Constant *nval0, *nval1;
1648 if (getVecSize(FInfo) == 1) {
12. Assuming the condition is true
13. Taking true branch
1649 nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
14. 2nd function call argument is an uninitialized value
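(Path summary: with numArgs assumed 0 at step 3, copr0/copr1/copr2 all stay null; at steps 8-11 getVecSize(FInfo) is taken as neither 1 nor positive, so the loop at line 1635 never runs and no element of DVal0 is written; at step 12 a second call to getVecSize(FInfo) is assumed to return 1, so line 1649 reads the never-initialized DVal0[0].)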
1650 if (hasTwoResults)
1651 nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
1652 } else {
1653 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1654 SmallVector <float, 0> FVal0, FVal1;
1655 for (int i=0; i < getVecSize(FInfo); ++i)
1656 FVal0.push_back((float)DVal0[i]);
1657 ArrayRef<float> tmp0(FVal0);
1658 nval0 = ConstantDataVector::get(context, tmp0);
1659 if (hasTwoResults) {
1660 for (int i=0; i < getVecSize(FInfo); ++i)
1661 FVal1.push_back((float)DVal1[i]);
1662 ArrayRef<float> tmp1(FVal1);
1663 nval1 = ConstantDataVector::get(context, tmp1);
1664 }
1665 } else {
1666 ArrayRef<double> tmp0(DVal0);
1667 nval0 = ConstantDataVector::get(context, tmp0);
1668 if (hasTwoResults) {
1669 ArrayRef<double> tmp1(DVal1);
1670 nval1 = ConstantDataVector::get(context, tmp1);
1671 }
1672 }
1673 }
1674
1675 if (hasTwoResults) {
1676 // sincos
1677 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1678 "math function with ptr arg not supported yet");
1679 new StoreInst(nval1, aCI->getArgOperand(1), aCI);
1680 }
1681
1682 replaceCall(nval0);
1683 return true;
1684}
1685
1686// Public interface to the Simplify LibCalls pass.
1687FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
1688 return new AMDGPUSimplifyLibCalls(Opt);
1689}
1690
1691FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
1692 return new AMDGPUUseNativeCalls();
1693}
1694
1695static bool setFastFlags(Function &F, const TargetOptions &Options) {
1696 AttrBuilder B;
1697
1698 if (Options.UnsafeFPMath || Options.NoInfsFPMath)
1699 B.addAttribute("no-infs-fp-math", "true");
1700 if (Options.UnsafeFPMath || Options.NoNaNsFPMath)
1701 B.addAttribute("no-nans-fp-math", "true");
1702 if (Options.UnsafeFPMath) {
1703 B.addAttribute("less-precise-fpmad", "true");
1704 B.addAttribute("unsafe-fp-math", "true");
1705 }
1706
1707 if (!B.hasAttributes())
1708 return false;
1709
1710 F.addAttributes(AttributeList::FunctionIndex, B);
1711
1712 return true;
1713}
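// For illustration (an assumption about the resulting IR, abbreviated): with
// UnsafeFPMath set, the function ends up carrying string attributes such as
//   "less-precise-fpmad"="true" "no-infs-fp-math"="true"
//   "no-nans-fp-math"="true" "unsafe-fp-math"="true"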
1714
1715bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
1716 if (skipFunction(F))
1717 return false;
1718
1719 bool Changed = false;
1720 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1721
1722 DEBUG(dbgs() << "AMDIC: process function ";
1723 F.printAsOperand(dbgs(), false, F.getParent());
1724 dbgs() << '\n';);
1725
1726 if (!EnablePreLink)
1727 Changed |= setFastFlags(F, Options);
1728
1729 for (auto &BB : F) {
1730 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1731 // Ignore non-calls.
1732 CallInst *CI = dyn_cast<CallInst>(I);
1733 ++I;
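// (I is advanced here, before folding, because a successful fold may erase
// CI from the block via replaceCall.)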
1734 if (!CI) continue;
1735
1736 // Ignore indirect calls.
1737 Function *Callee = CI->getCalledFunction();
1738 if (Callee == 0) continue;
1739
1740 DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1741 dbgs().flush());
1742 if(Simplifier.fold(CI, AA))
1743 Changed = true;
1744 }
1745 }
1746 return Changed;
1747}
1748
1749bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
1750 if (skipFunction(F) || UseNative.empty())
1751 return false;
1752
1753 bool Changed = false;
1754 for (auto &BB : F) {
1755 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1756 // Ignore non-calls.
1757 CallInst *CI = dyn_cast<CallInst>(I);
1758 ++I;
1759 if (!CI) continue;
1760
1761 // Ignore indirect calls.
1762 Function *Callee = CI->getCalledFunction();
1763 if (Callee == 0) continue;
1764
1765 if(Simplifier.useNative(CI))
1766 Changed = true;
1767 }
1768 }
1769 return Changed;
1770}