File: | llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp |
Warning: | line 1627, column 9: 1st function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AMDGPULibCalls.cpp -------------------------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | /// \file | |||
10 | /// This file does AMD library function optimizations. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "AMDGPU.h" | |||
15 | #include "AMDGPULibFunc.h" | |||
16 | #include "GCNSubtarget.h" | |||
17 | #include "llvm/Analysis/AliasAnalysis.h" | |||
18 | #include "llvm/Analysis/Loads.h" | |||
19 | #include "llvm/IR/IRBuilder.h" | |||
20 | #include "llvm/IR/IntrinsicInst.h" | |||
21 | #include "llvm/IR/IntrinsicsAMDGPU.h" | |||
22 | #include "llvm/InitializePasses.h" | |||
23 | #include "llvm/Target/TargetMachine.h" | |||
24 | ||||
// Debug category used by this file's debug output.
#define DEBUG_TYPE "amdgpu-simplifylib"
26 | ||||
27 | using namespace llvm; | |||
28 | ||||
29 | static cl::opt<bool> EnablePreLink("amdgpu-prelink", | |||
30 | cl::desc("Enable pre-link mode optimizations"), | |||
31 | cl::init(false), | |||
32 | cl::Hidden); | |||
33 | ||||
34 | static cl::list<std::string> UseNative("amdgpu-use-native", | |||
35 | cl::desc("Comma separated list of functions to replace with native, or all"), | |||
36 | cl::CommaSeparated, cl::ValueOptional, | |||
37 | cl::Hidden); | |||
38 | ||||
// Short aliases for the mathematical constants in the `numbers` namespace.
#define MATH_PI      numbers::pi
#define MATH_E       numbers::e
#define MATH_SQRT2   numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2
43 | ||||
44 | namespace llvm { | |||
45 | ||||
46 | class AMDGPULibCalls { | |||
47 | private: | |||
48 | ||||
49 | typedef llvm::AMDGPULibFunc FuncInfo; | |||
50 | ||||
51 | const TargetMachine *TM; | |||
52 | ||||
53 | // -fuse-native. | |||
54 | bool AllNative = false; | |||
55 | ||||
56 | bool useNativeFunc(const StringRef F) const; | |||
57 | ||||
58 | // Return a pointer (pointer expr) to the function if function definition with | |||
59 | // "FuncName" exists. It may create a new function prototype in pre-link mode. | |||
60 | FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); | |||
61 | ||||
62 | bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo); | |||
63 | ||||
64 | bool TDOFold(CallInst *CI, const FuncInfo &FInfo); | |||
65 | ||||
66 | /* Specialized optimizations */ | |||
67 | ||||
68 | // recip (half or native) | |||
69 | bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
70 | ||||
71 | // divide (half or native) | |||
72 | bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
73 | ||||
74 | // pow/powr/pown | |||
75 | bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
76 | ||||
77 | // rootn | |||
78 | bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
79 | ||||
80 | // fma/mad | |||
81 | bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
82 | ||||
83 | // -fuse-native for sincos | |||
84 | bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); | |||
85 | ||||
86 | // evaluate calls if calls' arguments are constants. | |||
87 | bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0, | |||
88 | double& Res1, Constant *copr0, Constant *copr1, Constant *copr2); | |||
89 | bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo); | |||
90 | ||||
91 | // sqrt | |||
92 | bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); | |||
93 | ||||
94 | // sin/cos | |||
95 | bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA); | |||
96 | ||||
97 | // __read_pipe/__write_pipe | |||
98 | bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, | |||
99 | const FuncInfo &FInfo); | |||
100 | ||||
101 | // llvm.amdgcn.wavefrontsize | |||
102 | bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B); | |||
103 | ||||
104 | // Get insertion point at entry. | |||
105 | BasicBlock::iterator getEntryIns(CallInst * UI); | |||
106 | // Insert an Alloc instruction. | |||
107 | AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix); | |||
108 | // Get a scalar native builtin single argument FP function | |||
109 | FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); | |||
110 | ||||
111 | protected: | |||
112 | CallInst *CI; | |||
113 | ||||
114 | bool isUnsafeMath(const CallInst *CI) const; | |||
115 | ||||
116 | void replaceCall(Value *With) { | |||
117 | CI->replaceAllUsesWith(With); | |||
118 | CI->eraseFromParent(); | |||
119 | } | |||
120 | ||||
121 | public: | |||
122 | AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {} | |||
123 | ||||
124 | bool fold(CallInst *CI, AliasAnalysis *AA = nullptr); | |||
125 | ||||
126 | void initNativeFuncs(); | |||
127 | ||||
128 | // Replace a normal math function call with that native version | |||
129 | bool useNative(CallInst *CI); | |||
130 | }; | |||
131 | ||||
132 | } // end llvm namespace | |||
133 | ||||
134 | namespace { | |||
135 | ||||
136 | class AMDGPUSimplifyLibCalls : public FunctionPass { | |||
137 | ||||
138 | AMDGPULibCalls Simplifier; | |||
139 | ||||
140 | public: | |||
141 | static char ID; // Pass identification | |||
142 | ||||
143 | AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr) | |||
144 | : FunctionPass(ID), Simplifier(TM) { | |||
145 | initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); | |||
146 | } | |||
147 | ||||
148 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
149 | AU.addRequired<AAResultsWrapperPass>(); | |||
150 | } | |||
151 | ||||
152 | bool runOnFunction(Function &M) override; | |||
153 | }; | |||
154 | ||||
155 | class AMDGPUUseNativeCalls : public FunctionPass { | |||
156 | ||||
157 | AMDGPULibCalls Simplifier; | |||
158 | ||||
159 | public: | |||
160 | static char ID; // Pass identification | |||
161 | ||||
162 | AMDGPUUseNativeCalls() : FunctionPass(ID) { | |||
163 | initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry()); | |||
164 | Simplifier.initNativeFuncs(); | |||
165 | } | |||
166 | ||||
167 | bool runOnFunction(Function &F) override; | |||
168 | }; | |||
169 | ||||
170 | } // end anonymous namespace. | |||
171 | ||||
172 | char AMDGPUSimplifyLibCalls::ID = 0; | |||
173 | char AMDGPUUseNativeCalls::ID = 0; | |||
174 | ||||
175 | INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",static void *initializeAMDGPUSimplifyLibCallsPassOnce(PassRegistry &Registry) { | |||
176 | "Simplify well-known AMD library calls", false, false)static void *initializeAMDGPUSimplifyLibCallsPassOnce(PassRegistry &Registry) { | |||
177 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | |||
178 | INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",PassInfo *PI = new PassInfo( "Simplify well-known AMD library calls" , "amdgpu-simplifylib", &AMDGPUSimplifyLibCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUSimplifyLibCalls>) , false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUSimplifyLibCallsPassFlag ; void llvm::initializeAMDGPUSimplifyLibCallsPass(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUSimplifyLibCallsPassFlag , initializeAMDGPUSimplifyLibCallsPassOnce, std::ref(Registry )); } | |||
179 | "Simplify well-known AMD library calls", false, false)PassInfo *PI = new PassInfo( "Simplify well-known AMD library calls" , "amdgpu-simplifylib", &AMDGPUSimplifyLibCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUSimplifyLibCalls>) , false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUSimplifyLibCallsPassFlag ; void llvm::initializeAMDGPUSimplifyLibCallsPass(PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUSimplifyLibCallsPassFlag , initializeAMDGPUSimplifyLibCallsPassOnce, std::ref(Registry )); } | |||
180 | ||||
181 | INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions." , "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce , std::ref(Registry)); } | |||
182 | "Replace builtin math calls with that native versions.",static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions." , "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce , std::ref(Registry)); } | |||
183 | false, false)static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions." , "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo ::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false , false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry ) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce , std::ref(Registry)); } | |||
184 | ||||
185 | template <typename IRB> | |||
186 | static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, | |||
187 | const Twine &Name = "") { | |||
188 | CallInst *R = B.CreateCall(Callee, Arg, Name); | |||
189 | if (Function *F = dyn_cast<Function>(Callee.getCallee())) | |||
190 | R->setCallingConv(F->getCallingConv()); | |||
191 | return R; | |||
192 | } | |||
193 | ||||
194 | template <typename IRB> | |||
195 | static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, | |||
196 | Value *Arg2, const Twine &Name = "") { | |||
197 | CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name); | |||
198 | if (Function *F = dyn_cast<Function>(Callee.getCallee())) | |||
199 | R->setCallingConv(F->getCallingConv()); | |||
200 | return R; | |||
201 | } | |||
202 | ||||
203 | // Data structures for table-driven optimizations. | |||
204 | // FuncTbl works for both f32 and f64 functions with 1 input argument | |||
205 | ||||
206 | struct TableEntry { | |||
207 | double result; | |||
208 | double input; | |||
209 | }; | |||
210 | ||||
211 | /* a list of {result, input} */ | |||
212 | static const TableEntry tbl_acos[] = { | |||
213 | {MATH_PInumbers::pi / 2.0, 0.0}, | |||
214 | {MATH_PInumbers::pi / 2.0, -0.0}, | |||
215 | {0.0, 1.0}, | |||
216 | {MATH_PInumbers::pi, -1.0} | |||
217 | }; | |||
218 | static const TableEntry tbl_acosh[] = { | |||
219 | {0.0, 1.0} | |||
220 | }; | |||
221 | static const TableEntry tbl_acospi[] = { | |||
222 | {0.5, 0.0}, | |||
223 | {0.5, -0.0}, | |||
224 | {0.0, 1.0}, | |||
225 | {1.0, -1.0} | |||
226 | }; | |||
227 | static const TableEntry tbl_asin[] = { | |||
228 | {0.0, 0.0}, | |||
229 | {-0.0, -0.0}, | |||
230 | {MATH_PInumbers::pi / 2.0, 1.0}, | |||
231 | {-MATH_PInumbers::pi / 2.0, -1.0} | |||
232 | }; | |||
233 | static const TableEntry tbl_asinh[] = { | |||
234 | {0.0, 0.0}, | |||
235 | {-0.0, -0.0} | |||
236 | }; | |||
237 | static const TableEntry tbl_asinpi[] = { | |||
238 | {0.0, 0.0}, | |||
239 | {-0.0, -0.0}, | |||
240 | {0.5, 1.0}, | |||
241 | {-0.5, -1.0} | |||
242 | }; | |||
243 | static const TableEntry tbl_atan[] = { | |||
244 | {0.0, 0.0}, | |||
245 | {-0.0, -0.0}, | |||
246 | {MATH_PInumbers::pi / 4.0, 1.0}, | |||
247 | {-MATH_PInumbers::pi / 4.0, -1.0} | |||
248 | }; | |||
249 | static const TableEntry tbl_atanh[] = { | |||
250 | {0.0, 0.0}, | |||
251 | {-0.0, -0.0} | |||
252 | }; | |||
253 | static const TableEntry tbl_atanpi[] = { | |||
254 | {0.0, 0.0}, | |||
255 | {-0.0, -0.0}, | |||
256 | {0.25, 1.0}, | |||
257 | {-0.25, -1.0} | |||
258 | }; | |||
259 | static const TableEntry tbl_cbrt[] = { | |||
260 | {0.0, 0.0}, | |||
261 | {-0.0, -0.0}, | |||
262 | {1.0, 1.0}, | |||
263 | {-1.0, -1.0}, | |||
264 | }; | |||
265 | static const TableEntry tbl_cos[] = { | |||
266 | {1.0, 0.0}, | |||
267 | {1.0, -0.0} | |||
268 | }; | |||
269 | static const TableEntry tbl_cosh[] = { | |||
270 | {1.0, 0.0}, | |||
271 | {1.0, -0.0} | |||
272 | }; | |||
273 | static const TableEntry tbl_cospi[] = { | |||
274 | {1.0, 0.0}, | |||
275 | {1.0, -0.0} | |||
276 | }; | |||
277 | static const TableEntry tbl_erfc[] = { | |||
278 | {1.0, 0.0}, | |||
279 | {1.0, -0.0} | |||
280 | }; | |||
281 | static const TableEntry tbl_erf[] = { | |||
282 | {0.0, 0.0}, | |||
283 | {-0.0, -0.0} | |||
284 | }; | |||
285 | static const TableEntry tbl_exp[] = { | |||
286 | {1.0, 0.0}, | |||
287 | {1.0, -0.0}, | |||
288 | {MATH_Enumbers::e, 1.0} | |||
289 | }; | |||
290 | static const TableEntry tbl_exp2[] = { | |||
291 | {1.0, 0.0}, | |||
292 | {1.0, -0.0}, | |||
293 | {2.0, 1.0} | |||
294 | }; | |||
295 | static const TableEntry tbl_exp10[] = { | |||
296 | {1.0, 0.0}, | |||
297 | {1.0, -0.0}, | |||
298 | {10.0, 1.0} | |||
299 | }; | |||
300 | static const TableEntry tbl_expm1[] = { | |||
301 | {0.0, 0.0}, | |||
302 | {-0.0, -0.0} | |||
303 | }; | |||
304 | static const TableEntry tbl_log[] = { | |||
305 | {0.0, 1.0}, | |||
306 | {1.0, MATH_Enumbers::e} | |||
307 | }; | |||
308 | static const TableEntry tbl_log2[] = { | |||
309 | {0.0, 1.0}, | |||
310 | {1.0, 2.0} | |||
311 | }; | |||
312 | static const TableEntry tbl_log10[] = { | |||
313 | {0.0, 1.0}, | |||
314 | {1.0, 10.0} | |||
315 | }; | |||
316 | static const TableEntry tbl_rsqrt[] = { | |||
317 | {1.0, 1.0}, | |||
318 | {MATH_SQRT1_2numbers::inv_sqrt2, 2.0} | |||
319 | }; | |||
320 | static const TableEntry tbl_sin[] = { | |||
321 | {0.0, 0.0}, | |||
322 | {-0.0, -0.0} | |||
323 | }; | |||
324 | static const TableEntry tbl_sinh[] = { | |||
325 | {0.0, 0.0}, | |||
326 | {-0.0, -0.0} | |||
327 | }; | |||
328 | static const TableEntry tbl_sinpi[] = { | |||
329 | {0.0, 0.0}, | |||
330 | {-0.0, -0.0} | |||
331 | }; | |||
332 | static const TableEntry tbl_sqrt[] = { | |||
333 | {0.0, 0.0}, | |||
334 | {1.0, 1.0}, | |||
335 | {MATH_SQRT2numbers::sqrt2, 2.0} | |||
336 | }; | |||
337 | static const TableEntry tbl_tan[] = { | |||
338 | {0.0, 0.0}, | |||
339 | {-0.0, -0.0} | |||
340 | }; | |||
341 | static const TableEntry tbl_tanh[] = { | |||
342 | {0.0, 0.0}, | |||
343 | {-0.0, -0.0} | |||
344 | }; | |||
345 | static const TableEntry tbl_tanpi[] = { | |||
346 | {0.0, 0.0}, | |||
347 | {-0.0, -0.0} | |||
348 | }; | |||
349 | static const TableEntry tbl_tgamma[] = { | |||
350 | {1.0, 1.0}, | |||
351 | {1.0, 2.0}, | |||
352 | {2.0, 3.0}, | |||
353 | {6.0, 4.0} | |||
354 | }; | |||
355 | ||||
356 | static bool HasNative(AMDGPULibFunc::EFuncId id) { | |||
357 | switch(id) { | |||
358 | case AMDGPULibFunc::EI_DIVIDE: | |||
359 | case AMDGPULibFunc::EI_COS: | |||
360 | case AMDGPULibFunc::EI_EXP: | |||
361 | case AMDGPULibFunc::EI_EXP2: | |||
362 | case AMDGPULibFunc::EI_EXP10: | |||
363 | case AMDGPULibFunc::EI_LOG: | |||
364 | case AMDGPULibFunc::EI_LOG2: | |||
365 | case AMDGPULibFunc::EI_LOG10: | |||
366 | case AMDGPULibFunc::EI_POWR: | |||
367 | case AMDGPULibFunc::EI_RECIP: | |||
368 | case AMDGPULibFunc::EI_RSQRT: | |||
369 | case AMDGPULibFunc::EI_SIN: | |||
370 | case AMDGPULibFunc::EI_SINCOS: | |||
371 | case AMDGPULibFunc::EI_SQRT: | |||
372 | case AMDGPULibFunc::EI_TAN: | |||
373 | return true; | |||
374 | default:; | |||
375 | } | |||
376 | return false; | |||
377 | } | |||
378 | ||||
379 | struct TableRef { | |||
380 | size_t size; | |||
381 | const TableEntry *table; // variable size: from 0 to (size - 1) | |||
382 | ||||
383 | TableRef() : size(0), table(nullptr) {} | |||
384 | ||||
385 | template <size_t N> | |||
386 | TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {} | |||
387 | }; | |||
388 | ||||
389 | static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { | |||
390 | switch(id) { | |||
391 | case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos); | |||
392 | case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh); | |||
393 | case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi); | |||
394 | case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin); | |||
395 | case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh); | |||
396 | case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi); | |||
397 | case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan); | |||
398 | case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh); | |||
399 | case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi); | |||
400 | case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt); | |||
401 | case AMDGPULibFunc::EI_NCOS: | |||
402 | case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos); | |||
403 | case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh); | |||
404 | case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi); | |||
405 | case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc); | |||
406 | case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf); | |||
407 | case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp); | |||
408 | case AMDGPULibFunc::EI_NEXP2: | |||
409 | case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2); | |||
410 | case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10); | |||
411 | case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1); | |||
412 | case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log); | |||
413 | case AMDGPULibFunc::EI_NLOG2: | |||
414 | case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2); | |||
415 | case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10); | |||
416 | case AMDGPULibFunc::EI_NRSQRT: | |||
417 | case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt); | |||
418 | case AMDGPULibFunc::EI_NSIN: | |||
419 | case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin); | |||
420 | case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh); | |||
421 | case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi); | |||
422 | case AMDGPULibFunc::EI_NSQRT: | |||
423 | case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt); | |||
424 | case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan); | |||
425 | case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh); | |||
426 | case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi); | |||
427 | case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma); | |||
428 | default:; | |||
429 | } | |||
430 | return TableRef(); | |||
431 | } | |||
432 | ||||
433 | static inline int getVecSize(const AMDGPULibFunc& FInfo) { | |||
434 | return FInfo.getLeads()[0].VectorSize; | |||
435 | } | |||
436 | ||||
437 | static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { | |||
438 | return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; | |||
439 | } | |||
440 | ||||
441 | FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) { | |||
442 | // If we are doing PreLinkOpt, the function is external. So it is safe to | |||
443 | // use getOrInsertFunction() at this stage. | |||
444 | ||||
445 | return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo) | |||
446 | : AMDGPULibFunc::getFunction(M, fInfo); | |||
447 | } | |||
448 | ||||
449 | bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName, | |||
450 | FuncInfo &FInfo) { | |||
451 | return AMDGPULibFunc::parse(FMangledName, FInfo); | |||
452 | } | |||
453 | ||||
454 | bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const { | |||
455 | if (auto Op = dyn_cast<FPMathOperator>(CI)) | |||
456 | if (Op->isFast()) | |||
457 | return true; | |||
458 | const Function *F = CI->getParent()->getParent(); | |||
459 | Attribute Attr = F->getFnAttribute("unsafe-fp-math"); | |||
460 | return Attr.getValueAsBool(); | |||
461 | } | |||
462 | ||||
463 | bool AMDGPULibCalls::useNativeFunc(const StringRef F) const { | |||
464 | return AllNative || llvm::is_contained(UseNative, F); | |||
465 | } | |||
466 | ||||
467 | void AMDGPULibCalls::initNativeFuncs() { | |||
468 | AllNative = useNativeFunc("all") || | |||
469 | (UseNative.getNumOccurrences() && UseNative.size() == 1 && | |||
470 | UseNative.begin()->empty()); | |||
471 | } | |||
472 | ||||
473 | bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { | |||
474 | bool native_sin = useNativeFunc("sin"); | |||
475 | bool native_cos = useNativeFunc("cos"); | |||
476 | ||||
477 | if (native_sin && native_cos) { | |||
478 | Module *M = aCI->getModule(); | |||
479 | Value *opr0 = aCI->getArgOperand(0); | |||
480 | ||||
481 | AMDGPULibFunc nf; | |||
482 | nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType; | |||
483 | nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize; | |||
484 | ||||
485 | nf.setPrefix(AMDGPULibFunc::NATIVE); | |||
486 | nf.setId(AMDGPULibFunc::EI_SIN); | |||
487 | FunctionCallee sinExpr = getFunction(M, nf); | |||
488 | ||||
489 | nf.setPrefix(AMDGPULibFunc::NATIVE); | |||
490 | nf.setId(AMDGPULibFunc::EI_COS); | |||
491 | FunctionCallee cosExpr = getFunction(M, nf); | |||
492 | if (sinExpr && cosExpr) { | |||
493 | Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI); | |||
494 | Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI); | |||
495 | new StoreInst(cosval, aCI->getArgOperand(1), aCI); | |||
496 | ||||
497 | DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("usenative")) { dbgs() << "<useNative> replace " << *aCI << " with native version of sin/cos"; } } while (false) | |||
498 | << " with native version of sin/cos")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("usenative")) { dbgs() << "<useNative> replace " << *aCI << " with native version of sin/cos"; } } while (false); | |||
499 | ||||
500 | replaceCall(sinval); | |||
501 | return true; | |||
502 | } | |||
503 | } | |||
504 | return false; | |||
505 | } | |||
506 | ||||
507 | bool AMDGPULibCalls::useNative(CallInst *aCI) { | |||
508 | CI = aCI; | |||
509 | Function *Callee = aCI->getCalledFunction(); | |||
510 | ||||
511 | FuncInfo FInfo; | |||
512 | if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() || | |||
513 | FInfo.getPrefix() != AMDGPULibFunc::NOPFX || | |||
514 | getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) || | |||
515 | !(AllNative || useNativeFunc(FInfo.getName()))) { | |||
516 | return false; | |||
517 | } | |||
518 | ||||
519 | if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS) | |||
520 | return sincosUseNative(aCI, FInfo); | |||
521 | ||||
522 | FInfo.setPrefix(AMDGPULibFunc::NATIVE); | |||
523 | FunctionCallee F = getFunction(aCI->getModule(), FInfo); | |||
524 | if (!F) | |||
525 | return false; | |||
526 | ||||
527 | aCI->setCalledFunction(F); | |||
528 | DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("usenative")) { dbgs() << "<useNative> replace " << *aCI << " with native version"; } } while (false ) | |||
529 | << " with native version")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("usenative")) { dbgs() << "<useNative> replace " << *aCI << " with native version"; } } while (false ); | |||
530 | return true; | |||
531 | } | |||
532 | ||||
533 | // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe | |||
534 | // builtin, with appended type size and alignment arguments, where 2 or 4 | |||
535 | // indicates the original number of arguments. The library has optimized version | |||
536 | // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same | |||
537 | // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N | |||
538 | // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., | |||
539 | // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. | |||
540 | bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, | |||
541 | const FuncInfo &FInfo) { | |||
542 | auto *Callee = CI->getCalledFunction(); | |||
543 | if (!Callee->isDeclaration()) | |||
544 | return false; | |||
545 | ||||
546 | assert(Callee->hasName() && "Invalid read_pipe/write_pipe function")(static_cast <bool> (Callee->hasName() && "Invalid read_pipe/write_pipe function" ) ? void (0) : __assert_fail ("Callee->hasName() && \"Invalid read_pipe/write_pipe function\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 546, __extension__ __PRETTY_FUNCTION__)); | |||
547 | auto *M = Callee->getParent(); | |||
548 | auto &Ctx = M->getContext(); | |||
549 | std::string Name = std::string(Callee->getName()); | |||
550 | auto NumArg = CI->arg_size(); | |||
551 | if (NumArg != 4 && NumArg != 6) | |||
552 | return false; | |||
553 | auto *PacketSize = CI->getArgOperand(NumArg - 2); | |||
554 | auto *PacketAlign = CI->getArgOperand(NumArg - 1); | |||
555 | if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign)) | |||
556 | return false; | |||
557 | unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue(); | |||
558 | Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue(); | |||
559 | if (Alignment != Size) | |||
560 | return false; | |||
561 | ||||
562 | Type *PtrElemTy; | |||
563 | if (Size <= 8) | |||
564 | PtrElemTy = Type::getIntNTy(Ctx, Size * 8); | |||
565 | else | |||
566 | PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8); | |||
567 | unsigned PtrArgLoc = CI->arg_size() - 3; | |||
568 | auto PtrArg = CI->getArgOperand(PtrArgLoc); | |||
569 | unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace(); | |||
570 | auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS); | |||
571 | ||||
572 | SmallVector<llvm::Type *, 6> ArgTys; | |||
573 | for (unsigned I = 0; I != PtrArgLoc; ++I) | |||
574 | ArgTys.push_back(CI->getArgOperand(I)->getType()); | |||
575 | ArgTys.push_back(PtrTy); | |||
576 | ||||
577 | Name = Name + "_" + std::to_string(Size); | |||
578 | auto *FTy = FunctionType::get(Callee->getReturnType(), | |||
579 | ArrayRef<Type *>(ArgTys), false); | |||
580 | AMDGPULibFunc NewLibFunc(Name, FTy); | |||
581 | FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); | |||
582 | if (!F) | |||
583 | return false; | |||
584 | ||||
585 | auto *BCast = B.CreatePointerCast(PtrArg, PtrTy); | |||
586 | SmallVector<Value *, 6> Args; | |||
587 | for (unsigned I = 0; I != PtrArgLoc; ++I) | |||
588 | Args.push_back(CI->getArgOperand(I)); | |||
589 | Args.push_back(BCast); | |||
590 | ||||
591 | auto *NCI = B.CreateCall(F, Args); | |||
592 | NCI->setAttributes(CI->getAttributes()); | |||
593 | CI->replaceAllUsesWith(NCI); | |||
594 | CI->dropAllReferences(); | |||
595 | CI->eraseFromParent(); | |||
596 | ||||
597 | return true; | |||
598 | } | |||
599 | ||||
600 | // This function returns false if no change; return true otherwise. | |||
601 | bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { | |||
602 | this->CI = CI; | |||
603 | Function *Callee = CI->getCalledFunction(); | |||
604 | ||||
605 | // Ignore indirect calls. | |||
606 | if (Callee == nullptr) | |||
607 | return false; | |||
608 | ||||
609 | BasicBlock *BB = CI->getParent(); | |||
610 | LLVMContext &Context = CI->getParent()->getContext(); | |||
611 | IRBuilder<> B(Context); | |||
612 | ||||
613 | // Set the builder to the instruction after the call. | |||
614 | B.SetInsertPoint(BB, CI->getIterator()); | |||
615 | ||||
616 | // Copy fast flags from the original call. | |||
617 | if (const FPMathOperator *FPOp
| |||
618 | B.setFastMathFlags(FPOp->getFastMathFlags()); | |||
619 | ||||
620 | switch (Callee->getIntrinsicID()) { | |||
621 | default: | |||
622 | break; | |||
623 | case Intrinsic::amdgcn_wavefrontsize: | |||
624 | return !EnablePreLink && fold_wavefrontsize(CI, B); | |||
625 | } | |||
626 | ||||
627 | FuncInfo FInfo; | |||
628 | if (!parseFunctionName(Callee->getName(), FInfo)) | |||
629 | return false; | |||
630 | ||||
631 | // Further check the number of arguments to see if they match. | |||
632 | if (CI->arg_size() != FInfo.getNumArgs()) | |||
633 | return false; | |||
634 | ||||
635 | if (TDOFold(CI, FInfo)) | |||
636 | return true; | |||
637 | ||||
638 | // Under unsafe-math, evaluate calls if possible. | |||
639 | // According to Brian Sumner, we can do this for all f32 function calls | |||
640 | // using host's double function calls. | |||
641 | if (isUnsafeMath(CI) && evaluateCall(CI, FInfo)) | |||
642 | return true; | |||
643 | ||||
644 | // Specialized optimizations for each function call | |||
645 | switch (FInfo.getId()) { | |||
646 | case AMDGPULibFunc::EI_RECIP: | |||
647 | // skip vector function | |||
648 | assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc ::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && "recip must be an either native or half function") ? void (0 ) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"recip must be an either native or half function\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 650, __extension__ __PRETTY_FUNCTION__)) | |||
649 | FInfo.getPrefix() == AMDGPULibFunc::HALF) &&(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc ::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && "recip must be an either native or half function") ? void (0 ) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"recip must be an either native or half function\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 650, __extension__ __PRETTY_FUNCTION__)) | |||
650 | "recip must be an either native or half function")(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc ::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && "recip must be an either native or half function") ? void (0 ) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"recip must be an either native or half function\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 650, __extension__ __PRETTY_FUNCTION__)); | |||
651 | return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo); | |||
652 | ||||
653 | case AMDGPULibFunc::EI_DIVIDE: | |||
654 | // skip vector function | |||
655 | assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc ::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && "divide must be an either native or half function") ? void ( 0) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"divide must be an either native or half function\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 657, __extension__ __PRETTY_FUNCTION__)) | |||
656 | FInfo.getPrefix() == AMDGPULibFunc::HALF) &&(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc ::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && "divide must be an either native or half function") ? void ( 0) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"divide must be an either native or half function\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 657, __extension__ __PRETTY_FUNCTION__)) | |||
657 | "divide must be an either native or half function")(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc ::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && "divide must be an either native or half function") ? void ( 0) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"divide must be an either native or half function\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 657, __extension__ __PRETTY_FUNCTION__)); | |||
658 | return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo); | |||
659 | ||||
660 | case AMDGPULibFunc::EI_POW: | |||
661 | case AMDGPULibFunc::EI_POWR: | |||
662 | case AMDGPULibFunc::EI_POWN: | |||
663 | return fold_pow(CI, B, FInfo); | |||
664 | ||||
665 | case AMDGPULibFunc::EI_ROOTN: | |||
666 | // skip vector function | |||
667 | return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo); | |||
668 | ||||
669 | case AMDGPULibFunc::EI_FMA: | |||
670 | case AMDGPULibFunc::EI_MAD: | |||
671 | case AMDGPULibFunc::EI_NFMA: | |||
672 | // skip vector function | |||
673 | return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo); | |||
674 | ||||
675 | case AMDGPULibFunc::EI_SQRT: | |||
676 | return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo); | |||
677 | case AMDGPULibFunc::EI_COS: | |||
678 | case AMDGPULibFunc::EI_SIN: | |||
679 | if ((getArgType(FInfo) == AMDGPULibFunc::F32 || | |||
680 | getArgType(FInfo) == AMDGPULibFunc::F64) | |||
681 | && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX)) | |||
682 | return fold_sincos(CI, B, AA); | |||
683 | ||||
684 | break; | |||
685 | case AMDGPULibFunc::EI_READ_PIPE_2: | |||
686 | case AMDGPULibFunc::EI_READ_PIPE_4: | |||
687 | case AMDGPULibFunc::EI_WRITE_PIPE_2: | |||
688 | case AMDGPULibFunc::EI_WRITE_PIPE_4: | |||
689 | return fold_read_write_pipe(CI, B, FInfo); | |||
690 | ||||
691 | default: | |||
692 | break; | |||
693 | } | |||
694 | ||||
695 | return false; | |||
696 | } | |||
697 | ||||
698 | bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { | |||
699 | // Table-Driven optimization | |||
700 | const TableRef tr = getOptTable(FInfo.getId()); | |||
701 | if (tr.size==0) | |||
702 | return false; | |||
703 | ||||
704 | int const sz = (int)tr.size; | |||
705 | const TableEntry * const ftbl = tr.table; | |||
706 | Value *opr0 = CI->getArgOperand(0); | |||
707 | ||||
708 | if (getVecSize(FInfo) > 1) { | |||
709 | if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) { | |||
710 | SmallVector<double, 0> DVal; | |||
711 | for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) { | |||
712 | ConstantFP *eltval = dyn_cast<ConstantFP>( | |||
713 | CV->getElementAsConstant((unsigned)eltNo)); | |||
714 | assert(eltval && "Non-FP arguments in math function!")(static_cast <bool> (eltval && "Non-FP arguments in math function!" ) ? void (0) : __assert_fail ("eltval && \"Non-FP arguments in math function!\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 714, __extension__ __PRETTY_FUNCTION__)); | |||
715 | bool found = false; | |||
716 | for (int i=0; i < sz; ++i) { | |||
717 | if (eltval->isExactlyValue(ftbl[i].input)) { | |||
718 | DVal.push_back(ftbl[i].result); | |||
719 | found = true; | |||
720 | break; | |||
721 | } | |||
722 | } | |||
723 | if (!found) { | |||
724 | // This vector constants not handled yet. | |||
725 | return false; | |||
726 | } | |||
727 | } | |||
728 | LLVMContext &context = CI->getParent()->getParent()->getContext(); | |||
729 | Constant *nval; | |||
730 | if (getArgType(FInfo) == AMDGPULibFunc::F32) { | |||
731 | SmallVector<float, 0> FVal; | |||
732 | for (unsigned i = 0; i < DVal.size(); ++i) { | |||
733 | FVal.push_back((float)DVal[i]); | |||
734 | } | |||
735 | ArrayRef<float> tmp(FVal); | |||
736 | nval = ConstantDataVector::get(context, tmp); | |||
737 | } else { // F64 | |||
738 | ArrayRef<double> tmp(DVal); | |||
739 | nval = ConstantDataVector::get(context, tmp); | |||
740 | } | |||
741 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *nval << "\n"; } } while (false); | |||
742 | replaceCall(nval); | |||
743 | return true; | |||
744 | } | |||
745 | } else { | |||
746 | // Scalar version | |||
747 | if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { | |||
748 | for (int i = 0; i < sz; ++i) { | |||
749 | if (CF->isExactlyValue(ftbl[i].input)) { | |||
750 | Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result); | |||
751 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *nval << "\n"; } } while (false); | |||
752 | replaceCall(nval); | |||
753 | return true; | |||
754 | } | |||
755 | } | |||
756 | } | |||
757 | } | |||
758 | ||||
759 | return false; | |||
760 | } | |||
761 | ||||
762 | // [native_]half_recip(c) ==> 1.0/c | |||
763 | bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B, | |||
764 | const FuncInfo &FInfo) { | |||
765 | Value *opr0 = CI->getArgOperand(0); | |||
766 | if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { | |||
767 | // Just create a normal div. Later, InstCombine will be able | |||
768 | // to compute the divide into a constant (avoid check float infinity | |||
769 | // or subnormal at this point). | |||
770 | Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0), | |||
771 | opr0, | |||
772 | "recip2div"); | |||
773 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *nval << "\n"; } } while (false); | |||
774 | replaceCall(nval); | |||
775 | return true; | |||
776 | } | |||
777 | return false; | |||
778 | } | |||
779 | ||||
780 | // [native_]half_divide(x, c) ==> x/c | |||
781 | bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B, | |||
782 | const FuncInfo &FInfo) { | |||
783 | Value *opr0 = CI->getArgOperand(0); | |||
784 | Value *opr1 = CI->getArgOperand(1); | |||
785 | ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0); | |||
786 | ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1); | |||
787 | ||||
788 | if ((CF0 && CF1) || // both are constants | |||
789 | (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32))) | |||
790 | // CF1 is constant && f32 divide | |||
791 | { | |||
792 | Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0), | |||
793 | opr1, "__div2recip"); | |||
794 | Value *nval = B.CreateFMul(opr0, nval1, "__div2mul"); | |||
795 | replaceCall(nval); | |||
796 | return true; | |||
797 | } | |||
798 | return false; | |||
799 | } | |||
800 | ||||
namespace llvm {
/// Base-2 logarithm of \p V.
///
/// Uses the C library's ::log2 when the feature-test macros advertise it
/// (XSI >= 600, ISO C99, or POSIX.1-2001 and later); otherwise falls back to
/// log(V) / ln(2).
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  return log(V) / numbers::ln2;
#endif
}
}
810 | ||||
811 | bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, | |||
812 | const FuncInfo &FInfo) { | |||
813 | assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||(static_cast <bool> ((FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && "fold_pow: encounter a wrong function call" ) ? void (0) : __assert_fail ("(FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && \"fold_pow: encounter a wrong function call\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 816, __extension__ __PRETTY_FUNCTION__)) | |||
814 | FInfo.getId() == AMDGPULibFunc::EI_POWR ||(static_cast <bool> ((FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && "fold_pow: encounter a wrong function call" ) ? void (0) : __assert_fail ("(FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && \"fold_pow: encounter a wrong function call\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 816, __extension__ __PRETTY_FUNCTION__)) | |||
815 | FInfo.getId() == AMDGPULibFunc::EI_POWN) &&(static_cast <bool> ((FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && "fold_pow: encounter a wrong function call" ) ? void (0) : __assert_fail ("(FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && \"fold_pow: encounter a wrong function call\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 816, __extension__ __PRETTY_FUNCTION__)) | |||
816 | "fold_pow: encounter a wrong function call")(static_cast <bool> ((FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && "fold_pow: encounter a wrong function call" ) ? void (0) : __assert_fail ("(FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && \"fold_pow: encounter a wrong function call\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 816, __extension__ __PRETTY_FUNCTION__)); | |||
817 | ||||
818 | Value *opr0, *opr1; | |||
819 | ConstantFP *CF; | |||
820 | ConstantInt *CINT; | |||
821 | ConstantAggregateZero *CZero; | |||
822 | Type *eltType; | |||
823 | ||||
824 | opr0 = CI->getArgOperand(0); | |||
825 | opr1 = CI->getArgOperand(1); | |||
826 | CZero = dyn_cast<ConstantAggregateZero>(opr1); | |||
827 | if (getVecSize(FInfo) == 1) { | |||
828 | eltType = opr0->getType(); | |||
829 | CF = dyn_cast<ConstantFP>(opr1); | |||
830 | CINT = dyn_cast<ConstantInt>(opr1); | |||
831 | } else { | |||
832 | VectorType *VTy = dyn_cast<VectorType>(opr0->getType()); | |||
833 | assert(VTy && "Oprand of vector function should be of vectortype")(static_cast <bool> (VTy && "Oprand of vector function should be of vectortype" ) ? void (0) : __assert_fail ("VTy && \"Oprand of vector function should be of vectortype\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 833, __extension__ __PRETTY_FUNCTION__)); | |||
834 | eltType = VTy->getElementType(); | |||
835 | ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1); | |||
836 | ||||
837 | // Now, only Handle vector const whose elements have the same value. | |||
838 | CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr; | |||
839 | CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr; | |||
840 | } | |||
841 | ||||
842 | // No unsafe math , no constant argument, do nothing | |||
843 | if (!isUnsafeMath(CI) && !CF && !CINT && !CZero) | |||
844 | return false; | |||
845 | ||||
846 | // 0x1111111 means that we don't do anything for this call. | |||
847 | int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111); | |||
848 | ||||
849 | if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) { | |||
850 | // pow/powr/pown(x, 0) == 1 | |||
851 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> 1\n"; } } while (false); | |||
852 | Constant *cnval = ConstantFP::get(eltType, 1.0); | |||
853 | if (getVecSize(FInfo) > 1) { | |||
854 | cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); | |||
855 | } | |||
856 | replaceCall(cnval); | |||
857 | return true; | |||
858 | } | |||
859 | if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) { | |||
860 | // pow/powr/pown(x, 1.0) = x | |||
861 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr0 << "\n"; } } while (false); | |||
862 | replaceCall(opr0); | |||
863 | return true; | |||
864 | } | |||
865 | if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) { | |||
866 | // pow/powr/pown(x, 2.0) = x*x | |||
867 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr0 << " * " << *opr0 << "\n"; } } while (false) | |||
868 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr0 << " * " << *opr0 << "\n"; } } while (false); | |||
869 | Value *nval = B.CreateFMul(opr0, opr0, "__pow2"); | |||
870 | replaceCall(nval); | |||
871 | return true; | |||
872 | } | |||
873 | if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) { | |||
874 | // pow/powr/pown(x, -1.0) = 1.0/x | |||
875 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> 1 / " << *opr0 << "\n"; } } while (false); | |||
876 | Constant *cnval = ConstantFP::get(eltType, 1.0); | |||
877 | if (getVecSize(FInfo) > 1) { | |||
878 | cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); | |||
879 | } | |||
880 | Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip"); | |||
881 | replaceCall(nval); | |||
882 | return true; | |||
883 | } | |||
884 | ||||
885 | Module *M = CI->getModule(); | |||
886 | if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) { | |||
887 | // pow[r](x, [-]0.5) = sqrt(x) | |||
888 | bool issqrt = CF->isExactlyValue(0.5); | |||
889 | if (FunctionCallee FPExpr = | |||
890 | getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT | |||
891 | : AMDGPULibFunc::EI_RSQRT, | |||
892 | FInfo))) { | |||
893 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << FInfo.getName().c_str() << "(" << *opr0 << ")\n"; } } while (false) | |||
894 | << FInfo.getName().c_str() << "(" << *opr0 << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << FInfo.getName().c_str() << "(" << *opr0 << ")\n"; } } while (false); | |||
895 | Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt" | |||
896 | : "__pow2rsqrt"); | |||
897 | replaceCall(nval); | |||
898 | return true; | |||
899 | } | |||
900 | } | |||
901 | ||||
902 | if (!isUnsafeMath(CI)) | |||
903 | return false; | |||
904 | ||||
905 | // Unsafe Math optimization | |||
906 | ||||
907 | // Remember that ci_opr1 is set if opr1 is integral | |||
908 | if (CF) { | |||
909 | double dval = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
910 | ? (double)CF->getValueAPF().convertToFloat() | |||
911 | : CF->getValueAPF().convertToDouble(); | |||
912 | int ival = (int)dval; | |||
913 | if ((double)ival == dval) { | |||
914 | ci_opr1 = ival; | |||
915 | } else | |||
916 | ci_opr1 = 0x11111111; | |||
917 | } | |||
918 | ||||
919 | // pow/powr/pown(x, c) = [1/](x*x*..x); where | |||
920 | // trunc(c) == c && the number of x == c && |c| <= 12 | |||
921 | unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1; | |||
922 | if (abs_opr1 <= 12) { | |||
923 | Constant *cnval; | |||
924 | Value *nval; | |||
925 | if (abs_opr1 == 0) { | |||
926 | cnval = ConstantFP::get(eltType, 1.0); | |||
927 | if (getVecSize(FInfo) > 1) { | |||
928 | cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); | |||
929 | } | |||
930 | nval = cnval; | |||
931 | } else { | |||
932 | Value *valx2 = nullptr; | |||
933 | nval = nullptr; | |||
934 | while (abs_opr1 > 0) { | |||
935 | valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0; | |||
936 | if (abs_opr1 & 1) { | |||
937 | nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2; | |||
938 | } | |||
939 | abs_opr1 >>= 1; | |||
940 | } | |||
941 | } | |||
942 | ||||
943 | if (ci_opr1 < 0) { | |||
944 | cnval = ConstantFP::get(eltType, 1.0); | |||
945 | if (getVecSize(FInfo) > 1) { | |||
946 | cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); | |||
947 | } | |||
948 | nval = B.CreateFDiv(cnval, nval, "__1powprod"); | |||
949 | } | |||
950 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 << ")\n"; } } while (false) | |||
951 | << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 << ")\n"; } } while (false) | |||
952 | << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 << ")\n"; } } while (false); | |||
953 | replaceCall(nval); | |||
954 | return true; | |||
955 | } | |||
956 | ||||
957 | // powr ---> exp2(y * log2(x)) | |||
958 | // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31)) | |||
959 | FunctionCallee ExpExpr = | |||
960 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo)); | |||
961 | if (!ExpExpr) | |||
962 | return false; | |||
963 | ||||
964 | bool needlog = false; | |||
965 | bool needabs = false; | |||
966 | bool needcopysign = false; | |||
967 | Constant *cnval = nullptr; | |||
968 | if (getVecSize(FInfo) == 1) { | |||
969 | CF = dyn_cast<ConstantFP>(opr0); | |||
970 | ||||
971 | if (CF) { | |||
972 | double V = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
973 | ? (double)CF->getValueAPF().convertToFloat() | |||
974 | : CF->getValueAPF().convertToDouble(); | |||
975 | ||||
976 | V = log2(std::abs(V)); | |||
977 | cnval = ConstantFP::get(eltType, V); | |||
978 | needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) && | |||
979 | CF->isNegative(); | |||
980 | } else { | |||
981 | needlog = true; | |||
982 | needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR && | |||
983 | (!CF || CF->isNegative()); | |||
984 | } | |||
985 | } else { | |||
986 | ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0); | |||
987 | ||||
988 | if (!CDV) { | |||
989 | needlog = true; | |||
990 | needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; | |||
991 | } else { | |||
992 | assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&(static_cast <bool> ((int)CDV->getNumElements() == getVecSize (FInfo) && "Wrong vector size detected") ? void (0) : __assert_fail ("(int)CDV->getNumElements() == getVecSize(FInfo) && \"Wrong vector size detected\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 993, __extension__ __PRETTY_FUNCTION__)) | |||
993 | "Wrong vector size detected")(static_cast <bool> ((int)CDV->getNumElements() == getVecSize (FInfo) && "Wrong vector size detected") ? void (0) : __assert_fail ("(int)CDV->getNumElements() == getVecSize(FInfo) && \"Wrong vector size detected\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 993, __extension__ __PRETTY_FUNCTION__)); | |||
994 | ||||
995 | SmallVector<double, 0> DVal; | |||
996 | for (int i=0; i < getVecSize(FInfo); ++i) { | |||
997 | double V = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
998 | ? (double)CDV->getElementAsFloat(i) | |||
999 | : CDV->getElementAsDouble(i); | |||
1000 | if (V < 0.0) needcopysign = true; | |||
1001 | V = log2(std::abs(V)); | |||
1002 | DVal.push_back(V); | |||
1003 | } | |||
1004 | if (getArgType(FInfo) == AMDGPULibFunc::F32) { | |||
1005 | SmallVector<float, 0> FVal; | |||
1006 | for (unsigned i=0; i < DVal.size(); ++i) { | |||
1007 | FVal.push_back((float)DVal[i]); | |||
1008 | } | |||
1009 | ArrayRef<float> tmp(FVal); | |||
1010 | cnval = ConstantDataVector::get(M->getContext(), tmp); | |||
1011 | } else { | |||
1012 | ArrayRef<double> tmp(DVal); | |||
1013 | cnval = ConstantDataVector::get(M->getContext(), tmp); | |||
1014 | } | |||
1015 | } | |||
1016 | } | |||
1017 | ||||
1018 | if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) { | |||
1019 | // We cannot handle corner cases for a general pow() function, give up | |||
1020 | // unless y is a constant integral value. Then proceed as if it were pown. | |||
1021 | if (getVecSize(FInfo) == 1) { | |||
1022 | if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) { | |||
1023 | double y = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
1024 | ? (double)CF->getValueAPF().convertToFloat() | |||
1025 | : CF->getValueAPF().convertToDouble(); | |||
1026 | if (y != (double)(int64_t)y) | |||
1027 | return false; | |||
1028 | } else | |||
1029 | return false; | |||
1030 | } else { | |||
1031 | if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) { | |||
1032 | for (int i=0; i < getVecSize(FInfo); ++i) { | |||
1033 | double y = (getArgType(FInfo) == AMDGPULibFunc::F32) | |||
1034 | ? (double)CDV->getElementAsFloat(i) | |||
1035 | : CDV->getElementAsDouble(i); | |||
1036 | if (y != (double)(int64_t)y) | |||
1037 | return false; | |||
1038 | } | |||
1039 | } else | |||
1040 | return false; | |||
1041 | } | |||
1042 | } | |||
1043 | ||||
1044 | Value *nval; | |||
1045 | if (needabs) { | |||
1046 | FunctionCallee AbsExpr = | |||
1047 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo)); | |||
1048 | if (!AbsExpr) | |||
1049 | return false; | |||
1050 | nval = CreateCallEx(B, AbsExpr, opr0, "__fabs"); | |||
1051 | } else { | |||
1052 | nval = cnval ? cnval : opr0; | |||
1053 | } | |||
1054 | if (needlog) { | |||
1055 | FunctionCallee LogExpr = | |||
1056 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo)); | |||
1057 | if (!LogExpr) | |||
1058 | return false; | |||
1059 | nval = CreateCallEx(B,LogExpr, nval, "__log2"); | |||
1060 | } | |||
1061 | ||||
1062 | if (FInfo.getId() == AMDGPULibFunc::EI_POWN) { | |||
1063 | // convert int(32) to fp(f32 or f64) | |||
1064 | opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F"); | |||
1065 | } | |||
1066 | nval = B.CreateFMul(opr1, nval, "__ylogx"); | |||
1067 | nval = CreateCallEx(B,ExpExpr, nval, "__exp2"); | |||
1068 | ||||
1069 | if (needcopysign) { | |||
1070 | Value *opr_n; | |||
1071 | Type* rTy = opr0->getType(); | |||
1072 | Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty(); | |||
1073 | Type *nTy = nTyS; | |||
1074 | if (const auto *vTy = dyn_cast<FixedVectorType>(rTy)) | |||
1075 | nTy = FixedVectorType::get(nTyS, vTy); | |||
1076 | unsigned size = nTy->getScalarSizeInBits(); | |||
1077 | opr_n = CI->getArgOperand(1); | |||
1078 | if (opr_n->getType()->isIntegerTy()) | |||
1079 | opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou"); | |||
1080 | else | |||
1081 | opr_n = B.CreateFPToSI(opr1, nTy, "__ytou"); | |||
1082 | ||||
1083 | Value *sign = B.CreateShl(opr_n, size-1, "__yeven"); | |||
1084 | sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign"); | |||
1085 | nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign); | |||
1086 | nval = B.CreateBitCast(nval, opr0->getType()); | |||
1087 | } | |||
1088 | ||||
1089 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n"; } } while (false) | |||
1090 | << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n"; } } while (false); | |||
1091 | replaceCall(nval); | |||
1092 | ||||
1093 | return true; | |||
1094 | } | |||
1095 | ||||
1096 | bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, | |||
1097 | const FuncInfo &FInfo) { | |||
1098 | Value *opr0 = CI->getArgOperand(0); | |||
1099 | Value *opr1 = CI->getArgOperand(1); | |||
1100 | ||||
1101 | ConstantInt *CINT = dyn_cast<ConstantInt>(opr1); | |||
1102 | if (!CINT) { | |||
1103 | return false; | |||
1104 | } | |||
1105 | int ci_opr1 = (int)CINT->getSExtValue(); | |||
1106 | if (ci_opr1 == 1) { // rootn(x, 1) = x | |||
1107 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr0 << "\n"; } } while (false); | |||
1108 | replaceCall(opr0); | |||
1109 | return true; | |||
1110 | } | |||
1111 | if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x) | |||
1112 | Module *M = CI->getModule(); | |||
1113 | if (FunctionCallee FPExpr = | |||
1114 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { | |||
1115 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> sqrt(" << *opr0 << ")\n"; } } while (false); | |||
1116 | Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); | |||
1117 | replaceCall(nval); | |||
1118 | return true; | |||
1119 | } | |||
1120 | } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) | |||
1121 | Module *M = CI->getModule(); | |||
1122 | if (FunctionCallee FPExpr = | |||
1123 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { | |||
1124 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> cbrt(" << *opr0 << ")\n"; } } while (false); | |||
1125 | Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); | |||
1126 | replaceCall(nval); | |||
1127 | return true; | |||
1128 | } | |||
1129 | } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x | |||
1130 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> 1.0 / " << *opr0 << "\n"; } } while (false); | |||
1131 | Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), | |||
1132 | opr0, | |||
1133 | "__rootn2div"); | |||
1134 | replaceCall(nval); | |||
1135 | return true; | |||
1136 | } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x) | |||
1137 | Module *M = CI->getModule(); | |||
1138 | if (FunctionCallee FPExpr = | |||
1139 | getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) { | |||
1140 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> rsqrt(" << *opr0 << ")\n"; } } while (false) | |||
1141 | << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> rsqrt(" << *opr0 << ")\n"; } } while (false); | |||
1142 | Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt"); | |||
1143 | replaceCall(nval); | |||
1144 | return true; | |||
1145 | } | |||
1146 | } | |||
1147 | return false; | |||
1148 | } | |||
1149 | ||||
1150 | bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B, | |||
1151 | const FuncInfo &FInfo) { | |||
1152 | Value *opr0 = CI->getArgOperand(0); | |||
1153 | Value *opr1 = CI->getArgOperand(1); | |||
1154 | Value *opr2 = CI->getArgOperand(2); | |||
1155 | ||||
1156 | ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0); | |||
1157 | ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1); | |||
1158 | if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) { | |||
1159 | // fma/mad(a, b, c) = c if a=0 || b=0 | |||
1160 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr2 << "\n"; } } while (false); | |||
1161 | replaceCall(opr2); | |||
1162 | return true; | |||
1163 | } | |||
1164 | if (CF0 && CF0->isExactlyValue(1.0f)) { | |||
1165 | // fma/mad(a, b, c) = b+c if a=1 | |||
1166 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr1 << " + " << *opr2 << "\n"; } } while (false) | |||
1167 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr1 << " + " << *opr2 << "\n"; } } while (false); | |||
1168 | Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd"); | |||
1169 | replaceCall(nval); | |||
1170 | return true; | |||
1171 | } | |||
1172 | if (CF1 && CF1->isExactlyValue(1.0f)) { | |||
1173 | // fma/mad(a, b, c) = a+c if b=1 | |||
1174 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr0 << " + " << *opr2 << "\n"; } } while (false) | |||
1175 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr0 << " + " << *opr2 << "\n"; } } while (false); | |||
1176 | Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd"); | |||
1177 | replaceCall(nval); | |||
1178 | return true; | |||
1179 | } | |||
1180 | if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) { | |||
1181 | if (CF->isZero()) { | |||
1182 | // fma/mad(a, b, c) = a*b if c=0 | |||
1183 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr0 << " * " << *opr1 << "\n"; } } while (false) | |||
1184 | << *opr1 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << *opr0 << " * " << *opr1 << "\n"; } } while (false); | |||
1185 | Value *nval = B.CreateFMul(opr0, opr1, "fmamul"); | |||
1186 | replaceCall(nval); | |||
1187 | return true; | |||
1188 | } | |||
1189 | } | |||
1190 | ||||
1191 | return false; | |||
1192 | } | |||
1193 | ||||
1194 | // Get a scalar native builtin single argument FP function | |||
1195 | FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M, | |||
1196 | const FuncInfo &FInfo) { | |||
1197 | if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId())) | |||
1198 | return nullptr; | |||
1199 | FuncInfo nf = FInfo; | |||
1200 | nf.setPrefix(AMDGPULibFunc::NATIVE); | |||
1201 | return getFunction(M, nf); | |||
1202 | } | |||
1203 | ||||
1204 | // fold sqrt -> native_sqrt (x) | |||
1205 | bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B, | |||
1206 | const FuncInfo &FInfo) { | |||
1207 | if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) && | |||
1208 | (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) { | |||
1209 | if (FunctionCallee FPExpr = getNativeFunction( | |||
1210 | CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { | |||
1211 | Value *opr0 = CI->getArgOperand(0); | |||
1212 | LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << "sqrt(" << *opr0 << ")\n"; } } while (false) | |||
1213 | << "sqrt(" << *opr0 << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: " << * CI << " ---> " << "sqrt(" << *opr0 << ")\n"; } } while (false); | |||
1214 | Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt"); | |||
1215 | replaceCall(nval); | |||
1216 | return true; | |||
1217 | } | |||
1218 | } | |||
1219 | return false; | |||
1220 | } | |||
1221 | ||||
1222 | // fold sin, cos -> sincos. | |||
1223 | bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, | |||
1224 | AliasAnalysis *AA) { | |||
1225 | AMDGPULibFunc fInfo; | |||
1226 | if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo)) | |||
1227 | return false; | |||
1228 | ||||
1229 | assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||(static_cast <bool> (fInfo.getId() == AMDGPULibFunc::EI_SIN || fInfo.getId() == AMDGPULibFunc::EI_COS) ? void (0) : __assert_fail ("fInfo.getId() == AMDGPULibFunc::EI_SIN || fInfo.getId() == AMDGPULibFunc::EI_COS" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 1230, __extension__ __PRETTY_FUNCTION__)) | |||
1230 | fInfo.getId() == AMDGPULibFunc::EI_COS)(static_cast <bool> (fInfo.getId() == AMDGPULibFunc::EI_SIN || fInfo.getId() == AMDGPULibFunc::EI_COS) ? void (0) : __assert_fail ("fInfo.getId() == AMDGPULibFunc::EI_SIN || fInfo.getId() == AMDGPULibFunc::EI_COS" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 1230, __extension__ __PRETTY_FUNCTION__)); | |||
1231 | bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN; | |||
1232 | ||||
1233 | Value *CArgVal = CI->getArgOperand(0); | |||
1234 | BasicBlock * const CBB = CI->getParent(); | |||
1235 | ||||
1236 | int const MaxScan = 30; | |||
1237 | bool Changed = false; | |||
1238 | ||||
1239 | { // fold in load value. | |||
1240 | LoadInst *LI = dyn_cast<LoadInst>(CArgVal); | |||
1241 | if (LI && LI->getParent() == CBB) { | |||
1242 | BasicBlock::iterator BBI = LI->getIterator(); | |||
1243 | Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA); | |||
1244 | if (AvailableVal) { | |||
1245 | Changed = true; | |||
1246 | CArgVal->replaceAllUsesWith(AvailableVal); | |||
1247 | if (CArgVal->getNumUses() == 0) | |||
1248 | LI->eraseFromParent(); | |||
1249 | CArgVal = CI->getArgOperand(0); | |||
1250 | } | |||
1251 | } | |||
1252 | } | |||
1253 | ||||
1254 | Module *M = CI->getModule(); | |||
1255 | fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN); | |||
1256 | std::string const PairName = fInfo.mangle(); | |||
1257 | ||||
1258 | CallInst *UI = nullptr; | |||
1259 | for (User* U : CArgVal->users()) { | |||
1260 | CallInst *XI = dyn_cast_or_null<CallInst>(U); | |||
1261 | if (!XI || XI == CI || XI->getParent() != CBB) | |||
1262 | continue; | |||
1263 | ||||
1264 | Function *UCallee = XI->getCalledFunction(); | |||
1265 | if (!UCallee || !UCallee->getName().equals(PairName)) | |||
1266 | continue; | |||
1267 | ||||
1268 | BasicBlock::iterator BBI = CI->getIterator(); | |||
1269 | if (BBI == CI->getParent()->begin()) | |||
1270 | break; | |||
1271 | --BBI; | |||
1272 | for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) { | |||
1273 | if (cast<Instruction>(BBI) == XI) { | |||
1274 | UI = XI; | |||
1275 | break; | |||
1276 | } | |||
1277 | } | |||
1278 | if (UI) break; | |||
1279 | } | |||
1280 | ||||
1281 | if (!UI) | |||
1282 | return Changed; | |||
1283 | ||||
1284 | // Merge the sin and cos. | |||
1285 | ||||
1286 | // for OpenCL 2.0 we have only generic implementation of sincos | |||
1287 | // function. | |||
1288 | AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); | |||
1289 | nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); | |||
1290 | FunctionCallee Fsincos = getFunction(M, nf); | |||
1291 | if (!Fsincos) | |||
1292 | return Changed; | |||
1293 | ||||
1294 | BasicBlock::iterator ItOld = B.GetInsertPoint(); | |||
1295 | AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_"); | |||
1296 | B.SetInsertPoint(UI); | |||
1297 | ||||
1298 | Value *P = Alloc; | |||
1299 | Type *PTy = Fsincos.getFunctionType()->getParamType(1); | |||
1300 | // The allocaInst allocates the memory in private address space. This need | |||
1301 | // to be bitcasted to point to the address space of cos pointer type. | |||
1302 | // In OpenCL 2.0 this is generic, while in 1.2 that is private. | |||
1303 | if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) | |||
1304 | P = B.CreateAddrSpaceCast(Alloc, PTy); | |||
1305 | CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P); | |||
1306 | ||||
1307 | LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with " << *Call << "\n"; } } while (false) | |||
1308 | << *Call << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with " << *Call << "\n"; } } while (false); | |||
1309 | ||||
1310 | if (!isSin) { // CI->cos, UI->sin | |||
1311 | B.SetInsertPoint(&*ItOld); | |||
1312 | UI->replaceAllUsesWith(&*Call); | |||
1313 | Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); | |||
1314 | CI->replaceAllUsesWith(Reload); | |||
1315 | UI->eraseFromParent(); | |||
1316 | CI->eraseFromParent(); | |||
1317 | } else { // CI->sin, UI->cos | |||
1318 | Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); | |||
1319 | UI->replaceAllUsesWith(Reload); | |||
1320 | CI->replaceAllUsesWith(Call); | |||
1321 | UI->eraseFromParent(); | |||
1322 | CI->eraseFromParent(); | |||
1323 | } | |||
1324 | return true; | |||
1325 | } | |||
1326 | ||||
1327 | bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) { | |||
1328 | if (!TM) | |||
1329 | return false; | |||
1330 | ||||
1331 | StringRef CPU = TM->getTargetCPU(); | |||
1332 | StringRef Features = TM->getTargetFeatureString(); | |||
1333 | if ((CPU.empty() || CPU.equals_insensitive("generic")) && | |||
1334 | (Features.empty() || !Features.contains_insensitive("wavefrontsize"))) | |||
1335 | return false; | |||
1336 | ||||
1337 | Function *F = CI->getParent()->getParent(); | |||
1338 | const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F); | |||
1339 | unsigned N = ST.getWavefrontSize(); | |||
1340 | ||||
1341 | LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with " << N << "\n"; } } while (false) | |||
1342 | << N << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with " << N << "\n"; } } while (false); | |||
1343 | ||||
1344 | CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N)); | |||
1345 | CI->eraseFromParent(); | |||
1346 | return true; | |||
1347 | } | |||
1348 | ||||
1349 | // Get insertion point at entry. | |||
1350 | BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) { | |||
1351 | Function * Func = UI->getParent()->getParent(); | |||
1352 | BasicBlock * BB = &Func->getEntryBlock(); | |||
1353 | assert(BB && "Entry block not found!")(static_cast <bool> (BB && "Entry block not found!" ) ? void (0) : __assert_fail ("BB && \"Entry block not found!\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 1353, __extension__ __PRETTY_FUNCTION__)); | |||
1354 | BasicBlock::iterator ItNew = BB->begin(); | |||
1355 | return ItNew; | |||
1356 | } | |||
1357 | ||||
1358 | // Insert a AllocsInst at the beginning of function entry block. | |||
1359 | AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B, | |||
1360 | const char *prefix) { | |||
1361 | BasicBlock::iterator ItNew = getEntryIns(UI); | |||
1362 | Function *UCallee = UI->getCalledFunction(); | |||
1363 | Type *RetType = UCallee->getReturnType(); | |||
1364 | B.SetInsertPoint(&*ItNew); | |||
1365 | AllocaInst *Alloc = | |||
1366 | B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName()); | |||
1367 | Alloc->setAlignment( | |||
1368 | Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType))); | |||
1369 | return Alloc; | |||
1370 | } | |||
1371 | ||||
1372 | bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, | |||
1373 | double& Res0, double& Res1, | |||
1374 | Constant *copr0, Constant *copr1, | |||
1375 | Constant *copr2) { | |||
1376 | // By default, opr0/opr1/opr3 holds values of float/double type. | |||
1377 | // If they are not float/double, each function has to its | |||
1378 | // operand separately. | |||
1379 | double opr0=0.0, opr1=0.0, opr2=0.0; | |||
1380 | ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0); | |||
1381 | ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1); | |||
1382 | ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2); | |||
1383 | if (fpopr0) { | |||
1384 | opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64) | |||
1385 | ? fpopr0->getValueAPF().convertToDouble() | |||
1386 | : (double)fpopr0->getValueAPF().convertToFloat(); | |||
1387 | } | |||
1388 | ||||
1389 | if (fpopr1) { | |||
1390 | opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64) | |||
1391 | ? fpopr1->getValueAPF().convertToDouble() | |||
1392 | : (double)fpopr1->getValueAPF().convertToFloat(); | |||
1393 | } | |||
1394 | ||||
1395 | if (fpopr2) { | |||
1396 | opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64) | |||
1397 | ? fpopr2->getValueAPF().convertToDouble() | |||
1398 | : (double)fpopr2->getValueAPF().convertToFloat(); | |||
1399 | } | |||
1400 | ||||
1401 | switch (FInfo.getId()) { | |||
1402 | default : return false; | |||
1403 | ||||
1404 | case AMDGPULibFunc::EI_ACOS: | |||
1405 | Res0 = acos(opr0); | |||
1406 | return true; | |||
1407 | ||||
1408 | case AMDGPULibFunc::EI_ACOSH: | |||
1409 | // acosh(x) == log(x + sqrt(x*x - 1)) | |||
1410 | Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0)); | |||
1411 | return true; | |||
1412 | ||||
1413 | case AMDGPULibFunc::EI_ACOSPI: | |||
1414 | Res0 = acos(opr0) / MATH_PInumbers::pi; | |||
1415 | return true; | |||
1416 | ||||
1417 | case AMDGPULibFunc::EI_ASIN: | |||
1418 | Res0 = asin(opr0); | |||
1419 | return true; | |||
1420 | ||||
1421 | case AMDGPULibFunc::EI_ASINH: | |||
1422 | // asinh(x) == log(x + sqrt(x*x + 1)) | |||
1423 | Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0)); | |||
1424 | return true; | |||
1425 | ||||
1426 | case AMDGPULibFunc::EI_ASINPI: | |||
1427 | Res0 = asin(opr0) / MATH_PInumbers::pi; | |||
1428 | return true; | |||
1429 | ||||
1430 | case AMDGPULibFunc::EI_ATAN: | |||
1431 | Res0 = atan(opr0); | |||
1432 | return true; | |||
1433 | ||||
1434 | case AMDGPULibFunc::EI_ATANH: | |||
1435 | // atanh(x) == (log(x+1) - log(x-1))/2; | |||
1436 | Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0; | |||
1437 | return true; | |||
1438 | ||||
1439 | case AMDGPULibFunc::EI_ATANPI: | |||
1440 | Res0 = atan(opr0) / MATH_PInumbers::pi; | |||
1441 | return true; | |||
1442 | ||||
1443 | case AMDGPULibFunc::EI_CBRT: | |||
1444 | Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0); | |||
1445 | return true; | |||
1446 | ||||
1447 | case AMDGPULibFunc::EI_COS: | |||
1448 | Res0 = cos(opr0); | |||
1449 | return true; | |||
1450 | ||||
1451 | case AMDGPULibFunc::EI_COSH: | |||
1452 | Res0 = cosh(opr0); | |||
1453 | return true; | |||
1454 | ||||
1455 | case AMDGPULibFunc::EI_COSPI: | |||
1456 | Res0 = cos(MATH_PInumbers::pi * opr0); | |||
1457 | return true; | |||
1458 | ||||
1459 | case AMDGPULibFunc::EI_EXP: | |||
1460 | Res0 = exp(opr0); | |||
1461 | return true; | |||
1462 | ||||
1463 | case AMDGPULibFunc::EI_EXP2: | |||
1464 | Res0 = pow(2.0, opr0); | |||
1465 | return true; | |||
1466 | ||||
1467 | case AMDGPULibFunc::EI_EXP10: | |||
1468 | Res0 = pow(10.0, opr0); | |||
1469 | return true; | |||
1470 | ||||
1471 | case AMDGPULibFunc::EI_EXPM1: | |||
1472 | Res0 = exp(opr0) - 1.0; | |||
1473 | return true; | |||
1474 | ||||
1475 | case AMDGPULibFunc::EI_LOG: | |||
1476 | Res0 = log(opr0); | |||
1477 | return true; | |||
1478 | ||||
1479 | case AMDGPULibFunc::EI_LOG2: | |||
1480 | Res0 = log(opr0) / log(2.0); | |||
1481 | return true; | |||
1482 | ||||
1483 | case AMDGPULibFunc::EI_LOG10: | |||
1484 | Res0 = log(opr0) / log(10.0); | |||
1485 | return true; | |||
1486 | ||||
1487 | case AMDGPULibFunc::EI_RSQRT: | |||
1488 | Res0 = 1.0 / sqrt(opr0); | |||
1489 | return true; | |||
1490 | ||||
1491 | case AMDGPULibFunc::EI_SIN: | |||
1492 | Res0 = sin(opr0); | |||
1493 | return true; | |||
1494 | ||||
1495 | case AMDGPULibFunc::EI_SINH: | |||
1496 | Res0 = sinh(opr0); | |||
1497 | return true; | |||
1498 | ||||
1499 | case AMDGPULibFunc::EI_SINPI: | |||
1500 | Res0 = sin(MATH_PInumbers::pi * opr0); | |||
1501 | return true; | |||
1502 | ||||
1503 | case AMDGPULibFunc::EI_SQRT: | |||
1504 | Res0 = sqrt(opr0); | |||
1505 | return true; | |||
1506 | ||||
1507 | case AMDGPULibFunc::EI_TAN: | |||
1508 | Res0 = tan(opr0); | |||
1509 | return true; | |||
1510 | ||||
1511 | case AMDGPULibFunc::EI_TANH: | |||
1512 | Res0 = tanh(opr0); | |||
1513 | return true; | |||
1514 | ||||
1515 | case AMDGPULibFunc::EI_TANPI: | |||
1516 | Res0 = tan(MATH_PInumbers::pi * opr0); | |||
1517 | return true; | |||
1518 | ||||
1519 | case AMDGPULibFunc::EI_RECIP: | |||
1520 | Res0 = 1.0 / opr0; | |||
1521 | return true; | |||
1522 | ||||
1523 | // two-arg functions | |||
1524 | case AMDGPULibFunc::EI_DIVIDE: | |||
1525 | Res0 = opr0 / opr1; | |||
1526 | return true; | |||
1527 | ||||
1528 | case AMDGPULibFunc::EI_POW: | |||
1529 | case AMDGPULibFunc::EI_POWR: | |||
1530 | Res0 = pow(opr0, opr1); | |||
1531 | return true; | |||
1532 | ||||
1533 | case AMDGPULibFunc::EI_POWN: { | |||
1534 | if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { | |||
1535 | double val = (double)iopr1->getSExtValue(); | |||
1536 | Res0 = pow(opr0, val); | |||
1537 | return true; | |||
1538 | } | |||
1539 | return false; | |||
1540 | } | |||
1541 | ||||
1542 | case AMDGPULibFunc::EI_ROOTN: { | |||
1543 | if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { | |||
1544 | double val = (double)iopr1->getSExtValue(); | |||
1545 | Res0 = pow(opr0, 1.0 / val); | |||
1546 | return true; | |||
1547 | } | |||
1548 | return false; | |||
1549 | } | |||
1550 | ||||
1551 | // with ptr arg | |||
1552 | case AMDGPULibFunc::EI_SINCOS: | |||
1553 | Res0 = sin(opr0); | |||
1554 | Res1 = cos(opr0); | |||
1555 | return true; | |||
1556 | ||||
1557 | // three-arg functions | |||
1558 | case AMDGPULibFunc::EI_FMA: | |||
1559 | case AMDGPULibFunc::EI_MAD: | |||
1560 | Res0 = opr0 * opr1 + opr2; | |||
1561 | return true; | |||
1562 | } | |||
1563 | ||||
1564 | return false; | |||
1565 | } | |||
1566 | ||||
1567 | bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) { | |||
1568 | int numArgs = (int)aCI->arg_size(); | |||
1569 | if (numArgs > 3) | |||
1570 | return false; | |||
1571 | ||||
1572 | Constant *copr0 = nullptr; | |||
1573 | Constant *copr1 = nullptr; | |||
1574 | Constant *copr2 = nullptr; | |||
1575 | if (numArgs > 0) { | |||
1576 | if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr) | |||
1577 | return false; | |||
1578 | } | |||
1579 | ||||
1580 | if (numArgs
| |||
1581 | if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) { | |||
1582 | if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS) | |||
1583 | return false; | |||
1584 | } | |||
1585 | } | |||
1586 | ||||
1587 | if (numArgs
| |||
1588 | if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr) | |||
1589 | return false; | |||
1590 | } | |||
1591 | ||||
1592 | // At this point, all arguments to aCI are constants. | |||
1593 | ||||
1594 | // max vector size is 16, and sincos will generate two results. | |||
1595 | double DVal0[16], DVal1[16]; | |||
1596 | bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS); | |||
1597 | if (getVecSize(FInfo) == 1) { | |||
1598 | if (!evaluateScalarMathFunc(FInfo, DVal0[0], | |||
1599 | DVal1[0], copr0, copr1, copr2)) { | |||
1600 | return false; | |||
1601 | } | |||
1602 | } else { | |||
1603 | ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0); | |||
1604 | ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1); | |||
1605 | ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2); | |||
1606 | for (int i=0; i < getVecSize(FInfo); ++i) { | |||
1607 | Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr; | |||
1608 | Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr; | |||
1609 | Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr; | |||
1610 | if (!evaluateScalarMathFunc(FInfo, DVal0[i], | |||
1611 | DVal1[i], celt0, celt1, celt2)) { | |||
1612 | return false; | |||
1613 | } | |||
1614 | } | |||
1615 | } | |||
1616 | ||||
1617 | LLVMContext &context = CI->getParent()->getParent()->getContext(); | |||
1618 | Constant *nval0, *nval1; | |||
1619 | if (getVecSize(FInfo) == 1) { | |||
1620 | nval0 = ConstantFP::get(CI->getType(), DVal0[0]); | |||
1621 | if (hasTwoResults) | |||
1622 | nval1 = ConstantFP::get(CI->getType(), DVal1[0]); | |||
1623 | } else { | |||
1624 | if (getArgType(FInfo) == AMDGPULibFunc::F32) { | |||
1625 | SmallVector <float, 0> FVal0, FVal1; | |||
1626 | for (int i=0; i < getVecSize(FInfo); ++i) | |||
1627 | FVal0.push_back((float)DVal0[i]); | |||
| ||||
1628 | ArrayRef<float> tmp0(FVal0); | |||
1629 | nval0 = ConstantDataVector::get(context, tmp0); | |||
1630 | if (hasTwoResults) { | |||
1631 | for (int i=0; i < getVecSize(FInfo); ++i) | |||
1632 | FVal1.push_back((float)DVal1[i]); | |||
1633 | ArrayRef<float> tmp1(FVal1); | |||
1634 | nval1 = ConstantDataVector::get(context, tmp1); | |||
1635 | } | |||
1636 | } else { | |||
1637 | ArrayRef<double> tmp0(DVal0); | |||
1638 | nval0 = ConstantDataVector::get(context, tmp0); | |||
1639 | if (hasTwoResults) { | |||
1640 | ArrayRef<double> tmp1(DVal1); | |||
1641 | nval1 = ConstantDataVector::get(context, tmp1); | |||
1642 | } | |||
1643 | } | |||
1644 | } | |||
1645 | ||||
1646 | if (hasTwoResults) { | |||
1647 | // sincos | |||
1648 | assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&(static_cast <bool> (FInfo.getId() == AMDGPULibFunc::EI_SINCOS && "math function with ptr arg not supported yet") ? void (0) : __assert_fail ("FInfo.getId() == AMDGPULibFunc::EI_SINCOS && \"math function with ptr arg not supported yet\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 1649, __extension__ __PRETTY_FUNCTION__)) | |||
1649 | "math function with ptr arg not supported yet")(static_cast <bool> (FInfo.getId() == AMDGPULibFunc::EI_SINCOS && "math function with ptr arg not supported yet") ? void (0) : __assert_fail ("FInfo.getId() == AMDGPULibFunc::EI_SINCOS && \"math function with ptr arg not supported yet\"" , "llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp", 1649, __extension__ __PRETTY_FUNCTION__)); | |||
1650 | new StoreInst(nval1, aCI->getArgOperand(1), aCI); | |||
1651 | } | |||
1652 | ||||
1653 | replaceCall(nval0); | |||
1654 | return true; | |||
1655 | } | |||
1656 | ||||
1657 | // Public interface to the Simplify LibCalls pass. | |||
1658 | FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) { | |||
1659 | return new AMDGPUSimplifyLibCalls(TM); | |||
1660 | } | |||
1661 | ||||
1662 | FunctionPass *llvm::createAMDGPUUseNativeCallsPass() { | |||
1663 | return new AMDGPUUseNativeCalls(); | |||
1664 | } | |||
1665 | ||||
1666 | bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { | |||
1667 | if (skipFunction(F)) | |||
1668 | return false; | |||
1669 | ||||
1670 | bool Changed = false; | |||
1671 | auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
1672 | ||||
1673 | LLVM_DEBUG(dbgs() << "AMDIC: process function ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { dbgs() << "AMDIC: process function " ; F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';; } } while (false) | |||
1674 | F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { dbgs() << "AMDIC: process function " ; F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';; } } while (false); | |||
1675 | ||||
1676 | for (auto &BB : F) { | |||
1677 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) { | |||
1678 | // Ignore non-calls. | |||
1679 | CallInst *CI = dyn_cast<CallInst>(I); | |||
1680 | ++I; | |||
1681 | // Ignore intrinsics that do not become real instructions. | |||
1682 | if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd()) | |||
1683 | continue; | |||
1684 | ||||
1685 | // Ignore indirect calls. | |||
1686 | Function *Callee = CI->getCalledFunction(); | |||
1687 | if (Callee == nullptr) | |||
1688 | continue; | |||
1689 | ||||
1690 | LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { dbgs() << "AMDIC: try folding " << *CI << "\n"; dbgs().flush(); } } while (false ) | |||
1691 | dbgs().flush())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { dbgs() << "AMDIC: try folding " << *CI << "\n"; dbgs().flush(); } } while (false ); | |||
1692 | if(Simplifier.fold(CI, AA)) | |||
1693 | Changed = true; | |||
1694 | } | |||
1695 | } | |||
1696 | return Changed; | |||
1697 | } | |||
1698 | ||||
1699 | PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, | |||
1700 | FunctionAnalysisManager &AM) { | |||
1701 | AMDGPULibCalls Simplifier(&TM); | |||
1702 | Simplifier.initNativeFuncs(); | |||
1703 | ||||
1704 | bool Changed = false; | |||
1705 | auto AA = &AM.getResult<AAManager>(F); | |||
1706 | ||||
1707 | LLVM_DEBUG(dbgs() << "AMDIC: process function ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { dbgs() << "AMDIC: process function " ; F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';; } } while (false) | |||
| ||||
1708 | F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { dbgs() << "AMDIC: process function " ; F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';; } } while (false); | |||
1709 | ||||
1710 | for (auto &BB : F) { | |||
1711 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { | |||
1712 | // Ignore non-calls. | |||
1713 | CallInst *CI = dyn_cast<CallInst>(I); | |||
1714 | ++I; | |||
1715 | // Ignore intrinsics that do not become real instructions. | |||
1716 | if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd()) | |||
1717 | continue; | |||
1718 | ||||
1719 | // Ignore indirect calls. | |||
1720 | Function *Callee = CI->getCalledFunction(); | |||
1721 | if (Callee == nullptr) | |||
1722 | continue; | |||
1723 | ||||
1724 | LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { dbgs() << "AMDIC: try folding " << *CI << "\n"; dbgs().flush(); } } while (false ) | |||
1725 | dbgs().flush())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("amdgpu-simplifylib")) { dbgs() << "AMDIC: try folding " << *CI << "\n"; dbgs().flush(); } } while (false ); | |||
1726 | if (Simplifier.fold(CI, AA)) | |||
1727 | Changed = true; | |||
1728 | } | |||
1729 | } | |||
1730 | return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); | |||
1731 | } | |||
1732 | ||||
1733 | bool AMDGPUUseNativeCalls::runOnFunction(Function &F) { | |||
1734 | if (skipFunction(F) || UseNative.empty()) | |||
1735 | return false; | |||
1736 | ||||
1737 | bool Changed = false; | |||
1738 | for (auto &BB : F) { | |||
1739 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) { | |||
1740 | // Ignore non-calls. | |||
1741 | CallInst *CI = dyn_cast<CallInst>(I); | |||
1742 | ++I; | |||
1743 | if (!CI) continue; | |||
1744 | ||||
1745 | // Ignore indirect calls. | |||
1746 | Function *Callee = CI->getCalledFunction(); | |||
1747 | if (Callee == nullptr) | |||
1748 | continue; | |||
1749 | ||||
1750 | if (Simplifier.useNative(CI)) | |||
1751 | Changed = true; | |||
1752 | } | |||
1753 | } | |||
1754 | return Changed; | |||
1755 | } | |||
1756 | ||||
1757 | PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, | |||
1758 | FunctionAnalysisManager &AM) { | |||
1759 | if (UseNative.empty()) | |||
1760 | return PreservedAnalyses::all(); | |||
1761 | ||||
1762 | AMDGPULibCalls Simplifier; | |||
1763 | Simplifier.initNativeFuncs(); | |||
1764 | ||||
1765 | bool Changed = false; | |||
1766 | for (auto &BB : F) { | |||
1767 | for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { | |||
1768 | // Ignore non-calls. | |||
1769 | CallInst *CI = dyn_cast<CallInst>(I); | |||
1770 | ++I; | |||
1771 | if (!CI) | |||
1772 | continue; | |||
1773 | ||||
1774 | // Ignore indirect calls. | |||
1775 | Function *Callee = CI->getCalledFunction(); | |||
1776 | if (Callee == nullptr) | |||
1777 | continue; | |||
1778 | ||||
1779 | if (Simplifier.useNative(CI)) | |||
1780 | Changed = true; | |||
1781 | } | |||
1782 | } | |||
1783 | return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); | |||
1784 | } |