// AMDGPULibCalls.cpp — from the LLVM project (23.0.0git documentation scrape).
1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file does AMD library function optimizations.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULibFunc.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/MDBuilder.h"
25#include <cmath>
26
27#define DEBUG_TYPE "amdgpu-simplifylib"
28
29using namespace llvm;
30using namespace llvm::PatternMatch;
31
32static cl::opt<bool> EnablePreLink("amdgpu-prelink",
33 cl::desc("Enable pre-link mode optimizations"),
34 cl::init(false),
36
37static cl::list<std::string> UseNative("amdgpu-use-native",
38 cl::desc("Comma separated list of functions to replace with native, or all"),
41
42#define MATH_PI numbers::pi
43#define MATH_E numbers::e
44#define MATH_SQRT2 numbers::sqrt2
45#define MATH_SQRT1_2 numbers::inv_sqrt2
46
47enum class PowKind { Pow, PowR, PowN, RootN };
48
49namespace llvm {
50
52private:
54
55 using FuncInfo = llvm::AMDGPULibFunc;
56
57 // -fuse-native.
58 bool AllNative = false;
59
60 bool useNativeFunc(const StringRef F) const;
61
62 // Return a pointer (pointer expr) to the function if function definition with
63 // "FuncName" exists. It may create a new function prototype in pre-link mode.
64 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
65
66 /// Wrapper around getFunction which tries to use a faster variant if
67 /// available, and falls back to a less fast option.
68 ///
69 /// Return a replacement function for \p fInfo that has float-typed fast
70 /// variants. \p NewFunc is a base replacement function to use. \p
71 /// NewFuncFastVariant is a faster version to use if the calling context knows
72 /// it's legal. If there is no fast variant to use, \p NewFuncFastVariant
73 /// should be EI_NONE.
74 FunctionCallee getFloatFastVariant(Module *M, const FuncInfo &fInfo,
75 FuncInfo &newInfo,
77 AMDGPULibFunc::EFuncId NewFuncFastVariant);
78
79 bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
80
81 bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
82
83 /* Specialized optimizations */
84
85 // pow/powr/pown
86 bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
87
88 /// Peform a fast math expansion of pow, powr, pown or rootn.
89 bool expandFastPow(FPMathOperator *FPOp, IRBuilder<> &B, PowKind Kind);
90
91 bool tryOptimizePow(FPMathOperator *FPOp, IRBuilder<> &B,
92 const FuncInfo &FInfo);
93
94 // rootn
95 bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
96
97 // -fuse-native for sincos
98 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
99
100 // evaluate calls if calls' arguments are constants.
101 bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1,
102 Constant *copr0, Constant *copr1);
103 bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
104
105 /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
106 /// of cos, sincos call).
107 std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
108 FastMathFlags FMF,
109 IRBuilder<> &B,
110 FunctionCallee Fsincos);
111
112 // sin/cos
113 bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
114
115 // __read_pipe/__write_pipe
116 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
117 const FuncInfo &FInfo);
118
119 // Get a scalar native builtin single argument FP function
120 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
121
122 /// Substitute a call to a known libcall with an intrinsic call. If \p
123 /// AllowMinSize is true, allow the replacement in a minsize function.
124 bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
125 bool AllowMinSizeF32 = false,
126 bool AllowF64 = false,
127 bool AllowStrictFP = false);
128 void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
129 Intrinsic::ID IntrID);
130
131 bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
132 Intrinsic::ID IntrID,
133 bool AllowMinSizeF32 = false,
134 bool AllowF64 = false,
135 bool AllowStrictFP = false);
136
137protected:
138 bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
139
141
142 static void replaceCall(Instruction *I, Value *With) {
143 I->replaceAllUsesWith(With);
144 I->eraseFromParent();
145 }
146
147 static void replaceCall(FPMathOperator *I, Value *With) {
149 }
150
151public:
153
154 bool fold(CallInst *CI);
155
156 void initNativeFuncs();
157
158 // Replace a normal math function call with that native version
159 bool useNative(CallInst *CI);
160};
161
162} // end namespace llvm
163
164template <typename IRB>
165static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
166 const Twine &Name = "") {
167 CallInst *R = B.CreateCall(Callee, Arg, Name);
168 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
169 R->setCallingConv(F->getCallingConv());
170 return R;
171}
172
173template <typename IRB>
174static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
175 Value *Arg2, const Twine &Name = "") {
176 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
177 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
178 R->setCallingConv(F->getCallingConv());
179 return R;
180}
181
  // NOTE(review): the signature line of this helper was lost in extraction;
  // from the body it builds a pown-style FunctionType from \p FT.
  // The exponent operand is i32, widened to a matching vector of i32 when the
  // FP return type is a vector.
  Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
  if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
    PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());

  // Result: same return type, parameters (fp-value, i32-exponent), not vararg.
  return FunctionType::get(FT->getReturnType(),
                           {FT->getParamType(0), PowNExpTy}, false);
}
190
191// Data structures for table-driven optimizations.
192// FuncTbl works for both f32 and f64 functions with 1 input argument
193
  // One row of a constant-folding table: calling the math function with
  // `input` is defined to yield exactly `result`.
  // NOTE(review): the `struct TableEntry {` line was lost in extraction.
  double result;
  double input;
};
198
199/* a list of {result, input} */
// Exact special-value tables, one per foldable libm function. Each entry is
// {result, input}: if the call's argument exactly equals `input`, the call is
// replaced by the constant `result` (see TDOFold). Signed zeros are listed
// separately so -0.0 folds with the correct sign.
static const TableEntry tbl_acos[] = {
  {MATH_PI / 2.0, 0.0},
  {MATH_PI / 2.0, -0.0},
  {0.0, 1.0},
  {MATH_PI, -1.0}
};
static const TableEntry tbl_acosh[] = {
  {0.0, 1.0}
};
static const TableEntry tbl_acospi[] = {
  {0.5, 0.0},
  {0.5, -0.0},
  {0.0, 1.0},
  {1.0, -1.0}
};
static const TableEntry tbl_asin[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 2.0, 1.0},
  {-MATH_PI / 2.0, -1.0}
};
static const TableEntry tbl_asinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_asinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.5, 1.0},
  {-0.5, -1.0}
};
static const TableEntry tbl_atan[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 4.0, 1.0},
  {-MATH_PI / 4.0, -1.0}
};
static const TableEntry tbl_atanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_atanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.25, 1.0},
  {-0.25, -1.0}
};
static const TableEntry tbl_cbrt[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {1.0, 1.0},
  {-1.0, -1.0},
};
static const TableEntry tbl_cos[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cosh[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cospi[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erfc[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erf[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_exp[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {MATH_E, 1.0}
};
static const TableEntry tbl_exp2[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {2.0, 1.0}
};
static const TableEntry tbl_exp10[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {10.0, 1.0}
};
static const TableEntry tbl_expm1[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_log[] = {
  {0.0, 1.0},
  {1.0, MATH_E}
};
static const TableEntry tbl_log2[] = {
  {0.0, 1.0},
  {1.0, 2.0}
};
static const TableEntry tbl_log10[] = {
  {0.0, 1.0},
  {1.0, 10.0}
};
static const TableEntry tbl_rsqrt[] = {
  {1.0, 1.0},
  {MATH_SQRT1_2, 2.0}
};
static const TableEntry tbl_sin[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sqrt[] = {
  {0.0, 0.0},
  {1.0, 1.0},
  {MATH_SQRT2, 2.0}
};
static const TableEntry tbl_tan[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
// tgamma(n) = (n-1)! at small positive integers.
static const TableEntry tbl_tgamma[] = {
  {1.0, 1.0},
  {1.0, 2.0},
  {2.0, 3.0},
  {6.0, 4.0}
};
343
345 switch(id) {
361 return true;
362 default:;
363 }
364 return false;
365}
366
368
370 switch(id) {
408 default:;
409 }
410 return TableRef();
411}
412
413static inline int getVecSize(const AMDGPULibFunc& FInfo) {
414 return FInfo.getLeads()[0].VectorSize;
415}
416
417static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
418 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
419}
420
FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
  // If we are doing PreLinkOpt, the function is external. So it is safe to
  // use getOrInsertFunction() at this stage.

  // NOTE(review): the line selecting the pre-link arm (presumably
  // `return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)`)
  // was lost in extraction; only the lookup-only arm survives below.
      : AMDGPULibFunc::getFunction(M, fInfo);
}
428
429FunctionCallee AMDGPULibCalls::getFloatFastVariant(
430 Module *M, const FuncInfo &fInfo, FuncInfo &newInfo,
431 AMDGPULibFunc::EFuncId NewFunc, AMDGPULibFunc::EFuncId FastVariant) {
432 assert(NewFunc != FastVariant);
433
434 if (FastVariant != AMDGPULibFunc::EI_NONE &&
435 getArgType(fInfo) == AMDGPULibFunc::F32) {
436 newInfo = AMDGPULibFunc(FastVariant, fInfo);
437 if (FunctionCallee NewCallee = getFunction(M, newInfo))
438 return NewCallee;
439 }
440
441 newInfo = AMDGPULibFunc(NewFunc, fInfo);
442 return getFunction(M, newInfo);
443}
444
445bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
446 FuncInfo &FInfo) {
447 return AMDGPULibFunc::parse(FMangledName, FInfo);
448}
449
451 return FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs();
452}
453
455 const FPMathOperator *FPOp) const {
456 // TODO: Refine to approxFunc or contract
457 return FPOp->isFast();
458}
459
461 : SQ(F.getParent()->getDataLayout(),
462 &FAM.getResult<TargetLibraryAnalysis>(F),
463 FAM.getCachedResult<DominatorTreeAnalysis>(F),
464 &FAM.getResult<AssumptionAnalysis>(F)) {}
465
466bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
467 return AllNative || llvm::is_contained(UseNative, F);
468}
469
471 AllNative = useNativeFunc("all") ||
472 (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
473 UseNative.begin()->empty());
474}
475
// -fuse-native handling for sincos(x, &c): split into separate native sin and
// cos calls. Only fires when BOTH "sin" and "cos" were requested as native.
bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
  bool native_sin = useNativeFunc("sin");
  bool native_cos = useNativeFunc("cos");

  if (native_sin && native_cos) {
    Module *M = aCI->getModule();
    Value *opr0 = aCI->getArgOperand(0);

    // Build a lookup descriptor with the same element type / vector width as
    // the original sincos call.
    AMDGPULibFunc nf;
    nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
    nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;

    // NOTE(review): the lines configuring `nf` as native sin (setPrefix/setId)
    // were lost in extraction.
    FunctionCallee sinExpr = getFunction(M, nf);

    // NOTE(review): the lines reconfiguring `nf` as native cos were lost in
    // extraction.
    FunctionCallee cosExpr = getFunction(M, nf);
    if (sinExpr && cosExpr) {
      // sincos returns sin(x) and stores cos(x) through the pointer operand;
      // reproduce both effects with the two split calls.
      Value *sinval =
          CallInst::Create(sinExpr, opr0, "splitsin", aCI->getIterator());
      Value *cosval =
          CallInst::Create(cosExpr, opr0, "splitcos", aCI->getIterator());
      new StoreInst(cosval, aCI->getArgOperand(1), aCI->getIterator());

      DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                          << " with native version of sin/cos");

      replaceCall(aCI, sinval);
      return true;
    }
  }
  return false;
}
511
513 Function *Callee = aCI->getCalledFunction();
514 if (!Callee || aCI->isNoBuiltin())
515 return false;
516
517 FuncInfo FInfo;
518 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
519 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
520 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
521 !(AllNative || useNativeFunc(FInfo.getName()))) {
522 return false;
523 }
524
525 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
526 return sincosUseNative(aCI, FInfo);
527
529 FunctionCallee F = getFunction(aCI->getModule(), FInfo);
530 if (!F)
531 return false;
532
533 aCI->setCalledFunction(F);
534 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
535 << " with native version");
536 return true;
537}
538
539// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
540// builtin, with appended type size and alignment arguments, where 2 or 4
541// indicates the original number of arguments. The library has optimized version
542// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
543// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
544// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
545// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
// builtin, with appended type size and alignment arguments, where 2 or 4
// indicates the original number of arguments. The library has optimized version
// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
                                          const FuncInfo &FInfo) {
  auto *Callee = CI->getCalledFunction();
  // Only rewrite declarations; a body means a definition already exists.
  if (!Callee->isDeclaration())
    return false;

  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
  auto *M = Callee->getParent();
  std::string Name = std::string(Callee->getName());
  auto NumArg = CI->arg_size();
  // 4 args for the _2 form, 6 for the _4 form.
  if (NumArg != 4 && NumArg != 6)
    return false;
  // The trailing two arguments are the packet size and alignment; both must
  // be compile-time constants for the specialized variant to apply.
  ConstantInt *PacketSize =
      dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
  ConstantInt *PacketAlign =
      dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
  if (!PacketSize || !PacketAlign)
    return false;

  unsigned Size = PacketSize->getZExtValue();
  Align Alignment = PacketAlign->getAlignValue();
  // Specialized variants only exist when alignment equals the packet size.
  if (Alignment != Size)
    return false;

  // The packet pointer sits 3 slots from the end (before size/alignment).
  unsigned PtrArgLoc = CI->arg_size() - 3;
  Value *PtrArg = CI->getArgOperand(PtrArgLoc);
  Type *PtrTy = PtrArg->getType();

  // NOTE(review): the declaration of ArgTys (a SmallVector of Type*) was lost
  // in extraction.
  for (unsigned I = 0; I != PtrArgLoc; ++I)
    ArgTys.push_back(CI->getArgOperand(I)->getType());
  ArgTys.push_back(PtrTy);

  // Specialized name: e.g. __read_pipe_2 -> __read_pipe_2_4 for 4-byte packets.
  Name = Name + "_" + std::to_string(Size);
  auto *FTy = FunctionType::get(Callee->getReturnType(),
                                ArrayRef<Type *>(ArgTys), false);
  AMDGPULibFunc NewLibFunc(Name, FTy);
  // NOTE(review): the line materializing `F` from NewLibFunc was lost in
  // extraction.
  if (!F)
    return false;

  // NOTE(review): the declaration of Args (a SmallVector of Value*) was lost
  // in extraction. The specialized variant drops the size/align arguments.
  for (unsigned I = 0; I != PtrArgLoc; ++I)
    Args.push_back(CI->getArgOperand(I));
  Args.push_back(PtrArg);

  auto *NCI = B.CreateCall(F, Args);
  NCI->setAttributes(CI->getAttributes());
  CI->replaceAllUsesWith(NCI);
  CI->dropAllReferences();
  CI->eraseFromParent();

  return true;
}
600
601// This function returns false if no change; return true otherwise.
603 Function *Callee = CI->getCalledFunction();
604 // Ignore indirect calls.
605 if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
606 return false;
607
608 FuncInfo FInfo;
609 if (!parseFunctionName(Callee->getName(), FInfo))
610 return false;
611
612 // Further check the number of arguments to see if they match.
613 // TODO: Check calling convention matches too
614 if (!FInfo.isCompatibleSignature(*Callee->getParent(), CI->getFunctionType()))
615 return false;
616
617 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');
618
619 if (TDOFold(CI, FInfo))
620 return true;
621
622 IRBuilder<> B(CI);
623 if (CI->isStrictFP())
624 B.setIsFPConstrained(true);
625
627 // Under unsafe-math, evaluate calls if possible.
628 // According to Brian Sumner, we can do this for all f32 function calls
629 // using host's double function calls.
630 if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
631 return true;
632
633 // Copy fast flags from the original call.
634 FastMathFlags FMF = FPOp->getFastMathFlags();
635 B.setFastMathFlags(FMF);
636
637 // Specialized optimizations for each function call.
638 //
639 // TODO: Handle native functions
640 switch (FInfo.getId()) {
642 if (FMF.none())
643 return false;
644 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
645 FMF.approxFunc());
647 if (FMF.none())
648 return false;
649 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
650 FMF.approxFunc());
652 if (FMF.none())
653 return false;
654 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
655 FMF.approxFunc());
657 if (FMF.none())
658 return false;
659 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
660 FMF.approxFunc());
662 if (FMF.none())
663 return false;
664 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
665 FMF.approxFunc());
667 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
668 true, true);
670 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
671 true, true);
673 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
674 true);
676 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
677 true, true);
679 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
680 true, true);
682 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
683 true, true, true);
685 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
686 true);
688 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
689 true);
691 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
692 true);
694 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
695 true);
697 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
698 true);
700 if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
701 return false;
702
703 Value *Arg1 = CI->getArgOperand(1);
704 if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
705 VecTy && !isa<VectorType>(Arg1->getType())) {
706 Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
707 CI->setArgOperand(1, SplatArg1);
708 }
709
711 CI->getModule(), Intrinsic::ldexp,
712 {CI->getType(), CI->getArgOperand(1)->getType()}));
713 return true;
714 }
717 return tryOptimizePow(FPOp, B, FInfo);
720 if (fold_pow(FPOp, B, FInfo))
721 return true;
722 if (!FMF.approxFunc())
723 return false;
724
725 if (FInfo.getId() == AMDGPULibFunc::EI_POWR && FMF.approxFunc() &&
726 getArgType(FInfo) == AMDGPULibFunc::F32) {
727 Module *M = Callee->getParent();
728 AMDGPULibFunc PowrFastInfo(AMDGPULibFunc::EI_POWR_FAST, FInfo);
729 if (FunctionCallee PowrFastFunc = getFunction(M, PowrFastInfo)) {
730 CI->setCalledFunction(PowrFastFunc);
731 return true;
732 }
733 }
734
735 if (!shouldReplaceLibcallWithIntrinsic(CI))
736 return false;
737 return expandFastPow(FPOp, B, PowKind::PowR);
738 }
741 if (fold_pow(FPOp, B, FInfo))
742 return true;
743 if (!FMF.approxFunc())
744 return false;
745
746 if (FInfo.getId() == AMDGPULibFunc::EI_POWN &&
747 getArgType(FInfo) == AMDGPULibFunc::F32) {
748 Module *M = Callee->getParent();
749 AMDGPULibFunc PownFastInfo(AMDGPULibFunc::EI_POWN_FAST, FInfo);
750 if (FunctionCallee PownFastFunc = getFunction(M, PownFastInfo)) {
751 CI->setCalledFunction(PownFastFunc);
752 return true;
753 }
754 }
755
756 if (!shouldReplaceLibcallWithIntrinsic(CI))
757 return false;
758 return expandFastPow(FPOp, B, PowKind::PowN);
759 }
762 if (fold_rootn(FPOp, B, FInfo))
763 return true;
764 if (!FMF.approxFunc())
765 return false;
766
767 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
768 Module *M = Callee->getParent();
769 AMDGPULibFunc RootnFastInfo(AMDGPULibFunc::EI_ROOTN_FAST, FInfo);
770 if (FunctionCallee RootnFastFunc = getFunction(M, RootnFastInfo)) {
771 CI->setCalledFunction(RootnFastFunc);
772 return true;
773 }
774 }
775
776 return expandFastPow(FPOp, B, PowKind::RootN);
777 }
779 // TODO: Allow with strictfp + constrained intrinsic
780 return tryReplaceLibcallWithSimpleIntrinsic(
781 B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
784 return fold_sincos(FPOp, B, FInfo);
785 default:
786 break;
787 }
788 } else {
789 // Specialized optimizations for each function call
790 switch (FInfo.getId()) {
795 return fold_read_write_pipe(CI, B, FInfo);
796 default:
797 break;
798 }
799 }
800
801 return false;
802}
803
// Table-driven fold: if the (scalar or vector) constant argument matches an
// `input` entry in the function's table, replace the call with the table's
// `result` constant. Returns true when the call was replaced.
bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
  // Table-Driven optimization
  const TableRef tr = getOptTable(FInfo.getId());
  if (tr.empty())
    return false;

  int const sz = (int)tr.size();
  Value *opr0 = CI->getArgOperand(0);

  if (getVecSize(FInfo) > 1) {
    // NOTE(review): the lines binding `CV` (a ConstantDataVector cast of
    // opr0) and declaring `DVal` (a SmallVector of double) were lost in
    // extraction.
    for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
      // NOTE(review): the line binding `eltval` (a ConstantFP cast of the
      // element) was lost in extraction.
          CV->getElementAsConstant((unsigned)eltNo));
      assert(eltval && "Non-FP arguments in math function!");
      bool found = false;
      for (int i=0; i < sz; ++i) {
        if (eltval->isExactlyValue(tr[i].input)) {
          DVal.push_back(tr[i].result);
          found = true;
          break;
        }
      }
      if (!found) {
        // This vector constants not handled yet.
        return false;
      }
    }
    LLVMContext &context = CI->getContext();
    Constant *nval;
    if (getArgType(FInfo) == AMDGPULibFunc::F32) {
      // Narrow the double-precision table results back to float for f32 calls.
      // NOTE(review): the declaration of `FVal` (a SmallVector of float) was
      // lost in extraction.
      for (double D : DVal)
        FVal.push_back((float)D);
      ArrayRef<float> tmp(FVal);
      nval = ConstantDataVector::get(context, tmp);
    } else { // F64
      ArrayRef<double> tmp(DVal);
      nval = ConstantDataVector::get(context, tmp);
    }
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
    replaceCall(CI, nval);
    return true;
    }
  } else {
    // Scalar version
    if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
      for (int i = 0; i < sz; ++i) {
        if (CF->isExactlyValue(tr[i].input)) {
          Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
          LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
          replaceCall(CI, nval);
          return true;
        }
      }
    }
  }

  return false;
}
865
namespace llvm {
// Host-side log2 used for constant folding. Some C libraries only expose
// ::log2 under the feature-test macros checked below; otherwise fall back to
// the identity log2(x) = ln(x) / ln(2).
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  return log(V) / numbers::ln2;
#endif
}
} // namespace llvm
875
// Fold pow/powr/pown (and their _FAST variants):
//   - exact small exponents (0, 1, 2, -1, ±0.5, |n| <= 12) become
//     multiplies / divides / sqrt / rsqrt;
//   - otherwise, under unsafe finite-only math, expand to
//     exp2(y * log2(|x|)) with a sign fix-up for pow/pown of negative x.
bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
                              const FuncInfo &FInfo) {
  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
          FInfo.getId() == AMDGPULibFunc::EI_POW_FAST ||
          FInfo.getId() == AMDGPULibFunc::EI_POWR ||
          FInfo.getId() == AMDGPULibFunc::EI_POWR_FAST ||
          FInfo.getId() == AMDGPULibFunc::EI_POWN ||
          FInfo.getId() == AMDGPULibFunc::EI_POWN_FAST) &&
         "fold_pow: encounter a wrong function call");

  Module *M = B.GetInsertBlock()->getModule();
  Type *eltType = FPOp->getType()->getScalarType();
  Value *opr0 = FPOp->getOperand(0);
  Value *opr1 = FPOp->getOperand(1);

  // Match the exponent as an FP constant (pow/powr) or integer constant
  // (pown); at most one of CF/CINT ends up non-null.
  const APFloat *CF = nullptr;
  const APInt *CINT = nullptr;
  if (!match(opr1, m_APFloatAllowPoison(CF)))
    match(opr1, m_APIntAllowPoison(CINT));

  // 0x1111111 means that we don't do anything for this call.
  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);

  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
    // pow/powr/pown(x, 0) == 1
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
    Constant *cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    replaceCall(FPOp, cnval);
    return true;
  }
  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
    // pow/powr/pown(x, 1.0) = x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
    replaceCall(FPOp, opr0);
    return true;
  }
  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
    // pow/powr/pown(x, 2.0) = x*x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
                      << *opr0 << "\n");
    Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
    replaceCall(FPOp, nval);
    return true;
  }
  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
    // pow/powr/pown(x, -1.0) = 1.0/x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
    Constant *cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
    replaceCall(FPOp, nval);
    return true;
  }

  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
    // pow[r](x, [-]0.5) = sqrt(x)
    bool issqrt = CF->isExactlyValue(0.5);
    // NOTE(review): the rsqrt alternative of this conditional expression
    // (presumably `: AMDGPULibFunc::EI_RSQRT`) was lost in extraction.
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
                                         FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
                        << '(' << *opr0 << ")\n");
      Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
                                                        : "__pow2rsqrt");
      replaceCall(FPOp, nval);
      return true;
    }
  }

  // Everything past this point changes rounding/special-case behavior and is
  // only legal under fast, finite-only math.
  if (!isUnsafeFiniteOnlyMath(FPOp))
    return false;

  // Unsafe Math optimization

  // Remember that ci_opr1 is set if opr1 is integral
  if (CF) {
    double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
                      ? (double)CF->convertToFloat()
                      : CF->convertToDouble();
    int ival = (int)dval;
    if ((double)ival == dval) {
      ci_opr1 = ival;
    } else
      ci_opr1 = 0x11111111;
  }

  // pow/powr/pown(x, c) = [1/](x*x*..x); where
  //   trunc(c) == c && the number of x == c && |c| <= 12
  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
  if (abs_opr1 <= 12) {
    Constant *cnval;
    Value *nval;
    if (abs_opr1 == 0) {
      cnval = ConstantFP::get(eltType, 1.0);
      if (getVecSize(FInfo) > 1) {
        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
      }
      nval = cnval;
    } else {
      // Square-and-multiply: valx2 holds x^(2^k), nval accumulates the
      // product for the set bits of |c|.
      Value *valx2 = nullptr;
      nval = nullptr;
      while (abs_opr1 > 0) {
        valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
        if (abs_opr1 & 1) {
          nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
        }
        abs_opr1 >>= 1;
      }
    }

    if (ci_opr1 < 0) {
      // Negative exponent: take the reciprocal of the accumulated product.
      cnval = ConstantFP::get(eltType, 1.0);
      if (getVecSize(FInfo) > 1) {
        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
      }
      nval = B.CreateFDiv(cnval, nval, "__1powprod");
    }
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
                      << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
                      << ")\n");
    replaceCall(FPOp, nval);
    return true;
  }

  // If we should use the generic intrinsic instead of emitting a libcall
  const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();

  // powr ---> exp2(y * log2(x))
  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
  FunctionCallee ExpExpr;
  if (ShouldUseIntrinsic)
    ExpExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::exp2,
                                                {FPOp->getType()});
  else {
    ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
    if (!ExpExpr)
      return false;
  }

  // Work out which pieces of the expansion are needed: a runtime log2
  // (needlog), a fabs on x (needabs), and a sign transfer for pow/pown of
  // negative x (needcopysign). cnval caches log2(|x|) when x is constant.
  bool needlog = false;
  bool needabs = false;
  bool needcopysign = false;
  Constant *cnval = nullptr;
  if (getVecSize(FInfo) == 1) {
    CF = nullptr;
    match(opr0, m_APFloatAllowPoison(CF));

    if (CF) {
      double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                     ? (double)CF->convertToFloat()
                     : CF->convertToDouble();

      V = log2(std::abs(V));
      cnval = ConstantFP::get(eltType, V);
      needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR &&
                      FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST) &&
                     CF->isNegative();
    } else {
      needlog = true;
      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
                               FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST;
    }
  } else {
    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);

    if (!CDV) {
      needlog = true;
      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
                               FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST;
    } else {
      assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
              "Wrong vector size detected");

      // NOTE(review): the declaration of `DVal` (a SmallVector of double)
      // was lost in extraction.
      for (int i=0; i < getVecSize(FInfo); ++i) {
        double V = CDV->getElementAsAPFloat(i).convertToDouble();
        if (V < 0.0) needcopysign = true;
        V = log2(std::abs(V));
        DVal.push_back(V);
      }
      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
        // NOTE(review): the declaration of `FVal` (a SmallVector of float)
        // was lost in extraction.
        for (double D : DVal)
          FVal.push_back((float)D);
        ArrayRef<float> tmp(FVal);
        cnval = ConstantDataVector::get(M->getContext(), tmp);
      } else {
        ArrayRef<double> tmp(DVal);
        cnval = ConstantDataVector::get(M->getContext(), tmp);
      }
    }
  }

  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW ||
                       FInfo.getId() == AMDGPULibFunc::EI_POW_FAST)) {
    // We cannot handle corner cases for a general pow() function, give up
    // unless y is a constant integral value. Then proceed as if it were pown.
    if (!isKnownIntegral(opr1, SQ.getWithInstruction(cast<Instruction>(FPOp)),
                         FPOp->getFastMathFlags()))
      return false;
  }

  Value *nval;
  if (needabs) {
    nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
  } else {
    nval = cnval ? cnval : opr0;
  }
  if (needlog) {
    FunctionCallee LogExpr;
    if (ShouldUseIntrinsic) {
      LogExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::log2,
                                                  {FPOp->getType()});
    } else {
      LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
      if (!LogExpr)
        return false;
    }

    nval = CreateCallEx(B,LogExpr, nval, "__log2");
  }

  if (FInfo.getId() == AMDGPULibFunc::EI_POWN ||
      FInfo.getId() == AMDGPULibFunc::EI_POWN_FAST) {
    // convert int(32) to fp(f32 or f64)
    opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
  }
  nval = B.CreateFMul(opr1, nval, "__ylogx");

  CallInst *Exp2Call = CreateCallEx(B, ExpExpr, nval, "__exp2");

  // TODO: Generalized fpclass logic for pow
  // NOTE(review): the line initializing `KnownNot` (an FPClassTest value)
  // was lost in extraction.
  if (FPOp->hasNoNaNs())
    KnownNot |= FPClassTest::fcNan;

  Exp2Call->addRetAttr(
      Attribute::getWithNoFPClass(Exp2Call->getContext(), KnownNot));
  nval = Exp2Call;

  if (needcopysign) {
    // Transfer the sign of x onto the result when y is odd: shift the low
    // bit of (int)y into the sign position, AND with x's bits, and apply it
    // via copysign.
    Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
    Type *nTy = FPOp->getType()->getWithNewType(nTyS);
    Value *opr_n = FPOp->getOperand(1);
    if (opr_n->getType()->getScalarType()->isIntegerTy())
      opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
    else
      opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");

    unsigned size = nTy->getScalarSizeInBits();
    Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
    sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");

    nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
                            nullptr, "__pow_sign");
  }

  LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
                    << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
  replaceCall(FPOp, nval);

  return true;
}
1145
1146bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
1147 const FuncInfo &FInfo) {
1148 Value *opr0 = FPOp->getOperand(0);
1149 Value *opr1 = FPOp->getOperand(1);
1150
1151 const APInt *CINT = nullptr;
1152 if (!match(opr1, m_APIntAllowPoison(CINT)))
1153 return false;
1154
1155 Function *Parent = B.GetInsertBlock()->getParent();
1156
1157 int ci_opr1 = (int)CINT->getSExtValue();
1158 if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
1159 // rootn(x, 1) = x
1160 //
1161 // TODO: Insert constrained canonicalize for strictfp case.
1162 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
1163 replaceCall(FPOp, opr0);
1164 return true;
1165 }
1166
1167 Module *M = B.GetInsertBlock()->getModule();
1168
1169 CallInst *CI = cast<CallInst>(FPOp);
1170 if (ci_opr1 == 2 &&
1171 shouldReplaceLibcallWithIntrinsic(CI,
1172 /*AllowMinSizeF32=*/true,
1173 /*AllowF64=*/true)) {
1174 // rootn(x, 2) = sqrt(x)
1175 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n");
1176
1177 CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1178 NewCall->takeName(CI);
1179
1180 // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
1181 // metadata.
1182 MDBuilder MDHelper(M->getContext());
1183 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1184 NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD);
1185
1186 replaceCall(CI, NewCall);
1187 return true;
1188 }
1189
1190 if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1191 if (FunctionCallee FPExpr =
1192 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1193 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
1194 << ")\n");
1195 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1196 replaceCall(FPOp, nval);
1197 return true;
1198 }
1199 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1200 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
1201 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1202 opr0,
1203 "__rootn2div");
1204 replaceCall(FPOp, nval);
1205 return true;
1206 }
1207
1208 if (ci_opr1 == -2 &&
1209 shouldReplaceLibcallWithIntrinsic(CI,
1210 /*AllowMinSizeF32=*/true,
1211 /*AllowF64=*/true)) {
1212 // rootn(x, -2) = rsqrt(x)
1213
1214 // The original rootn had looser ulp requirements than the resultant sqrt
1215 // and fdiv.
1216 MDBuilder MDHelper(M->getContext());
1217 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1218
1219 // TODO: Could handle strictfp but need to fix strict sqrt emission
1220 FastMathFlags FMF = FPOp->getFastMathFlags();
1221 FMF.setAllowContract(true);
1222
1223 CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1225 B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt));
1226 Sqrt->setFastMathFlags(FMF);
1227 RSqrt->setFastMathFlags(FMF);
1228 RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD);
1229
1230 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
1231 << ")\n");
1232 replaceCall(CI, RSqrt);
1233 return true;
1234 }
1235
1236 return false;
1237}
1238
1239// is_integer(y) => trunc(y) == y
1241 Value *TruncY = B.CreateUnaryIntrinsic(Intrinsic::trunc, Y);
1242 return B.CreateFCmpOEQ(TruncY, Y);
1243}
1244
1246 // Even integers are still integers after division by 2.
1247 auto *HalfY = B.CreateFMul(Y, ConstantFP::get(Y->getType(), 0.5));
1248 return emitIsInteger(B, HalfY);
1249}
1250
1251// is_odd_integer(y) => is_integer(y) && !is_even_integer(y)
1253 Value *IsIntY = emitIsInteger(B, Y);
1254 Value *IsEvenY = emitIsEvenInteger(B, Y);
1255 Value *NotEvenY = B.CreateNot(IsEvenY);
1256 return B.CreateAnd(IsIntY, NotEvenY);
1257}
1258
1259// isinf(val) => fabs(val) == +inf
1261 auto *fabsVal = B.CreateUnaryIntrinsic(Intrinsic::fabs, val);
1262 return B.CreateFCmpOEQ(fabsVal, ConstantFP::getInfinity(val->getType()));
1263}
1264
1265// y * log2(fabs(x))
1267 Value *AbsX = B.CreateUnaryIntrinsic(Intrinsic::fabs, X);
1268 Value *LogAbsX = B.CreateUnaryIntrinsic(Intrinsic::log2, AbsX);
1269 Value *YTimesLogX = B.CreateFMul(Y, LogAbsX);
1270 return B.CreateUnaryIntrinsic(Intrinsic::exp2, YTimesLogX);
1271}
1272
1273/// Emit special case management epilog code for fast pow, powr, pown, and rootn
1274/// expansions. \p x and \p y should be the arguments to the library call
1275/// (possibly with some values clamped). \p expylnx should be the result to use
1276/// in normal circumstances.
1278 PowKind Kind) {
1279 Constant *Zero = ConstantFP::getZero(X->getType());
1280 Constant *One = ConstantFP::get(X->getType(), 1.0);
1281 Constant *QNaN = ConstantFP::getQNaN(X->getType());
1282 Constant *PInf = ConstantFP::getInfinity(X->getType());
1283
1284 switch (Kind) {
1285 case PowKind::Pow: {
1286 // is_odd_integer(y)
1287 Value *IsOddY = emitIsOddInteger(B, Y);
1288
1289 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1290 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1291 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1292
1293 // if (x < 0 && !is_integer(y)) ret = QNAN
1294 Value *IsIntY = emitIsInteger(B, Y);
1295 Value *condNegX = B.CreateFCmpOLT(X, Zero);
1296 Value *condNotIntY = B.CreateNot(IsIntY);
1297 Value *condNaN = B.CreateAnd(condNegX, condNotIntY);
1298 Ret = B.CreateSelect(condNaN, QNaN, Ret);
1299
1300 // if (isinf(ay)) { ... }
1301
1302 // FIXME: Missing backend optimization to save on materialization cost of
1303 // mixed sign constant infinities.
1304 Value *YIsInf = emitIsInf(B, Y);
1305
1306 Value *AY = B.CreateUnaryIntrinsic(Intrinsic::fabs, Y);
1307 Value *YIsNegInf = B.CreateFCmpUNE(Y, AY);
1308
1309 Value *AX = B.CreateUnaryIntrinsic(Intrinsic::fabs, X);
1310 Value *AxEqOne = B.CreateFCmpOEQ(AX, One);
1311 Value *AxLtOne = B.CreateFCmpOLT(AX, One);
1312 Value *XorCond = B.CreateXor(AxLtOne, YIsNegInf);
1313 Value *SelInf =
1314 B.CreateSelect(AxEqOne, AX, B.CreateSelect(XorCond, Zero, AY));
1315 Ret = B.CreateSelect(YIsInf, SelInf, Ret);
1316
1317 // if (isinf(ax) || x == 0.0f) { ... }
1318 Value *XIsInf = emitIsInf(B, X);
1319 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1320 Value *AxInfOrZero = B.CreateOr(XIsInf, XEqZero);
1321 Value *YLtZero = B.CreateFCmpOLT(Y, Zero);
1322 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1323 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1324 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1325 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1326 Ret = B.CreateSelect(AxInfOrZero, Copysign, Ret);
1327
1328 // if (isunordered(x, y)) ret = QNAN
1329 Value *isUnordered = B.CreateFCmpUNO(X, Y);
1330 return B.CreateSelect(isUnordered, QNaN, Ret);
1331 }
1332 case PowKind::PowR: {
1333 Value *YIsNeg = B.CreateFCmpOLT(Y, Zero);
1334 Value *IZ = B.CreateSelect(YIsNeg, PInf, Zero);
1335 Value *ZI = B.CreateSelect(YIsNeg, Zero, PInf);
1336
1337 Value *YEqZero = B.CreateFCmpOEQ(Y, Zero);
1338 Value *SelZeroCase = B.CreateSelect(YEqZero, QNaN, IZ);
1339 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1340 Value *Ret = B.CreateSelect(XEqZero, SelZeroCase, ExpYLnX);
1341
1342 Value *XEqInf = B.CreateFCmpOEQ(X, PInf);
1343 Value *YNeZero = B.CreateFCmpUNE(Y, Zero);
1344 Value *CondInfCase = B.CreateAnd(XEqInf, YNeZero);
1345 Ret = B.CreateSelect(CondInfCase, ZI, Ret);
1346
1347 Value *IsInfY = emitIsInf(B, Y);
1348 Value *XNeOne = B.CreateFCmpUNE(X, One);
1349 Value *CondInfY = B.CreateAnd(IsInfY, XNeOne);
1350 Value *XLtOne = B.CreateFCmpOLT(X, One);
1351 Value *SelInfYCase = B.CreateSelect(XLtOne, IZ, ZI);
1352 Ret = B.CreateSelect(CondInfY, SelInfYCase, Ret);
1353
1354 Value *IsUnordered = B.CreateFCmpUNO(X, Y);
1355 return B.CreateSelect(IsUnordered, QNaN, Ret);
1356 }
1357 case PowKind::PowN: {
1358 Constant *ZeroI = ConstantInt::get(Y->getType(), 0);
1359
1360 // is_odd_y = (ny & 1) != 0
1361 Value *OneI = ConstantInt::get(Y->getType(), 1);
1362 Value *YAnd1 = B.CreateAnd(Y, OneI);
1363 Value *IsOddY = B.CreateICmpNE(YAnd1, ZeroI);
1364
1365 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1366 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1367 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1368
1369 // if (isinf(x) || x == 0.0f)
1370 Value *FabsX = B.CreateUnaryIntrinsic(Intrinsic::fabs, X);
1371 Value *XIsInf = B.CreateFCmpOEQ(FabsX, PInf);
1372 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1373 Value *InfOrZero = B.CreateOr(XIsInf, XEqZero);
1374
1375 // (x == 0.0f) ^ (ny < 0) ? 0.0f : +inf
1376 Value *YLtZero = B.CreateICmpSLT(Y, ZeroI);
1377 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1378 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1379
1380 // copysign(selVal, is_odd_y ? x : 0.0f)
1381 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1382 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1383
1384 return B.CreateSelect(InfOrZero, Copysign, Ret);
1385 }
1386 case PowKind::RootN: {
1387 Constant *ZeroI = ConstantInt::get(Y->getType(), 0);
1388
1389 // is_odd_y = (ny & 1) != 0
1390 Value *YAnd1 = B.CreateAnd(Y, ConstantInt::get(Y->getType(), 1));
1391 Value *IsOddY = B.CreateICmpNE(YAnd1, ZeroI);
1392
1393 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1394 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1395 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1396
1397 // if (isinf(x) || x == 0.0f)
1398 Value *FabsX = B.CreateUnaryIntrinsic(Intrinsic::fabs, X);
1399 Value *IsInfX = B.CreateFCmpOEQ(FabsX, PInf);
1400 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1401 Value *CondInfOrZero = B.CreateOr(IsInfX, XEqZero);
1402
1403 // (x == 0.0f) ^ (ny < 0) ? 0.0f : +inf
1404 Value *YLtZero = B.CreateICmpSLT(Y, ZeroI);
1405 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1406 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1407
1408 // copysign(selVal, is_odd_y ? x : 0.0f)
1409 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1410 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1411
1412 Ret = B.CreateSelect(CondInfOrZero, Copysign, Ret);
1413
1414 // if ((x < 0.0f && !is_odd_y) || ny == 0) ret = QNAN
1415 Value *XIsNeg = B.CreateFCmpOLT(X, Zero);
1416 Value *NotOddY = B.CreateNot(IsOddY);
1417 Value *CondNegAndNotOdd = B.CreateAnd(XIsNeg, NotOddY);
1418 Value *YEqZero = B.CreateICmpEQ(Y, ZeroI);
1419 Value *CondBad = B.CreateOr(CondNegAndNotOdd, YEqZero);
1420 return B.CreateSelect(CondBad, QNaN, Ret);
1421 }
1422 }
1423
1424 llvm_unreachable("covered switch");
1425}
1426
1427// TODO: Move the fold_pow folding to sqrt/fdiv here
1428bool AMDGPULibCalls::expandFastPow(FPMathOperator *FPOp, IRBuilder<> &B,
1429 PowKind Kind) {
1430 Type *Ty = FPOp->getType();
1431
1432 // There's currently no reason to do this for half. The correct path is
1433 // promote to float and use the fast float expansion.
1434 //
1435 // TODO: We could move this expansion to lowering to get half pow to work.
1436 if (!Ty->getScalarType()->isFloatTy())
1437 return false;
1438
1439 // TODO: Verify optimization for double and bfloat.
1440 Value *X = FPOp->getOperand(0);
1441 Value *Y = FPOp->getOperand(1);
1442
1443 switch (Kind) {
1444 case PowKind::Pow: {
1445 Constant *One = ConstantFP::get(X->getType(), 1.0);
1446
1447 // if (x == 1.0f) y = 1.0f;
1448 Value *XEqOne = B.CreateFCmpOEQ(X, One);
1449 Y = B.CreateSelect(XEqOne, One, Y);
1450
1451 // if (y == 0.0f) x = 1.0f;
1452 Value *YEqZero = B.CreateFCmpOEQ(Y, ConstantFP::getZero(X->getType()));
1453 X = B.CreateSelect(YEqZero, One, X);
1454
1455 Value *ExpYLnX = emitFastExpYLnx(B, X, Y);
1456 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1457 replaceCall(FPOp, Fixed);
1458 return true;
1459 }
1460 case PowKind::PowR: {
1461 Value *NegX = B.CreateFCmpOLT(X, ConstantFP::getZero(X->getType()));
1462 X = B.CreateSelect(NegX, ConstantFP::getQNaN(X->getType()), X);
1463
1464 Value *ExpYLnX = emitFastExpYLnx(B, X, Y);
1465 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1466 replaceCall(FPOp, Fixed);
1467 return true;
1468 }
1469 case PowKind::PowN: {
1470 // ny == 0
1471 Value *YEqZero = B.CreateICmpEQ(Y, ConstantInt::get(Y->getType(), 0));
1472
1473 // x = (ny == 0 ? 1.0f : x)
1474 X = B.CreateSelect(YEqZero, ConstantFP::get(X->getType(), 1.0), X);
1475
1476 Value *CastY = B.CreateSIToFP(Y, X->getType());
1477 Value *ExpYLnX = emitFastExpYLnx(B, X, CastY);
1478 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1479 replaceCall(FPOp, Fixed);
1480 return true;
1481 }
1482 case PowKind::RootN: {
1483 Value *CastY = B.CreateSIToFP(Y, X->getType());
1484 Value *RcpY = B.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, CastY);
1485 Value *ExpYLnX = emitFastExpYLnx(B, X, RcpY);
1486 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1487 replaceCall(FPOp, Fixed);
1488 return true;
1489 }
1490 }
1491 llvm_unreachable("Unhandled PowKind enum");
1492}
1493
1494bool AMDGPULibCalls::tryOptimizePow(FPMathOperator *FPOp, IRBuilder<> &B,
1495 const FuncInfo &FInfo) {
1496 FastMathFlags FMF = FPOp->getFastMathFlags();
1497 CallInst *Call = cast<CallInst>(FPOp);
1498 Module *M = Call->getModule();
1499
1500 FuncInfo PowrInfo;
1501 AMDGPULibFunc::EFuncId FastPowrFuncId =
1502 FMF.approxFunc() || FInfo.getId() == AMDGPULibFunc::EI_POW_FAST
1505 FunctionCallee PowrFunc = getFloatFastVariant(
1506 M, FInfo, PowrInfo, AMDGPULibFunc::EI_POWR, FastPowrFuncId);
1507
1508 // TODO: Prefer fast pown to fast powr, but slow powr to slow pown.
1509
1510 // pow(x, y) -> powr(x, y) for x >= -0.0
1511 // TODO: Account for flags on current call
1512 if (PowrFunc && cannotBeOrderedLessThanZero(FPOp->getOperand(0),
1513 SQ.getWithInstruction(Call))) {
1514 Call->setCalledFunction(PowrFunc);
1515 return fold_pow(FPOp, B, PowrInfo) || true;
1516 }
1517
1518 // pow(x, y) -> pown(x, y) for known integral y
1519 if (isKnownIntegral(FPOp->getOperand(1), SQ.getWithInstruction(Call),
1520 FPOp->getFastMathFlags())) {
1521 FunctionType *PownType = getPownType(Call->getFunctionType());
1522
1523 FuncInfo PownInfo;
1524 AMDGPULibFunc::EFuncId FastPownFuncId =
1525 FMF.approxFunc() || FInfo.getId() == AMDGPULibFunc::EI_POW_FAST
1528 FunctionCallee PownFunc = getFloatFastVariant(
1529 M, FInfo, PownInfo, AMDGPULibFunc::EI_POWN, FastPownFuncId);
1530
1531 if (PownFunc) {
1532 // TODO: If the incoming integral value is an sitofp/uitofp, it won't
1533 // fold out without a known range. We can probably take the source
1534 // value directly.
1535 Value *CastedArg =
1536 B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
1537 // Have to drop any nofpclass attributes on the original call site.
1539 1, AttributeFuncs::typeIncompatible(CastedArg->getType(),
1541 Call->setCalledFunction(PownFunc);
1542 Call->setArgOperand(1, CastedArg);
1543 return fold_pow(FPOp, B, PownInfo) || true;
1544 }
1545 }
1546
1547 if (fold_pow(FPOp, B, FInfo))
1548 return true;
1549
1550 if (!FMF.approxFunc())
1551 return false;
1552
1553 if (FInfo.getId() == AMDGPULibFunc::EI_POW && FMF.approxFunc() &&
1554 getArgType(FInfo) == AMDGPULibFunc::F32) {
1555 AMDGPULibFunc PowFastInfo(AMDGPULibFunc::EI_POW_FAST, FInfo);
1556 if (FunctionCallee PowFastFunc = getFunction(M, PowFastInfo)) {
1557 Call->setCalledFunction(PowFastFunc);
1558 return fold_pow(FPOp, B, PowFastInfo) || true;
1559 }
1560 }
1561
1562 return expandFastPow(FPOp, B, PowKind::Pow);
1563}
1564
1565// Get a scalar native builtin single argument FP function
1566FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1567 const FuncInfo &FInfo) {
1568 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1569 return nullptr;
1570 FuncInfo nf = FInfo;
1572 return getFunction(M, nf);
1573}
1574
1575// Some library calls are just wrappers around llvm intrinsics, but compiled
1576// conservatively. Preserve the flags from the original call site by
1577// substituting them with direct calls with all the flags.
1578bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
1579 bool AllowMinSizeF32,
1580 bool AllowF64,
1581 bool AllowStrictFP) {
1582 Type *FltTy = CI->getType()->getScalarType();
1583 const bool IsF32 = FltTy->isFloatTy();
1584
1585 // f64 intrinsics aren't implemented for most operations.
1586 if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
1587 return false;
1588
1589 // We're implicitly inlining by replacing the libcall with the intrinsic, so
1590 // don't do it for noinline call sites.
1591 if (CI->isNoInline())
1592 return false;
1593
1594 const Function *ParentF = CI->getFunction();
1595 // TODO: Handle strictfp
1596 if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
1597 return false;
1598
1599 if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
1600 return false;
1601 return true;
1602}
1603
1604void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
1605 CallInst *CI,
1606 Intrinsic::ID IntrID) {
1607 if (CI->arg_size() == 2) {
1608 Value *Arg0 = CI->getArgOperand(0);
1609 Value *Arg1 = CI->getArgOperand(1);
1610 VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
1611 VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
1612 if (Arg0VecTy && !Arg1VecTy) {
1613 Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1614 CI->setArgOperand(1, SplatRHS);
1615 } else if (!Arg0VecTy && Arg1VecTy) {
1616 Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1617 CI->setArgOperand(0, SplatLHS);
1618 }
1619 }
1620
1622 CI->getModule(), IntrID, {CI->getType()}));
1623}
1624
1625bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1626 IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
1627 bool AllowF64, bool AllowStrictFP) {
1628 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1629 AllowStrictFP))
1630 return false;
1631 replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
1632 return true;
1633}
1634
1635std::tuple<Value *, Value *, Value *>
1636AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
1637 FunctionCallee Fsincos) {
1638 DebugLoc DL = B.getCurrentDebugLocation();
1639 Function *F = B.GetInsertBlock()->getParent();
1640 B.SetInsertPointPastAllocas(F);
1641
1642 AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");
1643
1644 if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
1645 // If the argument is an instruction, it must dominate all uses so put our
1646 // sincos call there. Otherwise, right after the allocas works well enough
1647 // if it's an argument or constant.
1648
1649 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1650
1651 // SetInsertPoint unwelcomely always tries to set the debug loc.
1652 B.SetCurrentDebugLocation(DL);
1653 }
1654
1655 Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);
1656
1657 // The allocaInst allocates the memory in private address space. This need
1658 // to be addrspacecasted to point to the address space of cos pointer type.
1659 // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1660 Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);
1661
1662 CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);
1663
1664 // TODO: Is it worth trying to preserve the location for the cos calls for the
1665 // load?
1666
1667 LoadInst *LoadCos = B.CreateLoad(Arg->getType(), Alloc);
1668 return {SinCos, LoadCos, SinCos};
1669}
1670
1671// fold sin, cos -> sincos.
1672bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
1673 const FuncInfo &fInfo) {
1674 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1675 fInfo.getId() == AMDGPULibFunc::EI_COS);
1676
1677 if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
1678 getArgType(fInfo) != AMDGPULibFunc::F64) ||
1679 fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
1680 return false;
1681
1682 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1683
1684 Value *CArgVal = FPOp->getOperand(0);
1685
1686 // TODO: Constant fold the call
1687 if (isa<ConstantData>(CArgVal))
1688 return false;
1689
1690 CallInst *CI = cast<CallInst>(FPOp);
1691
1692 Function *F = B.GetInsertBlock()->getParent();
1693 Module *M = F->getParent();
1694
1695 // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
1696 // implementation. Prefer the private form if available.
1697 AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
1698 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1700
1701 AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
1702 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1704
1705 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1706 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1707 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1708 if (!FSinCos)
1709 return false;
1710
1711 SmallVector<CallInst *> SinCalls;
1712 SmallVector<CallInst *> CosCalls;
1713 SmallVector<CallInst *> SinCosCalls;
1714 FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
1715 fInfo);
1716 const std::string PairName = PartnerInfo.mangle();
1717
1718 StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
1719 StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
1720 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1721 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1722
1723 // Intersect the two sets of flags.
1724 FastMathFlags FMF = FPOp->getFastMathFlags();
1725 MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);
1726
1727 SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};
1728
1729 for (User* U : CArgVal->users()) {
1730 CallInst *XI = dyn_cast<CallInst>(U);
1731 if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
1732 continue;
1733
1734 Function *UCallee = XI->getCalledFunction();
1735 if (!UCallee)
1736 continue;
1737
1738 bool Handled = true;
1739
1740 if (UCallee->getName() == SinName)
1741 SinCalls.push_back(XI);
1742 else if (UCallee->getName() == CosName)
1743 CosCalls.push_back(XI);
1744 else if (UCallee->getName() == SinCosPrivateName ||
1745 UCallee->getName() == SinCosGenericName)
1746 SinCosCalls.push_back(XI);
1747 else
1748 Handled = false;
1749
1750 if (Handled) {
1751 MergeDbgLocs.push_back(XI->getDebugLoc());
1752 auto *OtherOp = cast<FPMathOperator>(XI);
1753 FMF &= OtherOp->getFastMathFlags();
1755 FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
1756 }
1757 }
1758
1759 if (SinCalls.empty() || CosCalls.empty())
1760 return false;
1761
1762 B.setFastMathFlags(FMF);
1763 B.setDefaultFPMathTag(FPMath);
1764 DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
1765 B.SetCurrentDebugLocation(DbgLoc);
1766
1767 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
1768
1769 auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
1770 for (CallInst *C : Calls)
1771 C->replaceAllUsesWith(Res);
1772
1773 // Leave the other dead instructions to avoid clobbering iterators.
1774 };
1775
1776 replaceTrigInsts(SinCalls, Sin);
1777 replaceTrigInsts(CosCalls, Cos);
1778 replaceTrigInsts(SinCosCalls, SinCos);
1779
1780 // It's safe to delete the original now.
1781 CI->eraseFromParent();
1782 return true;
1783}
1784
1785bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
1786 double &Res1, Constant *copr0,
1787 Constant *copr1) {
1788 // By default, opr0/opr1/opr3 holds values of float/double type.
1789 // If they are not float/double, each function has to its
1790 // operand separately.
1791 double opr0 = 0.0, opr1 = 0.0;
1794 if (fpopr0) {
1795 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1796 ? fpopr0->getValueAPF().convertToDouble()
1797 : (double)fpopr0->getValueAPF().convertToFloat();
1798 }
1799
1800 if (fpopr1) {
1801 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1802 ? fpopr1->getValueAPF().convertToDouble()
1803 : (double)fpopr1->getValueAPF().convertToFloat();
1804 }
1805
1806 switch (FInfo.getId()) {
1807 default : return false;
1808
1810 Res0 = acos(opr0);
1811 return true;
1812
1814 // acosh(x) == log(x + sqrt(x*x - 1))
1815 Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1816 return true;
1817
1819 Res0 = acos(opr0) / MATH_PI;
1820 return true;
1821
1823 Res0 = asin(opr0);
1824 return true;
1825
1827 // asinh(x) == log(x + sqrt(x*x + 1))
1828 Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1829 return true;
1830
1832 Res0 = asin(opr0) / MATH_PI;
1833 return true;
1834
1836 Res0 = atan(opr0);
1837 return true;
1838
1840 // atanh(x) == (log(x+1) - log(x-1))/2;
1841 Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1842 return true;
1843
1845 Res0 = atan(opr0) / MATH_PI;
1846 return true;
1847
1849 Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1850 return true;
1851
1853 Res0 = cos(opr0);
1854 return true;
1855
1857 Res0 = cosh(opr0);
1858 return true;
1859
1861 Res0 = cos(MATH_PI * opr0);
1862 return true;
1863
1865 Res0 = exp(opr0);
1866 return true;
1867
1869 Res0 = pow(2.0, opr0);
1870 return true;
1871
1873 Res0 = pow(10.0, opr0);
1874 return true;
1875
1877 Res0 = log(opr0);
1878 return true;
1879
1881 Res0 = log(opr0) / log(2.0);
1882 return true;
1883
1885 Res0 = log(opr0) / log(10.0);
1886 return true;
1887
1889 Res0 = 1.0 / sqrt(opr0);
1890 return true;
1891
1893 Res0 = sin(opr0);
1894 return true;
1895
1897 Res0 = sinh(opr0);
1898 return true;
1899
1901 Res0 = sin(MATH_PI * opr0);
1902 return true;
1903
1905 Res0 = tan(opr0);
1906 return true;
1907
1909 Res0 = tanh(opr0);
1910 return true;
1911
1913 Res0 = tan(MATH_PI * opr0);
1914 return true;
1915
1916 // two-arg functions
1919 Res0 = pow(opr0, opr1);
1920 return true;
1921
1923 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1924 double val = (double)iopr1->getSExtValue();
1925 Res0 = pow(opr0, val);
1926 return true;
1927 }
1928 return false;
1929 }
1930
1932 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1933 double val = (double)iopr1->getSExtValue();
1934 Res0 = pow(opr0, 1.0 / val);
1935 return true;
1936 }
1937 return false;
1938 }
1939
1940 // with ptr arg
1942 Res0 = sin(opr0);
1943 Res1 = cos(opr0);
1944 return true;
1945 }
1946
1947 return false;
1948}
1949
1950bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1951 int numArgs = (int)aCI->arg_size();
1952 if (numArgs > 3)
1953 return false;
1954
1955 Constant *copr0 = nullptr;
1956 Constant *copr1 = nullptr;
1957 if (numArgs > 0) {
1958 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1959 return false;
1960 }
1961
1962 if (numArgs > 1) {
1963 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1964 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1965 return false;
1966 }
1967 }
1968
1969 // At this point, all arguments to aCI are constants.
1970
1971 // max vector size is 16, and sincos will generate two results.
1972 double DVal0[16], DVal1[16];
1973 int FuncVecSize = getVecSize(FInfo);
1974 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1975 if (FuncVecSize == 1) {
1976 if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
1977 return false;
1978 }
1979 } else {
1980 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1981 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1982 for (int i = 0; i < FuncVecSize; ++i) {
1983 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1984 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1985 if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
1986 return false;
1987 }
1988 }
1989 }
1990
1991 LLVMContext &context = aCI->getContext();
1992 Constant *nval0, *nval1;
1993 if (FuncVecSize == 1) {
1994 nval0 = ConstantFP::get(aCI->getType(), DVal0[0]);
1995 if (hasTwoResults)
1996 nval1 = ConstantFP::get(aCI->getType(), DVal1[0]);
1997 } else {
1998 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1999 SmallVector <float, 0> FVal0, FVal1;
2000 for (int i = 0; i < FuncVecSize; ++i)
2001 FVal0.push_back((float)DVal0[i]);
2002 ArrayRef<float> tmp0(FVal0);
2003 nval0 = ConstantDataVector::get(context, tmp0);
2004 if (hasTwoResults) {
2005 for (int i = 0; i < FuncVecSize; ++i)
2006 FVal1.push_back((float)DVal1[i]);
2007 ArrayRef<float> tmp1(FVal1);
2008 nval1 = ConstantDataVector::get(context, tmp1);
2009 }
2010 } else {
2011 ArrayRef<double> tmp0(DVal0);
2012 nval0 = ConstantDataVector::get(context, tmp0);
2013 if (hasTwoResults) {
2014 ArrayRef<double> tmp1(DVal1);
2015 nval1 = ConstantDataVector::get(context, tmp1);
2016 }
2017 }
2018 }
2019
2020 if (hasTwoResults) {
2021 // sincos
2022 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
2023 "math function with ptr arg not supported yet");
2024 new StoreInst(nval1, aCI->getArgOperand(1), aCI->getIterator());
2025 }
2026
2027 replaceCall(aCI, nval0);
2028 return true;
2029}
2030
2033 AMDGPULibCalls Simplifier(F, AM);
2034 Simplifier.initNativeFuncs();
2035
2036 bool Changed = false;
2037
2038 LLVM_DEBUG(dbgs() << "AMDIC: process function ";
2039 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
2040
2041 for (auto &BB : F) {
2042 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
2043 // Ignore non-calls.
2045 ++I;
2046
2047 if (CI) {
2048 if (Simplifier.fold(CI))
2049 Changed = true;
2050 }
2051 }
2052 }
2054}
2055
2058 if (UseNative.empty())
2059 return PreservedAnalyses::all();
2060
2061 AMDGPULibCalls Simplifier(F, AM);
2062 Simplifier.initNativeFuncs();
2063
2064 bool Changed = false;
2065 for (auto &BB : F) {
2066 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
2067 // Ignore non-calls.
2069 ++I;
2070 if (CI && Simplifier.useNative(CI))
2071 Changed = true;
2072 }
2073 }
2075}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
#define MATH_SQRT2
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
static Value * emitIsInf(IRBuilder<> &B, Value *val)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static Value * emitFastExpYLnx(IRBuilder<> &B, Value *X, Value *Y)
static Value * emitIsInteger(IRBuilder<> &B, Value *Y)
static Value * emitIsEvenInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static Value * emitPowFixup(IRBuilder<> &B, Value *X, Value *Y, Value *ExpYLnX, PowKind Kind)
Emit special case management epilog code for fast pow, powr, pown, and rootn expansions.
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
#define MATH_E
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
#define MATH_PI
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
#define MATH_SQRT1_2
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static Value * emitIsOddInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
loop term fold
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
FunctionAnalysisManager FAM
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
bool fold(CallInst *CI)
static void replaceCall(Instruction *I, Value *With)
AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM)
bool useNative(CallInst *CI)
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULIbFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
EFuncId getId() const
bool isMangled() const
Param * getLeads()
Get leading parameters for mangled lib functions.
void setId(EFuncId Id)
ENamePrefix getPrefix() const
bool isNegative() const
Definition APFloat.h:1512
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:6034
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1495
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6065
bool isZero() const
Definition APFloat.h:1508
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
A function analysis which provides an AssumptionCache.
static LLVM_ABI Attribute getWithNoFPClass(LLVMContext &Context, FPClassTest Mask)
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
void removeParamAttrs(unsigned ArgNo, const AttributeMask &AttrsToRemove)
Removes the attributes from the given argument.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
AttributeSet getParamAttributes(unsigned ArgNo) const
Return the param attributes for this call.
bool isNoInline() const
Return true if the call should not be inlined.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI APFloat getElementAsAPFloat(uint64_t i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
LLVM_ABI Constant * getElementAsConstant(uint64_t i) const
Return a Constant for a specified index's element.
LLVM_ABI uint64_t getNumElements() const
Return the number of elements in the array or vector.
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:781
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:282
const APFloat & getValueAPF() const
Definition Constants.h:325
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
Definition Constants.h:186
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
Definition Operator.h:286
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
Definition Operator.h:302
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
Definition Operator.h:307
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
Definition Operator.h:328
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setAllowContract(bool B=true)
Definition FMF.h:93
bool none() const
Definition FMF.h:60
bool approxFunc() const
Definition FMF.h:73
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2787
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetLibraryInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
void dropAllReferences()
Drop all references to operands.
Definition User.h:324
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_ABI APInt pow(const APInt &X, int64_t N)
Compute X^N for N>=0.
Definition APInt.cpp:3166
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
bool match(Val *V, const Pattern &P)
ap_match< APFloat > m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
initializer< Ty > init(const Ty &Val)
constexpr double ln2
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
LLVM_ABI bool isKnownIntegral(const Value *V, const SimplifyQuery &SQ, FastMathFlags FMF)
Return true if the floating-point value V is known to be an integer value.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39