LLVM 23.0.0git
AMDGPULibCalls.cpp
Go to the documentation of this file.
1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file does AMD library function optimizations.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULibFunc.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/MDBuilder.h"
25#include <cmath>
26
27#define DEBUG_TYPE "amdgpu-simplifylib"
28
29using namespace llvm;
30using namespace llvm::PatternMatch;
31
32static cl::opt<bool> EnablePreLink("amdgpu-prelink",
33 cl::desc("Enable pre-link mode optimizations"),
34 cl::init(false),
36
37static cl::list<std::string> UseNative("amdgpu-use-native",
38 cl::desc("Comma separated list of functions to replace with native, or all"),
41
42#define MATH_PI numbers::pi
43#define MATH_E numbers::e
44#define MATH_SQRT2 numbers::sqrt2
45#define MATH_SQRT1_2 numbers::inv_sqrt2
46
47enum class PowKind { Pow, PowR, PowN, RootN };
48
49namespace llvm {
50
52private:
54
55 using FuncInfo = llvm::AMDGPULibFunc;
56
57 // -fuse-native.
58 bool AllNative = false;
59
60 bool useNativeFunc(const StringRef F) const;
61
62 // Return a pointer (pointer expr) to the function if function definition with
63 // "FuncName" exists. It may create a new function prototype in pre-link mode.
64 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
65
66 /// Wrapper around getFunction which tries to use a faster variant if
67 /// available, and falls back to a less fast option.
68 ///
69 /// Return a replacement function for \p fInfo that has float-typed fast
70 /// variants. \p NewFunc is a base replacement function to use. \p
71 /// NewFuncFastVariant is a faster version to use if the calling context knows
72 /// it's legal. If there is no fast variant to use, \p NewFuncFastVariant
73 /// should be EI_NONE.
74 FunctionCallee getFloatFastVariant(Module *M, const FuncInfo &fInfo,
75 FuncInfo &newInfo,
77 AMDGPULibFunc::EFuncId NewFuncFastVariant);
78
79 bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
80
81 bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
82
83 /* Specialized optimizations */
84
85 // pow/powr/pown
86 bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
87
88 /// Perform a fast math expansion of pow, powr, pown or rootn.
89 bool expandFastPow(FPMathOperator *FPOp, IRBuilder<> &B, PowKind Kind);
90
91 bool tryOptimizePow(FPMathOperator *FPOp, IRBuilder<> &B,
92 const FuncInfo &FInfo);
93
94 // rootn
95 bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
96
97 // -fuse-native for sincos
98 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
99
100 // evaluate calls if calls' arguments are constants.
101 bool evaluateScalarMathFunc(const FuncInfo &FInfo, APFloat &Res0,
102 APFloat &Res1, Constant *copr0, Constant *copr1);
103 bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
104
105 /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
106 /// of cos, sincos call).
107 std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
108 FastMathFlags FMF,
109 IRBuilder<> &B,
110 FunctionCallee Fsincos);
111
112 // sin/cos
113 bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
114
115 // __read_pipe/__write_pipe
116 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
117 const FuncInfo &FInfo);
118
119 // Get a scalar native builtin single argument FP function
120 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
121
122 /// Substitute a call to a known libcall with an intrinsic call. If \p
123 /// AllowMinSizeF32 is true, allow the replacement in a minsize function.
124 bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
125 bool AllowMinSizeF32 = false,
126 bool AllowF64 = false,
127 bool AllowStrictFP = false);
128 void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
129 Intrinsic::ID IntrID);
130
131 bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
132 Intrinsic::ID IntrID,
133 bool AllowMinSizeF32 = false,
134 bool AllowF64 = false,
135 bool AllowStrictFP = false);
136
137protected:
138 bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
139
141
142 static void replaceCall(Instruction *I, Value *With) {
143 I->replaceAllUsesWith(With);
144 I->eraseFromParent();
145 }
146
147 static void replaceCall(FPMathOperator *I, Value *With) {
149 }
150
151public:
153
154 bool fold(CallInst *CI);
155
156 void initNativeFuncs();
157
158 // Replace a normal math function call with its native version.
159 bool useNative(CallInst *CI);
160};
161
162} // end namespace llvm
163
164template <typename IRB>
165static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
166 const Twine &Name = "") {
167 CallInst *R = B.CreateCall(Callee, Arg, Name);
168 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
169 R->setCallingConv(F->getCallingConv());
170 return R;
171}
172
173template <typename IRB>
174static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
175 Value *Arg2, const Twine &Name = "") {
176 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
177 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
178 R->setCallingConv(F->getCallingConv());
179 return R;
180}
181
183 Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
184 if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
185 PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
186
187 return FunctionType::get(FT->getReturnType(),
188 {FT->getParamType(0), PowNExpTy}, false);
189}
190
191// Data structures for table-driven optimizations.
192// FuncTbl works for both f32 and f64 functions with 1 input argument
193
195 double result;
196 double input;
197};
198
// Lookup tables for the table-driven fold (TDOFold) of one-argument math
// library calls. Every row is {result, input}: when the call's constant
// argument matches `input` exactly (ConstantFP::isExactlyValue), the call is
// replaced by the constant `result`. Values are stored as double and converted
// to the call's floating-point type when the replacement constant is built.
//
// +0.0 and -0.0 are listed as separate rows wherever both are folded, so each
// zero sign has its own explicit result.
199/* a list of {result, input} */
200static const TableEntry tbl_acos[] = {
201 {MATH_PI / 2.0, 0.0},
202 {MATH_PI / 2.0, -0.0},
203 {0.0, 1.0},
204 {MATH_PI, -1.0}
205};
206static const TableEntry tbl_acosh[] = {
207 {0.0, 1.0}
208};
// The *pi variants return the angle divided by pi, hence 0.5 instead of pi/2.
209static const TableEntry tbl_acospi[] = {
210 {0.5, 0.0},
211 {0.5, -0.0},
212 {0.0, 1.0},
213 {1.0, -1.0}
214};
215static const TableEntry tbl_asin[] = {
216 {0.0, 0.0},
217 {-0.0, -0.0},
218 {MATH_PI / 2.0, 1.0},
219 {-MATH_PI / 2.0, -1.0}
220};
221static const TableEntry tbl_asinh[] = {
222 {0.0, 0.0},
223 {-0.0, -0.0}
224};
225static const TableEntry tbl_asinpi[] = {
226 {0.0, 0.0},
227 {-0.0, -0.0},
228 {0.5, 1.0},
229 {-0.5, -1.0}
230};
231static const TableEntry tbl_atan[] = {
232 {0.0, 0.0},
233 {-0.0, -0.0},
234 {MATH_PI / 4.0, 1.0},
235 {-MATH_PI / 4.0, -1.0}
236};
237static const TableEntry tbl_atanh[] = {
238 {0.0, 0.0},
239 {-0.0, -0.0}
240};
241static const TableEntry tbl_atanpi[] = {
242 {0.0, 0.0},
243 {-0.0, -0.0},
244 {0.25, 1.0},
245 {-0.25, -1.0}
246};
247static const TableEntry tbl_cbrt[] = {
248 {0.0, 0.0},
249 {-0.0, -0.0},
250 {1.0, 1.0},
251 {-1.0, -1.0},
252};
253static const TableEntry tbl_cos[] = {
254 {1.0, 0.0},
255 {1.0, -0.0}
256};
257static const TableEntry tbl_cosh[] = {
258 {1.0, 0.0},
259 {1.0, -0.0}
260};
261static const TableEntry tbl_cospi[] = {
262 {1.0, 0.0},
263 {1.0, -0.0}
264};
265static const TableEntry tbl_erfc[] = {
266 {1.0, 0.0},
267 {1.0, -0.0}
268};
269static const TableEntry tbl_erf[] = {
270 {0.0, 0.0},
271 {-0.0, -0.0}
272};
273static const TableEntry tbl_exp[] = {
274 {1.0, 0.0},
275 {1.0, -0.0},
276 {MATH_E, 1.0}
277};
278static const TableEntry tbl_exp2[] = {
279 {1.0, 0.0},
280 {1.0, -0.0},
281 {2.0, 1.0}
282};
283static const TableEntry tbl_exp10[] = {
284 {1.0, 0.0},
285 {1.0, -0.0},
286 {10.0, 1.0}
287};
288static const TableEntry tbl_expm1[] = {
289 {0.0, 0.0},
290 {-0.0, -0.0}
291};
// The log tables only fold the arguments 1 and the logarithm's own base.
292static const TableEntry tbl_log[] = {
293 {0.0, 1.0},
294 {1.0, MATH_E}
295};
296static const TableEntry tbl_log2[] = {
297 {0.0, 1.0},
298 {1.0, 2.0}
299};
300static const TableEntry tbl_log10[] = {
301 {0.0, 1.0},
302 {1.0, 10.0}
303};
304static const TableEntry tbl_rsqrt[] = {
305 {1.0, 1.0},
306 {MATH_SQRT1_2, 2.0}
307};
308static const TableEntry tbl_sin[] = {
309 {0.0, 0.0},
310 {-0.0, -0.0}
311};
312static const TableEntry tbl_sinh[] = {
313 {0.0, 0.0},
314 {-0.0, -0.0}
315};
316static const TableEntry tbl_sinpi[] = {
317 {0.0, 0.0},
318 {-0.0, -0.0}
319};
// Note: unlike most tables here, sqrt has no row for a -0.0 input.
320static const TableEntry tbl_sqrt[] = {
321 {0.0, 0.0},
322 {1.0, 1.0},
323 {MATH_SQRT2, 2.0}
324};
325static const TableEntry tbl_tan[] = {
326 {0.0, 0.0},
327 {-0.0, -0.0}
328};
329static const TableEntry tbl_tanh[] = {
330 {0.0, 0.0},
331 {-0.0, -0.0}
332};
333static const TableEntry tbl_tanpi[] = {
334 {0.0, 0.0},
335 {-0.0, -0.0}
336};
// tgamma(n) == (n - 1)! for the small positive integers listed.
337static const TableEntry tbl_tgamma[] = {
338 {1.0, 1.0},
339 {1.0, 2.0},
340 {2.0, 3.0},
341 {6.0, 4.0}
342};
343
345 switch(id) {
361 return true;
362 default:;
363 }
364 return false;
365}
366
368
370 switch(id) {
408 default:;
409 }
410 return TableRef();
411}
412
413static inline int getVecSize(const AMDGPULibFunc& FInfo) {
414 return FInfo.getLeads()[0].VectorSize;
415}
416
417static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
418 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
419}
420
421FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
422 // If we are doing PreLinkOpt, the function is external. So it is safe to
423 // use getOrInsertFunction() at this stage.
424
426 : AMDGPULibFunc::getFunction(M, fInfo);
427}
428
429FunctionCallee AMDGPULibCalls::getFloatFastVariant(
430 Module *M, const FuncInfo &fInfo, FuncInfo &newInfo,
431 AMDGPULibFunc::EFuncId NewFunc, AMDGPULibFunc::EFuncId FastVariant) {
432 assert(NewFunc != FastVariant);
433
434 if (FastVariant != AMDGPULibFunc::EI_NONE &&
435 getArgType(fInfo) == AMDGPULibFunc::F32) {
436 newInfo = AMDGPULibFunc(FastVariant, fInfo);
437 if (FunctionCallee NewCallee = getFunction(M, newInfo))
438 return NewCallee;
439 }
440
441 newInfo = AMDGPULibFunc(NewFunc, fInfo);
442 return getFunction(M, newInfo);
443}
444
445bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
446 FuncInfo &FInfo) {
447 return AMDGPULibFunc::parse(FMangledName, FInfo);
448}
449
451 return FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs();
452}
453
455 const FPMathOperator *FPOp) const {
456 // TODO: Refine to approxFunc or contract
457 return FPOp->isFast();
458}
459
461 : SQ(F.getParent()->getDataLayout(),
462 &FAM.getResult<TargetLibraryAnalysis>(F),
463 FAM.getCachedResult<DominatorTreeAnalysis>(F),
464 &FAM.getResult<AssumptionAnalysis>(F)) {}
465
466bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
467 return AllNative || llvm::is_contained(UseNative, F);
468}
469
471 AllNative = useNativeFunc("all") ||
472 (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
473 UseNative.begin()->empty());
474}
475
476bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
477 bool native_sin = useNativeFunc("sin");
478 bool native_cos = useNativeFunc("cos");
479
480 if (native_sin && native_cos) {
481 Module *M = aCI->getModule();
482 Value *opr0 = aCI->getArgOperand(0);
483
484 AMDGPULibFunc nf;
485 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
486 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
487
490 FunctionCallee sinExpr = getFunction(M, nf);
491
494 FunctionCallee cosExpr = getFunction(M, nf);
495 if (sinExpr && cosExpr) {
496 Value *sinval =
497 CallInst::Create(sinExpr, opr0, "splitsin", aCI->getIterator());
498 Value *cosval =
499 CallInst::Create(cosExpr, opr0, "splitcos", aCI->getIterator());
500 new StoreInst(cosval, aCI->getArgOperand(1), aCI->getIterator());
501
502 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
503 << " with native version of sin/cos");
504
505 replaceCall(aCI, sinval);
506 return true;
507 }
508 }
509 return false;
510}
511
513 Function *Callee = aCI->getCalledFunction();
514 if (!Callee || aCI->isNoBuiltin())
515 return false;
516
517 FuncInfo FInfo;
518 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
519 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
520 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
521 !(AllNative || useNativeFunc(FInfo.getName()))) {
522 return false;
523 }
524
525 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
526 return sincosUseNative(aCI, FInfo);
527
529 FunctionCallee F = getFunction(aCI->getModule(), FInfo);
530 if (!F)
531 return false;
532
533 aCI->setCalledFunction(F);
534 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
535 << " with native version");
536 return true;
537}
538
539// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
540// builtin, with appended type size and alignment arguments, where 2 or 4
541// indicates the original number of arguments. The library has optimized version
542// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
543// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
544// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
545// 128). The same applies to __read_pipe_4, __write_pipe_2, and __write_pipe_4.
546bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
547 const FuncInfo &FInfo) {
548 auto *Callee = CI->getCalledFunction();
549 if (!Callee->isDeclaration())
550 return false;
551
552 assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
553 auto *M = Callee->getParent();
554 std::string Name = std::string(Callee->getName());
555 auto NumArg = CI->arg_size();
556 if (NumArg != 4 && NumArg != 6)
557 return false;
558 ConstantInt *PacketSize =
559 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
560 ConstantInt *PacketAlign =
561 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
562 if (!PacketSize || !PacketAlign)
563 return false;
564
565 unsigned Size = PacketSize->getZExtValue();
566 Align Alignment = PacketAlign->getAlignValue();
567 if (Alignment != Size)
568 return false;
569
570 unsigned PtrArgLoc = CI->arg_size() - 3;
571 Value *PtrArg = CI->getArgOperand(PtrArgLoc);
572 Type *PtrTy = PtrArg->getType();
573
575 for (unsigned I = 0; I != PtrArgLoc; ++I)
576 ArgTys.push_back(CI->getArgOperand(I)->getType());
577 ArgTys.push_back(PtrTy);
578
579 Name = Name + "_" + std::to_string(Size);
580 auto *FTy = FunctionType::get(Callee->getReturnType(),
581 ArrayRef<Type *>(ArgTys), false);
582 AMDGPULibFunc NewLibFunc(Name, FTy);
584 if (!F)
585 return false;
586
588 for (unsigned I = 0; I != PtrArgLoc; ++I)
589 Args.push_back(CI->getArgOperand(I));
590 Args.push_back(PtrArg);
591
592 auto *NCI = B.CreateCall(F, Args);
593 NCI->setAttributes(CI->getAttributes());
594 CI->replaceAllUsesWith(NCI);
595 CI->dropAllReferences();
596 CI->eraseFromParent();
597
598 return true;
599}
600
601// This function returns false if nothing was changed, and true otherwise.
603 Function *Callee = CI->getCalledFunction();
604 // Ignore indirect calls.
605 if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
606 return false;
607
608 FuncInfo FInfo;
609 if (!parseFunctionName(Callee->getName(), FInfo))
610 return false;
611
612 // Further check the number of arguments to see if they match.
613 // TODO: Check calling convention matches too
614 if (!FInfo.isCompatibleSignature(*Callee->getParent(), CI->getFunctionType()))
615 return false;
616
617 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');
618
619 if (TDOFold(CI, FInfo))
620 return true;
621
622 IRBuilder<> B(CI);
623 if (CI->isStrictFP())
624 B.setIsFPConstrained(true);
625
627 // Under unsafe-math, evaluate calls if possible.
628 // According to Brian Sumner, we can do this for all f32 function calls
629 // using host's double function calls.
630 if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
631 return true;
632
633 // Copy fast flags from the original call.
634 FastMathFlags FMF = FPOp->getFastMathFlags();
635 B.setFastMathFlags(FMF);
636
637 // Specialized optimizations for each function call.
638 //
639 // TODO: Handle native functions
640 switch (FInfo.getId()) {
642 if (FMF.none())
643 return false;
644 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
645 FMF.approxFunc());
647 if (FMF.none())
648 return false;
649 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
650 FMF.approxFunc());
652 if (FMF.none())
653 return false;
654 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
655 FMF.approxFunc());
657 if (FMF.none())
658 return false;
659 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
660 FMF.approxFunc());
662 if (FMF.none())
663 return false;
664 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
665 FMF.approxFunc());
667 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
668 true, true);
670 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
671 true, true);
673 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
674 true);
676 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
677 true, true);
679 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
680 true, true);
682 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
683 true, true, true);
685 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
686 true);
688 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
689 true);
691 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
692 true);
694 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
695 true);
697 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
698 true);
700 if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
701 return false;
702
703 Value *Arg1 = CI->getArgOperand(1);
704 if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
705 VecTy && !isa<VectorType>(Arg1->getType())) {
706 Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
707 CI->setArgOperand(1, SplatArg1);
708 }
709
711 CI->getModule(), Intrinsic::ldexp,
712 {CI->getType(), CI->getArgOperand(1)->getType()}));
714 return true;
715 }
718 return tryOptimizePow(FPOp, B, FInfo);
721 if (fold_pow(FPOp, B, FInfo))
722 return true;
723 if (!FMF.approxFunc())
724 return false;
725
726 if (FInfo.getId() == AMDGPULibFunc::EI_POWR && FMF.approxFunc() &&
727 getArgType(FInfo) == AMDGPULibFunc::F32) {
728 Module *M = Callee->getParent();
729 AMDGPULibFunc PowrFastInfo(AMDGPULibFunc::EI_POWR_FAST, FInfo);
730 if (FunctionCallee PowrFastFunc = getFunction(M, PowrFastInfo)) {
731 CI->setCalledFunction(PowrFastFunc);
732 return true;
733 }
734 }
735
736 if (!shouldReplaceLibcallWithIntrinsic(CI))
737 return false;
738 return expandFastPow(FPOp, B, PowKind::PowR);
739 }
742 if (fold_pow(FPOp, B, FInfo))
743 return true;
744 if (!FMF.approxFunc())
745 return false;
746
747 if (FInfo.getId() == AMDGPULibFunc::EI_POWN &&
748 getArgType(FInfo) == AMDGPULibFunc::F32) {
749 Module *M = Callee->getParent();
750 AMDGPULibFunc PownFastInfo(AMDGPULibFunc::EI_POWN_FAST, FInfo);
751 if (FunctionCallee PownFastFunc = getFunction(M, PownFastInfo)) {
752 CI->setCalledFunction(PownFastFunc);
753 return true;
754 }
755 }
756
757 if (!shouldReplaceLibcallWithIntrinsic(CI))
758 return false;
759 return expandFastPow(FPOp, B, PowKind::PowN);
760 }
763 if (fold_rootn(FPOp, B, FInfo))
764 return true;
765 if (!FMF.approxFunc())
766 return false;
767
768 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
769 Module *M = Callee->getParent();
770 AMDGPULibFunc RootnFastInfo(AMDGPULibFunc::EI_ROOTN_FAST, FInfo);
771 if (FunctionCallee RootnFastFunc = getFunction(M, RootnFastInfo)) {
772 CI->setCalledFunction(RootnFastFunc);
773 return true;
774 }
775 }
776
777 return expandFastPow(FPOp, B, PowKind::RootN);
778 }
780 // TODO: Allow with strictfp + constrained intrinsic
781 return tryReplaceLibcallWithSimpleIntrinsic(
782 B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
785 return fold_sincos(FPOp, B, FInfo);
786 default:
787 break;
788 }
789 } else {
790 // Specialized optimizations for each function call
791 switch (FInfo.getId()) {
796 return fold_read_write_pipe(CI, B, FInfo);
797 default:
798 break;
799 }
800 }
801
802 return false;
803}
804
806 const Type *Ty) {
807 Type *ElemTy = Ty->getScalarType();
808 const fltSemantics &FltSem = ElemTy->getFltSemantics();
809
810 SmallVector<Constant *, 4> ConstValues;
811 ConstValues.reserve(Values.size());
812 for (APFloat APF : Values) {
813 bool Unused;
814 APF.convert(FltSem, APFloat::rmNearestTiesToEven, &Unused);
815 ConstValues.push_back(ConstantFP::get(ElemTy, APF));
816 }
817 return ConstantVector::get(ConstValues);
818}
819
820bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
821 // Table-Driven optimization
822 const TableRef tr = getOptTable(FInfo.getId());
823 if (tr.empty())
824 return false;
825
826 int const sz = (int)tr.size();
827 Value *opr0 = CI->getArgOperand(0);
828
829 int vecSize = getVecSize(FInfo);
830 if (vecSize > 1) {
831 // Vector version
832 Constant *CV = dyn_cast<Constant>(opr0);
833 if (CV && CV->getType()->isVectorTy()) {
835 Values.reserve(vecSize);
836 for (int eltNo = 0; eltNo < vecSize; ++eltNo) {
837 ConstantFP *eltval =
838 cast<ConstantFP>(CV->getAggregateElement((unsigned)eltNo));
839 auto MatchingRow = llvm::find_if(tr, [eltval](const TableEntry &entry) {
840 return eltval->isExactlyValue(entry.input);
841 });
842 if (MatchingRow == tr.end())
843 return false;
844 Values.push_back(APFloat(MatchingRow->result));
845 }
846 Constant *NewValues = getConstantFloatVector(Values, CI->getType());
847 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *NewValues << "\n");
848 replaceCall(CI, NewValues);
849 return true;
850 }
851 } else {
852 // Scalar version
853 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
854 for (int i = 0; i < sz; ++i) {
855 if (CF->isExactlyValue(tr[i].input)) {
856 Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
857 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
858 replaceCall(CI, nval);
859 return true;
860 }
861 }
862 }
863 }
864
865 return false;
866}
867
namespace llvm {
/// Host-side base-2 logarithm of \p V, used when folding constant operands.
///
/// std::log2 has been part of <cmath> since C++11, so it is called
/// unconditionally. A feature-test-macro guard with a log(V) / ln2 fallback
/// would make the folded constant depend on the host toolchain, because the
/// division can round differently from a native log2 even for exact powers of
/// two.
static double log2(double V) { return std::log2(V); }
} // namespace llvm
877
878bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
879 const FuncInfo &FInfo) {
880 assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
881 FInfo.getId() == AMDGPULibFunc::EI_POW_FAST ||
882 FInfo.getId() == AMDGPULibFunc::EI_POWR ||
883 FInfo.getId() == AMDGPULibFunc::EI_POWR_FAST ||
884 FInfo.getId() == AMDGPULibFunc::EI_POWN ||
885 FInfo.getId() == AMDGPULibFunc::EI_POWN_FAST) &&
886 "fold_pow: encounter a wrong function call");
887
888 Module *M = B.GetInsertBlock()->getModule();
889 Type *eltType = FPOp->getType()->getScalarType();
890 Value *opr0 = FPOp->getOperand(0);
891 Value *opr1 = FPOp->getOperand(1);
892
893 const APFloat *CF = nullptr;
894 const APInt *CINT = nullptr;
895 if (!match(opr1, m_APFloatAllowPoison(CF)))
896 match(opr1, m_APIntAllowPoison(CINT));
897
898 // 0x1111111 means that we don't do anything for this call.
899 int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
900
901 if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
902 // pow/powr/pown(x, 0) == 1
903 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
904 Constant *cnval = ConstantFP::get(eltType, 1.0);
905 if (getVecSize(FInfo) > 1) {
906 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
907 }
908 replaceCall(FPOp, cnval);
909 return true;
910 }
911 if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
912 // pow/powr/pown(x, 1.0) = x
913 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
914 replaceCall(FPOp, opr0);
915 return true;
916 }
917 if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
918 // pow/powr/pown(x, 2.0) = x*x
919 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
920 << *opr0 << "\n");
921 Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
922 replaceCall(FPOp, nval);
923 return true;
924 }
925 if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
926 // pow/powr/pown(x, -1.0) = 1.0/x
927 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
928 Constant *cnval = ConstantFP::get(eltType, 1.0);
929 if (getVecSize(FInfo) > 1) {
930 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
931 }
932 Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
933 replaceCall(FPOp, nval);
934 return true;
935 }
936
937 if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
938 // pow[r](x, 0.5) = sqrt(x); pow[r](x, -0.5) = rsqrt(x)
939 bool issqrt = CF->isExactlyValue(0.5);
940 if (FunctionCallee FPExpr =
941 getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
943 FInfo))) {
944 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
945 << '(' << *opr0 << ")\n");
946 Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
947 : "__pow2rsqrt");
948 replaceCall(FPOp, nval);
949 return true;
950 }
951 }
952
953 if (!isUnsafeFiniteOnlyMath(FPOp))
954 return false;
955
956 // Unsafe Math optimization
957
958 // Remember that ci_opr1 is set if opr1 is integral
959 if (CF) {
960 double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
961 ? (double)CF->convertToFloat()
962 : CF->convertToDouble();
963 int ival = (int)dval;
964 if ((double)ival == dval) {
965 ci_opr1 = ival;
966 } else
967 ci_opr1 = 0x11111111;
968 }
969
970 // pow/powr/pown(x, c) = [1/](x*x*..x); where
971 // trunc(c) == c && the number of x == c && |c| <= 12
972 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
973 if (abs_opr1 <= 12) {
974 Constant *cnval;
975 Value *nval;
976 if (abs_opr1 == 0) {
977 cnval = ConstantFP::get(eltType, 1.0);
978 if (getVecSize(FInfo) > 1) {
979 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
980 }
981 nval = cnval;
982 } else {
983 Value *valx2 = nullptr;
984 nval = nullptr;
985 while (abs_opr1 > 0) {
986 valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
987 if (abs_opr1 & 1) {
988 nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
989 }
990 abs_opr1 >>= 1;
991 }
992 }
993
994 if (ci_opr1 < 0) {
995 cnval = ConstantFP::get(eltType, 1.0);
996 if (getVecSize(FInfo) > 1) {
997 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
998 }
999 nval = B.CreateFDiv(cnval, nval, "__1powprod");
1000 }
1001 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1002 << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
1003 << ")\n");
1004 replaceCall(FPOp, nval);
1005 return true;
1006 }
1007
1008 // If we should use the generic intrinsic instead of emitting a libcall
1009 const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();
1010
1011 // powr ---> exp2(y * log2(x))
1012 // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
1013 FunctionCallee ExpExpr;
1014 if (ShouldUseIntrinsic)
1015 ExpExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::exp2,
1016 {FPOp->getType()});
1017 else {
1018 ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
1019 if (!ExpExpr)
1020 return false;
1021 }
1022
1023 bool needlog = false;
1024 bool needabs = false;
1025 bool needcopysign = false;
1026 Constant *cnval = nullptr;
1027 if (getVecSize(FInfo) == 1) {
1028 CF = nullptr;
1029 match(opr0, m_APFloatAllowPoison(CF));
1030
1031 if (CF) {
1032 double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1033 ? (double)CF->convertToFloat()
1034 : CF->convertToDouble();
1035
1036 V = log2(std::abs(V));
1037 cnval = ConstantFP::get(eltType, V);
1038 needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1039 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST) &&
1040 CF->isNegative();
1041 } else {
1042 needlog = true;
1043 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1044 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST;
1045 }
1046 } else {
1047 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
1048
1049 if (!CDV) {
1050 needlog = true;
1051 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1052 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST;
1053 } else {
1054 assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
1055 "Wrong vector size detected");
1056
1058 for (int i=0; i < getVecSize(FInfo); ++i) {
1059 double V = CDV->getElementAsAPFloat(i).convertToDouble();
1060 if (V < 0.0) needcopysign = true;
1061 V = log2(std::abs(V));
1062 DVal.push_back(V);
1063 }
1064 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1066 for (double D : DVal)
1067 FVal.push_back((float)D);
1068 ArrayRef<float> tmp(FVal);
1069 cnval = ConstantDataVector::get(M->getContext(), tmp);
1070 } else {
1071 ArrayRef<double> tmp(DVal);
1072 cnval = ConstantDataVector::get(M->getContext(), tmp);
1073 }
1074 }
1075 }
1076
1077 if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW ||
1078 FInfo.getId() == AMDGPULibFunc::EI_POW_FAST)) {
1079 // We cannot handle corner cases for a general pow() function, so give up
1080 // unless y is known to be an integral value. Then proceed as if it were pown.
1081 if (!isKnownIntegral(opr1, SQ.getWithInstruction(cast<Instruction>(FPOp)),
1082 FPOp->getFastMathFlags()))
1083 return false;
1084 }
1085
1086 Value *nval;
1087 if (needabs) {
1088 nval = B.CreateFAbs(opr0, nullptr, "__fabs");
1089 } else {
1090 nval = cnval ? cnval : opr0;
1091 }
1092 if (needlog) {
1093 FunctionCallee LogExpr;
1094 if (ShouldUseIntrinsic) {
1095 LogExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::log2,
1096 {FPOp->getType()});
1097 } else {
1098 LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1099 if (!LogExpr)
1100 return false;
1101 }
1102
1103 nval = CreateCallEx(B,LogExpr, nval, "__log2");
1104 }
1105
1106 if (FInfo.getId() == AMDGPULibFunc::EI_POWN ||
1107 FInfo.getId() == AMDGPULibFunc::EI_POWN_FAST) {
1108 // convert int(32) to fp(f32 or f64)
1109 opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1110 }
1111 nval = B.CreateFMul(opr1, nval, "__ylogx");
1112
1113 CallInst *Exp2Call = CreateCallEx(B, ExpExpr, nval, "__exp2");
1114
1115 // TODO: Generalized fpclass logic for pow
1117 if (FPOp->hasNoNaNs())
1118 KnownNot |= FPClassTest::fcNan;
1119
1120 Exp2Call->addRetAttr(
1121 Attribute::getWithNoFPClass(Exp2Call->getContext(), KnownNot));
1122 nval = Exp2Call;
1123
1124 if (needcopysign) {
1125 Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
1126 Type *nTy = FPOp->getType()->getWithNewType(nTyS);
1127 Value *opr_n = FPOp->getOperand(1);
1128 if (opr_n->getType()->getScalarType()->isIntegerTy())
1129 opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
1130 else
1131 opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1132
1133 unsigned size = nTy->getScalarSizeInBits();
1134 Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1135 sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1136
1137 nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
1138 nullptr, "__pow_sign");
1139 }
1140
1141 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1142 << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1143 replaceCall(FPOp, nval);
1144
1145 return true;
1146}
1147
// Try to simplify a rootn(x, n) call whose second operand is a constant
// integer (as matched by m_APIntAllowPoison).
//
// Handled exponents:
//   n ==  1 -> x                  (not done inside strictfp functions)
//   n ==  2 -> llvm.sqrt(x)       (gated by shouldReplaceLibcallWithIntrinsic)
//   n ==  3 -> cbrt(x) library call (only if getFunction returns a callee)
//   n == -1 -> 1.0 / x
//   n == -2 -> 1.0 / llvm.sqrt(x) (gated by shouldReplaceLibcallWithIntrinsic)
//
// Returns true when the call has been replaced, false when nothing was done.
1148bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
1149 const FuncInfo &FInfo) {
1150 Value *opr0 = FPOp->getOperand(0);
1151 Value *opr1 = FPOp->getOperand(1);
1152
 // Only a constant exponent can be folded here.
1153 const APInt *CINT = nullptr;
1154 if (!match(opr1, m_APIntAllowPoison(CINT)))
1155 return false;
1156
1157 Function *Parent = B.GetInsertBlock()->getParent();
1158
 // The sign-extended exponent is narrowed to int for the comparisons below.
1159 int ci_opr1 = (int)CINT->getSExtValue();
1160 if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
1161 // rootn(x, 1) = x
1162 //
1163 // TODO: Insert constrained canonicalize for strictfp case.
1164 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
1165 replaceCall(FPOp, opr0);
1166 return true;
1167 }
1168
1169 Module *M = B.GetInsertBlock()->getModule();
1170
1171 CallInst *CI = cast<CallInst>(FPOp);
1172 if (ci_opr1 == 2 &&
1173 shouldReplaceLibcallWithIntrinsic(CI,
1174 /*AllowMinSizeF32=*/true,
1175 /*AllowF64=*/true)) {
1176 // rootn(x, 2) = sqrt(x)
1177 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n");
1178
1179 CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1180 NewCall->takeName(CI);
1181
1182 // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
1183 // metadata.
 // The recorded accuracy is never tighter than 2.0 (max of the call's own
 // accuracy and 2.0).
1184 MDBuilder MDHelper(M->getContext());
1185 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1186 NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD);
1187
1188 replaceCall(CI, NewCall);
1189 return true;
1190 }
1191
1192 if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
 // If no cbrt callee is available, control falls through to the n == -2
 // check below, which cannot match, and the function returns false.
1193 if (FunctionCallee FPExpr =
1194 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1195 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
1196 << ")\n");
1197 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1198 replaceCall(FPOp, nval);
1199 return true;
1200 }
1201 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1202 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
1203 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1204 opr0,
1205 "__rootn2div");
1206 replaceCall(FPOp, nval);
1207 return true;
1208 }
1209
1210 if (ci_opr1 == -2 &&
1211 shouldReplaceLibcallWithIntrinsic(CI,
1212 /*AllowMinSizeF32=*/true,
1213 /*AllowF64=*/true)) {
1214 // rootn(x, -2) = rsqrt(x)
1215
1216 // The original rootn had looser ulp requirements than the resultant sqrt
1217 // and fdiv.
1218 MDBuilder MDHelper(M->getContext());
1219 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1220
1221 // TODO: Could handle strictfp but need to fix strict sqrt emission
 // Both new instructions get the call's fast-math flags plus 'contract'.
1222 FastMathFlags FMF = FPOp->getFastMathFlags();
1223 FMF.setAllowContract(true);
1224
1225 CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
 // NOTE(review): the line declaring RSqrt (listing line 1226) is missing
 // from this listing; presumably it binds the result of the CreateFDiv
 // below -- confirm against the upstream file.
1227 B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt));
1228 Sqrt->setFastMathFlags(FMF);
1229 RSqrt->setFastMathFlags(FMF);
1230 RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD);
1231
1232 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
1233 << ")\n");
1234 replaceCall(CI, RSqrt);
1235 return true;
1236 }
1237
1238 return false;
1239}
1240
1241// is_integer(y) => trunc(y) == y
1243 Value *TruncY = B.CreateUnaryIntrinsic(Intrinsic::trunc, Y);
1244 return B.CreateFCmpOEQ(TruncY, Y);
1245}
1246
1248 // Even integers are still integers after division by 2.
1249 auto *HalfY = B.CreateFMul(Y, ConstantFP::get(Y->getType(), 0.5));
1250 return emitIsInteger(B, HalfY);
1251}
1252
1253// is_odd_integer(y) => is_integer(y) && !is_even_integer(y)
1255 Value *IsIntY = emitIsInteger(B, Y);
1256 Value *IsEvenY = emitIsEvenInteger(B, Y);
1257 Value *NotEvenY = B.CreateNot(IsEvenY);
1258 return B.CreateAnd(IsIntY, NotEvenY);
1259}
1260
1261// isinf(val) => fabs(val) == +inf
1263 auto *fabsVal = B.CreateFAbs(val);
1264 return B.CreateFCmpOEQ(fabsVal, ConstantFP::getInfinity(val->getType()));
1265}
1266
1267// y * log2(fabs(x))
1269 Value *AbsX = B.CreateFAbs(X);
1270 Value *LogAbsX = B.CreateUnaryIntrinsic(Intrinsic::log2, AbsX);
1271 Value *YTimesLogX = B.CreateFMul(Y, LogAbsX);
1272 return B.CreateUnaryIntrinsic(Intrinsic::exp2, YTimesLogX);
1273}
1274
1275/// Emit special case management epilog code for fast pow, powr, pown, and rootn
1276/// expansions. \p x and \p y should be the arguments to the library call
1277/// (possibly with some values clamped). \p expylnx should be the result to use
1278/// in normal circumstances.
1280 PowKind Kind) {
1281 Constant *Zero = ConstantFP::getZero(X->getType());
1282 Constant *One = ConstantFP::get(X->getType(), 1.0);
1283 Constant *QNaN = ConstantFP::getQNaN(X->getType());
1284 Constant *PInf = ConstantFP::getInfinity(X->getType());
1285
1286 switch (Kind) {
1287 case PowKind::Pow: {
1288 // is_odd_integer(y)
1289 Value *IsOddY = emitIsOddInteger(B, Y);
1290
1291 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1292 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1293 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1294
1295 // if (x < 0 && !is_integer(y)) ret = QNAN
1296 Value *IsIntY = emitIsInteger(B, Y);
1297 Value *condNegX = B.CreateFCmpOLT(X, Zero);
1298 Value *condNotIntY = B.CreateNot(IsIntY);
1299 Value *condNaN = B.CreateAnd(condNegX, condNotIntY);
1300 Ret = B.CreateSelect(condNaN, QNaN, Ret);
1301
1302 // if (isinf(ay)) { ... }
1303
1304 // FIXME: Missing backend optimization to save on materialization cost of
1305 // mixed sign constant infinities.
1306 Value *YIsInf = emitIsInf(B, Y);
1307
1308 Value *AY = B.CreateFAbs(Y);
1309 Value *YIsNegInf = B.CreateFCmpUNE(Y, AY);
1310
1311 Value *AX = B.CreateFAbs(X);
1312 Value *AxEqOne = B.CreateFCmpOEQ(AX, One);
1313 Value *AxLtOne = B.CreateFCmpOLT(AX, One);
1314 Value *XorCond = B.CreateXor(AxLtOne, YIsNegInf);
1315 Value *SelInf =
1316 B.CreateSelect(AxEqOne, AX, B.CreateSelect(XorCond, Zero, AY));
1317 Ret = B.CreateSelect(YIsInf, SelInf, Ret);
1318
1319 // if (isinf(ax) || x == 0.0f) { ... }
1320 Value *XIsInf = emitIsInf(B, X);
1321 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1322 Value *AxInfOrZero = B.CreateOr(XIsInf, XEqZero);
1323 Value *YLtZero = B.CreateFCmpOLT(Y, Zero);
1324 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1325 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1326 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1327 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1328 Ret = B.CreateSelect(AxInfOrZero, Copysign, Ret);
1329
1330 // if (isunordered(x, y)) ret = QNAN
1331 Value *isUnordered = B.CreateFCmpUNO(X, Y);
1332 return B.CreateSelect(isUnordered, QNaN, Ret);
1333 }
1334 case PowKind::PowR: {
1335 Value *YIsNeg = B.CreateFCmpOLT(Y, Zero);
1336 Value *IZ = B.CreateSelect(YIsNeg, PInf, Zero);
1337 Value *ZI = B.CreateSelect(YIsNeg, Zero, PInf);
1338
1339 Value *YEqZero = B.CreateFCmpOEQ(Y, Zero);
1340 Value *SelZeroCase = B.CreateSelect(YEqZero, QNaN, IZ);
1341 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1342 Value *Ret = B.CreateSelect(XEqZero, SelZeroCase, ExpYLnX);
1343
1344 Value *XEqInf = B.CreateFCmpOEQ(X, PInf);
1345 Value *YNeZero = B.CreateFCmpUNE(Y, Zero);
1346 Value *CondInfCase = B.CreateAnd(XEqInf, YNeZero);
1347 Ret = B.CreateSelect(CondInfCase, ZI, Ret);
1348
1349 Value *IsInfY = emitIsInf(B, Y);
1350 Value *XNeOne = B.CreateFCmpUNE(X, One);
1351 Value *CondInfY = B.CreateAnd(IsInfY, XNeOne);
1352 Value *XLtOne = B.CreateFCmpOLT(X, One);
1353 Value *SelInfYCase = B.CreateSelect(XLtOne, IZ, ZI);
1354 Ret = B.CreateSelect(CondInfY, SelInfYCase, Ret);
1355
1356 Value *IsUnordered = B.CreateFCmpUNO(X, Y);
1357 return B.CreateSelect(IsUnordered, QNaN, Ret);
1358 }
1359 case PowKind::PowN: {
1360 Constant *ZeroI = ConstantInt::get(Y->getType(), 0);
1361
1362 // is_odd_y = (ny & 1) != 0
1363 Value *OneI = ConstantInt::get(Y->getType(), 1);
1364 Value *YAnd1 = B.CreateAnd(Y, OneI);
1365 Value *IsOddY = B.CreateICmpNE(YAnd1, ZeroI);
1366
1367 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1368 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1369 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1370
1371 // if (isinf(x) || x == 0.0f)
1372 Value *FabsX = B.CreateFAbs(X);
1373 Value *XIsInf = B.CreateFCmpOEQ(FabsX, PInf);
1374 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1375 Value *InfOrZero = B.CreateOr(XIsInf, XEqZero);
1376
1377 // (x == 0.0f) ^ (ny < 0) ? 0.0f : +inf
1378 Value *YLtZero = B.CreateICmpSLT(Y, ZeroI);
1379 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1380 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1381
1382 // copysign(selVal, is_odd_y ? x : 0.0f)
1383 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1384 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1385
1386 return B.CreateSelect(InfOrZero, Copysign, Ret);
1387 }
1388 case PowKind::RootN: {
1389 Constant *ZeroI = ConstantInt::get(Y->getType(), 0);
1390
1391 // is_odd_y = (ny & 1) != 0
1392 Value *YAnd1 = B.CreateAnd(Y, ConstantInt::get(Y->getType(), 1));
1393 Value *IsOddY = B.CreateICmpNE(YAnd1, ZeroI);
1394
1395 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1396 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1397 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1398
1399 // if (isinf(x) || x == 0.0f)
1400 Value *FabsX = B.CreateFAbs(X);
1401 Value *IsInfX = B.CreateFCmpOEQ(FabsX, PInf);
1402 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1403 Value *CondInfOrZero = B.CreateOr(IsInfX, XEqZero);
1404
1405 // (x == 0.0f) ^ (ny < 0) ? 0.0f : +inf
1406 Value *YLtZero = B.CreateICmpSLT(Y, ZeroI);
1407 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1408 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1409
1410 // copysign(selVal, is_odd_y ? x : 0.0f)
1411 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1412 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1413
1414 Ret = B.CreateSelect(CondInfOrZero, Copysign, Ret);
1415
1416 // if ((x < 0.0f && !is_odd_y) || ny == 0) ret = QNAN
1417 Value *XIsNeg = B.CreateFCmpOLT(X, Zero);
1418 Value *NotOddY = B.CreateNot(IsOddY);
1419 Value *CondNegAndNotOdd = B.CreateAnd(XIsNeg, NotOddY);
1420 Value *YEqZero = B.CreateICmpEQ(Y, ZeroI);
1421 Value *CondBad = B.CreateOr(CondNegAndNotOdd, YEqZero);
1422 return B.CreateSelect(CondBad, QNaN, Ret);
1423 }
1424 }
1425
1426 llvm_unreachable("covered switch");
1427}
1428
1429// TODO: Move the fold_pow folding to sqrt/fdiv here
1430bool AMDGPULibCalls::expandFastPow(FPMathOperator *FPOp, IRBuilder<> &B,
1431 PowKind Kind) {
1432 Type *Ty = FPOp->getType();
1433
1434 // There's currently no reason to do this for half. The correct path is
1435 // promote to float and use the fast float expansion.
1436 //
1437 // TODO: We could move this expansion to lowering to get half pow to work.
1438 if (!Ty->getScalarType()->isFloatTy())
1439 return false;
1440
1441 // TODO: Verify optimization for double and bfloat.
1442 Value *X = FPOp->getOperand(0);
1443 Value *Y = FPOp->getOperand(1);
1444
1445 switch (Kind) {
1446 case PowKind::Pow: {
1447 Constant *One = ConstantFP::get(X->getType(), 1.0);
1448
1449 // if (x == 1.0f) y = 1.0f;
1450 Value *XEqOne = B.CreateFCmpOEQ(X, One);
1451 Y = B.CreateSelect(XEqOne, One, Y);
1452
1453 // if (y == 0.0f) x = 1.0f;
1454 Value *YEqZero = B.CreateFCmpOEQ(Y, ConstantFP::getZero(X->getType()));
1455 X = B.CreateSelect(YEqZero, One, X);
1456
1457 Value *ExpYLnX = emitFastExpYLnx(B, X, Y);
1458 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1459 replaceCall(FPOp, Fixed);
1460 return true;
1461 }
1462 case PowKind::PowR: {
1463 Value *NegX = B.CreateFCmpOLT(X, ConstantFP::getZero(X->getType()));
1464 X = B.CreateSelect(NegX, ConstantFP::getQNaN(X->getType()), X);
1465
1466 Value *ExpYLnX = emitFastExpYLnx(B, X, Y);
1467 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1468 replaceCall(FPOp, Fixed);
1469 return true;
1470 }
1471 case PowKind::PowN: {
1472 // ny == 0
1473 Value *YEqZero = B.CreateICmpEQ(Y, ConstantInt::get(Y->getType(), 0));
1474
1475 // x = (ny == 0 ? 1.0f : x)
1476 X = B.CreateSelect(YEqZero, ConstantFP::get(X->getType(), 1.0), X);
1477
1478 Value *CastY = B.CreateSIToFP(Y, X->getType());
1479 Value *ExpYLnX = emitFastExpYLnx(B, X, CastY);
1480 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1481 replaceCall(FPOp, Fixed);
1482 return true;
1483 }
1484 case PowKind::RootN: {
1485 Value *CastY = B.CreateSIToFP(Y, X->getType());
1486
1487 // This is afn anyway, so we will turn into rcp.
1488 Value *RcpY = B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), CastY);
1489
1490 Value *ExpYLnX = emitFastExpYLnx(B, X, RcpY);
1491 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1492 replaceCall(FPOp, Fixed);
1493 return true;
1494 }
1495 }
1496 llvm_unreachable("Unhandled PowKind enum");
1497}
1498
// Try, in order, to turn a pow call into something cheaper:
//   1. powr, when operand 0 cannot be ordered-less-than zero;
//   2. pown, when operand 1 is known to be integral;
//   3. whatever fold_pow can do with the call as it is;
//   4. (afn only) the fast pow library variant for f32, if available;
//   5. (afn only) the inline expansion from expandFastPow.
// Returns true if the call was changed.
1499bool AMDGPULibCalls::tryOptimizePow(FPMathOperator *FPOp, IRBuilder<> &B,
1500 const FuncInfo &FInfo) {
1501 FastMathFlags FMF = FPOp->getFastMathFlags();
1502 CallInst *Call = cast<CallInst>(FPOp);
1503 Module *M = Call->getModule();
1504
1505 FuncInfo PowrInfo;
 // NOTE(review): the two arms of this conditional (listing lines 1508-1509)
 // are missing from this listing; only the condition is visible.
1506 AMDGPULibFunc::EFuncId FastPowrFuncId =
1507 FMF.approxFunc() || FInfo.getId() == AMDGPULibFunc::EI_POW_FAST
1510 FunctionCallee PowrFunc = getFloatFastVariant(
1511 M, FInfo, PowrInfo, AMDGPULibFunc::EI_POWR, FastPowrFuncId);
1512
1513 // TODO: Prefer fast pown to fast powr, but slow powr to slow pown.
1514
1515 // pow(x, y) -> powr(x, y) for x >= -0.0
1516 // TODO: Account for flags on current call
1517 if (PowrFunc && cannotBeOrderedLessThanZero(FPOp->getOperand(0),
1518 SQ.getWithInstruction(Call))) {
1519 Call->setCalledFunction(PowrFunc);
 // The callee has already been changed, so report a change even if
 // fold_pow does nothing further.
1520 return fold_pow(FPOp, B, PowrInfo) || true;
1521 }
1522
1523 // pow(x, y) -> pown(x, y) for known integral y
1524 if (isKnownIntegral(FPOp->getOperand(1), SQ.getWithInstruction(Call),
1525 FPOp->getFastMathFlags())) {
1526 FunctionType *PownType = getPownType(Call->getFunctionType());
1527
1528 FuncInfo PownInfo;
 // NOTE(review): as above, listing lines 1531-1532 (the conditional's arms)
 // are missing from this listing.
1529 AMDGPULibFunc::EFuncId FastPownFuncId =
1530 FMF.approxFunc() || FInfo.getId() == AMDGPULibFunc::EI_POW_FAST
1533 FunctionCallee PownFunc = getFloatFastVariant(
1534 M, FInfo, PownInfo, AMDGPULibFunc::EI_POWN, FastPownFuncId);
1535
1536 if (PownFunc) {
1537 // TODO: If the incoming integral value is an sitofp/uitofp, it won't
1538 // fold out without a known range. We can probably take the source
1539 // value directly.
1540 Value *CastedArg =
1541 B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
1542 // Have to drop any nofpclass attributes on the original call site.
 // NOTE(review): the statement that performs the attribute removal is only
 // partially visible (listing lines 1543 and 1545 are missing).
1544 1, AttributeFuncs::typeIncompatible(CastedArg->getType(),
1546 Call->setCalledFunction(PownFunc);
1547 Call->setArgOperand(1, CastedArg);
 // The call was already rewritten, so this always reports a change.
1548 return fold_pow(FPOp, B, PownInfo) || true;
1549 }
1550 }
1551
1552 if (fold_pow(FPOp, B, FInfo))
1553 return true;
1554
 // Everything below requires the afn fast-math flag.
1555 if (!FMF.approxFunc())
1556 return false;
1557
 // The FMF.approxFunc() test here repeats the check just above.
1558 if (FInfo.getId() == AMDGPULibFunc::EI_POW && FMF.approxFunc() &&
1559 getArgType(FInfo) == AMDGPULibFunc::F32) {
1560 AMDGPULibFunc PowFastInfo(AMDGPULibFunc::EI_POW_FAST, FInfo);
1561 if (FunctionCallee PowFastFunc = getFunction(M, PowFastInfo)) {
1562 Call->setCalledFunction(PowFastFunc);
1563 return fold_pow(FPOp, B, PowFastInfo) || true;
1564 }
1565 }
1566
 // Last resort: expand inline.
1567 return expandFastPow(FPOp, B, PowKind::Pow);
1568}
1569
1570// Get a scalar native builtin single argument FP function
// Returns nullptr for F64 argument types and for function ids rejected by
// HasNative; otherwise looks up a callee for a modified copy of FInfo via
// getFunction.
1571FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1572 const FuncInfo &FInfo) {
1573 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1574 return nullptr;
1575 FuncInfo nf = FInfo;
 // NOTE(review): listing line 1576 is missing here; presumably it switches
 // the copy 'nf' to the native variant before the lookup -- confirm against
 // the upstream file.
1577 return getFunction(M, nf);
1578}
1579
1580// Some library calls are just wrappers around llvm intrinsics, but compiled
1581// conservatively. Preserve the flags from the original call site by
1582// substituting them with direct calls with all the flags.
1583bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
1584 bool AllowMinSizeF32,
1585 bool AllowF64,
1586 bool AllowStrictFP) {
1587 Type *FltTy = CI->getType()->getScalarType();
1588 const bool IsF32 = FltTy->isFloatTy();
1589
1590 // f64 intrinsics aren't implemented for most operations.
1591 if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
1592 return false;
1593
1594 // We're implicitly inlining by replacing the libcall with the intrinsic, so
1595 // don't do it for noinline call sites.
1596 if (CI->isNoInline())
1597 return false;
1598
1599 const Function *ParentF = CI->getFunction();
1600 // TODO: Handle strictfp
1601 if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
1602 return false;
1603
1604 if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
1605 return false;
1606 return true;
1607}
1608
// Rewrite the call CI in place so that it targets the intrinsic IntrID.
//
// For two-argument calls where exactly one argument is a vector, the scalar
// argument is first splatted to the vector's element count so both operands
// have matching shapes.
1609void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
1610 CallInst *CI,
1611 Intrinsic::ID IntrID) {
1612 if (CI->arg_size() == 2) {
1613 Value *Arg0 = CI->getArgOperand(0);
1614 Value *Arg1 = CI->getArgOperand(1);
1615 VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
1616 VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
1617 if (Arg0VecTy && !Arg1VecTy) {
 // vector op scalar: broadcast the right-hand side.
1618 Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1619 CI->setArgOperand(1, SplatRHS);
1620 } else if (!Arg0VecTy && Arg1VecTy) {
 // scalar op vector: broadcast the left-hand side.
1621 Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1622 CI->setArgOperand(0, SplatLHS);
1623 }
1624 }
1625
 // NOTE(review): listing lines 1626 and 1628 are missing. The visible
 // fragment passes the module, IntrID and the call's result type to some
 // call; presumably that obtains the intrinsic declaration which then becomes
 // the callee -- confirm against the upstream file.
1627 CI->getModule(), IntrID, {CI->getType()}));
1629}
1630
1631bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1632 IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
1633 bool AllowF64, bool AllowStrictFP) {
1634 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1635 AllowStrictFP))
1636 return false;
1637 replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
1638 return true;
1639}
1640
1641std::tuple<Value *, Value *, Value *>
1642AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
1643 FunctionCallee Fsincos) {
1644 DebugLoc DL = B.getCurrentDebugLocation();
1645 Function *F = B.GetInsertBlock()->getParent();
1646 B.SetInsertPointPastAllocas(F);
1647
1648 AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");
1649
1650 if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
1651 // If the argument is an instruction, it must dominate all uses so put our
1652 // sincos call there. Otherwise, right after the allocas works well enough
1653 // if it's an argument or constant.
1654
1655 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1656
1657 // SetInsertPoint unwelcomely always tries to set the debug loc.
1658 B.SetCurrentDebugLocation(DL);
1659 }
1660
1661 Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);
1662
1663 // The allocaInst allocates the memory in private address space. This need
1664 // to be addrspacecasted to point to the address space of cos pointer type.
1665 // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1666 Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);
1667
1668 CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);
1669
1670 // TODO: Is it worth trying to preserve the location for the cos calls for the
1671 // load?
1672
1673 LoadInst *LoadCos = B.CreateLoad(Arg->getType(), Alloc);
1674 return {SinCos, LoadCos, SinCos};
1675}
1676
1677// fold sin, cos -> sincos.
// Merge sin(x) and cos(x) calls that share the same argument within one
// function into a single sincos(x, &c) call.
//
// FPOp is the sin or cos call being visited and fInfo describes it. The
// function scans the other users of the argument for matching sin, cos and
// sincos calls, and only proceeds when at least one sin and one cos call were
// found. Returns true if the merge was performed.
1678bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
1679 const FuncInfo &fInfo) {
1680 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1681 fInfo.getId() == AMDGPULibFunc::EI_COS);
1682
 // Only unprefixed f32/f64 variants are merged.
1683 if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
1684 getArgType(fInfo) != AMDGPULibFunc::F64) ||
1685 fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
1686 return false;
1687
1688 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1689
1690 Value *CArgVal = FPOp->getOperand(0);
1691
1692 // TODO: Constant fold the call
1693 if (isa<ConstantData>(CArgVal))
1694 return false;
1695
1696 CallInst *CI = cast<CallInst>(FPOp);
1697
1698 Function *F = B.GetInsertBlock()->getParent();
1699 Module *M = F->getParent();
1700
1701 // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
1702 // implementation. Prefer the private form if available.
 // NOTE(review): the right-hand sides of the two PtrKind assignments below
 // (listing lines 1705 and 1709) are missing from this listing.
1703 AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
1704 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1706
1707 AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
1708 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1710
1711 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1712 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1713 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1714 if (!FSinCos)
1715 return false;
1716
1717 SmallVector<CallInst *> SinCalls;
1718 SmallVector<CallInst *> CosCalls;
1719 SmallVector<CallInst *> SinCosCalls;
 // PartnerInfo describes the opposite function (cos for sin, sin for cos).
1720 FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
1721 fInfo);
1722 const std::string PairName = PartnerInfo.mangle();
1723
 // Candidate calls are recognised purely by callee name.
1724 StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
1725 StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
1726 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1727 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1728
1729 // Intersect the two sets of flags.
1730 FastMathFlags FMF = FPOp->getFastMathFlags();
1731 MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);
1732
1733 SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};
1734
 // Walk every user of the argument; CI itself is one of them and lands in
 // SinCalls or CosCalls.
1735 for (User* U : CArgVal->users()) {
1736 CallInst *XI = dyn_cast<CallInst>(U);
1737 if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
1738 continue;
1739
 // Indirect calls have no callee to compare names with.
1740 Function *UCallee = XI->getCalledFunction();
1741 if (!UCallee)
1742 continue;
1743
1744 bool Handled = true;
1745
1746 if (UCallee->getName() == SinName)
1747 SinCalls.push_back(XI);
1748 else if (UCallee->getName() == CosName)
1749 CosCalls.push_back(XI);
1750 else if (UCallee->getName() == SinCosPrivateName ||
1751 UCallee->getName() == SinCosGenericName)
1752 SinCosCalls.push_back(XI);
1753 else
1754 Handled = false;
1755
1756 if (Handled) {
1757 MergeDbgLocs.push_back(XI->getDebugLoc());
1758 auto *OtherOp = cast<FPMathOperator>(XI);
1759 FMF &= OtherOp->getFastMathFlags();
 // NOTE(review): the start of this statement (listing line 1760) is missing;
 // the visible operands are the running FPMath node and XI's !fpmath
 // metadata, so presumably the two are combined into FPMath -- confirm.
1761 FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
1762 }
1763 }
1764
 // Nothing to merge unless both a sin and a cos call exist.
1765 if (SinCalls.empty() || CosCalls.empty())
1766 return false;
1767
1768 B.setFastMathFlags(FMF);
1769 B.setDefaultFPMathTag(FPMath);
1770 DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
1771 B.SetCurrentDebugLocation(DbgLoc);
1772
1773 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
1774
1775 auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
1776 for (CallInst *C : Calls)
1777 C->replaceAllUsesWith(Res);
1778
1779 // Leave the other dead instructions to avoid clobbering iterators.
1780 };
1781
1782 replaceTrigInsts(SinCalls, Sin);
1783 replaceTrigInsts(CosCalls, Cos);
1784 replaceTrigInsts(SinCosCalls, SinCos);
1785
1786 // It's safe to delete the original now.
1787 CI->eraseFromParent();
1788 return true;
1789}
1790
// Constant-evaluate one scalar math library function on the host.
//
// copr0/copr1 are the (possibly null) constant operands. The result is written
// to Res0; Res1 is written only by the final group below, which produces both
// a sine and a cosine. Returns true if a value was produced, false if the
// function id is not handled or an integer operand is not a ConstantInt.
//
// All evaluation is done in host double arithmetic with the C math library.
1791bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
1792 APFloat &Res0, APFloat &Res1,
1793 Constant *copr0, Constant *copr1) {
1794 // By default, opr0/opr1 hold the operand values widened to double.
1795 // If an operand is not float/double, the individual case has to decode
1796 // its operand separately (see the ConstantInt handling further down).
1797 double opr0 = 0.0, opr1 = 0.0;
 // NOTE(review): the declarations of fpopr0/fpopr1 (listing lines 1798-1799)
 // are missing from this listing. When either is null the corresponding
 // opr value keeps its 0.0 initialiser.
1800 if (fpopr0) {
1801 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1802 ? fpopr0->getValueAPF().convertToDouble()
1803 : (double)fpopr0->getValueAPF().convertToFloat();
1804 }
1805
1806 if (fpopr1) {
1807 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1808 ? fpopr1->getValueAPF().convertToDouble()
1809 : (double)fpopr1->getValueAPF().convertToFloat();
1810 }
1811
 // NOTE(review): every `case` label of this switch is missing from this
 // listing; each group below is identified only by the expression it
 // evaluates.
1812 switch (FInfo.getId()) {
1813 default:
1814 return false;
1815
1817 Res0 = APFloat{acos(opr0)};
1818 return true;
1819
1821 // acosh(x) == log(x + sqrt(x*x - 1))
1822 Res0 = APFloat{log(opr0 + sqrt(opr0 * opr0 - 1.0))};
1823 return true;
1824
1826 Res0 = APFloat{acos(opr0) / MATH_PI};
1827 return true;
1828
1830 Res0 = APFloat{asin(opr0)};
1831 return true;
1832
1834 // asinh(x) == log(x + sqrt(x*x + 1))
1835 Res0 = APFloat{log(opr0 + sqrt(opr0 * opr0 + 1.0))};
1836 return true;
1837
1839 Res0 = APFloat{asin(opr0) / MATH_PI};
1840 return true;
1841
1843 Res0 = APFloat{atan(opr0)};
1844 return true;
1845
1847 // atanh(x) == (log(1+x) - log(1-x))/2
 // NOTE(review): the expression below takes log(opr0 - 1.0), not
 // log(1.0 - opr0); for |x| < 1 that argument is negative, so log() yields
 // NaN. Looks like a sign error -- confirm and fix.
1848 Res0 = APFloat{(log(opr0 + 1.0) - log(opr0 - 1.0)) / 2.0};
1849 return true;
1850
1852 Res0 = APFloat{atan(opr0) / MATH_PI};
1853 return true;
1854
 // Cube root: negate around pow() so that negative inputs give a negative
 // root.
1856 Res0 =
1857 APFloat{(opr0 < 0.0) ? -pow(-opr0, 1.0 / 3.0) : pow(opr0, 1.0 / 3.0)};
1858 return true;
1859
1861 Res0 = APFloat{cos(opr0)};
1862 return true;
1863
1865 Res0 = APFloat{cosh(opr0)};
1866 return true;
1867
1869 Res0 = APFloat{cos(MATH_PI * opr0)};
1870 return true;
1871
1873 Res0 = APFloat{exp(opr0)};
1874 return true;
1875
 // 2^x
1877 Res0 = APFloat{pow(2.0, opr0)};
1878 return true;
1879
 // 10^x
1881 Res0 = APFloat{pow(10.0, opr0)};
1882 return true;
1883
1885 Res0 = APFloat{log(opr0)};
1886 return true;
1887
 // log2(x) via change of base
1889 Res0 = APFloat{log(opr0) / log(2.0)};
1890 return true;
1891
 // log10(x) via change of base
1893 Res0 = APFloat{log(opr0) / log(10.0)};
1894 return true;
1895
 // 1 / sqrt(x)
1897 Res0 = APFloat{1.0 / sqrt(opr0)};
1898 return true;
1899
1901 Res0 = APFloat{sin(opr0)};
1902 return true;
1903
1905 Res0 = APFloat{sinh(opr0)};
1906 return true;
1907
1909 Res0 = APFloat{sin(MATH_PI * opr0)};
1910 return true;
1911
1913 Res0 = APFloat{tan(opr0)};
1914 return true;
1915
1917 Res0 = APFloat{tanh(opr0)};
1918 return true;
1919
1921 Res0 = APFloat{tan(MATH_PI * opr0)};
1922 return true;
1923
1924 // two-arg functions
1927 Res0 = APFloat{pow(opr0, opr1)};
1928 return true;
1929
 // Integer exponent: the second operand must be a ConstantInt.
1931 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1932 double val = (double)iopr1->getSExtValue();
1933 Res0 = APFloat{pow(opr0, val)};
1934 return true;
1935 }
1936 return false;
1937 }
1938
 // Integer root: evaluated as pow(x, 1/n).
 // NOTE(review): C pow() returns NaN for a negative base with a non-integer
 // exponent, and n == 0 makes 1.0 / val infinite; check whether those inputs
 // need dedicated handling here.
1940 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1941 double val = (double)iopr1->getSExtValue();
1942 Res0 = APFloat{pow(opr0, 1.0 / val)};
1943 return true;
1944 }
1945 return false;
1946 }
1947
1948 // with ptr arg
 // Two results: sine in Res0, cosine in Res1.
1950 Res0 = APFloat{sin(opr0)};
1951 Res1 = APFloat{cos(opr0)};
1952 return true;
1953 }
1954
1955 return false;
1956}
1957
1958bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1959 int numArgs = (int)aCI->arg_size();
1960 if (numArgs > 3)
1961 return false;
1962
1963 Constant *copr0 = nullptr;
1964 Constant *copr1 = nullptr;
1965 if (numArgs > 0) {
1966 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1967 return false;
1968 }
1969
1970 if (numArgs > 1) {
1971 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1972 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1973 return false;
1974 }
1975 }
1976
1977 // At this point, all arguments to aCI are constants.
1978
1979 // max vector size is 16, and sincos will generate two results.
1980 SmallVector<APFloat, 16> Val0, Val1;
1981 int FuncVecSize = getVecSize(FInfo);
1982 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1983 if (FuncVecSize == 1) {
1984 if (!evaluateScalarMathFunc(FInfo, Val0.emplace_back(0.0),
1985 Val1.emplace_back(0.0), copr0, copr1)) {
1986 return false;
1987 }
1988 } else {
1989 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1990 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1991 for (int i = 0; i < FuncVecSize; ++i) {
1992 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1993 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1994 if (!evaluateScalarMathFunc(FInfo, Val0.emplace_back(0.0),
1995 Val1.emplace_back(0.0), celt0, celt1)) {
1996 return false;
1997 }
1998 }
1999 }
2000
2001 Constant *nval0, *nval1;
2002 if (FuncVecSize == 1) {
2003 nval0 = ConstantFP::get(aCI->getType(), Val0[0]);
2004 if (hasTwoResults)
2005 nval1 = ConstantFP::get(aCI->getType(), Val1[0]);
2006 } else {
2007 nval0 = getConstantFloatVector(Val0, aCI->getType());
2008 if (hasTwoResults)
2009 nval1 = getConstantFloatVector(Val1, aCI->getType());
2010 }
2011
2012 if (hasTwoResults) {
2013 // sincos
2014 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
2015 "math function with ptr arg not supported yet");
2016 new StoreInst(nval1, aCI->getArgOperand(1), aCI->getIterator());
2017 }
2018
2019 replaceCall(aCI, nval0);
2020 return true;
2021}
2022
// NOTE(review): the signature of this function and listing lines 2036 and
// 2045 are missing from this listing. From the visible body it receives a
// function F and an analysis manager AM; presumably it is the simplify-libcalls
// pass entry point -- confirm against the upstream file.
//
// Visible behaviour: builds an AMDGPULibCalls helper for F, then walks every
// instruction of every basic block and calls Simplifier.fold on each call
// instruction, recording whether anything changed.
2025 AMDGPULibCalls Simplifier(F, AM);
2026 Simplifier.initNativeFuncs();
2027
2028 bool Changed = false;
2029
2030 LLVM_DEBUG(dbgs() << "AMDIC: process function ";
2031 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
2032
2033 for (auto &BB : F) {
 // The loop header has no increment; the iterator is advanced by hand below.
2034 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
2035 // Ignore non-calls.
 // NOTE(review): the definition of CI (listing line 2036) is missing;
 // presumably a dyn_cast of the current instruction to CallInst.
 // I is advanced before fold() runs, presumably so that folding the current
 // call cannot invalidate the iterator.
2037 ++I;
2038
2039 if (CI) {
2040 if (Simplifier.fold(CI))
2041 Changed = true;
2042 }
2043 }
2044 }
 // NOTE(review): the return statement (listing line 2045) is missing.
2046}
2047
// NOTE(review): the signature of this function and listing lines 2060 and
// 2066 are missing from this listing. From the visible body it receives a
// function F and an analysis manager AM; presumably it is the use-native pass
// entry point -- confirm against the upstream file.
//
// Visible behaviour: does nothing (all analyses preserved) when the
// amdgpu-use-native option list is empty; otherwise calls
// Simplifier.useNative on every call instruction in F and records whether
// anything changed.
2050 if (UseNative.empty())
2051 return PreservedAnalyses::all();
2052
2053 AMDGPULibCalls Simplifier(F, AM);
2054 Simplifier.initNativeFuncs();
2055
2056 bool Changed = false;
2057 for (auto &BB : F) {
 // The loop header has no increment; the iterator is advanced by hand below.
2058 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
2059 // Ignore non-calls.
 // NOTE(review): the definition of CI (listing line 2060) is missing;
 // presumably a dyn_cast of the current instruction to CallInst.
2061 ++I;
2062 if (CI && Simplifier.useNative(CI))
2063 Changed = true;
2064 }
2065 }
 // NOTE(review): the return statement (listing line 2066) is missing.
2067}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static Constant * getConstantFloatVector(const ArrayRef< APFloat > Values, const Type *Ty)
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
#define MATH_SQRT2
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
static Value * emitIsInf(IRBuilder<> &B, Value *val)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static Value * emitFastExpYLnx(IRBuilder<> &B, Value *X, Value *Y)
static Value * emitIsInteger(IRBuilder<> &B, Value *Y)
static Value * emitIsEvenInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static Value * emitPowFixup(IRBuilder<> &B, Value *X, Value *Y, Value *ExpYLnX, PowKind Kind)
Emit special case management epilog code for fast pow, powr, pown, and rootn expansions.
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
#define MATH_E
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
#define MATH_PI
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
#define MATH_SQRT1_2
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static Value * emitIsOddInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
loop term fold
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
FunctionAnalysisManager FAM
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
bool fold(CallInst *CI)
static void replaceCall(Instruction *I, Value *With)
AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM)
bool useNative(CallInst *CI)
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULibFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
EFuncId getId() const
bool isMangled() const
Param * getLeads()
Get leading parameters for mangled lib functions.
void setId(EFuncId Id)
ENamePrefix getPrefix() const
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
bool isNegative() const
Definition APFloat.h:1538
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5958
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1521
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5986
bool isZero() const
Definition APFloat.h:1534
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
A function analysis which provides an AssumptionCache.
static LLVM_ABI Attribute getWithNoFPClass(LLVMContext &Context, FPClassTest Mask)
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
void setCallingConv(CallingConv::ID CC)
void removeParamAttrs(unsigned ArgNo, const AttributeMask &AttrsToRemove)
Removes the attributes from the given argument.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
AttributeSet getParamAttributes(unsigned ArgNo) const
Return the param attributes for this call.
bool isNoInline() const
Return true if the call should not be inlined.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI APFloat getElementAsAPFloat(uint64_t i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
LLVM_ABI Constant * getElementAsConstant(uint64_t i) const
Return a Constant for a specified index's element.
LLVM_ABI uint64_t getNumElements() const
Return the number of elements in the array or vector.
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
const APFloat & getValueAPF() const
Definition Constants.h:463
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
Definition Constants.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
Definition Operator.h:262
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
Definition Operator.h:268
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:289
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
Definition Operator.h:271
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
Definition Operator.h:286
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setAllowContract(bool B=true)
Definition FMF.h:93
bool none() const
Definition FMF.h:60
bool approxFunc() const
Definition FMF.h:73
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetLibraryInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:144
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
void dropAllReferences()
Drop all references to operands.
Definition User.h:324
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_ABI APInt pow(const APInt &X, int64_t N)
Compute X^N for N>=0.
Definition APInt.cpp:3207
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
bool match(Val *V, const Pattern &P)
ap_match< APFloat > m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
initializer< Ty > init(const Ty &Val)
constexpr double ln2
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI bool isKnownIntegral(const Value *V, const SimplifyQuery &SQ, FastMathFlags FMF)
Return true if the floating-point value V is known to be an integer value.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39