/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

Bug Summary

File:	llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
Warning:	line 1661, column 13 2nd function call argument is an uninitialized value
Annotated Source Code

Press '?' to see keyboard shortcuts
Show analyzer invocation
clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPULibCalls.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU -I include -I /build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem 
/usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-command-line-argument -Wno-unknown-warning-option -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/build-llvm -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-10-22-022607-21188-1 -x c++ /build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file does AMD library function optimizations.
11//
12//===----------------------------------------------------------------------===//

14#include "AMDGPU.h"
15#include "AMDGPULibFunc.h"
16#include "GCNSubtarget.h"
17#include "llvm/Analysis/AliasAnalysis.h"
18#include "llvm/Analysis/Loads.h"
19#include "llvm/IR/IntrinsicsAMDGPU.h"
20#include "llvm/IR/IRBuilder.h"
21#include "llvm/InitializePasses.h"
22#include "llvm/Target/TargetMachine.h"

24#define DEBUG_TYPE"amdgpu-simplifylib" "amdgpu-simplifylib"

26using namespace llvm;

28static cl::opt<bool> EnablePreLink("amdgpu-prelink",
cl::desc("Enable pre-link mode optimizations"),
cl::init(false),
cl::Hidden);

33static cl::list<std::string> UseNative("amdgpu-use-native",
cl::desc("Comma separated list of functions to replace with native, or all"),
cl::CommaSeparated, cl::ValueOptional,
cl::Hidden);

38#define MATH_PInumbers::pi      numbers::pi
39#define MATH_Enumbers::e       numbers::e
40#define MATH_SQRT2numbers::sqrt2   numbers::sqrt2
41#define MATH_SQRT1_2numbers::inv_sqrt2 numbers::inv_sqrt2

43namespace llvm {

45class AMDGPULibCalls {
46private:

typedef llvm::AMDGPULibFunc FuncInfo;

const TargetMachine *TM;

// -fuse-native.
bool AllNative = false;

bool useNativeFunc(const StringRef F) const;

// Return a pointer (pointer expr) to the function if function definition with
// "FuncName" exists. It may create a new function prototype in pre-link mode.
FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);

// Replace a normal function with its native version.
bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);

bool parseFunctionName(const StringRef& FMangledName,
                       FuncInfo *FInfo=nullptr /*out*/);

bool TDOFold(CallInst *CI, const FuncInfo &FInfo);

/* Specialized optimizations */

// recip (half or native)
bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// divide (half or native)
bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// pow/powr/pown
bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// rootn
bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// fma/mad
bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// -fuse-native for sincos
bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);

// evaluate calls if calls' arguments are constants.
bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
  double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);

// exp
bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// exp2
bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// exp10
bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// log
bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// log2
bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// log10
bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// sqrt
bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);

// sin/cos
bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);

// __read_pipe/__write_pipe
bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);

// llvm.amdgcn.wavefrontsize
bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);

// Get insertion point at entry.
BasicBlock::iterator getEntryIns(CallInst * UI);
// Insert an Alloc instruction.
AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
// Get a scalar native builtin signle argument FP function
FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);

131protected:
CallInst *CI;

bool isUnsafeMath(const CallInst *CI) const;

void replaceCall(Value *With) {
  CI->replaceAllUsesWith(With);
  CI->eraseFromParent();
}

141public:
AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}

bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);

void initNativeFuncs();

// Replace a normal math function call with that native version
bool useNative(CallInst *CI);
150};

152} // end llvm namespace

154namespace {

/// FunctionPass that owns an AMDGPULibCalls simplifier built for the given
/// target machine. It declares AAResultsWrapperPass as a required analysis.
class AMDGPUSimplifyLibCalls : public FunctionPass {
public:
  static char ID; // Pass identification

  AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), Simplifier(TM) {
    // Make sure the pass is registered before it is first used.
    initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
  }

  bool runOnFunction(Function &M) override;

private:
  AMDGPULibCalls Simplifier;
};

/// FunctionPass that owns a default-constructed AMDGPULibCalls simplifier.
/// Construction registers the pass and then lets the simplifier read the
/// native-function selection via initNativeFuncs().
class AMDGPUUseNativeCalls : public FunctionPass {
public:
  static char ID; // Pass identification

  AMDGPUUseNativeCalls() : FunctionPass(ID) {
    initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
    Simplifier.initNativeFuncs();
  }

  bool runOnFunction(Function &F) override;

private:
  AMDGPULibCalls Simplifier;
};

190} // end anonymous namespace.

192char AMDGPUSimplifyLibCalls::ID = 0;
193char AMDGPUUseNativeCalls::ID = 0;

195INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",static void *initializeAMDGPUSimplifyLibCallsPassOnce(PassRegistry
 &Registry) {
                    "Simplify well-known AMD library calls", false, false)static void *initializeAMDGPUSimplifyLibCallsPassOnce(PassRegistry
 &Registry) {
197INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
198INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",PassInfo *PI = new PassInfo( "Simplify well-known AMD library calls"
, "amdgpu-simplifylib", &AMDGPUSimplifyLibCalls::ID, PassInfo
::NormalCtor_t(callDefaultCtor<AMDGPUSimplifyLibCalls>)
, false, false); Registry.registerPass(*PI, true); return PI;
 } static llvm::once_flag InitializeAMDGPUSimplifyLibCallsPassFlag
; void llvm::initializeAMDGPUSimplifyLibCallsPass(PassRegistry
 &Registry) { llvm::call_once(InitializeAMDGPUSimplifyLibCallsPassFlag
, initializeAMDGPUSimplifyLibCallsPassOnce, std::ref(Registry
)); }
                  "Simplify well-known AMD library calls", false, false)PassInfo *PI = new PassInfo( "Simplify well-known AMD library calls"
, "amdgpu-simplifylib", &AMDGPUSimplifyLibCalls::ID, PassInfo
::NormalCtor_t(callDefaultCtor<AMDGPUSimplifyLibCalls>)
, false, false); Registry.registerPass(*PI, true); return PI;
 } static llvm::once_flag InitializeAMDGPUSimplifyLibCallsPassFlag
; void llvm::initializeAMDGPUSimplifyLibCallsPass(PassRegistry
 &Registry) { llvm::call_once(InitializeAMDGPUSimplifyLibCallsPassFlag
, initializeAMDGPUSimplifyLibCallsPassOnce, std::ref(Registry
)); }

201INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry
 &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions."
, "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo
::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false
, false); Registry.registerPass(*PI, true); return PI; } static
 llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void
 llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry
) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce
, std::ref(Registry)); }
              "Replace builtin math calls with that native versions.",static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry
 &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions."
, "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo
::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false
, false); Registry.registerPass(*PI, true); return PI; } static
 llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void
 llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry
) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce
, std::ref(Registry)); }
              false, false)static void *initializeAMDGPUUseNativeCallsPassOnce(PassRegistry
 &Registry) { PassInfo *PI = new PassInfo( "Replace builtin math calls with that native versions."
, "amdgpu-usenative", &AMDGPUUseNativeCalls::ID, PassInfo
::NormalCtor_t(callDefaultCtor<AMDGPUUseNativeCalls>), false
, false); Registry.registerPass(*PI, true); return PI; } static
 llvm::once_flag InitializeAMDGPUUseNativeCallsPassFlag; void
 llvm::initializeAMDGPUUseNativeCallsPass(PassRegistry &Registry
) { llvm::call_once(InitializeAMDGPUUseNativeCallsPassFlag, initializeAMDGPUUseNativeCallsPassOnce
, std::ref(Registry)); }

205template <typename IRB>
206static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
                            const Twine &Name = "") {
CallInst *R = B.CreateCall(Callee, Arg, Name);
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
  R->setCallingConv(F->getCallingConv());
return R;
212}

214template <typename IRB>
215static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
                             Value *Arg2, const Twine &Name = "") {
CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
  R->setCallingConv(F->getCallingConv());
return R;
221}

// Data structures for table-driven optimizations.
// FuncTbl works for both f32 and f64 functions with 1 input argument.

/// One known value of a single-argument function: the function applied to
/// `input` yields `result`. Note the field order: result first, input second.
struct TableEntry {
  double result; // folded value of the call
  double input;  // argument the call's constant operand is compared against
};

231/* a list of {result, input} */
232static const TableEntry tbl_acos[] = {
{MATH_PInumbers::pi / 2.0, 0.0},
{MATH_PInumbers::pi / 2.0, -0.0},
{0.0, 1.0},
{MATH_PInumbers::pi, -1.0}
237};
238static const TableEntry tbl_acosh[] = {
{0.0, 1.0}
240};
241static const TableEntry tbl_acospi[] = {
{0.5, 0.0},
{0.5, -0.0},
{0.0, 1.0},
{1.0, -1.0}
246};
247static const TableEntry tbl_asin[] = {
{0.0, 0.0},
{-0.0, -0.0},
{MATH_PInumbers::pi / 2.0, 1.0},
{-MATH_PInumbers::pi / 2.0, -1.0}
252};
253static const TableEntry tbl_asinh[] = {
{0.0, 0.0},
{-0.0, -0.0}
256};
257static const TableEntry tbl_asinpi[] = {
{0.0, 0.0},
{-0.0, -0.0},
{0.5, 1.0},
{-0.5, -1.0}
262};
263static const TableEntry tbl_atan[] = {
{0.0, 0.0},
{-0.0, -0.0},
{MATH_PInumbers::pi / 4.0, 1.0},
{-MATH_PInumbers::pi / 4.0, -1.0}
268};
269static const TableEntry tbl_atanh[] = {
{0.0, 0.0},
{-0.0, -0.0}
272};
273static const TableEntry tbl_atanpi[] = {
{0.0, 0.0},
{-0.0, -0.0},
{0.25, 1.0},
{-0.25, -1.0}
278};
279static const TableEntry tbl_cbrt[] = {
{0.0, 0.0},
{-0.0, -0.0},
{1.0, 1.0},
{-1.0, -1.0},
284};
285static const TableEntry tbl_cos[] = {
{1.0, 0.0},
{1.0, -0.0}
288};
289static const TableEntry tbl_cosh[] = {
{1.0, 0.0},
{1.0, -0.0}
292};
293static const TableEntry tbl_cospi[] = {
{1.0, 0.0},
{1.0, -0.0}
296};
297static const TableEntry tbl_erfc[] = {
{1.0, 0.0},
{1.0, -0.0}
300};
301static const TableEntry tbl_erf[] = {
{0.0, 0.0},
{-0.0, -0.0}
304};
305static const TableEntry tbl_exp[] = {
{1.0, 0.0},
{1.0, -0.0},
{MATH_Enumbers::e, 1.0}
309};
310static const TableEntry tbl_exp2[] = {
{1.0, 0.0},
{1.0, -0.0},
{2.0, 1.0}
314};
315static const TableEntry tbl_exp10[] = {
{1.0, 0.0},
{1.0, -0.0},
{10.0, 1.0}
319};
320static const TableEntry tbl_expm1[] = {
{0.0, 0.0},
{-0.0, -0.0}
323};
324static const TableEntry tbl_log[] = {
{0.0, 1.0},
{1.0, MATH_Enumbers::e}
327};
328static const TableEntry tbl_log2[] = {
{0.0, 1.0},
{1.0, 2.0}
331};
332static const TableEntry tbl_log10[] = {
{0.0, 1.0},
{1.0, 10.0}
335};
336static const TableEntry tbl_rsqrt[] = {
{1.0, 1.0},
{MATH_SQRT1_2numbers::inv_sqrt2, 2.0}
339};
340static const TableEntry tbl_sin[] = {
{0.0, 0.0},
{-0.0, -0.0}
343};
344static const TableEntry tbl_sinh[] = {
{0.0, 0.0},
{-0.0, -0.0}
347};
348static const TableEntry tbl_sinpi[] = {
{0.0, 0.0},
{-0.0, -0.0}
351};
352static const TableEntry tbl_sqrt[] = {
{0.0, 0.0},
{1.0, 1.0},
{MATH_SQRT2numbers::sqrt2, 2.0}
356};
357static const TableEntry tbl_tan[] = {
{0.0, 0.0},
{-0.0, -0.0}
360};
361static const TableEntry tbl_tanh[] = {
{0.0, 0.0},
{-0.0, -0.0}
364};
365static const TableEntry tbl_tanpi[] = {
{0.0, 0.0},
{-0.0, -0.0}
368};
369static const TableEntry tbl_tgamma[] = {
{1.0, 1.0},
{1.0, 2.0},
{2.0, 3.0},
{6.0, 4.0}
374};

376static bool HasNative(AMDGPULibFunc::EFuncId id) {
switch(id) {
case AMDGPULibFunc::EI_DIVIDE:
case AMDGPULibFunc::EI_COS:
case AMDGPULibFunc::EI_EXP:
case AMDGPULibFunc::EI_EXP2:
case AMDGPULibFunc::EI_EXP10:
case AMDGPULibFunc::EI_LOG:
case AMDGPULibFunc::EI_LOG2:
case AMDGPULibFunc::EI_LOG10:
case AMDGPULibFunc::EI_POWR:
case AMDGPULibFunc::EI_RECIP:
case AMDGPULibFunc::EI_RSQRT:
case AMDGPULibFunc::EI_SIN:
case AMDGPULibFunc::EI_SINCOS:
case AMDGPULibFunc::EI_SQRT:
case AMDGPULibFunc::EI_TAN:
  return true;
default:;
}
return false;
397}

399struct TableRef {
size_t size;
const TableEntry *table; // variable size: from 0 to (size - 1)

TableRef() : size(0), table(nullptr) {}

template <size_t N>
TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
407};

409static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
switch(id) {
case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
case AMDGPULibFunc::EI_NCOS:
case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
case AMDGPULibFunc::EI_NEXP2:
case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
case AMDGPULibFunc::EI_NLOG2:
case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
case AMDGPULibFunc::EI_NRSQRT:
case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
case AMDGPULibFunc::EI_NSIN:
case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
case AMDGPULibFunc::EI_NSQRT:
case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
default:;
}
return TableRef();
451}

453static inline int getVecSize(const AMDGPULibFunc& FInfo) {
return FInfo.getLeads()[0].VectorSize;
455}

457static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
459}

461FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
// If we are doing PreLinkOpt, the function is external. So it is safe to
// use getOrInsertFunction() at this stage.

return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
                     : AMDGPULibFunc::getFunction(M, fInfo);
467}

469bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
                                  FuncInfo *FInfo) {
return AMDGPULibFunc::parse(FMangledName, *FInfo);
472}

474bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
if (auto Op = dyn_cast<FPMathOperator>(CI))
  if (Op->isFast())
    return true;
const Function *F = CI->getParent()->getParent();
Attribute Attr = F->getFnAttribute("unsafe-fp-math");
return Attr.getValueAsBool();
481}

483bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
return AllNative || llvm::is_contained(UseNative, F);
485}

487void AMDGPULibCalls::initNativeFuncs() {
AllNative = useNativeFunc("all") ||
            (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
             UseNative.begin()->empty());
491}

493bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
bool native_sin = useNativeFunc("sin");
bool native_cos = useNativeFunc("cos");

if (native_sin && native_cos) {
  Module *M = aCI->getModule();
  Value *opr0 = aCI->getArgOperand(0);

  AMDGPULibFunc nf;
  nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
  nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;

  nf.setPrefix(AMDGPULibFunc::NATIVE);
  nf.setId(AMDGPULibFunc::EI_SIN);
  FunctionCallee sinExpr = getFunction(M, nf);

  nf.setPrefix(AMDGPULibFunc::NATIVE);
  nf.setId(AMDGPULibFunc::EI_COS);
  FunctionCallee cosExpr = getFunction(M, nf);
  if (sinExpr && cosExpr) {
    Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
    Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
    new StoreInst(cosval, aCI->getArgOperand(1), aCI);

    DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("usenative")) { dbgs() << "<useNative> replace "
 << *aCI << " with native version of sin/cos"; } }
 while (false)
                                        << " with native version of sin/cos")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("usenative")) { dbgs() << "<useNative> replace "
 << *aCI << " with native version of sin/cos"; } }
 while (false);

    replaceCall(sinval);
    return true;
  }
}
return false;
525}

527bool AMDGPULibCalls::useNative(CallInst *aCI) {
CI = aCI;
Function *Callee = aCI->getCalledFunction();

FuncInfo FInfo;
if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
    FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
    getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
    !(AllNative || useNativeFunc(FInfo.getName()))) {
  return false;
}

if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
  return sincosUseNative(aCI, FInfo);

FInfo.setPrefix(AMDGPULibFunc::NATIVE);
FunctionCallee F = getFunction(aCI->getModule(), FInfo);
if (!F)
  return false;

aCI->setCalledFunction(F);
DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("usenative")) { dbgs() << "<useNative> replace "
 << *aCI << " with native version"; } } while (false
)
                                    << " with native version")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("usenative")) { dbgs() << "<useNative> replace "
 << *aCI << " with native version"; } } while (false
);
return true;
551}

553// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
554// builtin, with appended type size and alignment arguments, where 2 or 4
555// indicates the original number of arguments. The library has optimized version
556// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
557// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
558// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
559// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
560bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
                                        FuncInfo &FInfo) {
auto *Callee = CI->getCalledFunction();
if (!Callee->isDeclaration())
  return false;

assert(Callee->hasName() && "Invalid read_pipe/write_pipe function")(static_cast <bool> (Callee->hasName() && "Invalid read_pipe/write_pipe function"
) ? void (0) : __assert_fail ("Callee->hasName() && \"Invalid read_pipe/write_pipe function\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 566, __extension__ __PRETTY_FUNCTION__));
auto *M = Callee->getParent();
auto &Ctx = M->getContext();
std::string Name = std::string(Callee->getName());
auto NumArg = CI->arg_size();
if (NumArg != 4 && NumArg != 6)
  return false;
auto *PacketSize = CI->getArgOperand(NumArg - 2);
auto *PacketAlign = CI->getArgOperand(NumArg - 1);
if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
  return false;
unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
if (Alignment != Size)
  return false;

Type *PtrElemTy;
if (Size <= 8)
  PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
else
  PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
unsigned PtrArgLoc = CI->arg_size() - 3;
auto PtrArg = CI->getArgOperand(PtrArgLoc);
unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);

SmallVector<llvm::Type *, 6> ArgTys;
for (unsigned I = 0; I != PtrArgLoc; ++I)
  ArgTys.push_back(CI->getArgOperand(I)->getType());
ArgTys.push_back(PtrTy);

Name = Name + "_" + std::to_string(Size);
auto *FTy = FunctionType::get(Callee->getReturnType(),
                              ArrayRef<Type *>(ArgTys), false);
AMDGPULibFunc NewLibFunc(Name, FTy);
FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
if (!F)
  return false;

auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
SmallVector<Value *, 6> Args;
for (unsigned I = 0; I != PtrArgLoc; ++I)
  Args.push_back(CI->getArgOperand(I));
Args.push_back(BCast);

auto *NCI = B.CreateCall(F, Args);
NCI->setAttributes(CI->getAttributes());
CI->replaceAllUsesWith(NCI);
CI->dropAllReferences();
CI->eraseFromParent();

return true;
618}

620// This function returns false if no change; return true otherwise.
621bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
this->CI = CI;
Function *Callee = CI->getCalledFunction();

// Ignore indirect calls.
if (Callee11.1
'Callee' is not equal to null
 == 0) return false;
12
←
Taking false branch→

BasicBlock *BB = CI->getParent();
LLVMContext &Context = CI->getParent()->getContext();
IRBuilder<> B(Context);

// Set the builder to the instruction after the call.
B.SetInsertPoint(BB, CI->getIterator());

// Copy fast flags from the original call.
if (const FPMathOperator *FPOp13.1
'FPOp' is null
 = dyn_cast<const FPMathOperator>(CI))
13
←
Assuming 'CI' is not a 'FPMathOperator'→
14
←
Taking false branch→
  B.setFastMathFlags(FPOp->getFastMathFlags());

switch (Callee->getIntrinsicID()) {
15
←
Control jumps to the 'default' case at line 640→
default:
  break;
16
←
 Execution continues on line 646→
case Intrinsic::amdgcn_wavefrontsize:
  return !EnablePreLink && fold_wavefrontsize(CI, B);
}

FuncInfo FInfo;
if (!parseFunctionName(Callee->getName(), &FInfo))
17
←
Assuming the condition is false→
18
←
Taking false branch→
  return false;

// Further check the number of arguments to see if they match.
if (CI->arg_size() != FInfo.getNumArgs())
19
←
Assuming the condition is false→
20
←
Taking false branch→
  return false;

if (TDOFold(CI, FInfo))
  return true;

// Under unsafe-math, evaluate calls if possible.
// According to Brian Sumner, we can do this for all f32 function calls
// using host's double function calls.
if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
21
←
Assuming the condition is true→
22
←
Calling 'AMDGPULibCalls::evaluateCall'→
  return true;

// Specialized optimizations for each function call
switch (FInfo.getId()) {
case AMDGPULibFunc::EI_RECIP:
  // skip vector function
  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc
::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
 "recip must be an either native or half function") ? void (0
) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"recip must be an either native or half function\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 669, __extension__ __PRETTY_FUNCTION__))
           FInfo.getPrefix() == AMDGPULibFunc::HALF) &&(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc
::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
 "recip must be an either native or half function") ? void (0
) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"recip must be an either native or half function\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 669, __extension__ __PRETTY_FUNCTION__))
          "recip must be an either native or half function")(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc
::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
 "recip must be an either native or half function") ? void (0
) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"recip must be an either native or half function\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 669, __extension__ __PRETTY_FUNCTION__));
  return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);

case AMDGPULibFunc::EI_DIVIDE:
  // skip vector function
  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc
::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
 "divide must be an either native or half function") ? void (
0) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"divide must be an either native or half function\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 676, __extension__ __PRETTY_FUNCTION__))
           FInfo.getPrefix() == AMDGPULibFunc::HALF) &&(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc
::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
 "divide must be an either native or half function") ? void (
0) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"divide must be an either native or half function\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 676, __extension__ __PRETTY_FUNCTION__))
          "divide must be an either native or half function")(static_cast <bool> ((FInfo.getPrefix() == AMDGPULibFunc
::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
 "divide must be an either native or half function") ? void (
0) : __assert_fail ("(FInfo.getPrefix() == AMDGPULibFunc::NATIVE || FInfo.getPrefix() == AMDGPULibFunc::HALF) && \"divide must be an either native or half function\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 676, __extension__ __PRETTY_FUNCTION__));
  return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);

case AMDGPULibFunc::EI_POW:
case AMDGPULibFunc::EI_POWR:
case AMDGPULibFunc::EI_POWN:
  return fold_pow(CI, B, FInfo);

case AMDGPULibFunc::EI_ROOTN:
  // skip vector function
  return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);

case AMDGPULibFunc::EI_FMA:
case AMDGPULibFunc::EI_MAD:
case AMDGPULibFunc::EI_NFMA:
  // skip vector function
  return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);

case AMDGPULibFunc::EI_SQRT:
  return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
case AMDGPULibFunc::EI_COS:
case AMDGPULibFunc::EI_SIN:
  if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
       getArgType(FInfo) == AMDGPULibFunc::F64)
      && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
    return fold_sincos(CI, B, AA);

  break;
case AMDGPULibFunc::EI_READ_PIPE_2:
case AMDGPULibFunc::EI_READ_PIPE_4:
case AMDGPULibFunc::EI_WRITE_PIPE_2:
case AMDGPULibFunc::EI_WRITE_PIPE_4:
  return fold_read_write_pipe(CI, B, FInfo);

default:
  break;
}

return false;
715}

717bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
// Table-Driven optimization
const TableRef tr = getOptTable(FInfo.getId());
if (tr.size==0)
  return false;

int const sz = (int)tr.size;
const TableEntry * const ftbl = tr.table;
Value *opr0 = CI->getArgOperand(0);

if (getVecSize(FInfo) > 1) {
  if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
    SmallVector<double, 0> DVal;
    for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
      ConstantFP *eltval = dyn_cast<ConstantFP>(
                             CV->getElementAsConstant((unsigned)eltNo));
      assert(eltval && "Non-FP arguments in math function!")(static_cast <bool> (eltval && "Non-FP arguments in math function!"
) ? void (0) : __assert_fail ("eltval && \"Non-FP arguments in math function!\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 733, __extension__ __PRETTY_FUNCTION__));
      bool found = false;
      for (int i=0; i < sz; ++i) {
        if (eltval->isExactlyValue(ftbl[i].input)) {
          DVal.push_back(ftbl[i].result);
          found = true;
          break;
        }
      }
      if (!found) {
        // This vector constants not handled yet.
        return false;
      }
    }
    LLVMContext &context = CI->getParent()->getParent()->getContext();
    Constant *nval;
    if (getArgType(FInfo) == AMDGPULibFunc::F32) {
      SmallVector<float, 0> FVal;
      for (unsigned i = 0; i < DVal.size(); ++i) {
        FVal.push_back((float)DVal[i]);
      }
      ArrayRef<float> tmp(FVal);
      nval = ConstantDataVector::get(context, tmp);
    } else { // F64
      ArrayRef<double> tmp(DVal);
      nval = ConstantDataVector::get(context, tmp);
    }
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *nval << "\n"; } } while
 (false);
    replaceCall(nval);
    return true;
  }
} else {
  // Scalar version
  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
    for (int i = 0; i < sz; ++i) {
      if (CF->isExactlyValue(ftbl[i].input)) {
        Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
        LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *nval << "\n"; } } while
 (false);
        replaceCall(nval);
        return true;
      }
    }
  }
}

return false;
779}

781bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
Module *M = CI->getModule();
if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
    FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
    !HasNative(FInfo.getId()))
  return false;

AMDGPULibFunc nf = FInfo;
nf.setPrefix(AMDGPULibFunc::NATIVE);
if (FunctionCallee FPExpr = getFunction(M, nf)) {
  LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: " << *
CI << " ---> "; } } while (false);

  CI->setCalledFunction(FPExpr);

  LLVM_DEBUG(dbgs() << *CI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << *CI << '\n'; }
 } while (false);

  return true;
}
return false;
800}

802//  [native_]half_recip(c) ==> 1.0/c
803bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
                              const FuncInfo &FInfo) {
Value *opr0 = CI->getArgOperand(0);
if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
  // Just create a normal div. Later, InstCombine will be able
  // to compute the divide into a constant (avoid check float infinity
  // or subnormal at this point).
  Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
                             opr0,
                             "recip2div");
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *nval << "\n"; } } while
 (false);
  replaceCall(nval);
  return true;
}
return false;
818}

820//  [native_]half_divide(x, c) ==> x/c
821bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
                               const FuncInfo &FInfo) {
Value *opr0 = CI->getArgOperand(0);
Value *opr1 = CI->getArgOperand(1);
ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);

if ((CF0 && CF1) ||  // both are constants
    (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
    // CF1 is constant && f32 divide
{
  Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
                              opr1, "__div2recip");
  Value *nval  = B.CreateFMul(opr0, nval1, "__div2mul");
  replaceCall(nval);
  return true;
}
return false;
839}

namespace llvm {
/// Returns the base-2 logarithm of \p V.
///
/// The C library's log2() is used when the feature-test macros advertise it;
/// otherwise the value is derived from the natural logarithm.
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  return log(V) / numbers::ln2;
#endif
}
}

851bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
                            const FuncInfo &FInfo) {
assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||(static_cast <bool> ((FInfo.getId() == AMDGPULibFunc::EI_POW
 || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() ==
 AMDGPULibFunc::EI_POWN) && "fold_pow: encounter a wrong function call"
) ? void (0) : __assert_fail ("(FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && \"fold_pow: encounter a wrong function call\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 856, __extension__ __PRETTY_FUNCTION__))
        FInfo.getId() == AMDGPULibFunc::EI_POWR ||(static_cast <bool> ((FInfo.getId() == AMDGPULibFunc::EI_POW
 || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() ==
 AMDGPULibFunc::EI_POWN) && "fold_pow: encounter a wrong function call"
) ? void (0) : __assert_fail ("(FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && \"fold_pow: encounter a wrong function call\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 856, __extension__ __PRETTY_FUNCTION__))
        FInfo.getId() == AMDGPULibFunc::EI_POWN) &&(static_cast <bool> ((FInfo.getId() == AMDGPULibFunc::EI_POW
 || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() ==
 AMDGPULibFunc::EI_POWN) && "fold_pow: encounter a wrong function call"
) ? void (0) : __assert_fail ("(FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && \"fold_pow: encounter a wrong function call\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 856, __extension__ __PRETTY_FUNCTION__))
       "fold_pow: encounter a wrong function call")(static_cast <bool> ((FInfo.getId() == AMDGPULibFunc::EI_POW
 || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() ==
 AMDGPULibFunc::EI_POWN) && "fold_pow: encounter a wrong function call"
) ? void (0) : __assert_fail ("(FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && \"fold_pow: encounter a wrong function call\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 856, __extension__ __PRETTY_FUNCTION__));

Value *opr0, *opr1;
ConstantFP *CF;
ConstantInt *CINT;
ConstantAggregateZero *CZero;
Type *eltType;

opr0 = CI->getArgOperand(0);
opr1 = CI->getArgOperand(1);
CZero = dyn_cast<ConstantAggregateZero>(opr1);
if (getVecSize(FInfo) == 1) {
  eltType = opr0->getType();
  CF = dyn_cast<ConstantFP>(opr1);
  CINT = dyn_cast<ConstantInt>(opr1);
} else {
  VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
  assert(VTy && "Oprand of vector function should be of vectortype")(static_cast <bool> (VTy && "Oprand of vector function should be of vectortype"
) ? void (0) : __assert_fail ("VTy && \"Oprand of vector function should be of vectortype\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 873, __extension__ __PRETTY_FUNCTION__));
  eltType = VTy->getElementType();
  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);

  // Now, only Handle vector const whose elements have the same value.
  CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
  CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
}

// No unsafe math , no constant argument, do nothing
if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
  return false;

// 0x1111111 means that we don't do anything for this call.
int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);

if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
  //  pow/powr/pown(x, 0) == 1
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> 1\n"; } } while (false);
  Constant *cnval = ConstantFP::get(eltType, 1.0);
  if (getVecSize(FInfo) > 1) {
    cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
  }
  replaceCall(cnval);
  return true;
}
if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
  // pow/powr/pown(x, 1.0) = x
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr0 << "\n"; } } while
 (false);
  replaceCall(opr0);
  return true;
}
if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
  // pow/powr/pown(x, 2.0) = x*x
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr0 << " * " <<
 *opr0 << "\n"; } } while (false)
                    << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr0 << " * " <<
 *opr0 << "\n"; } } while (false);
  Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
  replaceCall(nval);
  return true;
}
if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
  // pow/powr/pown(x, -1.0) = 1.0/x
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> 1 / " << *opr0 << "\n"; } }
 while (false);
  Constant *cnval = ConstantFP::get(eltType, 1.0);
  if (getVecSize(FInfo) > 1) {
    cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
  }
  Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
  replaceCall(nval);
  return true;
}

Module *M = CI->getModule();
if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
  // pow[r](x, [-]0.5) = sqrt(x)
  bool issqrt = CF->isExactlyValue(0.5);
  if (FunctionCallee FPExpr =
          getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
                                              : AMDGPULibFunc::EI_RSQRT,
                                       FInfo))) {
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << FInfo.getName().c_str() <<
 "(" << *opr0 << ")\n"; } } while (false)
                      << FInfo.getName().c_str() << "(" << *opr0 << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << FInfo.getName().c_str() <<
 "(" << *opr0 << ")\n"; } } while (false);
    Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
                                                      : "__pow2rsqrt");
    replaceCall(nval);
    return true;
  }
}

if (!isUnsafeMath(CI))
  return false;

// Unsafe Math optimization

// Remember that ci_opr1 is set if opr1 is integral
if (CF) {
  double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
                  ? (double)CF->getValueAPF().convertToFloat()
                  : CF->getValueAPF().convertToDouble();
  int ival = (int)dval;
  if ((double)ival == dval) {
    ci_opr1 = ival;
  } else
    ci_opr1 = 0x11111111;
}

// pow/powr/pown(x, c) = [1/](x*x*..x); where
//   trunc(c) == c && the number of x == c && |c| <= 12
unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
if (abs_opr1 <= 12) {
  Constant *cnval;
  Value *nval;
  if (abs_opr1 == 0) {
    cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    nval = cnval;
  } else {
    Value *valx2 = nullptr;
    nval = nullptr;
    while (abs_opr1 > 0) {
      valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
      if (abs_opr1 & 1) {
        nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
      }
      abs_opr1 >>= 1;
    }
  }

  if (ci_opr1 < 0) {
    cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    nval = B.CreateFDiv(cnval, nval, "__1powprod");
  }
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << ((ci_opr1 < 0) ? "1/prod("
 : "prod(") << *opr0 << ")\n"; } } while (false)
                    << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << ((ci_opr1 < 0) ? "1/prod("
 : "prod(") << *opr0 << ")\n"; } } while (false)
                    << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << ((ci_opr1 < 0) ? "1/prod("
 : "prod(") << *opr0 << ")\n"; } } while (false);
  replaceCall(nval);
  return true;
}

// powr ---> exp2(y * log2(x))
// pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
FunctionCallee ExpExpr =
    getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
if (!ExpExpr)
  return false;

bool needlog = false;
bool needabs = false;
bool needcopysign = false;
Constant *cnval = nullptr;
if (getVecSize(FInfo) == 1) {
  CF = dyn_cast<ConstantFP>(opr0);

  if (CF) {
    double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                 ? (double)CF->getValueAPF().convertToFloat()
                 : CF->getValueAPF().convertToDouble();

    V = log2(std::abs(V));
    cnval = ConstantFP::get(eltType, V);
    needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
                   CF->isNegative();
  } else {
    needlog = true;
    needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
                             (!CF || CF->isNegative());
  }
} else {
  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);

  if (!CDV) {
    needlog = true;
    needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
  } else {
    assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&(static_cast <bool> ((int)CDV->getNumElements() == getVecSize
(FInfo) && "Wrong vector size detected") ? void (0) :
 __assert_fail ("(int)CDV->getNumElements() == getVecSize(FInfo) && \"Wrong vector size detected\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 1033, __extension__ __PRETTY_FUNCTION__))
            "Wrong vector size detected")(static_cast <bool> ((int)CDV->getNumElements() == getVecSize
(FInfo) && "Wrong vector size detected") ? void (0) :
 __assert_fail ("(int)CDV->getNumElements() == getVecSize(FInfo) && \"Wrong vector size detected\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 1033, __extension__ __PRETTY_FUNCTION__));

    SmallVector<double, 0> DVal;
    for (int i=0; i < getVecSize(FInfo); ++i) {
      double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                   ? (double)CDV->getElementAsFloat(i)
                   : CDV->getElementAsDouble(i);
      if (V < 0.0) needcopysign = true;
      V = log2(std::abs(V));
      DVal.push_back(V);
    }
    if (getArgType(FInfo) == AMDGPULibFunc::F32) {
      SmallVector<float, 0> FVal;
      for (unsigned i=0; i < DVal.size(); ++i) {
        FVal.push_back((float)DVal[i]);
      }
      ArrayRef<float> tmp(FVal);
      cnval = ConstantDataVector::get(M->getContext(), tmp);
    } else {
      ArrayRef<double> tmp(DVal);
      cnval = ConstantDataVector::get(M->getContext(), tmp);
    }
  }
}

if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
  // We cannot handle corner cases for a general pow() function, give up
  // unless y is a constant integral value. Then proceed as if it were pown.
  if (getVecSize(FInfo) == 1) {
    if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
      double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
                 ? (double)CF->getValueAPF().convertToFloat()
                 : CF->getValueAPF().convertToDouble();
      if (y != (double)(int64_t)y)
        return false;
    } else
      return false;
  } else {
    if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
      for (int i=0; i < getVecSize(FInfo); ++i) {
        double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
                   ? (double)CDV->getElementAsFloat(i)
                   : CDV->getElementAsDouble(i);
        if (y != (double)(int64_t)y)
          return false;
      }
    } else
      return false;
  }
}

Value *nval;
if (needabs) {
  FunctionCallee AbsExpr =
      getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
  if (!AbsExpr)
    return false;
  nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
} else {
  nval = cnval ? cnval : opr0;
}
if (needlog) {
  FunctionCallee LogExpr =
      getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
  if (!LogExpr)
    return false;
  nval = CreateCallEx(B,LogExpr, nval, "__log2");
}

if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
  // convert int(32) to fp(f32 or f64)
  opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
}
nval = B.CreateFMul(opr1, nval, "__ylogx");
nval = CreateCallEx(B,ExpExpr, nval, "__exp2");

if (needcopysign) {
  Value *opr_n;
  Type* rTy = opr0->getType();
  Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
  Type *nTy = nTyS;
  if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
    nTy = FixedVectorType::get(nTyS, vTy);
  unsigned size = nTy->getScalarSizeInBits();
  opr_n = CI->getArgOperand(1);
  if (opr_n->getType()->isIntegerTy())
    opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
  else
    opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");

  Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
  sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
  nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
  nval = B.CreateBitCast(nval, opr0->getType());
}

LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << "exp2(" << *opr1 <<
 " * log2(" << *opr0 << "))\n"; } } while (false)
                  << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << "exp2(" << *opr1 <<
 " * log2(" << *opr0 << "))\n"; } } while (false);
replaceCall(nval);

return true;
1134}

1136bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
                              const FuncInfo &FInfo) {
Value *opr0 = CI->getArgOperand(0);
Value *opr1 = CI->getArgOperand(1);

ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
if (!CINT) {
  return false;
}
int ci_opr1 = (int)CINT->getSExtValue();
if (ci_opr1 == 1) {  // rootn(x, 1) = x
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr0 << "\n"; } } while
 (false);
  replaceCall(opr0);
  return true;
}
if (ci_opr1 == 2) {  // rootn(x, 2) = sqrt(x)
  Module *M = CI->getModule();
  if (FunctionCallee FPExpr =
          getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> sqrt(" << *opr0 << ")\n"; }
 } while (false);
    Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
    replaceCall(nval);
    return true;
  }
} else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
  Module *M = CI->getModule();
  if (FunctionCallee FPExpr =
          getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> cbrt(" << *opr0 << ")\n"; }
 } while (false);
    Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
    replaceCall(nval);
    return true;
  }
} else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> 1.0 / " << *opr0 << "\n"; }
 } while (false);
  Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
                             opr0,
                             "__rootn2div");
  replaceCall(nval);
  return true;
} else if (ci_opr1 == -2) {  // rootn(x, -2) = rsqrt(x)
  Module *M = CI->getModule();
  if (FunctionCallee FPExpr =
          getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> rsqrt(" << *opr0 << ")\n"; }
 } while (false)
                      << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> rsqrt(" << *opr0 << ")\n"; }
 } while (false);
    Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
    replaceCall(nval);
    return true;
  }
}
return false;
1188}

1190bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
                                const FuncInfo &FInfo) {
Value *opr0 = CI->getArgOperand(0);
Value *opr1 = CI->getArgOperand(1);
Value *opr2 = CI->getArgOperand(2);

ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
  // fma/mad(a, b, c) = c if a=0 || b=0
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr2 << "\n"; } } while
 (false);
  replaceCall(opr2);
  return true;
}
if (CF0 && CF0->isExactlyValue(1.0f)) {
  // fma/mad(a, b, c) = b+c if a=1
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr1 << " + " <<
 *opr2 << "\n"; } } while (false)
                    << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr1 << " + " <<
 *opr2 << "\n"; } } while (false);
  Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
  replaceCall(nval);
  return true;
}
if (CF1 && CF1->isExactlyValue(1.0f)) {
  // fma/mad(a, b, c) = a+c if b=1
  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr0 << " + " <<
 *opr2 << "\n"; } } while (false)
                    << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr0 << " + " <<
 *opr2 << "\n"; } } while (false);
  Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
  replaceCall(nval);
  return true;
}
if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
  if (CF->isZero()) {
    // fma/mad(a, b, c) = a*b if c=0
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr0 << " * " <<
 *opr1 << "\n"; } } while (false)
                      << *opr1 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << *opr0 << " * " <<
 *opr1 << "\n"; } } while (false);
    Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
    replaceCall(nval);
    return true;
  }
}

return false;
1232}

1234// Get a scalar native builtin single argument FP function
1235FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
                                               const FuncInfo &FInfo) {
if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
  return nullptr;
FuncInfo nf = FInfo;
nf.setPrefix(AMDGPULibFunc::NATIVE);
return getFunction(M, nf);
1242}

1244// fold sqrt -> native_sqrt (x)
1245bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
                             const FuncInfo &FInfo) {
if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
    (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
  if (FunctionCallee FPExpr = getNativeFunction(
          CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
    Value *opr0 = CI->getArgOperand(0);
    LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << "sqrt(" << *opr0 <<
 ")\n"; } } while (false)
                      << "sqrt(" << *opr0 << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: " << *
CI << " ---> " << "sqrt(" << *opr0 <<
 ")\n"; } } while (false);
    Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
    replaceCall(nval);
    return true;
  }
}
return false;
1260}

1262// fold sin, cos -> sincos.
1263bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
                               AliasAnalysis *AA) {
AMDGPULibFunc fInfo;
if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
  return false;

assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||(static_cast <bool> (fInfo.getId() == AMDGPULibFunc::EI_SIN
 || fInfo.getId() == AMDGPULibFunc::EI_COS) ? void (0) : __assert_fail
 ("fInfo.getId() == AMDGPULibFunc::EI_SIN || fInfo.getId() == AMDGPULibFunc::EI_COS"
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 1270, __extension__ __PRETTY_FUNCTION__))
       fInfo.getId() == AMDGPULibFunc::EI_COS)(static_cast <bool> (fInfo.getId() == AMDGPULibFunc::EI_SIN
 || fInfo.getId() == AMDGPULibFunc::EI_COS) ? void (0) : __assert_fail
 ("fInfo.getId() == AMDGPULibFunc::EI_SIN || fInfo.getId() == AMDGPULibFunc::EI_COS"
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 1270, __extension__ __PRETTY_FUNCTION__));
bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;

Value *CArgVal = CI->getArgOperand(0);
BasicBlock * const CBB = CI->getParent();

int const MaxScan = 30;
bool Changed = false;

{ // fold in load value.
  LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
  if (LI && LI->getParent() == CBB) {
    BasicBlock::iterator BBI = LI->getIterator();
    Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
    if (AvailableVal) {
      Changed = true;
      CArgVal->replaceAllUsesWith(AvailableVal);
      if (CArgVal->getNumUses() == 0)
        LI->eraseFromParent();
      CArgVal = CI->getArgOperand(0);
    }
  }
}

Module *M = CI->getModule();
fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
std::string const PairName = fInfo.mangle();

CallInst *UI = nullptr;
for (User* U : CArgVal->users()) {
  CallInst *XI = dyn_cast_or_null<CallInst>(U);
  if (!XI || XI == CI || XI->getParent() != CBB)
    continue;

  Function *UCallee = XI->getCalledFunction();
  if (!UCallee || !UCallee->getName().equals(PairName))
    continue;

  BasicBlock::iterator BBI = CI->getIterator();
  if (BBI == CI->getParent()->begin())
    break;
  --BBI;
  for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
    if (cast<Instruction>(BBI) == XI) {
      UI = XI;
      break;
    }
  }
  if (UI) break;
}

if (!UI)
  return Changed;

// Merge the sin and cos.

// for OpenCL 2.0 we have only generic implementation of sincos
// function.
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
FunctionCallee Fsincos = getFunction(M, nf);
if (!Fsincos)
  return Changed;

BasicBlock::iterator ItOld = B.GetInsertPoint();
AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
B.SetInsertPoint(UI);

Value *P = Alloc;
Type *PTy = Fsincos.getFunctionType()->getParamType(1);
// The allocaInst allocates the memory in private address space. This need
// to be bitcasted to point to the address space of cos pointer type.
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
  P = B.CreateAddrSpaceCast(Alloc, PTy);
CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);

LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: fold_sincos ("
 << *CI << ", " << *UI << ") with " <<
 *Call << "\n"; } } while (false)
                  << *Call << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: fold_sincos ("
 << *CI << ", " << *UI << ") with " <<
 *Call << "\n"; } } while (false);

if (!isSin) { // CI->cos, UI->sin
  B.SetInsertPoint(&*ItOld);
  UI->replaceAllUsesWith(&*Call);
  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
  CI->replaceAllUsesWith(Reload);
  UI->eraseFromParent();
  CI->eraseFromParent();
} else { // CI->sin, UI->cos
  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
  UI->replaceAllUsesWith(Reload);
  CI->replaceAllUsesWith(Call);
  UI->eraseFromParent();
  CI->eraseFromParent();
}
return true;
1365}

1367bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
if (!TM)
  return false;

StringRef CPU = TM->getTargetCPU();
StringRef Features = TM->getTargetFeatureString();
if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
    (Features.empty() ||
     Features.find_insensitive("wavefrontsize") == StringRef::npos))
  return false;

Function *F = CI->getParent()->getParent();
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
unsigned N = ST.getWavefrontSize();

LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: fold_wavefrontsize ("
 << *CI << ") with " << N << "\n"; } }
 while (false)
             << N << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { errs() << "AMDIC: fold_wavefrontsize ("
 << *CI << ") with " << N << "\n"; } }
 while (false);

CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
CI->eraseFromParent();
return true;
1388}

1390// Get insertion point at entry.
1391BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
Function * Func = UI->getParent()->getParent();
BasicBlock * BB = &Func->getEntryBlock();
assert(BB && "Entry block not found!")(static_cast <bool> (BB && "Entry block not found!"
) ? void (0) : __assert_fail ("BB && \"Entry block not found!\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 1394, __extension__ __PRETTY_FUNCTION__));
BasicBlock::iterator ItNew = BB->begin();
return ItNew;
1397}

1399// Insert a AllocsInst at the beginning of function entry block.
1400AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
                                       const char *prefix) {
BasicBlock::iterator ItNew = getEntryIns(UI);
Function *UCallee = UI->getCalledFunction();
Type *RetType = UCallee->getReturnType();
B.SetInsertPoint(&*ItNew);
AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
  std::string(prefix) + UI->getName());
Alloc->setAlignment(
    Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
return Alloc;
1411}

1413bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
                                          double& Res0, double& Res1,
                                          Constant *copr0, Constant *copr1,
                                          Constant *copr2) {
// By default, opr0/opr1/opr3 holds values of float/double type.
// If they are not float/double, each function has to its
// operand separately.
double opr0=0.0, opr1=0.0, opr2=0.0;
ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
if (fpopr0) {
  opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
           ? fpopr0->getValueAPF().convertToDouble()
           : (double)fpopr0->getValueAPF().convertToFloat();
}

if (fpopr1) {
  opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
           ? fpopr1->getValueAPF().convertToDouble()
           : (double)fpopr1->getValueAPF().convertToFloat();
}

if (fpopr2) {
  opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
           ? fpopr2->getValueAPF().convertToDouble()
           : (double)fpopr2->getValueAPF().convertToFloat();
}

switch (FInfo.getId()) {
default : return false;

case AMDGPULibFunc::EI_ACOS:
  Res0 = acos(opr0);
  return true;

case AMDGPULibFunc::EI_ACOSH:
  // acosh(x) == log(x + sqrt(x*x - 1))
  Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
  return true;

case AMDGPULibFunc::EI_ACOSPI:
  Res0 = acos(opr0) / MATH_PInumbers::pi;
  return true;

case AMDGPULibFunc::EI_ASIN:
  Res0 = asin(opr0);
  return true;

case AMDGPULibFunc::EI_ASINH:
  // asinh(x) == log(x + sqrt(x*x + 1))
  Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
  return true;

case AMDGPULibFunc::EI_ASINPI:
  Res0 = asin(opr0) / MATH_PInumbers::pi;
  return true;

case AMDGPULibFunc::EI_ATAN:
  Res0 = atan(opr0);
  return true;

case AMDGPULibFunc::EI_ATANH:
  // atanh(x) == (log(x+1) - log(x-1))/2;
  Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
  return true;

case AMDGPULibFunc::EI_ATANPI:
  Res0 = atan(opr0) / MATH_PInumbers::pi;
  return true;

case AMDGPULibFunc::EI_CBRT:
  Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
  return true;

case AMDGPULibFunc::EI_COS:
  Res0 = cos(opr0);
  return true;

case AMDGPULibFunc::EI_COSH:
  Res0 = cosh(opr0);
  return true;

case AMDGPULibFunc::EI_COSPI:
  Res0 = cos(MATH_PInumbers::pi * opr0);
  return true;

case AMDGPULibFunc::EI_EXP:
  Res0 = exp(opr0);
  return true;

case AMDGPULibFunc::EI_EXP2:
  Res0 = pow(2.0, opr0);
  return true;

case AMDGPULibFunc::EI_EXP10:
  Res0 = pow(10.0, opr0);
  return true;

case AMDGPULibFunc::EI_EXPM1:
  Res0 = exp(opr0) - 1.0;
  return true;

case AMDGPULibFunc::EI_LOG:
  Res0 = log(opr0);
  return true;

case AMDGPULibFunc::EI_LOG2:
  Res0 = log(opr0) / log(2.0);
  return true;

case AMDGPULibFunc::EI_LOG10:
  Res0 = log(opr0) / log(10.0);
  return true;

case AMDGPULibFunc::EI_RSQRT:
  Res0 = 1.0 / sqrt(opr0);
  return true;

case AMDGPULibFunc::EI_SIN:
  Res0 = sin(opr0);
  return true;

case AMDGPULibFunc::EI_SINH:
  Res0 = sinh(opr0);
  return true;

case AMDGPULibFunc::EI_SINPI:
  Res0 = sin(MATH_PInumbers::pi * opr0);
  return true;

case AMDGPULibFunc::EI_SQRT:
  Res0 = sqrt(opr0);
  return true;

case AMDGPULibFunc::EI_TAN:
  Res0 = tan(opr0);
  return true;

case AMDGPULibFunc::EI_TANH:
  Res0 = tanh(opr0);
  return true;

case AMDGPULibFunc::EI_TANPI:
  Res0 = tan(MATH_PInumbers::pi * opr0);
  return true;

case AMDGPULibFunc::EI_RECIP:
  Res0 = 1.0 / opr0;
  return true;

// two-arg functions
case AMDGPULibFunc::EI_DIVIDE:
  Res0 = opr0 / opr1;
  return true;

case AMDGPULibFunc::EI_POW:
case AMDGPULibFunc::EI_POWR:
  Res0 = pow(opr0, opr1);
  return true;

case AMDGPULibFunc::EI_POWN: {
  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
    double val = (double)iopr1->getSExtValue();
    Res0 = pow(opr0, val);
    return true;
  }
  return false;
}

case AMDGPULibFunc::EI_ROOTN: {
  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
    double val = (double)iopr1->getSExtValue();
    Res0 = pow(opr0, 1.0 / val);
    return true;
  }
  return false;
}

// with ptr arg
case AMDGPULibFunc::EI_SINCOS:
  Res0 = sin(opr0);
  Res1 = cos(opr0);
  return true;

// three-arg functions
case AMDGPULibFunc::EI_FMA:
case AMDGPULibFunc::EI_MAD:
  Res0 = opr0 * opr1 + opr2;
  return true;
}

return false;
1606}

1608bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
int numArgs = (int)aCI->arg_size();
if (numArgs > 3)
23
←
Assuming 'numArgs' is <= 3→
24
←
Taking false branch→
  return false;

Constant *copr0 = nullptr;
Constant *copr1 = nullptr;
Constant *copr2 = nullptr;
if (numArgs > 0) {
25
←
Assuming 'numArgs' is <= 0→
26
←
Taking false branch→
  if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
    return false;
}

if (numArgs26.1
'numArgs' is <= 1
 > 1) {
27
←
Taking false branch→
  if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
    if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
      return false;
  }
}

if (numArgs27.1
'numArgs' is <= 2
 > 2) {
28
←
Taking false branch→
  if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
    return false;
}

// At this point, all arguments to aCI are constants.

// max vector size is 16, and sincos will generate two results.
double DVal0[16], DVal1[16];
bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
29
←
Assuming the condition is false→
if (getVecSize(FInfo) == 1) {
30
←
Assuming the condition is false→
31
←
Taking false branch→
  if (!evaluateScalarMathFunc(FInfo, DVal0[0],
                              DVal1[0], copr0, copr1, copr2)) {
    return false;
  }
} else {
  ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
32
←
Assuming null pointer is passed into cast→
  ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
33
←
Assuming null pointer is passed into cast→
  ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
34
←
Assuming null pointer is passed into cast→
  for (int i=0; i < getVecSize(FInfo); ++i) {
35
←
Assuming the condition is false→
36
←
Loop condition is false. Execution continues on line 1658→
    Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
    Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
    Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
    if (!evaluateScalarMathFunc(FInfo, DVal0[i],
                                DVal1[i], celt0, celt1, celt2)) {
      return false;
    }
  }
}

LLVMContext &context = CI->getParent()->getParent()->getContext();
Constant *nval0, *nval1;
if (getVecSize(FInfo) == 1) {
37
←
Assuming the condition is true→
38
←
Taking true branch→
  nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
39
←
2nd function call argument is an uninitialized value
  if (hasTwoResults)
    nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
} else {
  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
    SmallVector <float, 0> FVal0, FVal1;
    for (int i=0; i < getVecSize(FInfo); ++i)
      FVal0.push_back((float)DVal0[i]);
    ArrayRef<float> tmp0(FVal0);
    nval0 = ConstantDataVector::get(context, tmp0);
    if (hasTwoResults) {
      for (int i=0; i < getVecSize(FInfo); ++i)
        FVal1.push_back((float)DVal1[i]);
      ArrayRef<float> tmp1(FVal1);
      nval1 = ConstantDataVector::get(context, tmp1);
    }
  } else {
    ArrayRef<double> tmp0(DVal0);
    nval0 = ConstantDataVector::get(context, tmp0);
    if (hasTwoResults) {
      ArrayRef<double> tmp1(DVal1);
      nval1 = ConstantDataVector::get(context, tmp1);
    }
  }
}

if (hasTwoResults) {
  // sincos
  assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&(static_cast <bool> (FInfo.getId() == AMDGPULibFunc::EI_SINCOS
 && "math function with ptr arg not supported yet") ?
 void (0) : __assert_fail ("FInfo.getId() == AMDGPULibFunc::EI_SINCOS && \"math function with ptr arg not supported yet\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 1690, __extension__ __PRETTY_FUNCTION__))
         "math function with ptr arg not supported yet")(static_cast <bool> (FInfo.getId() == AMDGPULibFunc::EI_SINCOS
 && "math function with ptr arg not supported yet") ?
 void (0) : __assert_fail ("FInfo.getId() == AMDGPULibFunc::EI_SINCOS && \"math function with ptr arg not supported yet\""
, "/build/llvm-toolchain-snapshot-14~++20211021103713+996123e5e8d9/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp"
, 1690, __extension__ __PRETTY_FUNCTION__));
  new StoreInst(nval1, aCI->getArgOperand(1), aCI);
}

replaceCall(nval0);
return true;
1696}

1698// Public interface to the Simplify LibCalls pass.
1699FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
return new AMDGPUSimplifyLibCalls(TM);
1701}

1703FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
return new AMDGPUUseNativeCalls();
1705}

1707bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
if (skipFunction(F))
  return false;

bool Changed = false;
auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

LLVM_DEBUG(dbgs() << "AMDIC: process function ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: process function "
; F.printAsOperand(dbgs(), false, F.getParent()); dbgs() <<
 '\n';; } } while (false)
           F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: process function "
; F.printAsOperand(dbgs(), false, F.getParent()); dbgs() <<
 '\n';; } } while (false);

for (auto &BB : F) {
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
    // Ignore non-calls.
    CallInst *CI = dyn_cast<CallInst>(I);
    ++I;
    // Ignore intrinsics that do not become real instructions.
    if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
      continue;

    // Ignore indirect calls.
    Function *Callee = CI->getCalledFunction();
    if (Callee == 0) continue;

    LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: try folding "
 << *CI << "\n"; dbgs().flush(); } } while (false
)
               dbgs().flush())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: try folding "
 << *CI << "\n"; dbgs().flush(); } } while (false
);
    if(Simplifier.fold(CI, AA))
      Changed = true;
  }
}
return Changed;
1737}

1739PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
                                                FunctionAnalysisManager &AM) {
AMDGPULibCalls Simplifier(&TM);
Simplifier.initNativeFuncs();

bool Changed = false;
auto AA = &AM.getResult<AAManager>(F);

LLVM_DEBUG(dbgs() << "AMDIC: process function ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: process function "
; F.printAsOperand(dbgs(), false, F.getParent()); dbgs() <<
 '\n';; } } while (false)
1
Assuming 'DebugFlag' is false→
2
←
Loop condition is false.  Exiting loop→
           F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: process function "
; F.printAsOperand(dbgs(), false, F.getParent()); dbgs() <<
 '\n';; } } while (false);

for (auto &BB : F) {
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
3
←
Loop condition is true.  Entering loop body→
    // Ignore non-calls.
    CallInst *CI = dyn_cast<CallInst>(I);
    ++I;
    // Ignore intrinsics that do not become real instructions.
    if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
4
←
Assuming 'CI' is non-null→
5
←
Assuming 'CI' is not a 'DbgInfoIntrinsic'→
6
←
Assuming the condition is false→
7
←
Taking false branch→
      continue;

    // Ignore indirect calls.
    Function *Callee = CI->getCalledFunction();
    if (Callee7.1
'Callee' is not equal to null
 == 0)
      continue;

    LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: try folding "
 << *CI << "\n"; dbgs().flush(); } } while (false
)
8
←
Taking false branch→
9
←
Assuming 'DebugFlag' is false→
10
←
Loop condition is false.  Exiting loop→
               dbgs().flush())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("amdgpu-simplifylib")) { dbgs() << "AMDIC: try folding "
 << *CI << "\n"; dbgs().flush(); } } while (false
);
    if (Simplifier.fold(CI, AA))
11
←
Calling 'AMDGPULibCalls::fold'→
      Changed = true;
  }
}
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1771}

1773bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
if (skipFunction(F) || UseNative.empty())
  return false;

bool Changed = false;
for (auto &BB : F) {
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
    // Ignore non-calls.
    CallInst *CI = dyn_cast<CallInst>(I);
    ++I;
    if (!CI) continue;

    // Ignore indirect calls.
    Function *Callee = CI->getCalledFunction();
    if (Callee == 0) continue;

    if(Simplifier.useNative(CI))
      Changed = true;
  }
}
return Changed;
1794}

1796PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {
if (UseNative.empty())
  return PreservedAnalyses::all();

AMDGPULibCalls Simplifier;
Simplifier.initNativeFuncs();

bool Changed = false;
for (auto &BB : F) {
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
    // Ignore non-calls.
    CallInst *CI = dyn_cast<CallInst>(I);
    ++I;
    if (!CI)
      continue;

    // Ignore indirect calls.
    Function *Callee = CI->getCalledFunction();
    if (Callee == 0)
      continue;

    if (Simplifier.useNative(CI))
      Changed = true;
  }
}
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1823}