LLVM 20.0.0git
Macros | Enumerations | Functions | Variables
NVPTXISelLowering.cpp File Reference
#include "NVPTXISelLowering.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <iterator>
#include <optional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "nvptx-lower"
 
#define MAKE_CASE(V)
 

Enumerations

enum  ParamVectorizationFlags { PVF_INNER = 0x0 , PVF_FIRST = 0x1 , PVF_LAST = 0x2 , PVF_SCALAR = PVF_FIRST | PVF_LAST }
 
enum  OperandSignedness { Signed = 0 , Unsigned , Unknown }
 

Functions

static bool IsPTXVectorType (MVT VT)
 
static bool Is16bitsType (MVT VT)
 
static void ComputePTXValueVTs (const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
 ComputePTXValueVTs - For the given Type Ty, returns the set of primitive EVTs that compose it.
 
static bool PromoteScalarIntegerPTX (const EVT &VT, MVT *PromotedVT)
 PromoteScalarIntegerPTX Used to make sure the arguments/returns are suitable for passing and promote them to a larger size if they're not.
 
static unsigned CanMergeParamLoadStoresStartingAt (unsigned Idx, uint32_t AccessSize, const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< uint64_t > &Offsets, Align ParamAlignment)
 
static SmallVector< ParamVectorizationFlags, 16 > VectorizePTXValueVTs (const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< uint64_t > &Offsets, Align ParamAlignment, bool IsVAArg=false)
 
static bool IsTypePassedAsArray (const Type *Ty)
 
static bool adjustElementType (EVT &ElementType)
 
static SDValue LowerUnalignedStoreParam (SelectionDAG &DAG, SDValue Chain, uint64_t Offset, EVT ElementType, SDValue StVal, SDValue &InGlue, unsigned ArgID, const SDLoc &dl)
 
static SDValue LowerUnalignedLoadRetParam (SelectionDAG &DAG, SDValue &Chain, uint64_t Offset, EVT ElementType, SDValue &InGlue, SmallVectorImpl< SDValue > &TempProxyRegOps, const SDLoc &dl)
 
static SDValue LowerVectorArith (SDValue Op, SelectionDAG &DAG)
 
static SDValue LowerUnalignedStoreRet (SelectionDAG &DAG, SDValue Chain, uint64_t Offset, EVT ElementType, SDValue RetVal, const SDLoc &dl)
 
static unsigned getOpcForTextureInstr (unsigned Intrinsic)
 
static unsigned getOpcForSurfaceInstr (unsigned Intrinsic)
 
static bool isConstZero (const SDValue &Operand)
 
static SDValue PerformADDCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI)
 PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
 
static SDValue PerformFADDCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 
static SDValue PerformStoreCombineHelper (SDNode *N, std::size_t Front, std::size_t Back)
 
static SDValue PerformStoreParamCombine (SDNode *N)
 
static SDValue PerformStoreRetvalCombine (SDNode *N)
 
static SDValue PerformADDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
 
static SDValue PerformFADDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
 
static SDValue PerformANDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformREMCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 
static bool IsMulWideOperandDemotable (SDValue Op, unsigned OptSize, OperandSignedness &S)
 IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize bits without loss of information.
 
static bool AreMulWideOperandsDemotable (SDValue LHS, SDValue RHS, unsigned OptSize, bool &IsSigned)
 AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits without loss of information.
 
static SDValue TryMULWIDECombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e. mul.wide).
 
static bool isConstOne (const SDValue &Operand)
 
static SDValue matchMADConstOnePattern (SDValue Add)
 
static SDValue combineMADConstOne (SDValue X, SDValue Add, EVT VT, SDLoc DL, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue combineMulSelectConstOne (SDValue X, SDValue Select, EVT VT, SDLoc DL, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformMULCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformMULCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
 
static SDValue PerformSHLCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
 
static SDValue PerformSETCCCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned int SmVersion)
 
static SDValue PerformEXTRACTCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformVSELECTCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformLOADCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 
static void ReplaceLoadVector (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
 ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
 
static void ReplaceINTRINSIC_W_CHAIN (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
 
static void ReplaceCopyFromReg_128 (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
 

Variables

static std::atomic< unsigned > GlobalUniqueCallSite
 
static cl::opt< bool > sched4reg ("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false))
 
static cl::opt< unsigned > FMAContractLevelOpt ("nvptx-fma-level", cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2))
 
static cl::opt< int > UsePrecDivF32 ("nvptx-prec-divf32", cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" " IEEE Compliant F32 div.rnd if available."), cl::init(2))
 
static cl::opt< bool > UsePrecSqrtF32 ("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true))
 
static cl::opt< bool > ForceMinByValParamAlign ("nvptx-force-min-byval-param-align", cl::Hidden, cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions."), cl::init(false))
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "nvptx-lower"

Definition at line 70 of file NVPTXISelLowering.cpp.

◆ MAKE_CASE

#define MAKE_CASE (   V)
Value:
case V: \
return #V;

Enumeration Type Documentation

◆ OperandSignedness

Enumerator
Signed 
Unsigned 
Unknown 

Definition at line 5663 of file NVPTXISelLowering.cpp.

◆ ParamVectorizationFlags

Enumerator
PVF_INNER 
PVF_FIRST 
PVF_LAST 
PVF_SCALAR 

Definition at line 337 of file NVPTXISelLowering.cpp.

Function Documentation

◆ adjustElementType()

static bool adjustElementType ( EVT &  ElementType)
static

◆ AreMulWideOperandsDemotable()

static bool AreMulWideOperandsDemotable ( SDValue  LHS,
SDValue  RHS,
unsigned  OptSize,
bool &  IsSigned 
)
static

AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits without loss of information.

If the operands contain a constant, it should appear as the RHS operand. The signedness of the operands is placed in IsSigned.

Definition at line 5699 of file NVPTXISelLowering.cpp.

References llvm::APInt::isIntN(), IsMulWideOperandDemotable(), llvm::APInt::isSignedIntN(), LHS, RHS, Signed, llvm::Unknown, and Unsigned.

Referenced by TryMULWIDECombine().

◆ CanMergeParamLoadStoresStartingAt()

static unsigned CanMergeParamLoadStoresStartingAt ( unsigned  Idx,
uint32_t  AccessSize,
const SmallVectorImpl< EVT > &  ValueVTs,
const SmallVectorImpl< uint64_t > &  Offsets,
Align  ParamAlignment 
)
static

◆ combineMADConstOne()

static SDValue combineMADConstOne ( SDValue  X,
SDValue  Add,
EVT  VT,
SDLoc  DL,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

◆ combineMulSelectConstOne()

static SDValue combineMulSelectConstOne ( SDValue  X,
SDValue  Select,
EVT  VT,
SDLoc  DL,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

◆ ComputePTXValueVTs()

static void ComputePTXValueVTs ( const TargetLowering &  TLI,
const DataLayout &  DL,
Type *  Ty,
SmallVectorImpl< EVT > &  ValueVTs,
SmallVectorImpl< uint64_t > *  Offsets = nullptr,
uint64_t  StartingOffset = 0 
)
static

ComputePTXValueVTs - For the given Type Ty, returns the set of primitive EVTs that compose it.

Unlike ComputeValueVTs, this will break apart vectors into their primitive components. NOTE: This is a band-aid for code that expects ComputeValueVTs to return the same number of types as the Ins/Outs arrays in LowerFormalArguments, LowerCall, and LowerReturn.

Definition at line 169 of file NVPTXISelLowering.cpp.

References ComputePTXValueVTs(), llvm::ComputeValueVTs(), DL, llvm::EVT::getSimpleVT(), llvm::EVT::getStoreSize(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), Is16bitsType(), llvm::Type::isIntegerTy(), llvm::EVT::isVector(), llvm_unreachable, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::MVT::SimpleTy, and llvm::SmallVectorBase< Size_T >::size().

Referenced by ComputePTXValueVTs(), llvm::NVPTXTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerFormalArguments(), and llvm::NVPTXTargetLowering::LowerReturn().

◆ getOpcForSurfaceInstr()

static unsigned getOpcForSurfaceInstr ( unsigned  Intrinsic)
static

Definition at line 3944 of file NVPTXISelLowering.cpp.

References llvm::NVPTXISD::Suld1DArrayI16Clamp, llvm::NVPTXISD::Suld1DArrayI16Trap, llvm::NVPTXISD::Suld1DArrayI16Zero, llvm::NVPTXISD::Suld1DArrayI32Clamp, llvm::NVPTXISD::Suld1DArrayI32Trap, llvm::NVPTXISD::Suld1DArrayI32Zero, llvm::NVPTXISD::Suld1DArrayI64Clamp, llvm::NVPTXISD::Suld1DArrayI64Trap, llvm::NVPTXISD::Suld1DArrayI64Zero, llvm::NVPTXISD::Suld1DArrayI8Clamp, llvm::NVPTXISD::Suld1DArrayI8Trap, llvm::NVPTXISD::Suld1DArrayI8Zero, llvm::NVPTXISD::Suld1DArrayV2I16Clamp, llvm::NVPTXISD::Suld1DArrayV2I16Trap, llvm::NVPTXISD::Suld1DArrayV2I16Zero, llvm::NVPTXISD::Suld1DArrayV2I32Clamp, llvm::NVPTXISD::Suld1DArrayV2I32Trap, llvm::NVPTXISD::Suld1DArrayV2I32Zero, llvm::NVPTXISD::Suld1DArrayV2I64Clamp, llvm::NVPTXISD::Suld1DArrayV2I64Trap, llvm::NVPTXISD::Suld1DArrayV2I64Zero, llvm::NVPTXISD::Suld1DArrayV2I8Clamp, llvm::NVPTXISD::Suld1DArrayV2I8Trap, llvm::NVPTXISD::Suld1DArrayV2I8Zero, llvm::NVPTXISD::Suld1DArrayV4I16Clamp, llvm::NVPTXISD::Suld1DArrayV4I16Trap, llvm::NVPTXISD::Suld1DArrayV4I16Zero, llvm::NVPTXISD::Suld1DArrayV4I32Clamp, llvm::NVPTXISD::Suld1DArrayV4I32Trap, llvm::NVPTXISD::Suld1DArrayV4I32Zero, llvm::NVPTXISD::Suld1DArrayV4I8Clamp, llvm::NVPTXISD::Suld1DArrayV4I8Trap, llvm::NVPTXISD::Suld1DArrayV4I8Zero, llvm::NVPTXISD::Suld1DI16Clamp, llvm::NVPTXISD::Suld1DI16Trap, llvm::NVPTXISD::Suld1DI16Zero, llvm::NVPTXISD::Suld1DI32Clamp, llvm::NVPTXISD::Suld1DI32Trap, llvm::NVPTXISD::Suld1DI32Zero, llvm::NVPTXISD::Suld1DI64Clamp, llvm::NVPTXISD::Suld1DI64Trap, llvm::NVPTXISD::Suld1DI64Zero, llvm::NVPTXISD::Suld1DI8Clamp, llvm::NVPTXISD::Suld1DI8Trap, llvm::NVPTXISD::Suld1DI8Zero, llvm::NVPTXISD::Suld1DV2I16Clamp, llvm::NVPTXISD::Suld1DV2I16Trap, llvm::NVPTXISD::Suld1DV2I16Zero, llvm::NVPTXISD::Suld1DV2I32Clamp, llvm::NVPTXISD::Suld1DV2I32Trap, llvm::NVPTXISD::Suld1DV2I32Zero, llvm::NVPTXISD::Suld1DV2I64Clamp, llvm::NVPTXISD::Suld1DV2I64Trap, llvm::NVPTXISD::Suld1DV2I64Zero, llvm::NVPTXISD::Suld1DV2I8Clamp, llvm::NVPTXISD::Suld1DV2I8Trap, 
llvm::NVPTXISD::Suld1DV2I8Zero, llvm::NVPTXISD::Suld1DV4I16Clamp, llvm::NVPTXISD::Suld1DV4I16Trap, llvm::NVPTXISD::Suld1DV4I16Zero, llvm::NVPTXISD::Suld1DV4I32Clamp, llvm::NVPTXISD::Suld1DV4I32Trap, llvm::NVPTXISD::Suld1DV4I32Zero, llvm::NVPTXISD::Suld1DV4I8Clamp, llvm::NVPTXISD::Suld1DV4I8Trap, llvm::NVPTXISD::Suld1DV4I8Zero, llvm::NVPTXISD::Suld2DArrayI16Clamp, llvm::NVPTXISD::Suld2DArrayI16Trap, llvm::NVPTXISD::Suld2DArrayI16Zero, llvm::NVPTXISD::Suld2DArrayI32Clamp, llvm::NVPTXISD::Suld2DArrayI32Trap, llvm::NVPTXISD::Suld2DArrayI32Zero, llvm::NVPTXISD::Suld2DArrayI64Clamp, llvm::NVPTXISD::Suld2DArrayI64Trap, llvm::NVPTXISD::Suld2DArrayI64Zero, llvm::NVPTXISD::Suld2DArrayI8Clamp, llvm::NVPTXISD::Suld2DArrayI8Trap, llvm::NVPTXISD::Suld2DArrayI8Zero, llvm::NVPTXISD::Suld2DArrayV2I16Clamp, llvm::NVPTXISD::Suld2DArrayV2I16Trap, llvm::NVPTXISD::Suld2DArrayV2I16Zero, llvm::NVPTXISD::Suld2DArrayV2I32Clamp, llvm::NVPTXISD::Suld2DArrayV2I32Trap, llvm::NVPTXISD::Suld2DArrayV2I32Zero, llvm::NVPTXISD::Suld2DArrayV2I64Clamp, llvm::NVPTXISD::Suld2DArrayV2I64Trap, llvm::NVPTXISD::Suld2DArrayV2I64Zero, llvm::NVPTXISD::Suld2DArrayV2I8Clamp, llvm::NVPTXISD::Suld2DArrayV2I8Trap, llvm::NVPTXISD::Suld2DArrayV2I8Zero, llvm::NVPTXISD::Suld2DArrayV4I16Clamp, llvm::NVPTXISD::Suld2DArrayV4I16Trap, llvm::NVPTXISD::Suld2DArrayV4I16Zero, llvm::NVPTXISD::Suld2DArrayV4I32Clamp, llvm::NVPTXISD::Suld2DArrayV4I32Trap, llvm::NVPTXISD::Suld2DArrayV4I32Zero, llvm::NVPTXISD::Suld2DArrayV4I8Clamp, llvm::NVPTXISD::Suld2DArrayV4I8Trap, llvm::NVPTXISD::Suld2DArrayV4I8Zero, llvm::NVPTXISD::Suld2DI16Clamp, llvm::NVPTXISD::Suld2DI16Trap, llvm::NVPTXISD::Suld2DI16Zero, llvm::NVPTXISD::Suld2DI32Clamp, llvm::NVPTXISD::Suld2DI32Trap, llvm::NVPTXISD::Suld2DI32Zero, llvm::NVPTXISD::Suld2DI64Clamp, llvm::NVPTXISD::Suld2DI64Trap, llvm::NVPTXISD::Suld2DI64Zero, llvm::NVPTXISD::Suld2DI8Clamp, llvm::NVPTXISD::Suld2DI8Trap, llvm::NVPTXISD::Suld2DI8Zero, llvm::NVPTXISD::Suld2DV2I16Clamp, 
llvm::NVPTXISD::Suld2DV2I16Trap, llvm::NVPTXISD::Suld2DV2I16Zero, llvm::NVPTXISD::Suld2DV2I32Clamp, llvm::NVPTXISD::Suld2DV2I32Trap, llvm::NVPTXISD::Suld2DV2I32Zero, llvm::NVPTXISD::Suld2DV2I64Clamp, llvm::NVPTXISD::Suld2DV2I64Trap, llvm::NVPTXISD::Suld2DV2I64Zero, llvm::NVPTXISD::Suld2DV2I8Clamp, llvm::NVPTXISD::Suld2DV2I8Trap, llvm::NVPTXISD::Suld2DV2I8Zero, llvm::NVPTXISD::Suld2DV4I16Clamp, llvm::NVPTXISD::Suld2DV4I16Trap, llvm::NVPTXISD::Suld2DV4I16Zero, llvm::NVPTXISD::Suld2DV4I32Clamp, llvm::NVPTXISD::Suld2DV4I32Trap, llvm::NVPTXISD::Suld2DV4I32Zero, llvm::NVPTXISD::Suld2DV4I8Clamp, llvm::NVPTXISD::Suld2DV4I8Trap, llvm::NVPTXISD::Suld2DV4I8Zero, llvm::NVPTXISD::Suld3DI16Clamp, llvm::NVPTXISD::Suld3DI16Trap, llvm::NVPTXISD::Suld3DI16Zero, llvm::NVPTXISD::Suld3DI32Clamp, llvm::NVPTXISD::Suld3DI32Trap, llvm::NVPTXISD::Suld3DI32Zero, llvm::NVPTXISD::Suld3DI64Clamp, llvm::NVPTXISD::Suld3DI64Trap, llvm::NVPTXISD::Suld3DI64Zero, llvm::NVPTXISD::Suld3DI8Clamp, llvm::NVPTXISD::Suld3DI8Trap, llvm::NVPTXISD::Suld3DI8Zero, llvm::NVPTXISD::Suld3DV2I16Clamp, llvm::NVPTXISD::Suld3DV2I16Trap, llvm::NVPTXISD::Suld3DV2I16Zero, llvm::NVPTXISD::Suld3DV2I32Clamp, llvm::NVPTXISD::Suld3DV2I32Trap, llvm::NVPTXISD::Suld3DV2I32Zero, llvm::NVPTXISD::Suld3DV2I64Clamp, llvm::NVPTXISD::Suld3DV2I64Trap, llvm::NVPTXISD::Suld3DV2I64Zero, llvm::NVPTXISD::Suld3DV2I8Clamp, llvm::NVPTXISD::Suld3DV2I8Trap, llvm::NVPTXISD::Suld3DV2I8Zero, llvm::NVPTXISD::Suld3DV4I16Clamp, llvm::NVPTXISD::Suld3DV4I16Trap, llvm::NVPTXISD::Suld3DV4I16Zero, llvm::NVPTXISD::Suld3DV4I32Clamp, llvm::NVPTXISD::Suld3DV4I32Trap, llvm::NVPTXISD::Suld3DV4I32Zero, llvm::NVPTXISD::Suld3DV4I8Clamp, llvm::NVPTXISD::Suld3DV4I8Trap, and llvm::NVPTXISD::Suld3DV4I8Zero.

Referenced by llvm::NVPTXTargetLowering::getTgtMemIntrinsic().

◆ getOpcForTextureInstr()

static unsigned getOpcForTextureInstr ( unsigned  Intrinsic)
static

Definition at line 3572 of file NVPTXISelLowering.cpp.

References llvm::NVPTXISD::Tex1DArrayFloatFloat, llvm::NVPTXISD::Tex1DArrayFloatFloatGrad, llvm::NVPTXISD::Tex1DArrayFloatFloatLevel, llvm::NVPTXISD::Tex1DArrayFloatS32, llvm::NVPTXISD::Tex1DArrayS32Float, llvm::NVPTXISD::Tex1DArrayS32FloatGrad, llvm::NVPTXISD::Tex1DArrayS32FloatLevel, llvm::NVPTXISD::Tex1DArrayS32S32, llvm::NVPTXISD::Tex1DArrayU32Float, llvm::NVPTXISD::Tex1DArrayU32FloatGrad, llvm::NVPTXISD::Tex1DArrayU32FloatLevel, llvm::NVPTXISD::Tex1DArrayU32S32, llvm::NVPTXISD::Tex1DFloatFloat, llvm::NVPTXISD::Tex1DFloatFloatGrad, llvm::NVPTXISD::Tex1DFloatFloatLevel, llvm::NVPTXISD::Tex1DFloatS32, llvm::NVPTXISD::Tex1DS32Float, llvm::NVPTXISD::Tex1DS32FloatGrad, llvm::NVPTXISD::Tex1DS32FloatLevel, llvm::NVPTXISD::Tex1DS32S32, llvm::NVPTXISD::Tex1DU32Float, llvm::NVPTXISD::Tex1DU32FloatGrad, llvm::NVPTXISD::Tex1DU32FloatLevel, llvm::NVPTXISD::Tex1DU32S32, llvm::NVPTXISD::Tex2DArrayFloatFloat, llvm::NVPTXISD::Tex2DArrayFloatFloatGrad, llvm::NVPTXISD::Tex2DArrayFloatFloatLevel, llvm::NVPTXISD::Tex2DArrayFloatS32, llvm::NVPTXISD::Tex2DArrayS32Float, llvm::NVPTXISD::Tex2DArrayS32FloatGrad, llvm::NVPTXISD::Tex2DArrayS32FloatLevel, llvm::NVPTXISD::Tex2DArrayS32S32, llvm::NVPTXISD::Tex2DArrayU32Float, llvm::NVPTXISD::Tex2DArrayU32FloatGrad, llvm::NVPTXISD::Tex2DArrayU32FloatLevel, llvm::NVPTXISD::Tex2DArrayU32S32, llvm::NVPTXISD::Tex2DFloatFloat, llvm::NVPTXISD::Tex2DFloatFloatGrad, llvm::NVPTXISD::Tex2DFloatFloatLevel, llvm::NVPTXISD::Tex2DFloatS32, llvm::NVPTXISD::Tex2DS32Float, llvm::NVPTXISD::Tex2DS32FloatGrad, llvm::NVPTXISD::Tex2DS32FloatLevel, llvm::NVPTXISD::Tex2DS32S32, llvm::NVPTXISD::Tex2DU32Float, llvm::NVPTXISD::Tex2DU32FloatGrad, llvm::NVPTXISD::Tex2DU32FloatLevel, llvm::NVPTXISD::Tex2DU32S32, llvm::NVPTXISD::Tex3DFloatFloat, llvm::NVPTXISD::Tex3DFloatFloatGrad, llvm::NVPTXISD::Tex3DFloatFloatLevel, llvm::NVPTXISD::Tex3DFloatS32, llvm::NVPTXISD::Tex3DS32Float, llvm::NVPTXISD::Tex3DS32FloatGrad, llvm::NVPTXISD::Tex3DS32FloatLevel, 
llvm::NVPTXISD::Tex3DS32S32, llvm::NVPTXISD::Tex3DU32Float, llvm::NVPTXISD::Tex3DU32FloatGrad, llvm::NVPTXISD::Tex3DU32FloatLevel, llvm::NVPTXISD::Tex3DU32S32, llvm::NVPTXISD::TexCubeArrayFloatFloat, llvm::NVPTXISD::TexCubeArrayFloatFloatLevel, llvm::NVPTXISD::TexCubeArrayS32Float, llvm::NVPTXISD::TexCubeArrayS32FloatLevel, llvm::NVPTXISD::TexCubeArrayU32Float, llvm::NVPTXISD::TexCubeArrayU32FloatLevel, llvm::NVPTXISD::TexCubeFloatFloat, llvm::NVPTXISD::TexCubeFloatFloatLevel, llvm::NVPTXISD::TexCubeS32Float, llvm::NVPTXISD::TexCubeS32FloatLevel, llvm::NVPTXISD::TexCubeU32Float, llvm::NVPTXISD::TexCubeU32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayFloatFloat, llvm::NVPTXISD::TexUnified1DArrayFloatFloatGrad, llvm::NVPTXISD::TexUnified1DArrayFloatFloatLevel, llvm::NVPTXISD::TexUnified1DArrayFloatS32, llvm::NVPTXISD::TexUnified1DArrayS32Float, llvm::NVPTXISD::TexUnified1DArrayS32FloatGrad, llvm::NVPTXISD::TexUnified1DArrayS32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayS32S32, llvm::NVPTXISD::TexUnified1DArrayU32Float, llvm::NVPTXISD::TexUnified1DArrayU32FloatGrad, llvm::NVPTXISD::TexUnified1DArrayU32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayU32S32, llvm::NVPTXISD::TexUnified1DFloatFloat, llvm::NVPTXISD::TexUnified1DFloatFloatGrad, llvm::NVPTXISD::TexUnified1DFloatFloatLevel, llvm::NVPTXISD::TexUnified1DFloatS32, llvm::NVPTXISD::TexUnified1DS32Float, llvm::NVPTXISD::TexUnified1DS32FloatGrad, llvm::NVPTXISD::TexUnified1DS32FloatLevel, llvm::NVPTXISD::TexUnified1DS32S32, llvm::NVPTXISD::TexUnified1DU32Float, llvm::NVPTXISD::TexUnified1DU32FloatGrad, llvm::NVPTXISD::TexUnified1DU32FloatLevel, llvm::NVPTXISD::TexUnified1DU32S32, llvm::NVPTXISD::TexUnified2DArrayFloatFloat, llvm::NVPTXISD::TexUnified2DArrayFloatFloatGrad, llvm::NVPTXISD::TexUnified2DArrayFloatFloatLevel, llvm::NVPTXISD::TexUnified2DArrayFloatS32, llvm::NVPTXISD::TexUnified2DArrayS32Float, llvm::NVPTXISD::TexUnified2DArrayS32FloatGrad, llvm::NVPTXISD::TexUnified2DArrayS32FloatLevel, 
llvm::NVPTXISD::TexUnified2DArrayS32S32, llvm::NVPTXISD::TexUnified2DArrayU32Float, llvm::NVPTXISD::TexUnified2DArrayU32FloatGrad, llvm::NVPTXISD::TexUnified2DArrayU32FloatLevel, llvm::NVPTXISD::TexUnified2DArrayU32S32, llvm::NVPTXISD::TexUnified2DFloatFloat, llvm::NVPTXISD::TexUnified2DFloatFloatGrad, llvm::NVPTXISD::TexUnified2DFloatFloatLevel, llvm::NVPTXISD::TexUnified2DFloatS32, llvm::NVPTXISD::TexUnified2DS32Float, llvm::NVPTXISD::TexUnified2DS32FloatGrad, llvm::NVPTXISD::TexUnified2DS32FloatLevel, llvm::NVPTXISD::TexUnified2DS32S32, llvm::NVPTXISD::TexUnified2DU32Float, llvm::NVPTXISD::TexUnified2DU32FloatGrad, llvm::NVPTXISD::TexUnified2DU32FloatLevel, llvm::NVPTXISD::TexUnified2DU32S32, llvm::NVPTXISD::TexUnified3DFloatFloat, llvm::NVPTXISD::TexUnified3DFloatFloatGrad, llvm::NVPTXISD::TexUnified3DFloatFloatLevel, llvm::NVPTXISD::TexUnified3DFloatS32, llvm::NVPTXISD::TexUnified3DS32Float, llvm::NVPTXISD::TexUnified3DS32FloatGrad, llvm::NVPTXISD::TexUnified3DS32FloatLevel, llvm::NVPTXISD::TexUnified3DS32S32, llvm::NVPTXISD::TexUnified3DU32Float, llvm::NVPTXISD::TexUnified3DU32FloatGrad, llvm::NVPTXISD::TexUnified3DU32FloatLevel, llvm::NVPTXISD::TexUnified3DU32S32, llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloat, llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad, llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel, llvm::NVPTXISD::TexUnifiedCubeArrayS32Float, llvm::NVPTXISD::TexUnifiedCubeArrayS32FloatGrad, llvm::NVPTXISD::TexUnifiedCubeArrayS32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeArrayU32Float, llvm::NVPTXISD::TexUnifiedCubeArrayU32FloatGrad, llvm::NVPTXISD::TexUnifiedCubeArrayU32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeFloatFloat, llvm::NVPTXISD::TexUnifiedCubeFloatFloatGrad, llvm::NVPTXISD::TexUnifiedCubeFloatFloatLevel, llvm::NVPTXISD::TexUnifiedCubeS32Float, llvm::NVPTXISD::TexUnifiedCubeS32FloatGrad, llvm::NVPTXISD::TexUnifiedCubeS32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeU32Float, llvm::NVPTXISD::TexUnifiedCubeU32FloatGrad, 
llvm::NVPTXISD::TexUnifiedCubeU32FloatLevel, llvm::NVPTXISD::Tld4A2DFloatFloat, llvm::NVPTXISD::Tld4A2DS64Float, llvm::NVPTXISD::Tld4A2DU64Float, llvm::NVPTXISD::Tld4B2DFloatFloat, llvm::NVPTXISD::Tld4B2DS64Float, llvm::NVPTXISD::Tld4B2DU64Float, llvm::NVPTXISD::Tld4G2DFloatFloat, llvm::NVPTXISD::Tld4G2DS64Float, llvm::NVPTXISD::Tld4G2DU64Float, llvm::NVPTXISD::Tld4R2DFloatFloat, llvm::NVPTXISD::Tld4R2DS64Float, llvm::NVPTXISD::Tld4R2DU64Float, llvm::NVPTXISD::Tld4UnifiedA2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedA2DS64Float, llvm::NVPTXISD::Tld4UnifiedA2DU64Float, llvm::NVPTXISD::Tld4UnifiedB2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedB2DS64Float, llvm::NVPTXISD::Tld4UnifiedB2DU64Float, llvm::NVPTXISD::Tld4UnifiedG2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedG2DS64Float, llvm::NVPTXISD::Tld4UnifiedG2DU64Float, llvm::NVPTXISD::Tld4UnifiedR2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedR2DS64Float, and llvm::NVPTXISD::Tld4UnifiedR2DU64Float.

Referenced by llvm::NVPTXTargetLowering::getTgtMemIntrinsic().

◆ Is16bitsType()

static bool Is16bitsType ( MVT  VT)
static

Definition at line 158 of file NVPTXISelLowering.cpp.

References llvm::MVT::SimpleTy.

Referenced by ComputePTXValueVTs(), and ReplaceLoadVector().

◆ isConstOne()

static bool isConstOne ( const SDValue &  Operand)
static

Definition at line 5801 of file NVPTXISelLowering.cpp.

Referenced by combineMulSelectConstOne(), and matchMADConstOnePattern().

◆ isConstZero()

static bool isConstZero ( const SDValue &  Operand)
static

Definition at line 5337 of file NVPTXISelLowering.cpp.

Referenced by PerformADDCombineWithOperands().

◆ IsMulWideOperandDemotable()

static bool IsMulWideOperandDemotable ( SDValue  Op,
unsigned  OptSize,
OperandSignedness &  S 
)
static

IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize bits without loss of information.

The signedness of the operand, if determinable, is placed in S.

Definition at line 5672 of file NVPTXISelLowering.cpp.

References llvm::EVT::getFixedSizeInBits(), llvm::ISD::SIGN_EXTEND, llvm::ISD::SIGN_EXTEND_INREG, Signed, llvm::Unknown, Unsigned, and llvm::ISD::ZERO_EXTEND.

Referenced by AreMulWideOperandsDemotable().

◆ IsPTXVectorType()

static bool IsPTXVectorType ( MVT  VT)
static

◆ IsTypePassedAsArray()

static bool IsTypePassedAsArray ( const Type *  Ty)
static

◆ LowerUnalignedLoadRetParam()

static SDValue LowerUnalignedLoadRetParam ( SelectionDAG &  DAG,
SDValue &  Chain,
uint64_t  Offset,
EVT  ElementType,
SDValue &  InGlue,
SmallVectorImpl< SDValue > &  TempProxyRegOps,
const SDLoc &  dl 
)
static

◆ LowerUnalignedStoreParam()

static SDValue LowerUnalignedStoreParam ( SelectionDAG &  DAG,
SDValue  Chain,
uint64_t  Offset,
EVT  ElementType,
SDValue  StVal,
SDValue &  InGlue,
unsigned  ArgID,
const SDLoc &  dl 
)
static

◆ LowerUnalignedStoreRet()

static SDValue LowerUnalignedStoreRet ( SelectionDAG &  DAG,
SDValue  Chain,
uint64_t  Offset,
EVT  ElementType,
SDValue  RetVal,
const SDLoc &  dl 
)
static

◆ LowerVectorArith()

static SDValue LowerVectorArith ( SDValue  Op,
SelectionDAG &  DAG 
)
static

◆ matchMADConstOnePattern()

static SDValue matchMADConstOnePattern ( SDValue  Add)
static

Definition at line 5806 of file NVPTXISelLowering.cpp.

References llvm::Add, llvm::ISD::ADD, and isConstOne().

Referenced by combineMADConstOne(), and combineMulSelectConstOne().

◆ PerformADDCombine()

static SDValue PerformADDCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI,
CodeGenOptLevel  OptLevel 
)
static

PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.

Definition at line 5491 of file NVPTXISelLowering.cpp.

References llvm::SDValue::getValueType(), llvm::EVT::isVector(), N, llvm::None, and PerformADDCombineWithOperands().

◆ PerformADDCombineWithOperands()

static SDValue PerformADDCombineWithOperands ( SDNode *  N,
SDValue  N0,
SDValue  N1,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.

This is a helper for PerformADDCombine that is called with the default operands, and if that fails, with commuted operands.

Definition at line 5347 of file NVPTXISelLowering.cpp.

References llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::SDValue::getNode(), llvm::SelectionDAG::getNode(), llvm::SDValue::getOpcode(), llvm::SDValue::getOperand(), llvm::SDNode::getOperand(), llvm::SelectionDAG::getSelect(), llvm::SDValue::getValueType(), llvm::SDNode::hasOneUse(), llvm::NVPTXISD::IMAD, isConstZero(), llvm::ISD::MUL, N, and llvm::ISD::SELECT.

◆ PerformANDCombine()

static SDValue PerformANDCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

◆ PerformEXTRACTCombine()

static SDValue PerformEXTRACTCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

◆ PerformFADDCombine()

static SDValue PerformFADDCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI,
CodeGenOptLevel  OptLevel 
)
static

PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.

Definition at line 5515 of file NVPTXISelLowering.cpp.

References llvm::SDValue::getValueType(), llvm::EVT::isVector(), N, and PerformFADDCombineWithOperands().

◆ PerformFADDCombineWithOperands()

static SDValue PerformFADDCombineWithOperands ( SDNode *  N,
SDValue  N0,
SDValue  N1,
TargetLowering::DAGCombinerInfo &  DCI,
CodeGenOptLevel  OptLevel 
)
static

◆ PerformLOADCombine()

static SDValue PerformLOADCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

◆ PerformMULCombine()

static SDValue PerformMULCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI,
CodeGenOptLevel  OptLevel 
)
static

PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.

Definition at line 5886 of file NVPTXISelLowering.cpp.

References N, llvm::None, PerformMULCombineWithOperands(), and TryMULWIDECombine().

◆ PerformMULCombineWithOperands()

static SDValue PerformMULCombineWithOperands ( SDNode *  N,
SDValue  N0,
SDValue  N1,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

◆ PerformREMCombine()

static SDValue PerformREMCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI,
CodeGenOptLevel  OptLevel 
)
static

◆ PerformSETCCCombine()

static SDValue PerformSETCCCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI,
unsigned int  SmVersion 
)
static

◆ PerformSHLCombine()

static SDValue PerformSHLCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI,
CodeGenOptLevel  OptLevel 
)
static

PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.

Definition at line 5901 of file NVPTXISelLowering.cpp.

References N, llvm::None, and TryMULWIDECombine().

◆ PerformStoreCombineHelper()

static SDValue PerformStoreCombineHelper ( SDNode *  N,
std::size_t  Front,
std::size_t  Back 
)
static

Definition at line 5467 of file NVPTXISelLowering.cpp.

References llvm::all_of(), and N.

Referenced by PerformStoreParamCombine(), and PerformStoreRetvalCombine().

◆ PerformStoreParamCombine()

static SDValue PerformStoreParamCombine ( SDNode *  N)
static

Definition at line 5478 of file NVPTXISelLowering.cpp.

References N, and PerformStoreCombineHelper().

◆ PerformStoreRetvalCombine()

static SDValue PerformStoreRetvalCombine ( SDNode *  N)
static

Definition at line 5484 of file NVPTXISelLowering.cpp.

References N, and PerformStoreCombineHelper().

◆ PerformVSELECTCombine()

static SDValue PerformVSELECTCombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

◆ PromoteScalarIntegerPTX()

static bool PromoteScalarIntegerPTX ( const EVT &  VT,
MVT *  PromotedVT 
)
static

PromoteScalarIntegerPTX Used to make sure the arguments/returns are suitable for passing and promote them to a larger size if they're not.

The promoted type is placed in PromotedVT if the function returns true.

Definition at line 252 of file NVPTXISelLowering.cpp.

References llvm::EVT::getFixedSizeInBits(), llvm::EVT::isScalarInteger(), llvm_unreachable, and llvm::PowerOf2Ceil().

Referenced by llvm::NVPTXTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerFormalArguments(), and llvm::NVPTXTargetLowering::LowerReturn().

◆ ReplaceCopyFromReg_128()

static void ReplaceCopyFromReg_128 ( SDNode *  N,
SelectionDAG &  DAG,
SmallVectorImpl< SDValue > &  Results 
)
static

◆ ReplaceINTRINSIC_W_CHAIN()

static void ReplaceINTRINSIC_W_CHAIN ( SDNode *  N,
SelectionDAG &  DAG,
SmallVectorImpl< SDValue > &  Results 
)
static

◆ ReplaceLoadVector()

static void ReplaceLoadVector ( SDNode *  N,
SelectionDAG &  DAG,
SmallVectorImpl< SDValue > &  Results 
)
static

◆ TryMULWIDECombine()

static SDValue TryMULWIDECombine ( SDNode *  N,
TargetLowering::DAGCombinerInfo &  DCI 
)
static

TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e. mul.wide).

This transform works on both multiply DAG nodes and SHL DAG nodes with a constant shift amount.

Definition at line 5735 of file NVPTXISelLowering.cpp.

References AreMulWideOperandsDemotable(), llvm::BitWidth, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::ConstantSDNode::getAPIntValue(), llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), llvm::EVT::getSizeInBits(), LHS, llvm::ISD::MUL, llvm::NVPTXISD::MUL_WIDE_SIGNED, llvm::NVPTXISD::MUL_WIDE_UNSIGNED, N, RHS, llvm::APInt::sge(), llvm::ISD::SHL, Signed, llvm::APInt::slt(), std::swap(), and llvm::ISD::TRUNCATE.

Referenced by PerformMULCombine(), and PerformSHLCombine().

◆ VectorizePTXValueVTs()

static SmallVector< ParamVectorizationFlags, 16 > VectorizePTXValueVTs ( const SmallVectorImpl< EVT > &  ValueVTs,
const SmallVectorImpl< uint64_t > &  Offsets,
Align  ParamAlignment,
bool  IsVAArg = false 
)
static

Variable Documentation

◆ FMAContractLevelOpt

cl::opt< unsigned > FMAContractLevelOpt("nvptx-fma-level", cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2)) ( "nvptx-fma-level"  ,
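cl::Hidden  ,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively")  ,
cl::init(2)   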
)
static

◆ ForceMinByValParamAlign

cl::opt< bool > ForceMinByValParamAlign("nvptx-force-min-byval-param-align", cl::Hidden, cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions."), cl::init(false)) ( "nvptx-force-min-byval-param-align"  ,
cl::Hidden  ,
cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions.")  ,
cl::init(false)   
)
static

◆ GlobalUniqueCallSite

std::atomic<unsigned> GlobalUniqueCallSite
static

Definition at line 74 of file NVPTXISelLowering.cpp.

Referenced by llvm::NVPTXTargetLowering::LowerCall().

◆ sched4reg

cl::opt< bool > sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)) ( "nvptx-sched4reg"  ,
cl::desc("NVPTX Specific: schedule for register pressue")  ,
cl::init(false)   
)
static

◆ UsePrecDivF32

cl::opt< int > UsePrecDivF32("nvptx-prec-divf32", cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" " IEEE Compliant F32 div.rnd if available."), cl::init(2)) ( "nvptx-prec-divf32"  ,
cl::Hidden  ,
cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" " IEEE Compliant F32 div.rnd if available.")  ,
cl::init(2)   
)
static

◆ UsePrecSqrtF32

cl::opt< bool > UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true)) ( "nvptx-prec-sqrtf32"  ,
cl::Hidden  ,
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn.")  ,
cl::init(true)   
)
static