LLVM  6.0.0svn
Macros | Enumerations | Functions | Variables
NVPTXISelLowering.cpp File Reference
#include "NVPTXISelLowering.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXSection.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
Include dependency graph for NVPTXISelLowering.cpp:

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "nvptx-lower"
 

Enumerations

enum  ParamVectorizationFlags { PVF_INNER = 0x0, PVF_FIRST = 0x1, PVF_LAST = 0x2, PVF_SCALAR = PVF_FIRST | PVF_LAST }
 
enum  OperandSignedness { Signed = 0, Unsigned, Unknown }
 

Functions

static bool IsPTXVectorType (MVT VT)
 
static void ComputePTXValueVTs (const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
 ComputePTXValueVTs - For the given Type Ty, returns the set of primitive EVTs that compose it. More...
 
static unsigned CanMergeParamLoadStoresStartingAt (unsigned Idx, uint32_t AccessSize, const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< uint64_t > &Offsets, unsigned ParamAlignment)
 
static SmallVector< ParamVectorizationFlags, 16 > VectorizePTXValueVTs (const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< uint64_t > &Offsets, unsigned ParamAlignment)
 
static bool isImageOrSamplerVal (const Value *arg, const Module *context)
 
static unsigned getOpcForTextureInstr (unsigned Intrinsic)
 
static unsigned getOpcForSurfaceInstr (unsigned Intrinsic)
 
static SDValue PerformADDCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel)
 PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1. More...
 
static SDValue PerformADDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &Subtarget, CodeGenOpt::Level OptLevel)
 PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. More...
 
static SDValue PerformANDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 
static SDValue PerformREMCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
 
static bool IsMulWideOperandDemotable (SDValue Op, unsigned OptSize, OperandSignedness &S)
 IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize bits without loss of information. More...
 
static bool AreMulWideOperandsDemotable (SDValue LHS, SDValue RHS, unsigned OptSize, bool &IsSigned)
 AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits without loss of information. More...
 
static SDValue TryMULWIDECombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e. More...
 
static SDValue PerformMULCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
 PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. More...
 
static SDValue PerformSHLCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOpt::Level OptLevel)
 PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. More...
 
static SDValue PerformSETCCCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 
static void ReplaceLoadVector (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
 ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. More...
 
static void ReplaceINTRINSIC_W_CHAIN (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
 

Variables

static unsigned int uniqueCallSite = 0
 
static cl::opt< boolsched4reg ("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false))
 
static cl::opt< unsignedFMAContractLevelOpt ("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2))
 
static cl::opt< int > UsePrecDivF32 ("nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" " IEEE Compliant F32 div.rnd if available."), cl::init(2))
 
static cl::opt< boolUsePrecSqrtF32 ("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true))
 
static cl::opt< boolFtzEnabled ("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "nvptx-lower"

Definition at line 65 of file NVPTXISelLowering.cpp.

Enumeration Type Documentation

◆ OperandSignedness

Enumerator
Signed 
Unsigned 
Unknown 

Definition at line 4297 of file NVPTXISelLowering.cpp.

◆ ParamVectorizationFlags

Enumerator
PVF_INNER 
PVF_FIRST 
PVF_LAST 
PVF_SCALAR 

Definition at line 270 of file NVPTXISelLowering.cpp.

Function Documentation

◆ AreMulWideOperandsDemotable()

static bool AreMulWideOperandsDemotable ( SDValue  LHS,
SDValue  RHS,
unsigned  OptSize,
bool IsSigned 
)
static

AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits without loss of information.

If the operands contain a constant, it should appear as the RHS operand. The signedness of the operands is placed in IsSigned.

Definition at line 4333 of file NVPTXISelLowering.cpp.

References llvm::APInt::isIntN(), IsMulWideOperandDemotable(), llvm::APInt::isSignedIntN(), Signed, llvm::Unknown, and Unsigned.

Referenced by TryMULWIDECombine().

◆ CanMergeParamLoadStoresStartingAt()

static unsigned CanMergeParamLoadStoresStartingAt ( unsigned  Idx,
uint32_t  AccessSize,
const SmallVectorImpl< EVT > &  ValueVTs,
const SmallVectorImpl< uint64_t > &  Offsets,
unsigned  ParamAlignment 
)
static

◆ ComputePTXValueVTs()

static void ComputePTXValueVTs ( const TargetLowering TLI,
const DataLayout DL,
Type Ty,
SmallVectorImpl< EVT > &  ValueVTs,
SmallVectorImpl< uint64_t > *  Offsets = nullptr,
uint64_t  StartingOffset = 0 
)
static

ComputePTXValueVTs - For the given Type Ty, returns the set of primitive EVTs that compose it.

Unlike ComputeValueVTs, this will break apart vectors into their primitive components. NOTE: This is a band-aid for code that expects ComputeValueVTs to return the same number of types as the Ins/Outs arrays in LowerFormalArguments, LowerCall, and LowerReturn.

Definition at line 164 of file NVPTXISelLowering.cpp.

References llvm::ComputeValueVTs(), llvm::MVT::f16, llvm::EVT::getStoreSize(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), llvm::MVT::i64, llvm::Type::isIntegerTy(), llvm::EVT::isVector(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::SmallVectorTemplateCommon< T >::size(), and llvm::MVT::v2f16.

Referenced by llvm::NVPTXTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerFormalArguments(), and llvm::NVPTXTargetLowering::LowerReturn().

◆ getOpcForSurfaceInstr()

static unsigned getOpcForSurfaceInstr ( unsigned  Intrinsic)
static

Definition at line 2977 of file NVPTXISelLowering.cpp.

References llvm::NVPTXISD::Suld1DArrayI16Clamp, llvm::NVPTXISD::Suld1DArrayI16Trap, llvm::NVPTXISD::Suld1DArrayI16Zero, llvm::NVPTXISD::Suld1DArrayI32Clamp, llvm::NVPTXISD::Suld1DArrayI32Trap, llvm::NVPTXISD::Suld1DArrayI32Zero, llvm::NVPTXISD::Suld1DArrayI64Clamp, llvm::NVPTXISD::Suld1DArrayI64Trap, llvm::NVPTXISD::Suld1DArrayI64Zero, llvm::NVPTXISD::Suld1DArrayI8Clamp, llvm::NVPTXISD::Suld1DArrayI8Trap, llvm::NVPTXISD::Suld1DArrayI8Zero, llvm::NVPTXISD::Suld1DArrayV2I16Clamp, llvm::NVPTXISD::Suld1DArrayV2I16Trap, llvm::NVPTXISD::Suld1DArrayV2I16Zero, llvm::NVPTXISD::Suld1DArrayV2I32Clamp, llvm::NVPTXISD::Suld1DArrayV2I32Trap, llvm::NVPTXISD::Suld1DArrayV2I32Zero, llvm::NVPTXISD::Suld1DArrayV2I64Clamp, llvm::NVPTXISD::Suld1DArrayV2I64Trap, llvm::NVPTXISD::Suld1DArrayV2I64Zero, llvm::NVPTXISD::Suld1DArrayV2I8Clamp, llvm::NVPTXISD::Suld1DArrayV2I8Trap, llvm::NVPTXISD::Suld1DArrayV2I8Zero, llvm::NVPTXISD::Suld1DArrayV4I16Clamp, llvm::NVPTXISD::Suld1DArrayV4I16Trap, llvm::NVPTXISD::Suld1DArrayV4I16Zero, llvm::NVPTXISD::Suld1DArrayV4I32Clamp, llvm::NVPTXISD::Suld1DArrayV4I32Trap, llvm::NVPTXISD::Suld1DArrayV4I32Zero, llvm::NVPTXISD::Suld1DArrayV4I8Clamp, llvm::NVPTXISD::Suld1DArrayV4I8Trap, llvm::NVPTXISD::Suld1DArrayV4I8Zero, llvm::NVPTXISD::Suld1DI16Clamp, llvm::NVPTXISD::Suld1DI16Trap, llvm::NVPTXISD::Suld1DI16Zero, llvm::NVPTXISD::Suld1DI32Clamp, llvm::NVPTXISD::Suld1DI32Trap, llvm::NVPTXISD::Suld1DI32Zero, llvm::NVPTXISD::Suld1DI64Clamp, llvm::NVPTXISD::Suld1DI64Trap, llvm::NVPTXISD::Suld1DI64Zero, llvm::NVPTXISD::Suld1DI8Clamp, llvm::NVPTXISD::Suld1DI8Trap, llvm::NVPTXISD::Suld1DI8Zero, llvm::NVPTXISD::Suld1DV2I16Clamp, llvm::NVPTXISD::Suld1DV2I16Trap, llvm::NVPTXISD::Suld1DV2I16Zero, llvm::NVPTXISD::Suld1DV2I32Clamp, llvm::NVPTXISD::Suld1DV2I32Trap, llvm::NVPTXISD::Suld1DV2I32Zero, llvm::NVPTXISD::Suld1DV2I64Clamp, llvm::NVPTXISD::Suld1DV2I64Trap, llvm::NVPTXISD::Suld1DV2I64Zero, llvm::NVPTXISD::Suld1DV2I8Clamp, llvm::NVPTXISD::Suld1DV2I8Trap, llvm::NVPTXISD::Suld1DV2I8Zero, llvm::NVPTXISD::Suld1DV4I16Clamp, llvm::NVPTXISD::Suld1DV4I16Trap, llvm::NVPTXISD::Suld1DV4I16Zero, llvm::NVPTXISD::Suld1DV4I32Clamp, llvm::NVPTXISD::Suld1DV4I32Trap, llvm::NVPTXISD::Suld1DV4I32Zero, llvm::NVPTXISD::Suld1DV4I8Clamp, llvm::NVPTXISD::Suld1DV4I8Trap, llvm::NVPTXISD::Suld1DV4I8Zero, llvm::NVPTXISD::Suld2DArrayI16Clamp, llvm::NVPTXISD::Suld2DArrayI16Trap, llvm::NVPTXISD::Suld2DArrayI16Zero, llvm::NVPTXISD::Suld2DArrayI32Clamp, llvm::NVPTXISD::Suld2DArrayI32Trap, llvm::NVPTXISD::Suld2DArrayI32Zero, llvm::NVPTXISD::Suld2DArrayI64Clamp, llvm::NVPTXISD::Suld2DArrayI64Trap, llvm::NVPTXISD::Suld2DArrayI64Zero, llvm::NVPTXISD::Suld2DArrayI8Clamp, llvm::NVPTXISD::Suld2DArrayI8Trap, llvm::NVPTXISD::Suld2DArrayI8Zero, llvm::NVPTXISD::Suld2DArrayV2I16Clamp, llvm::NVPTXISD::Suld2DArrayV2I16Trap, llvm::NVPTXISD::Suld2DArrayV2I16Zero, llvm::NVPTXISD::Suld2DArrayV2I32Clamp, llvm::NVPTXISD::Suld2DArrayV2I32Trap, llvm::NVPTXISD::Suld2DArrayV2I32Zero, llvm::NVPTXISD::Suld2DArrayV2I64Clamp, llvm::NVPTXISD::Suld2DArrayV2I64Trap, llvm::NVPTXISD::Suld2DArrayV2I64Zero, llvm::NVPTXISD::Suld2DArrayV2I8Clamp, llvm::NVPTXISD::Suld2DArrayV2I8Trap, llvm::NVPTXISD::Suld2DArrayV2I8Zero, llvm::NVPTXISD::Suld2DArrayV4I16Clamp, llvm::NVPTXISD::Suld2DArrayV4I16Trap, llvm::NVPTXISD::Suld2DArrayV4I16Zero, llvm::NVPTXISD::Suld2DArrayV4I32Clamp, llvm::NVPTXISD::Suld2DArrayV4I32Trap, llvm::NVPTXISD::Suld2DArrayV4I32Zero, llvm::NVPTXISD::Suld2DArrayV4I8Clamp, llvm::NVPTXISD::Suld2DArrayV4I8Trap, llvm::NVPTXISD::Suld2DArrayV4I8Zero, llvm::NVPTXISD::Suld2DI16Clamp, llvm::NVPTXISD::Suld2DI16Trap, llvm::NVPTXISD::Suld2DI16Zero, llvm::NVPTXISD::Suld2DI32Clamp, llvm::NVPTXISD::Suld2DI32Trap, llvm::NVPTXISD::Suld2DI32Zero, llvm::NVPTXISD::Suld2DI64Clamp, llvm::NVPTXISD::Suld2DI64Trap, llvm::NVPTXISD::Suld2DI64Zero, llvm::NVPTXISD::Suld2DI8Clamp, llvm::NVPTXISD::Suld2DI8Trap, llvm::NVPTXISD::Suld2DI8Zero, llvm::NVPTXISD::Suld2DV2I16Clamp, llvm::NVPTXISD::Suld2DV2I16Trap, llvm::NVPTXISD::Suld2DV2I16Zero, llvm::NVPTXISD::Suld2DV2I32Clamp, llvm::NVPTXISD::Suld2DV2I32Trap, llvm::NVPTXISD::Suld2DV2I32Zero, llvm::NVPTXISD::Suld2DV2I64Clamp, llvm::NVPTXISD::Suld2DV2I64Trap, llvm::NVPTXISD::Suld2DV2I64Zero, llvm::NVPTXISD::Suld2DV2I8Clamp, llvm::NVPTXISD::Suld2DV2I8Trap, llvm::NVPTXISD::Suld2DV2I8Zero, llvm::NVPTXISD::Suld2DV4I16Clamp, llvm::NVPTXISD::Suld2DV4I16Trap, llvm::NVPTXISD::Suld2DV4I16Zero, llvm::NVPTXISD::Suld2DV4I32Clamp, llvm::NVPTXISD::Suld2DV4I32Trap, llvm::NVPTXISD::Suld2DV4I32Zero, llvm::NVPTXISD::Suld2DV4I8Clamp, llvm::NVPTXISD::Suld2DV4I8Trap, llvm::NVPTXISD::Suld2DV4I8Zero, llvm::NVPTXISD::Suld3DI16Clamp, llvm::NVPTXISD::Suld3DI16Trap, llvm::NVPTXISD::Suld3DI16Zero, llvm::NVPTXISD::Suld3DI32Clamp, llvm::NVPTXISD::Suld3DI32Trap, llvm::NVPTXISD::Suld3DI32Zero, llvm::NVPTXISD::Suld3DI64Clamp, llvm::NVPTXISD::Suld3DI64Trap, llvm::NVPTXISD::Suld3DI64Zero, llvm::NVPTXISD::Suld3DI8Clamp, llvm::NVPTXISD::Suld3DI8Trap, llvm::NVPTXISD::Suld3DI8Zero, llvm::NVPTXISD::Suld3DV2I16Clamp, llvm::NVPTXISD::Suld3DV2I16Trap, llvm::NVPTXISD::Suld3DV2I16Zero, llvm::NVPTXISD::Suld3DV2I32Clamp, llvm::NVPTXISD::Suld3DV2I32Trap, llvm::NVPTXISD::Suld3DV2I32Zero, llvm::NVPTXISD::Suld3DV2I64Clamp, llvm::NVPTXISD::Suld3DV2I64Trap, llvm::NVPTXISD::Suld3DV2I64Zero, llvm::NVPTXISD::Suld3DV2I8Clamp, llvm::NVPTXISD::Suld3DV2I8Trap, llvm::NVPTXISD::Suld3DV2I8Zero, llvm::NVPTXISD::Suld3DV4I16Clamp, llvm::NVPTXISD::Suld3DV4I16Trap, llvm::NVPTXISD::Suld3DV4I16Zero, llvm::NVPTXISD::Suld3DV4I32Clamp, llvm::NVPTXISD::Suld3DV4I32Trap, llvm::NVPTXISD::Suld3DV4I32Zero, llvm::NVPTXISD::Suld3DV4I8Clamp, llvm::NVPTXISD::Suld3DV4I8Trap, and llvm::NVPTXISD::Suld3DV4I8Zero.

Referenced by llvm::NVPTXTargetLowering::getTgtMemIntrinsic().

◆ getOpcForTextureInstr()

static unsigned getOpcForTextureInstr ( unsigned  Intrinsic)
static

Definition at line 2618 of file NVPTXISelLowering.cpp.

References llvm::NVPTXISD::Tex1DArrayFloatFloat, llvm::NVPTXISD::Tex1DArrayFloatFloatGrad, llvm::NVPTXISD::Tex1DArrayFloatFloatLevel, llvm::NVPTXISD::Tex1DArrayFloatS32, llvm::NVPTXISD::Tex1DArrayS32Float, llvm::NVPTXISD::Tex1DArrayS32FloatGrad, llvm::NVPTXISD::Tex1DArrayS32FloatLevel, llvm::NVPTXISD::Tex1DArrayS32S32, llvm::NVPTXISD::Tex1DArrayU32Float, llvm::NVPTXISD::Tex1DArrayU32FloatGrad, llvm::NVPTXISD::Tex1DArrayU32FloatLevel, llvm::NVPTXISD::Tex1DArrayU32S32, llvm::NVPTXISD::Tex1DFloatFloat, llvm::NVPTXISD::Tex1DFloatFloatGrad, llvm::NVPTXISD::Tex1DFloatFloatLevel, llvm::NVPTXISD::Tex1DFloatS32, llvm::NVPTXISD::Tex1DS32Float, llvm::NVPTXISD::Tex1DS32FloatGrad, llvm::NVPTXISD::Tex1DS32FloatLevel, llvm::NVPTXISD::Tex1DS32S32, llvm::NVPTXISD::Tex1DU32Float, llvm::NVPTXISD::Tex1DU32FloatGrad, llvm::NVPTXISD::Tex1DU32FloatLevel, llvm::NVPTXISD::Tex1DU32S32, llvm::NVPTXISD::Tex2DArrayFloatFloat, llvm::NVPTXISD::Tex2DArrayFloatFloatGrad, llvm::NVPTXISD::Tex2DArrayFloatFloatLevel, llvm::NVPTXISD::Tex2DArrayFloatS32, llvm::NVPTXISD::Tex2DArrayS32Float, llvm::NVPTXISD::Tex2DArrayS32FloatGrad, llvm::NVPTXISD::Tex2DArrayS32FloatLevel, llvm::NVPTXISD::Tex2DArrayS32S32, llvm::NVPTXISD::Tex2DArrayU32Float, llvm::NVPTXISD::Tex2DArrayU32FloatGrad, llvm::NVPTXISD::Tex2DArrayU32FloatLevel, llvm::NVPTXISD::Tex2DArrayU32S32, llvm::NVPTXISD::Tex2DFloatFloat, llvm::NVPTXISD::Tex2DFloatFloatGrad, llvm::NVPTXISD::Tex2DFloatFloatLevel, llvm::NVPTXISD::Tex2DFloatS32, llvm::NVPTXISD::Tex2DS32Float, llvm::NVPTXISD::Tex2DS32FloatGrad, llvm::NVPTXISD::Tex2DS32FloatLevel, llvm::NVPTXISD::Tex2DS32S32, llvm::NVPTXISD::Tex2DU32Float, llvm::NVPTXISD::Tex2DU32FloatGrad, llvm::NVPTXISD::Tex2DU32FloatLevel, llvm::NVPTXISD::Tex2DU32S32, llvm::NVPTXISD::Tex3DFloatFloat, llvm::NVPTXISD::Tex3DFloatFloatGrad, llvm::NVPTXISD::Tex3DFloatFloatLevel, llvm::NVPTXISD::Tex3DFloatS32, llvm::NVPTXISD::Tex3DS32Float, llvm::NVPTXISD::Tex3DS32FloatGrad, llvm::NVPTXISD::Tex3DS32FloatLevel, llvm::NVPTXISD::Tex3DS32S32, llvm::NVPTXISD::Tex3DU32Float, llvm::NVPTXISD::Tex3DU32FloatGrad, llvm::NVPTXISD::Tex3DU32FloatLevel, llvm::NVPTXISD::Tex3DU32S32, llvm::NVPTXISD::TexCubeArrayFloatFloat, llvm::NVPTXISD::TexCubeArrayFloatFloatLevel, llvm::NVPTXISD::TexCubeArrayS32Float, llvm::NVPTXISD::TexCubeArrayS32FloatLevel, llvm::NVPTXISD::TexCubeArrayU32Float, llvm::NVPTXISD::TexCubeArrayU32FloatLevel, llvm::NVPTXISD::TexCubeFloatFloat, llvm::NVPTXISD::TexCubeFloatFloatLevel, llvm::NVPTXISD::TexCubeS32Float, llvm::NVPTXISD::TexCubeS32FloatLevel, llvm::NVPTXISD::TexCubeU32Float, llvm::NVPTXISD::TexCubeU32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayFloatFloat, llvm::NVPTXISD::TexUnified1DArrayFloatFloatGrad, llvm::NVPTXISD::TexUnified1DArrayFloatFloatLevel, llvm::NVPTXISD::TexUnified1DArrayFloatS32, llvm::NVPTXISD::TexUnified1DArrayS32Float, llvm::NVPTXISD::TexUnified1DArrayS32FloatGrad, llvm::NVPTXISD::TexUnified1DArrayS32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayS32S32, llvm::NVPTXISD::TexUnified1DArrayU32Float, llvm::NVPTXISD::TexUnified1DArrayU32FloatGrad, llvm::NVPTXISD::TexUnified1DArrayU32FloatLevel, llvm::NVPTXISD::TexUnified1DArrayU32S32, llvm::NVPTXISD::TexUnified1DFloatFloat, llvm::NVPTXISD::TexUnified1DFloatFloatGrad, llvm::NVPTXISD::TexUnified1DFloatFloatLevel, llvm::NVPTXISD::TexUnified1DFloatS32, llvm::NVPTXISD::TexUnified1DS32Float, llvm::NVPTXISD::TexUnified1DS32FloatGrad, llvm::NVPTXISD::TexUnified1DS32FloatLevel, llvm::NVPTXISD::TexUnified1DS32S32, llvm::NVPTXISD::TexUnified1DU32Float, llvm::NVPTXISD::TexUnified1DU32FloatGrad, llvm::NVPTXISD::TexUnified1DU32FloatLevel, llvm::NVPTXISD::TexUnified1DU32S32, llvm::NVPTXISD::TexUnified2DArrayFloatFloat, llvm::NVPTXISD::TexUnified2DArrayFloatFloatGrad, llvm::NVPTXISD::TexUnified2DArrayFloatFloatLevel, llvm::NVPTXISD::TexUnified2DArrayFloatS32, llvm::NVPTXISD::TexUnified2DArrayS32Float, llvm::NVPTXISD::TexUnified2DArrayS32FloatGrad, llvm::NVPTXISD::TexUnified2DArrayS32FloatLevel, llvm::NVPTXISD::TexUnified2DArrayS32S32, llvm::NVPTXISD::TexUnified2DArrayU32Float, llvm::NVPTXISD::TexUnified2DArrayU32FloatGrad, llvm::NVPTXISD::TexUnified2DArrayU32FloatLevel, llvm::NVPTXISD::TexUnified2DArrayU32S32, llvm::NVPTXISD::TexUnified2DFloatFloat, llvm::NVPTXISD::TexUnified2DFloatFloatGrad, llvm::NVPTXISD::TexUnified2DFloatFloatLevel, llvm::NVPTXISD::TexUnified2DFloatS32, llvm::NVPTXISD::TexUnified2DS32Float, llvm::NVPTXISD::TexUnified2DS32FloatGrad, llvm::NVPTXISD::TexUnified2DS32FloatLevel, llvm::NVPTXISD::TexUnified2DS32S32, llvm::NVPTXISD::TexUnified2DU32Float, llvm::NVPTXISD::TexUnified2DU32FloatGrad, llvm::NVPTXISD::TexUnified2DU32FloatLevel, llvm::NVPTXISD::TexUnified2DU32S32, llvm::NVPTXISD::TexUnified3DFloatFloat, llvm::NVPTXISD::TexUnified3DFloatFloatGrad, llvm::NVPTXISD::TexUnified3DFloatFloatLevel, llvm::NVPTXISD::TexUnified3DFloatS32, llvm::NVPTXISD::TexUnified3DS32Float, llvm::NVPTXISD::TexUnified3DS32FloatGrad, llvm::NVPTXISD::TexUnified3DS32FloatLevel, llvm::NVPTXISD::TexUnified3DS32S32, llvm::NVPTXISD::TexUnified3DU32Float, llvm::NVPTXISD::TexUnified3DU32FloatGrad, llvm::NVPTXISD::TexUnified3DU32FloatLevel, llvm::NVPTXISD::TexUnified3DU32S32, llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloat, llvm::NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel, llvm::NVPTXISD::TexUnifiedCubeArrayS32Float, llvm::NVPTXISD::TexUnifiedCubeArrayS32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeArrayU32Float, llvm::NVPTXISD::TexUnifiedCubeArrayU32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeFloatFloat, llvm::NVPTXISD::TexUnifiedCubeFloatFloatLevel, llvm::NVPTXISD::TexUnifiedCubeS32Float, llvm::NVPTXISD::TexUnifiedCubeS32FloatLevel, llvm::NVPTXISD::TexUnifiedCubeU32Float, llvm::NVPTXISD::TexUnifiedCubeU32FloatLevel, llvm::NVPTXISD::Tld4A2DFloatFloat, llvm::NVPTXISD::Tld4A2DS64Float, llvm::NVPTXISD::Tld4A2DU64Float, llvm::NVPTXISD::Tld4B2DFloatFloat, llvm::NVPTXISD::Tld4B2DS64Float, llvm::NVPTXISD::Tld4B2DU64Float, llvm::NVPTXISD::Tld4G2DFloatFloat, llvm::NVPTXISD::Tld4G2DS64Float, llvm::NVPTXISD::Tld4G2DU64Float, llvm::NVPTXISD::Tld4R2DFloatFloat, llvm::NVPTXISD::Tld4R2DS64Float, llvm::NVPTXISD::Tld4R2DU64Float, llvm::NVPTXISD::Tld4UnifiedA2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedA2DS64Float, llvm::NVPTXISD::Tld4UnifiedA2DU64Float, llvm::NVPTXISD::Tld4UnifiedB2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedB2DS64Float, llvm::NVPTXISD::Tld4UnifiedB2DU64Float, llvm::NVPTXISD::Tld4UnifiedG2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedG2DS64Float, llvm::NVPTXISD::Tld4UnifiedG2DU64Float, llvm::NVPTXISD::Tld4UnifiedR2DFloatFloat, llvm::NVPTXISD::Tld4UnifiedR2DS64Float, and llvm::NVPTXISD::Tld4UnifiedR2DU64Float.

Referenced by llvm::NVPTXTargetLowering::getTgtMemIntrinsic().

◆ isImageOrSamplerVal()

static bool isImageOrSamplerVal ( const Value arg,
const Module context 
)
static

◆ IsMulWideOperandDemotable()

static bool IsMulWideOperandDemotable ( SDValue  Op,
unsigned  OptSize,
OperandSignedness S 
)
static

IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize bits without loss of information.

The signedness of the operand, if determinable, is placed in S.

Definition at line 4306 of file NVPTXISelLowering.cpp.

References llvm::SDValue::getOpcode(), llvm::SDValue::getOperand(), llvm::EVT::getSizeInBits(), llvm::SDValue::getValueType(), llvm::ISD::SIGN_EXTEND, llvm::ISD::SIGN_EXTEND_INREG, Signed, llvm::Unknown, Unsigned, and llvm::ISD::ZERO_EXTEND.

Referenced by AreMulWideOperandsDemotable().

◆ IsPTXVectorType()

static bool IsPTXVectorType ( MVT  VT)
static

◆ PerformADDCombine()

static SDValue PerformADDCombine ( SDNode N,
TargetLowering::DAGCombinerInfo DCI,
const NVPTXSubtarget Subtarget,
CodeGenOpt::Level  OptLevel 
)
static

PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.

Definition at line 4175 of file NVPTXISelLowering.cpp.

References llvm::SDNode::getOperand(), and PerformADDCombineWithOperands().

Referenced by PerformSETCCCombine().

◆ PerformADDCombineWithOperands()

static SDValue PerformADDCombineWithOperands ( SDNode N,
SDValue  N0,
SDValue  N1,
TargetLowering::DAGCombinerInfo DCI,
const NVPTXSubtarget Subtarget,
CodeGenOpt::Level  OptLevel 
)
static

◆ PerformANDCombine()

static SDValue PerformANDCombine ( SDNode N,
TargetLowering::DAGCombinerInfo DCI 
)
static

◆ PerformMULCombine()

static SDValue PerformMULCombine ( SDNode N,
TargetLowering::DAGCombinerInfo DCI,
CodeGenOpt::Level  OptLevel 
)
static

PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.

Definition at line 4436 of file NVPTXISelLowering.cpp.

References llvm::MipsISD::Ret, and TryMULWIDECombine().

Referenced by PerformSETCCCombine().

◆ PerformREMCombine()

static SDValue PerformREMCombine ( SDNode N,
TargetLowering::DAGCombinerInfo DCI,
CodeGenOpt::Level  OptLevel 
)
static

◆ PerformSETCCCombine()

static SDValue PerformSETCCCombine ( SDNode N,
TargetLowering::DAGCombinerInfo DCI 
)
static

◆ PerformSHLCombine()

static SDValue PerformSHLCombine ( SDNode N,
TargetLowering::DAGCombinerInfo DCI,
CodeGenOpt::Level  OptLevel 
)
static

PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.

Definition at line 4449 of file NVPTXISelLowering.cpp.

References llvm::MipsISD::Ret, and TryMULWIDECombine().

Referenced by PerformSETCCCombine().

◆ ReplaceINTRINSIC_W_CHAIN()

static void ReplaceINTRINSIC_W_CHAIN ( SDNode N,
SelectionDAG DAG,
SmallVectorImpl< SDValue > &  Results 
)
static

◆ ReplaceLoadVector()

static void ReplaceLoadVector ( SDNode N,
SelectionDAG DAG,
SmallVectorImpl< SDValue > &  Results 
)
static

ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.

Definition at line 4506 of file NVPTXISelLowering.cpp.

References llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align, assert(), llvm::ISD::EXTRACT_VECTOR_ELT, llvm::MVT::f16, llvm::MemSDNode::getAlignment(), llvm::SelectionDAG::getBuildVector(), llvm::SelectionDAG::getContext(), llvm::SelectionDAG::getDataLayout(), llvm::LoadSDNode::getExtensionType(), llvm::SelectionDAG::getIntPtrConstant(), llvm::SelectionDAG::getMemIntrinsicNode(), llvm::MemSDNode::getMemOperand(), llvm::MemSDNode::getMemoryVT(), llvm::SelectionDAG::getNode(), llvm::DataLayout::getPrefTypeAlignment(), llvm::EVT::getSimpleVT(), llvm::EVT::getSizeInBits(), llvm::EVT::getTypeForEVT(), llvm::SDValue::getValue(), llvm::SDNode::getValueType(), llvm::EVT::getVectorElementType(), llvm::EVT::getVectorNumElements(), llvm::SelectionDAG::getVTList(), llvm::MVT::i16, llvm::EVT::isSimple(), llvm::EVT::isVector(), llvm::ARM_MB::LD, llvm::NVPTXISD::LoadV2, llvm::NVPTXISD::LoadV4, N, llvm::SDNode::op_begin(), llvm::SDNode::op_end(), llvm::MVT::Other, llvm::SmallVectorTemplateBase< T, isPodLike >::push_back(), llvm::SmallVectorTemplateBase< T, isPodLike< T >::value >::push_back(), llvm::MVT::SimpleTy, llvm::ISD::TRUNCATE, llvm::MVT::v2f16, llvm::MVT::v2f32, llvm::MVT::v2f64, llvm::MVT::v2i16, llvm::MVT::v2i32, llvm::MVT::v2i64, llvm::MVT::v2i8, llvm::MVT::v4f16, llvm::MVT::v4f32, llvm::MVT::v4i16, llvm::MVT::v4i32, llvm::MVT::v4i8, and llvm::MVT::v8f16.

Referenced by ReplaceINTRINSIC_W_CHAIN().

◆ TryMULWIDECombine()

static SDValue TryMULWIDECombine ( SDNode N,
TargetLowering::DAGCombinerInfo DCI 
)
static

◆ VectorizePTXValueVTs()

static SmallVector<ParamVectorizationFlags, 16> VectorizePTXValueVTs ( const SmallVectorImpl< EVT > &  ValueVTs,
const SmallVectorImpl< uint64_t > &  Offsets,
unsigned  ParamAlignment 
)
static

Variable Documentation

◆ FMAContractLevelOpt

cl::opt<unsigned> FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2))
static

◆ FtzEnabled

cl::opt<bool> FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
static

◆ sched4reg

cl::opt<bool> sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false))
static

◆ uniqueCallSite

unsigned int uniqueCallSite = 0
static

◆ UsePrecDivF32

cl::opt<int> UsePrecDivF32("nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" " IEEE Compliant F32 div.rnd if available."), cl::init(2))
static

◆ UsePrecSqrtF32

cl::opt<bool> UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true))
static