LLVM 22.0.0git
NVPTXISelLowering.cpp File Reference
#include "NVPTXISelLowering.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXISelDAGToDAG.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <iterator>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "nvptx-lower"
#define MAKE_CASE(V)

Enumerations

enum  OperandSignedness { Signed = 0 , Unsigned , Unknown }

Functions

static bool IsPTXVectorType (MVT VT)
static std::optional< std::pair< unsigned int, MVT > > getVectorLoweringShape (EVT VectorEVT, const NVPTXSubtarget &STI, unsigned AddressSpace)
static void ComputePTXValueVTs (const TargetLowering &TLI, const DataLayout &DL, LLVMContext &Ctx, CallingConv::ID CallConv, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > &Offsets, uint64_t StartingOffset=0)
 ComputePTXValueVTs - For the given Type Ty, returns the set of primitive legal-ish MVTs that compose it.
static EVT getVectorizedVT (EVT VT, unsigned N, LLVMContext &C)
static SDValue getExtractVectorizedValue (SDValue V, unsigned I, EVT VT, const SDLoc &dl, SelectionDAG &DAG)
template<typename T>
static SDValue getBuildVectorizedValue (unsigned N, const SDLoc &dl, SelectionDAG &DAG, T GetElement)
static EVT promoteScalarIntegerPTX (const EVT VT)
 PromoteScalarIntegerPTX Used to make sure the arguments/returns are suitable for passing and promote them to a larger size if they're not.
template<typename T>
static unsigned canMergeParamLoadStoresStartingAt (unsigned Idx, uint32_t AccessSize, const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< T > &Offsets, Align ParamAlignment)
template<typename T>
static SmallVector< unsigned, 16 > VectorizePTXValueVTs (const SmallVectorImpl< EVT > &ValueVTs, const SmallVectorImpl< T > &Offsets, Align ParamAlignment, bool IsVAArg=false)
static bool shouldConvertToIndirectCall (const CallBase *CB, const GlobalAddressSDNode *Func)
static MachinePointerInfo refinePtrAS (SDValue &Ptr, SelectionDAG &DAG, const DataLayout &DL, const TargetLowering &TL)
static ISD::NodeType getExtOpcode (const ISD::ArgFlagsTy &Flags)
static SDValue correctParamType (SDValue V, EVT ExpectedVT, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, SDLoc dl)
static SDValue getPRMT (SDValue A, SDValue B, SDValue Selector, SDLoc DL, SelectionDAG &DAG, unsigned Mode=NVPTX::PTXPrmtMode::NONE)
static SDValue getPRMT (SDValue A, SDValue B, uint64_t Selector, SDLoc DL, SelectionDAG &DAG, unsigned Mode=NVPTX::PTXPrmtMode::NONE)
static SDValue buildTreeReduction (const SmallVector< SDValue > &Elements, EVT EltTy, ArrayRef< std::pair< unsigned, unsigned > > Ops, const SDLoc &DL, const SDNodeFlags Flags, SelectionDAG &DAG)
 Reduces the elements using the scalar operations provided.
static ISD::NodeType getScalarOpcodeForReduction (unsigned ReductionOpcode)
static std::optional< NVPTXISD::NodeType > getScalar3OpcodeForReduction (unsigned ReductionOpcode)
 Get 3-input scalar reduction opcode.
static SDValue PromoteBinOpToF32 (SDNode *N, SelectionDAG &DAG)
static SDValue LowerVectorArith (SDValue Op, SelectionDAG &DAG)
static SDValue LowerTcgen05St (SDValue Op, SelectionDAG &DAG)
static SDValue LowerIntrinsicVoid (SDValue Op, SelectionDAG &DAG)
static SDValue LowerClusterLaunchControlQueryCancel (SDValue Op, SelectionDAG &DAG)
static SDValue lowerPrmtIntrinsic (SDValue Op, SelectionDAG &DAG)
static SDValue lowerIntrinsicWOChain (SDValue Op, SelectionDAG &DAG)
static SDValue lowerCTLZCTPOP (SDValue Op, SelectionDAG &DAG)
static SDValue expandFSH64 (SDValue A, SDValue B, SDValue ShiftAmount, SDLoc DL, unsigned Opcode, SelectionDAG &DAG)
static SDValue lowerFSH (SDValue Op, SelectionDAG &DAG)
static SDValue lowerROT (SDValue Op, SelectionDAG &DAG)
static SDValue lowerFREM (SDValue Op, SelectionDAG &DAG)
static SDValue lowerSELECT (SDValue Op, SelectionDAG &DAG)
static std::optional< std::pair< SDValue, SDValue > > replaceLoadVector (SDNode *N, SelectionDAG &DAG, const NVPTXSubtarget &STI)
 replaceLoadVector - Convert vector loads into multi-output scalar loads.
static void replaceLoadVector (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results, const NVPTXSubtarget &STI)
static SDValue lowerLoadVector (SDNode *N, SelectionDAG &DAG, const NVPTXSubtarget &STI)
static SDValue lowerLOADi1 (LoadSDNode *LD, SelectionDAG &DAG)
static SDValue lowerSTOREVector (SDValue Op, SelectionDAG &DAG, const NVPTXSubtarget &STI)
static bool isConstZero (const SDValue &Operand)
static SDValue PerformADDCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI)
 PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.
static SDValue PerformFADDCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
static SDValue combineUnpackingMovIntoLoad (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 Fold unpacking movs into a load by increasing the number of return values.
static SDValue combinePackingMovIntoStore (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned Front, unsigned Back)
 Fold packing movs into a store.
static SDValue combineSTORE (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &STI)
static SDValue combineLOAD (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const NVPTXSubtarget &STI)
static SDValue PerformADDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
static SDValue PerformFADDCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
static SDValue PerformREMCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
static SDValue combineMulWide (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
static bool IsMulWideOperandDemotable (SDValue Op, unsigned OptSize, OperandSignedness &S)
 IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize bits without loss of information.
static bool AreMulWideOperandsDemotable (SDValue LHS, SDValue RHS, unsigned OptSize, bool &IsSigned)
 AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits without loss of information.
static SDValue TryMULWIDECombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
 TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e. mul.wide).
static bool isConstOne (const SDValue &Operand)
static SDValue matchMADConstOnePattern (SDValue Add)
static SDValue combineMADConstOne (SDValue X, SDValue Add, EVT VT, SDLoc DL, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineMulSelectConstOne (SDValue X, SDValue Select, EVT VT, SDLoc DL, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformMULCombineWithOperands (SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformMULCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
static SDValue PerformSHLCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
 PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
static SDValue PerformSETCCCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, unsigned int SmVersion)
static SDValue PerformEXTRACTCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformVSELECTCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue PerformBUILD_VECTORCombine (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineADDRSPACECAST (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static APInt getPRMTSelector (const APInt &Selector, unsigned Mode)
static APInt computePRMT (APInt A, APInt B, APInt Selector, unsigned Mode)
static SDValue combinePRMT (SDNode *N, TargetLowering::DAGCombinerInfo &DCI, CodeGenOptLevel OptLevel)
static SDValue sinkProxyReg (SDValue R, SDValue Chain, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineProxyReg (SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static void ReplaceBITCAST (SDNode *Node, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
static void ReplaceTcgen05Ld (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results, bool hasOffset=false)
static void ReplaceINTRINSIC_W_CHAIN (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
static void ReplaceCopyFromReg_128 (SDNode *N, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results)
static void replaceProxyReg (SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, SmallVectorImpl< SDValue > &Results)
static void replaceAtomicSwap128 (SDNode *N, SelectionDAG &DAG, const NVPTXSubtarget &STI, SmallVectorImpl< SDValue > &Results)
static void computeKnownBitsForPRMT (const SDValue Op, KnownBits &Known, const SelectionDAG &DAG, unsigned Depth)
static void computeKnownBitsForLoadV (const SDValue Op, KnownBits &Known)
static std::pair< APInt, APInt > getPRMTDemandedBits (const APInt &SelectorVal, const APInt &DemandedBits)
static SDValue canonicalizePRMTInput (SDValue Op, SelectionDAG &DAG)
static SDValue simplifyDemandedBitsForPRMT (SDValue PRMT, const APInt &DemandedBits, SelectionDAG &DAG, const TargetLowering &TLI, unsigned Depth)

Variables

static cl::opt< bool > sched4reg ("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false))
static cl::opt< unsigned > FMAContractLevelOpt ("nvptx-fma-level", cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2))
static cl::opt< NVPTX::DivPrecisionLevel > UsePrecDivF32 ("nvptx-prec-divf32", cl::Hidden, cl::desc("NVPTX Specific: Override the precision of the lowering for f32 fdiv"), cl::values(clEnumValN(NVPTX::DivPrecisionLevel::Approx, "0", "Use div.approx"), clEnumValN(NVPTX::DivPrecisionLevel::Full, "1", "Use div.full"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754, "2", "Use IEEE Compliant F32 div.rnd if available (default)"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754_NoFTZ, "3", "Use IEEE Compliant F32 div.rnd if available, no FTZ")), cl::init(NVPTX::DivPrecisionLevel::IEEE754))
static cl::opt< bool > UsePrecSqrtF32 ("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true))
static cl::opt< bool > UseApproxLog2F32 ("nvptx-approx-log2f32", cl::desc("NVPTX Specific: whether to use lg2.approx for log2"), cl::init(false))
 Whereas CUDA's implementation (see libdevice) uses ex2.approx for exp2(), it does NOT use lg2.approx for log2, so this is disabled by default.
static cl::opt< bool > ForceMinByValParamAlign ("nvptx-force-min-byval-param-align", cl::Hidden, cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions."), cl::init(false))

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "nvptx-lower"

Definition at line 76 of file NVPTXISelLowering.cpp.

◆ MAKE_CASE

#define MAKE_CASE ( V)
Value:
case V: \
return #V;

Enumeration Type Documentation

◆ OperandSignedness

Enumerator
Signed 
Unsigned 
Unknown 

Definition at line 5386 of file NVPTXISelLowering.cpp.

Function Documentation

◆ AreMulWideOperandsDemotable()

bool AreMulWideOperandsDemotable ( SDValue LHS,
SDValue RHS,
unsigned OptSize,
bool & IsSigned )
static

AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can be demoted to OptSize bits without loss of information.

If the operands contain a constant, it should appear as the RHS operand. The signedness of the operands is placed in IsSigned.

Definition at line 5422 of file NVPTXISelLowering.cpp.

References llvm::dyn_cast(), llvm::APInt::isIntN(), IsMulWideOperandDemotable(), llvm::APInt::isSignedIntN(), LHS, RHS, Signed, Unknown, and Unsigned.

Referenced by TryMULWIDECombine().

◆ buildTreeReduction()

SDValue buildTreeReduction ( const SmallVector< SDValue > & Elements,
EVT EltTy,
ArrayRef< std::pair< unsigned, unsigned > > Ops,
const SDLoc & DL,
const SDNodeFlags Flags,
SelectionDAG & DAG )
static

Reduces the elements using the scalar operations provided.

The operations are sorted descending in number of inputs they take. The flags on the original reduction operation will be propagated to each scalar operation. Nearby elements are grouped in tree reduction, unlike the shuffle reduction used in ExpandReductions and SelectionDAG.

Definition at line 1910 of file NVPTXISelLowering.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, assert(), DL, E(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::SelectionDAG::getNode(), I, OpIdx, llvm::SmallVectorTemplateBase< T, bool >::push_back(), and llvm::ArrayRef< T >::slice().

◆ canMergeParamLoadStoresStartingAt()

template<typename T>
unsigned canMergeParamLoadStoresStartingAt ( unsigned Idx,
uint32_t AccessSize,
const SmallVectorImpl< EVT > & ValueVTs,
const SmallVectorImpl< T > & Offsets,
Align ParamAlignment )
static

◆ canonicalizePRMTInput()

SDValue canonicalizePRMTInput ( SDValue Op,
SelectionDAG & DAG )
static

Definition at line 6635 of file NVPTXISelLowering.cpp.

References llvm::SelectionDAG::getConstant(), and SDValue().

Referenced by simplifyDemandedBitsForPRMT().

◆ combineADDRSPACECAST()

SDValue combineADDRSPACECAST ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI )
static

Definition at line 5811 of file NVPTXISelLowering.cpp.

References assert(), llvm::cast(), llvm::dyn_cast(), N, and SDValue().

◆ combineLOAD()

◆ combineMADConstOne()

◆ combineMulSelectConstOne()

◆ combineMulWide()

◆ combinePackingMovIntoStore()

SDValue combinePackingMovIntoStore ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI,
unsigned Front,
unsigned Back )
static

Fold packing movs into a store.

ex:
v1: v2f16 = BUILD_VECTOR a:f16, b:f16
v2: v2f16 = BUILD_VECTOR c:f16, d:f16
StoreV2 v1, v2

...is turned into...

StoreV4 a, b, c, d

Definition at line 5167 of file NVPTXISelLowering.cpp.

References llvm::ISD::BUILD_VECTOR, llvm::cast(), llvm::TargetLowering::DAGCombinerInfo::DAG, llvm::ISD::FP_ROUND, llvm::SelectionDAG::getMemIntrinsicNode(), llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG(), llvm::NVPTX::isPackedVectorTy(), llvm_unreachable, N, Operands, SDValue(), llvm::NVPTXISD::StoreV2, llvm::NVPTXISD::StoreV4, llvm::NVPTXISD::StoreV8, and llvm::ISD::TRUNCATE.

Referenced by combineSTORE().

◆ combinePRMT()

◆ combineProxyReg()

SDValue combineProxyReg ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI )
static

Definition at line 5964 of file NVPTXISelLowering.cpp.

References N, Reg, SDValue(), and sinkProxyReg().

◆ combineSTORE()

◆ combineUnpackingMovIntoLoad()

SDValue combineUnpackingMovIntoLoad ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI )
static

Fold unpacking movs into a load by increasing the number of return values.

ex: L: v2f16,ch = load

a: f16 = extractelt L:0, 0
b: f16 = extractelt L:0, 1
use(a, b)

...is turned into...

L: f16,f16,ch = LoadV2

use(L:0, L:1)

Definition at line 5054 of file NVPTXISelLowering.cpp.

References llvm::all_of(), llvm::TargetLowering::DAGCombinerInfo::isAfterLegalizeDAG(), llvm::NVPTX::isPackedVectorTy(), N, and SDValue().

Referenced by combineLOAD().

◆ computeKnownBitsForLoadV()

◆ computeKnownBitsForPRMT()

◆ computePRMT()

◆ ComputePTXValueVTs()

void ComputePTXValueVTs ( const TargetLowering & TLI,
const DataLayout & DL,
LLVMContext & Ctx,
CallingConv::ID CallConv,
Type * Ty,
SmallVectorImpl< EVT > & ValueVTs,
SmallVectorImpl< uint64_t > & Offsets,
uint64_t StartingOffset = 0 )
static

ComputePTXValueVTs - For the given Type Ty, returns the set of primitive legal-ish MVTs that compose it.

Unlike ComputeValueVTs, this will legalize the types as required by the calling convention (with special handling for i8s). NOTE: This is a band-aid for code that expects ComputeValueVTs to return the same number of types as the Ins/Outs arrays in LowerFormalArguments, LowerCall, and LowerReturn.

Definition at line 296 of file NVPTXISelLowering.cpp.

References assert(), llvm::ComputeValueVTs(), DL, llvm::TargetLoweringBase::getNumRegistersForCallingConv(), llvm::TargetLoweringBase::getRegisterTypeForCallingConv(), llvm::MVT::getStoreSize(), I, llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::seq(), and llvm::zip().

Referenced by llvm::NVPTXTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerFormalArguments(), and llvm::NVPTXTargetLowering::LowerReturn().

◆ correctParamType()

◆ expandFSH64()

◆ getBuildVectorizedValue()

◆ getExtOpcode()

ISD::NodeType getExtOpcode ( const ISD::ArgFlagsTy & Flags)
static

◆ getExtractVectorizedValue()

◆ getPRMT() [1/2]

◆ getPRMT() [2/2]

SDValue getPRMT ( SDValue A,
SDValue B,
uint64_t Selector,
SDLoc DL,
SelectionDAG & DAG,
unsigned Mode = NVPTX::PTXPrmtMode::NONE )
static

◆ getPRMTDemandedBits()

std::pair< APInt, APInt > getPRMTDemandedBits ( const APInt & SelectorVal,
const APInt & DemandedBits )
static

◆ getPRMTSelector()

◆ getScalar3OpcodeForReduction()

std::optional< NVPTXISD::NodeType > getScalar3OpcodeForReduction ( unsigned ReductionOpcode)
static

Get 3-input scalar reduction opcode.

Definition at line 1974 of file NVPTXISelLowering.cpp.

References llvm::NVPTXISD::FMAXIMUM3, llvm::NVPTXISD::FMAXNUM3, llvm::NVPTXISD::FMINIMUM3, and llvm::NVPTXISD::FMINNUM3.

◆ getScalarOpcodeForReduction()

ISD::NodeType getScalarOpcodeForReduction ( unsigned ReductionOpcode)
static

Definition at line 1957 of file NVPTXISelLowering.cpp.

References llvm_unreachable.

◆ getVectorizedVT()

◆ getVectorLoweringShape()

◆ isConstOne()

bool isConstOne ( const SDValue & Operand)
static

Definition at line 5524 of file NVPTXISelLowering.cpp.

References llvm::dyn_cast().

Referenced by combineMulSelectConstOne(), and matchMADConstOnePattern().

◆ isConstZero()

bool isConstZero ( const SDValue & Operand)
static

Definition at line 4913 of file NVPTXISelLowering.cpp.

References llvm::dyn_cast().

Referenced by PerformADDCombineWithOperands().

◆ IsMulWideOperandDemotable()

bool IsMulWideOperandDemotable ( SDValue Op,
unsigned OptSize,
OperandSignedness & S )
static

IsMulWideOperandDemotable - Checks if the provided DAG node is an operand that can be demoted to OptSize bits without loss of information.

The signedness of the operand, if determinable, is placed in S.

Definition at line 5395 of file NVPTXISelLowering.cpp.

References llvm::EVT::getFixedSizeInBits(), llvm::ISD::SIGN_EXTEND, llvm::ISD::SIGN_EXTEND_INREG, Signed, Unknown, Unsigned, and llvm::ISD::ZERO_EXTEND.

Referenced by AreMulWideOperandsDemotable().

◆ IsPTXVectorType()

bool IsPTXVectorType ( MVT VT)
static

◆ LowerClusterLaunchControlQueryCancel()

◆ lowerCTLZCTPOP()

◆ lowerFREM()

◆ lowerFSH()

SDValue lowerFSH ( SDValue Op,
SelectionDAG & DAG )
static

Definition at line 2799 of file NVPTXISelLowering.cpp.

References expandFSH64().

Referenced by llvm::NVPTXTargetLowering::LowerOperation().

◆ LowerIntrinsicVoid()

SDValue LowerIntrinsicVoid ( SDValue Op,
SelectionDAG & DAG )
static

◆ lowerIntrinsicWOChain()

SDValue lowerIntrinsicWOChain ( SDValue Op,
SelectionDAG & DAG )
static

◆ lowerLOADi1()

◆ lowerLoadVector()

SDValue lowerLoadVector ( SDNode * N,
SelectionDAG & DAG,
const NVPTXSubtarget & STI )
static

Definition at line 3203 of file NVPTXISelLowering.cpp.

References llvm::SelectionDAG::getMergeValues(), N, replaceLoadVector(), and SDValue().

Referenced by combineLOAD().

◆ lowerPrmtIntrinsic()

◆ lowerROT()

SDValue lowerROT ( SDValue Op,
SelectionDAG & DAG )
static

◆ lowerSELECT()

◆ lowerSTOREVector()

◆ LowerTcgen05St()

◆ LowerVectorArith()

◆ matchMADConstOnePattern()

SDValue matchMADConstOnePattern ( SDValue Add)
static

◆ PerformADDCombine()

SDValue PerformADDCombine ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI,
CodeGenOptLevel OptLevel )
static

PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.

Definition at line 5277 of file NVPTXISelLowering.cpp.

References llvm::SDValue::getValueType(), llvm::EVT::isVector(), N, llvm::None, PerformADDCombineWithOperands(), and SDValue().

◆ PerformADDCombineWithOperands()

SDValue PerformADDCombineWithOperands ( SDNode * N,
SDValue N0,
SDValue N1,
TargetLowering::DAGCombinerInfo & DCI )
static

PerformADDCombineWithOperands - Try DAG combinations for an ADD with operands N0 and N1.

This is a helper for PerformADDCombine that is called with the default operands, and if that fails, with commuted operands.

Definition at line 4923 of file NVPTXISelLowering.cpp.

References llvm::ISD::ADD, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::SDValue::getNode(), llvm::SelectionDAG::getNode(), llvm::SDValue::getOpcode(), llvm::SDNode::getOperand(), llvm::SelectionDAG::getSelect(), llvm::SDValue::getValueType(), llvm::SDNode::hasOneUse(), isConstZero(), llvm::ISD::MUL, Mul, N, SDValue(), and llvm::ISD::SELECT.

◆ PerformBUILD_VECTORCombine()

◆ PerformEXTRACTCombine()

◆ PerformFADDCombine()

SDValue PerformFADDCombine ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI,
CodeGenOptLevel OptLevel )
static

PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.

Definition at line 5301 of file NVPTXISelLowering.cpp.

References llvm::SDValue::getValueType(), llvm::EVT::isVector(), N, PerformFADDCombineWithOperands(), and SDValue().

◆ PerformFADDCombineWithOperands()

◆ PerformMULCombine()

SDValue PerformMULCombine ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI,
CodeGenOptLevel OptLevel )
static

PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.

Definition at line 5611 of file NVPTXISelLowering.cpp.

References N, llvm::None, PerformMULCombineWithOperands(), SDValue(), and TryMULWIDECombine().

◆ PerformMULCombineWithOperands()

SDValue PerformMULCombineWithOperands ( SDNode * N,
SDValue N0,
SDValue N1,
TargetLowering::DAGCombinerInfo & DCI )
static

◆ PerformREMCombine()

◆ PerformSETCCCombine()

◆ PerformSHLCombine()

SDValue PerformSHLCombine ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI,
CodeGenOptLevel OptLevel )
static

PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.

Definition at line 5626 of file NVPTXISelLowering.cpp.

References N, llvm::None, SDValue(), and TryMULWIDECombine().

◆ PerformVSELECTCombine()

◆ PromoteBinOpToF32()

◆ promoteScalarIntegerPTX()

EVT promoteScalarIntegerPTX ( const EVT VT)
static

PromoteScalarIntegerPTX Used to make sure the arguments/returns are suitable for passing and promote them to a larger size if they're not.

The promoted type is placed in PromoteVT if the function returns true.

Definition at line 385 of file NVPTXISelLowering.cpp.

References llvm::EVT::getFixedSizeInBits(), llvm::EVT::isScalarInteger(), llvm_unreachable, and llvm::PowerOf2Ceil().

Referenced by llvm::NVPTXTargetLowering::LowerCall(), and llvm::NVPTXTargetLowering::LowerReturn().

◆ refinePtrAS()

◆ replaceAtomicSwap128()

◆ ReplaceBITCAST()

◆ ReplaceCopyFromReg_128()

void ReplaceCopyFromReg_128 ( SDNode * N,
SelectionDAG & DAG,
SmallVectorImpl< SDValue > & Results )
static

◆ ReplaceINTRINSIC_W_CHAIN()

◆ replaceLoadVector() [1/2]

◆ replaceLoadVector() [2/2]

void replaceLoadVector ( SDNode * N,
SelectionDAG & DAG,
SmallVectorImpl< SDValue > & Results,
const NVPTXSubtarget & STI )
static

Definition at line 3196 of file NVPTXISelLowering.cpp.

References N, replaceLoadVector(), and Results.

◆ replaceProxyReg()

◆ ReplaceTcgen05Ld()

◆ shouldConvertToIndirectCall()

bool shouldConvertToIndirectCall ( const CallBase * CB,
const GlobalAddressSDNode * Func )
static

◆ simplifyDemandedBitsForPRMT()

◆ sinkProxyReg()

◆ TryMULWIDECombine()

SDValue TryMULWIDECombine ( SDNode * N,
TargetLowering::DAGCombinerInfo & DCI )
static

TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply of M/2 bits that produces an M-bit result (i.e. mul.wide).

This transform works on both multiply DAG nodes and SHL DAG nodes with a constant shift amount.

Definition at line 5458 of file NVPTXISelLowering.cpp.

References AreMulWideOperandsDemotable(), llvm::BitWidth, llvm::TargetLowering::DAGCombinerInfo::DAG, DL, llvm::dyn_cast(), llvm::ConstantSDNode::getAPIntValue(), llvm::SelectionDAG::getConstant(), llvm::SelectionDAG::getNode(), llvm::EVT::getSizeInBits(), llvm::isa(), LHS, llvm::ISD::MUL, llvm::NVPTXISD::MUL_WIDE_SIGNED, llvm::NVPTXISD::MUL_WIDE_UNSIGNED, N, Opc, RHS, SDValue(), llvm::APInt::sge(), llvm::ISD::SHL, Signed, llvm::APInt::slt(), std::swap(), and llvm::ISD::TRUNCATE.

Referenced by PerformMULCombine(), and PerformSHLCombine().

◆ VectorizePTXValueVTs()

Variable Documentation

◆ FMAContractLevelOpt

cl::opt< unsigned > FMAContractLevelOpt("nvptx-fma-level", cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" " 1: do it 2: do it aggressively"), cl::init(2)) ( "nvptx-fma-level" ,
cl::Hidden  )
static

◆ ForceMinByValParamAlign

cl::opt< bool > ForceMinByValParamAlign("nvptx-force-min-byval-param-align", cl::Hidden, cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions."), cl::init(false)) ( "nvptx-force-min-byval-param-align" ,
cl::Hidden ,
cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval" " params of device functions.") ,
cl::init(false)  )
static

◆ sched4reg

cl::opt< bool > sched4reg("nvptx-sched4reg", cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)) ( "nvptx-sched4reg" ,
cl::desc("NVPTX Specific: schedule for register pressue") ,
cl::init(false)  )
static

◆ UseApproxLog2F32

cl::opt< bool > UseApproxLog2F32("nvptx-approx-log2f32", cl::desc("NVPTX Specific: whether to use lg2.approx for log2"), cl::init(false)) ( "nvptx-approx-log2f32" ,
cl::desc("NVPTX Specific: whether to use lg2.approx for log2") ,
cl::init(false)  )
static

Whereas CUDA's implementation (see libdevice) uses ex2.approx for exp2(), it does NOT use lg2.approx for log2, so this is disabled by default.

Referenced by llvm::NVPTXTargetLowering::NVPTXTargetLowering().

◆ UsePrecDivF32

cl::opt< NVPTX::DivPrecisionLevel > UsePrecDivF32("nvptx-prec-divf32", cl::Hidden, cl::desc( "NVPTX Specific: Override the precision of the lowering for f32 fdiv"), cl::values( clEnumValN(NVPTX::DivPrecisionLevel::Approx, "0", "Use div.approx"), clEnumValN(NVPTX::DivPrecisionLevel::Full, "1", "Use div.full"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754, "2", "Use IEEE Compliant F32 div.rnd if available (default)"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754_NoFTZ, "3", "Use IEEE Compliant F32 div.rnd if available, no FTZ")), cl::init(NVPTX::DivPrecisionLevel::IEEE754)) ( "nvptx-prec-divf32" ,
cl::Hidden ,
cl::desc( "NVPTX Specific: Override the precision of the lowering for f32 fdiv") ,
cl::values( clEnumValN(NVPTX::DivPrecisionLevel::Approx, "0", "Use div.approx"), clEnumValN(NVPTX::DivPrecisionLevel::Full, "1", "Use div.full"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754, "2", "Use IEEE Compliant F32 div.rnd if available (default)"), clEnumValN(NVPTX::DivPrecisionLevel::IEEE754_NoFTZ, "3", "Use IEEE Compliant F32 div.rnd if available, no FTZ")) ,
cl::init(NVPTX::DivPrecisionLevel::IEEE754)  )
static

◆ UsePrecSqrtF32

cl::opt< bool > UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden, cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true)) ( "nvptx-prec-sqrtf32" ,
cl::Hidden ,
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn.") ,
cl::init(true)  )
static