23#include "llvm/IR/IntrinsicsDirectX.h"
31#define DEBUG_TYPE "dxil-intrinsic-expansion"
45 switch (
F.getIntrinsicID()) {
47 case Intrinsic::atan2:
50 case Intrinsic::log10:
52 case Intrinsic::dx_all:
53 case Intrinsic::dx_any:
54 case Intrinsic::dx_cross:
55 case Intrinsic::dx_uclamp:
56 case Intrinsic::dx_sclamp:
57 case Intrinsic::dx_nclamp:
58 case Intrinsic::dx_degrees:
59 case Intrinsic::dx_lerp:
60 case Intrinsic::dx_normalize:
61 case Intrinsic::dx_fdot:
62 case Intrinsic::dx_sdot:
63 case Intrinsic::dx_udot:
64 case Intrinsic::dx_sign:
65 case Intrinsic::dx_step:
66 case Intrinsic::dx_radians:
67 case Intrinsic::vector_reduce_add:
68 case Intrinsic::vector_reduce_fadd:
74 assert(IntrinsicId == Intrinsic::vector_reduce_add ||
75 IntrinsicId == Intrinsic::vector_reduce_fadd);
78 bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
81 Type *Ty =
X->getType();
82 auto *XVec = dyn_cast<FixedVectorType>(Ty);
83 unsigned XVecSize = XVec->getNumElements();
94 for (
unsigned I = 1;
I < XVecSize;
I++) {
108 Type *Ty =
X->getType();
114 ConstantInt::get(EltTy, 0))
115 : ConstantInt::get(EltTy, 0);
146 Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
147 Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
148 Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
161 Type *ATy =
A->getType();
162 [[maybe_unused]]
Type *BTy =
B->getType();
167 auto *AVec = dyn_cast<FixedVectorType>(ATy);
172 switch (AVec->getNumElements()) {
174 DotIntrinsic = Intrinsic::dx_dot2;
177 DotIntrinsic = Intrinsic::dx_dot3;
180 DotIntrinsic = Intrinsic::dx_dot4;
184 Twine(
"Invalid dot product input vector: length is outside 2-4"),
203 assert(DotIntrinsic == Intrinsic::dx_sdot ||
204 DotIntrinsic == Intrinsic::dx_udot);
207 Type *ATy =
A->getType();
208 [[maybe_unused]]
Type *BTy =
B->getType();
213 auto *AVec = dyn_cast<FixedVectorType>(ATy);
218 Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
220 : Intrinsic::dx_umad;
224 for (
unsigned I = 1;
I < AVec->getNumElements();
I++) {
237 Type *Ty =
X->getType();
247 Builder.
CreateIntrinsic(Ty, Intrinsic::exp2, {NewX},
nullptr,
"dx.exp2");
257 Type *Ty =
X->getType();
262 if (IntrinsicId == Intrinsic::dx_any)
263 return Builder.
CreateOr(Result, Elt);
264 assert(IntrinsicId == Intrinsic::dx_all);
268 Value *Result =
nullptr;
274 auto *XVec = dyn_cast<FixedVectorType>(Ty);
280 ConstantFP::get(EltTy, 0)))
284 ConstantInt::get(EltTy, 0)));
286 for (
unsigned I = 1;
I < XVec->getNumElements();
I++) {
288 Result = ApplyOp(intrinsicId, Result, Elt);
308 Type *Ty =
X->getType();
314 ConstantFP::get(EltTy, LogConstVal))
315 : ConstantFP::get(EltTy, LogConstVal);
320 return Builder.
CreateFMul(Ln2Const, Log2Call);
334 auto *XVec = dyn_cast<FixedVectorType>(Ty);
336 if (
auto *constantFP = dyn_cast<ConstantFP>(
X)) {
337 const APFloat &fpVal = constantFP->getValueAPF();
349 if (
auto *constantFP = dyn_cast<ConstantFP>(DotProduct)) {
350 const APFloat &fpVal = constantFP->getValueAPF();
358 nullptr,
"dx.rsqrt");
360 Value *MultiplicandVec =
368 Type *Ty =
X->getType();
375 Builder.
CreateIntrinsic(Ty, Intrinsic::atan, {Tan},
nullptr,
"Elt.Atan");
383 Constant *Zero = ConstantFP::get(Ty, 0);
388 Value *Result = Atan;
396 Result = Builder.
CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
400 Result = Builder.
CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
404 Result = Builder.
CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
408 Result = Builder.
CreateSelect(XEq0AndYGe0, HalfPi, Result);
417 Type *Ty =
X->getType();
434 Type *Ty =
X->getType();
442 auto *XVec = dyn_cast<FixedVectorType>(Ty);
454 Type *Ty =
X->getType();
461 if (ClampIntrinsic == Intrinsic::dx_uclamp)
462 return Intrinsic::umax;
463 if (ClampIntrinsic == Intrinsic::dx_sclamp)
464 return Intrinsic::smax;
465 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
466 return Intrinsic::maxnum;
470 if (ClampIntrinsic == Intrinsic::dx_uclamp)
471 return Intrinsic::umin;
472 if (ClampIntrinsic == Intrinsic::dx_sclamp)
473 return Intrinsic::smin;
474 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
475 return Intrinsic::minnum;
483 Type *Ty =
X->getType();
486 {
X, Min},
nullptr,
"dx.max");
488 {MaxCall, Max},
nullptr,
"dx.min");
493 Type *Ty =
X->getType();
501 Type *Ty =
X->getType();
522 return Builder.
CreateSub(ZextGT, ZextLT);
526 Value *Result =
nullptr;
528 switch (IntrinsicId) {
532 case Intrinsic::atan2:
541 case Intrinsic::log10:
547 case Intrinsic::dx_all:
548 case Intrinsic::dx_any:
551 case Intrinsic::dx_cross:
554 case Intrinsic::dx_uclamp:
555 case Intrinsic::dx_sclamp:
556 case Intrinsic::dx_nclamp:
559 case Intrinsic::dx_degrees:
562 case Intrinsic::dx_lerp:
565 case Intrinsic::dx_normalize:
568 case Intrinsic::dx_fdot:
571 case Intrinsic::dx_sdot:
572 case Intrinsic::dx_udot:
575 case Intrinsic::dx_sign:
578 case Intrinsic::dx_step:
581 case Intrinsic::dx_radians:
584 case Intrinsic::vector_reduce_add:
585 case Intrinsic::vector_reduce_fadd:
601 bool IntrinsicExpanded =
false;
603 auto *IntrinsicCall = dyn_cast<CallInst>(U);
608 if (
F.user_empty() && IntrinsicExpanded)
628 "DXIL Intrinsic Expansion",
false,
false)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static Value * expandNormalizeIntrinsic(CallInst *Orig)
static bool expandIntrinsic(Function &F, CallInst *Orig)
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
static bool expansionIntrinsics(Module &M)
static Value * expandLerpIntrinsic(CallInst *Orig)
static Value * expandCrossIntrinsic(CallInst *Orig)
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandAtan2Intrinsic(CallInst *Orig)
static Value * expandLog10Intrinsic(CallInst *Orig)
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandStepIntrinsic(CallInst *Orig)
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
static Value * expandPowIntrinsic(CallInst *Orig)
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
static Value * expandDegreesIntrinsic(CallInst *Orig)
static Value * expandExpIntrinsic(CallInst *Orig)
static Value * expandSignIntrinsic(CallInst *Orig)
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID intrinsicId)
static Value * expandAbs(CallInst *Orig)
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
static Value * expandRadiansIntrinsic(CallInst *Orig)
static bool isIntrinsicExpansion(Function &F)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file defines the SmallVector class.
bool runOnModule(Module &M) override
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
DXILIntrinsicExpansionLegacy()
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
void setAttributes(AttributeList A)
Set the attributes for this call.
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCall(bool IsTc=true)
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
bool isZeroValue() const
Return true if the value is negative zero or null value.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static constexpr ElementCount getFixed(ScalarTy MinVal)
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Value * CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports these flags.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
StringRef getName() const
Return a constant reference to the value's name.
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.