23#include "llvm/IR/IntrinsicsDirectX.h"
31#define DEBUG_TYPE "dxil-intrinsic-expansion"
45 switch (
F.getIntrinsicID()) {
47 case Intrinsic::atan2:
50 case Intrinsic::log10:
52 case Intrinsic::dx_all:
53 case Intrinsic::dx_any:
54 case Intrinsic::dx_cross:
55 case Intrinsic::dx_uclamp:
56 case Intrinsic::dx_sclamp:
57 case Intrinsic::dx_nclamp:
58 case Intrinsic::dx_degrees:
59 case Intrinsic::dx_lerp:
60 case Intrinsic::dx_length:
61 case Intrinsic::dx_normalize:
62 case Intrinsic::dx_fdot:
63 case Intrinsic::dx_sdot:
64 case Intrinsic::dx_udot:
65 case Intrinsic::dx_sign:
66 case Intrinsic::dx_step:
67 case Intrinsic::dx_radians:
68 case Intrinsic::vector_reduce_add:
69 case Intrinsic::vector_reduce_fadd:
75 assert(IntrinsicId == Intrinsic::vector_reduce_add ||
76 IntrinsicId == Intrinsic::vector_reduce_fadd);
79 bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
82 Type *Ty =
X->getType();
83 auto *XVec = dyn_cast<FixedVectorType>(Ty);
84 unsigned XVecSize = XVec->getNumElements();
95 for (
unsigned I = 1;
I < XVecSize;
I++) {
109 Type *Ty =
X->getType();
115 ConstantInt::get(EltTy, 0))
116 : ConstantInt::get(EltTy, 0);
147 Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
148 Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
149 Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
162 Type *ATy =
A->getType();
163 [[maybe_unused]]
Type *BTy =
B->getType();
168 auto *AVec = dyn_cast<FixedVectorType>(ATy);
173 switch (AVec->getNumElements()) {
175 DotIntrinsic = Intrinsic::dx_dot2;
178 DotIntrinsic = Intrinsic::dx_dot3;
181 DotIntrinsic = Intrinsic::dx_dot4;
185 Twine(
"Invalid dot product input vector: length is outside 2-4"),
204 assert(DotIntrinsic == Intrinsic::dx_sdot ||
205 DotIntrinsic == Intrinsic::dx_udot);
208 Type *ATy =
A->getType();
209 [[maybe_unused]]
Type *BTy =
B->getType();
214 auto *AVec = dyn_cast<FixedVectorType>(ATy);
219 Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
221 : Intrinsic::dx_umad;
225 for (
unsigned I = 1;
I < AVec->getNumElements();
I++) {
238 Type *Ty =
X->getType();
248 Builder.
CreateIntrinsic(Ty, Intrinsic::exp2, {NewX},
nullptr,
"dx.exp2");
258 Type *Ty =
X->getType();
263 if (IntrinsicId == Intrinsic::dx_any)
264 return Builder.
CreateOr(Result, Elt);
265 assert(IntrinsicId == Intrinsic::dx_all);
269 Value *Result =
nullptr;
275 auto *XVec = dyn_cast<FixedVectorType>(Ty);
281 ConstantFP::get(EltTy, 0)))
285 ConstantInt::get(EltTy, 0)));
287 for (
unsigned I = 1;
I < XVec->getNumElements();
I++) {
289 Result = ApplyOp(intrinsicId, Result, Elt);
298 Type *Ty =
X->getType();
305 auto *XVec = dyn_cast<FixedVectorType>(Ty);
306 unsigned XVecSize = XVec->getNumElements();
312 for (
unsigned I = 1;
I < XVecSize;
I++) {
318 nullptr,
"elt.sqrt");
335 Type *Ty =
X->getType();
341 ConstantFP::get(EltTy, LogConstVal))
342 : ConstantFP::get(EltTy, LogConstVal);
347 return Builder.
CreateFMul(Ln2Const, Log2Call);
361 auto *XVec = dyn_cast<FixedVectorType>(Ty);
363 if (
auto *constantFP = dyn_cast<ConstantFP>(
X)) {
364 const APFloat &fpVal = constantFP->getValueAPF();
376 if (
auto *constantFP = dyn_cast<ConstantFP>(DotProduct)) {
377 const APFloat &fpVal = constantFP->getValueAPF();
385 nullptr,
"dx.rsqrt");
387 Value *MultiplicandVec =
395 Type *Ty =
X->getType();
402 Builder.
CreateIntrinsic(Ty, Intrinsic::atan, {Tan},
nullptr,
"Elt.Atan");
410 Constant *Zero = ConstantFP::get(Ty, 0);
415 Value *Result = Atan;
423 Result = Builder.
CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
427 Result = Builder.
CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
431 Result = Builder.
CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
435 Result = Builder.
CreateSelect(XEq0AndYGe0, HalfPi, Result);
444 Type *Ty =
X->getType();
461 Type *Ty =
X->getType();
469 auto *XVec = dyn_cast<FixedVectorType>(Ty);
481 Type *Ty =
X->getType();
488 if (ClampIntrinsic == Intrinsic::dx_uclamp)
489 return Intrinsic::umax;
490 if (ClampIntrinsic == Intrinsic::dx_sclamp)
491 return Intrinsic::smax;
492 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
493 return Intrinsic::maxnum;
497 if (ClampIntrinsic == Intrinsic::dx_uclamp)
498 return Intrinsic::umin;
499 if (ClampIntrinsic == Intrinsic::dx_sclamp)
500 return Intrinsic::smin;
501 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
502 return Intrinsic::minnum;
510 Type *Ty =
X->getType();
513 {
X, Min},
nullptr,
"dx.max");
515 {MaxCall, Max},
nullptr,
"dx.min");
520 Type *Ty =
X->getType();
528 Type *Ty =
X->getType();
549 return Builder.
CreateSub(ZextGT, ZextLT);
553 Value *Result =
nullptr;
555 switch (IntrinsicId) {
559 case Intrinsic::atan2:
568 case Intrinsic::log10:
574 case Intrinsic::dx_all:
575 case Intrinsic::dx_any:
578 case Intrinsic::dx_cross:
581 case Intrinsic::dx_uclamp:
582 case Intrinsic::dx_sclamp:
583 case Intrinsic::dx_nclamp:
586 case Intrinsic::dx_degrees:
589 case Intrinsic::dx_lerp:
592 case Intrinsic::dx_length:
595 case Intrinsic::dx_normalize:
598 case Intrinsic::dx_fdot:
601 case Intrinsic::dx_sdot:
602 case Intrinsic::dx_udot:
605 case Intrinsic::dx_sign:
608 case Intrinsic::dx_step:
611 case Intrinsic::dx_radians:
614 case Intrinsic::vector_reduce_add:
615 case Intrinsic::vector_reduce_fadd:
631 bool IntrinsicExpanded =
false;
633 auto *IntrinsicCall = dyn_cast<CallInst>(U);
638 if (
F.user_empty() && IntrinsicExpanded)
658 "DXIL Intrinsic Expansion",
false,
false)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static Value * expandNormalizeIntrinsic(CallInst *Orig)
static bool expandIntrinsic(Function &F, CallInst *Orig)
static Value * expandLengthIntrinsic(CallInst *Orig)
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
static bool expansionIntrinsics(Module &M)
static Value * expandLerpIntrinsic(CallInst *Orig)
static Value * expandCrossIntrinsic(CallInst *Orig)
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandAtan2Intrinsic(CallInst *Orig)
static Value * expandLog10Intrinsic(CallInst *Orig)
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandStepIntrinsic(CallInst *Orig)
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
static Value * expandPowIntrinsic(CallInst *Orig)
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
static Value * expandDegreesIntrinsic(CallInst *Orig)
static Value * expandExpIntrinsic(CallInst *Orig)
static Value * expandSignIntrinsic(CallInst *Orig)
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID intrinsicId)
static Value * expandAbs(CallInst *Orig)
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
static Value * expandRadiansIntrinsic(CallInst *Orig)
static bool isIntrinsicExpansion(Function &F)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This file defines the SmallVector class.
bool runOnModule(Module &M) override
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
DXILIntrinsicExpansionLegacy()
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
void setAttributes(AttributeList A)
Set the attributes for this call.
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCall(bool IsTc=true)
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
bool isZeroValue() const
Return true if the value is negative zero or null value.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static constexpr ElementCount getFixed(ScalarTy MinVal)
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Value * CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
StringRef getName() const
Return a constant reference to the value's name.
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.