24#include "llvm/IR/IntrinsicsDirectX.h"
34#define DEBUG_TYPE "dxil-intrinsic-expansion"
49 if (IsRaw && M->getTargetTriple().getDXILVersion() >
VersionTuple(1, 2))
58 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
63 if (!ValTy->getScalarType()->isHalfTy())
73 ConstantInt::get(IType, 0x7c00))
74 : ConstantInt::get(IType, 0x7c00);
81 ConstantInt::get(IType, 0xfc00))
82 : ConstantInt::get(IType, 0xfc00);
84 Value *IVal = Builder.CreateBitCast(Val, PosInf->
getType());
85 Value *B1 = Builder.CreateICmpEQ(IVal, PosInf);
86 Value *B2 = Builder.CreateICmpEQ(IVal, NegInf);
87 Value *B3 = Builder.CreateOr(B1, B2);
93 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
98 if (!ValTy->getScalarType()->isHalfTy())
109 ConstantInt::get(IType, 0x7c00))
110 : ConstantInt::get(IType, 0x7c00);
116 ConstantInt::get(IType, 0x3ff))
117 : ConstantInt::get(IType, 0x3ff);
124 ConstantInt::get(IType, 0))
125 : ConstantInt::get(IType, 0);
127 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
128 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
129 Value *B1 = Builder.CreateICmpEQ(Exp, ExpBitMask);
131 Value *Sig = Builder.CreateAnd(IVal, SigBitMask);
132 Value *B2 = Builder.CreateICmpNE(Sig, Zero);
133 Value *B3 = Builder.CreateAnd(B1, B2);
139 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
144 if (!ValTy->getScalarType()->isHalfTy())
155 ConstantInt::get(IType, 0x7c00))
156 : ConstantInt::get(IType, 0x7c00);
158 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
159 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
160 Value *B1 = Builder.CreateICmpNE(Exp, ExpBitMask);
166 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
171 if (!ValTy->getScalarType()->isHalfTy())
182 ConstantInt::get(IType, 0x7c00))
183 : ConstantInt::get(IType, 0x7c00);
189 ConstantInt::get(IType, 0))
190 : ConstantInt::get(IType, 0);
192 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
193 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
194 Value *NotAllZeroes = Builder.CreateICmpNE(Exp, Zero);
195 Value *NotAllOnes = Builder.CreateICmpNE(Exp, ExpBitMask);
196 Value *B1 = Builder.CreateAnd(NotAllZeroes, NotAllOnes);
201 switch (
F.getIntrinsicID()) {
202 case Intrinsic::assume:
204 case Intrinsic::atan2:
205 case Intrinsic::fshl:
206 case Intrinsic::fshr:
208 case Intrinsic::is_fpclass:
210 case Intrinsic::log10:
212 case Intrinsic::powi:
213 case Intrinsic::dx_all:
214 case Intrinsic::dx_any:
215 case Intrinsic::dx_cross:
216 case Intrinsic::dx_uclamp:
217 case Intrinsic::dx_sclamp:
218 case Intrinsic::dx_nclamp:
219 case Intrinsic::dx_degrees:
220 case Intrinsic::dx_isinf:
221 case Intrinsic::dx_isnan:
222 case Intrinsic::dx_lerp:
223 case Intrinsic::dx_normalize:
224 case Intrinsic::dx_fdot:
225 case Intrinsic::dx_sdot:
226 case Intrinsic::dx_udot:
227 case Intrinsic::dx_sign:
228 case Intrinsic::dx_step:
229 case Intrinsic::dx_radians:
230 case Intrinsic::usub_sat:
231 case Intrinsic::vector_reduce_add:
232 case Intrinsic::vector_reduce_fadd:
233 case Intrinsic::matrix_multiply:
234 case Intrinsic::matrix_transpose:
236 case Intrinsic::dx_resource_load_rawbuffer:
238 F.getParent(),
F.getReturnType()->getStructElementType(0),
240 case Intrinsic::dx_resource_load_typedbuffer:
242 F.getParent(),
F.getReturnType()->getStructElementType(0),
244 case Intrinsic::dx_resource_store_rawbuffer:
246 F.getParent(),
F.getFunctionType()->getParamType(3),
true);
247 case Intrinsic::dx_resource_store_typedbuffer:
249 F.getParent(),
F.getFunctionType()->getParamType(2),
false);
257 Type *Ty =
A->getType();
261 Value *Cmp = Builder.CreateICmpULT(
A,
B,
"usub.cmp");
262 Value *
Sub = Builder.CreateSub(
A,
B,
"usub.sub");
263 Value *Zero = ConstantInt::get(Ty, 0);
264 return Builder.CreateSelect(Cmp, Zero,
Sub,
"usub.sat");
268 assert(IntrinsicId == Intrinsic::vector_reduce_add ||
269 IntrinsicId == Intrinsic::vector_reduce_fadd);
272 bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
275 Type *Ty =
X->getType();
277 unsigned XVecSize = XVec->getNumElements();
278 Value *Sum = Builder.CreateExtractElement(
X,
static_cast<uint64_t>(0));
284 Sum = Builder.CreateFAdd(Sum, StartValue);
288 for (
unsigned I = 1;
I < XVecSize;
I++) {
289 Value *Elt = Builder.CreateExtractElement(
X,
I);
291 Sum = Builder.CreateFAdd(Sum, Elt);
293 Sum = Builder.CreateAdd(Sum, Elt);
302 Type *Ty =
X->getType();
308 ConstantInt::get(EltTy, 0))
309 : ConstantInt::get(EltTy, 0);
310 auto *V = Builder.CreateSub(Zero,
X);
311 return Builder.CreateIntrinsic(Ty, Intrinsic::smax, {
X, V},
nullptr,
325 Value *op0_x = Builder.CreateExtractElement(op0, (
uint64_t)0,
"x0");
326 Value *op0_y = Builder.CreateExtractElement(op0, 1,
"x1");
327 Value *op0_z = Builder.CreateExtractElement(op0, 2,
"x2");
329 Value *op1_x = Builder.CreateExtractElement(op1, (
uint64_t)0,
"y0");
330 Value *op1_y = Builder.CreateExtractElement(op1, 1,
"y1");
331 Value *op1_z = Builder.CreateExtractElement(op1, 2,
"y2");
334 Value *xy = Builder.CreateFMul(x0, y1);
335 Value *yx = Builder.CreateFMul(y0, x1);
336 return Builder.CreateFSub(xy, yx, Orig->
getName());
339 Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
340 Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
341 Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
344 cross = Builder.CreateInsertElement(cross, yz_zy, (
uint64_t)0);
345 cross = Builder.CreateInsertElement(cross, zx_xz, 1);
346 cross = Builder.CreateInsertElement(cross, xy_yx, 2);
354 Type *ATy =
A->getType();
355 [[maybe_unused]]
Type *BTy =
B->getType();
365 int NumElts = AVec->getNumElements();
368 DotIntrinsic = Intrinsic::dx_dot2;
371 DotIntrinsic = Intrinsic::dx_dot3;
374 DotIntrinsic = Intrinsic::dx_dot4;
378 "Invalid dot product input vector: length is outside 2-4");
383 for (
int I = 0;
I < NumElts; ++
I)
384 Args.push_back(Builder.CreateExtractElement(
A, Builder.getInt32(
I)));
385 for (
int I = 0;
I < NumElts; ++
I)
386 Args.push_back(Builder.CreateExtractElement(
B, Builder.getInt32(
I)));
387 return Builder.CreateIntrinsic(ATy->
getScalarType(), DotIntrinsic, Args,
402 assert(DotIntrinsic == Intrinsic::dx_sdot ||
403 DotIntrinsic == Intrinsic::dx_udot);
406 Type *ATy =
A->getType();
407 [[maybe_unused]]
Type *BTy =
B->getType();
417 Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
419 : Intrinsic::dx_umad;
422 Result = Builder.CreateMul(Elt0, Elt1);
423 for (
unsigned I = 1;
I < AVec->getNumElements();
I++) {
424 Elt0 = Builder.CreateExtractElement(
A,
I);
425 Elt1 = Builder.CreateExtractElement(
B,
I);
426 Result = Builder.CreateIntrinsic(Result->getType(), MadIntrinsic,
436 Type *Ty =
X->getType();
444 Value *NewX = Builder.CreateFMul(Log2eConst,
X);
446 Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {NewX},
nullptr,
"dx.exp2");
458 switch (TCI->getZExtValue()) {
472 Type *FTy =
F->getType();
473 unsigned FNumElem = 0;
479 Type *ElemTy = FVecTy->getElementType();
480 FNumElem = FVecTy->getNumElements();
481 BitWidth = ElemTy->getPrimitiveSizeInBits();
488 Value *FBitCast = Builder.CreateBitCast(
F, BitCastTy);
489 switch (TCI->getZExtValue()) {
496 Value *NegZeroSplat = Builder.CreateVectorSplat(FNumElem, NegZero);
498 Builder.CreateICmpEQ(FBitCast, NegZeroSplat,
"is.fpclass.negzero");
500 RetVal = Builder.CreateICmpEQ(FBitCast, NegZero,
"is.fpclass.negzero");
512 Type *Ty =
X->getType();
517 if (IntrinsicId == Intrinsic::dx_any)
518 return Builder.CreateOr(Result, Elt);
519 assert(IntrinsicId == Intrinsic::dx_all);
520 return Builder.CreateAnd(Result, Elt);
523 Value *Result =
nullptr;
524 if (!Ty->isVectorTy()) {
526 ? Builder.CreateFCmpUNE(
X, ConstantFP::get(EltTy, 0))
527 : Builder.CreateICmpNE(
X, ConstantInt::get(EltTy, 0));
532 ? Builder.CreateFCmpUNE(
535 ConstantFP::get(EltTy, 0)))
536 : Builder.CreateICmpNE(
539 ConstantInt::get(EltTy, 0)));
540 Result = Builder.CreateExtractElement(
Cond, (
uint64_t)0);
541 for (
unsigned I = 1;
I < XVec->getNumElements();
I++) {
542 Value *Elt = Builder.CreateExtractElement(
Cond,
I);
543 Result = ApplyOp(IntrinsicId, Result, Elt);
554 auto *V = Builder.CreateFSub(
Y,
X);
555 V = Builder.CreateFMul(S, V);
556 return Builder.CreateFAdd(
X, V,
"dx.lerp");
563 Type *Ty =
X->getType();
569 ConstantFP::get(EltTy, LogConstVal))
570 : ConstantFP::get(EltTy, LogConstVal);
572 Builder.CreateIntrinsic(Ty, Intrinsic::log2, {
X},
nullptr,
"elt.log2");
575 return Builder.CreateFMul(Ln2Const, Log2Call);
592 const APFloat &fpVal = constantFP->getValueAPF();
596 return Builder.CreateFDiv(
X,
X);
604 const APFloat &fpVal = constantFP->getValueAPF();
609 Value *Multiplicand = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_rsqrt,
611 nullptr,
"dx.rsqrt");
613 Value *MultiplicandVec =
614 Builder.CreateVectorSplat(XVec->getNumElements(), Multiplicand);
615 return Builder.CreateFMul(
X, MultiplicandVec);
621 Type *Ty =
X->getType();
625 Value *Tan = Builder.CreateFDiv(
Y,
X);
628 Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan},
nullptr,
"Elt.Atan");
636 Constant *Zero = ConstantFP::get(Ty, 0);
637 Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi);
638 Value *AtanSubPi = Builder.CreateFSub(Atan, Pi);
641 Value *Result = Atan;
642 Value *XLt0 = Builder.CreateFCmpOLT(
X, Zero);
643 Value *XEq0 = Builder.CreateFCmpOEQ(
X, Zero);
644 Value *YGe0 = Builder.CreateFCmpOGE(
Y, Zero);
645 Value *YLt0 = Builder.CreateFCmpOLT(
Y, Zero);
648 Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0);
649 Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
652 Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0);
653 Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
656 Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0);
657 Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
660 Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0);
661 Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result);
666template <
bool LeftFunnel>
675 unsigned BitWidth = Ty->getScalarSizeInBits();
677 "Can't use Mask to compute modulo and inverse");
692 Constant *Mask = ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1);
697 Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
702 Value *NotShift = Builder.CreateNot(Shift);
703 Value *InverseShift = Builder.CreateAnd(NotShift, Mask);
705 Constant *One = ConstantInt::get(Ty, 1);
710 ShiftedA = Builder.CreateShl(
A, MaskedShift);
711 Value *ShiftB1 = Builder.CreateLShr(
B, One);
712 ShiftedB = Builder.CreateLShr(ShiftB1, InverseShift);
714 Value *ShiftA1 = Builder.CreateShl(
A, One);
715 ShiftedA = Builder.CreateShl(ShiftA1, InverseShift);
716 ShiftedB = Builder.CreateLShr(
B, MaskedShift);
719 Value *Result = Builder.CreateOr(ShiftedA, ShiftedB);
727 Type *Ty =
X->getType();
730 if (IntrinsicId == Intrinsic::powi)
731 Y = Builder.CreateSIToFP(
Y, Ty);
734 Builder.CreateIntrinsic(Ty, Intrinsic::log2, {
X},
nullptr,
"elt.log2");
735 auto *
Mul = Builder.CreateFMul(Log2Call,
Y);
737 Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {
Mul},
nullptr,
"elt.exp2");
747 Type *Ty =
X->getType();
750 Constant *One = ConstantFP::get(Ty->getScalarType(), 1.0);
751 Constant *Zero = ConstantFP::get(Ty->getScalarType(), 0.0);
754 if (Ty != Ty->getScalarType()) {
762 return Builder.CreateSelect(
Cond, Zero, One);
767 Type *Ty =
X->getType();
770 return Builder.CreateFMul(
X, PiOver180);
780 "Only expand double or int64 scalars or vectors");
781 bool IsVector =
false;
782 unsigned ExtractNum = 2;
784 ExtractNum = 2 * VT->getNumElements();
786 assert(IsRaw || ExtractNum == 4 &&
"TypedBufferLoad vector must be size 2");
795 while (ExtractNum > 0) {
796 unsigned LoadNum = std::min(ExtractNum, 4u);
800 Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
803 LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
804 Value *Tmp = Builder.getInt32(4 *
Base * 2);
805 Args.push_back(Builder.CreateAdd(Orig->
getOperand(2), Tmp));
808 CallInst *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
812 Value *Extract = Builder.CreateExtractValue(Load, {0});
815 for (
unsigned I = 0;
I < LoadNum; ++
I)
817 Builder.CreateExtractElement(Extract, Builder.getInt32(
I)));
820 for (
unsigned I = 0;
I < LoadNum;
I += 2) {
821 Value *Combined =
nullptr;
824 Combined = Builder.CreateIntrinsic(
825 Builder.getDoubleTy(), Intrinsic::dx_asdouble,
826 {ExtractElements[I], ExtractElements[I + 1]});
831 Builder.CreateZExt(ExtractElements[
I], Builder.getInt64Ty());
833 Builder.CreateZExt(ExtractElements[
I + 1], Builder.getInt64Ty());
835 Value *ShiftedHi = Builder.CreateShl(
Hi, Builder.getInt64(32));
837 Combined = Builder.CreateOr(
Lo, ShiftedHi);
841 Result = Builder.CreateInsertElement(Result, Combined,
842 Builder.getInt32((
I / 2) +
Base));
847 ExtractNum -= LoadNum;
851 Value *CheckBit =
nullptr;
862 if (Indices[0] == 0) {
864 EVI->replaceAllUsesWith(Result);
867 assert(Indices[0] == 1 &&
"Unexpected type for typedbufferload");
872 for (
Value *L : Loads)
873 CheckBits.
push_back(Builder.CreateExtractValue(L, {1}));
874 CheckBit = Builder.CreateAnd(CheckBits);
876 EVI->replaceAllUsesWith(CheckBit);
878 EVI->eraseFromParent();
887 unsigned ValIndex = IsRaw ? 3 : 2;
892 "Only expand double or int64 scalars or vectors");
895 bool IsVector =
false;
896 unsigned ExtractNum = 2;
899 VecLen = VT->getNumElements();
900 assert(IsRaw || VecLen == 2 &&
"TypedBufferStore vector must be size 2");
901 ExtractNum = VecLen * 2;
914 Value *LowBits =
nullptr;
915 Value *HighBits =
nullptr;
919 Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
921 LowBits = Builder.CreateExtractValue(Split, 0);
922 HighBits = Builder.CreateExtractValue(Split, 1);
926 Constant *ShiftAmt = Builder.getInt64(32);
932 LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
933 Value *ShiftedVal = Builder.CreateLShr(InputVal, ShiftAmt);
934 HighBits = Builder.CreateTrunc(ShiftedVal, SplitElementTy);
939 for (
unsigned I = 0;
I < VecLen; ++
I) {
941 Mask.push_back(
I + VecLen);
943 Val = Builder.CreateShuffleVector(LowBits, HighBits, Mask);
945 Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
946 Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
953 while (ExtractNum > 0) {
954 unsigned StoreNum = std::min(ExtractNum, 4u);
956 Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
959 StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
961 Args.push_back(Builder.CreateAdd(Orig->
getOperand(2), Tmp));
965 for (
unsigned I = 0;
I < StoreNum; ++
I) {
966 Mask.push_back(
Base +
I);
971 SubVal = Builder.CreateShuffleVector(Val, Mask);
973 Args.push_back(SubVal);
975 Builder.CreateIntrinsic(Builder.getVoidTy(), StoreIntrinsic, Args);
977 ExtractNum -= StoreNum;
985 if (ClampIntrinsic == Intrinsic::dx_uclamp)
986 return Intrinsic::umax;
987 if (ClampIntrinsic == Intrinsic::dx_sclamp)
988 return Intrinsic::smax;
989 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
990 return Intrinsic::maxnum;
994 if (ClampIntrinsic == Intrinsic::dx_uclamp)
995 return Intrinsic::umin;
996 if (ClampIntrinsic == Intrinsic::dx_sclamp)
997 return Intrinsic::smin;
998 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
999 return Intrinsic::minnum;
1007 Type *Ty =
X->getType();
1009 auto *MaxCall = Builder.CreateIntrinsic(Ty,
getMaxForClamp(ClampIntrinsic),
1010 {
X, Min},
nullptr,
"dx.max");
1011 return Builder.CreateIntrinsic(Ty,
getMinForClamp(ClampIntrinsic),
1012 {MaxCall, Max},
nullptr,
"dx.min");
1017 Type *Ty =
X->getType();
1020 return Builder.CreateFMul(
X, DegreesRatio);
1025 Type *Ty =
X->getType();
1035 GT = Builder.CreateFCmpOLT(Zero,
X);
1036 LT = Builder.CreateFCmpOLT(
X, Zero);
1039 GT = Builder.CreateICmpSLT(Zero,
X);
1040 LT = Builder.CreateICmpSLT(
X, Zero);
1043 Value *ZextGT = Builder.CreateZExt(GT, RetTy);
1044 Value *ZextLT = Builder.CreateZExt(LT, RetTy);
1046 return Builder.CreateSub(ZextGT, ZextLT);
1061 Type *EltTy = RetTy->getElementType();
1072 unsigned LHSSize = LHSRows * LHSCols;
1073 unsigned RHSSize = LHSCols * RHSCols;
1076 for (
unsigned I = 0;
I < LHSSize; ++
I)
1077 LHSElts[
I] = Builder.CreateExtractElement(
LHS,
I);
1078 for (
unsigned I = 0;
I < RHSSize; ++
I)
1079 RHSElts[
I] = Builder.CreateExtractElement(
RHS,
I);
1084 bool UseScalarFP = IsFP && (EltTy->
isDoubleTy() || LHSCols == 1);
1085 if (IsFP && !UseScalarFP) {
1088 FloatDotID = Intrinsic::dx_dot2;
1091 FloatDotID = Intrinsic::dx_dot3;
1094 FloatDotID = Intrinsic::dx_dot4;
1098 "Invalid matrix inner dimension for dot product: must be 2-4");
1103 for (
unsigned C = 0;
C < RHSCols; ++
C) {
1104 for (
unsigned R = 0; R < LHSRows; ++R) {
1107 for (
unsigned K = 0; K < LHSCols; ++K) {
1108 RowElts.
push_back(LHSElts[K * LHSRows + R]);
1115 Dot = Builder.CreateFMul(RowElts[0], ColElts[0]);
1116 for (
unsigned K = 1; K < LHSCols; ++K)
1117 Dot = Builder.CreateIntrinsic(EltTy, Intrinsic::fmuladd,
1118 {RowElts[K], ColElts[K], Dot});
1122 Args.append(RowElts.
begin(), RowElts.
end());
1123 Args.append(ColElts.
begin(), ColElts.
end());
1124 Dot = Builder.CreateIntrinsic(EltTy, FloatDotID, Args);
1127 Dot = Builder.CreateMul(RowElts[0], ColElts[0]);
1128 for (
unsigned K = 1; K < LHSCols; ++K)
1129 Dot = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_imad,
1130 {RowElts[K], ColElts[K], Dot});
1132 unsigned ResIdx =
C * LHSRows + R;
1133 Result = Builder.CreateInsertElement(Result, Dot, ResIdx);
1147 unsigned NumElts = Rows * Cols;
1149 for (
unsigned I = 0;
I < NumElts; ++
I)
1150 Mask[
I] = (
I % Cols) * Rows + (
I / Cols);
1153 return Builder.CreateShuffleVector(Mat, Mask);
1157 Value *Result =
nullptr;
1159 switch (IntrinsicId) {
1160 case Intrinsic::abs:
1163 case Intrinsic::assume:
1166 case Intrinsic::atan2:
1169 case Intrinsic::fshl:
1172 case Intrinsic::fshr:
1175 case Intrinsic::exp:
1178 case Intrinsic::is_fpclass:
1181 case Intrinsic::log:
1184 case Intrinsic::log10:
1187 case Intrinsic::pow:
1188 case Intrinsic::powi:
1191 case Intrinsic::dx_all:
1192 case Intrinsic::dx_any:
1195 case Intrinsic::dx_cross:
1198 case Intrinsic::dx_uclamp:
1199 case Intrinsic::dx_sclamp:
1200 case Intrinsic::dx_nclamp:
1203 case Intrinsic::dx_degrees:
1206 case Intrinsic::dx_isinf:
1209 case Intrinsic::dx_isnan:
1212 case Intrinsic::dx_lerp:
1215 case Intrinsic::dx_normalize:
1218 case Intrinsic::dx_fdot:
1221 case Intrinsic::dx_sdot:
1222 case Intrinsic::dx_udot:
1225 case Intrinsic::dx_sign:
1228 case Intrinsic::dx_step:
1231 case Intrinsic::dx_radians:
1234 case Intrinsic::dx_resource_load_rawbuffer:
1238 case Intrinsic::dx_resource_store_rawbuffer:
1242 case Intrinsic::dx_resource_load_typedbuffer:
1246 case Intrinsic::dx_resource_store_typedbuffer:
1250 case Intrinsic::usub_sat:
1253 case Intrinsic::vector_reduce_add:
1254 case Intrinsic::vector_reduce_fadd:
1257 case Intrinsic::matrix_multiply:
1260 case Intrinsic::matrix_transpose:
1276 bool IntrinsicExpanded =
false;
1283 if (
F.user_empty() && IntrinsicExpanded)
1284 F.eraseFromParent();
1303 "DXIL Intrinsic Expansion",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static Value * expand16BitIsNormal(CallInst *Orig)
static Value * expandNormalizeIntrinsic(CallInst *Orig)
static bool expandIntrinsic(Function &F, CallInst *Orig)
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
static Value * expand16BitIsInf(CallInst *Orig)
static bool expansionIntrinsics(Module &M)
static Value * expand16BitIsFinite(CallInst *Orig)
static Value * expandLerpIntrinsic(CallInst *Orig)
static Value * expandCrossIntrinsic(CallInst *Orig)
static Value * expandUsubSat(CallInst *Orig)
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandMatrixTranspose(CallInst *Orig)
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandAtan2Intrinsic(CallInst *Orig)
static Value * expandLog10Intrinsic(CallInst *Orig)
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandStepIntrinsic(CallInst *Orig)
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
static Value * expandDegreesIntrinsic(CallInst *Orig)
static Value * expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy, bool IsRaw)
static Value * expandExpIntrinsic(CallInst *Orig)
static Value * expand16BitIsNaN(CallInst *Orig)
static Value * expandSignIntrinsic(CallInst *Orig)
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandAbs(CallInst *Orig)
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
static Value * expandRadiansIntrinsic(CallInst *Orig)
static bool isIntrinsicExpansion(Function &F)
static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandMatrixMultiply(CallInst *Orig)
static Value * expandIsFPClass(CallInst *Orig)
static Value * expandFunnelShiftIntrinsic(CallInst *Orig)
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
bool runOnModule(Module &M) override
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
DXILIntrinsicExpansionLegacy()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCall(bool IsTc=true)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Type * getParamType(unsigned i) const
Parameter type accessors.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI Type * getStructElementType(unsigned N) const
bool isVectorTy() const
True if this is an instance of VectorType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Represents a version number in the form major[.minor[.subminor[.build]]].
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.
@ Sub
Subtraction of integers.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.