24#include "llvm/IR/IntrinsicsDirectX.h"
34#define DEBUG_TYPE "dxil-intrinsic-expansion"
49 if (IsRaw && M->getTargetTriple().getDXILVersion() >
VersionTuple(1, 2))
58 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
73 ConstantInt::get(IType, 0x7c00))
74 : ConstantInt::get(IType, 0x7c00);
81 ConstantInt::get(IType, 0xfc00))
82 : ConstantInt::get(IType, 0xfc00);
84 Value *IVal = Builder.CreateBitCast(Val, PosInf->
getType());
85 Value *B1 = Builder.CreateICmpEQ(IVal, PosInf);
86 Value *B2 = Builder.CreateICmpEQ(IVal, NegInf);
87 Value *B3 = Builder.CreateOr(B1, B2);
93 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
109 ConstantInt::get(IType, 0x7c00))
110 : ConstantInt::get(IType, 0x7c00);
116 ConstantInt::get(IType, 0x3ff))
117 : ConstantInt::get(IType, 0x3ff);
124 ConstantInt::get(IType, 0))
125 : ConstantInt::get(IType, 0);
127 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
128 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
129 Value *B1 = Builder.CreateICmpEQ(Exp, ExpBitMask);
131 Value *Sig = Builder.CreateAnd(IVal, SigBitMask);
132 Value *B2 = Builder.CreateICmpNE(Sig, Zero);
133 Value *B3 = Builder.CreateAnd(B1, B2);
139 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
155 ConstantInt::get(IType, 0x7c00))
156 : ConstantInt::get(IType, 0x7c00);
158 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
159 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
160 Value *B1 = Builder.CreateICmpNE(Exp, ExpBitMask);
166 if (M->getTargetTriple().getDXILVersion() >=
VersionTuple(1, 9))
182 ConstantInt::get(IType, 0x7c00))
183 : ConstantInt::get(IType, 0x7c00);
189 ConstantInt::get(IType, 0))
190 : ConstantInt::get(IType, 0);
192 Value *IVal = Builder.CreateBitCast(Val, ExpBitMask->
getType());
193 Value *Exp = Builder.CreateAnd(IVal, ExpBitMask);
194 Value *NotAllZeroes = Builder.CreateICmpNE(Exp, Zero);
195 Value *NotAllOnes = Builder.CreateICmpNE(Exp, ExpBitMask);
196 Value *B1 = Builder.CreateAnd(NotAllZeroes, NotAllOnes);
201 switch (
F.getIntrinsicID()) {
202 case Intrinsic::assume:
204 case Intrinsic::atan2:
205 case Intrinsic::fshl:
206 case Intrinsic::fshr:
208 case Intrinsic::is_fpclass:
210 case Intrinsic::log10:
212 case Intrinsic::powi:
213 case Intrinsic::dx_all:
214 case Intrinsic::dx_any:
215 case Intrinsic::dx_cross:
216 case Intrinsic::dx_uclamp:
217 case Intrinsic::dx_sclamp:
218 case Intrinsic::dx_nclamp:
219 case Intrinsic::dx_degrees:
220 case Intrinsic::dx_isinf:
221 case Intrinsic::dx_isnan:
222 case Intrinsic::dx_lerp:
223 case Intrinsic::dx_normalize:
224 case Intrinsic::dx_fdot:
225 case Intrinsic::dx_sdot:
226 case Intrinsic::dx_udot:
227 case Intrinsic::dx_sign:
228 case Intrinsic::dx_step:
229 case Intrinsic::dx_radians:
230 case Intrinsic::dx_interlocked_add:
231 case Intrinsic::usub_sat:
232 case Intrinsic::vector_reduce_add:
233 case Intrinsic::vector_reduce_fadd:
234 case Intrinsic::matrix_multiply:
235 case Intrinsic::matrix_transpose:
237 case Intrinsic::dx_resource_load_rawbuffer:
239 F.getParent(),
F.getReturnType()->getStructElementType(0),
241 case Intrinsic::dx_resource_load_typedbuffer:
243 F.getParent(),
F.getReturnType()->getStructElementType(0),
245 case Intrinsic::dx_resource_store_rawbuffer:
247 F.getParent(),
F.getFunctionType()->getParamType(3),
true);
248 case Intrinsic::dx_resource_store_typedbuffer:
250 F.getParent(),
F.getFunctionType()->getParamType(2),
false);
258 Type *Ty =
A->getType();
262 Value *Cmp = Builder.CreateICmpULT(
A,
B,
"usub.cmp");
263 Value *
Sub = Builder.CreateSub(
A,
B,
"usub.sub");
264 Value *Zero = ConstantInt::get(Ty, 0);
265 return Builder.CreateSelect(Cmp, Zero,
Sub,
"usub.sat");
269 assert(IntrinsicId == Intrinsic::vector_reduce_add ||
270 IntrinsicId == Intrinsic::vector_reduce_fadd);
273 bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
276 Type *Ty =
X->getType();
278 unsigned XVecSize = XVec->getNumElements();
279 Value *Sum = Builder.CreateExtractElement(
X,
static_cast<uint64_t>(0));
285 Sum = Builder.CreateFAdd(Sum, StartValue);
289 for (
unsigned I = 1;
I < XVecSize;
I++) {
290 Value *Elt = Builder.CreateExtractElement(
X,
I);
292 Sum = Builder.CreateFAdd(Sum, Elt);
294 Sum = Builder.CreateAdd(Sum, Elt);
303 Type *Ty =
X->getType();
309 ConstantInt::get(EltTy, 0))
310 : ConstantInt::get(EltTy, 0);
311 auto *V = Builder.CreateSub(Zero,
X);
312 return Builder.CreateIntrinsic(Ty, Intrinsic::smax, {
X, V},
nullptr,
326 Value *op0_x = Builder.CreateExtractElement(op0, (
uint64_t)0,
"x0");
327 Value *op0_y = Builder.CreateExtractElement(op0, 1,
"x1");
328 Value *op0_z = Builder.CreateExtractElement(op0, 2,
"x2");
330 Value *op1_x = Builder.CreateExtractElement(op1, (
uint64_t)0,
"y0");
331 Value *op1_y = Builder.CreateExtractElement(op1, 1,
"y1");
332 Value *op1_z = Builder.CreateExtractElement(op1, 2,
"y2");
335 Value *xy = Builder.CreateFMul(x0, y1);
336 Value *yx = Builder.CreateFMul(y0, x1);
337 return Builder.CreateFSub(xy, yx, Orig->
getName());
340 Value *yz_zy = MulSub(op0_y, op0_z, op1_y, op1_z);
341 Value *zx_xz = MulSub(op0_z, op0_x, op1_z, op1_x);
342 Value *xy_yx = MulSub(op0_x, op0_y, op1_x, op1_y);
345 cross = Builder.CreateInsertElement(cross, yz_zy, (
uint64_t)0);
346 cross = Builder.CreateInsertElement(cross, zx_xz, 1);
347 cross = Builder.CreateInsertElement(cross, xy_yx, 2);
355 Type *ATy =
A->getType();
356 [[maybe_unused]]
Type *BTy =
B->getType();
366 int NumElts = AVec->getNumElements();
369 DotIntrinsic = Intrinsic::dx_dot2;
372 DotIntrinsic = Intrinsic::dx_dot3;
375 DotIntrinsic = Intrinsic::dx_dot4;
379 "Invalid dot product input vector: length is outside 2-4");
384 for (
int I = 0;
I < NumElts; ++
I)
385 Args.push_back(Builder.CreateExtractElement(
A, Builder.getInt32(
I)));
386 for (
int I = 0;
I < NumElts; ++
I)
387 Args.push_back(Builder.CreateExtractElement(
B, Builder.getInt32(
I)));
388 return Builder.CreateIntrinsic(ATy->
getScalarType(), DotIntrinsic, Args,
403 assert(DotIntrinsic == Intrinsic::dx_sdot ||
404 DotIntrinsic == Intrinsic::dx_udot);
407 Type *ATy =
A->getType();
408 [[maybe_unused]]
Type *BTy =
B->getType();
418 Intrinsic::ID MadIntrinsic = DotIntrinsic == Intrinsic::dx_sdot
420 : Intrinsic::dx_umad;
423 Result = Builder.CreateMul(Elt0, Elt1);
424 for (
unsigned I = 1;
I < AVec->getNumElements();
I++) {
425 Elt0 = Builder.CreateExtractElement(
A,
I);
426 Elt1 = Builder.CreateExtractElement(
B,
I);
427 Result = Builder.CreateIntrinsic(Result->getType(), MadIntrinsic,
437 Type *Ty =
X->getType();
445 Value *NewX = Builder.CreateFMul(Log2eConst,
X);
447 Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {NewX},
nullptr,
"dx.exp2");
459 switch (TCI->getZExtValue()) {
473 Type *FTy =
F->getType();
474 unsigned FNumElem = 0;
480 Type *ElemTy = FVecTy->getElementType();
481 FNumElem = FVecTy->getNumElements();
482 BitWidth = ElemTy->getPrimitiveSizeInBits();
489 Value *FBitCast = Builder.CreateBitCast(
F, BitCastTy);
490 switch (TCI->getZExtValue()) {
497 Value *NegZeroSplat = Builder.CreateVectorSplat(FNumElem, NegZero);
499 Builder.CreateICmpEQ(FBitCast, NegZeroSplat,
"is.fpclass.negzero");
501 RetVal = Builder.CreateICmpEQ(FBitCast, NegZero,
"is.fpclass.negzero");
513 Type *Ty =
X->getType();
518 if (IntrinsicId == Intrinsic::dx_any)
519 return Builder.CreateOr(Result, Elt);
520 assert(IntrinsicId == Intrinsic::dx_all);
521 return Builder.CreateAnd(Result, Elt);
524 Value *Result =
nullptr;
525 if (!Ty->isVectorTy()) {
527 ? Builder.CreateFCmpUNE(
X, ConstantFP::get(EltTy, 0))
528 : Builder.CreateICmpNE(
X, ConstantInt::get(EltTy, 0));
533 ? Builder.CreateFCmpUNE(
536 ConstantFP::get(EltTy, 0)))
537 : Builder.CreateICmpNE(
540 ConstantInt::get(EltTy, 0)));
541 Result = Builder.CreateExtractElement(
Cond, (
uint64_t)0);
542 for (
unsigned I = 1;
I < XVec->getNumElements();
I++) {
543 Value *Elt = Builder.CreateExtractElement(
Cond,
I);
544 Result = ApplyOp(IntrinsicId, Result, Elt);
555 auto *V = Builder.CreateFSub(
Y,
X);
556 V = Builder.CreateFMul(S, V);
557 return Builder.CreateFAdd(
X, V,
"dx.lerp");
564 Type *Ty =
X->getType();
570 ConstantFP::get(EltTy, LogConstVal))
571 : ConstantFP::get(EltTy, LogConstVal);
573 Builder.CreateIntrinsic(Ty, Intrinsic::log2, {
X},
nullptr,
"elt.log2");
576 return Builder.CreateFMul(Ln2Const, Log2Call);
593 const APFloat &fpVal = constantFP->getValueAPF();
597 return Builder.CreateFDiv(
X,
X);
605 const APFloat &fpVal = constantFP->getValueAPF();
610 Value *Multiplicand = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_rsqrt,
612 nullptr,
"dx.rsqrt");
614 Value *MultiplicandVec =
615 Builder.CreateVectorSplat(XVec->getNumElements(), Multiplicand);
616 return Builder.CreateFMul(
X, MultiplicandVec);
622 Type *Ty =
X->getType();
626 Value *Tan = Builder.CreateFDiv(
Y,
X);
629 Builder.CreateIntrinsic(Ty, Intrinsic::atan, {Tan},
nullptr,
"Elt.Atan");
637 Constant *Zero = ConstantFP::get(Ty, 0);
638 Value *AtanAddPi = Builder.CreateFAdd(Atan, Pi);
639 Value *AtanSubPi = Builder.CreateFSub(Atan, Pi);
642 Value *Result = Atan;
643 Value *XLt0 = Builder.CreateFCmpOLT(
X, Zero);
644 Value *XEq0 = Builder.CreateFCmpOEQ(
X, Zero);
645 Value *YGe0 = Builder.CreateFCmpOGE(
Y, Zero);
646 Value *YLt0 = Builder.CreateFCmpOLT(
Y, Zero);
649 Value *XLt0AndYGe0 = Builder.CreateAnd(XLt0, YGe0);
650 Result = Builder.CreateSelect(XLt0AndYGe0, AtanAddPi, Result);
653 Value *XLt0AndYLt0 = Builder.CreateAnd(XLt0, YLt0);
654 Result = Builder.CreateSelect(XLt0AndYLt0, AtanSubPi, Result);
657 Value *XEq0AndYLt0 = Builder.CreateAnd(XEq0, YLt0);
658 Result = Builder.CreateSelect(XEq0AndYLt0, NegHalfPi, Result);
661 Value *XEq0AndYGe0 = Builder.CreateAnd(XEq0, YGe0);
662 Result = Builder.CreateSelect(XEq0AndYGe0, HalfPi, Result);
667template <
bool LeftFunnel>
676 unsigned BitWidth = Ty->getScalarSizeInBits();
678 "Can't use Mask to compute modulo and inverse");
693 Constant *Mask = ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1);
698 Value *MaskedShift = Builder.CreateAnd(Shift, Mask);
703 Value *NotShift = Builder.CreateNot(Shift);
704 Value *InverseShift = Builder.CreateAnd(NotShift, Mask);
706 Constant *One = ConstantInt::get(Ty, 1);
711 ShiftedA = Builder.CreateShl(
A, MaskedShift);
712 Value *ShiftB1 = Builder.CreateLShr(
B, One);
713 ShiftedB = Builder.CreateLShr(ShiftB1, InverseShift);
715 Value *ShiftA1 = Builder.CreateShl(
A, One);
716 ShiftedA = Builder.CreateShl(ShiftA1, InverseShift);
717 ShiftedB = Builder.CreateLShr(
B, MaskedShift);
720 Value *Result = Builder.CreateOr(ShiftedA, ShiftedB);
728 Type *Ty =
X->getType();
731 if (IntrinsicId == Intrinsic::powi)
732 Y = Builder.CreateSIToFP(
Y, Ty);
735 Builder.CreateIntrinsic(Ty, Intrinsic::log2, {
X},
nullptr,
"elt.log2");
736 auto *
Mul = Builder.CreateFMul(Log2Call,
Y);
738 Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {
Mul},
nullptr,
"elt.exp2");
748 Type *Ty =
X->getType();
751 Constant *One = ConstantFP::get(Ty->getScalarType(), 1.0);
752 Constant *Zero = ConstantFP::get(Ty->getScalarType(), 0.0);
755 if (Ty != Ty->getScalarType()) {
763 return Builder.CreateSelect(
Cond, Zero, One);
768 Type *Ty =
X->getType();
771 return Builder.CreateFMul(
X, PiOver180);
793 "Only expand double or int64 scalars or vectors");
794 bool IsVector =
false;
795 unsigned ExtractNum = 2;
797 ExtractNum = 2 * VT->getNumElements();
799 assert(IsRaw || ExtractNum == 4 &&
"TypedBufferLoad vector must be size 2");
808 while (ExtractNum > 0) {
809 unsigned LoadNum = std::min(ExtractNum, 4u);
813 Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
816 LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
817 Value *Tmp = Builder.getInt32(4 *
Base * 2);
818 Args.push_back(Builder.CreateAdd(Orig->
getOperand(2), Tmp));
821 CallInst *Load = Builder.CreateIntrinsic(LoadType, LoadIntrinsic, Args);
825 Value *Extract = Builder.CreateExtractValue(Load, {0});
828 for (
unsigned I = 0;
I < LoadNum; ++
I)
830 Builder.CreateExtractElement(Extract, Builder.getInt32(
I)));
833 for (
unsigned I = 0;
I < LoadNum;
I += 2) {
834 Value *Combined =
nullptr;
837 Combined = Builder.CreateIntrinsic(
838 Builder.getDoubleTy(), Intrinsic::dx_asdouble,
839 {ExtractElements[I], ExtractElements[I + 1]});
844 Builder.CreateZExt(ExtractElements[
I], Builder.getInt64Ty());
846 Builder.CreateZExt(ExtractElements[
I + 1], Builder.getInt64Ty());
848 Value *ShiftedHi = Builder.CreateShl(
Hi, Builder.getInt64(32));
850 Combined = Builder.CreateOr(
Lo, ShiftedHi);
854 Result = Builder.CreateInsertElement(Result, Combined,
855 Builder.getInt32((
I / 2) +
Base));
860 ExtractNum -= LoadNum;
864 Value *CheckBit =
nullptr;
875 if (Indices[0] == 0) {
877 EVI->replaceAllUsesWith(Result);
880 assert(Indices[0] == 1 &&
"Unexpected type for typedbufferload");
885 for (
Value *L : Loads)
886 CheckBits.
push_back(Builder.CreateExtractValue(L, {1}));
887 CheckBit = Builder.CreateAnd(CheckBits);
889 EVI->replaceAllUsesWith(CheckBit);
891 EVI->eraseFromParent();
900 unsigned ValIndex = IsRaw ? 3 : 2;
905 "Only expand double or int64 scalars or vectors");
908 bool IsVector =
false;
909 unsigned ExtractNum = 2;
912 VecLen = VT->getNumElements();
913 assert(IsRaw || VecLen == 2 &&
"TypedBufferStore vector must be size 2");
914 ExtractNum = VecLen * 2;
927 Value *LowBits =
nullptr;
928 Value *HighBits =
nullptr;
932 Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
934 LowBits = Builder.CreateExtractValue(Split, 0);
935 HighBits = Builder.CreateExtractValue(Split, 1);
939 Constant *ShiftAmt = Builder.getInt64(32);
945 LowBits = Builder.CreateTrunc(InputVal, SplitElementTy);
946 Value *ShiftedVal = Builder.CreateLShr(InputVal, ShiftAmt);
947 HighBits = Builder.CreateTrunc(ShiftedVal, SplitElementTy);
952 for (
unsigned I = 0;
I < VecLen; ++
I) {
954 Mask.push_back(
I + VecLen);
956 Val = Builder.CreateShuffleVector(LowBits, HighBits, Mask);
958 Val = Builder.CreateInsertElement(Val, LowBits, Builder.getInt32(0));
959 Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
966 while (ExtractNum > 0) {
967 unsigned StoreNum = std::min(ExtractNum, 4u);
969 Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
972 StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
974 Args.push_back(Builder.CreateAdd(Orig->
getOperand(2), Tmp));
978 for (
unsigned I = 0;
I < StoreNum; ++
I) {
979 Mask.push_back(
Base +
I);
984 SubVal = Builder.CreateShuffleVector(Val, Mask);
986 Args.push_back(SubVal);
988 Builder.CreateIntrinsic(Builder.getVoidTy(), StoreIntrinsic, Args);
990 ExtractNum -= StoreNum;
998 if (ClampIntrinsic == Intrinsic::dx_uclamp)
999 return Intrinsic::umax;
1000 if (ClampIntrinsic == Intrinsic::dx_sclamp)
1001 return Intrinsic::smax;
1002 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
1003 return Intrinsic::maxnum;
1007 if (ClampIntrinsic == Intrinsic::dx_uclamp)
1008 return Intrinsic::umin;
1009 if (ClampIntrinsic == Intrinsic::dx_sclamp)
1010 return Intrinsic::smin;
1011 assert(ClampIntrinsic == Intrinsic::dx_nclamp);
1012 return Intrinsic::minnum;
1020 Type *Ty =
X->getType();
1022 auto *MaxCall = Builder.CreateIntrinsic(Ty,
getMaxForClamp(ClampIntrinsic),
1023 {
X, Min},
nullptr,
"dx.max");
1024 return Builder.CreateIntrinsic(Ty,
getMinForClamp(ClampIntrinsic),
1025 {MaxCall, Max},
nullptr,
"dx.min");
1030 Type *Ty =
X->getType();
1033 return Builder.CreateFMul(
X, DegreesRatio);
1038 Type *Ty =
X->getType();
1048 GT = Builder.CreateFCmpOLT(Zero,
X);
1049 LT = Builder.CreateFCmpOLT(
X, Zero);
1052 GT = Builder.CreateICmpSLT(Zero,
X);
1053 LT = Builder.CreateICmpSLT(
X, Zero);
1056 Value *ZextGT = Builder.CreateZExt(GT, RetTy);
1057 Value *ZextLT = Builder.CreateZExt(LT, RetTy);
1059 return Builder.CreateSub(ZextGT, ZextLT);
1074 Type *EltTy = RetTy->getElementType();
1085 unsigned LHSSize = LHSRows * LHSCols;
1086 unsigned RHSSize = LHSCols * RHSCols;
1089 for (
unsigned I = 0;
I < LHSSize; ++
I)
1090 LHSElts[
I] = Builder.CreateExtractElement(
LHS,
I);
1091 for (
unsigned I = 0;
I < RHSSize; ++
I)
1092 RHSElts[
I] = Builder.CreateExtractElement(
RHS,
I);
1097 bool UseScalarFP = IsFP && (EltTy->
isDoubleTy() || LHSCols == 1);
1098 if (IsFP && !UseScalarFP) {
1101 FloatDotID = Intrinsic::dx_dot2;
1104 FloatDotID = Intrinsic::dx_dot3;
1107 FloatDotID = Intrinsic::dx_dot4;
1111 "Invalid matrix inner dimension for dot product: must be 2-4");
1116 for (
unsigned C = 0;
C < RHSCols; ++
C) {
1117 for (
unsigned R = 0; R < LHSRows; ++R) {
1120 for (
unsigned K = 0; K < LHSCols; ++K) {
1121 RowElts.
push_back(LHSElts[K * LHSRows + R]);
1128 Dot = Builder.CreateFMul(RowElts[0], ColElts[0]);
1129 for (
unsigned K = 1; K < LHSCols; ++K)
1130 Dot = Builder.CreateIntrinsic(EltTy, Intrinsic::fmuladd,
1131 {RowElts[K], ColElts[K], Dot});
1135 Args.append(RowElts.
begin(), RowElts.
end());
1136 Args.append(ColElts.
begin(), ColElts.
end());
1137 Dot = Builder.CreateIntrinsic(EltTy, FloatDotID, Args);
1140 Dot = Builder.CreateMul(RowElts[0], ColElts[0]);
1141 for (
unsigned K = 1; K < LHSCols; ++K)
1142 Dot = Builder.CreateIntrinsic(EltTy, Intrinsic::dx_imad,
1143 {RowElts[K], ColElts[K], Dot});
1145 unsigned ResIdx =
C * LHSRows + R;
1146 Result = Builder.CreateInsertElement(Result, Dot, ResIdx);
1160 unsigned NumElts = Rows * Cols;
1162 for (
unsigned I = 0;
I < NumElts; ++
I)
1163 Mask[
I] = (
I % Cols) * Rows + (
I / Cols);
1166 return Builder.CreateShuffleVector(Mat, Mask);
1170 Value *Result =
nullptr;
1172 switch (IntrinsicId) {
1173 case Intrinsic::abs:
1176 case Intrinsic::assume:
1179 case Intrinsic::atan2:
1182 case Intrinsic::fshl:
1185 case Intrinsic::fshr:
1188 case Intrinsic::exp:
1191 case Intrinsic::is_fpclass:
1194 case Intrinsic::log:
1197 case Intrinsic::log10:
1200 case Intrinsic::pow:
1201 case Intrinsic::powi:
1204 case Intrinsic::dx_all:
1205 case Intrinsic::dx_any:
1208 case Intrinsic::dx_cross:
1211 case Intrinsic::dx_uclamp:
1212 case Intrinsic::dx_sclamp:
1213 case Intrinsic::dx_nclamp:
1216 case Intrinsic::dx_degrees:
1219 case Intrinsic::dx_isinf:
1222 case Intrinsic::dx_isnan:
1225 case Intrinsic::dx_lerp:
1228 case Intrinsic::dx_normalize:
1231 case Intrinsic::dx_fdot:
1234 case Intrinsic::dx_sdot:
1235 case Intrinsic::dx_udot:
1238 case Intrinsic::dx_sign:
1241 case Intrinsic::dx_step:
1244 case Intrinsic::dx_radians:
1247 case Intrinsic::dx_interlocked_add:
1250 case Intrinsic::dx_resource_load_rawbuffer:
1254 case Intrinsic::dx_resource_store_rawbuffer:
1258 case Intrinsic::dx_resource_load_typedbuffer:
1262 case Intrinsic::dx_resource_store_typedbuffer:
1266 case Intrinsic::usub_sat:
1269 case Intrinsic::vector_reduce_add:
1270 case Intrinsic::vector_reduce_fadd:
1273 case Intrinsic::matrix_multiply:
1276 case Intrinsic::matrix_transpose:
1292 bool IntrinsicExpanded =
false;
1299 if (
F.user_empty() && IntrinsicExpanded)
1300 F.eraseFromParent();
1319 "DXIL Intrinsic Expansion",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static Value * expand16BitIsNormal(CallInst *Orig)
static Value * expandNormalizeIntrinsic(CallInst *Orig)
static bool expandIntrinsic(Function &F, CallInst *Orig)
static Value * expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic)
static Value * expand16BitIsInf(CallInst *Orig)
static bool expansionIntrinsics(Module &M)
static Value * expand16BitIsFinite(CallInst *Orig)
static Value * expandLerpIntrinsic(CallInst *Orig)
static Value * expandCrossIntrinsic(CallInst *Orig)
static Value * expandUsubSat(CallInst *Orig)
static Value * expandAnyOrAllIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandMatrixTranspose(CallInst *Orig)
static Value * expandInterlockedAddIntrinsic(CallInst *Orig)
static Value * expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId)
static Value * expandAtan2Intrinsic(CallInst *Orig)
static Value * expandLog10Intrinsic(CallInst *Orig)
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandStepIntrinsic(CallInst *Orig)
static Value * expandIntegerDotIntrinsic(CallInst *Orig, Intrinsic::ID DotIntrinsic)
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandLogIntrinsic(CallInst *Orig, float LogConstVal=numbers::ln2f)
static Value * expandDegreesIntrinsic(CallInst *Orig)
static Value * expandPowIntrinsic(CallInst *Orig, Intrinsic::ID IntrinsicId)
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy, bool IsRaw)
static Value * expandExpIntrinsic(CallInst *Orig)
static Value * expand16BitIsNaN(CallInst *Orig)
static Value * expandSignIntrinsic(CallInst *Orig)
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic)
static Value * expandAbs(CallInst *Orig)
static Value * expandFloatDotIntrinsic(CallInst *Orig, Value *A, Value *B)
static Value * expandRadiansIntrinsic(CallInst *Orig)
static bool isIntrinsicExpansion(Function &F)
static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw)
static Value * expandMatrixMultiply(CallInst *Orig)
static Value * expandIsFPClass(CallInst *Orig)
static Value * expandFunnelShiftIntrinsic(CallInst *Orig)
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
bool runOnModule(Module &M) override
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
DXILIntrinsicExpansionLegacy()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCall(bool IsTc=true)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &)
static constexpr ElementCount getFixed(ScalarTy MinVal)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Type * getParamType(unsigned i) const
Parameter type accessors.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI Type * getStructElementType(unsigned N) const
bool isVectorTy() const
True if this is an instance of VectorType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Represents a version number in the form major[.minor[.subminor[.build]]].
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
ModulePass * createDXILIntrinsicExpansionLegacyPass()
Pass to expand intrinsic operations that lack DXIL opCodes.
@ Sub
Subtraction of integers.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.