56#define DEBUG_TYPE "expand-ir-insts"
63 cl::desc(
"fp convert instructions on integers with "
64 "more than <N> bits are expanded."));
69 cl::desc(
"div and rem instructions on integers with "
70 "more than <N> bits are expanded."));
84 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
97 "ShiftAmt out of range; callers should handle ShiftAmt == 0");
99 Value *Bias = Builder.CreateLShr(Sign,
BitWidth - ShiftAmt,
"bias");
100 return Builder.CreateAdd(
X, Bias,
"adjusted");
116 bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);
119 bool IsExact = IsDiv && BO->
isExact();
122 "Expected power-of-2 constant divisor");
127 unsigned BitWidth = Ty->getIntegerBitWidth();
129 APInt DivisorVal =
C->getValue();
130 bool IsNegativeDivisor = IsSigned && DivisorVal.
isNegative();
143 Result = IsNegativeDivisor ? Builder.CreateNeg(
X) :
X;
145 Result = ConstantInt::get(Ty, 0);
146 }
else if (IsSigned) {
152 X = Builder.CreateFreeze(
X,
X->getName() +
".fr");
156 Value *Quotient = Builder.CreateAShr(
157 Dividend, ShiftAmt, IsDiv && IsNegativeDivisor ?
"pre.neg" :
"shifted",
160 Result = IsNegativeDivisor ? Builder.CreateNeg(Quotient) : Quotient;
164 Value *Truncated = Builder.CreateShl(Quotient, ShiftAmt,
"truncated");
165 Result = Builder.CreateSub(
X, Truncated);
169 Result = Builder.CreateLShr(
X, ShiftAmt,
"", IsExact);
172 Result = Builder.CreateAnd(
X, ConstantInt::get(Ty, Mask));
213 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
217 static bool canExpandType(
Type *Ty) {
224 static bool shouldExpandFremType(
const TargetLowering &TLI, EVT VT) {
225 assert(!VT.
isVector() &&
"Cannot handle vector type; must scalarize first");
227 TargetLowering::LegalizeAction::Expand;
230 static bool shouldExpandFremType(
const TargetLowering &TLI,
Type *Ty) {
239 static bool shouldExpandAnyFremType(
const TargetLowering &TLI) {
240 return any_of(ExpandableTypes,
241 [&](MVT V) {
return shouldExpandFremType(TLI, EVT(V)); });
245 assert(canExpandType(Ty) &&
"Expected supported floating point type");
249 Type *ComputeTy = Ty;
253 unsigned MaxIter = 2;
261 unsigned Precision = APFloat::semanticsPrecision(Ty->
getFltSemantics());
262 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
278 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
279 Bits(ConstantInt::
get(ExTy, Bits)), One(ConstantInt::
get(ExTy, 1)) {}
281 Value *createRcp(
Value *V,
const Twine &Name)
const {
284 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
296 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
298 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {},
"ax");
301 Value *Axp = B.CreateFAdd(AxUpdate, Ay,
"axp");
302 return B.CreateSelect(Clt, Axp, AxUpdate,
"ax");
308 std::pair<Value *, Value *> buildExpAndPower(
Value *Src,
Value *NewExp,
310 const Twine &PowName)
const {
314 Type *Ty = Src->getType();
315 Type *ExTy = B.getInt32Ty();
316 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
317 Value *Mant = B.CreateExtractValue(Frexp, {0});
318 Value *
Exp = B.CreateExtractValue(Frexp, {1});
320 Exp = B.CreateSub(Exp, One, ExName);
321 Value *
Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
330 void buildRemainderComputation(
Value *AxInitial,
Value *AyInitial,
Value *
X,
331 PHINode *RetPhi, FastMathFlags FMF)
const {
332 IRBuilder<>::FastMathFlagGuard Guard(B);
333 B.setFastMathFlags(FMF);
340 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits,
"ex",
"ax");
341 auto [Ay, Ey] = buildExpAndPower(AyInitial, One,
"ey",
"ay");
346 Value *Nb = B.CreateSub(Ex, Ey,
"nb");
347 Value *Ayinv = createRcp(Ay,
"ayinv");
363 B.SetInsertPoint(LoopBB);
364 PHINode *NbIv = B.CreatePHI(Nb->
getType(), 2,
"nb_iv");
367 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2,
"ax_loop_phi");
368 AxPhi->addIncoming(Ax, PreheaderBB);
370 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
371 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {},
"ax_update");
372 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
373 NbIv->
addIncoming(B.CreateSub(NbIv, Bits,
"nb_update"), LoopBB);
380 B.SetInsertPoint(ExitBB);
382 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2,
"ax_exit_phi");
383 AxPhiExit->addIncoming(Ax, PreheaderBB);
384 AxPhiExit->addIncoming(AxPhi, LoopBB);
385 auto *NbExitPhi = B.CreatePHI(Nb->
getType(), 2,
"nb_exit_phi");
386 NbExitPhi->addIncoming(NbIv, LoopBB);
387 NbExitPhi->addIncoming(Nb, PreheaderBB);
389 Value *AxFinal = B.CreateLdexp(
390 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {},
"ax");
391 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
396 AxFinal = B.CreateLdexp(AxFinal, Ey, {},
"ax");
397 if (ComputeFpTy != FremTy)
398 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
399 Value *Ret = B.CreateCopySign(AxFinal,
X);
408 void buildElseBranch(
Value *Ax,
Value *Ay,
Value *
X, PHINode *RetPhi)
const {
412 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign,
X);
420 std::optional<SimplifyQuery> &SQ,
432 Ret = B.CreateSelect(XFinite, Ret, Nan);
440 IRBuilder<>::FastMathFlagGuard Guard(
B);
445 B.clearFastMathFlags();
448 Value *Trunc =
B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
449 Value *Neg =
B.CreateFNeg(Trunc);
451 return B.CreateFMA(Neg,
Y,
X);
455 std::optional<SimplifyQuery> &SQ)
const {
456 assert(
X->getType() == FremTy &&
Y->getType() == FremTy);
458 FastMathFlags FMF =
B.getFastMathFlags();
467 Value *Ax =
B.CreateFAbs(
X, {},
"ax");
468 Value *Ay =
B.CreateFAbs(
Y, {},
"ay");
469 if (ComputeFpTy !=
X->getType()) {
470 Ax =
B.CreateFPExt(Ax, ComputeFpTy,
"ax");
471 Ay =
B.CreateFPExt(Ay, ComputeFpTy,
"ay");
473 Value *AxAyCmp =
B.CreateFCmpOGT(Ax, Ay);
475 PHINode *RetPhi =
B.CreatePHI(FremTy, 2,
"ret");
481 Ret = handleInputCornerCases(Ret,
X,
Y, SQ, FMF.
noInfs());
488 auto SavedInsertPt =
B.GetInsertPoint();
496 FastMathFlags ComputeFMF = FMF;
500 B.SetInsertPoint(ThenBB);
501 buildRemainderComputation(Ax, Ay,
X, RetPhi, FMF);
505 B.SetInsertPoint(ElseBB);
506 buildElseBranch(Ax, Ay,
X, RetPhi);
509 B.SetInsertPoint(SavedInsertPt);
517 Type *Ty =
I.getType();
518 assert(FRemExpander::canExpandType(Ty) &&
519 "Expected supported floating point type");
527 B.setFastMathFlags(FMF);
528 B.SetCurrentDebugLocation(
I.getDebugLoc());
530 const FRemExpander Expander = FRemExpander::create(
B, Ty);
532 ? Expander.buildApproxFRem(
I.getOperand(0),
I.getOperand(1))
533 : Expander.buildFRem(
I.getOperand(0),
I.getOperand(1), SQ);
535 I.replaceAllUsesWith(Ret);
601 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
606 if (FloatVal->getType()->isHalfTy() &&
BitWidth >= 32) {
607 if (FPToI->
getOpcode() == Instruction::FPToUI) {
608 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
609 A1 = Builder.CreateZExt(A0, IntTy);
611 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
612 A1 = Builder.CreateSExt(A0, IntTy);
622 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
623 unsigned FloatWidth =
624 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
625 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
626 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
628 Value *ImplicitBit = ConstantInt::get(
630 Value *SignificandMask = ConstantInt::get(
635 Entry->setName(
Twine(Entry->getName(),
"fp-to-i-entry"));
641 "fp-to-i-if-check.saturate",
F, End);
646 Builder.getContext(),
"fp-to-i-if-check.exp.size",
F, End);
652 Entry->getTerminator()->eraseFromParent();
655 Builder.SetInsertPoint(Entry);
658 FloatVal = Builder.CreateFreeze(FloatVal);
661 if (FloatVal->getType()->isX86_FP80Ty())
664 Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);
665 Value *PosOrNeg, *Sign;
673 Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
674 Value *BiasedExp = Builder.CreateAnd(
675 And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1),
"biased.exp");
676 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
677 Value *Significand = Builder.CreateOr(Abs, ImplicitBit,
"significand");
678 Value *ZeroResultCond = Builder.CreateICmpULT(
679 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias),
"exp.is.negative");
681 Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal,
"is.nan");
682 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);
684 Value *IsNeg = Builder.CreateIsNeg(ARep);
685 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);
688 Builder.CreateCondBr(ZeroResultCond, End,
689 IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
694 Builder.SetInsertPoint(CheckSaturateBB);
695 Value *Cmp3 = Builder.CreateICmpUGE(
697 FloatIntTy,
static_cast<int64_t
>(ExponentBias +
699 Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
702 Builder.SetInsertPoint(SaturateBB);
709 Builder.CreateSelect(PosOrNeg, SignedMax, SignedMin,
"saturated");
713 Builder.CreateBr(End);
717 Builder.SetInsertPoint(CheckExpSizeBB);
718 Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
719 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
720 "exp.smaller.mantissa.width");
721 Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
724 Builder.SetInsertPoint(ExpSmallBB);
725 Value *Sub13 = Builder.CreateSub(
726 Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
728 Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
730 ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);
731 Builder.CreateBr(End);
734 Builder.SetInsertPoint(ExpLargeBB);
735 Value *Sub15 = Builder.CreateAdd(
738 FloatIntTy, -
static_cast<int64_t
>(ExponentBias + FPMantissaWidth)));
739 Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
740 Value *ExpLargeRes = Builder.CreateShl(
741 SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));
743 ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);
744 Builder.CreateBr(End);
747 Builder.SetInsertPoint(End, End->
begin());
748 PHINode *Retval0 = Builder.CreatePHI(FPToI->
getType(), 3 + IsSaturating);
851 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
855 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
858 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
859 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
861 bool IsSigned = IToFP->
getOpcode() == Instruction::SIToFP;
865 IntVal = Builder.CreateFreeze(IntVal);
871 IntTy = Builder.getIntNTy(
BitWidth);
872 IntVal = Builder.CreateIntCast(IntVal, IntTy, IsSigned);
876 Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
877 Builder.getIntN(
BitWidth, FPMantissaWidth + 3));
881 Entry->setName(
Twine(Entry->getName(),
"itofp-entry"));
901 Entry->getTerminator()->eraseFromParent();
908 Builder.SetInsertPoint(Entry);
910 Builder.CreateCondBr(Cmp, End, IfEnd);
913 Builder.SetInsertPoint(IfEnd);
916 Value *
Xor = Builder.CreateXor(Shr, IntVal);
918 Value *
Call = Builder.CreateCall(CTLZ, {IsSigned ?
Sub : IntVal, True});
919 Value *Cast = Builder.CreateTrunc(
Call, Builder.getInt32Ty());
920 int BitWidthNew = FloatWidth == 128 ?
BitWidth : 32;
921 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth),
922 FloatWidth == 128 ?
Call : Cast);
923 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth - 1),
924 FloatWidth == 128 ?
Call : Cast);
925 Value *Cmp3 = Builder.CreateICmpSGT(
926 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
927 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
930 Builder.SetInsertPoint(IfThen4);
932 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
933 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
936 Builder.SetInsertPoint(SwBB);
938 Builder.CreateShl(IsSigned ?
Sub : IntVal, Builder.getIntN(
BitWidth, 1));
939 Builder.CreateBr(SwEpilog);
942 Builder.SetInsertPoint(SwDefault);
943 Value *Sub5 = Builder.CreateSub(
944 Builder.getIntN(BitWidthNew,
BitWidth - FPMantissaWidth - 3),
945 FloatWidth == 128 ?
Call : Cast);
946 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
947 Value *Shr6 = Builder.CreateLShr(IsSigned ?
Sub : IntVal,
948 FloatWidth == 128 ? Sub5 : ShProm);
950 Builder.CreateAdd(FloatWidth == 128 ?
Call : Cast,
951 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
952 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
954 FloatWidth == 128 ? Sub8 : ShProm9);
955 Value *
And = Builder.CreateAnd(Shr9, IsSigned ?
Sub : IntVal);
957 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
958 Value *
Or = Builder.CreateOr(Shr6, Conv11);
959 Builder.CreateBr(SwEpilog);
962 Builder.SetInsertPoint(SwEpilog);
963 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
967 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
968 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
969 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
970 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
971 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
972 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(
BitWidth, 1));
973 Value *Shr18 =
nullptr;
975 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 2));
977 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 2));
978 Value *A3 = Builder.CreateAnd(Inc, Temp1,
"a3");
979 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(
BitWidth, 0));
980 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
981 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(
BitWidth, 32));
982 Value *ExtractT64 =
nullptr;
984 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
986 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
987 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
990 Builder.SetInsertPoint(IfThen20);
991 Value *Shr21 =
nullptr;
993 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 3));
995 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 3));
996 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
997 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(
BitWidth, 32));
998 Value *ExtractT62 =
nullptr;
1000 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
1002 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
1003 Builder.CreateBr(IfEnd26);
1006 Builder.SetInsertPoint(IfElse);
1007 Value *Sub24 = Builder.CreateAdd(
1008 FloatWidth == 128 ?
Call : Cast,
1010 -(
int)(
BitWidth - FPMantissaWidth - 1)));
1011 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
1012 Value *Shl26 = Builder.CreateShl(IsSigned ?
Sub : IntVal,
1013 FloatWidth == 128 ? Sub24 : ShProm25);
1014 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
1015 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(
BitWidth, 32));
1016 Value *ExtractT66 =
nullptr;
1017 if (FloatWidth > 80)
1018 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1020 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
1021 Builder.CreateBr(IfEnd26);
1024 Builder.SetInsertPoint(IfEnd26);
1025 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
1029 PHINode *AAddr1Off32 =
nullptr;
1030 if (FloatWidth > 32) {
1032 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
1038 if (FloatWidth <= 80) {
1039 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
1044 Value *And29 =
nullptr;
1045 if (FloatWidth > 80) {
1046 Value *Temp2 = Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
1048 And29 = Builder.CreateAnd(Shr, Temp2,
"and29");
1050 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
1051 And29 = Builder.CreateAnd(
1054 unsigned TempMod = FPMantissaWidth % 32;
1055 Value *And34 =
nullptr;
1056 Value *Shl30 =
nullptr;
1057 if (FloatWidth > 80) {
1059 Value *
Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
1060 Shl30 = Builder.CreateAdd(
1061 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
1062 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
1064 Value *
Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
1065 Shl30 = Builder.CreateAdd(
1066 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
1067 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
1068 Builder.getInt32((1 << TempMod) - 1));
1070 Value *Or35 =
nullptr;
1071 if (FloatWidth > 80) {
1072 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
1073 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
1074 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
1075 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
1076 Builder.getIntN(128, FPMantissaWidth));
1077 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
1078 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
1079 Or35 = Builder.CreateOr(Or34, A6);
1081 Value *Or31 = Builder.CreateOr(And34, And29);
1082 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
1084 Value *A4 =
nullptr;
1086 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
1087 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
1089 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
1090 Value *Or1 = Builder.CreateOr(Shl1, And1);
1091 A4 = Builder.CreateBitCast(Or1, IToFP->
getType());
1095 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1101 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1103 A4 = Builder.CreateBitCast(Or35, IToFP->
getType());
1104 Builder.CreateBr(End);
1107 Builder.SetInsertPoint(End, End->
begin());
1123 unsigned NumElements = VTy->getElementCount().getFixedValue();
1125 for (
unsigned Idx = 0; Idx < NumElements; ++Idx) {
1126 Value *Ext = Builder.CreateExtractElement(
I->getOperand(0), Idx);
1128 Value *NewOp =
nullptr;
1130 NewOp = Builder.CreateBinOp(
1131 BinOp->getOpcode(), Ext,
1132 Builder.CreateExtractElement(
I->getOperand(1), Idx));
1134 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
1135 I->getType()->getScalarType());
1139 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
1141 ScalarizedI->copyIRFlags(
I,
true);
1146 I->replaceAllUsesWith(Result);
1147 I->dropAllReferences();
1148 I->eraseFromParent();
1153 if (
I.getOperand(0)->getType()->isVectorTy())
1163 unsigned MaxLegalFpConvertBitWidth =
1172 bool DisableExpandLargeFp =
1174 bool DisableExpandLargeDivRem =
1176 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1178 if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1182 Type *Ty =
I.getType();
1184 if (Ty->isScalableTy())
1187 switch (
I.getOpcode()) {
1188 case Instruction::FRem:
1189 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1190 case Instruction::FPToUI:
1191 case Instruction::FPToSI:
1192 return !DisableExpandLargeFp &&
1194 MaxLegalFpConvertBitWidth;
1195 case Instruction::UIToFP:
1196 case Instruction::SIToFP:
1197 return !DisableExpandLargeFp &&
1199 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1200 case Instruction::UDiv:
1201 case Instruction::SDiv:
1202 case Instruction::URem:
1203 case Instruction::SRem:
1208 return !DisableExpandLargeDivRem &&
1210 MaxLegalDivRemBitWidth;
1211 case Instruction::Call: {
1213 if (
II && (
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1214 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
1215 return !DisableExpandLargeFp &&
1217 MaxLegalFpConvertBitWidth;
1229 if (!ShouldHandleInst(
I))
1236 while (!Worklist.
empty()) {
1239 switch (
I->getOpcode()) {
1240 case Instruction::FRem: {
1241 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1243 auto Res = std::make_optional<SimplifyQuery>(
1244 I->getModule()->getDataLayout(),
I);
1255 case Instruction::FPToUI:
1258 case Instruction::FPToSI:
1262 case Instruction::UIToFP:
1263 case Instruction::SIToFP:
1267 case Instruction::UDiv:
1268 case Instruction::SDiv:
1269 case Instruction::URem:
1270 case Instruction::SRem: {
1277 unsigned Opc = BO->getOpcode();
1278 if (
Opc == Instruction::UDiv ||
Opc == Instruction::SDiv)
1285 case Instruction::Call: {
1287 assert(
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1288 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1290 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1300class ExpandIRInstsLegacyPass :
public FunctionPass {
1307 : FunctionPass(
ID), OptLevel(OptLevel) {}
1312 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1313 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(
F);
1314 auto *TLI = Subtarget->getTargetLowering();
1315 AssumptionCache *AC =
nullptr;
1317 const LibcallLoweringInfo &Libcalls =
1318 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1319 *
F.getParent(), *Subtarget);
1321 if (OptLevel != CodeGenOptLevel::None && !
F.hasOptNone())
1322 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1323 return runImpl(
F, *TLI, Libcalls, AC);
1326 void getAnalysisUsage(AnalysisUsage &AU)
const override {
1329 if (OptLevel != CodeGenOptLevel::None)
1340 : TM(&TM), OptLevel(OptLevel) {}
1345 OS, MapClassName2PassName);
1347 OS <<
"O" << (int)OptLevel;
1364 if (!LibcallLowering) {
1366 "' analysis required");
1371 LibcallLowering->getLibcallLowering(*STI);
1377char ExpandIRInstsLegacyPass::ID = 0;
1379 "Expand certain fp instructions",
false,
false)
1385 return new ExpandIRInstsLegacyPass(OptLevel);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
static cl::opt< unsigned > ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("div and rem instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
static void expandPow2DivRem(BinaryOperator *BO)
Expand division or remainder by a power-of-2 constant.
static bool isSigned(unsigned Opcode)
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
static Value * addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth, unsigned ShiftAmt)
For signed div/rem by a power of 2, compute the bias-adjusted dividend: Sign = ashr X,...
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
static bool isConstantPowerOfTwo(Value *V, bool SignedOp)
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BinaryOps getOpcode() const
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ ICMP_SGT
signed greater than
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
ExpandIRInstsPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
void setAllowReciprocal(bool B=true)
void setNoNaNs(bool B=true)
void setNoInfs(bool B=true)
FunctionPass class - This class is used to implement most global optimizations.
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
LLVM_ABI const fltSemantics & getFltSemantics() const
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI bool expandDivision(BinaryOperator *Div)
Generate code to divide two integers, replacing Div with the generated code.
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI FunctionPass * createExpandIRInstsPass(CodeGenOptLevel)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool expandRemainder(BinaryOperator *Rem)
Generate code to calculate the remainder of two integers, replacing Rem with the generated code.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
A CRTP mix-in to automatically provide informational APIs needed for passes.