56#define DEBUG_TYPE "expand-ir-insts"
63 cl::desc(
"fp convert instructions on integers with "
64 "more than <N> bits are expanded."));
69 cl::desc(
"div and rem instructions on integers with "
70 "more than <N> bits are expanded."));
73bool isConstantPowerOfTwo(
llvm::Value *V,
bool SignedOp) {
84bool isSigned(
unsigned int Opcode) {
85 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
98 "ShiftAmt out of range; callers should handle ShiftAmt == 0");
100 Value *Bias = Builder.CreateLShr(Sign,
BitWidth - ShiftAmt,
"bias");
101 return Builder.CreateAdd(
X, Bias,
"adjusted");
117 bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);
118 bool IsSigned = isSigned(Opcode);
120 bool IsExact = IsDiv && BO->
isExact();
123 "Expected power-of-2 constant divisor");
128 unsigned BitWidth = Ty->getIntegerBitWidth();
130 APInt DivisorVal =
C->getValue();
131 bool IsNegativeDivisor = IsSigned && DivisorVal.
isNegative();
144 Result = IsNegativeDivisor ? Builder.CreateNeg(
X) :
X;
146 Result = ConstantInt::get(Ty, 0);
147 }
else if (IsSigned) {
153 X = Builder.CreateFreeze(
X,
X->getName() +
".fr");
156 IsExact ?
X : addSignedBias(Builder,
X,
BitWidth, ShiftAmt);
157 Value *Quotient = Builder.CreateAShr(
158 Dividend, ShiftAmt, IsDiv && IsNegativeDivisor ?
"pre.neg" :
"shifted",
161 Result = IsNegativeDivisor ? Builder.CreateNeg(Quotient) : Quotient;
165 Value *Truncated = Builder.CreateShl(Quotient, ShiftAmt,
"truncated");
166 Result = Builder.CreateSub(
X, Truncated);
170 Result = Builder.CreateLShr(
X, ShiftAmt,
"", IsExact);
173 Result = Builder.CreateAnd(
X, ConstantInt::get(Ty, Mask));
213 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
217 static bool canExpandType(
Type *Ty) {
224 static bool shouldExpandFremType(
const TargetLowering &TLI, EVT VT) {
225 assert(!VT.
isVector() &&
"Cannot handle vector type; must scalarize first");
227 TargetLowering::LegalizeAction::Expand;
230 static bool shouldExpandFremType(
const TargetLowering &TLI,
Type *Ty) {
239 static bool shouldExpandAnyFremType(
const TargetLowering &TLI) {
240 return any_of(ExpandableTypes,
241 [&](MVT V) {
return shouldExpandFremType(TLI, EVT(V)); });
245 assert(canExpandType(Ty) &&
"Expected supported floating point type");
249 Type *ComputeTy = Ty;
253 unsigned MaxIter = 2;
263 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
279 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
280 Bits(ConstantInt::
get(ExTy, Bits)), One(ConstantInt::
get(ExTy, 1)) {};
282 Value *createRcp(
Value *V,
const Twine &Name)
const {
285 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
297 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
299 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {},
"ax");
302 Value *Axp = B.CreateFAdd(AxUpdate, Ay,
"axp");
303 return B.CreateSelect(Clt, Axp, AxUpdate,
"ax");
309 std::pair<Value *, Value *> buildExpAndPower(
Value *Src,
Value *NewExp,
311 const Twine &PowName)
const {
315 Type *Ty = Src->getType();
316 Type *ExTy = B.getInt32Ty();
317 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
318 Value *Mant = B.CreateExtractValue(Frexp, {0});
319 Value *
Exp = B.CreateExtractValue(Frexp, {1});
321 Exp = B.CreateSub(Exp, One, ExName);
322 Value *
Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
331 void buildRemainderComputation(
Value *AxInitial,
Value *AyInitial,
Value *
X,
332 PHINode *RetPhi, FastMathFlags FMF)
const {
333 IRBuilder<>::FastMathFlagGuard Guard(B);
334 B.setFastMathFlags(FMF);
341 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits,
"ex",
"ax");
342 auto [Ay, Ey] = buildExpAndPower(AyInitial, One,
"ey",
"ay");
347 Value *Nb = B.CreateSub(Ex, Ey,
"nb");
348 Value *Ayinv = createRcp(Ay,
"ayinv");
364 B.SetInsertPoint(LoopBB);
365 PHINode *NbIv = B.CreatePHI(Nb->
getType(), 2,
"nb_iv");
368 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2,
"ax_loop_phi");
369 AxPhi->addIncoming(Ax, PreheaderBB);
371 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
372 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {},
"ax_update");
373 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
374 NbIv->
addIncoming(B.CreateSub(NbIv, Bits,
"nb_update"), LoopBB);
381 B.SetInsertPoint(ExitBB);
383 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2,
"ax_exit_phi");
384 AxPhiExit->addIncoming(Ax, PreheaderBB);
385 AxPhiExit->addIncoming(AxPhi, LoopBB);
386 auto *NbExitPhi = B.CreatePHI(Nb->
getType(), 2,
"nb_exit_phi");
387 NbExitPhi->addIncoming(NbIv, LoopBB);
388 NbExitPhi->addIncoming(Nb, PreheaderBB);
390 Value *AxFinal = B.CreateLdexp(
391 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {},
"ax");
392 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
397 AxFinal = B.CreateLdexp(AxFinal, Ey, {},
"ax");
398 if (ComputeFpTy != FremTy)
399 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
400 Value *Ret = B.CreateCopySign(AxFinal,
X);
409 void buildElseBranch(
Value *Ax,
Value *Ay,
Value *
X, PHINode *RetPhi)
const {
413 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign,
X);
421 std::optional<SimplifyQuery> &SQ,
432 : B.CreateFCmpULT(B.CreateUnaryIntrinsic(Intrinsic::fabs,
X),
434 Ret = B.CreateSelect(XFinite, Ret, Nan);
441 IRBuilder<>::FastMathFlagGuard Guard(
B);
446 B.clearFastMathFlags();
449 Value *Trunc =
B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
450 Value *Neg =
B.CreateFNeg(Trunc);
452 return B.CreateFMA(Neg,
Y,
X);
456 std::optional<SimplifyQuery> &SQ)
const {
457 assert(
X->getType() == FremTy &&
Y->getType() == FremTy);
459 FastMathFlags FMF =
B.getFastMathFlags();
468 Value *Ax =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
X, {},
"ax");
469 Value *Ay =
B.CreateUnaryIntrinsic(Intrinsic::fabs,
Y, {},
"ay");
470 if (ComputeFpTy !=
X->getType()) {
471 Ax =
B.CreateFPExt(Ax, ComputeFpTy,
"ax");
472 Ay =
B.CreateFPExt(Ay, ComputeFpTy,
"ay");
474 Value *AxAyCmp =
B.CreateFCmpOGT(Ax, Ay);
476 PHINode *RetPhi =
B.CreatePHI(FremTy, 2,
"ret");
482 Ret = handleInputCornerCases(Ret,
X,
Y, SQ, FMF.
noInfs());
489 auto SavedInsertPt =
B.GetInsertPoint();
497 FastMathFlags ComputeFMF = FMF;
501 B.SetInsertPoint(ThenBB);
502 buildRemainderComputation(Ax, Ay,
X, RetPhi, FMF);
506 B.SetInsertPoint(ElseBB);
507 buildElseBranch(Ax, Ay,
X, RetPhi);
510 B.SetInsertPoint(SavedInsertPt);
519 Type *Ty =
I.getType();
520 assert(FRemExpander::canExpandType(Ty) &&
521 "Expected supported floating point type");
529 B.setFastMathFlags(FMF);
530 B.SetCurrentDebugLocation(
I.getDebugLoc());
532 const FRemExpander Expander = FRemExpander::create(
B, Ty);
534 ? Expander.buildApproxFRem(
I.getOperand(0),
I.getOperand(1))
535 : Expander.buildFRem(
I.getOperand(0),
I.getOperand(1), SQ);
537 I.replaceAllUsesWith(Ret);
603 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
608 if (FloatVal->getType()->isHalfTy() &&
BitWidth >= 32) {
609 if (FPToI->
getOpcode() == Instruction::FPToUI) {
610 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
611 A1 = Builder.CreateZExt(A0, IntTy);
613 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
614 A1 = Builder.CreateSExt(A0, IntTy);
624 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
625 unsigned FloatWidth =
626 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
627 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
628 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
630 Value *ImplicitBit = ConstantInt::get(
632 Value *SignificandMask = ConstantInt::get(
637 Entry->setName(
Twine(Entry->getName(),
"fp-to-i-entry"));
643 "fp-to-i-if-check.saturate",
F, End);
648 Builder.getContext(),
"fp-to-i-if-check.exp.size",
F, End);
654 Entry->getTerminator()->eraseFromParent();
657 Builder.SetInsertPoint(Entry);
660 FloatVal = Builder.CreateFreeze(FloatVal);
663 if (FloatVal->getType()->isX86_FP80Ty())
666 Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);
667 Value *PosOrNeg, *Sign;
675 Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
676 Value *BiasedExp = Builder.CreateAnd(
677 And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1),
"biased.exp");
678 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
679 Value *Significand = Builder.CreateOr(Abs, ImplicitBit,
"significand");
680 Value *ZeroResultCond = Builder.CreateICmpULT(
681 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias),
"exp.is.negative");
683 Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal,
"is.nan");
684 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);
686 Value *IsNeg = Builder.CreateIsNeg(ARep);
687 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);
690 Builder.CreateCondBr(ZeroResultCond, End,
691 IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
696 Builder.SetInsertPoint(CheckSaturateBB);
697 Value *Cmp3 = Builder.CreateICmpUGE(
699 FloatIntTy,
static_cast<int64_t
>(ExponentBias +
701 Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
704 Builder.SetInsertPoint(SaturateBB);
711 Builder.CreateSelect(PosOrNeg, SignedMax, SignedMin,
"saturated");
715 Builder.CreateBr(End);
719 Builder.SetInsertPoint(CheckExpSizeBB);
720 Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
721 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
722 "exp.smaller.mantissa.width");
723 Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
726 Builder.SetInsertPoint(ExpSmallBB);
727 Value *Sub13 = Builder.CreateSub(
728 Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
730 Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
732 ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);
733 Builder.CreateBr(End);
736 Builder.SetInsertPoint(ExpLargeBB);
737 Value *Sub15 = Builder.CreateAdd(
740 FloatIntTy, -
static_cast<int64_t
>(ExponentBias + FPMantissaWidth)));
741 Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
742 Value *ExpLargeRes = Builder.CreateShl(
743 SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));
745 ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);
746 Builder.CreateBr(End);
749 Builder.SetInsertPoint(End, End->
begin());
750 PHINode *Retval0 = Builder.CreatePHI(FPToI->
getType(), 3 + IsSaturating);
853 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
857 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
860 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
861 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
863 bool IsSigned = IToFP->
getOpcode() == Instruction::SIToFP;
867 IntVal = Builder.CreateFreeze(IntVal);
873 IntTy = Builder.getIntNTy(
BitWidth);
874 IntVal = Builder.CreateIntCast(IntVal, IntTy, IsSigned);
878 Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
879 Builder.getIntN(
BitWidth, FPMantissaWidth + 3));
883 Entry->setName(
Twine(Entry->getName(),
"itofp-entry"));
903 Entry->getTerminator()->eraseFromParent();
910 Builder.SetInsertPoint(Entry);
912 Builder.CreateCondBr(Cmp, End, IfEnd);
915 Builder.SetInsertPoint(IfEnd);
918 Value *
Xor = Builder.CreateXor(Shr, IntVal);
920 Value *
Call = Builder.CreateCall(CTLZ, {IsSigned ?
Sub : IntVal, True});
921 Value *Cast = Builder.CreateTrunc(
Call, Builder.getInt32Ty());
922 int BitWidthNew = FloatWidth == 128 ?
BitWidth : 32;
923 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth),
924 FloatWidth == 128 ?
Call : Cast);
925 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth - 1),
926 FloatWidth == 128 ?
Call : Cast);
927 Value *Cmp3 = Builder.CreateICmpSGT(
928 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
929 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
932 Builder.SetInsertPoint(IfThen4);
934 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
935 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
938 Builder.SetInsertPoint(SwBB);
940 Builder.CreateShl(IsSigned ?
Sub : IntVal, Builder.getIntN(
BitWidth, 1));
941 Builder.CreateBr(SwEpilog);
944 Builder.SetInsertPoint(SwDefault);
945 Value *Sub5 = Builder.CreateSub(
946 Builder.getIntN(BitWidthNew,
BitWidth - FPMantissaWidth - 3),
947 FloatWidth == 128 ?
Call : Cast);
948 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
949 Value *Shr6 = Builder.CreateLShr(IsSigned ?
Sub : IntVal,
950 FloatWidth == 128 ? Sub5 : ShProm);
952 Builder.CreateAdd(FloatWidth == 128 ?
Call : Cast,
953 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
954 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
956 FloatWidth == 128 ? Sub8 : ShProm9);
957 Value *
And = Builder.CreateAnd(Shr9, IsSigned ?
Sub : IntVal);
959 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
960 Value *
Or = Builder.CreateOr(Shr6, Conv11);
961 Builder.CreateBr(SwEpilog);
964 Builder.SetInsertPoint(SwEpilog);
965 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
969 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
970 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
971 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
972 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
973 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
974 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(
BitWidth, 1));
975 Value *Shr18 =
nullptr;
977 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 2));
979 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 2));
980 Value *A3 = Builder.CreateAnd(Inc, Temp1,
"a3");
981 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(
BitWidth, 0));
982 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
983 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(
BitWidth, 32));
984 Value *ExtractT64 =
nullptr;
986 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
988 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
989 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
992 Builder.SetInsertPoint(IfThen20);
993 Value *Shr21 =
nullptr;
995 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 3));
997 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 3));
998 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
999 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(
BitWidth, 32));
1000 Value *ExtractT62 =
nullptr;
1001 if (FloatWidth > 80)
1002 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
1004 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
1005 Builder.CreateBr(IfEnd26);
1008 Builder.SetInsertPoint(IfElse);
1009 Value *Sub24 = Builder.CreateAdd(
1010 FloatWidth == 128 ?
Call : Cast,
1012 -(
int)(
BitWidth - FPMantissaWidth - 1)));
1013 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
1014 Value *Shl26 = Builder.CreateShl(IsSigned ?
Sub : IntVal,
1015 FloatWidth == 128 ? Sub24 : ShProm25);
1016 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
1017 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(
BitWidth, 32));
1018 Value *ExtractT66 =
nullptr;
1019 if (FloatWidth > 80)
1020 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1022 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
1023 Builder.CreateBr(IfEnd26);
1026 Builder.SetInsertPoint(IfEnd26);
1027 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
1031 PHINode *AAddr1Off32 =
nullptr;
1032 if (FloatWidth > 32) {
1034 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
1040 if (FloatWidth <= 80) {
1041 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
1046 Value *And29 =
nullptr;
1047 if (FloatWidth > 80) {
1048 Value *Temp2 = Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
1050 And29 = Builder.CreateAnd(Shr, Temp2,
"and29");
1052 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
1053 And29 = Builder.CreateAnd(
1056 unsigned TempMod = FPMantissaWidth % 32;
1057 Value *And34 =
nullptr;
1058 Value *Shl30 =
nullptr;
1059 if (FloatWidth > 80) {
1061 Value *
Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
1062 Shl30 = Builder.CreateAdd(
1063 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
1064 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
1066 Value *
Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
1067 Shl30 = Builder.CreateAdd(
1068 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
1069 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
1070 Builder.getInt32((1 << TempMod) - 1));
1072 Value *Or35 =
nullptr;
1073 if (FloatWidth > 80) {
1074 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
1075 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
1076 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
1077 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
1078 Builder.getIntN(128, FPMantissaWidth));
1079 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
1080 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
1081 Or35 = Builder.CreateOr(Or34, A6);
1083 Value *Or31 = Builder.CreateOr(And34, And29);
1084 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
1086 Value *A4 =
nullptr;
1088 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
1089 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
1091 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
1092 Value *Or1 = Builder.CreateOr(Shl1, And1);
1093 A4 = Builder.CreateBitCast(Or1, IToFP->
getType());
1097 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1103 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1105 A4 = Builder.CreateBitCast(Or35, IToFP->
getType());
1106 Builder.CreateBr(End);
1109 Builder.SetInsertPoint(End, End->
begin());
1125 unsigned NumElements = VTy->getElementCount().getFixedValue();
1127 for (
unsigned Idx = 0; Idx < NumElements; ++Idx) {
1128 Value *Ext = Builder.CreateExtractElement(
I->getOperand(0), Idx);
1130 Value *NewOp =
nullptr;
1132 NewOp = Builder.CreateBinOp(
1133 BinOp->getOpcode(), Ext,
1134 Builder.CreateExtractElement(
I->getOperand(1), Idx));
1136 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
1137 I->getType()->getScalarType());
1141 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
1143 ScalarizedI->copyIRFlags(
I,
true);
1148 I->replaceAllUsesWith(Result);
1149 I->dropAllReferences();
1150 I->eraseFromParent();
1155 if (
I.getOperand(0)->getType()->isVectorTy())
1165 unsigned MaxLegalFpConvertBitWidth =
1174 bool DisableExpandLargeFp =
1176 bool DisableExpandLargeDivRem =
1178 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1180 if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1184 Type *Ty =
I.getType();
1186 if (Ty->isScalableTy())
1189 switch (
I.getOpcode()) {
1190 case Instruction::FRem:
1191 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1192 case Instruction::FPToUI:
1193 case Instruction::FPToSI:
1194 return !DisableExpandLargeFp &&
1196 MaxLegalFpConvertBitWidth;
1197 case Instruction::UIToFP:
1198 case Instruction::SIToFP:
1199 return !DisableExpandLargeFp &&
1201 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1202 case Instruction::UDiv:
1203 case Instruction::SDiv:
1204 case Instruction::URem:
1205 case Instruction::SRem:
1210 return !DisableExpandLargeDivRem &&
1212 MaxLegalDivRemBitWidth;
1213 case Instruction::Call: {
1215 if (
II && (
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1216 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
1217 return !DisableExpandLargeFp &&
1219 MaxLegalFpConvertBitWidth;
1231 if (!ShouldHandleInst(
I))
1238 while (!Worklist.
empty()) {
1241 switch (
I->getOpcode()) {
1242 case Instruction::FRem: {
1243 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1245 auto Res = std::make_optional<SimplifyQuery>(
1246 I->getModule()->getDataLayout(),
I);
1257 case Instruction::FPToUI:
1260 case Instruction::FPToSI:
1264 case Instruction::UIToFP:
1265 case Instruction::SIToFP:
1269 case Instruction::UDiv:
1270 case Instruction::SDiv:
1271 case Instruction::URem:
1272 case Instruction::SRem: {
1277 expandPow2DivRem(BO);
1280 if (
Opc == Instruction::UDiv ||
Opc == Instruction::SDiv)
1287 case Instruction::Call: {
1289 assert(
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1290 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1292 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1302class ExpandIRInstsLegacyPass :
public FunctionPass {
1309 : FunctionPass(
ID), OptLevel(OptLevel) {}
1314 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1315 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(
F);
1316 auto *TLI = Subtarget->getTargetLowering();
1317 AssumptionCache *AC =
nullptr;
1319 const LibcallLoweringInfo &Libcalls =
1320 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1321 *
F.getParent(), *Subtarget);
1323 if (OptLevel != CodeGenOptLevel::None && !
F.hasOptNone())
1324 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1325 return runImpl(
F, *TLI, Libcalls, AC);
1328 void getAnalysisUsage(AnalysisUsage &AU)
const override {
1331 if (OptLevel != CodeGenOptLevel::None)
1342 : TM(&TM), OptLevel(OptLevel) {}
1347 OS, MapClassName2PassName);
1349 OS <<
"O" << (int)OptLevel;
1366 if (!LibcallLowering) {
1368 "' analysis required");
1373 LibcallLowering->getLibcallLowering(*STI);
1379char ExpandIRInstsLegacyPass::ID = 0;
1381 "Expand certain fp instructions",
false,
false)
1387 return new ExpandIRInstsLegacyPass(OptLevel);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
static cl::opt< unsigned > ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("div and rem instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Class for arbitrary precision integers.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BinaryOps getOpcode() const
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ ICMP_SGT
signed greater than
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
ExpandIRInstsPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
void setAllowReciprocal(bool B=true)
void setNoNaNs(bool B=true)
void setNoInfs(bool B=true)
FunctionPass class - This class is used to implement most global optimizations.
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
LLVM_ABI const fltSemantics & getFltSemantics() const
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI bool expandDivision(BinaryOperator *Div)
Generate code to divide two integers, replacing Div with the generated code.
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI FunctionPass * createExpandIRInstsPass(CodeGenOptLevel)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool expandRemainder(BinaryOperator *Rem)
Generate code to calculate the remainder of two integers, replacing Rem with the generated code.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
A CRTP mix-in to automatically provide informational APIs needed for passes.