56#define DEBUG_TYPE "expand-ir-insts"
63 cl::desc(
"fp convert instructions on integers with "
64 "more than <N> bits are expanded."));
69 cl::desc(
"div and rem instructions on integers with "
70 "more than <N> bits are expanded."));
73bool isConstantPowerOfTwo(
llvm::Value *V,
bool SignedOp) {
84bool isSigned(
unsigned int Opcode) {
85 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
98 "ShiftAmt out of range; callers should handle ShiftAmt == 0");
100 Value *Bias = Builder.CreateLShr(Sign,
BitWidth - ShiftAmt,
"bias");
101 return Builder.CreateAdd(
X, Bias,
"adjusted");
117 bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);
118 bool IsSigned = isSigned(Opcode);
120 bool IsExact = IsDiv && BO->
isExact();
123 "Expected power-of-2 constant divisor");
128 unsigned BitWidth = Ty->getIntegerBitWidth();
130 APInt DivisorVal =
C->getValue();
131 bool IsNegativeDivisor = IsSigned && DivisorVal.
isNegative();
144 Result = IsNegativeDivisor ? Builder.CreateNeg(
X) :
X;
146 Result = ConstantInt::get(Ty, 0);
147 }
else if (IsSigned) {
153 X = Builder.CreateFreeze(
X,
X->getName() +
".fr");
156 IsExact ?
X : addSignedBias(Builder,
X,
BitWidth, ShiftAmt);
157 Value *Quotient = Builder.CreateAShr(
158 Dividend, ShiftAmt, IsDiv && IsNegativeDivisor ?
"pre.neg" :
"shifted",
161 Result = IsNegativeDivisor ? Builder.CreateNeg(Quotient) : Quotient;
165 Value *Truncated = Builder.CreateShl(Quotient, ShiftAmt,
"truncated");
166 Result = Builder.CreateSub(
X, Truncated);
170 Result = Builder.CreateLShr(
X, ShiftAmt,
"", IsExact);
173 Result = Builder.CreateAnd(
X, ConstantInt::get(Ty, Mask));
213 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
217 static bool canExpandType(
Type *Ty) {
224 static bool shouldExpandFremType(
const TargetLowering &TLI, EVT VT) {
225 assert(!VT.
isVector() &&
"Cannot handle vector type; must scalarize first");
227 TargetLowering::LegalizeAction::Expand;
230 static bool shouldExpandFremType(
const TargetLowering &TLI,
Type *Ty) {
239 static bool shouldExpandAnyFremType(
const TargetLowering &TLI) {
240 return any_of(ExpandableTypes,
241 [&](MVT V) {
return shouldExpandFremType(TLI, EVT(V)); });
245 assert(canExpandType(Ty) &&
"Expected supported floating point type");
249 Type *ComputeTy = Ty;
253 unsigned MaxIter = 2;
263 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
279 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
280 Bits(ConstantInt::
get(ExTy, Bits)), One(ConstantInt::
get(ExTy, 1)) {};
282 Value *createRcp(
Value *V,
const Twine &Name)
const {
285 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
297 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
299 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {},
"ax");
302 Value *Axp = B.CreateFAdd(AxUpdate, Ay,
"axp");
303 return B.CreateSelect(Clt, Axp, AxUpdate,
"ax");
309 std::pair<Value *, Value *> buildExpAndPower(
Value *Src,
Value *NewExp,
311 const Twine &PowName)
const {
315 Type *Ty = Src->getType();
316 Type *ExTy = B.getInt32Ty();
317 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
318 Value *Mant = B.CreateExtractValue(Frexp, {0});
319 Value *
Exp = B.CreateExtractValue(Frexp, {1});
321 Exp = B.CreateSub(Exp, One, ExName);
322 Value *
Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
331 void buildRemainderComputation(
Value *AxInitial,
Value *AyInitial,
Value *
X,
332 PHINode *RetPhi, FastMathFlags FMF)
const {
333 IRBuilder<>::FastMathFlagGuard Guard(B);
334 B.setFastMathFlags(FMF);
341 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits,
"ex",
"ax");
342 auto [Ay, Ey] = buildExpAndPower(AyInitial, One,
"ey",
"ay");
347 Value *Nb = B.CreateSub(Ex, Ey,
"nb");
348 Value *Ayinv = createRcp(Ay,
"ayinv");
364 B.SetInsertPoint(LoopBB);
365 PHINode *NbIv = B.CreatePHI(Nb->
getType(), 2,
"nb_iv");
368 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2,
"ax_loop_phi");
369 AxPhi->addIncoming(Ax, PreheaderBB);
371 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
372 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {},
"ax_update");
373 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
374 NbIv->
addIncoming(B.CreateSub(NbIv, Bits,
"nb_update"), LoopBB);
381 B.SetInsertPoint(ExitBB);
383 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2,
"ax_exit_phi");
384 AxPhiExit->addIncoming(Ax, PreheaderBB);
385 AxPhiExit->addIncoming(AxPhi, LoopBB);
386 auto *NbExitPhi = B.CreatePHI(Nb->
getType(), 2,
"nb_exit_phi");
387 NbExitPhi->addIncoming(NbIv, LoopBB);
388 NbExitPhi->addIncoming(Nb, PreheaderBB);
390 Value *AxFinal = B.CreateLdexp(
391 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {},
"ax");
392 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
397 AxFinal = B.CreateLdexp(AxFinal, Ey, {},
"ax");
398 if (ComputeFpTy != FremTy)
399 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
400 Value *Ret = B.CreateCopySign(AxFinal,
X);
409 void buildElseBranch(
Value *Ax,
Value *Ay,
Value *
X, PHINode *RetPhi)
const {
413 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign,
X);
421 std::optional<SimplifyQuery> &SQ,
433 Ret = B.CreateSelect(XFinite, Ret, Nan);
440 IRBuilder<>::FastMathFlagGuard Guard(
B);
445 B.clearFastMathFlags();
448 Value *Trunc =
B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
449 Value *Neg =
B.CreateFNeg(Trunc);
451 return B.CreateFMA(Neg,
Y,
X);
455 std::optional<SimplifyQuery> &SQ)
const {
456 assert(
X->getType() == FremTy &&
Y->getType() == FremTy);
458 FastMathFlags FMF =
B.getFastMathFlags();
467 Value *Ax =
B.CreateFAbs(
X, {},
"ax");
468 Value *Ay =
B.CreateFAbs(
Y, {},
"ay");
469 if (ComputeFpTy !=
X->getType()) {
470 Ax =
B.CreateFPExt(Ax, ComputeFpTy,
"ax");
471 Ay =
B.CreateFPExt(Ay, ComputeFpTy,
"ay");
473 Value *AxAyCmp =
B.CreateFCmpOGT(Ax, Ay);
475 PHINode *RetPhi =
B.CreatePHI(FremTy, 2,
"ret");
481 Ret = handleInputCornerCases(Ret,
X,
Y, SQ, FMF.
noInfs());
488 auto SavedInsertPt =
B.GetInsertPoint();
496 FastMathFlags ComputeFMF = FMF;
500 B.SetInsertPoint(ThenBB);
501 buildRemainderComputation(Ax, Ay,
X, RetPhi, FMF);
505 B.SetInsertPoint(ElseBB);
506 buildElseBranch(Ax, Ay,
X, RetPhi);
509 B.SetInsertPoint(SavedInsertPt);
518 Type *Ty =
I.getType();
519 assert(FRemExpander::canExpandType(Ty) &&
520 "Expected supported floating point type");
528 B.setFastMathFlags(FMF);
529 B.SetCurrentDebugLocation(
I.getDebugLoc());
531 const FRemExpander Expander = FRemExpander::create(
B, Ty);
533 ? Expander.buildApproxFRem(
I.getOperand(0),
I.getOperand(1))
534 : Expander.buildFRem(
I.getOperand(0),
I.getOperand(1), SQ);
536 I.replaceAllUsesWith(Ret);
602 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
607 if (FloatVal->getType()->isHalfTy() &&
BitWidth >= 32) {
608 if (FPToI->
getOpcode() == Instruction::FPToUI) {
609 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
610 A1 = Builder.CreateZExt(A0, IntTy);
612 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
613 A1 = Builder.CreateSExt(A0, IntTy);
623 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
624 unsigned FloatWidth =
625 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
626 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
627 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
629 Value *ImplicitBit = ConstantInt::get(
631 Value *SignificandMask = ConstantInt::get(
636 Entry->setName(
Twine(Entry->getName(),
"fp-to-i-entry"));
642 "fp-to-i-if-check.saturate",
F, End);
647 Builder.getContext(),
"fp-to-i-if-check.exp.size",
F, End);
653 Entry->getTerminator()->eraseFromParent();
656 Builder.SetInsertPoint(Entry);
659 FloatVal = Builder.CreateFreeze(FloatVal);
662 if (FloatVal->getType()->isX86_FP80Ty())
665 Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);
666 Value *PosOrNeg, *Sign;
674 Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));
675 Value *BiasedExp = Builder.CreateAnd(
676 And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1),
"biased.exp");
677 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
678 Value *Significand = Builder.CreateOr(Abs, ImplicitBit,
"significand");
679 Value *ZeroResultCond = Builder.CreateICmpULT(
680 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias),
"exp.is.negative");
682 Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal,
"is.nan");
683 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);
685 Value *IsNeg = Builder.CreateIsNeg(ARep);
686 ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);
689 Builder.CreateCondBr(ZeroResultCond, End,
690 IsSaturating ? CheckSaturateBB : CheckExpSizeBB);
695 Builder.SetInsertPoint(CheckSaturateBB);
696 Value *Cmp3 = Builder.CreateICmpUGE(
698 FloatIntTy,
static_cast<int64_t
>(ExponentBias +
700 Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);
703 Builder.SetInsertPoint(SaturateBB);
710 Builder.CreateSelect(PosOrNeg, SignedMax, SignedMin,
"saturated");
714 Builder.CreateBr(End);
718 Builder.SetInsertPoint(CheckExpSizeBB);
719 Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(
720 BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),
721 "exp.smaller.mantissa.width");
722 Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);
725 Builder.SetInsertPoint(ExpSmallBB);
726 Value *Sub13 = Builder.CreateSub(
727 Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);
729 Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);
731 ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);
732 Builder.CreateBr(End);
735 Builder.SetInsertPoint(ExpLargeBB);
736 Value *Sub15 = Builder.CreateAdd(
739 FloatIntTy, -
static_cast<int64_t
>(ExponentBias + FPMantissaWidth)));
740 Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);
741 Value *ExpLargeRes = Builder.CreateShl(
742 SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));
744 ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);
745 Builder.CreateBr(End);
748 Builder.SetInsertPoint(End, End->
begin());
749 PHINode *Retval0 = Builder.CreatePHI(FPToI->
getType(), 3 + IsSaturating);
852 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
856 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
859 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
860 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
862 bool IsSigned = IToFP->
getOpcode() == Instruction::SIToFP;
866 IntVal = Builder.CreateFreeze(IntVal);
872 IntTy = Builder.getIntNTy(
BitWidth);
873 IntVal = Builder.CreateIntCast(IntVal, IntTy, IsSigned);
877 Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
878 Builder.getIntN(
BitWidth, FPMantissaWidth + 3));
882 Entry->setName(
Twine(Entry->getName(),
"itofp-entry"));
902 Entry->getTerminator()->eraseFromParent();
909 Builder.SetInsertPoint(Entry);
911 Builder.CreateCondBr(Cmp, End, IfEnd);
914 Builder.SetInsertPoint(IfEnd);
917 Value *
Xor = Builder.CreateXor(Shr, IntVal);
919 Value *
Call = Builder.CreateCall(CTLZ, {IsSigned ?
Sub : IntVal, True});
920 Value *Cast = Builder.CreateTrunc(
Call, Builder.getInt32Ty());
921 int BitWidthNew = FloatWidth == 128 ?
BitWidth : 32;
922 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth),
923 FloatWidth == 128 ?
Call : Cast);
924 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew,
BitWidth - 1),
925 FloatWidth == 128 ?
Call : Cast);
926 Value *Cmp3 = Builder.CreateICmpSGT(
927 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
928 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
931 Builder.SetInsertPoint(IfThen4);
933 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
934 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
937 Builder.SetInsertPoint(SwBB);
939 Builder.CreateShl(IsSigned ?
Sub : IntVal, Builder.getIntN(
BitWidth, 1));
940 Builder.CreateBr(SwEpilog);
943 Builder.SetInsertPoint(SwDefault);
944 Value *Sub5 = Builder.CreateSub(
945 Builder.getIntN(BitWidthNew,
BitWidth - FPMantissaWidth - 3),
946 FloatWidth == 128 ?
Call : Cast);
947 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
948 Value *Shr6 = Builder.CreateLShr(IsSigned ?
Sub : IntVal,
949 FloatWidth == 128 ? Sub5 : ShProm);
951 Builder.CreateAdd(FloatWidth == 128 ?
Call : Cast,
952 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
953 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
955 FloatWidth == 128 ? Sub8 : ShProm9);
956 Value *
And = Builder.CreateAnd(Shr9, IsSigned ?
Sub : IntVal);
958 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
959 Value *
Or = Builder.CreateOr(Shr6, Conv11);
960 Builder.CreateBr(SwEpilog);
963 Builder.SetInsertPoint(SwEpilog);
964 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
968 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
969 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
970 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
971 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
972 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
973 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(
BitWidth, 1));
974 Value *Shr18 =
nullptr;
976 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 2));
978 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 2));
979 Value *A3 = Builder.CreateAnd(Inc, Temp1,
"a3");
980 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(
BitWidth, 0));
981 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
982 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(
BitWidth, 32));
983 Value *ExtractT64 =
nullptr;
985 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
987 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
988 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
991 Builder.SetInsertPoint(IfThen20);
992 Value *Shr21 =
nullptr;
994 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(
BitWidth, 3));
996 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(
BitWidth, 3));
997 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
998 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(
BitWidth, 32));
999 Value *ExtractT62 =
nullptr;
1000 if (FloatWidth > 80)
1001 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
1003 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
1004 Builder.CreateBr(IfEnd26);
1007 Builder.SetInsertPoint(IfElse);
1008 Value *Sub24 = Builder.CreateAdd(
1009 FloatWidth == 128 ?
Call : Cast,
1011 -(
int)(
BitWidth - FPMantissaWidth - 1)));
1012 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
1013 Value *Shl26 = Builder.CreateShl(IsSigned ?
Sub : IntVal,
1014 FloatWidth == 128 ? Sub24 : ShProm25);
1015 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
1016 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(
BitWidth, 32));
1017 Value *ExtractT66 =
nullptr;
1018 if (FloatWidth > 80)
1019 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
1021 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
1022 Builder.CreateBr(IfEnd26);
1025 Builder.SetInsertPoint(IfEnd26);
1026 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
1030 PHINode *AAddr1Off32 =
nullptr;
1031 if (FloatWidth > 32) {
1033 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
1039 if (FloatWidth <= 80) {
1040 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
1045 Value *And29 =
nullptr;
1046 if (FloatWidth > 80) {
1047 Value *Temp2 = Builder.CreateShl(Builder.getIntN(
BitWidth, 1),
1049 And29 = Builder.CreateAnd(Shr, Temp2,
"and29");
1051 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
1052 And29 = Builder.CreateAnd(
1055 unsigned TempMod = FPMantissaWidth % 32;
1056 Value *And34 =
nullptr;
1057 Value *Shl30 =
nullptr;
1058 if (FloatWidth > 80) {
1060 Value *
Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
1061 Shl30 = Builder.CreateAdd(
1062 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
1063 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
1065 Value *
Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
1066 Shl30 = Builder.CreateAdd(
1067 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
1068 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
1069 Builder.getInt32((1 << TempMod) - 1));
1071 Value *Or35 =
nullptr;
1072 if (FloatWidth > 80) {
1073 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
1074 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
1075 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
1076 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
1077 Builder.getIntN(128, FPMantissaWidth));
1078 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
1079 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
1080 Or35 = Builder.CreateOr(Or34, A6);
1082 Value *Or31 = Builder.CreateOr(And34, And29);
1083 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
1085 Value *A4 =
nullptr;
1087 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
1088 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
1090 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
1091 Value *Or1 = Builder.CreateOr(Shl1, And1);
1092 A4 = Builder.CreateBitCast(Or1, IToFP->
getType());
1096 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1102 A4 = Builder.CreateFPTrunc(A40, IToFP->
getType());
1104 A4 = Builder.CreateBitCast(Or35, IToFP->
getType());
1105 Builder.CreateBr(End);
1108 Builder.SetInsertPoint(End, End->
begin());
1124 unsigned NumElements = VTy->getElementCount().getFixedValue();
1126 for (
unsigned Idx = 0; Idx < NumElements; ++Idx) {
1127 Value *Ext = Builder.CreateExtractElement(
I->getOperand(0), Idx);
1129 Value *NewOp =
nullptr;
1131 NewOp = Builder.CreateBinOp(
1132 BinOp->getOpcode(), Ext,
1133 Builder.CreateExtractElement(
I->getOperand(1), Idx));
1135 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
1136 I->getType()->getScalarType());
1140 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
1142 ScalarizedI->copyIRFlags(
I,
true);
1147 I->replaceAllUsesWith(Result);
1148 I->dropAllReferences();
1149 I->eraseFromParent();
1154 if (
I.getOperand(0)->getType()->isVectorTy())
1164 unsigned MaxLegalFpConvertBitWidth =
1173 bool DisableExpandLargeFp =
1175 bool DisableExpandLargeDivRem =
1177 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1179 if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1183 Type *Ty =
I.getType();
1185 if (Ty->isScalableTy())
1188 switch (
I.getOpcode()) {
1189 case Instruction::FRem:
1190 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1191 case Instruction::FPToUI:
1192 case Instruction::FPToSI:
1193 return !DisableExpandLargeFp &&
1195 MaxLegalFpConvertBitWidth;
1196 case Instruction::UIToFP:
1197 case Instruction::SIToFP:
1198 return !DisableExpandLargeFp &&
1200 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1201 case Instruction::UDiv:
1202 case Instruction::SDiv:
1203 case Instruction::URem:
1204 case Instruction::SRem:
1209 return !DisableExpandLargeDivRem &&
1211 MaxLegalDivRemBitWidth;
1212 case Instruction::Call: {
1214 if (
II && (
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1215 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {
1216 return !DisableExpandLargeFp &&
1218 MaxLegalFpConvertBitWidth;
1230 if (!ShouldHandleInst(
I))
1237 while (!Worklist.
empty()) {
1240 switch (
I->getOpcode()) {
1241 case Instruction::FRem: {
1242 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1244 auto Res = std::make_optional<SimplifyQuery>(
1245 I->getModule()->getDataLayout(),
I);
1256 case Instruction::FPToUI:
1259 case Instruction::FPToSI:
1263 case Instruction::UIToFP:
1264 case Instruction::SIToFP:
1268 case Instruction::UDiv:
1269 case Instruction::SDiv:
1270 case Instruction::URem:
1271 case Instruction::SRem: {
1276 expandPow2DivRem(BO);
1279 if (
Opc == Instruction::UDiv ||
Opc == Instruction::SDiv)
1286 case Instruction::Call: {
1288 assert(
II->getIntrinsicID() == Intrinsic::fptoui_sat ||
1289 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1291 II->getIntrinsicID() == Intrinsic::fptosi_sat);
1301class ExpandIRInstsLegacyPass :
public FunctionPass {
1308 : FunctionPass(
ID), OptLevel(OptLevel) {}
1313 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1314 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(
F);
1315 auto *TLI = Subtarget->getTargetLowering();
1316 AssumptionCache *AC =
nullptr;
1318 const LibcallLoweringInfo &Libcalls =
1319 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1320 *
F.getParent(), *Subtarget);
1322 if (OptLevel != CodeGenOptLevel::None && !
F.hasOptNone())
1323 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1324 return runImpl(
F, *TLI, Libcalls, AC);
1327 void getAnalysisUsage(AnalysisUsage &AU)
const override {
1330 if (OptLevel != CodeGenOptLevel::None)
1341 : TM(&TM), OptLevel(OptLevel) {}
1346 OS, MapClassName2PassName);
1348 OS <<
"O" << (int)OptLevel;
1365 if (!LibcallLowering) {
1367 "' analysis required");
1372 LibcallLowering->getLibcallLowering(*STI);
1378char ExpandIRInstsLegacyPass::ID = 0;
1380 "Expand certain fp instructions",
false,
false)
1386 return new ExpandIRInstsLegacyPass(OptLevel);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
static cl::opt< unsigned > ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("div and rem instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Class for arbitrary precision integers.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
bool isNegative() const
Determine sign of this APInt.
unsigned countr_zero() const
Count the number of trailing zero bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing function, or null if none.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BinaryOps getOpcode() const
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ ICMP_SGT
signed greater than
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
ExpandIRInstsPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
void setAllowContract(bool B=true)
void setAllowReciprocal(bool B=true)
void setNoNaNs(bool B=true)
void setNoInfs(bool B=true)
FunctionPass class - This class is used to implement most global optimizations.
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
LLVM_ABI const fltSemantics & getFltSemantics() const
void dropAllReferences()
Drop all references to operands.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI bool expandDivision(BinaryOperator *Div)
Generate code to divide two integers, replacing Div with the generated code.
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI FunctionPass * createExpandIRInstsPass(CodeGenOptLevel)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI bool expandRemainder(BinaryOperator *Rem)
Generate code to calculate the remainder of two integers, replacing Rem with the generated code.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isVector() const
Return true if this is a vector value type.
A CRTP mix-in to automatically provide informational APIs needed for passes.