LLVM 22.0.0git
ExpandFp.cpp
Go to the documentation of this file.
1//===--- ExpandFp.cpp - Expand fp instructions ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain floating point instructions at the IR level.
9//
10// It expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
11// .. to’ instructions with a bitwidth above a threshold. This is
12// useful for targets like x86_64 that cannot lower fp conversions
13// with more than 128 bits.
14//
15//===----------------------------------------------------------------------===//
16
24#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/Module.h"
31#include "llvm/IR/PassManager.h"
34#include "llvm/Pass.h"
39#include <optional>
40
41#define DEBUG_TYPE "expand-fp"
42
43using namespace llvm;
44
46 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
48 cl::desc("fp convert instructions on integers with "
49 "more than <N> bits are expanded."));
50
51namespace {
52/// This class implements a precise expansion of the frem instruction.
53/// The generated code is based on the fmod implementation in the AMD device
54/// libs.
55class FRemExpander {
56 /// The IRBuilder to use for the expansion.
58
59 /// Floating point type of the return value and the arguments of the FRem
60 /// instructions that should be expanded.
61 Type *FremTy;
62
63 /// Floating point type to use for the computation. This may be
64 /// wider than the \p FremTy.
65 Type *ComputeFpTy;
66
67 /// Integer type used to hold the exponents returned by frexp.
68 Type *ExTy;
69
70 /// How many bits of the quotient to compute per iteration of the
71 /// algorithm, stored as a value of type \p ExTy.
72 Value *Bits;
73
74 /// Constant 1 of type \p ExTy.
75 Value *One;
76
77public:
78 static bool canExpandType(Type *Ty) {
79 // TODO The expansion should work for other floating point types
80 // as well, but this would require additional testing.
81 return Ty->isIEEELikeFPTy() && !Ty->isBFloatTy() && !Ty->isFP128Ty();
82 }
83
84 static FRemExpander create(IRBuilder<> &B, Type *Ty) {
85 assert(canExpandType(Ty) && "Expected supported floating point type");
86
87 // The type to use for the computation of the remainder. This may be
88 // wider than the input/result type which affects the ...
89 Type *ComputeTy = Ty;
90 // ... maximum number of iterations of the remainder computation loop
91 // to use. This value is for the case in which the computation
92 // uses the same input/result type.
93 unsigned MaxIter = 2;
94
95 if (Ty->isHalfTy()) {
96 // Use the wider type and less iterations.
97 ComputeTy = B.getFloatTy();
98 MaxIter = 1;
99 }
100
101 unsigned Precision =
103 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
104 }
105
106 /// Build the FRem expansion for the numerator \p X and the
107 /// denumerator \p Y. The type of X and Y must match \p FremTy. The
108 /// code will be generated at the insertion point of \p B and the
109 /// insertion point will be reset at exit.
110 Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;
111
112 /// Build an approximate FRem expansion for the numerator \p X and
113 /// the denumerator \p Y at the insertion point of builder \p B.
114 /// The type of X and Y must match \p FremTy.
115 Value *buildApproxFRem(Value *X, Value *Y) const;
116
117private:
118 FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
119 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
120 Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {};
121
122 Value *createRcp(Value *V, const Twine &Name) const {
123 // Leave it to later optimizations to turn this into an rcp
124 // instruction if available.
125 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
126 }
127
128 // Helper function to build the UPDATE_AX code which is common to the
129 // loop body and the "final iteration".
130 Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
131 // Build:
132 // float q = rint(ax * ayinv);
133 // ax = fma(-q, ay, ax);
134 // int clt = ax < 0.0f;
135 // float axp = ax + ay;
136 // ax = clt ? axp : ax;
137 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
138 {}, "q");
139 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");
140 Value *Clt = B.CreateFCmp(CmpInst::FCMP_OLT, AxUpdate,
141 ConstantFP::getZero(ComputeFpTy), "clt");
142 Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");
143 return B.CreateSelect(Clt, Axp, AxUpdate, "ax");
144 }
145
146 /// Build code to extract the exponent and mantissa of \p Src.
147 /// Return the exponent minus one for use as a loop bound and
148 /// the mantissa taken to the given \p NewExp power.
149 std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
150 const Twine &ExName,
151 const Twine &PowName) const {
152 // Build:
153 // ExName = frexp_exp(Src) - 1;
154 // PowName = fldexp(frexp_mant(ExName), NewExp);
155 Type *Ty = Src->getType();
156 Type *ExTy = B.getInt32Ty();
157 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
158 Value *Mant = B.CreateExtractValue(Frexp, {0});
159 Value *Exp = B.CreateExtractValue(Frexp, {1});
160
161 Exp = B.CreateSub(Exp, One, ExName);
162 Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
163
164 return {Pow, Exp};
165 }
166
167 /// Build the main computation of the remainder for the case in which
168 /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
169 /// denumerator. Add the incoming edge from the computation result
170 /// to \p RetPhi.
171 void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
172 PHINode *RetPhi, FastMathFlags FMF) const {
173 IRBuilder<>::FastMathFlagGuard Guard(B);
174 B.setFastMathFlags(FMF);
175
176 // Build:
177 // ex = frexp_exp(ax) - 1;
178 // ax = fldexp(frexp_mant(ax), bits);
179 // ey = frexp_exp(ay) - 1;
180 // ay = fledxp(frexp_mant(ay), 1);
181 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");
182 auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");
183
184 // Build:
185 // int nb = ex - ey;
186 // float ayinv = 1.0/ay;
187 Value *Nb = B.CreateSub(Ex, Ey, "nb");
188 Value *Ayinv = createRcp(Ay, "ayinv");
189
190 // Build: while (nb > bits)
191 BasicBlock *PreheaderBB = B.GetInsertBlock();
192 Function *Fun = PreheaderBB->getParent();
193 auto *LoopBB = BasicBlock::Create(B.getContext(), "frem.loop_body", Fun);
194 auto *ExitBB = BasicBlock::Create(B.getContext(), "frem.loop_exit", Fun);
195
196 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);
197
198 // Build loop body:
199 // UPDATE_AX
200 // ax = fldexp(ax, bits);
201 // nb -= bits;
202 // One iteration of the loop is factored out. The code shared by
203 // the loop and this "iteration" is denoted by UPDATE_AX.
204 B.SetInsertPoint(LoopBB);
205 PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");
206 NbIv->addIncoming(Nb, PreheaderBB);
207
208 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");
209 AxPhi->addIncoming(Ax, PreheaderBB);
210
211 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
212 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");
213 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
214 NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);
215
216 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);
217
218 // Build final iteration
219 // ax = fldexp(ax, nb - bits + 1);
220 // UPDATE_AX
221 B.SetInsertPoint(ExitBB);
222
223 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");
224 AxPhiExit->addIncoming(Ax, PreheaderBB);
225 AxPhiExit->addIncoming(AxPhi, LoopBB);
226 auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");
227 NbExitPhi->addIncoming(NbIv, LoopBB);
228 NbExitPhi->addIncoming(Nb, PreheaderBB);
229
230 Value *AxFinal = B.CreateLdexp(
231 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");
232 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
233
234 // Build:
235 // ax = fldexp(ax, ey);
236 // ret = copysign(ax,x);
237 AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");
238 if (ComputeFpTy != FremTy)
239 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
240 Value *Ret = B.CreateCopySign(AxFinal, X);
241
242 RetPhi->addIncoming(Ret, ExitBB);
243 }
244
245 /// Build the else-branch of the conditional in the FRem
246 /// expansion, i.e. the case in wich Ax <= Ay, where Ax = |X|, Ay
247 /// = |Y|, and X is the numerator and Y the denumerator. Add the
248 /// incoming edge from the result to \p RetPhi.
249 void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
250 // Build:
251 // ret = ax == ay ? copysign(0.0f, x) : x;
252 Value *ZeroWithXSign = B.CreateCopySign(ConstantFP::getZero(FremTy), X);
253 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);
254
255 RetPhi->addIncoming(Ret, B.GetInsertBlock());
256 }
257
258 /// Return a value that is NaN if one of the corner cases concerning
259 /// the inputs \p X and \p Y is detected, and \p Ret otherwise.
260 Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
261 std::optional<SimplifyQuery> &SQ,
262 bool NoInfs) const {
263 // Build:
264 // ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
265 // ret = isfinite(x) ? ret : QNAN;
266 Value *Nan = ConstantFP::getQNaN(FremTy);
267 Ret = B.CreateSelect(B.CreateFCmpUEQ(Y, ConstantFP::getZero(FremTy)), Nan,
268 Ret);
269 Value *XFinite =
270 NoInfs || (SQ && isKnownNeverInfinity(X, *SQ))
271 ? B.getTrue()
272 : B.CreateFCmpULT(B.CreateUnaryIntrinsic(Intrinsic::fabs, X),
274 Ret = B.CreateSelect(XFinite, Ret, Nan);
275
276 return Ret;
277 }
278};
279
280Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
281 IRBuilder<>::FastMathFlagGuard Guard(B);
282 // Propagating the approximate functions flag to the
283 // division leads to an unacceptable drop in precision
284 // on AMDGPU.
285 // TODO Find out if any flags might be worth propagating.
286 B.clearFastMathFlags();
287
288 Value *Quot = B.CreateFDiv(X, Y);
289 Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
290 Value *Neg = B.CreateFNeg(Trunc);
291
292 return B.CreateFMA(Neg, Y, X);
293}
294
295Value *FRemExpander::buildFRem(Value *X, Value *Y,
296 std::optional<SimplifyQuery> &SQ) const {
297 assert(X->getType() == FremTy && Y->getType() == FremTy);
298
299 FastMathFlags FMF = B.getFastMathFlags();
300
301 // This function generates the following code structure:
302 // if (abs(x) > abs(y))
303 // { ret = compute remainder }
304 // else
305 // { ret = x or 0 with sign of x }
306 // Adjust ret to NaN/inf in input
307 // return ret
308 Value *Ax = B.CreateUnaryIntrinsic(Intrinsic::fabs, X, {}, "ax");
309 Value *Ay = B.CreateUnaryIntrinsic(Intrinsic::fabs, Y, {}, "ay");
310 if (ComputeFpTy != X->getType()) {
311 Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");
312 Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");
313 }
314 Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);
315
316 PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");
317 Value *Ret = RetPhi;
318
319 // We would return NaN in all corner cases handled here.
320 // Hence, if NaNs are excluded, keep the result as it is.
321 if (!FMF.noNaNs())
322 Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());
323
324 Function *Fun = B.GetInsertBlock()->getParent();
325 auto *ThenBB = BasicBlock::Create(B.getContext(), "frem.compute", Fun);
326 auto *ElseBB = BasicBlock::Create(B.getContext(), "frem.else", Fun);
327 SplitBlockAndInsertIfThenElse(AxAyCmp, RetPhi, &ThenBB, &ElseBB);
328
329 auto SavedInsertPt = B.GetInsertPoint();
330
331 // Build remainder computation for "then" branch
332 //
333 // The ordered comparison ensures that ax and ay are not NaNs
334 // in the then-branch. Furthermore, y cannot be an infinity and the
335 // check at the end of the function ensures that the result will not
336 // be used if x is an infinity.
337 FastMathFlags ComputeFMF = FMF;
338 ComputeFMF.setNoInfs();
339 ComputeFMF.setNoNaNs();
340
341 B.SetInsertPoint(ThenBB);
342 buildRemainderComputation(Ax, Ay, X, RetPhi, FMF);
343 B.CreateBr(RetPhi->getParent());
344
345 // Build "else"-branch
346 B.SetInsertPoint(ElseBB);
347 buildElseBranch(Ax, Ay, X, RetPhi);
348 B.CreateBr(RetPhi->getParent());
349
350 B.SetInsertPoint(SavedInsertPt);
351
352 return Ret;
353}
354} // namespace
355
356static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
357 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
358
359 Type *Ty = I.getType();
360 assert(FRemExpander::canExpandType(Ty) &&
361 "Expected supported floating point type");
362
363 FastMathFlags FMF = I.getFastMathFlags();
364 // TODO Make use of those flags for optimization?
365 FMF.setAllowReciprocal(false);
366 FMF.setAllowContract(false);
367
368 IRBuilder<> B(&I);
369 B.setFastMathFlags(FMF);
370 B.SetCurrentDebugLocation(I.getDebugLoc());
371
372 const FRemExpander Expander = FRemExpander::create(B, Ty);
373 Value *Ret = FMF.approxFunc()
374 ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
375 : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
376
377 I.replaceAllUsesWith(Ret);
378 Ret->takeName(&I);
379 I.eraseFromParent();
380
381 return true;
382}
383// clang-format off: preserve formatting of the following example
384
385/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
386/// the generated code. This currently generates code similarly to compiler-rt's
387/// implementations.
388///
389/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
390/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
391/// entry:
392/// %0 = bitcast float %a to i32
393/// %conv.i = zext i32 %0 to i64
394/// %tobool.not = icmp sgt i32 %0, -1
395/// %conv = select i1 %tobool.not, i64 1, i64 -1
396/// %and = lshr i64 %conv.i, 23
397/// %shr = and i64 %and, 255
398/// %and2 = and i64 %conv.i, 8388607
399/// %or = or i64 %and2, 8388608
400/// %cmp = icmp ult i64 %shr, 127
401/// br i1 %cmp, label %cleanup, label %if.end
402///
403/// if.end: ; preds = %entry
404/// %sub = add nuw nsw i64 %shr, 4294967169
405/// %conv5 = and i64 %sub, 4294967232
406/// %cmp6.not = icmp eq i64 %conv5, 0
407/// br i1 %cmp6.not, label %if.end12, label %if.then8
408///
409/// if.then8: ; preds = %if.end
410/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64
411/// -9223372036854775808 br label %cleanup
412///
413/// if.end12: ; preds = %if.end
414/// %cmp13 = icmp ult i64 %shr, 150
415/// br i1 %cmp13, label %if.then15, label %if.else
416///
417/// if.then15: ; preds = %if.end12
418/// %sub16 = sub nuw nsw i64 150, %shr
419/// %shr17 = lshr i64 %or, %sub16
420/// %mul = mul nsw i64 %shr17, %conv
421/// br label %cleanup
422///
423/// if.else: ; preds = %if.end12
424/// %sub18 = add nsw i64 %shr, -150
425/// %shl = shl i64 %or, %sub18
426/// %mul19 = mul nsw i64 %shl, %conv
427/// br label %cleanup
428///
429/// cleanup: ; preds = %entry,
430/// %if.else, %if.then15, %if.then8
431/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [
432/// %mul19, %if.else ], [ 0, %entry ] ret i64 %retval.0
433/// }
434///
435/// Replace fp to integer with generated code.
436static void expandFPToI(Instruction *FPToI) {
437 // clang-format on
438 IRBuilder<> Builder(FPToI);
439 auto *FloatVal = FPToI->getOperand(0);
440 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
441
442 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
443 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
444
445 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
446 // to i32 first following a sext/zext to target integer type.
447 Value *A1 = nullptr;
448 if (FloatVal->getType()->isHalfTy()) {
449 if (FPToI->getOpcode() == Instruction::FPToUI) {
450 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
451 A1 = Builder.CreateZExt(A0, IntTy);
452 } else { // FPToSI
453 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
454 A1 = Builder.CreateSExt(A0, IntTy);
455 }
456 FPToI->replaceAllUsesWith(A1);
457 FPToI->dropAllReferences();
458 FPToI->eraseFromParent();
459 return;
460 }
461
462 // fp80 conversion is implemented by fpext to fp128 first then do the
463 // conversion.
464 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
465 unsigned FloatWidth =
466 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
467 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
468 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
469 Value *ImplicitBit = Builder.CreateShl(
470 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
471 Value *SignificandMask =
472 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
473 Value *NegOne = Builder.CreateSExt(
474 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
475 Value *NegInf =
476 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
477 ConstantInt::getSigned(IntTy, BitWidth - 1));
478
479 BasicBlock *Entry = Builder.GetInsertBlock();
480 Function *F = Entry->getParent();
481 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
482 BasicBlock *End =
483 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
484 BasicBlock *IfEnd =
485 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
486 BasicBlock *IfThen5 =
487 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
488 BasicBlock *IfEnd9 =
489 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
490 BasicBlock *IfThen12 =
491 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
492 BasicBlock *IfElse =
493 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
494
495 Entry->getTerminator()->eraseFromParent();
496
497 // entry:
498 Builder.SetInsertPoint(Entry);
499 Value *FloatVal0 = FloatVal;
500 // fp80 conversion is implemented by fpext to fp128 first then do the
501 // conversion.
502 if (FloatVal->getType()->isX86_FP80Ty())
503 FloatVal0 =
504 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
505 Value *ARep0 =
506 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
507 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
508 Value *PosOrNeg = Builder.CreateICmpSGT(
509 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
510 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
511 ConstantInt::getSigned(IntTy, -1));
512 Value *And =
513 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
514 Value *And2 = Builder.CreateAnd(
515 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
516 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
517 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
518 Value *Cmp =
519 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
520 Builder.CreateCondBr(Cmp, End, IfEnd);
521
522 // if.end:
523 Builder.SetInsertPoint(IfEnd);
524 Value *Add1 = Builder.CreateAdd(
526 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
527 Value *Cmp3 = Builder.CreateICmpULT(
528 Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
529 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
530
531 // if.then5:
532 Builder.SetInsertPoint(IfThen5);
533 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
534 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
535 Builder.CreateBr(End);
536
537 // if.end9:
538 Builder.SetInsertPoint(IfEnd9);
539 Value *Cmp10 = Builder.CreateICmpULT(
540 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
541 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
542
543 // if.then12:
544 Builder.SetInsertPoint(IfThen12);
545 Value *Sub13 = Builder.CreateSub(
546 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
547 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
548 Value *Mul = Builder.CreateMul(Shr14, Sign);
549 Builder.CreateBr(End);
550
551 // if.else:
552 Builder.SetInsertPoint(IfElse);
553 Value *Sub15 = Builder.CreateAdd(
555 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
556 Value *Shl = Builder.CreateShl(Or, Sub15);
557 Value *Mul16 = Builder.CreateMul(Shl, Sign);
558 Builder.CreateBr(End);
559
560 // cleanup:
561 Builder.SetInsertPoint(End, End->begin());
562 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
563
564 Retval0->addIncoming(Cond8, IfThen5);
565 Retval0->addIncoming(Mul, IfThen12);
566 Retval0->addIncoming(Mul16, IfElse);
567 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
568
569 FPToI->replaceAllUsesWith(Retval0);
570 FPToI->dropAllReferences();
571 FPToI->eraseFromParent();
572}
573
574// clang-format off: preserve formatting of the following example
575
576/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
577/// the generated code. This currently generates code similarly to compiler-rt's
578/// implementations. This implementation has an implicit assumption that integer
579/// width is larger than fp.
580///
581/// An example IR generated from compiler-rt/floatdisf.c looks like below:
582/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
583/// entry:
584/// %cmp = icmp eq i64 %a, 0
585/// br i1 %cmp, label %return, label %if.end
586///
587/// if.end: ; preds = %entry
588/// %shr = ashr i64 %a, 63
589/// %xor = xor i64 %shr, %a
590/// %sub = sub nsw i64 %xor, %shr
591/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
592/// %cast = trunc i64 %0 to i32
593/// %sub1 = sub nuw nsw i32 64, %cast
594/// %sub2 = xor i32 %cast, 63
595/// %cmp3 = icmp ult i32 %cast, 40
596/// br i1 %cmp3, label %if.then4, label %if.else
597///
598/// if.then4: ; preds = %if.end
599/// switch i32 %sub1, label %sw.default [
600/// i32 25, label %sw.bb
601/// i32 26, label %sw.epilog
602/// ]
603///
604/// sw.bb: ; preds = %if.then4
605/// %shl = shl i64 %sub, 1
606/// br label %sw.epilog
607///
608/// sw.default: ; preds = %if.then4
609/// %sub5 = sub nsw i64 38, %0
610/// %sh_prom = and i64 %sub5, 4294967295
611/// %shr6 = lshr i64 %sub, %sh_prom
612/// %shr9 = lshr i64 274877906943, %0
613/// %and = and i64 %shr9, %sub
614/// %cmp10 = icmp ne i64 %and, 0
615/// %conv11 = zext i1 %cmp10 to i64
616/// %or = or i64 %shr6, %conv11
617/// br label %sw.epilog
618///
619/// sw.epilog: ; preds = %sw.default,
620/// %if.then4, %sw.bb
621/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl,
622/// %sw.bb ] %1 = lshr i64 %a.addr.0, 2 %2 = and i64 %1, 1 %or16 = or i64 %2,
623/// %a.addr.0 %inc = add nsw i64 %or16, 1 %3 = and i64 %inc, 67108864
624/// %tobool.not = icmp eq i64 %3, 0
625/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
626/// %spec.select = ashr i64 %inc, %spec.select.v
627/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
628/// br label %if.end26
629///
630/// if.else: ; preds = %if.end
631/// %sub23 = add nuw nsw i64 %0, 4294967256
632/// %sh_prom24 = and i64 %sub23, 4294967295
633/// %shl25 = shl i64 %sub, %sh_prom24
634/// br label %if.end26
635///
636/// if.end26: ; preds = %sw.epilog,
637/// %if.else
638/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
639/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
640/// %conv27 = trunc i64 %shr to i32
641/// %and28 = and i32 %conv27, -2147483648
642/// %add = shl nuw nsw i32 %e.0, 23
643/// %shl29 = add nuw nsw i32 %add, 1065353216
644/// %conv31 = trunc i64 %a.addr.1 to i32
645/// %and32 = and i32 %conv31, 8388607
646/// %or30 = or i32 %and32, %and28
647/// %or33 = or i32 %or30, %shl29
648/// %4 = bitcast i32 %or33 to float
649/// br label %return
650///
651/// return: ; preds = %entry,
652/// %if.end26
653/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
654/// ret float %retval.0
655/// }
656///
657/// Replace integer to fp with generated code.
658static void expandIToFP(Instruction *IToFP) {
659 // clang-format on
660 IRBuilder<> Builder(IToFP);
661 auto *IntVal = IToFP->getOperand(0);
662 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
663
664 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
665 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
666 // fp80 conversion is implemented by conversion to fp128 first following
667 // a fptrunc to fp80.
668 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
669 // FIXME: As there is no related builtins added in compiler-rt,
670 // here currently utilized the fp32 <-> fp16 lib calls to implement.
671 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
672 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
673 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
674 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
675
676 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
677 "assumes integer width is larger than fp.");
678
679 Value *Temp1 =
680 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
681 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
682
683 BasicBlock *Entry = Builder.GetInsertBlock();
684 Function *F = Entry->getParent();
685 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
686 BasicBlock *End =
687 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
688 BasicBlock *IfEnd =
689 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
690 BasicBlock *IfThen4 =
691 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
692 BasicBlock *SwBB =
693 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
694 BasicBlock *SwDefault =
695 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
696 BasicBlock *SwEpilog =
697 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
698 BasicBlock *IfThen20 =
699 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
700 BasicBlock *IfElse =
701 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
702 BasicBlock *IfEnd26 =
703 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
704
705 Entry->getTerminator()->eraseFromParent();
706
707 Function *CTLZ =
708 Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
709 ConstantInt *True = Builder.getTrue();
710
711 // entry:
712 Builder.SetInsertPoint(Entry);
713 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
714 Builder.CreateCondBr(Cmp, End, IfEnd);
715
716 // if.end:
717 Builder.SetInsertPoint(IfEnd);
718 Value *Shr =
719 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
720 Value *Xor = Builder.CreateXor(Shr, IntVal);
721 Value *Sub = Builder.CreateSub(Xor, Shr);
722 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
723 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
724 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
725 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
726 FloatWidth == 128 ? Call : Cast);
727 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
728 FloatWidth == 128 ? Call : Cast);
729 Value *Cmp3 = Builder.CreateICmpSGT(
730 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
731 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
732
733 // if.then4:
734 Builder.SetInsertPoint(IfThen4);
735 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
736 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
737 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
738
739 // sw.bb:
740 Builder.SetInsertPoint(SwBB);
741 Value *Shl =
742 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
743 Builder.CreateBr(SwEpilog);
744
745 // sw.default:
746 Builder.SetInsertPoint(SwDefault);
747 Value *Sub5 = Builder.CreateSub(
748 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
749 FloatWidth == 128 ? Call : Cast);
750 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
751 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
752 FloatWidth == 128 ? Sub5 : ShProm);
753 Value *Sub8 =
754 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
755 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
756 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
757 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
758 FloatWidth == 128 ? Sub8 : ShProm9);
759 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
760 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
761 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
762 Value *Or = Builder.CreateOr(Shr6, Conv11);
763 Builder.CreateBr(SwEpilog);
764
765 // sw.epilog:
766 Builder.SetInsertPoint(SwEpilog);
767 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
768 AAddr0->addIncoming(Or, SwDefault);
769 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
770 AAddr0->addIncoming(Shl, SwBB);
771 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
772 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
773 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
774 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
775 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
776 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
777 Value *Shr18 = nullptr;
778 if (IsSigned)
779 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
780 else
781 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
782 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
783 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
784 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
785 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
786 Value *ExtractT64 = nullptr;
787 if (FloatWidth > 80)
788 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
789 else
790 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
791 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
792
793 // if.then20
794 Builder.SetInsertPoint(IfThen20);
795 Value *Shr21 = nullptr;
796 if (IsSigned)
797 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
798 else
799 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
800 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
801 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
802 Value *ExtractT62 = nullptr;
803 if (FloatWidth > 80)
804 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
805 else
806 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
807 Builder.CreateBr(IfEnd26);
808
809 // if.else:
810 Builder.SetInsertPoint(IfElse);
811 Value *Sub24 = Builder.CreateAdd(
812 FloatWidth == 128 ? Call : Cast,
813 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
814 -(BitWidth - FPMantissaWidth - 1)));
815 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
816 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
817 FloatWidth == 128 ? Sub24 : ShProm25);
818 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
819 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
820 Value *ExtractT66 = nullptr;
821 if (FloatWidth > 80)
822 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
823 else
824 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
825 Builder.CreateBr(IfEnd26);
826
827 // if.end26:
828 Builder.SetInsertPoint(IfEnd26);
829 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
830 AAddr1Off0->addIncoming(ExtractT, IfThen20);
831 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
832 AAddr1Off0->addIncoming(ExtractT61, IfElse);
833 PHINode *AAddr1Off32 = nullptr;
834 if (FloatWidth > 32) {
835 AAddr1Off32 =
836 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
837 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
838 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
839 AAddr1Off32->addIncoming(ExtractT66, IfElse);
840 }
841 PHINode *E0 = nullptr;
842 if (FloatWidth <= 80) {
843 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
844 E0->addIncoming(Sub1, IfThen20);
845 E0->addIncoming(Sub2, SwEpilog);
846 E0->addIncoming(Sub2, IfElse);
847 }
848 Value *And29 = nullptr;
849 if (FloatWidth > 80) {
850 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
851 Builder.getIntN(BitWidth, 63));
852 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
853 } else {
854 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
855 And29 = Builder.CreateAnd(
856 Conv28, ConstantInt::getSigned(Builder.getInt32Ty(), 0x80000000));
857 }
858 unsigned TempMod = FPMantissaWidth % 32;
859 Value *And34 = nullptr;
860 Value *Shl30 = nullptr;
861 if (FloatWidth > 80) {
862 TempMod += 32;
863 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
864 Shl30 = Builder.CreateAdd(
865 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
866 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
867 } else {
868 Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
869 Shl30 = Builder.CreateAdd(
870 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
871 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
872 Builder.getInt32((1 << TempMod) - 1));
873 }
874 Value *Or35 = nullptr;
875 if (FloatWidth > 80) {
876 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
877 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
878 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
879 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
880 Builder.getIntN(128, FPMantissaWidth));
881 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
882 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
883 Or35 = Builder.CreateOr(Or34, A6);
884 } else {
885 Value *Or31 = Builder.CreateOr(And34, And29);
886 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
887 }
888 Value *A4 = nullptr;
889 if (IToFP->getType()->isDoubleTy()) {
890 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
891 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
892 Value *And1 =
893 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
894 Value *Or1 = Builder.CreateOr(Shl1, And1);
895 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
896 } else if (IToFP->getType()->isX86_FP80Ty()) {
897 Value *A40 =
898 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
899 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
900 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
901 // Deal with "half" situation. This is a workaround since we don't have
902 // floattihf.c currently as referring.
903 Value *A40 =
904 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
905 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
906 } else // float type
907 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
908 Builder.CreateBr(End);
909
910 // return:
911 Builder.SetInsertPoint(End, End->begin());
912 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
913 Retval0->addIncoming(A4, IfEnd26);
914 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
915
916 IToFP->replaceAllUsesWith(Retval0);
917 IToFP->dropAllReferences();
918 IToFP->eraseFromParent();
919}
920
923 VectorType *VTy = cast<FixedVectorType>(I->getType());
924
925 IRBuilder<> Builder(I);
926
927 unsigned NumElements = VTy->getElementCount().getFixedValue();
928 Value *Result = PoisonValue::get(VTy);
929 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
930 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
931
932 Value *NewOp = nullptr;
933 if (auto *BinOp = dyn_cast<BinaryOperator>(I))
934 NewOp = Builder.CreateBinOp(
935 BinOp->getOpcode(), Ext,
936 Builder.CreateExtractElement(I->getOperand(1), Idx));
937 else if (auto *CastI = dyn_cast<CastInst>(I))
938 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
939 I->getType()->getScalarType());
940 else
941 llvm_unreachable("Unsupported instruction type");
942
943 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
944 if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) {
945 ScalarizedI->copyIRFlags(I, true);
946 Worklist.push_back(ScalarizedI);
947 }
948 }
949
950 I->replaceAllUsesWith(Result);
951 I->dropAllReferences();
952 I->eraseFromParent();
953}
954
955// This covers all floating point types; more than we need here.
956// TODO Move somewhere else for general use?
957/// Return the Libcall for a frem instruction of
958/// type \p Ty.
959static RTLIB::Libcall fremToLibcall(Type *Ty) {
960 assert(Ty->isFloatingPointTy());
961 if (Ty->isFloatTy() || Ty->is16bitFPTy())
962 return RTLIB::REM_F32;
963 if (Ty->isDoubleTy())
964 return RTLIB::REM_F64;
965 if (Ty->isFP128Ty())
966 return RTLIB::REM_F128;
967 if (Ty->isX86_FP80Ty())
968 return RTLIB::REM_F80;
969 if (Ty->isPPC_FP128Ty())
970 return RTLIB::REM_PPCF128;
971
972 llvm_unreachable("Unknown floating point type");
973}
974
975/* Return true if, according to \p LibInfo, the target either directly
976 supports the frem instruction for the \p Ty, has a custom lowering,
977 or uses a libcall. */
978static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty) {
980 return true;
981
982 return TLI.getLibcallName(fremToLibcall(Ty->getScalarType()));
983}
984
987 if (I.getOperand(0)->getType()->isVectorTy())
988 scalarize(&I, Worklist);
989 else
990 Worklist.push_back(&I);
991}
992
993static bool runImpl(Function &F, const TargetLowering &TLI,
994 AssumptionCache *AC) {
996 bool Modified = false;
997
998 unsigned MaxLegalFpConvertBitWidth =
1001 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
1002
1003 if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
1004 return false;
1005
1006 for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {
1007 Instruction &I = *It++;
1008 Type *Ty = I.getType();
1009 // TODO: This pass doesn't handle scalable vectors.
1010 if (Ty->isScalableTy())
1011 continue;
1012
1013 switch (I.getOpcode()) {
1014 case Instruction::FRem:
1015 if (!targetSupportsFrem(TLI, Ty) &&
1016 FRemExpander::canExpandType(Ty->getScalarType())) {
1017 addToWorklist(I, Worklist);
1018 Modified = true;
1019 }
1020 break;
1021 case Instruction::FPToUI:
1022 case Instruction::FPToSI: {
1023 auto *IntTy = cast<IntegerType>(Ty->getScalarType());
1024 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
1025 continue;
1026
1027 addToWorklist(I, Worklist);
1028 Modified = true;
1029 break;
1030 }
1031 case Instruction::UIToFP:
1032 case Instruction::SIToFP: {
1033 auto *IntTy =
1034 cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
1035 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
1036 continue;
1037
1038 addToWorklist(I, Worklist);
1039 Modified = true;
1040 break;
1041 }
1042 default:
1043 break;
1044 }
1045 }
1046
1047 while (!Worklist.empty()) {
1048 Instruction *I = Worklist.pop_back_val();
1049 if (I->getOpcode() == Instruction::FRem) {
1050 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1051 if (AC) {
1052 auto Res = std::make_optional<SimplifyQuery>(
1053 I->getModule()->getDataLayout(), I);
1054 Res->AC = AC;
1055 return Res;
1056 }
1057 return {};
1058 }();
1059
1061 } else if (I->getOpcode() == Instruction::FPToUI ||
1062 I->getOpcode() == Instruction::FPToSI) {
1063 expandFPToI(I);
1064 } else {
1065 expandIToFP(I);
1066 }
1067 }
1068
1069 return Modified;
1070}
1071
1072namespace {
1073class ExpandFpLegacyPass : public FunctionPass {
1074 CodeGenOptLevel OptLevel;
1075
1076public:
1077 static char ID;
1078
1079 ExpandFpLegacyPass(CodeGenOptLevel OptLevel)
1080 : FunctionPass(ID), OptLevel(OptLevel) {
1082 }
1083
1084 ExpandFpLegacyPass() : ExpandFpLegacyPass(CodeGenOptLevel::None) {};
1085
1086 bool runOnFunction(Function &F) override {
1087 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1088 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
1089 AssumptionCache *AC = nullptr;
1090
1091 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1092 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1093 return runImpl(F, *TLI, AC);
1094 }
1095
1096 void getAnalysisUsage(AnalysisUsage &AU) const override {
1097 AU.addRequired<TargetPassConfig>();
1098 if (OptLevel != CodeGenOptLevel::None)
1099 AU.addRequired<AssumptionCacheTracker>();
1100 AU.addPreserved<AAResultsWrapperPass>();
1101 AU.addPreserved<GlobalsAAWrapperPass>();
1102 }
1103};
1104} // namespace
1105
1107 : TM(TM), OptLevel(OptLevel) {}
1108
1110 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1111 static_cast<PassInfoMixin<ExpandFpPass> *>(this)->printPipeline(
1112 OS, MapClassName2PassName);
1113 OS << '<';
1114 OS << "O" << (int)OptLevel;
1115 OS << '>';
1116}
1117
1119 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
1120 auto &TLI = *STI->getTargetLowering();
1121 AssumptionCache *AC = nullptr;
1122 if (OptLevel != CodeGenOptLevel::None)
1123 AC = &FAM.getResult<AssumptionAnalysis>(F);
1124 return runImpl(F, TLI, AC) ? PreservedAnalyses::none()
1126}
1127
// Definition of the legacy pass ID and registration of the pass under the
// command-line name "expand-fp".
// NOTE(review): the description strings given to the BEGIN and END macros
// differ ("Expand certain fp instructions" vs. "Expand fp") - confirm which
// one is intended and make them agree.
1128char ExpandFpLegacyPass::ID = 0;
1129INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp",
1130 "Expand certain fp instructions", false, false)
1131INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false)
1132
1134 return new ExpandFpLegacyPass(OptLevel);
1135}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
Definition ExpandFp.cpp:356
static void expandIToFP(Instruction *IToFP)
Generate code to convert an integer to a fp number, replacing S(U)IToFP with the generated code.
Definition ExpandFp.cpp:658
static void expandFPToI(Instruction *FPToI)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
Definition ExpandFp.cpp:436
static RTLIB::Libcall fremToLibcall(Type *Ty)
Return the Libcall for a frem instruction of type Ty.
Definition ExpandFp.cpp:959
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:993
static bool targetSupportsFrem(const TargetLowering &TLI, Type *Ty)
Definition ExpandFp.cpp:978
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
Definition ExpandFp.cpp:985
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
Definition ExpandFp.cpp:921
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
FunctionAnalysisManager FAM
Function * Fun
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
BinaryOperator * Mul
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
ExpandFpPass(const TargetMachine *TM, CodeGenOptLevel OptLevel)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
void setAllowContract(bool B=true)
Definition FMF.h:90
bool noInfs() const
Definition FMF.h:66
void setAllowReciprocal(bool B=true)
Definition FMF.h:87
bool approxFunc() const
Definition FMF.h:70
void setNoNaNs(bool B=true)
Definition FMF.h:78
bool noNaNs() const
Definition FMF.h:65
void setNoInfs(bool B=true)
Definition FMF.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Multiway switch.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:159
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:145
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:290
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:285
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:236
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:107
void dropAllReferences()
Drop all references to operands.
Definition User.h:349
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:396
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI void initializeExpandFpLegacyPassPass(PassRegistry &)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI FunctionPass * createExpandFpPass()
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:324
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Matching combinators.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70