LLVM 22.0.0git
ExpandFp.cpp
Go to the documentation of this file.
1//===--- ExpandFp.cpp - Expand fp instructions ----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain floating point instructions at the IR level.
9//
10// It expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
11// .. to’ instructions with a bitwidth above a threshold. This is
12// useful for targets like x86_64 that cannot lower fp conversions
13// with more than 128 bits.
14//
15//===----------------------------------------------------------------------===//
16
24#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/Module.h"
31#include "llvm/IR/PassManager.h"
33#include "llvm/Pass.h"
38#include <optional>
39
40#define DEBUG_TYPE "expand-fp"
41
42using namespace llvm;
43
45 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
47 cl::desc("fp convert instructions on integers with "
48 "more than <N> bits are expanded."));
49
50namespace {
51/// This class implements a precise expansion of the frem instruction.
52/// The generated code is based on the fmod implementation in the AMD device
53/// libs.
54class FRemExpander {
55 /// The IRBuilder to use for the expansion.
57
58 /// Floating point type of the return value and the arguments of the FRem
59 /// instructions that should be expanded.
60 Type *FremTy;
61
62 /// Floating point type to use for the computation. This may be
63 /// wider than the \p FremTy.
64 Type *ComputeFpTy;
65
66 /// Integer type used to hold the exponents returned by frexp.
67 Type *ExTy;
68
69 /// How many bits of the quotient to compute per iteration of the
70 /// algorithm, stored as a value of type \p ExTy.
71 Value *Bits;
72
73 /// Constant 1 of type \p ExTy.
74 Value *One;
75
76 /// The frem argument/return types that can be expanded by this class.
77 // TODO: The expansion could work for other floating point types
78 // as well, but this would require additional testing.
79 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
80 MVT::f64};
81
82public:
 /// Return true if \p Ty is one of the \p ExpandableTypes. Asserts that
 /// \p Ty maps to a simple value type.
83 static bool canExpandType(Type *Ty) {
84 EVT VT = EVT::getEVT(Ty);
85 assert(VT.isSimple() && "Can expand only simple types");
86
87 return is_contained(ExpandableTypes, VT.getSimpleVT());
88 }
89
 /// Return true if the action that \p TLI reports for ISD::FREM on the
 /// scalar type \p VT is Expand. \p VT must not be a vector type.
90 static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
91 assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
92 return TLI.getOperationAction(ISD::FREM, VT) ==
93 TargetLowering::LegalizeAction::Expand;
94 }
95
 /// Overload taking an IR type; the decision is made on the scalar type
 /// of \p Ty.
96 static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
97 // Consider scalar type for simplicity. It seems unlikely that a
98 // vector type can be legalized without expansion if the scalar
99 // type cannot.
100 return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));
101 }
102
103 /// Return true if the pass should expand frem instructions of any type
104 /// for the target represented by \p TLI.
105 static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
106 return any_of(ExpandableTypes,
107 [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
108 }
109
 /// Create an expander for frem instructions of type \p Ty that emits its
 /// code through \p B. For half, the computation type is float and
 /// MaxIter is 1; for all other types the computation type is \p Ty and
 /// MaxIter is 2. The number of quotient bits per iteration handed to the
 /// constructor is Precision / MaxIter.
110 static FRemExpander create(IRBuilder<> &B, Type *Ty) {
111 assert(canExpandType(Ty) && "Expected supported floating point type");
112
113 // The type to use for the computation of the remainder. This may be
114 // wider than the input/result type which affects the ...
115 Type *ComputeTy = Ty;
116 // ... maximum number of iterations of the remainder computation loop
117 // to use. This value is for the case in which the computation
118 // uses the same input/result type.
119 unsigned MaxIter = 2;
120
121 if (Ty->isHalfTy()) {
122 // Use the wider type and fewer iterations.
123 ComputeTy = B.getFloatTy();
124 MaxIter = 1;
125 }
126
127 unsigned Precision =
129 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
130 }
131
132 /// Build the FRem expansion for the numerator \p X and the
133 /// denominator \p Y. The type of X and Y must match \p FremTy. The
134 /// code will be generated at the insertion point of \p B and the
135 /// insertion point will be reset at exit. If \p SQ holds a value, it is
 /// used to check whether \p X is known to never be an infinity.
136 Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;
137
138 /// Build an approximate FRem expansion for the numerator \p X and
139 /// the denominator \p Y at the insertion point of builder \p B.
140 /// The type of X and Y must match \p FremTy.
141 Value *buildApproxFRem(Value *X, Value *Y) const;
142
143private:
 /// Construct an expander. \p Bits is materialized as a constant of type
 /// \p ExTy (i32), as is the constant 1 stored in \p One.
144 FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
145 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
146 Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {};
147
 /// Build the reciprocal 1.0 / \p V in the computation type as a plain
 /// fdiv named \p Name.
148 Value *createRcp(Value *V, const Twine &Name) const {
149 // Leave it to later optimizations to turn this into an rcp
150 // instruction if available.
151 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
152 }
153
154 // Helper function to build the UPDATE_AX code which is common to the
155 // loop body and the "final iteration".
156 Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
157 // Build:
158 // float q = rint(ax * ayinv);
159 // ax = fma(-q, ay, ax);
160 // int clt = ax < 0.0f;
161 // float axp = ax + ay;
162 // ax = clt ? axp : ax;
163 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
164 {}, "q");
165 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");
166 Value *Clt = B.CreateFCmp(CmpInst::FCMP_OLT, AxUpdate,
167 ConstantFP::getZero(ComputeFpTy), "clt");
168 Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");
169 return B.CreateSelect(Clt, Axp, AxUpdate, "ax");
170 }
171
172 /// Build code to extract the exponent and mantissa of \p Src.
173 /// Return a pair whose first element is the result of ldexp applied to
174 /// the mantissa and \p NewExp, and whose second element is the exponent
 /// minus one (for use as a loop bound).
175 std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
176 const Twine &ExName,
177 const Twine &PowName) const {
178 // Build:
179 // ExName = frexp_exp(Src) - 1;
180 // PowName = fldexp(frexp_mant(Src), NewExp);
181 Type *Ty = Src->getType();
 // Note: this local shadows the member ExTy; both are the i32 type.
182 Type *ExTy = B.getInt32Ty();
183 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
184 Value *Mant = B.CreateExtractValue(Frexp, {0});
185 Value *Exp = B.CreateExtractValue(Frexp, {1});
186
187 Exp = B.CreateSub(Exp, One, ExName);
188 Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
189
190 return {Pow, Exp};
191 }
192
193 /// Build the main computation of the remainder for the case in which
194 /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
195 /// denominator. Add the incoming edge from the computation result
196 /// to \p RetPhi. The instructions are built with the fast-math flags
 /// \p FMF; the builder's previous flags are restored on return.
197 void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
198 PHINode *RetPhi, FastMathFlags FMF) const {
199 IRBuilder<>::FastMathFlagGuard Guard(B);
200 B.setFastMathFlags(FMF);
201
202 // Build:
203 // ex = frexp_exp(ax) - 1;
204 // ax = fldexp(frexp_mant(ax), bits);
205 // ey = frexp_exp(ay) - 1;
206 // ay = fldexp(frexp_mant(ay), 1);
207 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");
208 auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");
209
210 // Build:
211 // int nb = ex - ey;
212 // float ayinv = 1.0/ay;
213 Value *Nb = B.CreateSub(Ex, Ey, "nb");
214 Value *Ayinv = createRcp(Ay, "ayinv");
215
216 // Build: while (nb > bits)
217 BasicBlock *PreheaderBB = B.GetInsertBlock();
218 Function *Fun = PreheaderBB->getParent();
219 auto *LoopBB = BasicBlock::Create(B.getContext(), "frem.loop_body", Fun);
220 auto *ExitBB = BasicBlock::Create(B.getContext(), "frem.loop_exit", Fun);
221
222 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);
223
224 // Build loop body:
225 // UPDATE_AX
226 // ax = fldexp(ax, bits);
227 // nb -= bits;
228 // One iteration of the loop is factored out. The code shared by
229 // the loop and this "iteration" is denoted by UPDATE_AX.
230 B.SetInsertPoint(LoopBB);
231 PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");
232 NbIv->addIncoming(Nb, PreheaderBB);
233
234 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");
235 AxPhi->addIncoming(Ax, PreheaderBB);
236
237 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
238 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");
239 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
240 NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);
241
 // The back-edge condition tests the phi NbIv, i.e. the value at the
 // start of the current iteration, and the exit phis below likewise
 // take AxPhi/NbIv rather than the values updated in this iteration.
242 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);
243
244 // Build final iteration
245 // ax = fldexp(ax, nb - bits + 1);
246 // UPDATE_AX
247 B.SetInsertPoint(ExitBB);
248
249 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");
250 AxPhiExit->addIncoming(Ax, PreheaderBB);
251 AxPhiExit->addIncoming(AxPhi, LoopBB);
252 auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");
253 NbExitPhi->addIncoming(NbIv, LoopBB);
254 NbExitPhi->addIncoming(Nb, PreheaderBB);
255
256 Value *AxFinal = B.CreateLdexp(
257 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");
258 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
259
260 // Build:
261 // ax = fldexp(ax, ey);
262 // ret = copysign(ax,x);
263 AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");
 // Narrow back to the frem type if the computation used a wider type.
264 if (ComputeFpTy != FremTy)
265 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
266 Value *Ret = B.CreateCopySign(AxFinal, X);
267
268 RetPhi->addIncoming(Ret, ExitBB);
269 }
270
271 /// Build the else-branch of the conditional in the FRem
272 /// expansion, i.e. the case in which Ax <= Ay, where Ax = |X|, Ay
273 /// = |Y|, and X is the numerator and Y the denominator. Add the
274 /// incoming edge from the result to \p RetPhi.
275 void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
276 // Build:
277 // ret = ax == ay ? copysign(0.0f, x) : x;
278 Value *ZeroWithXSign = B.CreateCopySign(ConstantFP::getZero(FremTy), X);
279 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);
280
281 RetPhi->addIncoming(Ret, B.GetInsertBlock());
282 }
283
284 /// Return a value that is NaN if one of the corner cases concerning
285 /// the inputs \p X and \p Y is detected, and \p Ret otherwise. The
 /// finiteness check on \p X is skipped if \p NoInfs is set or if \p SQ
 /// holds a query that proves \p X is never an infinity.
286 Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
287 std::optional<SimplifyQuery> &SQ,
288 bool NoInfs) const {
289 // Build:
290 // ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
291 // ret = isfinite(x) ? ret : QNAN;
292 Value *Nan = ConstantFP::getQNaN(FremTy);
 // The unordered-or-equal compare is true for y == 0 and for NaN y.
293 Ret = B.CreateSelect(B.CreateFCmpUEQ(Y, ConstantFP::getZero(FremTy)), Nan,
294 Ret);
295 Value *XFinite =
296 NoInfs || (SQ && isKnownNeverInfinity(X, *SQ))
297 ? B.getTrue()
298 : B.CreateFCmpULT(B.CreateUnaryIntrinsic(Intrinsic::fabs, X),
300 Ret = B.CreateSelect(XFinite, Ret, Nan);
301
302 return Ret;
303 }
304};
305
306Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
307 IRBuilder<>::FastMathFlagGuard Guard(B);
308 // Propagating the approximate functions flag to the
309 // division leads to an unacceptable drop in precision
310 // on AMDGPU.
311 // TODO Find out if any flags might be worth propagating.
312 B.clearFastMathFlags();
313
314 Value *Quot = B.CreateFDiv(X, Y);
315 Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
316 Value *Neg = B.CreateFNeg(Trunc);
317
318 return B.CreateFMA(Neg, Y, X);
319}
320
321Value *FRemExpander::buildFRem(Value *X, Value *Y,
322 std::optional<SimplifyQuery> &SQ) const {
323 assert(X->getType() == FremTy && Y->getType() == FremTy);
324
325 FastMathFlags FMF = B.getFastMathFlags();
326
327 // This function generates the following code structure:
328 // if (abs(x) > abs(y))
329 // { ret = compute remainder }
330 // else
331 // { ret = x or 0 with sign of x }
332 // Adjust ret to NaN/inf in input
333 // return ret
334 Value *Ax = B.CreateUnaryIntrinsic(Intrinsic::fabs, X, {}, "ax");
335 Value *Ay = B.CreateUnaryIntrinsic(Intrinsic::fabs, Y, {}, "ay");
336 if (ComputeFpTy != X->getType()) {
337 Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");
338 Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");
339 }
340 Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);
341
342 PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");
343 Value *Ret = RetPhi;
344
345 // We would return NaN in all corner cases handled here.
346 // Hence, if NaNs are excluded, keep the result as it is.
347 if (!FMF.noNaNs())
348 Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());
349
350 Function *Fun = B.GetInsertBlock()->getParent();
351 auto *ThenBB = BasicBlock::Create(B.getContext(), "frem.compute", Fun);
352 auto *ElseBB = BasicBlock::Create(B.getContext(), "frem.else", Fun);
353 SplitBlockAndInsertIfThenElse(AxAyCmp, RetPhi, &ThenBB, &ElseBB);
354
355 auto SavedInsertPt = B.GetInsertPoint();
356
357 // Build remainder computation for "then" branch
358 //
359 // The ordered comparison ensures that ax and ay are not NaNs
360 // in the then-branch. Furthermore, y cannot be an infinity and the
361 // check at the end of the function ensures that the result will not
362 // be used if x is an infinity.
363 FastMathFlags ComputeFMF = FMF;
364 ComputeFMF.setNoInfs();
365 ComputeFMF.setNoNaNs();
366
367 B.SetInsertPoint(ThenBB);
368 buildRemainderComputation(Ax, Ay, X, RetPhi, FMF);
369 B.CreateBr(RetPhi->getParent());
370
371 // Build "else"-branch
372 B.SetInsertPoint(ElseBB);
373 buildElseBranch(Ax, Ay, X, RetPhi);
374 B.CreateBr(RetPhi->getParent());
375
376 B.SetInsertPoint(SavedInsertPt);
377
378 return Ret;
379}
380} // namespace
381
382static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
383 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
384
385 Type *Ty = I.getType();
386 assert(FRemExpander::canExpandType(Ty) &&
387 "Expected supported floating point type");
388
389 FastMathFlags FMF = I.getFastMathFlags();
390 // TODO Make use of those flags for optimization?
391 FMF.setAllowReciprocal(false);
392 FMF.setAllowContract(false);
393
394 IRBuilder<> B(&I);
395 B.setFastMathFlags(FMF);
396 B.SetCurrentDebugLocation(I.getDebugLoc());
397
398 const FRemExpander Expander = FRemExpander::create(B, Ty);
399 Value *Ret = FMF.approxFunc()
400 ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
401 : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
402
403 I.replaceAllUsesWith(Ret);
404 Ret->takeName(&I);
405 I.eraseFromParent();
406
407 return true;
408}
409// clang-format off: preserve formatting of the following example
410
411/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
412/// the generated code. This currently generates code similarly to compiler-rt's
413/// implementations.
414///
415/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
416/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
417/// entry:
418/// %0 = bitcast float %a to i32
419/// %conv.i = zext i32 %0 to i64
420/// %tobool.not = icmp sgt i32 %0, -1
421/// %conv = select i1 %tobool.not, i64 1, i64 -1
422/// %and = lshr i64 %conv.i, 23
423/// %shr = and i64 %and, 255
424/// %and2 = and i64 %conv.i, 8388607
425/// %or = or i64 %and2, 8388608
426/// %cmp = icmp ult i64 %shr, 127
427/// br i1 %cmp, label %cleanup, label %if.end
428///
429/// if.end: ; preds = %entry
430/// %sub = add nuw nsw i64 %shr, 4294967169
431/// %conv5 = and i64 %sub, 4294967232
432/// %cmp6.not = icmp eq i64 %conv5, 0
433/// br i1 %cmp6.not, label %if.end12, label %if.then8
434///
435/// if.then8: ; preds = %if.end
436/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64
437/// -9223372036854775808 br label %cleanup
438///
439/// if.end12: ; preds = %if.end
440/// %cmp13 = icmp ult i64 %shr, 150
441/// br i1 %cmp13, label %if.then15, label %if.else
442///
443/// if.then15: ; preds = %if.end12
444/// %sub16 = sub nuw nsw i64 150, %shr
445/// %shr17 = lshr i64 %or, %sub16
446/// %mul = mul nsw i64 %shr17, %conv
447/// br label %cleanup
448///
449/// if.else: ; preds = %if.end12
450/// %sub18 = add nsw i64 %shr, -150
451/// %shl = shl i64 %or, %sub18
452/// %mul19 = mul nsw i64 %shl, %conv
453/// br label %cleanup
454///
455/// cleanup: ; preds = %entry,
456/// %if.else, %if.then15, %if.then8
457/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [
458/// %mul19, %if.else ], [ 0, %entry ] ret i64 %retval.0
459/// }
460///
461/// Replace fp to integer with generated code.
462static void expandFPToI(Instruction *FPToI) {
463 // clang-format on
464 IRBuilder<> Builder(FPToI);
465 auto *FloatVal = FPToI->getOperand(0);
466 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
467
468 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
 // One less than the type's mantissa width; the remaining bit is OR'ed
 // back in through ImplicitBit below.
469 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
470
471 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
472 // to i32 first following a sext/zext to target integer type.
473 Value *A1 = nullptr;
474 if (FloatVal->getType()->isHalfTy()) {
475 if (FPToI->getOpcode() == Instruction::FPToUI) {
476 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
477 A1 = Builder.CreateZExt(A0, IntTy);
478 } else { // FPToSI
479 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
480 A1 = Builder.CreateSExt(A0, IntTy);
481 }
 // The half case needs no control flow; replace the instruction and
 // return early.
482 FPToI->replaceAllUsesWith(A1);
483 FPToI->dropAllReferences();
484 FPToI->eraseFromParent();
485 return;
486 }
487
488 // fp80 conversion is implemented by fpext to fp128 first then do the
489 // conversion.
490 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
491 unsigned FloatWidth =
492 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
493 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
494 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
 // ImplicitBit = 1 << FPMantissaWidth; SignificandMask = ImplicitBit - 1.
495 Value *ImplicitBit = Builder.CreateShl(
496 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
497 Value *SignificandMask =
498 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
499 Value *NegOne = Builder.CreateSExt(
500 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
 // NegInf = 1 << (BitWidth - 1), i.e. only the top bit of the result type
 // is set. It is the value selected for negative inputs in if.then5.
501 Value *NegInf =
502 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
503 ConstantInt::getSigned(IntTy, BitWidth - 1));
504
 // Split the block at the instruction and create the blocks of the
 // expansion in front of the continuation block End.
505 BasicBlock *Entry = Builder.GetInsertBlock();
506 Function *F = Entry->getParent();
507 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
508 BasicBlock *End =
509 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
510 BasicBlock *IfEnd =
511 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
512 BasicBlock *IfThen5 =
513 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
514 BasicBlock *IfEnd9 =
515 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
516 BasicBlock *IfThen12 =
517 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
518 BasicBlock *IfElse =
519 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
520
 // Drop the terminator of Entry (left over from the split); a conditional
 // branch is emitted at the end of the entry code below.
521 Entry->getTerminator()->eraseFromParent();
522
523 // entry:
524 Builder.SetInsertPoint(Entry);
525 Value *FloatVal0 = FloatVal;
526 // fp80 conversion is implemented by fpext to fp128 first then do the
527 // conversion.
528 if (FloatVal->getType()->isX86_FP80Ty())
529 FloatVal0 =
530 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
 // ARep0 is the bit pattern of the input as an integer of FloatWidth bits;
 // ARep is that pattern zero-extended to the result type.
531 Value *ARep0 =
532 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
533 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
 // PosOrNeg is true if the bit pattern is non-negative as a signed
 // integer; Sign is then +1, otherwise -1.
534 Value *PosOrNeg = Builder.CreateICmpSGT(
535 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
536 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
537 ConstantInt::getSigned(IntTy, -1));
 // And2 = (ARep >> FPMantissaWidth) & ((1 << ExponentWidth) - 1), the
 // exponent field that is compared against ExponentBias below.
538 Value *And =
539 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
540 Value *And2 = Builder.CreateAnd(
541 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
 // Or = (ARep & SignificandMask) | ImplicitBit.
542 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
543 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
 // An exponent field below the bias branches straight to End, where the
 // phi takes the constant 0 for the edge from Entry.
544 Value *Cmp =
545 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
546 Builder.CreateCondBr(Cmp, End, IfEnd);
547
548 // if.end:
549 Builder.SetInsertPoint(IfEnd);
550 Value *Add1 = Builder.CreateAdd(
552 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
553 Value *Cmp3 = Builder.CreateICmpULT(
554 Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
555 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
556
557 // if.then5:
558 Builder.SetInsertPoint(IfThen5);
 // PosInf = ~NegInf (all bits set except the top one). The result in this
 // block is PosInf for non-negative inputs and NegInf otherwise.
559 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
560 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
561 Builder.CreateBr(End);
562
563 // if.end9:
564 Builder.SetInsertPoint(IfEnd9);
 // Choose between shifting the significand right (if.then12) or left
 // (if.else), depending on whether the exponent field is below
 // ExponentBias + FPMantissaWidth.
565 Value *Cmp10 = Builder.CreateICmpULT(
566 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
567 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
568
569 // if.then12:
570 Builder.SetInsertPoint(IfThen12);
 // Result = (Or >> (ExponentBias + FPMantissaWidth - And2)) * Sign.
571 Value *Sub13 = Builder.CreateSub(
572 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
573 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
574 Value *Mul = Builder.CreateMul(Shr14, Sign);
575 Builder.CreateBr(End);
576
577 // if.else:
578 Builder.SetInsertPoint(IfElse);
579 Value *Sub15 = Builder.CreateAdd(
581 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
 // Result = (Or << Sub15) * Sign.
582 Value *Shl = Builder.CreateShl(Or, Sub15);
583 Value *Mul16 = Builder.CreateMul(Shl, Sign);
584 Builder.CreateBr(End);
585
586 // cleanup:
587 Builder.SetInsertPoint(End, End->begin());
 // Merge the results of the four paths that reach End.
588 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
589
590 Retval0->addIncoming(Cond8, IfThen5);
591 Retval0->addIncoming(Mul, IfThen12);
592 Retval0->addIncoming(Mul16, IfElse);
593 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
594
595 FPToI->replaceAllUsesWith(Retval0);
596 FPToI->dropAllReferences();
597 FPToI->eraseFromParent();
598}
599
600// clang-format off: preserve formatting of the following example
601
602/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
603/// the generated code. This currently generates code similarly to compiler-rt's
604/// implementations. This implementation has an implicit assumption that integer
605/// width is larger than fp.
606///
607/// An example IR generated from compiler-rt/floatdisf.c looks like below:
608/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
609/// entry:
610/// %cmp = icmp eq i64 %a, 0
611/// br i1 %cmp, label %return, label %if.end
612///
613/// if.end: ; preds = %entry
614/// %shr = ashr i64 %a, 63
615/// %xor = xor i64 %shr, %a
616/// %sub = sub nsw i64 %xor, %shr
617/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
618/// %cast = trunc i64 %0 to i32
619/// %sub1 = sub nuw nsw i32 64, %cast
620/// %sub2 = xor i32 %cast, 63
621/// %cmp3 = icmp ult i32 %cast, 40
622/// br i1 %cmp3, label %if.then4, label %if.else
623///
624/// if.then4: ; preds = %if.end
625/// switch i32 %sub1, label %sw.default [
626/// i32 25, label %sw.bb
627/// i32 26, label %sw.epilog
628/// ]
629///
630/// sw.bb: ; preds = %if.then4
631/// %shl = shl i64 %sub, 1
632/// br label %sw.epilog
633///
634/// sw.default: ; preds = %if.then4
635/// %sub5 = sub nsw i64 38, %0
636/// %sh_prom = and i64 %sub5, 4294967295
637/// %shr6 = lshr i64 %sub, %sh_prom
638/// %shr9 = lshr i64 274877906943, %0
639/// %and = and i64 %shr9, %sub
640/// %cmp10 = icmp ne i64 %and, 0
641/// %conv11 = zext i1 %cmp10 to i64
642/// %or = or i64 %shr6, %conv11
643/// br label %sw.epilog
644///
645/// sw.epilog: ; preds = %sw.default,
646/// %if.then4, %sw.bb
647/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl,
648/// %sw.bb ] %1 = lshr i64 %a.addr.0, 2 %2 = and i64 %1, 1 %or16 = or i64 %2,
649/// %a.addr.0 %inc = add nsw i64 %or16, 1 %3 = and i64 %inc, 67108864
650/// %tobool.not = icmp eq i64 %3, 0
651/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
652/// %spec.select = ashr i64 %inc, %spec.select.v
653/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
654/// br label %if.end26
655///
656/// if.else: ; preds = %if.end
657/// %sub23 = add nuw nsw i64 %0, 4294967256
658/// %sh_prom24 = and i64 %sub23, 4294967295
659/// %shl25 = shl i64 %sub, %sh_prom24
660/// br label %if.end26
661///
662/// if.end26: ; preds = %sw.epilog,
663/// %if.else
664/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
665/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
666/// %conv27 = trunc i64 %shr to i32
667/// %and28 = and i32 %conv27, -2147483648
668/// %add = shl nuw nsw i32 %e.0, 23
669/// %shl29 = add nuw nsw i32 %add, 1065353216
670/// %conv31 = trunc i64 %a.addr.1 to i32
671/// %and32 = and i32 %conv31, 8388607
672/// %or30 = or i32 %and32, %and28
673/// %or33 = or i32 %or30, %shl29
674/// %4 = bitcast i32 %or33 to float
675/// br label %return
676///
677/// return: ; preds = %entry,
678/// %if.end26
679/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
680/// ret float %retval.0
681/// }
682///
683/// Replace integer to fp with generated code.
684static void expandIToFP(Instruction *IToFP) {
685 // clang-format on
686 IRBuilder<> Builder(IToFP);
687 auto *IntVal = IToFP->getOperand(0);
688 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
689
690 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
691 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
692 // fp80 conversion is implemented by conversion tp fp128 first following
693 // a fptrunc to fp80.
694 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
695 // FIXME: As there is no related builtins added in compliler-rt,
696 // here currently utilized the fp32 <-> fp16 lib calls to implement.
697 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
698 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
699 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
700 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
701
702 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
703 "assumes integer width is larger than fp.");
704
705 Value *Temp1 =
706 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
707 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
708
709 BasicBlock *Entry = Builder.GetInsertBlock();
710 Function *F = Entry->getParent();
711 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
712 BasicBlock *End =
713 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
714 BasicBlock *IfEnd =
715 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
716 BasicBlock *IfThen4 =
717 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
718 BasicBlock *SwBB =
719 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
720 BasicBlock *SwDefault =
721 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
722 BasicBlock *SwEpilog =
723 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
724 BasicBlock *IfThen20 =
725 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
726 BasicBlock *IfElse =
727 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
728 BasicBlock *IfEnd26 =
729 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
730
731 Entry->getTerminator()->eraseFromParent();
732
733 Function *CTLZ =
734 Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
735 ConstantInt *True = Builder.getTrue();
736
737 // entry:
738 Builder.SetInsertPoint(Entry);
739 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
740 Builder.CreateCondBr(Cmp, End, IfEnd);
741
742 // if.end:
743 Builder.SetInsertPoint(IfEnd);
744 Value *Shr =
745 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
746 Value *Xor = Builder.CreateXor(Shr, IntVal);
747 Value *Sub = Builder.CreateSub(Xor, Shr);
748 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
749 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
750 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
751 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
752 FloatWidth == 128 ? Call : Cast);
753 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
754 FloatWidth == 128 ? Call : Cast);
755 Value *Cmp3 = Builder.CreateICmpSGT(
756 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
757 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
758
759 // if.then4:
760 Builder.SetInsertPoint(IfThen4);
761 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
762 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
763 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
764
765 // sw.bb:
766 Builder.SetInsertPoint(SwBB);
767 Value *Shl =
768 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
769 Builder.CreateBr(SwEpilog);
770
771 // sw.default:
772 Builder.SetInsertPoint(SwDefault);
773 Value *Sub5 = Builder.CreateSub(
774 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
775 FloatWidth == 128 ? Call : Cast);
776 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
777 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
778 FloatWidth == 128 ? Sub5 : ShProm);
779 Value *Sub8 =
780 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
781 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
782 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
783 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
784 FloatWidth == 128 ? Sub8 : ShProm9);
785 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
786 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
787 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
788 Value *Or = Builder.CreateOr(Shr6, Conv11);
789 Builder.CreateBr(SwEpilog);
790
791 // sw.epilog:
792 Builder.SetInsertPoint(SwEpilog);
793 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
794 AAddr0->addIncoming(Or, SwDefault);
795 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
796 AAddr0->addIncoming(Shl, SwBB);
797 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
798 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
799 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
800 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
801 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
802 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
803 Value *Shr18 = nullptr;
804 if (IsSigned)
805 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
806 else
807 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
808 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
809 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
810 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
811 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
812 Value *ExtractT64 = nullptr;
813 if (FloatWidth > 80)
814 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
815 else
816 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
817 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
818
819 // if.then20
820 Builder.SetInsertPoint(IfThen20);
821 Value *Shr21 = nullptr;
822 if (IsSigned)
823 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
824 else
825 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
826 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
827 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
828 Value *ExtractT62 = nullptr;
829 if (FloatWidth > 80)
830 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
831 else
832 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
833 Builder.CreateBr(IfEnd26);
834
835 // if.else:
836 Builder.SetInsertPoint(IfElse);
837 Value *Sub24 = Builder.CreateAdd(
838 FloatWidth == 128 ? Call : Cast,
839 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
840 -(int)(BitWidth - FPMantissaWidth - 1)));
841 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
842 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
843 FloatWidth == 128 ? Sub24 : ShProm25);
844 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
845 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
846 Value *ExtractT66 = nullptr;
847 if (FloatWidth > 80)
848 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
849 else
850 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
851 Builder.CreateBr(IfEnd26);
852
853 // if.end26:
854 Builder.SetInsertPoint(IfEnd26);
855 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
856 AAddr1Off0->addIncoming(ExtractT, IfThen20);
857 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
858 AAddr1Off0->addIncoming(ExtractT61, IfElse);
859 PHINode *AAddr1Off32 = nullptr;
860 if (FloatWidth > 32) {
861 AAddr1Off32 =
862 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
863 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
864 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
865 AAddr1Off32->addIncoming(ExtractT66, IfElse);
866 }
867 PHINode *E0 = nullptr;
868 if (FloatWidth <= 80) {
869 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
870 E0->addIncoming(Sub1, IfThen20);
871 E0->addIncoming(Sub2, SwEpilog);
872 E0->addIncoming(Sub2, IfElse);
873 }
874 Value *And29 = nullptr;
875 if (FloatWidth > 80) {
876 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
877 Builder.getIntN(BitWidth, 63));
878 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
879 } else {
880 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
881 And29 = Builder.CreateAnd(
882 Conv28, ConstantInt::get(Builder.getContext(), APInt::getSignMask(32)));
883 }
884 unsigned TempMod = FPMantissaWidth % 32;
885 Value *And34 = nullptr;
886 Value *Shl30 = nullptr;
887 if (FloatWidth > 80) {
888 TempMod += 32;
889 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
890 Shl30 = Builder.CreateAdd(
891 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
892 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
893 } else {
894 Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
895 Shl30 = Builder.CreateAdd(
896 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
897 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
898 Builder.getInt32((1 << TempMod) - 1));
899 }
900 Value *Or35 = nullptr;
901 if (FloatWidth > 80) {
902 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
903 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
904 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
905 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
906 Builder.getIntN(128, FPMantissaWidth));
907 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
908 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
909 Or35 = Builder.CreateOr(Or34, A6);
910 } else {
911 Value *Or31 = Builder.CreateOr(And34, And29);
912 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
913 }
914 Value *A4 = nullptr;
915 if (IToFP->getType()->isDoubleTy()) {
916 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
917 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
918 Value *And1 =
919 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
920 Value *Or1 = Builder.CreateOr(Shl1, And1);
921 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
922 } else if (IToFP->getType()->isX86_FP80Ty()) {
923 Value *A40 =
924 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
925 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
926 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
927 // Deal with "half" situation. This is a workaround since we don't have
928 // floattihf.c currently as referring.
929 Value *A40 =
930 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
931 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
932 } else // float type
933 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
934 Builder.CreateBr(End);
935
936 // return:
937 Builder.SetInsertPoint(End, End->begin());
938 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
939 Retval0->addIncoming(A4, IfEnd26);
940 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
941
942 IToFP->replaceAllUsesWith(Retval0);
943 IToFP->dropAllReferences();
944 IToFP->eraseFromParent();
945}
946
949 VectorType *VTy = cast<FixedVectorType>(I->getType());
950
951 IRBuilder<> Builder(I);
952
953 unsigned NumElements = VTy->getElementCount().getFixedValue();
954 Value *Result = PoisonValue::get(VTy);
955 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
956 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
957
958 Value *NewOp = nullptr;
959 if (auto *BinOp = dyn_cast<BinaryOperator>(I))
960 NewOp = Builder.CreateBinOp(
961 BinOp->getOpcode(), Ext,
962 Builder.CreateExtractElement(I->getOperand(1), Idx));
963 else if (auto *CastI = dyn_cast<CastInst>(I))
964 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
965 I->getType()->getScalarType());
966 else
967 llvm_unreachable("Unsupported instruction type");
968
969 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
970 if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) {
971 ScalarizedI->copyIRFlags(I, true);
972 Worklist.push_back(ScalarizedI);
973 }
974 }
975
976 I->replaceAllUsesWith(Result);
977 I->dropAllReferences();
978 I->eraseFromParent();
979}
980
983 if (I.getOperand(0)->getType()->isVectorTy())
984 scalarize(&I, Worklist);
985 else
986 Worklist.push_back(&I);
987}
988
989static bool runImpl(Function &F, const TargetLowering &TLI,
990 const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {
992
993 unsigned MaxLegalFpConvertBitWidth =
996 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
997
998 bool DisableExpandLargeFp =
999 MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
1000 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1001
1002 if (DisableExpandLargeFp && DisableFrem)
1003 return false;
1004
1005 auto ShouldHandleInst = [&](Instruction &I) {
1006 Type *Ty = I.getType();
1007 // TODO: This pass doesn't handle scalable vectors.
1008 if (Ty->isScalableTy())
1009 return false;
1010
1011 switch (I.getOpcode()) {
1012 case Instruction::FRem:
1013 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1014 case Instruction::FPToUI:
1015 case Instruction::FPToSI:
1016 return !DisableExpandLargeFp &&
1017 cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
1018 MaxLegalFpConvertBitWidth;
1019 case Instruction::UIToFP:
1020 case Instruction::SIToFP:
1021 return !DisableExpandLargeFp &&
1022 cast<IntegerType>(I.getOperand(0)->getType()->getScalarType())
1023 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1024 }
1025
1026 return false;
1027 };
1028
1029 bool Modified = false;
1030 for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {
1031 Instruction &I = *It++;
1032 if (!ShouldHandleInst(I))
1033 continue;
1034
1035 addToWorklist(I, Worklist);
1036 Modified = true;
1037 }
1038
1039 while (!Worklist.empty()) {
1040 Instruction *I = Worklist.pop_back_val();
1041
1042 switch (I->getOpcode()) {
1043 case Instruction::FRem: {
1044 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1045 if (AC) {
1046 auto Res = std::make_optional<SimplifyQuery>(
1047 I->getModule()->getDataLayout(), I);
1048 Res->AC = AC;
1049 return Res;
1050 }
1051 return {};
1052 }();
1053
1055 break;
1056 }
1057
1058 case Instruction::FPToUI:
1059 case Instruction::FPToSI:
1060 expandFPToI(I);
1061 break;
1062
1063 case Instruction::UIToFP:
1064 case Instruction::SIToFP:
1065 expandIToFP(I);
1066 break;
1067 }
1068 }
1069
1070 return Modified;
1071}
1072
1073namespace {
1074class ExpandFpLegacyPass : public FunctionPass {
1075 CodeGenOptLevel OptLevel;
1076
1077public:
1078 static char ID;
1079
1080 ExpandFpLegacyPass(CodeGenOptLevel OptLevel)
1081 : FunctionPass(ID), OptLevel(OptLevel) {
1083 }
1084
1085 ExpandFpLegacyPass() : ExpandFpLegacyPass(CodeGenOptLevel::None) {};
1086
1087 bool runOnFunction(Function &F) override {
1088 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1089 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
1090 auto *TLI = Subtarget->getTargetLowering();
1091 AssumptionCache *AC = nullptr;
1092
1093 const LibcallLoweringInfo &Libcalls =
1094 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1095 *Subtarget);
1096
1097 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1098 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1099 return runImpl(F, *TLI, Libcalls, AC);
1100 }
1101
1102 void getAnalysisUsage(AnalysisUsage &AU) const override {
1103 AU.addRequired<LibcallLoweringInfoWrapper>();
1104 AU.addRequired<TargetPassConfig>();
1105 if (OptLevel != CodeGenOptLevel::None)
1106 AU.addRequired<AssumptionCacheTracker>();
1107 AU.addPreserved<AAResultsWrapperPass>();
1108 AU.addPreserved<GlobalsAAWrapperPass>();
1109 AU.addRequired<LibcallLoweringInfoWrapper>();
1110 }
1111};
1112} // namespace
1113
1115 : TM(&TM), OptLevel(OptLevel) {}
1116
1118 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1119 static_cast<PassInfoMixin<ExpandFpPass> *>(this)->printPipeline(
1120 OS, MapClassName2PassName);
1121 OS << '<';
1122 OS << "O" << (int)OptLevel;
1123 OS << '>';
1124}
1125
1127 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
1128 auto &TLI = *STI->getTargetLowering();
1129 AssumptionCache *AC = nullptr;
1130 if (OptLevel != CodeGenOptLevel::None)
1131 AC = &FAM.getResult<AssumptionAnalysis>(F);
1132
1133 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1134
1135 const LibcallLoweringModuleAnalysisResult *LibcallLowering =
1136 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
1137
1138 if (!LibcallLowering) {
1139 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
1140 "' analysis required");
1141 return PreservedAnalyses::all();
1142 }
1143
1144 const LibcallLoweringInfo &Libcalls =
1145 LibcallLowering->getLibcallLowering(*STI);
1146
1147 return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()
1149}
1150
1151char ExpandFpLegacyPass::ID = 0;
1152INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp",
1153 "Expand certain fp instructions", false, false)
1155INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false)
1156
1158 return new ExpandFpLegacyPass(OptLevel);
1159}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runOnFunction(Function &F, bool PostInlining)
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
Definition ExpandFp.cpp:382
static void expandIToFP(Instruction *IToFP)
Generate code to convert a fp number to integer, replacing S(U)IToFP with the generated code.
Definition ExpandFp.cpp:684
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
Definition ExpandFp.cpp:989
static void expandFPToI(Instruction *FPToI)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
Definition ExpandFp.cpp:462
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
Definition ExpandFp.cpp:981
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
Definition ExpandFp.cpp:947
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
FunctionAnalysisManager FAM
Function * Fun
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
BinaryOperator * Mul
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:136
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
ExpandFpPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
void setAllowContract(bool B=true)
Definition FMF.h:90
bool noInfs() const
Definition FMF.h:66
void setAllowReciprocal(bool B=true)
Definition FMF.h:87
bool approxFunc() const
Definition FMF.h:70
void setNoNaNs(bool B=true)
Definition FMF.h:78
bool noNaNs() const
Definition FMF.h:65
void setNoInfs(bool B=true)
Definition FMF.h:81
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Multiway switch.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:159
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:145
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:289
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:235
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
void dropAllReferences()
Drop all references to operands.
Definition User.h:349
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
inst_iterator inst_begin(Function *F)
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
inst_iterator inst_end(Function *F)
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI void initializeExpandFpLegacyPassPass(PassRegistry &)
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
LLVM_ABI FunctionPass * createExpandFpPass()
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
Matching combinators.
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:69