1//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass expands certain instructions at the IR level.
9//
10// The following expansions are implemented:
11// - Expansion of 'fptoui .. to', 'fptosi .. to', 'uitofp .. to', 'sitofp
12// .. to' instructions with a bitwidth above a threshold. This is
13// useful for targets like x86_64 that cannot lower fp conversions
14// with more than 128 bits.
15//
16// - Expansion of 'frem' for types MVT::f16, MVT::f32, and MVT::f64 for
17// targets which use "Expand" as the legalization action for the
18// corresponding type.
19//
20// - Expansion of 'udiv', 'sdiv', 'urem', and 'srem' instructions with
21// a bitwidth above a threshold into a call to auto-generated
22// functions. This is useful for targets like x86_64 that cannot
23// lower divisions with more than 128 bits or targets like x86_32 that
24// cannot lower divisions with more than 64 bits.
25//
26// Instructions with vector types are scalarized first if their scalar
27// types can be expanded. Scalable vector types are not supported.
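//
// As a purely illustrative sketch (hypothetical IR, not taken from any test
// in this file): on a target whose div/rem and fp-convert support stops at
// 128 bits, instructions such as
//   %q = udiv i256 %a, %b
//   %c = fptosi fp128 %x to i256
// would be rewritten by this pass, while their i64/i128 counterparts are
// left for the backend to legalize.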
28//===----------------------------------------------------------------------===//
29
37#include "llvm/CodeGen/Passes.h"
41#include "llvm/IR/IRBuilder.h"
43#include "llvm/IR/Module.h"
44#include "llvm/IR/PassManager.h"
46#include "llvm/Pass.h"
53#include <optional>
54
55#define DEBUG_TYPE "expand-ir-insts"
56
57using namespace llvm;
58
59static cl::opt<unsigned>
60 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
61 cl::init(llvm::IntegerType::MAX_INT_BITS),
62 cl::desc("fp convert instructions on integers with "
63 "more than <N> bits are expanded."));
64
65static cl::opt<unsigned>
66 ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,
67 cl::init(llvm::IntegerType::MAX_INT_BITS),
68 cl::desc("div and rem instructions on integers with "
69 "more than <N> bits are expanded."));
70
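// Illustrative usage note (assuming the pass runs under a tool such as llc or
// opt, so the cl::opt flags above are visible on its command line): a lower
// threshold can be forced for testing, e.g.
//   -expand-div-rem-bits=64 -expand-fp-convert-bits=64
// in which case an i128 'udiv' or an 'sitofp i128 ... to double' is expanded
// even if the target could legalize 128-bit operations itself.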
71namespace {
72bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) {
73 auto *C = dyn_cast<ConstantInt>(V);
74 if (!C)
75 return false;
76
77 APInt Val = C->getValue();
78 if (SignedOp && Val.isNegative())
79 Val = -Val;
80 return Val.isPowerOf2();
81}
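// Example (illustrative): for 'udiv i256 %x, 16' this returns true, and for
// 'sdiv i256 %x, -16' as well, since |-16| = 16 is a power of two when
// SignedOp is set; a non-constant or non-power-of-two divisor returns false.
// Callers use this to leave such divisions to the backend's existing
// power-of-two peepholes instead of expanding them.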
82
83bool isSigned(unsigned int Opcode) {
84 return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
85}
86
87/// This class implements a precise expansion of the frem instruction.
88/// The generated code is based on the fmod implementation in the AMD device
89/// libs.
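/// Roughly, buildFRem decomposes |x| and |y| with frexp, scales the mantissa
/// of |x| so that \p Bits quotient bits can be retired per loop iteration,
/// repeatedly subtracts rint(ax * (1/ay)) * ay from ax, rescales the result
/// by y's exponent, and finally re-applies the sign of x; see
/// buildRemainderComputation below for the exact generated sequence.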
90class FRemExpander {
91 /// The IRBuilder to use for the expansion.
92 IRBuilder<> &B;
93
94 /// Floating point type of the return value and the arguments of the FRem
95 /// instructions that should be expanded.
96 Type *FremTy;
97
98 /// Floating point type to use for the computation. This may be
99 /// wider than the \p FremTy.
100 Type *ComputeFpTy;
101
102 /// Integer type used to hold the exponents returned by frexp.
103 Type *ExTy;
104
105 /// How many bits of the quotient to compute per iteration of the
106 /// algorithm, stored as a value of type \p ExTy.
107 Value *Bits;
108
109 /// Constant 1 of type \p ExTy.
110 Value *One;
111
112 /// The frem argument/return types that can be expanded by this class.
113 // TODO: The expansion could work for other floating point types
114 // as well, but this would require additional testing.
115 static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,
116 MVT::f64};
117
118public:
119 static bool canExpandType(Type *Ty) {
120 EVT VT = EVT::getEVT(Ty);
121 assert(VT.isSimple() && "Can expand only simple types");
122
123 return is_contained(ExpandableTypes, VT.getSimpleVT());
124 }
125
126 static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {
127 assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");
128 return TLI.getOperationAction(ISD::FREM, VT) ==
129 TargetLowering::LegalizeAction::Expand;
130 }
131
132 static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {
133 // Consider scalar type for simplicity. It seems unlikely that a
134 // vector type can be legalized without expansion if the scalar
135 // type cannot.
136 return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));
137 }
138
139 /// Return true if the pass should expand frem instructions of any type
140 /// for the target represented by \p TLI.
141 static bool shouldExpandAnyFremType(const TargetLowering &TLI) {
142 return any_of(ExpandableTypes,
143 [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });
144 }
145
146 static FRemExpander create(IRBuilder<> &B, Type *Ty) {
147 assert(canExpandType(Ty) && "Expected supported floating point type");
148
149 // The type to use for the computation of the remainder. This may be
150 // wider than the input/result type which affects the ...
151 Type *ComputeTy = Ty;
152 // ... maximum number of iterations of the remainder computation loop
153 // to use. This value is for the case in which the computation
154 // uses the same input/result type.
155 unsigned MaxIter = 2;
156
157 if (Ty->isHalfTy()) {
158 // Use the wider type and fewer iterations.
159 ComputeTy = B.getFloatTy();
160 MaxIter = 1;
161 }
162
163 unsigned Precision =
164 APFloat::semanticsPrecision(ComputeTy->getFltSemantics());
165 return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};
166 }
167
168 /// Build the FRem expansion for the numerator \p X and the
169 /// denominator \p Y. The type of X and Y must match \p FremTy. The
170 /// code will be generated at the insertion point of \p B and the
171 /// insertion point will be reset at exit.
172 Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;
173
174 /// Build an approximate FRem expansion for the numerator \p X and
175 /// the denominator \p Y at the insertion point of builder \p B.
176 /// The type of X and Y must match \p FremTy.
177 Value *buildApproxFRem(Value *X, Value *Y) const;
178
179private:
180 FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)
181 : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),
182 Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {};
183
184 Value *createRcp(Value *V, const Twine &Name) const {
185 // Leave it to later optimizations to turn this into an rcp
186 // instruction if available.
187 return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);
188 }
189
190 // Helper function to build the UPDATE_AX code which is common to the
191 // loop body and the "final iteration".
192 Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {
193 // Build:
194 // float q = rint(ax * ayinv);
195 // ax = fma(-q, ay, ax);
196 // int clt = ax < 0.0f;
197 // float axp = ax + ay;
198 // ax = clt ? axp : ax;
199 Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),
200 {}, "q");
201 Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");
202 Value *Clt = B.CreateFCmp(CmpInst::FCMP_OLT, AxUpdate,
203 ConstantFP::getZero(ComputeFpTy), "clt");
204 Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");
205 return B.CreateSelect(Clt, Axp, AxUpdate, "ax");
206 }
207
208 /// Build code to extract the exponent and mantissa of \p Src.
209 /// Return the exponent minus one for use as a loop bound and
210 /// the mantissa scaled by two to the power \p NewExp.
211 std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,
212 const Twine &ExName,
213 const Twine &PowName) const {
214 // Build:
215 // ExName = frexp_exp(Src) - 1;
216 // PowName = fldexp(frexp_mant(ExName), NewExp);
217 Type *Ty = Src->getType();
218 Type *ExTy = B.getInt32Ty();
219 Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);
220 Value *Mant = B.CreateExtractValue(Frexp, {0});
221 Value *Exp = B.CreateExtractValue(Frexp, {1});
222
223 Exp = B.CreateSub(Exp, One, ExName);
224 Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);
225
226 return {Pow, Exp};
227 }
228
229 /// Build the main computation of the remainder for the case in which
230 /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the
231 /// denominator. Add the incoming edge from the computation result
232 /// to \p RetPhi.
233 void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,
234 PHINode *RetPhi, FastMathFlags FMF) const {
235 IRBuilder<>::FastMathFlagGuard Guard(B);
236 B.setFastMathFlags(FMF);
237
238 // Build:
239 // ex = frexp_exp(ax) - 1;
240 // ax = fldexp(frexp_mant(ax), bits);
241 // ey = frexp_exp(ay) - 1;
242 // ay = fldexp(frexp_mant(ay), 1);
243 auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");
244 auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");
245
246 // Build:
247 // int nb = ex - ey;
248 // float ayinv = 1.0/ay;
249 Value *Nb = B.CreateSub(Ex, Ey, "nb");
250 Value *Ayinv = createRcp(Ay, "ayinv");
251
252 // Build: while (nb > bits)
253 BasicBlock *PreheaderBB = B.GetInsertBlock();
254 Function *Fun = PreheaderBB->getParent();
255 auto *LoopBB = BasicBlock::Create(B.getContext(), "frem.loop_body", Fun);
256 auto *ExitBB = BasicBlock::Create(B.getContext(), "frem.loop_exit", Fun);
257
258 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);
259
260 // Build loop body:
261 // UPDATE_AX
262 // ax = fldexp(ax, bits);
263 // nb -= bits;
264 // One iteration of the loop is factored out. The code shared by
265 // the loop and this "iteration" is denoted by UPDATE_AX.
266 B.SetInsertPoint(LoopBB);
267 PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");
268 NbIv->addIncoming(Nb, PreheaderBB);
269
270 auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");
271 AxPhi->addIncoming(Ax, PreheaderBB);
272
273 Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);
274 AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");
275 AxPhi->addIncoming(AxPhiUpdate, LoopBB);
276 NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);
277
278 B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);
279
280 // Build final iteration
281 // ax = fldexp(ax, nb - bits + 1);
282 // UPDATE_AX
283 B.SetInsertPoint(ExitBB);
284
285 auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");
286 AxPhiExit->addIncoming(Ax, PreheaderBB);
287 AxPhiExit->addIncoming(AxPhi, LoopBB);
288 auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");
289 NbExitPhi->addIncoming(NbIv, LoopBB);
290 NbExitPhi->addIncoming(Nb, PreheaderBB);
291
292 Value *AxFinal = B.CreateLdexp(
293 AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");
294 AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);
295
296 // Build:
297 // ax = fldexp(ax, ey);
298 // ret = copysign(ax,x);
299 AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");
300 if (ComputeFpTy != FremTy)
301 AxFinal = B.CreateFPTrunc(AxFinal, FremTy);
302 Value *Ret = B.CreateCopySign(AxFinal, X);
303
304 RetPhi->addIncoming(Ret, ExitBB);
305 }
306
307 /// Build the else-branch of the conditional in the FRem
308 /// expansion, i.e. the case in which Ax <= Ay, where Ax = |X|, Ay
309 /// = |Y|, and X is the numerator and Y the denominator. Add the
310 /// incoming edge from the result to \p RetPhi.
311 void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {
312 // Build:
313 // ret = ax == ay ? copysign(0.0f, x) : x;
314 Value *ZeroWithXSign = B.CreateCopySign(ConstantFP::getZero(FremTy), X);
315 Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);
316
317 RetPhi->addIncoming(Ret, B.GetInsertBlock());
318 }
319
320 /// Return a value that is NaN if one of the corner cases concerning
321 /// the inputs \p X and \p Y is detected, and \p Ret otherwise.
322 Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,
323 std::optional<SimplifyQuery> &SQ,
324 bool NoInfs) const {
325 // Build:
326 // ret = (y == 0.0f || isnan(y)) ? QNAN : ret;
327 // ret = isfinite(x) ? ret : QNAN;
328 Value *Nan = ConstantFP::getQNaN(FremTy);
329 Ret = B.CreateSelect(B.CreateFCmpUEQ(Y, ConstantFP::getZero(FremTy)), Nan,
330 Ret);
331 Value *XFinite =
332 NoInfs || (SQ && isKnownNeverInfinity(X, *SQ))
333 ? B.getTrue()
334 : B.CreateFCmpULT(B.CreateUnaryIntrinsic(Intrinsic::fabs, X),
335 ConstantFP::getInfinity(FremTy));
336 Ret = B.CreateSelect(XFinite, Ret, Nan);
337
338 return Ret;
339 }
340};
341
342Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {
343 IRBuilder<>::FastMathFlagGuard Guard(B);
344 // Propagating the approximate functions flag to the
345 // division leads to an unacceptable drop in precision
346 // on AMDGPU.
347 // TODO Find out if any flags might be worth propagating.
348 B.clearFastMathFlags();
349
350 Value *Quot = B.CreateFDiv(X, Y);
351 Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});
352 Value *Neg = B.CreateFNeg(Trunc);
353
354 return B.CreateFMA(Neg, Y, X);
355}
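// Note: the sequence above computes fma(-trunc(x / y), y, x), i.e.
// x - trunc(x / y) * y (the usual fmod identity), with the quotient coming
// from a single rounded division; hence "approximate".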
356
357Value *FRemExpander::buildFRem(Value *X, Value *Y,
358 std::optional<SimplifyQuery> &SQ) const {
359 assert(X->getType() == FremTy && Y->getType() == FremTy);
360
361 FastMathFlags FMF = B.getFastMathFlags();
362
363 // This function generates the following code structure:
364 // if (abs(x) > abs(y))
365 // { ret = compute remainder }
366 // else
367 // { ret = x or 0 with sign of x }
368 // Adjust ret for NaN/inf in the input
369 // return ret
370 Value *Ax = B.CreateUnaryIntrinsic(Intrinsic::fabs, X, {}, "ax");
371 Value *Ay = B.CreateUnaryIntrinsic(Intrinsic::fabs, Y, {}, "ay");
372 if (ComputeFpTy != X->getType()) {
373 Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");
374 Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");
375 }
376 Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);
377
378 PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");
379 Value *Ret = RetPhi;
380
381 // We would return NaN in all corner cases handled here.
382 // Hence, if NaNs are excluded, keep the result as it is.
383 if (!FMF.noNaNs())
384 Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());
385
386 Function *Fun = B.GetInsertBlock()->getParent();
387 auto *ThenBB = BasicBlock::Create(B.getContext(), "frem.compute", Fun);
388 auto *ElseBB = BasicBlock::Create(B.getContext(), "frem.else", Fun);
389 SplitBlockAndInsertIfThenElse(AxAyCmp, RetPhi, &ThenBB, &ElseBB);
390
391 auto SavedInsertPt = B.GetInsertPoint();
392
393 // Build remainder computation for "then" branch
394 //
395 // The ordered comparison ensures that ax and ay are not NaNs
396 // in the then-branch. Furthermore, y cannot be an infinity and the
397 // check at the end of the function ensures that the result will not
398 // be used if x is an infinity.
399 FastMathFlags ComputeFMF = FMF;
400 ComputeFMF.setNoInfs();
401 ComputeFMF.setNoNaNs();
402
403 B.SetInsertPoint(ThenBB);
404 buildRemainderComputation(Ax, Ay, X, RetPhi, ComputeFMF);
405 B.CreateBr(RetPhi->getParent());
406
407 // Build "else"-branch
408 B.SetInsertPoint(ElseBB);
409 buildElseBranch(Ax, Ay, X, RetPhi);
410 B.CreateBr(RetPhi->getParent());
411
412 B.SetInsertPoint(SavedInsertPt);
413
414 return Ret;
415}
416} // namespace
417
418static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {
419 LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');
420
421 Type *Ty = I.getType();
422 assert(FRemExpander::canExpandType(Ty) &&
423 "Expected supported floating point type");
424
425 FastMathFlags FMF = I.getFastMathFlags();
426 // TODO Make use of those flags for optimization?
427 FMF.setAllowReciprocal(false);
428 FMF.setAllowContract(false);
429
430 IRBuilder<> B(&I);
431 B.setFastMathFlags(FMF);
432 B.SetCurrentDebugLocation(I.getDebugLoc());
433
434 const FRemExpander Expander = FRemExpander::create(B, Ty);
435 Value *Ret = FMF.approxFunc()
436 ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))
437 : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);
438
439 I.replaceAllUsesWith(Ret);
440 Ret->takeName(&I);
441 I.eraseFromParent();
442
443 return true;
444}
445// clang-format off: preserve formatting of the following example
446
447/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
448/// the generated code. This currently generates code similarly to compiler-rt's
449/// implementations.
450///
451/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
452/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
453/// entry:
454/// %0 = bitcast float %a to i32
455/// %conv.i = zext i32 %0 to i64
456/// %tobool.not = icmp sgt i32 %0, -1
457/// %conv = select i1 %tobool.not, i64 1, i64 -1
458/// %and = lshr i64 %conv.i, 23
459/// %shr = and i64 %and, 255
460/// %and2 = and i64 %conv.i, 8388607
461/// %or = or i64 %and2, 8388608
462/// %cmp = icmp ult i64 %shr, 127
463/// br i1 %cmp, label %cleanup, label %if.end
464///
465/// if.end: ; preds = %entry
466/// %sub = add nuw nsw i64 %shr, 4294967169
467/// %conv5 = and i64 %sub, 4294967232
468/// %cmp6.not = icmp eq i64 %conv5, 0
469/// br i1 %cmp6.not, label %if.end12, label %if.then8
470///
471/// if.then8: ; preds = %if.end
472/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
473/// br label %cleanup
474///
475/// if.end12: ; preds = %if.end
476/// %cmp13 = icmp ult i64 %shr, 150
477/// br i1 %cmp13, label %if.then15, label %if.else
478///
479/// if.then15: ; preds = %if.end12
480/// %sub16 = sub nuw nsw i64 150, %shr
481/// %shr17 = lshr i64 %or, %sub16
482/// %mul = mul nsw i64 %shr17, %conv
483/// br label %cleanup
484///
485/// if.else: ; preds = %if.end12
486/// %sub18 = add nsw i64 %shr, -150
487/// %shl = shl i64 %or, %sub18
488/// %mul19 = mul nsw i64 %shl, %conv
489/// br label %cleanup
490///
491/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
492/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ],
493/// [ %mul19, %if.else ], [ 0, %entry ]
494/// ret i64 %retval.0
495/// }
496///
497/// Replace fp to integer with generated code.
498static void expandFPToI(Instruction *FPToI) {
499 // clang-format on
500 IRBuilder<> Builder(FPToI);
501 auto *FloatVal = FPToI->getOperand(0);
502 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
503
504 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
505 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
506
507 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
508 // to i32 first, followed by a sext/zext to the target integer type.
509 Value *A1 = nullptr;
510 if (FloatVal->getType()->isHalfTy()) {
511 if (FPToI->getOpcode() == Instruction::FPToUI) {
512 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());
513 A1 = Builder.CreateZExt(A0, IntTy);
514 } else { // FPToSI
515 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());
516 A1 = Builder.CreateSExt(A0, IntTy);
517 }
518 FPToI->replaceAllUsesWith(A1);
519 FPToI->dropAllReferences();
520 FPToI->eraseFromParent();
521 return;
522 }
523
524 // fp80 conversion is implemented by an fpext to fp128 first and then doing
525 // the conversion.
526 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
527 unsigned FloatWidth =
528 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
529 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
530 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
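 // Worked example (illustrative): for a 32-bit float input, FloatWidth = 32,
 // FPMantissaWidth = 23, ExponentWidth = 8, and ExponentBias = 127; for a
 // 64-bit double input the values are 64, 52, 11, and 1023.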
531 Value *ImplicitBit = Builder.CreateShl(
532 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
533 Value *SignificandMask =
534 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
535 Value *NegOne = Builder.CreateSExt(
536 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
537 Value *NegInf =
538 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
539 ConstantInt::getSigned(IntTy, BitWidth - 1));
540
541 BasicBlock *Entry = Builder.GetInsertBlock();
542 Function *F = Entry->getParent();
543 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
544 BasicBlock *End =
545 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
546 BasicBlock *IfEnd =
547 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
548 BasicBlock *IfThen5 =
549 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
550 BasicBlock *IfEnd9 =
551 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
552 BasicBlock *IfThen12 =
553 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
554 BasicBlock *IfElse =
555 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
556
557 Entry->getTerminator()->eraseFromParent();
558
559 // entry:
560 Builder.SetInsertPoint(Entry);
561 Value *FloatVal0 = FloatVal;
562 // fp80 conversion is implemented by an fpext to fp128 first and then doing
563 // the conversion.
564 if (FloatVal->getType()->isX86_FP80Ty())
565 FloatVal0 =
566 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
567 Value *ARep0 =
568 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
569 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
570 Value *PosOrNeg = Builder.CreateICmpSGT(
571 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
572 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
573 ConstantInt::getSigned(IntTy, -1));
574 Value *And =
575 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
576 Value *And2 = Builder.CreateAnd(
577 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
578 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
579 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
580 Value *Cmp =
581 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
582 Builder.CreateCondBr(Cmp, End, IfEnd);
583
584 // if.end:
585 Builder.SetInsertPoint(IfEnd);
586 Value *Add1 = Builder.CreateAdd(
587 And2, ConstantInt::getSigned(
588 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
589 Value *Cmp3 = Builder.CreateICmpULT(
590 Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
591 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
592
593 // if.then5:
594 Builder.SetInsertPoint(IfThen5);
595 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
596 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
597 Builder.CreateBr(End);
598
599 // if.end9:
600 Builder.SetInsertPoint(IfEnd9);
601 Value *Cmp10 = Builder.CreateICmpULT(
602 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
603 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
604
605 // if.then12:
606 Builder.SetInsertPoint(IfThen12);
607 Value *Sub13 = Builder.CreateSub(
608 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
609 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
610 Value *Mul = Builder.CreateMul(Shr14, Sign);
611 Builder.CreateBr(End);
612
613 // if.else:
614 Builder.SetInsertPoint(IfElse);
615 Value *Sub15 = Builder.CreateAdd(
616 And2, ConstantInt::getSigned(
617 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
618 Value *Shl = Builder.CreateShl(Or, Sub15);
619 Value *Mul16 = Builder.CreateMul(Shl, Sign);
620 Builder.CreateBr(End);
621
622 // cleanup:
623 Builder.SetInsertPoint(End, End->begin());
624 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
625
626 Retval0->addIncoming(Cond8, IfThen5);
627 Retval0->addIncoming(Mul, IfThen12);
628 Retval0->addIncoming(Mul16, IfElse);
629 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
630
631 FPToI->replaceAllUsesWith(Retval0);
632 FPToI->dropAllReferences();
633 FPToI->eraseFromParent();
634}
635
636// clang-format off: preserve formatting of the following example
637
638/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
639/// the generated code. This currently generates code similarly to compiler-rt's
640/// implementations. This implementation implicitly assumes that the integer
641/// width is larger than the fp width.
642///
643/// An example IR generated from compiler-rt/floatdisf.c looks like below:
644/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
645/// entry:
646/// %cmp = icmp eq i64 %a, 0
647/// br i1 %cmp, label %return, label %if.end
648///
649/// if.end: ; preds = %entry
650/// %shr = ashr i64 %a, 63
651/// %xor = xor i64 %shr, %a
652/// %sub = sub nsw i64 %xor, %shr
653/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
654/// %cast = trunc i64 %0 to i32
655/// %sub1 = sub nuw nsw i32 64, %cast
656/// %sub2 = xor i32 %cast, 63
657/// %cmp3 = icmp ult i32 %cast, 40
658/// br i1 %cmp3, label %if.then4, label %if.else
659///
660/// if.then4: ; preds = %if.end
661/// switch i32 %sub1, label %sw.default [
662/// i32 25, label %sw.bb
663/// i32 26, label %sw.epilog
664/// ]
665///
666/// sw.bb: ; preds = %if.then4
667/// %shl = shl i64 %sub, 1
668/// br label %sw.epilog
669///
670/// sw.default: ; preds = %if.then4
671/// %sub5 = sub nsw i64 38, %0
672/// %sh_prom = and i64 %sub5, 4294967295
673/// %shr6 = lshr i64 %sub, %sh_prom
674/// %shr9 = lshr i64 274877906943, %0
675/// %and = and i64 %shr9, %sub
676/// %cmp10 = icmp ne i64 %and, 0
677/// %conv11 = zext i1 %cmp10 to i64
678/// %or = or i64 %shr6, %conv11
679/// br label %sw.epilog
680///
681/// sw.epilog: ; preds = %sw.default,
682/// %if.then4, %sw.bb
683/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl,
684/// %sw.bb ] %1 = lshr i64 %a.addr.0, 2 %2 = and i64 %1, 1 %or16 = or i64 %2,
685/// %a.addr.0 %inc = add nsw i64 %or16, 1 %3 = and i64 %inc, 67108864
686/// %tobool.not = icmp eq i64 %3, 0
687/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
688/// %spec.select = ashr i64 %inc, %spec.select.v
689/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
690/// br label %if.end26
691///
692/// if.else: ; preds = %if.end
693/// %sub23 = add nuw nsw i64 %0, 4294967256
694/// %sh_prom24 = and i64 %sub23, 4294967295
695/// %shl25 = shl i64 %sub, %sh_prom24
696/// br label %if.end26
697///
698/// if.end26: ; preds = %sw.epilog,
699/// %if.else
700/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
701/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
702/// %conv27 = trunc i64 %shr to i32
703/// %and28 = and i32 %conv27, -2147483648
704/// %add = shl nuw nsw i32 %e.0, 23
705/// %shl29 = add nuw nsw i32 %add, 1065353216
706/// %conv31 = trunc i64 %a.addr.1 to i32
707/// %and32 = and i32 %conv31, 8388607
708/// %or30 = or i32 %and32, %and28
709/// %or33 = or i32 %or30, %shl29
710/// %4 = bitcast i32 %or33 to float
711/// br label %return
712///
713/// return: ; preds = %entry,
714/// %if.end26
715/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
716/// ret float %retval.0
717/// }
718///
719/// Replace integer to fp with generated code.
720static void expandIToFP(Instruction *IToFP) {
721 // clang-format on
722 IRBuilder<> Builder(IToFP);
723 auto *IntVal = IToFP->getOperand(0);
724 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
725
726 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
727 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
728 // fp80 conversion is implemented by converting to fp128 first, followed by
729 // a fptrunc to fp80.
730 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
731 // FIXME: As there are no related builtins in compiler-rt, the fp32 <-> fp16
732 // lib calls are currently used here as the implementation.
733 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
734 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
735 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
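 // Worked example (illustrative): for a double result, getFPMantissaWidth()
 // is 53, so FPMantissaWidth becomes 52 and FloatWidth = PowerOf2Ceil(52) = 64;
 // for float (and for half/bfloat after the adjustments above) FPMantissaWidth
 // is 23 and FloatWidth is 32; for fp128 and x86_fp80 they are 112 and 128.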
736 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
737
738 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
739 "assumes integer width is larger than fp.");
740
741 Value *Temp1 =
742 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
743 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
744
745 BasicBlock *Entry = Builder.GetInsertBlock();
746 Function *F = Entry->getParent();
747 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
748 BasicBlock *End =
749 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
750 BasicBlock *IfEnd =
751 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
752 BasicBlock *IfThen4 =
753 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
754 BasicBlock *SwBB =
755 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
756 BasicBlock *SwDefault =
757 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
758 BasicBlock *SwEpilog =
759 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
760 BasicBlock *IfThen20 =
761 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
762 BasicBlock *IfElse =
763 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
764 BasicBlock *IfEnd26 =
765 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
766
767 Entry->getTerminator()->eraseFromParent();
768
769 Function *CTLZ =
770 Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
771 ConstantInt *True = Builder.getTrue();
772
773 // entry:
774 Builder.SetInsertPoint(Entry);
775 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
776 Builder.CreateCondBr(Cmp, End, IfEnd);
777
778 // if.end:
779 Builder.SetInsertPoint(IfEnd);
780 Value *Shr =
781 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
782 Value *Xor = Builder.CreateXor(Shr, IntVal);
783 Value *Sub = Builder.CreateSub(Xor, Shr);
784 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
785 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
786 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
787 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
788 FloatWidth == 128 ? Call : Cast);
789 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
790 FloatWidth == 128 ? Call : Cast);
791 Value *Cmp3 = Builder.CreateICmpSGT(
792 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
793 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
794
795 // if.then4:
796 Builder.SetInsertPoint(IfThen4);
797 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
798 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
799 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
800
801 // sw.bb:
802 Builder.SetInsertPoint(SwBB);
803 Value *Shl =
804 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
805 Builder.CreateBr(SwEpilog);
806
807 // sw.default:
808 Builder.SetInsertPoint(SwDefault);
809 Value *Sub5 = Builder.CreateSub(
810 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
811 FloatWidth == 128 ? Call : Cast);
812 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
813 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
814 FloatWidth == 128 ? Sub5 : ShProm);
815 Value *Sub8 =
816 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
817 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
818 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
819 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
820 FloatWidth == 128 ? Sub8 : ShProm9);
821 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
822 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
823 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
824 Value *Or = Builder.CreateOr(Shr6, Conv11);
825 Builder.CreateBr(SwEpilog);
826
827 // sw.epilog:
828 Builder.SetInsertPoint(SwEpilog);
829 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
830 AAddr0->addIncoming(Or, SwDefault);
831 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
832 AAddr0->addIncoming(Shl, SwBB);
833 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
834 Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));
835 Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));
836 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
837 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
838 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
839 Value *Shr18 = nullptr;
840 if (IsSigned)
841 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
842 else
843 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
844 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
845 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
846 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
847 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
848 Value *ExtractT64 = nullptr;
849 if (FloatWidth > 80)
850 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
851 else
852 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
853 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
854
855 // if.then20
856 Builder.SetInsertPoint(IfThen20);
857 Value *Shr21 = nullptr;
858 if (IsSigned)
859 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
860 else
861 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
862 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
863 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
864 Value *ExtractT62 = nullptr;
865 if (FloatWidth > 80)
866 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());
867 else
868 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());
869 Builder.CreateBr(IfEnd26);
870
871 // if.else:
872 Builder.SetInsertPoint(IfElse);
873 Value *Sub24 = Builder.CreateAdd(
874 FloatWidth == 128 ? Call : Cast,
875 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
876 -(int)(BitWidth - FPMantissaWidth - 1)));
877 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
878 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
879 FloatWidth == 128 ? Sub24 : ShProm25);
880 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
881 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
882 Value *ExtractT66 = nullptr;
883 if (FloatWidth > 80)
884 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
885 else
886 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
887 Builder.CreateBr(IfEnd26);
888
889 // if.end26:
890 Builder.SetInsertPoint(IfEnd26);
891 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
892 AAddr1Off0->addIncoming(ExtractT, IfThen20);
893 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
894 AAddr1Off0->addIncoming(ExtractT61, IfElse);
895 PHINode *AAddr1Off32 = nullptr;
896 if (FloatWidth > 32) {
897 AAddr1Off32 =
898 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
899 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
900 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
901 AAddr1Off32->addIncoming(ExtractT66, IfElse);
902 }
903 PHINode *E0 = nullptr;
904 if (FloatWidth <= 80) {
905 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
906 E0->addIncoming(Sub1, IfThen20);
907 E0->addIncoming(Sub2, SwEpilog);
908 E0->addIncoming(Sub2, IfElse);
909 }
910 Value *And29 = nullptr;
911 if (FloatWidth > 80) {
912 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
913 Builder.getIntN(BitWidth, 63));
914 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
915 } else {
916 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());
917 And29 = Builder.CreateAnd(
918 Conv28, ConstantInt::get(Builder.getContext(), APInt::getSignMask(32)));
919 }
920 unsigned TempMod = FPMantissaWidth % 32;
921 Value *And34 = nullptr;
922 Value *Shl30 = nullptr;
923 if (FloatWidth > 80) {
924 TempMod += 32;
925 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));
926 Shl30 = Builder.CreateAdd(
927 Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));
928 And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());
929 } else {
930 Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));
931 Shl30 = Builder.CreateAdd(
932 Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));
933 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
934 Builder.getInt32((1 << TempMod) - 1));
935 }
936 Value *Or35 = nullptr;
937 if (FloatWidth > 80) {
938 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());
939 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
940 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
941 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
942 Builder.getIntN(128, FPMantissaWidth));
943 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
944 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
945 Or35 = Builder.CreateOr(Or34, A6);
946 } else {
947 Value *Or31 = Builder.CreateOr(And34, And29);
948 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
949 }
950 Value *A4 = nullptr;
951 if (IToFP->getType()->isDoubleTy()) {
952 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
953 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
954 Value *And1 =
955 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
956 Value *Or1 = Builder.CreateOr(Shl1, And1);
957 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
958 } else if (IToFP->getType()->isX86_FP80Ty()) {
959 Value *A40 =
960 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
961 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
962 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
963 // Deal with "half" situation. This is a workaround since we don't have
964 // floattihf.c currently as referring.
965 Value *A40 =
966 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
967 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
968 } else // float type
969 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
970 Builder.CreateBr(End);
971
972 // return:
973 Builder.SetInsertPoint(End, End->begin());
974 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
975 Retval0->addIncoming(A4, IfEnd26);
976 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
977
978 IToFP->replaceAllUsesWith(Retval0);
979 IToFP->dropAllReferences();
980 IToFP->eraseFromParent();
981}
982
983static void scalarize(Instruction *I,
984 SmallVectorImpl<Instruction *> &Worklist) {
985 VectorType *VTy = cast<FixedVectorType>(I->getType());
986
987 IRBuilder<> Builder(I);
988
989 unsigned NumElements = VTy->getElementCount().getFixedValue();
990 Value *Result = PoisonValue::get(VTy);
991 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
992 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
993
994 Value *NewOp = nullptr;
995 if (auto *BinOp = dyn_cast<BinaryOperator>(I))
996 NewOp = Builder.CreateBinOp(
997 BinOp->getOpcode(), Ext,
998 Builder.CreateExtractElement(I->getOperand(1), Idx));
999 else if (auto *CastI = dyn_cast<CastInst>(I))
1000 NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,
1001 I->getType()->getScalarType());
1002 else
1003 llvm_unreachable("Unsupported instruction type");
1004
1005 Result = Builder.CreateInsertElement(Result, NewOp, Idx);
1006 if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) {
1007 ScalarizedI->copyIRFlags(I, true);
1008 Worklist.push_back(ScalarizedI);
1009 }
1010 }
1011
1012 I->replaceAllUsesWith(Result);
1013 I->dropAllReferences();
1014 I->eraseFromParent();
1015}
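// Illustrative example (hypothetical IR, not from a test): a vector division
// such as
//   %r = udiv <2 x i256> %a, %b
// becomes two extractelement/udiv/insertelement triples feeding a poison-based
// vector rebuild, and the new scalar udivs are pushed onto the worklist so
// they get expanded like any other oversized scalar division.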
1016
1017static void addToWorklist(Instruction &I,
1018 SmallVector<Instruction *, 4> &Worklist) {
1019 if (I.getOperand(0)->getType()->isVectorTy())
1020 scalarize(&I, Worklist);
1021 else
1022 Worklist.push_back(&I);
1023}
1024
1025static bool runImpl(Function &F, const TargetLowering &TLI,
1026 const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {
1027 SmallVector<Instruction *, 4> Worklist;
1028
1029 unsigned MaxLegalFpConvertBitWidth =
1030 TLI.getMaxLargeFPConvertBitWidthSupported();
1031 if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
1032 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
1033
1034 unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();
1035 if (ExpandDivRemBits != llvm::IntegerType::MAX_INT_BITS)
1036 MaxLegalDivRemBitWidth = ExpandDivRemBits;
1037
1038 bool DisableExpandLargeFp =
1039 MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS;
1040 bool DisableExpandLargeDivRem =
1041 MaxLegalDivRemBitWidth >= llvm::IntegerType::MAX_INT_BITS;
1042 bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);
1043
1044 if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)
1045 return false;
1046
1047 auto ShouldHandleInst = [&](Instruction &I) {
1048 Type *Ty = I.getType();
1049 // TODO: This pass doesn't handle scalable vectors.
1050 if (Ty->isScalableTy())
1051 return false;
1052
1053 switch (I.getOpcode()) {
1054 case Instruction::FRem:
1055 return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);
1056 case Instruction::FPToUI:
1057 case Instruction::FPToSI:
1058 return !DisableExpandLargeFp &&
1059 cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
1060 MaxLegalFpConvertBitWidth;
1061 case Instruction::UIToFP:
1062 case Instruction::SIToFP:
1063 return !DisableExpandLargeFp &&
1064 cast<IntegerType>(I.getOperand(0)->getType()->getScalarType())
1065 ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;
1066 case Instruction::UDiv:
1067 case Instruction::SDiv:
1068 case Instruction::URem:
1069 case Instruction::SRem:
1070 return !DisableExpandLargeDivRem &&
1071 cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >
1072 MaxLegalDivRemBitWidth
1073 // The backend has peephole optimizations for powers of two.
1074 // TODO: We don't consider vectors here.
1075 && !isConstantPowerOfTwo(I.getOperand(1), isSigned(I.getOpcode()));
1076 }
1077
1078 return false;
1079 };
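 // As an illustration: with the x86_64-style limits mentioned in the file
 // header (128-bit div/rem and fp-convert support), an 'udiv i256' or a
 // 'fptosi fp128 %x to i256' is queued here, whereas 'udiv i256 %x, 16' is
 // skipped because of the power-of-two check above.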
1080
1081 bool Modified = false;
1082 for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {
1083 Instruction &I = *It++;
1084 if (!ShouldHandleInst(I))
1085 continue;
1086
1087 addToWorklist(I, Worklist);
1088 Modified = true;
1089 }
1090
1091 while (!Worklist.empty()) {
1092 Instruction *I = Worklist.pop_back_val();
1093
1094 switch (I->getOpcode()) {
1095 case Instruction::FRem: {
1096 auto SQ = [&]() -> std::optional<SimplifyQuery> {
1097 if (AC) {
1098 auto Res = std::make_optional<SimplifyQuery>(
1099 I->getModule()->getDataLayout(), I);
1100 Res->AC = AC;
1101 return Res;
1102 }
1103 return {};
1104 }();
1105
1106 expandFRem(cast<BinaryOperator>(*I), SQ);
1107 break;
1108 }
1109
1110 case Instruction::FPToUI:
1111 case Instruction::FPToSI:
1112 expandFPToI(I);
1113 break;
1114
1115 case Instruction::UIToFP:
1116 case Instruction::SIToFP:
1117 expandIToFP(I);
1118 break;
1119
1120 case Instruction::UDiv:
1121 case Instruction::SDiv:
1122 expandDivision(cast<BinaryOperator>(I));
1123 break;
1124 case Instruction::URem:
1125 case Instruction::SRem:
1126 expandRemainder(cast<BinaryOperator>(I));
1127 break;
1128 }
1129 }
1130
1131 return Modified;
1132}
1133
1134namespace {
1135class ExpandIRInstsLegacyPass : public FunctionPass {
1136 CodeGenOptLevel OptLevel;
1137
1138public:
1139 static char ID;
1140
1141 ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)
1142 : FunctionPass(ID), OptLevel(OptLevel) {
1143 initializeExpandIRInstsLegacyPassPass(*PassRegistry::getPassRegistry());
1144 }
1145
1146 ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass(CodeGenOptLevel::None) {};
1147
1148 bool runOnFunction(Function &F) override {
1149 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1150 const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);
1151 auto *TLI = Subtarget->getTargetLowering();
1152 AssumptionCache *AC = nullptr;
1153
1154 const LibcallLoweringInfo &Libcalls =
1155 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
1156 *Subtarget);
1157
1158 if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())
1159 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1160 return runImpl(F, *TLI, Libcalls, AC);
1161 }
1162
1163 void getAnalysisUsage(AnalysisUsage &AU) const override {
1164 AU.addRequired<LibcallLoweringInfoWrapper>();
1165 AU.addRequired<TargetPassConfig>();
1166 if (OptLevel != CodeGenOptLevel::None)
1167 AU.addRequired<AssumptionCacheTracker>();
1168 AU.addPreserved<AAResultsWrapperPass>();
1169 AU.addPreserved<GlobalsAAWrapperPass>();
1170 AU.addRequired<LibcallLoweringInfoWrapper>();
1171 }
1172};
1173} // namespace
1174
1175ExpandIRInstsPass::ExpandIRInstsPass(const TargetMachine &TM,
1176 CodeGenOptLevel OptLevel)
1177 : TM(&TM), OptLevel(OptLevel) {}
1178
1179void ExpandIRInstsPass::printPipeline(
1180 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1181 static_cast<PassInfoMixin<ExpandIRInstsPass> *>(this)->printPipeline(
1182 OS, MapClassName2PassName);
1183 OS << '<';
1184 OS << "O" << (int)OptLevel;
1185 OS << '>';
1186}
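// For example (assuming the registered pass name matches DEBUG_TYPE above),
// -print-pipeline-passes would render this pass as something like
// "expand-ir-insts<O2>".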
1187
1188PreservedAnalyses ExpandIRInstsPass::run(Function &F,
1189 FunctionAnalysisManager &FAM) {
1190 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
1191 auto &TLI = *STI->getTargetLowering();
1192 AssumptionCache *AC = nullptr;
1193 if (OptLevel != CodeGenOptLevel::None)
1194 AC = &FAM.getResult<AssumptionAnalysis>(F);
1195
1196 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
1197
1198 const LibcallLoweringModuleAnalysisResult *LibcallLowering =
1199 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
1200
1201 if (!LibcallLowering) {
1202 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
1203 "' analysis required");
1204 return PreservedAnalyses::all();
1205 }
1206
1207 const LibcallLoweringInfo &Libcalls =
1208 LibcallLowering->getLibcallLowering(*STI);
1209
1210 return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()
1211 : PreservedAnalyses::all();
1212}
1213
1214char ExpandIRInstsLegacyPass::ID = 0;
1215INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",
1216 "Expand certain fp instructions", false, false)
1218INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",
1219 "Expand IR instructions", false, false)
1220
1221FunctionPass *llvm::createExpandIRInstsPass(CodeGenOptLevel OptLevel) {
1222 return new ExpandIRInstsLegacyPass(OptLevel);
1223}