LLVM 18.0.0git
ExpandLargeFpConvert.cpp
Go to the documentation of this file.
1//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9
// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
// ‘sitofp .. to’ instructions with a bitwidth above a threshold into
// auto-generated functions. This is useful for targets like x86_64 that cannot
// lower fp conversions with more than 128 bits.
14//
15//===----------------------------------------------------------------------===//
16
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
32
33using namespace llvm;
34
36 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
38 cl::desc("fp convert instructions on integers with "
39 "more than <N> bits are expanded."));
40
41/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
42/// the generated code. This currently generates code similarly to compiler-rt's
43/// implementations.
44///
45/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
46/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
47/// entry:
48/// %0 = bitcast float %a to i32
49/// %conv.i = zext i32 %0 to i64
50/// %tobool.not = icmp sgt i32 %0, -1
51/// %conv = select i1 %tobool.not, i64 1, i64 -1
52/// %and = lshr i64 %conv.i, 23
53/// %shr = and i64 %and, 255
54/// %and2 = and i64 %conv.i, 8388607
55/// %or = or i64 %and2, 8388608
56/// %cmp = icmp ult i64 %shr, 127
57/// br i1 %cmp, label %cleanup, label %if.end
58///
59/// if.end: ; preds = %entry
60/// %sub = add nuw nsw i64 %shr, 4294967169
61/// %conv5 = and i64 %sub, 4294967232
62/// %cmp6.not = icmp eq i64 %conv5, 0
63/// br i1 %cmp6.not, label %if.end12, label %if.then8
64///
65/// if.then8: ; preds = %if.end
66/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
67/// br label %cleanup
68///
69/// if.end12: ; preds = %if.end
70/// %cmp13 = icmp ult i64 %shr, 150
71/// br i1 %cmp13, label %if.then15, label %if.else
72///
73/// if.then15: ; preds = %if.end12
74/// %sub16 = sub nuw nsw i64 150, %shr
75/// %shr17 = lshr i64 %or, %sub16
76/// %mul = mul nsw i64 %shr17, %conv
77/// br label %cleanup
78///
79/// if.else: ; preds = %if.end12
80/// %sub18 = add nsw i64 %shr, -150
81/// %shl = shl i64 %or, %sub18
82/// %mul19 = mul nsw i64 %shl, %conv
83/// br label %cleanup
84///
85/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
86/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
87/// ret i64 %retval.0
88/// }
89///
90/// Replace fp to integer with generated code.
91static void expandFPToI(Instruction *FPToI) {
92 IRBuilder<> Builder(FPToI);
93 auto *FloatVal = FPToI->getOperand(0);
94 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
95
96 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
97 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
98
99 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
100 // to i32 first following a sext/zext to target integer type.
101 Value *A1 = nullptr;
102 if (FloatVal->getType()->isHalfTy()) {
103 if (FPToI->getOpcode() == Instruction::FPToUI) {
104 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
105 A1 = Builder.CreateZExt(A0, IntTy);
106 } else { // FPToSI
107 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
108 A1 = Builder.CreateSExt(A0, IntTy);
109 }
110 FPToI->replaceAllUsesWith(A1);
111 FPToI->dropAllReferences();
112 FPToI->eraseFromParent();
113 return;
114 }
115
116 // fp80 conversion is implemented by fpext to fp128 first then do the
117 // conversion.
118 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
119 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
120 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
121 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
122 Value *ImplicitBit = Builder.CreateShl(
123 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
124 Value *SignificandMask =
125 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
126 Value *NegOne = Builder.CreateSExt(
127 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
128 Value *NegInf =
129 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
130 ConstantInt::getSigned(IntTy, BitWidth - 1));
131
132 BasicBlock *Entry = Builder.GetInsertBlock();
133 Function *F = Entry->getParent();
134 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
135 BasicBlock *End =
136 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
137 BasicBlock *IfEnd =
138 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
139 BasicBlock *IfThen5 =
140 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
141 BasicBlock *IfEnd9 =
142 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
143 BasicBlock *IfThen12 =
144 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
145 BasicBlock *IfElse =
146 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
147
148 Entry->getTerminator()->eraseFromParent();
149
150 // entry:
151 Builder.SetInsertPoint(Entry);
152 Value *FloatVal0 = FloatVal;
153 // fp80 conversion is implemented by fpext to fp128 first then do the
154 // conversion.
155 if (FloatVal->getType()->isX86_FP80Ty())
156 FloatVal0 =
157 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
158 Value *ARep0 =
159 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
160 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
161 Value *PosOrNeg = Builder.CreateICmpSGT(
162 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
163 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
164 ConstantInt::getSigned(IntTy, -1));
165 Value *And =
166 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
167 Value *And2 = Builder.CreateAnd(
168 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
169 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
170 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
171 Value *Cmp =
172 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
173 Builder.CreateCondBr(Cmp, End, IfEnd);
174
175 // if.end:
176 Builder.SetInsertPoint(IfEnd);
177 Value *Add1 = Builder.CreateAdd(
178 And2, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + BitWidth)));
179 Value *Cmp3 =
180 Builder.CreateICmpULT(Add1, ConstantInt::getSigned(IntTy, -BitWidth));
181 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
182
183 // if.then5:
184 Builder.SetInsertPoint(IfThen5);
185 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
186 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
187 Builder.CreateBr(End);
188
189 // if.end9:
190 Builder.SetInsertPoint(IfEnd9);
191 Value *Cmp10 = Builder.CreateICmpULT(
192 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
193 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
194
195 // if.then12:
196 Builder.SetInsertPoint(IfThen12);
197 Value *Sub13 = Builder.CreateSub(
198 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
199 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
200 Value *Mul = Builder.CreateMul(Shr14, Sign);
201 Builder.CreateBr(End);
202
203 // if.else:
204 Builder.SetInsertPoint(IfElse);
205 Value *Sub15 = Builder.CreateAdd(
206 And2,
207 ConstantInt::getSigned(IntTy, -(ExponentBias + FPMantissaWidth)));
208 Value *Shl = Builder.CreateShl(Or, Sub15);
209 Value *Mul16 = Builder.CreateMul(Shl, Sign);
210 Builder.CreateBr(End);
211
212 // cleanup:
213 Builder.SetInsertPoint(End, End->begin());
214 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
215
216 Retval0->addIncoming(Cond8, IfThen5);
217 Retval0->addIncoming(Mul, IfThen12);
218 Retval0->addIncoming(Mul16, IfElse);
219 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
220
221 FPToI->replaceAllUsesWith(Retval0);
222 FPToI->dropAllReferences();
223 FPToI->eraseFromParent();
224}
225
/// Generate code to convert an integer to a fp number, replacing S(U)IToFP
/// with the generated code. This currently generates code similarly to
/// compiler-rt's implementations. This implementation has an implicit
/// assumption that integer width is larger than fp.
230///
231/// An example IR generated from compiler-rt/floatdisf.c looks like below:
232/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
233/// entry:
234/// %cmp = icmp eq i64 %a, 0
235/// br i1 %cmp, label %return, label %if.end
236///
237/// if.end: ; preds = %entry
238/// %shr = ashr i64 %a, 63
239/// %xor = xor i64 %shr, %a
240/// %sub = sub nsw i64 %xor, %shr
241/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
242/// %cast = trunc i64 %0 to i32
243/// %sub1 = sub nuw nsw i32 64, %cast
244/// %sub2 = xor i32 %cast, 63
245/// %cmp3 = icmp ult i32 %cast, 40
246/// br i1 %cmp3, label %if.then4, label %if.else
247///
248/// if.then4: ; preds = %if.end
249/// switch i32 %sub1, label %sw.default [
250/// i32 25, label %sw.bb
251/// i32 26, label %sw.epilog
252/// ]
253///
254/// sw.bb: ; preds = %if.then4
255/// %shl = shl i64 %sub, 1
256/// br label %sw.epilog
257///
258/// sw.default: ; preds = %if.then4
259/// %sub5 = sub nsw i64 38, %0
260/// %sh_prom = and i64 %sub5, 4294967295
261/// %shr6 = lshr i64 %sub, %sh_prom
262/// %shr9 = lshr i64 274877906943, %0
263/// %and = and i64 %shr9, %sub
264/// %cmp10 = icmp ne i64 %and, 0
265/// %conv11 = zext i1 %cmp10 to i64
266/// %or = or i64 %shr6, %conv11
267/// br label %sw.epilog
268///
269/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
270/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
271/// %1 = lshr i64 %a.addr.0, 2
272/// %2 = and i64 %1, 1
273/// %or16 = or i64 %2, %a.addr.0
274/// %inc = add nsw i64 %or16, 1
275/// %3 = and i64 %inc, 67108864
276/// %tobool.not = icmp eq i64 %3, 0
277/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
278/// %spec.select = ashr i64 %inc, %spec.select.v
279/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
280/// br label %if.end26
281///
282/// if.else: ; preds = %if.end
283/// %sub23 = add nuw nsw i64 %0, 4294967256
284/// %sh_prom24 = and i64 %sub23, 4294967295
285/// %shl25 = shl i64 %sub, %sh_prom24
286/// br label %if.end26
287///
288/// if.end26: ; preds = %sw.epilog, %if.else
289/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
290/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
291/// %conv27 = trunc i64 %shr to i32
292/// %and28 = and i32 %conv27, -2147483648
293/// %add = shl nuw nsw i32 %e.0, 23
294/// %shl29 = add nuw nsw i32 %add, 1065353216
295/// %conv31 = trunc i64 %a.addr.1 to i32
296/// %and32 = and i32 %conv31, 8388607
297/// %or30 = or i32 %and32, %and28
298/// %or33 = or i32 %or30, %shl29
299/// %4 = bitcast i32 %or33 to float
300/// br label %return
301///
302/// return: ; preds = %entry, %if.end26
303/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
304/// ret float %retval.0
305/// }
306///
307/// Replace integer to fp with generated code.
308static void expandIToFP(Instruction *IToFP) {
309 IRBuilder<> Builder(IToFP);
310 auto *IntVal = IToFP->getOperand(0);
311 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
312
313 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
314 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
315 // fp80 conversion is implemented by conversion tp fp128 first following
316 // a fptrunc to fp80.
317 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
318 // FIXME: As there is no related builtins added in compliler-rt,
319 // here currently utilized the fp32 <-> fp16 lib calls to implement.
320 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
321 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
322 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
323
324 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
325 "assumes integer width is larger than fp.");
326
327 Value *Temp1 =
328 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
329 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
330
331 BasicBlock *Entry = Builder.GetInsertBlock();
332 Function *F = Entry->getParent();
333 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
334 BasicBlock *End =
335 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
336 BasicBlock *IfEnd =
337 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
338 BasicBlock *IfThen4 =
339 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
340 BasicBlock *SwBB =
341 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
342 BasicBlock *SwDefault =
343 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
344 BasicBlock *SwEpilog =
345 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
346 BasicBlock *IfThen20 =
347 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
348 BasicBlock *IfElse =
349 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
350 BasicBlock *IfEnd26 =
351 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
352
353 Entry->getTerminator()->eraseFromParent();
354
355 Function *CTLZ =
356 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
357 ConstantInt *True = Builder.getTrue();
358
359 // entry:
360 Builder.SetInsertPoint(Entry);
361 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
362 Builder.CreateCondBr(Cmp, End, IfEnd);
363
364 // if.end:
365 Builder.SetInsertPoint(IfEnd);
366 Value *Shr =
367 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
368 Value *Xor = Builder.CreateXor(Shr, IntVal);
369 Value *Sub = Builder.CreateSub(Xor, Shr);
370 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
371 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
372 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
373 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
374 FloatWidth == 128 ? Call : Cast);
375 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
376 FloatWidth == 128 ? Call : Cast);
377 Value *Cmp3 = Builder.CreateICmpSGT(
378 Sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
379 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
380
381 // if.then4:
382 Builder.SetInsertPoint(IfThen4);
383 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
384 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
385 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
386
387 // sw.bb:
388 Builder.SetInsertPoint(SwBB);
389 Value *Shl =
390 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
391 Builder.CreateBr(SwEpilog);
392
393 // sw.default:
394 Builder.SetInsertPoint(SwDefault);
395 Value *Sub5 = Builder.CreateSub(
396 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
397 FloatWidth == 128 ? Call : Cast);
398 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
399 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
400 FloatWidth == 128 ? Sub5 : ShProm);
401 Value *Sub8 =
402 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
403 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
404 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
405 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
406 FloatWidth == 128 ? Sub8 : ShProm9);
407 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
408 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
409 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
410 Value *Or = Builder.CreateOr(Shr6, Conv11);
411 Builder.CreateBr(SwEpilog);
412
413 // sw.epilog:
414 Builder.SetInsertPoint(SwEpilog);
415 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
416 AAddr0->addIncoming(Or, SwDefault);
417 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
418 AAddr0->addIncoming(Shl, SwBB);
419 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
420 Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
421 Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
422 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
423 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
424 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
425 Value *Shr18 = nullptr;
426 if (IsSigned)
427 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
428 else
429 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
430 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
431 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
432 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
433 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
434 Value *ExtractT64 = nullptr;
435 if (FloatWidth > 80)
436 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
437 else
438 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
439 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
440
441 // if.then20
442 Builder.SetInsertPoint(IfThen20);
443 Value *Shr21 = nullptr;
444 if (IsSigned)
445 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
446 else
447 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
448 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
449 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
450 Value *ExtractT62 = nullptr;
451 if (FloatWidth > 80)
452 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
453 else
454 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
455 Builder.CreateBr(IfEnd26);
456
457 // if.else:
458 Builder.SetInsertPoint(IfElse);
459 Value *Sub24 = Builder.CreateAdd(
460 FloatWidth == 128 ? Call : Cast,
461 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
462 -(BitWidth - FPMantissaWidth - 1)));
463 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
464 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
465 FloatWidth == 128 ? Sub24 : ShProm25);
466 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
467 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
468 Value *ExtractT66 = nullptr;
469 if (FloatWidth > 80)
470 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
471 else
472 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
473 Builder.CreateBr(IfEnd26);
474
475 // if.end26:
476 Builder.SetInsertPoint(IfEnd26);
477 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
478 AAddr1Off0->addIncoming(ExtractT, IfThen20);
479 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
480 AAddr1Off0->addIncoming(ExtractT61, IfElse);
481 PHINode *AAddr1Off32 = nullptr;
482 if (FloatWidth > 32) {
483 AAddr1Off32 =
484 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
485 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
486 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
487 AAddr1Off32->addIncoming(ExtractT66, IfElse);
488 }
489 PHINode *E0 = nullptr;
490 if (FloatWidth <= 80) {
491 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
492 E0->addIncoming(Sub1, IfThen20);
493 E0->addIncoming(Sub2, SwEpilog);
494 E0->addIncoming(Sub2, IfElse);
495 }
496 Value *And29 = nullptr;
497 if (FloatWidth > 80) {
498 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
499 Builder.getIntN(BitWidth, 63));
500 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
501 } else {
502 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
503 And29 = Builder.CreateAnd(
504 Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
505 }
506 unsigned TempMod = FPMantissaWidth % 32;
507 Value *And34 = nullptr;
508 Value *Shl30 = nullptr;
509 if (FloatWidth > 80) {
510 TempMod += 32;
511 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
512 Shl30 = Builder.CreateAdd(
513 Add,
514 Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
515 And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
516 } else {
517 Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
518 Shl30 = Builder.CreateAdd(
519 Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
520 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
521 Builder.getIntN(32, (1 << TempMod) - 1));
522 }
523 Value *Or35 = nullptr;
524 if (FloatWidth > 80) {
525 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
526 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
527 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
528 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
529 Builder.getIntN(128, FPMantissaWidth));
530 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
531 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
532 Or35 = Builder.CreateOr(Or34, A6);
533 } else {
534 Value *Or31 = Builder.CreateOr(And34, And29);
535 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
536 }
537 Value *A4 = nullptr;
538 if (IToFP->getType()->isDoubleTy()) {
539 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
540 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
541 Value *And1 =
542 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
543 Value *Or1 = Builder.CreateOr(Shl1, And1);
544 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
545 } else if (IToFP->getType()->isX86_FP80Ty()) {
546 Value *A40 =
547 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
548 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
549 } else if (IToFP->getType()->isHalfTy()) {
550 // Deal with "half" situation. This is a workaround since we don't have
551 // floattihf.c currently as referring.
552 Value *A40 =
553 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
554 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
555 } else // float type
556 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
557 Builder.CreateBr(End);
558
559 // return:
560 Builder.SetInsertPoint(End, End->begin());
561 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
562 Retval0->addIncoming(A4, IfEnd26);
563 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
564
565 IToFP->replaceAllUsesWith(Retval0);
566 IToFP->dropAllReferences();
567 IToFP->eraseFromParent();
568}
569
570static bool runImpl(Function &F, const TargetLowering &TLI) {
572 bool Modified = false;
573
574 unsigned MaxLegalFpConvertBitWidth =
577 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
578
579 if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
580 return false;
581
582 for (auto &I : instructions(F)) {
583 switch (I.getOpcode()) {
584 case Instruction::FPToUI:
585 case Instruction::FPToSI: {
586 // TODO: This pass doesn't handle vectors.
587 if (I.getOperand(0)->getType()->isVectorTy())
588 continue;
589
590 auto *IntTy = dyn_cast<IntegerType>(I.getType());
591 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
592 continue;
593
594 Replace.push_back(&I);
595 Modified = true;
596 break;
597 }
598 case Instruction::UIToFP:
599 case Instruction::SIToFP: {
600 // TODO: This pass doesn't handle vectors.
601 if (I.getOperand(0)->getType()->isVectorTy())
602 continue;
603
604 auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType());
605 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
606 continue;
607
608 Replace.push_back(&I);
609 Modified = true;
610 break;
611 }
612 default:
613 break;
614 }
615 }
616
617 if (Replace.empty())
618 return false;
619
620 while (!Replace.empty()) {
621 Instruction *I = Replace.pop_back_val();
622 if (I->getOpcode() == Instruction::FPToUI ||
623 I->getOpcode() == Instruction::FPToSI) {
624 expandFPToI(I);
625 } else {
626 expandIToFP(I);
627 }
628 }
629
630 return Modified;
631}
632
633namespace {
634class ExpandLargeFpConvertLegacyPass : public FunctionPass {
635public:
636 static char ID;
637
638 ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
641 }
642
643 bool runOnFunction(Function &F) override {
644 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
645 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
646 return runImpl(F, *TLI);
647 }
648
649 void getAnalysisUsage(AnalysisUsage &AU) const override {
653 }
654};
655} // namespace
656
659 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
662}
663
664char ExpandLargeFpConvertLegacyPass::ID = 0;
665INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
666 "Expand large fp convert", false, false)
667INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
668 "Expand large fp convert", false, false)
669
671 return new ExpandLargeFpConvertLegacyPass();
672}
bool End
Definition: ELF_riscv.cpp:478
static bool runImpl(Function &F, const TargetLowering &TLI)
static void expandIToFP(Instruction *IToFP)
Generate code to convert a fp number to integer, replacing S(U)IToFP with the generated code.
static void expandFPToI(Instruction *FPToI)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
expand large fp convert
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI)
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:21
This is the interface for a simple mod/ref and alias analysis over globals.
Select target instructions out of generic instructions
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
BinaryOperator * Mul
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:649
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:206
static Constant * getZero(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1001
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:115
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Legacy wrapper pass to provide the GlobalsAAResult object.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2230
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1996
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:533
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2238
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2074
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:460
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1108
Value * CreateFPToUI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2046
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2012
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:520
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:525
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2218
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2370
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1137
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2214
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2100
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:491
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1114
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2000
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1108
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2385
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1450
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2083
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1513
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
Value * CreateFPToSI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2053
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition: IRBuilder.h:2639
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:93
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:239
Class to represent integer types.
Definition: DerivedTypes.h:40
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:52
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:172
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:175
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:178
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Multiway switch.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the largest fp convert the backend supports.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo-derived member variable.
Target-Independent Code Generator Pass Configuration Options.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:160
static Type * getFP128Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
static Type * getFloatTy(LLVMContext &C)
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1444
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:361
FunctionPass * createExpandLargeFpConvertPass()
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry &)
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191