LLVM 20.0.0git
ExpandLargeFpConvert.cpp
Go to the documentation of this file.
1//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9
10// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
11// ‘sitofp .. to’ instructions with a bitwidth above a threshold into
12// auto-generated functions. This is useful for targets like x86_64 that cannot
13// lower fp conversions with more than 128 bits.
14//
15//===----------------------------------------------------------------------===//
16
20#include "llvm/CodeGen/Passes.h"
24#include "llvm/IR/IRBuilder.h"
26#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
31
32using namespace llvm;
33
35 ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
37 cl::desc("fp convert instructions on integers with "
38 "more than <N> bits are expanded."));
39
40/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
41/// the generated code. This currently generates code similarly to compiler-rt's
42/// implementations.
43///
44/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
45/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
46/// entry:
47/// %0 = bitcast float %a to i32
48/// %conv.i = zext i32 %0 to i64
49/// %tobool.not = icmp sgt i32 %0, -1
50/// %conv = select i1 %tobool.not, i64 1, i64 -1
51/// %and = lshr i64 %conv.i, 23
52/// %shr = and i64 %and, 255
53/// %and2 = and i64 %conv.i, 8388607
54/// %or = or i64 %and2, 8388608
55/// %cmp = icmp ult i64 %shr, 127
56/// br i1 %cmp, label %cleanup, label %if.end
57///
58/// if.end: ; preds = %entry
59/// %sub = add nuw nsw i64 %shr, 4294967169
60/// %conv5 = and i64 %sub, 4294967232
61/// %cmp6.not = icmp eq i64 %conv5, 0
62/// br i1 %cmp6.not, label %if.end12, label %if.then8
63///
64/// if.then8: ; preds = %if.end
65/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
66/// br label %cleanup
67///
68/// if.end12: ; preds = %if.end
69/// %cmp13 = icmp ult i64 %shr, 150
70/// br i1 %cmp13, label %if.then15, label %if.else
71///
72/// if.then15: ; preds = %if.end12
73/// %sub16 = sub nuw nsw i64 150, %shr
74/// %shr17 = lshr i64 %or, %sub16
75/// %mul = mul nsw i64 %shr17, %conv
76/// br label %cleanup
77///
78/// if.else: ; preds = %if.end12
79/// %sub18 = add nsw i64 %shr, -150
80/// %shl = shl i64 %or, %sub18
81/// %mul19 = mul nsw i64 %shl, %conv
82/// br label %cleanup
83///
84/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
85/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
86/// ret i64 %retval.0
87/// }
88///
89/// Replace fp to integer with generated code.
90static void expandFPToI(Instruction *FPToI) {
91 IRBuilder<> Builder(FPToI);
92 auto *FloatVal = FPToI->getOperand(0);
93 IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
94
95 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
96 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
97
98 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
99 // to i32 first following a sext/zext to target integer type.
100 Value *A1 = nullptr;
101 if (FloatVal->getType()->isHalfTy()) {
102 if (FPToI->getOpcode() == Instruction::FPToUI) {
103 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
104 A1 = Builder.CreateZExt(A0, IntTy);
105 } else { // FPToSI
106 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
107 A1 = Builder.CreateSExt(A0, IntTy);
108 }
109 FPToI->replaceAllUsesWith(A1);
110 FPToI->dropAllReferences();
111 FPToI->eraseFromParent();
112 return;
113 }
114
115 // fp80 conversion is implemented by fpext to fp128 first then do the
116 // conversion.
117 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
118 unsigned FloatWidth =
119 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());
120 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
121 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
122 Value *ImplicitBit = Builder.CreateShl(
123 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
124 Value *SignificandMask =
125 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
126 Value *NegOne = Builder.CreateSExt(
127 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
128 Value *NegInf =
129 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
130 ConstantInt::getSigned(IntTy, BitWidth - 1));
131
132 BasicBlock *Entry = Builder.GetInsertBlock();
133 Function *F = Entry->getParent();
134 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
135 BasicBlock *End =
136 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
137 BasicBlock *IfEnd =
138 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
139 BasicBlock *IfThen5 =
140 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
141 BasicBlock *IfEnd9 =
142 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
143 BasicBlock *IfThen12 =
144 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
145 BasicBlock *IfElse =
146 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
147
148 Entry->getTerminator()->eraseFromParent();
149
150 // entry:
151 Builder.SetInsertPoint(Entry);
152 Value *FloatVal0 = FloatVal;
153 // fp80 conversion is implemented by fpext to fp128 first then do the
154 // conversion.
155 if (FloatVal->getType()->isX86_FP80Ty())
156 FloatVal0 =
157 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
158 Value *ARep0 =
159 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
160 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
161 Value *PosOrNeg = Builder.CreateICmpSGT(
162 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
163 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
164 ConstantInt::getSigned(IntTy, -1));
165 Value *And =
166 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
167 Value *And2 = Builder.CreateAnd(
168 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
169 Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
170 Value *Or = Builder.CreateOr(Abs, ImplicitBit);
171 Value *Cmp =
172 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
173 Builder.CreateCondBr(Cmp, End, IfEnd);
174
175 // if.end:
176 Builder.SetInsertPoint(IfEnd);
177 Value *Add1 = Builder.CreateAdd(
179 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
180 Value *Cmp3 = Builder.CreateICmpULT(
181 Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
182 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
183
184 // if.then5:
185 Builder.SetInsertPoint(IfThen5);
186 Value *PosInf = Builder.CreateXor(NegOne, NegInf);
187 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
188 Builder.CreateBr(End);
189
190 // if.end9:
191 Builder.SetInsertPoint(IfEnd9);
192 Value *Cmp10 = Builder.CreateICmpULT(
193 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
194 Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
195
196 // if.then12:
197 Builder.SetInsertPoint(IfThen12);
198 Value *Sub13 = Builder.CreateSub(
199 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
200 Value *Shr14 = Builder.CreateLShr(Or, Sub13);
201 Value *Mul = Builder.CreateMul(Shr14, Sign);
202 Builder.CreateBr(End);
203
204 // if.else:
205 Builder.SetInsertPoint(IfElse);
206 Value *Sub15 = Builder.CreateAdd(
208 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
209 Value *Shl = Builder.CreateShl(Or, Sub15);
210 Value *Mul16 = Builder.CreateMul(Shl, Sign);
211 Builder.CreateBr(End);
212
213 // cleanup:
214 Builder.SetInsertPoint(End, End->begin());
215 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
216
217 Retval0->addIncoming(Cond8, IfThen5);
218 Retval0->addIncoming(Mul, IfThen12);
219 Retval0->addIncoming(Mul16, IfElse);
220 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
221
222 FPToI->replaceAllUsesWith(Retval0);
223 FPToI->dropAllReferences();
224 FPToI->eraseFromParent();
225}
226
227/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
228/// the generated code. This currently generates code similarly to compiler-rt's
229/// implementations. This implementation has an implicit assumption that integer
230/// width is larger than fp.
231///
232/// An example IR generated from compiler-rt/floatdisf.c looks like below:
233/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
234/// entry:
235/// %cmp = icmp eq i64 %a, 0
236/// br i1 %cmp, label %return, label %if.end
237///
238/// if.end: ; preds = %entry
239/// %shr = ashr i64 %a, 63
240/// %xor = xor i64 %shr, %a
241/// %sub = sub nsw i64 %xor, %shr
242/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
243/// %cast = trunc i64 %0 to i32
244/// %sub1 = sub nuw nsw i32 64, %cast
245/// %sub2 = xor i32 %cast, 63
246/// %cmp3 = icmp ult i32 %cast, 40
247/// br i1 %cmp3, label %if.then4, label %if.else
248///
249/// if.then4: ; preds = %if.end
250/// switch i32 %sub1, label %sw.default [
251/// i32 25, label %sw.bb
252/// i32 26, label %sw.epilog
253/// ]
254///
255/// sw.bb: ; preds = %if.then4
256/// %shl = shl i64 %sub, 1
257/// br label %sw.epilog
258///
259/// sw.default: ; preds = %if.then4
260/// %sub5 = sub nsw i64 38, %0
261/// %sh_prom = and i64 %sub5, 4294967295
262/// %shr6 = lshr i64 %sub, %sh_prom
263/// %shr9 = lshr i64 274877906943, %0
264/// %and = and i64 %shr9, %sub
265/// %cmp10 = icmp ne i64 %and, 0
266/// %conv11 = zext i1 %cmp10 to i64
267/// %or = or i64 %shr6, %conv11
268/// br label %sw.epilog
269///
270/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
271/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
272/// %1 = lshr i64 %a.addr.0, 2
273/// %2 = and i64 %1, 1
274/// %or16 = or i64 %2, %a.addr.0
275/// %inc = add nsw i64 %or16, 1
276/// %3 = and i64 %inc, 67108864
277/// %tobool.not = icmp eq i64 %3, 0
278/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
279/// %spec.select = ashr i64 %inc, %spec.select.v
280/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
281/// br label %if.end26
282///
283/// if.else: ; preds = %if.end
284/// %sub23 = add nuw nsw i64 %0, 4294967256
285/// %sh_prom24 = and i64 %sub23, 4294967295
286/// %shl25 = shl i64 %sub, %sh_prom24
287/// br label %if.end26
288///
289/// if.end26: ; preds = %sw.epilog, %if.else
290/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
291/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
292/// %conv27 = trunc i64 %shr to i32
293/// %and28 = and i32 %conv27, -2147483648
294/// %add = shl nuw nsw i32 %e.0, 23
295/// %shl29 = add nuw nsw i32 %add, 1065353216
296/// %conv31 = trunc i64 %a.addr.1 to i32
297/// %and32 = and i32 %conv31, 8388607
298/// %or30 = or i32 %and32, %and28
299/// %or33 = or i32 %or30, %shl29
300/// %4 = bitcast i32 %or33 to float
301/// br label %return
302///
303/// return: ; preds = %entry, %if.end26
304/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
305/// ret float %retval.0
306/// }
307///
308/// Replace integer to fp with generated code.
309static void expandIToFP(Instruction *IToFP) {
310 IRBuilder<> Builder(IToFP);
311 auto *IntVal = IToFP->getOperand(0);
312 IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
313
314 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
315 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
316 // fp80 conversion is implemented by conversion tp fp128 first following
317 // a fptrunc to fp80.
318 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
319 // FIXME: As there is no related builtins added in compliler-rt,
320 // here currently utilized the fp32 <-> fp16 lib calls to implement.
321 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
322 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
323 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
324 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
325
326 assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
327 "assumes integer width is larger than fp.");
328
329 Value *Temp1 =
330 Builder.CreateShl(Builder.getIntN(BitWidth, 1),
331 Builder.getIntN(BitWidth, FPMantissaWidth + 3));
332
333 BasicBlock *Entry = Builder.GetInsertBlock();
334 Function *F = Entry->getParent();
335 Entry->setName(Twine(Entry->getName(), "itofp-entry"));
336 BasicBlock *End =
337 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
338 BasicBlock *IfEnd =
339 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
340 BasicBlock *IfThen4 =
341 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
342 BasicBlock *SwBB =
343 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
344 BasicBlock *SwDefault =
345 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
346 BasicBlock *SwEpilog =
347 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
348 BasicBlock *IfThen20 =
349 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
350 BasicBlock *IfElse =
351 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
352 BasicBlock *IfEnd26 =
353 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
354
355 Entry->getTerminator()->eraseFromParent();
356
357 Function *CTLZ =
358 Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
359 ConstantInt *True = Builder.getTrue();
360
361 // entry:
362 Builder.SetInsertPoint(Entry);
363 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
364 Builder.CreateCondBr(Cmp, End, IfEnd);
365
366 // if.end:
367 Builder.SetInsertPoint(IfEnd);
368 Value *Shr =
369 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
370 Value *Xor = Builder.CreateXor(Shr, IntVal);
371 Value *Sub = Builder.CreateSub(Xor, Shr);
372 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
373 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
374 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
375 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
376 FloatWidth == 128 ? Call : Cast);
377 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
378 FloatWidth == 128 ? Call : Cast);
379 Value *Cmp3 = Builder.CreateICmpSGT(
380 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
381 Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
382
383 // if.then4:
384 Builder.SetInsertPoint(IfThen4);
385 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
386 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
387 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
388
389 // sw.bb:
390 Builder.SetInsertPoint(SwBB);
391 Value *Shl =
392 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
393 Builder.CreateBr(SwEpilog);
394
395 // sw.default:
396 Builder.SetInsertPoint(SwDefault);
397 Value *Sub5 = Builder.CreateSub(
398 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
399 FloatWidth == 128 ? Call : Cast);
400 Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
401 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
402 FloatWidth == 128 ? Sub5 : ShProm);
403 Value *Sub8 =
404 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
405 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
406 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
407 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
408 FloatWidth == 128 ? Sub8 : ShProm9);
409 Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
410 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
411 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
412 Value *Or = Builder.CreateOr(Shr6, Conv11);
413 Builder.CreateBr(SwEpilog);
414
415 // sw.epilog:
416 Builder.SetInsertPoint(SwEpilog);
417 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
418 AAddr0->addIncoming(Or, SwDefault);
419 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
420 AAddr0->addIncoming(Shl, SwBB);
421 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
422 Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
423 Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
424 Value *Conv16 = Builder.CreateZExt(A2, IntTy);
425 Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
426 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
427 Value *Shr18 = nullptr;
428 if (IsSigned)
429 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
430 else
431 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
432 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
433 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
434 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
435 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
436 Value *ExtractT64 = nullptr;
437 if (FloatWidth > 80)
438 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
439 else
440 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
441 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
442
443 // if.then20
444 Builder.SetInsertPoint(IfThen20);
445 Value *Shr21 = nullptr;
446 if (IsSigned)
447 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
448 else
449 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
450 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
451 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
452 Value *ExtractT62 = nullptr;
453 if (FloatWidth > 80)
454 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
455 else
456 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
457 Builder.CreateBr(IfEnd26);
458
459 // if.else:
460 Builder.SetInsertPoint(IfElse);
461 Value *Sub24 = Builder.CreateAdd(
462 FloatWidth == 128 ? Call : Cast,
463 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
464 -(BitWidth - FPMantissaWidth - 1)));
465 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
466 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
467 FloatWidth == 128 ? Sub24 : ShProm25);
468 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
469 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
470 Value *ExtractT66 = nullptr;
471 if (FloatWidth > 80)
472 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
473 else
474 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
475 Builder.CreateBr(IfEnd26);
476
477 // if.end26:
478 Builder.SetInsertPoint(IfEnd26);
479 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
480 AAddr1Off0->addIncoming(ExtractT, IfThen20);
481 AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
482 AAddr1Off0->addIncoming(ExtractT61, IfElse);
483 PHINode *AAddr1Off32 = nullptr;
484 if (FloatWidth > 32) {
485 AAddr1Off32 =
486 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
487 AAddr1Off32->addIncoming(ExtractT62, IfThen20);
488 AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
489 AAddr1Off32->addIncoming(ExtractT66, IfElse);
490 }
491 PHINode *E0 = nullptr;
492 if (FloatWidth <= 80) {
493 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
494 E0->addIncoming(Sub1, IfThen20);
495 E0->addIncoming(Sub2, SwEpilog);
496 E0->addIncoming(Sub2, IfElse);
497 }
498 Value *And29 = nullptr;
499 if (FloatWidth > 80) {
500 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
501 Builder.getIntN(BitWidth, 63));
502 And29 = Builder.CreateAnd(Shr, Temp2, "and29");
503 } else {
504 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
505 And29 = Builder.CreateAnd(
506 Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
507 }
508 unsigned TempMod = FPMantissaWidth % 32;
509 Value *And34 = nullptr;
510 Value *Shl30 = nullptr;
511 if (FloatWidth > 80) {
512 TempMod += 32;
513 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
514 Shl30 = Builder.CreateAdd(
515 Add,
516 Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
517 And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
518 } else {
519 Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
520 Shl30 = Builder.CreateAdd(
521 Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
522 And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
523 Builder.getIntN(32, (1 << TempMod) - 1));
524 }
525 Value *Or35 = nullptr;
526 if (FloatWidth > 80) {
527 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
528 Value *Or31 = Builder.CreateOr(And29Trunc, And34);
529 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
530 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
531 Builder.getIntN(128, FPMantissaWidth));
532 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
533 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
534 Or35 = Builder.CreateOr(Or34, A6);
535 } else {
536 Value *Or31 = Builder.CreateOr(And34, And29);
537 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
538 }
539 Value *A4 = nullptr;
540 if (IToFP->getType()->isDoubleTy()) {
541 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
542 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
543 Value *And1 =
544 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
545 Value *Or1 = Builder.CreateOr(Shl1, And1);
546 A4 = Builder.CreateBitCast(Or1, IToFP->getType());
547 } else if (IToFP->getType()->isX86_FP80Ty()) {
548 Value *A40 =
549 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
550 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
551 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
552 // Deal with "half" situation. This is a workaround since we don't have
553 // floattihf.c currently as referring.
554 Value *A40 =
555 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
556 A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
557 } else // float type
558 A4 = Builder.CreateBitCast(Or35, IToFP->getType());
559 Builder.CreateBr(End);
560
561 // return:
562 Builder.SetInsertPoint(End, End->begin());
563 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
564 Retval0->addIncoming(A4, IfEnd26);
565 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
566
567 IToFP->replaceAllUsesWith(Retval0);
568 IToFP->dropAllReferences();
569 IToFP->eraseFromParent();
570}
571
573 VectorType *VTy = cast<FixedVectorType>(I->getType());
574
575 IRBuilder<> Builder(I);
576
577 unsigned NumElements = VTy->getElementCount().getFixedValue();
578 Value *Result = PoisonValue::get(VTy);
579 for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
580 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);
581 Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,
582 I->getType()->getScalarType());
583 Result = Builder.CreateInsertElement(Result, Cast, Idx);
584 if (isa<Instruction>(Cast))
585 Replace.push_back(cast<Instruction>(Cast));
586 }
587 I->replaceAllUsesWith(Result);
588 I->dropAllReferences();
589 I->eraseFromParent();
590}
591
// Driver for the pass on a single function: queues every fptoui/fptosi/
// uitofp/sitofp whose integer type is wider than MaxLegalFpConvertBitWidth,
// scalarizes the queued vector conversions, then expands each scalar
// conversion in place. Returns true if at least one conversion was queued.
// NOTE(review): this listing skips some original lines in this function (the
// declaration of `Replace` and the initializer of MaxLegalFpConvertBitWidth
// are not shown) -- confirm against the upstream file before editing.
592static bool runImpl(Function &F, const TargetLowering &TLI) {
594 SmallVector<Instruction *, 4> ReplaceVector;
595 bool Modified = false;
596
597 unsigned MaxLegalFpConvertBitWidth =
// The command-line option value replaces the limit computed above.
600 MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
601
// No integer type can exceed MAX_INT_BITS, so nothing would be expanded.
602 if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
603 return false;
604
605 for (auto &I : instructions(F)) {
606 switch (I.getOpcode()) {
607 case Instruction::FPToUI:
608 case Instruction::FPToSI: {
609 // TODO: This pass doesn't handle scalable vectors.
610 if (I.getOperand(0)->getType()->isScalableTy())
611 continue;
612
// fp -> int: the integer type is the (scalar) result type.
613 auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());
614 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
615 continue;
616
617 if (I.getOperand(0)->getType()->isVectorTy())
618 ReplaceVector.push_back(&I);
619 else
620 Replace.push_back(&I);
621 Modified = true;
622 break;
623 }
624 case Instruction::UIToFP:
625 case Instruction::SIToFP: {
626 // TODO: This pass doesn't handle scalable vectors.
627 if (I.getOperand(0)->getType()->isScalableTy())
628 continue;
629
// int -> fp: the integer type is the (scalar) operand type.
630 auto *IntTy =
631 cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());
632 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
633 continue;
634
635 if (I.getOperand(0)->getType()->isVectorTy())
636 ReplaceVector.push_back(&I);
637 else
638 Replace.push_back(&I);
639 Modified = true;
640 break;
641 }
642 default:
643 break;
644 }
645 }
646
// Vector conversions are split per element first; scalarize() appends the
// resulting scalar casts to Replace so they are expanded by the loop below.
647 while (!ReplaceVector.empty()) {
648 Instruction *I = ReplaceVector.pop_back_val();
649 scalarize(I, Replace);
650 }
651
652 if (Replace.empty())
653 return false;
654
// Only the four conversion opcodes are ever queued, so anything that is not
// fp -> int here is int -> fp.
655 while (!Replace.empty()) {
656 Instruction *I = Replace.pop_back_val();
657 if (I->getOpcode() == Instruction::FPToUI ||
658 I->getOpcode() == Instruction::FPToSI) {
659 expandFPToI(I);
660 } else {
661 expandIToFP(I);
662 }
663 }
664
665 return Modified;
666}
667
668namespace {
// Legacy pass manager wrapper: a FunctionPass that forwards each function to
// runImpl() together with the target lowering of that function's subtarget.
// NOTE(review): the listing skips the constructor body and the body of
// getAnalysisUsage(); confirm their contents against the upstream file.
669class ExpandLargeFpConvertLegacyPass : public FunctionPass {
670public:
// Pass identification; defined after the class.
671 static char ID;
672
673 ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
676 }
677
// Obtains the TargetMachine through TargetPassConfig, looks up the
// TargetLowering for F's subtarget and returns runImpl()'s result.
678 bool runOnFunction(Function &F) override {
679 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
680 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
681 return runImpl(F, *TLI);
682 }
683
// NOTE(review): runOnFunction() calls getAnalysis<TargetPassConfig>(), so this
// presumably declares that analysis as required -- body not shown here.
684 void getAnalysisUsage(AnalysisUsage &AU) const override {
688 }
689};
690} // namespace
691
// NOTE(review): only part of this definition survives in the listing; its
// signature and the statements around this subtarget lookup are not shown.
// Presumably the new-pass-manager entry point
// (`PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)`) -- confirm.
694 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);
697}
698
// Definition of the legacy pass's static ID member.
699char ExpandLargeFpConvertLegacyPass::ID = 0;
// Legacy pass registration: command-line argument "expand-large-fp-convert",
// display name "Expand large fp convert"; the two trailing `false` values are
// the macro's `cfg` and `analysis` parameters.
700INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
701 "Expand large fp convert", false, false)
702INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
703 "Expand large fp convert", false, false)
704
// NOTE(review): the signature of this factory is not shown in the listing.
// It returns a newly allocated legacy pass object; ownership presumably passes
// to the caller (the pass manager) -- confirm.
706 return new ExpandLargeFpConvertLegacyPass();
707}
Expand Atomic instructions
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
bool End
Definition: ELF_riscv.cpp:480
static bool runImpl(Function &F, const TargetLowering &TLI)
static void expandIToFP(Instruction *IToFP)
Generate code to convert a fp number to integer, replacing S(U)IToFP with the generated code.
static void expandFPToI(Instruction *FPToI)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Replace)
expand large fp convert
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(llvm::IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))
static bool runImpl(Function &F, const TargetLowering &TLI)
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:21
This is the interface for a simple mod/ref and alias analysis over globals.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
BinaryOperator * Mul
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
static Constant * getZero(Type *Ty, bool Negative=false)
Definition: Constants.cpp:1057
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:126
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Legacy wrapper pass to provide the GlobalsAAResult object.
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2289
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2503
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2491
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:536
Value * CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2297
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2128
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:463
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
Value * CreateFPToUI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2094
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2060
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1460
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2277
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2429
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1167
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2155
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1144
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
LLVMContext & getContext() const
Definition: IRBuilder.h:173
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1498
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2189
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1350
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1138
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2137
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1479
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1542
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1384
Value * CreateFPToSI(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2101
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
Class to represent integer types.
Definition: DerivedTypes.h:42
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Definition: DerivedTypes.h:54
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
bool empty() const
Definition: SmallVector.h:81
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
Multiway switch.
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the largest fp conversion the backend supports.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
unsigned getIntegerBitWidth() const
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:159
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
static Type * getFP128Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
static Type * getFloatTy(LLVMContext &C)
void dropAllReferences()
Drop all references to operands.
Definition: User.h:345
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
FunctionPass * createExpandLargeFpConvertPass()
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry &)
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217