LLVM 23.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Error.h"
29#include <cstring>
30#include <limits.h>
31
32/// Shared headers from LLVM libc
33/// Make sure to add ${LLVM_SOURCE_DIR}/../libc to include directories.
34///
35/// Notes: So far it looks like APFloat does not check errnos or floating-point
36/// exceptions after calling the math functions, so we will configure LLVM libc
37/// math functions to skip setting errnos and floating-point exceptions
38/// explicitly. We also put them in a separate namespace so that the symbols
39/// do not clash with other libc math builds just in case.
40#define LIBC_NAMESPACE __llvm_libc_apfloat
41#define LIBC_MATH (LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
42
43#include "shared/math.h"
44#include "shared/math_check_exceptions.h"
45
/// Expands to a `return` of METHOD_CALL invoked on U.IEEE when getSemantics()
/// uses the IEEEFloat layout, or on U.Double when it uses the DoubleAPFloat
/// layout; any other layout reaches llvm_unreachable. Because the expansion
/// contains `return` statements, it can only be used inside a function body
/// where `U` and `getSemantics()` are in scope.
46#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
47 do { \
48 if (usesLayout<IEEEFloat>(getSemantics())) \
49 return U.IEEE.METHOD_CALL; \
50 if (usesLayout<DoubleAPFloat>(getSemantics())) \
51 return U.Double.METHOD_CALL; \
52 llvm_unreachable("Unexpected semantics"); \
53 } while (false)
54
55using namespace llvm;
56
57/// A macro used to combine two fcCategory enums into one key which can be used
58/// in a switch statement to classify how the interaction of two APFloat's
59/// categories affects an operation.
60///
61/// TODO: If clang source code is ever allowed to use constexpr in its own
62/// codebase, change this into a static inline function.
63#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
64
65/* Assumed in hexadecimal significand parsing, and conversion to
66 hexadecimal strings. */
67static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
68
69namespace llvm {
70
71constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
72constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
73constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
74constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
75constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
76constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
77constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
79constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
80constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
82constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
84constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
86constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
87constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
88constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
89 127,
90 -127,
91 1,
92 8,
95 false,
96 false,
97 false,
98 false};
99
100constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
102constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
104constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
106constexpr fltSemantics APFloatBase::semX87DoubleExtended = {
107 16383,
108 -16382,
109 64,
110 80,
113 true,
114 true,
115 true,
116 true,
117 true};
118constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
119constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
120constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
121 1023, -1022 + 53, 53 + 53, 128};
122
124 switch (S) {
125 case S_IEEEhalf:
126 return IEEEhalf();
127 case S_BFloat:
128 return BFloat();
129 case S_IEEEsingle:
130 return IEEEsingle();
131 case S_IEEEdouble:
132 return IEEEdouble();
133 case S_IEEEquad:
134 return IEEEquad();
136 return PPCDoubleDouble();
138 return PPCDoubleDoubleLegacy();
139 case S_Float8E5M2:
140 return Float8E5M2();
141 case S_Float8E5M2FNUZ:
142 return Float8E5M2FNUZ();
143 case S_Float8E4M3:
144 return Float8E4M3();
145 case S_Float8E4M3FN:
146 return Float8E4M3FN();
147 case S_Float8E4M3FNUZ:
148 return Float8E4M3FNUZ();
150 return Float8E4M3B11FNUZ();
151 case S_Float8E3M4:
152 return Float8E3M4();
153 case S_FloatTF32:
154 return FloatTF32();
155 case S_Float8E8M0FNU:
156 return Float8E8M0FNU();
157 case S_Float6E3M2FN:
158 return Float6E3M2FN();
159 case S_Float6E2M3FN:
160 return Float6E2M3FN();
161 case S_Float4E2M1FN:
162 return Float4E2M1FN();
164 return x87DoubleExtended();
165 }
166 llvm_unreachable("Unrecognised floating semantics");
167}
168
171 if (&Sem == &llvm::APFloat::IEEEhalf())
172 return S_IEEEhalf;
173 else if (&Sem == &llvm::APFloat::BFloat())
174 return S_BFloat;
175 else if (&Sem == &llvm::APFloat::IEEEsingle())
176 return S_IEEEsingle;
177 else if (&Sem == &llvm::APFloat::IEEEdouble())
178 return S_IEEEdouble;
179 else if (&Sem == &llvm::APFloat::IEEEquad())
180 return S_IEEEquad;
181 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
182 return S_PPCDoubleDouble;
183 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
185 else if (&Sem == &llvm::APFloat::Float8E5M2())
186 return S_Float8E5M2;
187 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
188 return S_Float8E5M2FNUZ;
189 else if (&Sem == &llvm::APFloat::Float8E4M3())
190 return S_Float8E4M3;
191 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
192 return S_Float8E4M3FN;
193 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
194 return S_Float8E4M3FNUZ;
195 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
196 return S_Float8E4M3B11FNUZ;
197 else if (&Sem == &llvm::APFloat::Float8E3M4())
198 return S_Float8E3M4;
199 else if (&Sem == &llvm::APFloat::FloatTF32())
200 return S_FloatTF32;
201 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
202 return S_Float8E8M0FNU;
203 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
204 return S_Float6E3M2FN;
205 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
206 return S_Float6E2M3FN;
207 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
208 return S_Float4E2M1FN;
209 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
210 return S_x87DoubleExtended;
211 else
212 llvm_unreachable("Unknown floating semantics");
213}
214
216 const fltSemantics &B) {
217 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
218 A.precision <= B.precision;
219}
220
221/* A tight upper bound on number of parts required to hold the value
222 pow(5, power) is
223
224 power * 815 / (351 * integerPartWidth) + 1
225
226 However, whilst the result may require only this many parts,
227 because we are multiplying two values to get it, the
228 multiplication may require an extra part with the excess part
229 being zero (consider the trivial case of 1 * 1, tcFullMultiply
230 requires two parts to hold the single-part result). So we add an
231 extra one to guarantee enough space whilst multiplying. */
232const unsigned int maxExponent = 16383;
233const unsigned int maxPrecision = 113;
235const unsigned int maxPowerOfFiveParts =
236 2 +
238
239unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
240 return semantics.precision;
241}
244 return semantics.maxExponent;
245}
248 return semantics.minExponent;
249}
250unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
251 return semantics.sizeInBits;
252}
254 bool isSigned) {
255 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
256 // at least one more bit than the MaxExponent to hold the max FP value.
257 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
258 // Extra sign bit needed.
259 if (isSigned)
260 ++MinBitWidth;
261 return MinBitWidth;
262}
263
265 return semantics.hasZero;
266}
267
269 return semantics.hasSignedRepr;
270}
271
275
279
281 // Keep in sync with Type::isIEEELikeFPTy
282 return SemanticsToEnum(semantics) <= S_IEEEquad;
283}
284
286 return semantics.hasSignBitInMSB;
287}
288
290 const fltSemantics &Dst) {
291 // Exponent range must be larger.
292 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
293 return false;
294
295 // If the mantissa is long enough, the result value could still be denormal
296 // with a larger exponent range.
297 //
298 // FIXME: This condition is probably not accurate but also shouldn't be a
299 // practical concern with existing types.
300 return Dst.precision >= Src.precision;
301}
302
304 return Sem.sizeInBits;
305}
306
307static constexpr APFloatBase::ExponentType
308exponentZero(const fltSemantics &semantics) {
309 return semantics.minExponent - 1;
310}
311
312static constexpr APFloatBase::ExponentType
313exponentInf(const fltSemantics &semantics) {
314 return semantics.maxExponent + 1;
315}
316
317static constexpr APFloatBase::ExponentType
318exponentNaN(const fltSemantics &semantics) {
321 return exponentZero(semantics);
322 if (semantics.hasSignedRepr)
323 return semantics.maxExponent;
324 }
325 return semantics.maxExponent + 1;
326}
327
328/* A bunch of private, handy routines. */
329
330static inline Error createError(const Twine &Err) {
332}
333
334static constexpr inline unsigned int partCountForBits(unsigned int bits) {
335 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
337}
338
/* Map the character code C to its decimal digit value.  '0'..'9' yield
   0U-9U; every other input yields a value >= 10U (codes below '0' wrap
   around in unsigned arithmetic), which callers treat as "not a digit". */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
345
346/* Return the value of a decimal exponent of the form
347 [+-]ddddddd.
348
349 If the exponent overflows, returns a large exponent with the
350 appropriate sign. */
353 const unsigned int overlargeExponent = 24000; /* FIXME. */
354 StringRef::iterator p = begin;
355
356 // Treat no exponent as 0 to match binutils
357 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end))
358 return 0;
359
360 bool isNegative = *p == '-';
361 if (*p == '-' || *p == '+') {
362 p++;
363 if (p == end)
364 return createError("Exponent has no digits");
365 }
366
367 unsigned absExponent = decDigitValue(*p++);
368 if (absExponent >= 10U)
369 return createError("Invalid character in exponent");
370
371 for (; p != end; ++p) {
372 unsigned value = decDigitValue(*p);
373 if (value >= 10U)
374 return createError("Invalid character in exponent");
375
376 absExponent = absExponent * 10U + value;
377 if (absExponent >= overlargeExponent) {
378 absExponent = overlargeExponent;
379 break;
380 }
381 }
382
383 if (isNegative)
384 return -(int) absExponent;
385 else
386 return (int) absExponent;
387}
388
389/* This is ugly and needs cleaning up, but I don't immediately see
390 how whilst remaining safe. */
393 int exponentAdjustment) {
394 int exponent = 0;
395
396 if (p == end)
397 return createError("Exponent has no digits");
398
399 bool negative = *p == '-';
400 if (*p == '-' || *p == '+') {
401 p++;
402 if (p == end)
403 return createError("Exponent has no digits");
404 }
405
406 int unsignedExponent = 0;
407 bool overflow = false;
408 for (; p != end; ++p) {
409 unsigned int value;
410
411 value = decDigitValue(*p);
412 if (value >= 10U)
413 return createError("Invalid character in exponent");
414
415 unsignedExponent = unsignedExponent * 10 + value;
416 if (unsignedExponent > 32767) {
417 overflow = true;
418 break;
419 }
420 }
421
422 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
423 overflow = true;
424
425 if (!overflow) {
426 exponent = unsignedExponent;
427 if (negative)
428 exponent = -exponent;
429 exponent += exponentAdjustment;
430 if (exponent > 32767 || exponent < -32768)
431 overflow = true;
432 }
433
434 if (overflow)
435 exponent = negative ? -32768: 32767;
436
437 return exponent;
438}
439
442 StringRef::iterator *dot) {
443 StringRef::iterator p = begin;
444 *dot = end;
445 while (p != end && *p == '0')
446 p++;
447
448 if (p != end && *p == '.') {
449 *dot = p++;
450
451 if (end - begin == 1)
452 return createError("Significand has no digits");
453
454 while (p != end && *p == '0')
455 p++;
456 }
457
458 return p;
459}
460
461/* Given a normal decimal floating point number of the form
462
463 dddd.dddd[eE][+-]ddd
464
465 where the decimal point and exponent are optional, fill out the
466 structure D. Exponent is appropriate if the significand is
467 treated as an integer, and normalizedExponent if the significand
468 is taken to have the decimal point after a single leading
469 non-zero digit.
470
471 If the value is zero, D->firstSigDigit points to a non-digit, and
472 the return exponent is zero.
473*/
475 const char *firstSigDigit;
476 const char *lastSigDigit;
479};
480
483 StringRef::iterator dot = end;
484
485 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
486 if (!PtrOrErr)
487 return PtrOrErr.takeError();
488 StringRef::iterator p = *PtrOrErr;
489
490 D->firstSigDigit = p;
491 D->exponent = 0;
492 D->normalizedExponent = 0;
493
494 for (; p != end; ++p) {
495 if (*p == '.') {
496 if (dot != end)
497 return createError("String contains multiple dots");
498 dot = p++;
499 if (p == end)
500 break;
501 }
502 if (decDigitValue(*p) >= 10U)
503 break;
504 }
505
506 if (p != end) {
507 if (*p != 'e' && *p != 'E')
508 return createError("Invalid character in significand");
509 if (p == begin)
510 return createError("Significand has no digits");
511 if (dot != end && p - begin == 1)
512 return createError("Significand has no digits");
513
514 /* p points to the first non-digit in the string */
515 auto ExpOrErr = readExponent(p + 1, end);
516 if (!ExpOrErr)
517 return ExpOrErr.takeError();
518 D->exponent = *ExpOrErr;
519
520 /* Implied decimal point? */
521 if (dot == end)
522 dot = p;
523 }
524
525 /* If number is all zeroes accept any exponent. */
526 if (p != D->firstSigDigit) {
527 /* Drop insignificant trailing zeroes. */
528 if (p != begin) {
529 do
530 do
531 p--;
532 while (p != begin && *p == '0');
533 while (p != begin && *p == '.');
534 }
535
536 /* Adjust the exponents for any decimal point. */
537 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
538 D->normalizedExponent = (D->exponent +
539 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
540 - (dot > D->firstSigDigit && dot < p)));
541 }
542
543 D->lastSigDigit = p;
544 return Error::success();
545}
546
547/* Return the trailing fraction of a hexadecimal number.
548 DIGITVALUE is the first hex digit of the fraction, P points to
549 the next digit. */
552 unsigned int digitValue) {
553 /* If the first trailing digit isn't 0 or 8 we can work out the
554 fraction immediately. */
555 if (digitValue > 8)
556 return lfMoreThanHalf;
557 else if (digitValue < 8 && digitValue > 0)
558 return lfLessThanHalf;
559
560 // Otherwise we need to find the first non-zero digit.
561 while (p != end && (*p == '0' || *p == '.'))
562 p++;
563
564 if (p == end)
565 return createError("Invalid trailing hexadecimal fraction!");
566
567 unsigned hexDigit = hexDigitValue(*p);
568
569 /* If we ran off the end it is exactly zero or one-half, otherwise
570 a little more. */
571 if (hexDigit == UINT_MAX)
572 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
573 else
574 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
575}
576
577/* Return the fraction lost were a bignum truncated losing the least
578 significant BITS bits. */
579static lostFraction
581 unsigned int partCount,
582 unsigned int bits)
583{
584 unsigned lsb = APInt::tcLSB(parts, partCount);
585
586 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
587 if (bits <= lsb)
588 return lfExactlyZero;
589 if (bits == lsb + 1)
590 return lfExactlyHalf;
591 if (bits <= partCount * APFloatBase::integerPartWidth &&
592 APInt::tcExtractBit(parts, bits - 1))
593 return lfMoreThanHalf;
594
595 return lfLessThanHalf;
596}
597
598/* Shift DST right BITS bits noting lost fraction. */
599static lostFraction
600shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
601{
602 lostFraction lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
603
604 APInt::tcShiftRight(dst, parts, bits);
605
606 return lost_fraction;
607}
608
609/* Combine the effect of two lost fractions. */
610static lostFraction
612 lostFraction lessSignificant)
613{
614 if (lessSignificant != lfExactlyZero) {
615 if (moreSignificant == lfExactlyZero)
616 moreSignificant = lfLessThanHalf;
617 else if (moreSignificant == lfExactlyHalf)
618 moreSignificant = lfMoreThanHalf;
619 }
620
621 return moreSignificant;
622}
623
/* Strict upper bound, in half-ulps, on the error of a product of two
   floating point numbers whose operands are themselves off from the
   values they approximate by at most HUERR1 and HUERR2 half-ulps.
   INEXACTMULTIPLY says whether the multiplication itself rounded.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1,
                               unsigned int HUerr2) {
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int roundingError = inexactMultiply ? 1U : 0U;
  const unsigned int operandError = HUerr1 + HUerr2;

  /* Exact operands: the only error is the rounding of the product,
     i.e. <= inexactMultiply half-ulps. */
  if (operandError == 0)
    return roundingError * 2;

  return roundingError + 2 * operandError;
}
641
642/* The number of ulps from the boundary (zero, or half if ISNEAREST)
643 when the least significant BITS are truncated. BITS cannot be
644 zero. */
646ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
647 bool isNearest) {
648 assert(bits != 0);
649
650 bits--;
651 unsigned count = bits / APFloatBase::integerPartWidth;
652 unsigned partBits = bits % APFloatBase::integerPartWidth + 1;
653
655 parts[count] & (~(APFloatBase::integerPart)0 >>
656 (APFloatBase::integerPartWidth - partBits));
657
659 if (isNearest)
660 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
661 else
662 boundary = 0;
663
664 if (count == 0) {
665 if (part - boundary <= boundary - part)
666 return part - boundary;
667 else
668 return boundary - part;
669 }
670
671 if (part == boundary) {
672 while (--count)
673 if (parts[count])
674 return ~(APFloatBase::integerPart) 0; /* A lot. */
675
676 return parts[0];
677 } else if (part == boundary - 1) {
678 while (--count)
679 if (~parts[count])
680 return ~(APFloatBase::integerPart) 0; /* A lot. */
681
682 return -parts[0];
683 }
684
685 return ~(APFloatBase::integerPart) 0; /* A lot. */
686}
687
688/* Place pow(5, power) in DST, and return the number of parts used.
689 DST must be at least one part larger than size of the answer. */
690static unsigned int
691powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
692 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
694 pow5s[0] = 78125 * 5;
695
696 unsigned int partsCount = 1;
697 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
698 assert(power <= maxExponent);
699
700 p1 = dst;
701 p2 = scratch;
702
703 *p1 = firstEightPowers[power & 7];
704 power >>= 3;
705
706 unsigned result = 1;
707 pow5 = pow5s;
708
709 for (unsigned int n = 0; power; power >>= 1, n++) {
710 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
711 if (n != 0) {
712 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
713 partsCount, partsCount);
714 partsCount *= 2;
715 if (pow5[partsCount - 1] == 0)
716 partsCount--;
717 }
718
719 if (power & 1) {
721
722 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
723 result += partsCount;
724 if (p2[result - 1] == 0)
725 result--;
726
727 /* Now result is in p1 with partsCount parts and p2 is scratch
728 space. */
729 tmp = p1;
730 p1 = p2;
731 p2 = tmp;
732 }
733
734 pow5 += partsCount;
735 }
736
737 if (p1 != dst)
738 APInt::tcAssign(dst, p1, result);
739
740 return result;
741}
742
743/* Zero at the end to avoid modular arithmetic when adding one; used
744 when rounding up during hexadecimal output. */
// Digit tables for lower- and upper-case hexadecimal output. Each holds the
// 16 hex digits followed by an extra '0' at index 16.
745static const char hexDigitsLower[] = "0123456789abcdef0";
746static const char hexDigitsUpper[] = "0123456789ABCDEF0";
// Lower- and upper-case spellings of infinity and NaN.
// NOTE(review): presumably emitted by the hexadecimal string conversion;
// the users are outside this view, so confirm.
747static const char infinityL[] = "infinity";
748static const char infinityU[] = "INFINITY";
749static const char NaNL[] = "nan";
750static const char NaNU[] = "NAN";
751
752/* Write out an integerPart in hexadecimal, starting with the most
753 significant nibble. Write out exactly COUNT hexdigits, return
754 COUNT. */
755static unsigned int
756partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
757 const char *hexDigitChars)
758{
759 unsigned int result = count;
760
762
763 part >>= (APFloatBase::integerPartWidth - 4 * count);
764 while (count--) {
765 dst[count] = hexDigitChars[part & 0xf];
766 part >>= 4;
767 }
768
769 return result;
770}
771
/* Write N to DST as an unsigned decimal integer with no leading zeroes and
   no terminating NUL.  Returns a pointer one past the last character
   written. */
static char *writeUnsignedDecimal(char *dst, unsigned int n) {
  char digits[40];
  char *const digitsEnd = digits + sizeof(digits);
  char *first = digitsEnd;

  /* Generate digits least-significant first, filling the scratch buffer
     from the back so that they end up in printing order. */
  do {
    *--first = static_cast<char>('0' + n % 10);
    n /= 10;
  } while (n != 0);

  const std::size_t length = static_cast<std::size_t>(digitsEnd - first);
  std::memcpy(dst, first, length);
  return dst + length;
}
787
788/* Write out a signed decimal integer. */
789static char *writeSignedDecimal(char *dst, int value) {
790 if (value < 0) {
791 *dst++ = '-';
792 dst = writeUnsignedDecimal(dst, -(unsigned) value);
793 } else {
794 dst = writeUnsignedDecimal(dst, value);
795 }
796
797 return dst;
798}
799
800// Compute the ULP of the input using a definition from:
801// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
802// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
803static APFloat harrisonUlp(const APFloat &X) {
804 const fltSemantics &Sem = X.getSemantics();
805 switch (X.getCategory()) {
806 case APFloat::fcNaN:
807 return APFloat::getQNaN(Sem);
809 return APFloat::getInf(Sem);
810 case APFloat::fcZero:
811 return APFloat::getSmallest(Sem);
813 break;
814 }
815 if (X.isDenormal() || X.isSmallestNormalized())
816 return APFloat::getSmallest(Sem);
817 int Exp = ilogb(X);
818 if (X.getExactLog2() != INT_MIN)
819 Exp -= 1;
820 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
822}
823
824namespace detail {
825/* Constructors. */
826void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
827 semantics = ourSemantics;
828 unsigned count = partCount();
829 if (count > 1)
830 significand.parts = new integerPart[count];
831}
832
833void IEEEFloat::freeSignificand() {
834 if (needsCleanup())
835 delete [] significand.parts;
836}
837
838void IEEEFloat::assign(const IEEEFloat &rhs) {
839 assert(semantics == rhs.semantics);
840
841 sign = rhs.sign;
842 category = rhs.category;
843 exponent = rhs.exponent;
844 if (isFiniteNonZero() || category == fcNaN)
845 copySignificand(rhs);
846}
847
848void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
849 assert(isFiniteNonZero() || category == fcNaN);
850 assert(rhs.partCount() >= partCount());
851
852 APInt::tcAssign(significandParts(), rhs.significandParts(),
853 partCount());
854}
855
856/* Make this number a NaN. SNaN selects a signalling rather than a quiet
857 NaN, Negative sets the sign, and FILL, if non-null, supplies the payload
858 bits (the payload is zero otherwise). NaN-only formats override these. */
859void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
860 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
861 llvm_unreachable("This floating point format does not support NaN");
862
863 if (Negative && !semantics->hasSignedRepr)
865 "This floating point format does not support signed values");
866
867 category = fcNaN;
868 sign = Negative;
869 exponent = exponentNaN();
870
871 integerPart *significand = significandParts();
872 unsigned numParts = partCount();
873
874 APInt fill_storage;
875 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
876 // NaN-only types do not distinguish signalling and quiet NaN, so
877 // treat every request as a quiet NaN.
878 SNaN = false;
879 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
880 sign = true;
881 fill_storage = APInt::getZero(semantics->precision - 1);
882 } else {
883 fill_storage = APInt::getAllOnes(semantics->precision - 1);
884 }
885 fill = &fill_storage;
886 }
887
888 // Set the significand bits to the fill.
889 if (!fill || fill->getNumWords() < numParts)
890 APInt::tcSet(significand, 0, numParts);
891 if (fill) {
892 APInt::tcAssign(significand, fill->getRawData(),
893 std::min(fill->getNumWords(), numParts));
894
895 // Zero out the excess bits of the significand.
896 unsigned bitsToPreserve = semantics->precision - 1;
897 unsigned part = bitsToPreserve / 64;
898 bitsToPreserve %= 64;
899 significand[part] &= ((1ULL << bitsToPreserve) - 1);
900 for (part++; part != numParts; ++part)
901 significand[part] = 0;
902 }
903
904 unsigned QNaNBit =
905 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
906
907 if (SNaN) {
908 // We always have to clear the QNaN bit to make it an SNaN.
909 APInt::tcClearBit(significand, QNaNBit);
910
911 // If there are no bits set in the payload, we have to set
912 // *something* to make it a NaN instead of an infinity;
913 // conventionally, this is the next bit down from the QNaN bit.
914 if (APInt::tcIsZero(significand, numParts))
915 APInt::tcSetBit(significand, QNaNBit - 1);
916 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
917 // The only NaN is a quiet NaN, and it has no bits set in the significand.
918 // Do nothing.
919 } else {
920 // We always have to set the QNaN bit to make it a QNaN.
921 APInt::tcSetBit(significand, QNaNBit);
922 }
923
924 // For x87 extended precision, we want to make a NaN, not a
925 // pseudo-NaN. Maybe we should expose the ability to make
926 // pseudo-NaNs?
927 if (semantics == &APFloatBase::semX87DoubleExtended)
928 APInt::tcSetBit(significand, QNaNBit + 1);
929}
930
932 if (this != &rhs) {
933 if (semantics != rhs.semantics) {
934 freeSignificand();
935 initialize(rhs.semantics);
936 }
937 assign(rhs);
938 }
939
940 return *this;
941}
942
944 freeSignificand();
945
946 semantics = rhs.semantics;
947 significand = rhs.significand;
948 exponent = rhs.exponent;
949 category = rhs.category;
950 sign = rhs.sign;
951
952 rhs.semantics = &APFloatBase::semBogus;
953 return *this;
954}
955
958 (exponent == semantics->minExponent) &&
959 (APInt::tcExtractBit(significandParts(), semantics->precision - 1) ==
960 0);
961}
962
964 // The smallest number by magnitude in our format will be the smallest
965 // denormal, i.e. the floating point number with exponent being minimum
966 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
967 return isFiniteNonZero() && exponent == semantics->minExponent &&
968 significandMSB() == 0;
969}
970
972 return getCategory() == fcNormal && exponent == semantics->minExponent &&
973 isSignificandAllZerosExceptMSB();
974}
975
976unsigned int IEEEFloat::getNumHighBits() const {
977 const unsigned int PartCount = partCountForBits(semantics->precision);
978 const unsigned int Bits = PartCount * integerPartWidth;
979
980 // Compute how many bits are used in the final word.
981 // When precision is just 1, it represents the 'Pth'
982 // Precision bit and not the actual significand bit.
983 const unsigned int NumHighBits = (semantics->precision > 1)
984 ? (Bits - semantics->precision + 1)
985 : (Bits - semantics->precision);
986 return NumHighBits;
987}
988
989bool IEEEFloat::isSignificandAllOnes() const {
990 // Test if the significand excluding the integral bit is all ones. This allows
991 // us to test for binade boundaries.
992 const integerPart *Parts = significandParts();
993 const unsigned PartCount = partCountForBits(semantics->precision);
994 for (unsigned i = 0; i < PartCount - 1; i++)
995 if (~Parts[i])
996 return false;
997
998 // Set the unused high bits to all ones when we compare.
999 const unsigned NumHighBits = getNumHighBits();
1000 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1001 "Can not have more high bits to fill than integerPartWidth");
1002 const integerPart HighBitFill =
1003 ~integerPart(0) << (integerPartWidth - NumHighBits);
1004 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1005 return false;
1006
1007 return true;
1008}
1009
1010bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1011 // Test if the significand excluding the integral bit is all ones except for
1012 // the least significant bit.
1013 const integerPart *Parts = significandParts();
1014
1015 if (Parts[0] & 1)
1016 return false;
1017
1018 const unsigned PartCount = partCountForBits(semantics->precision);
1019 for (unsigned i = 0; i < PartCount - 1; i++) {
1020 if (~Parts[i] & ~unsigned{!i})
1021 return false;
1022 }
1023
1024 // Set the unused high bits to all ones when we compare.
1025 const unsigned NumHighBits = getNumHighBits();
1026 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1027 "Can not have more high bits to fill than integerPartWidth");
1028 const integerPart HighBitFill = ~integerPart(0)
1029 << (integerPartWidth - NumHighBits);
1030 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1031 return false;
1032
1033 return true;
1034}
1035
1036bool IEEEFloat::isSignificandAllZeros() const {
1037 // Test if the significand excluding the integral bit is all zeros. This
1038 // allows us to test for binade boundaries.
1039 const integerPart *Parts = significandParts();
1040 const unsigned PartCount = partCountForBits(semantics->precision);
1041
1042 for (unsigned i = 0; i < PartCount - 1; i++)
1043 if (Parts[i])
1044 return false;
1045
1046 // Compute how many bits are used in the final word.
1047 const unsigned NumHighBits = getNumHighBits();
1048 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1049 "clear than integerPartWidth");
1050 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1051
1052 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1053 return false;
1054
1055 return true;
1056}
1057
1058bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1059 const integerPart *Parts = significandParts();
1060 const unsigned PartCount = partCountForBits(semantics->precision);
1061
1062 for (unsigned i = 0; i < PartCount - 1; i++) {
1063 if (Parts[i])
1064 return false;
1065 }
1066
1067 const unsigned NumHighBits = getNumHighBits();
1068 const integerPart MSBMask = integerPart(1)
1069 << (integerPartWidth - NumHighBits);
1070 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1071}
1072
1074 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1075 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1076 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1077 // The largest number by magnitude in our format will be the floating point
1078 // number with maximum exponent and with significand that is all ones except
1079 // the LSB.
1080 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1081 ? isSignificandAllOnesExceptLSB()
1082 : IsMaxExp;
1083 } else {
1084 // The largest number by magnitude in our format will be the floating point
1085 // number with maximum exponent and with significand that is all ones.
1086 return IsMaxExp && isSignificandAllOnes();
1087 }
1088}
1089
1091 // This could be made more efficient; I'm going for obviously correct.
1092 if (!isFinite()) return false;
1093 IEEEFloat truncated = *this;
1094 truncated.roundToIntegral(rmTowardZero);
1095 return compare(truncated) == cmpEqual;
1096}
1097
1098bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1099 if (this == &rhs)
1100 return true;
1101 if (semantics != rhs.semantics ||
1102 category != rhs.category ||
1103 sign != rhs.sign)
1104 return false;
1105 if (category==fcZero || category==fcInfinity)
1106 return true;
1107
1108 if (isFiniteNonZero() && exponent != rhs.exponent)
1109 return false;
1110
1111 return std::equal(significandParts(), significandParts() + partCount(),
1112 rhs.significandParts());
1113}
1114
// NOTE(review): the line that opens this definition (rendered line 1115) and
// rendered line 1122 are absent from this listing; presumably this is the
// constructor taking `ourSemantics` and an integer `value` -- confirm against
// upstream.
//
// Visible effect: builds a positive fcNormal value whose lowest significand
// word is `value` and whose exponent is precision - 1.
1116 initialize(&ourSemantics);
1117 sign = 0;
1118 category = fcNormal;
1119 zeroSignificand();
1120 exponent = ourSemantics.precision - 1;
1121 significandParts()[0] = value;
1123}
1124
// NOTE(review): the line that opens this definition (rendered line 1125) is
// absent from this listing; presumably the constructor taking only
// `ourSemantics` -- confirm against upstream.
//
// Initializes the object to +0 when the semantics has a zero, otherwise to
// the positive smallest normalized value.
1126 initialize(&ourSemantics);
1127 // The Float8E8MOFNU format does not have a representation
1128 // for zero. So, use the closest representation instead.
1129 // Moreover, the all-zero encoding represents a valid
1130 // normal value (which is the smallestNormalized here).
1131 // Hence, we call makeSmallestNormalized (where category is
1132 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1133 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1134}
1135
1136// Delegate to the previous constructor, because a later copy constructor may
1137// actually inspect the category, which must not be garbage.
// NOTE(review): the declarator line (rendered line 1138) is absent from this
// listing; only the delegating initializer below is visible.
1139 : IEEEFloat(ourSemantics) {}
1140
// NOTE(review): the line that opens this definition (rendered line 1141) is
// absent from this listing; presumably the copy constructor taking `rhs` --
// confirm against upstream.
//
// Sets up storage for rhs's semantics, then copies rhs's value with assign().
1142 initialize(rhs.semantics);
1143 assign(rhs);
1144}
1145
1146IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1147 *this = std::move(rhs);
1148}
1149
1150IEEEFloat::~IEEEFloat() { freeSignificand(); }
1151
1152unsigned int IEEEFloat::partCount() const {
1153 return partCountForBits(semantics->precision + 1);
1154}
1155
1156const APFloat::integerPart *IEEEFloat::significandParts() const {
1157 return const_cast<IEEEFloat *>(this)->significandParts();
1158}
1159
1160APFloat::integerPart *IEEEFloat::significandParts() {
1161 if (partCount() > 1)
1162 return significand.parts;
1163 else
1164 return &significand.part;
1165}
1166
1167void IEEEFloat::zeroSignificand() {
1168 APInt::tcSet(significandParts(), 0, partCount());
1169}
1170
1171/* Increment an fcNormal floating point number's significand. */
1172void IEEEFloat::incrementSignificand() {
1173 [[maybe_unused]] integerPart carry =
1174 APInt::tcIncrement(significandParts(), partCount());
1175
1176 /* Our callers should never cause us to overflow. */
1177 assert(carry == 0);
1178}
1179
1180/* Add the significand of the RHS. Returns the carry flag. */
1181APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1182 integerPart *parts = significandParts();
1183
1184 assert(semantics == rhs.semantics);
1185 assert(exponent == rhs.exponent);
1186
1187 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1188}
1189
1190/* Subtract the significand of the RHS with a borrow flag. Returns
1191 the borrow flag. */
1192APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1193 integerPart borrow) {
1194 integerPart *parts = significandParts();
1195
1196 assert(semantics == rhs.semantics);
1197 assert(exponent == rhs.exponent);
1198
1199 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1200 partCount());
1201}
1202
1203/* Multiply the significand of the RHS. If IGNOREADDEND is false and ADDEND
1204 is non-zero, add it on to the full-precision result of the multiplication.
1205 Returns the lost fraction. The exponent is updated as well; the result is
 not necessarily normalized (see the note near the end). */
1206lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1207 IEEEFloat addend,
1208 bool ignoreAddend) {
1209 integerPart scratch[4];
1210 bool ignored;
1211
1212 assert(semantics == rhs.semantics);
1213
1214 unsigned precision = semantics->precision;
1215
1216 // Allocate space for twice as many bits as the original significand, plus one
1217 // extra bit for the addition to overflow into.
1218 unsigned newPartsCount = partCountForBits(precision * 2 + 1);
1219
1220 // FIXME: Replace with SmallVector<4>.
// The on-stack scratch buffer is used for up to four words; larger products
// are allocated with new[] and released at the end of the function.
1221 integerPart *fullSignificand =
1222 newPartsCount > 4 ? new integerPart[newPartsCount] : scratch;
1223
1224 integerPart *lhsSignificand = significandParts();
1225 unsigned partsCount = partCount();
1226
1227 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1228 rhs.significandParts(), partsCount, partsCount);
1229
1230 lostFraction lost_fraction = lfExactlyZero;
1231 // One, not zero, based MSB.
1232 unsigned omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1233 exponent += rhs.exponent;
1234
1235 // Assume the operands involved in the multiplication are single-precision
1236 // FP, and the two multiplicands are:
1237 // *this = a23 . a22 ... a0 * 2^e1
1238 // rhs = b23 . b22 ... b0 * 2^e2
1239 // the result of multiplication is:
1240 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1241 // Note that there are three significant bits at the left-hand side of the
1242 // radix point: two for the multiplication, and an overflow bit for the
1243 // addition (that will always be zero at this point). Move the radix point
1244 // toward left by two bits, and adjust exponent accordingly.
1245 exponent += 2;
1246
1247 if (!ignoreAddend && addend.isNonZero()) {
1248 // The intermediate result of the multiplication has "2 * precision"
1249 // significant bit; adjust the addend to be consistent with mul result.
1250 //
// The object's own significand and semantics are saved here, temporarily
// replaced by the wide product and the extended semantics, and restored
// after the addition below.
1251 Significand savedSignificand = significand;
1252 const fltSemantics *savedSemantics = semantics;
1253
1254 // Normalize our MSB to one below the top bit to allow for overflow.
1255 unsigned extendedPrecision = 2 * precision + 1;
1256 if (omsb != extendedPrecision - 1) {
1257 assert(extendedPrecision > omsb);
1258 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1259 (extendedPrecision - 1) - omsb);
1260 exponent -= (extendedPrecision - 1) - omsb;
1261 }
1262
1263 /* Create new semantics. */
1264 fltSemantics extendedSemantics = *semantics;
1265 extendedSemantics.precision = extendedPrecision;
1266
// Point the significand storage at the wide product (by value for a single
// word, by pointer otherwise).
1267 if (newPartsCount == 1)
1268 significand.part = fullSignificand[0];
1269 else
1270 significand.parts = fullSignificand;
1271 semantics = &extendedSemantics;
1272
1273 // Make a copy so we can convert it to the extended semantics.
1274 // Note that we cannot convert the addend directly, as the extendedSemantics
1275 // is a local variable (which we take a reference to).
1276 IEEEFloat extendedAddend(addend);
1277 [[maybe_unused]] opStatus status = extendedAddend.convert(
1278 extendedSemantics, APFloat::rmTowardZero, &ignored);
1279 assert(status == APFloat::opOK);
1280
1281 // Shift the significand of the addend right by one bit. This guarantees
1282 // that the high bit of the significand is zero (same as fullSignificand),
1283 // so the addition will overflow (if it does overflow at all) into the top bit.
1284 lost_fraction = extendedAddend.shiftSignificandRight(1);
1285 assert(lost_fraction == lfExactlyZero &&
1286 "Lost precision while shifting addend for fused-multiply-add.");
1287
1288 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1289
1290 /* Restore our state. */
// In the single-word case the sum lives in significand.part, so copy it back
// into the product buffer before restoring the saved significand.
1291 if (newPartsCount == 1)
1292 fullSignificand[0] = significand.part;
1293 significand = savedSignificand;
1294 semantics = savedSemantics;
1295
1296 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1297 }
1298
1299 // Convert the result having "2 * precision" significant-bits back to the one
1300 // having "precision" significant-bits. First, move the radix point from
1301 // position "2*precision - 1" to "precision - 1". The exponent need to be
1302 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
// NOTE(review): the statement below subtracts precision + 1 rather than the
// "precision" derived in the comment above; presumably the extra one pairs
// with the "exponent += 2" adjustment made earlier -- confirm.
1303 exponent -= precision + 1;
1304
1305 // In case MSB resides at the left-hand side of radix point, shift the
1306 // mantissa right by some amount to make sure the MSB reside right before
1307 // the radix point (i.e. "MSB . rest-significant-bits").
1308 //
1309 // Note that the result is not normalized when "omsb < precision". So, the
1310 // caller needs to call IEEEFloat::normalize() if normalized value is
1311 // expected.
1312 if (omsb > precision) {
1313 unsigned int bits, significantParts;
1314 lostFraction lf;
1315
1316 bits = omsb - precision;
1317 significantParts = partCountForBits(omsb);
1318 lf = shiftRight(fullSignificand, significantParts, bits);
1319 lost_fraction = combineLostFractions(lf, lost_fraction);
1320 exponent += bits;
1321 }
1322
// Copy the low partsCount words of the product back into our significand.
1323 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1324
1325 if (newPartsCount > 4)
1326 delete [] fullSignificand;
1327
1328 return lost_fraction;
1329}
1330
1331lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1332 // When the given semantics has zero, the addend here is a zero.
1333 // i.e . it belongs to the 'fcZero' category.
1334 // But when the semantics does not support zero, we need to
1335 // explicitly convey that this addend should be ignored
1336 // for multiplication.
1337 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1338}
1339
1340/* Divide our significand by the significand of RHS using bitwise long
 division. The quotient replaces our significand, the exponent is adjusted
 to match, and the lost fraction (derived from the final remainder) is
 returned. */
1341lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1342 integerPart scratch[4];
1343
1344 assert(semantics == rhs.semantics);
1345
1346 integerPart *lhsSignificand = significandParts();
1347 const integerPart *rhsSignificand = rhs.significandParts();
1348 unsigned partsCount = partCount();
1349
// One buffer holds both working copies: the dividend in the first partsCount
// words and the divisor in the next partsCount. The four-word scratch array
// suffices when partsCount <= 2; otherwise the buffer is allocated with new[].
1350 integerPart *dividend =
1351 partsCount > 2 ? new integerPart[partsCount * 2] : scratch;
1352 integerPart *divisor = dividend + partsCount;
1353
1354 /* Copy the dividend and divisor as they will be modified in-place. */
// Our own significand is cleared at the same time; quotient bits are set
// into it by the long-division loop below.
1355 for (unsigned i = 0; i < partsCount; i++) {
1356 dividend[i] = lhsSignificand[i];
1357 divisor[i] = rhsSignificand[i];
1358 lhsSignificand[i] = 0;
1359 }
1360
1361 exponent -= rhs.exponent;
1362
1363 unsigned int precision = semantics->precision;
1364
1365 /* Normalize the divisor. */
1366 unsigned bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1367 if (bit) {
1368 exponent += bit;
1369 APInt::tcShiftLeft(divisor, partsCount, bit);
1370 }
1371
1372 /* Normalize the dividend. */
1373 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1374 if (bit) {
1375 exponent -= bit;
1376 APInt::tcShiftLeft(dividend, partsCount, bit);
1377 }
1378
1379 /* Ensure the dividend >= divisor initially for the loop below.
1380 Incidentally, this means that the division loop below is
1381 guaranteed to set the integer bit to one. */
1382 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1383 exponent--;
1384 APInt::tcShiftLeft(dividend, partsCount, 1);
1385 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1386 }
1387
1388 /* Long division. */
// Produces `precision` quotient bits, most significant first.
1389 for (bit = precision; bit; bit -= 1) {
1390 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1391 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1392 APInt::tcSetBit(lhsSignificand, bit - 1);
1393 }
1394
1395 APInt::tcShiftLeft(dividend, partsCount, 1);
1396 }
1397
1398 /* Figure out the lost fraction. */
// At this point `dividend` holds the remainder shifted left by one, so
// comparing it with the divisor compares the remainder with half the divisor.
1399 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1400
1401 lostFraction lost_fraction;
1402 if (cmp > 0)
1403 lost_fraction = lfMoreThanHalf;
1404 else if (cmp == 0)
1405 lost_fraction = lfExactlyHalf;
1406 else if (APInt::tcIsZero(dividend, partsCount))
1407 lost_fraction = lfExactlyZero;
1408 else
1409 lost_fraction = lfLessThanHalf;
1410
1411 if (partsCount > 2)
1412 delete [] dividend;
1413
1414 return lost_fraction;
1415}
1416
1417unsigned int IEEEFloat::significandMSB() const {
1418 return APInt::tcMSB(significandParts(), partCount());
1419}
1420
1421unsigned int IEEEFloat::significandLSB() const {
1422 return APInt::tcLSB(significandParts(), partCount());
1423}
1424
1425/* Note that a zero result is NOT normalized to fcZero. */
1426lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1427 /* Our exponent should not overflow. */
1428 assert((ExponentType) (exponent + bits) >= exponent);
1429
1430 exponent += bits;
1431
1432 return shiftRight(significandParts(), partCount(), bits);
1433}
1434
1435/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1436void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1437 assert(bits < semantics->precision ||
1438 (semantics->precision == 1 && bits <= 1));
1439
1440 if (bits) {
1441 unsigned int partsCount = partCount();
1442
1443 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1444 exponent -= bits;
1445
1446 assert(!APInt::tcIsZero(significandParts(), partsCount));
1447 }
1448}
1449
// NOTE(review): the line that opens this definition (rendered line 1450) and
// rendered line 1452 are absent from this listing; presumably this is the
// magnitude comparison against `rhs` -- confirm against upstream.
//
// Orders two values of the same semantics by exponent first and, when the
// exponents match, by an unsigned comparison of the significand words. The
// sign is not consulted anywhere in the visible body.
1451 assert(semantics == rhs.semantics);
1453 assert(rhs.isFiniteNonZero());
1454
1455 int compare = exponent - rhs.exponent;
1456
1457 /* If exponents are equal, do an unsigned bignum comparison of the
1458 significands. */
1459 if (compare == 0)
1460 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1461 partCount());
1462
// Map the signed difference onto the three-way cmpResult.
1463 if (compare > 0)
1464 return cmpGreaterThan;
1465 else if (compare < 0)
1466 return cmpLessThan;
1467 else
1468 return cmpEqual;
1469}
1470
1471/* Set the least significant BITS bits of a bignum, clear the
1472 rest. */
1473static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1474 unsigned bits) {
1475 unsigned i = 0;
1476 while (bits > APInt::APINT_BITS_PER_WORD) {
1477 dst[i++] = ~(APInt::WordType)0;
1479 }
1480
1481 if (bits)
1482 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1483
1484 while (i < parts)
1485 dst[i++] = 0;
1486}
1487
1488/* Handle overflow. Sign is preserved. We either become infinity or
1489 the largest finite number. For the rounding modes tested below the
 status returned is opOverflow | opInexact; for the largest-finite fallback
 it is opInexact. */
1490APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
// NOTE(review): rendered lines 1491 and 1497 are absent from this listing,
// which leaves the `else` and one closing brace below without visible
// partners; presumably they are conditions on semantics->nonFiniteBehavior --
// confirm against upstream.
1492 /* Infinity? */
1493 if (rounding_mode == rmNearestTiesToEven ||
1494 rounding_mode == rmNearestTiesToAway ||
1495 (rounding_mode == rmTowardPositive && !sign) ||
1496 (rounding_mode == rmTowardNegative && sign)) {
1498 makeNaN(false, sign);
1499 else
1500 category = fcInfinity;
1501 return static_cast<opStatus>(opOverflow | opInexact);
1502 }
1503 }
1504
1505 /* Otherwise we become the largest finite number. */
1506 category = fcNormal;
1507 exponent = semantics->maxExponent;
1508 tcSetLeastSignificantBits(significandParts(), partCount(),
1509 semantics->precision);
// In NanOnly formats whose NaN is the all-ones pattern, the lowest bit is
// cleared so the result is not the all-ones significand.
1510 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1511 semantics->nanEncoding == fltNanEncoding::AllOnes)
1512 APInt::tcClearBit(significandParts(), 0);
1513
1514 return opInexact;
1515}
1516
1517/* Returns TRUE if, when truncating the current number, with BIT the
1518 new LSB, with the given lost fraction and rounding mode, the result
1519 would need to be rounded away from zero (i.e., by increasing the
1520 significand). This routine must work for fcZero of both signs, and
1521 fcNormal numbers. */
1522bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1523 lostFraction lost_fraction,
1524 unsigned int bit) const {
1525 /* NaNs and infinities should not have lost fractions. */
1526 assert(isFiniteNonZero() || category == fcZero);
1527
1528 /* Current callers never pass this so we don't handle it. */
1529 assert(lost_fraction != lfExactlyZero);
1530
1531 switch (rounding_mode) {
// NOTE(review): rendered lines 1532 and 1535 are absent from this listing;
// presumably they are the `case` labels for the two round-to-nearest modes
// (ties-away first, ties-to-even second, judging by the bodies) -- confirm
// against upstream.
1533 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1534
1536 if (lost_fraction == lfMoreThanHalf)
1537 return true;
1538
1539 /* Our zeroes don't have a significand to test. */
// On an exact tie the decision is the value of bit BIT of the significand.
1540 if (lost_fraction == lfExactlyHalf && category != fcZero)
1541 return APInt::tcExtractBit(significandParts(), bit);
1542
1543 return false;
1544
1545 case rmTowardZero:
1546 return false;
1547
1548 case rmTowardPositive:
1549 return !sign;
1550
1551 case rmTowardNegative:
1552 return sign;
1553
1554 default:
1555 break;
1556 }
1557 llvm_unreachable("Invalid rounding mode found");
1558}
1559
// Brings a finite non-zero value into canonical form: positions the most
// significant bit according to the precision (within the exponent range),
// rounds using LOST_FRACTION and ROUNDING_MODE, and reports overflow,
// underflow and inexactness through the returned status. Values that are not
// finite non-zero are returned unchanged with opOK.
1560APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1561 lostFraction lost_fraction) {
1562 if (!isFiniteNonZero())
1563 return opOK;
1564
1565 /* Before rounding normalize the exponent of fcNormal numbers. */
1566 /* One, not zero, based MSB. */
1567 unsigned omsb = significandMSB() + 1;
1568
1569 // Only skip this `if` if the value is exactly zero.
1570 if (omsb || lost_fraction != lfExactlyZero) {
1571 /* OMSB is numbered from 1. We want to place it in the integer
1572 bit numbered PRECISION if possible, with a compensating change in
1573 the exponent. */
1574 int exponentChange = omsb - semantics->precision;
1575
1576 /* If the resulting exponent is too high, overflow according to
1577 the rounding mode. */
1578 if (exponent + exponentChange > semantics->maxExponent)
1579 return handleOverflow(rounding_mode);
1580
1581 /* Subnormal numbers have exponent minExponent, and their MSB
1582 is forced based on that. */
1583 if (exponent + exponentChange < semantics->minExponent)
1584 exponentChange = semantics->minExponent - exponent;
1585
1586 /* Shifting left is easy as we don't lose precision. */
1587 if (exponentChange < 0) {
1588 assert(lost_fraction == lfExactlyZero);
1589
1590 shiftSignificandLeft(-exponentChange);
1591
1592 return opOK;
1593 }
1594
1595 if (exponentChange > 0) {
1596 lostFraction lf;
1597
1598 /* Shift right and capture any new lost fraction. */
1599 lf = shiftSignificandRight(exponentChange);
1600
1601 lost_fraction = combineLostFractions(lf, lost_fraction);
1602
1603 /* Keep OMSB up-to-date. */
1604 if (omsb > (unsigned) exponentChange)
1605 omsb -= exponentChange;
1606 else
1607 omsb = 0;
1608 }
1609 }
1610
1611 // The all-ones values is an overflow if NaN is all ones. If NaN is
1612 // represented by negative zero, then it is a valid finite value.
1613 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1614 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1615 exponent == semantics->maxExponent && isSignificandAllOnes())
1616 return handleOverflow(rounding_mode);
1617
1618 /* Now round the number according to rounding_mode given the lost
1619 fraction. */
1620
1621 /* As specified in IEEE 754, since we do not trap we do not report
1622 underflow for exact results. */
1623 if (lost_fraction == lfExactlyZero) {
1624 /* Canonicalize zeroes. */
1625 if (omsb == 0) {
1626 category = fcZero;
1627 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1628 sign = false;
// NOTE(review): rendered line 1630 (the statement controlled by the `if`
// below) is absent from this listing; presumably it substitutes the smallest
// normalized value for formats without a zero -- confirm against upstream.
1629 if (!semantics->hasZero)
1631 }
1632
1633 return opOK;
1634 }
1635
1636 /* Increment the significand if we're rounding away from zero. */
1637 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1638 if (omsb == 0)
1639 exponent = semantics->minExponent;
1640
1641 incrementSignificand();
1642 omsb = significandMSB() + 1;
1643
1644 /* Did the significand increment overflow? */
1645 if (omsb == (unsigned) semantics->precision + 1) {
1646 /* Renormalize by incrementing the exponent and shifting our
1647 significand right one. However if we already have the
1648 maximum exponent we overflow to infinity. */
1649 if (exponent == semantics->maxExponent)
1650 // Invoke overflow handling with a rounding mode that will guarantee
1651 // that the result gets turned into the correct infinity representation.
1652 // This is needed instead of just setting the category to infinity to
1653 // account for 8-bit floating point types that have no inf, only NaN.
1654 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1655
1656 shiftSignificandRight(1);
1657
1658 return opInexact;
1659 }
1660
1661 // The all-ones values is an overflow if NaN is all ones. If NaN is
1662 // represented by negative zero, then it is a valid finite value.
1663 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1664 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1665 exponent == semantics->maxExponent && isSignificandAllOnes())
1666 return handleOverflow(rounding_mode);
1667 }
1668
1669 /* The normal case - we were and are not denormal, and any
1670 significand increment above didn't overflow. */
1671 if (omsb == semantics->precision)
1672 return opInexact;
1673
1674 /* We have a non-zero denormal. */
1675 assert(omsb < semantics->precision);
1676
1677 /* Canonicalize zeroes. */
1678 if (omsb == 0) {
1679 category = fcZero;
1680 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1681 sign = false;
1682 // This condition handles the case where the semantics
1683 // does not have zero but uses the all-zero encoding
1684 // to represent the smallest normal value.
// NOTE(review): rendered line 1686 (the statement controlled by the `if`
// below) is absent from this listing -- confirm against upstream.
1685 if (!semantics->hasZero)
1687 }
1688
1689 /* The fcZero case is a denormal that underflowed to zero. */
1690 return (opStatus) (opUnderflow | opInexact);
1691}
1692
// Handles every operand-category combination of an addition/subtraction that
// does not need significand arithmetic. The final arm returns opDivByZero,
// which the caller treats as "not a simple case".
//
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the rendered numbering skips wherever they would sit),
// so which category pair selects each arm cannot be read off here -- confirm
// against upstream.
1693APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1694 bool subtract) {
1695 switch (PackCategoriesIntoKey(category, rhs.category)) {
1696 default:
1697 llvm_unreachable(nullptr);
1698
// Take over rhs, then fall through to the signaling check.
1702 assign(rhs);
1703 [[fallthrough]];
// A signaling value is quieted and reported as opInvalidOp; a signaling rhs
// also yields opInvalidOp.
1708 if (isSignaling()) {
1709 makeQuiet();
1710 return opInvalidOp;
1711 }
1712 return rhs.isSignaling() ? opInvalidOp : opOK;
1713
// *this is already the result.
1717 return opOK;
1718
// Result is an infinity carrying rhs's sign, flipped when subtracting.
1721 category = fcInfinity;
1722 sign = rhs.sign ^ subtract;
1723 return opOK;
1724
// Result is rhs, with its sign flipped when subtracting.
1726 assign(rhs);
1727 sign = rhs.sign ^ subtract;
1728 return opOK;
1729
1731 /* Sign depends on rounding mode; handled by caller. */
1732 return opOK;
1733
1735 /* Differently signed infinities can only be validly
1736 subtracted. */
1737 if (((sign ^ rhs.sign)!=0) != subtract) {
1738 makeNaN();
1739 return opInvalidOp;
1740 }
1741
1742 return opOK;
1743
// Sentinel: tells the caller that significand arithmetic is required.
1745 return opDivByZero;
1746 }
1747}
1748
1749/* Add or subtract two normal numbers. The significands are aligned by
 shifting one operand, combined in place, and the lost fraction produced by
 the alignment shift is returned. The sign of *this may be flipped when the
 subtraction is reversed. */
1750lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1751 bool subtract) {
1752 [[maybe_unused]] integerPart carry = 0;
1753 lostFraction lost_fraction;
1754
1755 /* Determine if the operation on the absolute values is effectively
1756 an addition or subtraction. */
1757 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1758
1759 /* Are we bigger exponent-wise than the RHS? */
1760 int bits = exponent - rhs.exponent;
1761
1762 /* Subtraction is more subtle than one might naively expect. */
1763 if (subtract) {
// NOTE(review): rendered line 1765 is absent from this listing, so the `if`
// below is followed only by the tail of a call carrying a message string --
// confirm the missing callee against upstream.
1764 if ((bits < 0) && !semantics->hasSignedRepr)
1766 "This floating point format does not support signed values");
1767
1768 IEEEFloat temp_rhs(rhs);
1769 bool lost_fraction_is_from_rhs = false;
1770
// Align the operands: the smaller-exponent side is shifted right by one bit
// less than the exponent gap, and the other side is shifted left by one.
1771 if (bits == 0)
1772 lost_fraction = lfExactlyZero;
1773 else if (bits > 0) {
1774 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1775 lost_fraction_is_from_rhs = true;
1776 shiftSignificandLeft(1);
1777 } else {
1778 lost_fraction = shiftSignificandRight(-bits - 1);
1779 temp_rhs.shiftSignificandLeft(1);
1780 }
1781
1782 // Should we reverse the subtraction.
1783 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1784 if (cmp_result == cmpLessThan) {
1785 bool borrow =
1786 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1787 if (borrow) {
1788 // The lost fraction is being subtracted, borrow from the significand
1789 // and invert `lost_fraction`.
1790 if (lost_fraction == lfLessThanHalf)
1791 lost_fraction = lfMoreThanHalf;
1792 else if (lost_fraction == lfMoreThanHalf)
1793 lost_fraction = lfLessThanHalf;
1794 }
// Compute rhs - *this in the temporary, copy it back, and flip our sign.
1795 carry = temp_rhs.subtractSignificand(*this, borrow);
1796 copySignificand(temp_rhs);
1797 sign = !sign;
1798 } else if (cmp_result == cmpGreaterThan) {
1799 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1800 if (borrow) {
1801 // The lost fraction is being subtracted, borrow from the significand
1802 // and invert `lost_fraction`.
1803 if (lost_fraction == lfLessThanHalf)
1804 lost_fraction = lfMoreThanHalf;
1805 else if (lost_fraction == lfMoreThanHalf)
1806 lost_fraction = lfLessThanHalf;
1807 }
1808 carry = subtractSignificand(temp_rhs, borrow);
1809 } else { // cmpEqual
1810 zeroSignificand();
1811 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1812 // rhs is slightly larger due to the lost fraction, flip the sign.
1813 sign = !sign;
1814 }
1815 }
1816
1817 /* The code above is intended to ensure that no borrow is
1818 necessary. */
1819 assert(!carry);
1820 } else {
// Effective addition: shift the smaller-exponent operand right by the full
// exponent gap, then add.
1821 if (bits > 0) {
1822 IEEEFloat temp_rhs(rhs);
1823
1824 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1825 carry = addSignificand(temp_rhs);
1826 } else {
1827 lost_fraction = shiftSignificandRight(-bits);
1828 carry = addSignificand(rhs);
1829 }
1830
1831 /* We have a guard bit; generating a carry cannot happen. */
1832 assert(!carry);
1833 }
1834
1835 return lost_fraction;
1836}
1837
// Resolves the operand-category combinations of a multiplication that need
// no significand arithmetic.
//
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the rendered numbering skips wherever they would sit),
// so which category pair selects each arm cannot be read off here -- confirm
// against upstream.
1838APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1839 switch (PackCategoriesIntoKey(category, rhs.category)) {
1840 default:
1841 llvm_unreachable(nullptr);
1842
// Take over rhs with a cleared sign, so the XOR after the fallthrough leaves
// the result with rhs's sign.
1846 assign(rhs);
1847 sign = false;
1848 [[fallthrough]];
1853 sign ^= rhs.sign; // restore the original sign
// A signaling value is quieted and reported as opInvalidOp; a signaling rhs
// also yields opInvalidOp.
1854 if (isSignaling()) {
1855 makeQuiet();
1856 return opInvalidOp;
1857 }
1858 return rhs.isSignaling() ? opInvalidOp : opOK;
1859
1863 category = fcInfinity;
1864 return opOK;
1865
1869 category = fcZero;
1870 return opOK;
1871
1874 makeNaN();
1875 return opInvalidOp;
1876
// No category change is made here; the status is opOK.
1878 return opOK;
1879 }
1880}
1881
// Resolves the operand-category combinations of a division that need no
// significand arithmetic.
//
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the rendered numbering skips wherever they would sit),
// so which category pair selects each arm cannot be read off here -- confirm
// against upstream.
1882APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1883 switch (PackCategoriesIntoKey(category, rhs.category)) {
1884 default:
1885 llvm_unreachable(nullptr);
1886
// Take over rhs with a cleared sign, so the XOR after the fallthrough leaves
// the result with rhs's sign.
1890 assign(rhs);
1891 sign = false;
1892 [[fallthrough]];
1897 sign ^= rhs.sign; // restore the original sign
// A signaling value is quieted and reported as opInvalidOp; a signaling rhs
// also yields opInvalidOp.
1898 if (isSignaling()) {
1899 makeQuiet();
1900 return opInvalidOp;
1901 }
1902 return rhs.isSignaling() ? opInvalidOp : opOK;
1903
// *this is left untouched by this arm.
1908 return opOK;
1909
1911 category = fcZero;
1912 return opOK;
1913
// This arm reports opDivByZero; the result is a NaN in NanOnly formats and an
// infinity otherwise.
1915 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1916 makeNaN(false, sign);
1917 else
1918 category = fcInfinity;
1919 return opDivByZero;
1920
1923 makeNaN();
1924 return opInvalidOp;
1925
// No category change is made here; the status is opOK.
1927 return opOK;
1928 }
1929}
1930
// Resolves the operand-category combinations of the mod operation that need
// no arithmetic.
//
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the rendered numbering skips wherever they would sit),
// so which category pair selects each arm cannot be read off here -- confirm
// against upstream.
1931APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1932 switch (PackCategoriesIntoKey(category, rhs.category)) {
1933 default:
1934 llvm_unreachable(nullptr);
1935
// Take over rhs, then fall through to the signaling check.
1939 assign(rhs);
1940 [[fallthrough]];
// A signaling value is quieted and reported as opInvalidOp; a signaling rhs
// also yields opInvalidOp.
1945 if (isSignaling()) {
1946 makeQuiet();
1947 return opInvalidOp;
1948 }
1949 return rhs.isSignaling() ? opInvalidOp : opOK;
1950
// *this is left untouched by this arm.
1954 return opOK;
1955
1961 makeNaN();
1962 return opInvalidOp;
1963
// No change is made here; the status is opOK.
1965 return opOK;
1966 }
1967}
1968
// Resolves the operand-category combinations of the remainder operation that
// need no arithmetic. The final arm returns opDivByZero as a marker meaning
// "not a special case".
//
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the rendered numbering skips wherever they would sit),
// so which category pair selects each arm cannot be read off here -- confirm
// against upstream.
1969APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1970 switch (PackCategoriesIntoKey(category, rhs.category)) {
1971 default:
1972 llvm_unreachable(nullptr);
1973
// Take over rhs, then fall through to the signaling check.
1977 assign(rhs);
1978 [[fallthrough]];
// A signaling value is quieted and reported as opInvalidOp; a signaling rhs
// also yields opInvalidOp.
1983 if (isSignaling()) {
1984 makeQuiet();
1985 return opInvalidOp;
1986 }
1987 return rhs.isSignaling() ? opInvalidOp : opOK;
1988
// *this is left untouched by this arm.
1992 return opOK;
1993
1999 makeNaN();
2000 return opInvalidOp;
2001
2003 return opDivByZero; // fake status, indicating this is not a special case
2004 }
2005}
2006
2007/* Change sign. */
// NOTE(review): the line that opens this definition (rendered line 2008) is
// absent from this listing -- confirm the signature against upstream.
2009 // With NaN-as-negative-zero, neither NaN or negative zero can change
2010 // their signs.
2011 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2012 (isZero() || isNaN()))
2013 return;
2014 /* Look mummy, this one's easy. */
2015 sign = !sign;
2016}
2017
2018/* Normalized addition or subtraction. */
2019APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2020 roundingMode rounding_mode,
2021 bool subtract) {
2022 opStatus fs = addOrSubtractSpecials(rhs, subtract);
2023
2024 /* This return code means it was not a simple case. */
2025 if (fs == opDivByZero) {
2026 lostFraction lost_fraction;
2027
2028 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2029 fs = normalize(rounding_mode, lost_fraction);
2030
2031 /* Can only be zero if we lost no fraction. */
2032 assert(category != fcZero || lost_fraction == lfExactlyZero);
2033 }
2034
2035 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2036 positive zero unless rounding to minus infinity, except that
2037 adding two like-signed zeroes gives that zero. */
2038 if (category == fcZero) {
2039 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2040 sign = (rounding_mode == rmTowardNegative);
2041 // NaN-in-negative-zero means zeros need to be normalized to +0.
2042 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2043 sign = false;
2044 }
2045
2046 return fs;
2047}
2048
2049/* Normalized addition. Forwards to addOrSubtract with subtract == false. */
// NOTE(review): the first line of the declarator (rendered line 2050) is
// absent from this listing -- confirm the signature against upstream.
2051 roundingMode rounding_mode) {
2052 return addOrSubtract(rhs, rounding_mode, false);
2053}
2054
2055/* Normalized subtraction. Forwards to addOrSubtract with subtract == true. */
// NOTE(review): the first line of the declarator (rendered line 2056) is
// absent from this listing -- confirm the signature against upstream.
2057 roundingMode rounding_mode) {
2058 return addOrSubtract(rhs, rounding_mode, true);
2059}
2060
2061/* Normalized multiply. */
// NOTE(review): the first line of the declarator (rendered line 2062) is
// absent from this listing -- confirm the signature against upstream.
2063 roundingMode rounding_mode) {
// The result sign is the XOR of the operand signs; special categories are
// resolved before any significand work.
2064 sign ^= rhs.sign;
2065 opStatus fs = multiplySpecials(rhs);
2066
2067 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2068 sign = false;
2069 if (isFiniteNonZero()) {
2070 lostFraction lost_fraction = multiplySignificand(rhs);
2071 fs = normalize(rounding_mode, lost_fraction);
// Any fraction lost in the significand product makes the result inexact.
2072 if (lost_fraction != lfExactlyZero)
2073 fs = (opStatus) (fs | opInexact);
2074 }
2075
2076 return fs;
2077}
2078
2079/* Normalized divide. */
// NOTE(review): the first line of the declarator (rendered line 2080) is
// absent from this listing -- confirm the signature against upstream.
2081 roundingMode rounding_mode) {
// The result sign is the XOR of the operand signs; special categories are
// resolved before any significand work.
2082 sign ^= rhs.sign;
2083 opStatus fs = divideSpecials(rhs);
2084
2085 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2086 sign = false;
2087 if (isFiniteNonZero()) {
2088 lostFraction lost_fraction = divideSignificand(rhs);
2089 fs = normalize(rounding_mode, lost_fraction);
// Any fraction lost in the significand quotient makes the result inexact.
2090 if (lost_fraction != lfExactlyZero)
2091 fs = (opStatus) (fs | opInexact);
2092 }
2093
2094 return fs;
2095}
2096
2097/* Normalized remainder. */
// NOTE(review): the line that opens this definition (rendered line 2098) is
// absent from this listing -- confirm the signature against upstream.
2099 unsigned int origSign = sign;
2100
2101 // First handle the special cases.
// remainderSpecials returns opDivByZero as a marker for "not a special case".
2102 opStatus fs = remainderSpecials(rhs);
2103 if (fs != opDivByZero)
2104 return fs;
2105
2106 fs = opOK;
2107
2108 // Make sure the current value is less than twice the denom. If the addition
2109 // did not succeed (an overflow has happened), which means that the finite
2110 // value we currently possess must be less than twice the denom (as we are
2111 // using the same semantics).
2112 IEEEFloat P2 = rhs;
2113 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2114 fs = mod(P2);
2115 assert(fs == opOK);
2116 }
2117
2118 // Lets work with absolute numbers.
2119 IEEEFloat P = rhs;
2120 P.sign = false;
2121 sign = false;
2122
2123 //
2124 // To calculate the remainder we use the following scheme.
2125 //
2126 // The remainder is defined as follows:
2127 //
2128 // remainder = numer - rquot * denom = x - r * p
2129 //
2130 // Where r is the result of: x/p, rounded toward the nearest integral value
2131 // (with halfway cases rounded toward the even number).
2132 //
2133 // Currently, (after x mod 2p):
2134 // r is the number of 2p's present inside x, which is inherently, an even
2135 // number of p's.
2136 //
2137 // We may split the remaining calculation into 4 options:
2138 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2139 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2140 // are done as well.
2141 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2142 // to subtract 1p at least once.
2143 // - if x >= p then we must subtract p at least once, as x must be a
2144 // remainder.
2145 //
2146 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2147 //
2148 // We can now split the remaining calculation to the following 3 options:
2149 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2150 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2151 // must round up to the next even number. so we must subtract p once more.
2152 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2153 // integral, and subtract p once more.
2154 //
2155
2156 // Extend the semantics to prevent an overflow/underflow or inexact result.
2157 bool losesInfo;
2158 fltSemantics extendedSemantics = *semantics;
2159 extendedSemantics.maxExponent++;
2160 extendedSemantics.minExponent--;
2161 extendedSemantics.precision += 2;
2162
2163 IEEEFloat VEx = *this;
2164 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2165 assert(fs == opOK && !losesInfo);
2166 IEEEFloat PEx = P;
2167 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2168 assert(fs == opOK && !losesInfo);
2169
2170 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2171 // any fraction.
2172 fs = VEx.add(VEx, rmNearestTiesToEven);
2173 assert(fs == opOK);
2174
2175 if (VEx.compare(PEx) == cmpGreaterThan) {
// NOTE(review): rendered line 2176 is absent from this listing; the assert
// below checks an `fs` that no visible statement in this branch has
// reassigned yet -- presumably a subtraction of P from *this belongs here;
// confirm against upstream.
2177 assert(fs == opOK);
2178
2179 // Make VEx = this.add(this), but because we have different semantics, we do
2180 // not want to `convert` again, so we just subtract PEx twice (which equals
2181 // to the desired value).
2182 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2183 assert(fs == opOK);
2184 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2185 assert(fs == opOK);
2186
2187 cmpResult result = VEx.compare(PEx);
2188 if (result == cmpGreaterThan || result == cmpEqual) {
// NOTE(review): rendered line 2189 is absent from this listing as well --
// presumably a second subtraction of P; confirm against upstream.
2190 assert(fs == opOK);
2191 }
2192 }
2193
// Reapply the sign saved on entry: a zero result takes it directly (or is
// forced positive for NaN-as-negative-zero formats), anything else is XORed
// with it.
2194 if (isZero()) {
2195 sign = origSign; // IEEE754 requires this
2196 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2197 // But some 8-bit floats only have positive 0.
2198 sign = false;
2199 } else {
2200 sign ^= origSign;
2201 }
2202 return fs;
2203}
2204
2205/* Normalized llvm frem (C fmod). */
2207 opStatus fs = modSpecials(rhs);
2208 unsigned int origSign = sign;
2209
2210 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2212 int Exp = ilogb(*this) - ilogb(rhs);
2213 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2214 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2215 // check for it.
2216 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2217 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2218 V.sign = sign;
2219
2221
2222 // When the semantics supports zero, this loop's
2223 // exit-condition is handled by the 'isFiniteNonZero'
2224 // category check above. However, when the semantics
2225 // does not have 'fcZero' and we have reached the
2226 // minimum possible value, (and any further subtract
2227 // will underflow to the same value) explicitly
2228 // provide an exit-path here.
2229 if (!semantics->hasZero && this->isSmallest())
2230 break;
2231
2232 assert(fs==opOK);
2233 }
2234 if (isZero()) {
2235 sign = origSign; // fmod requires this
2236 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2237 sign = false;
2238 }
2239 return fs;
2240}
2241
2242/* Normalized fused-multiply-add. */
2244 const IEEEFloat &addend,
2245 roundingMode rounding_mode) {
2246 opStatus fs;
2247
2248 /* Post-multiplication sign, before addition. */
2249 sign ^= multiplicand.sign;
2250
2251 /* If and only if all arguments are normal do we need to do an
2252 extended-precision calculation. */
2253 if (isFiniteNonZero() &&
2254 multiplicand.isFiniteNonZero() &&
2255 addend.isFinite()) {
2256 lostFraction lost_fraction;
2257
2258 lost_fraction = multiplySignificand(multiplicand, addend);
2259 fs = normalize(rounding_mode, lost_fraction);
2260 if (lost_fraction != lfExactlyZero)
2261 fs = (opStatus) (fs | opInexact);
2262
2263 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2264 positive zero unless rounding to minus infinity, except that
2265 adding two like-signed zeroes gives that zero. */
2266 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2267 sign = (rounding_mode == rmTowardNegative);
2268 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2269 sign = false;
2270 }
2271 } else {
2272 fs = multiplySpecials(multiplicand);
2273
2274 /* FS can only be opOK or opInvalidOp. There is no more work
2275 to do in the latter case. The IEEE-754R standard says it is
2276 implementation-defined in this case whether, if ADDEND is a
2277 quiet NaN, we raise invalid op; this implementation does so.
2278
2279 If we need to do the addition we can do so with normal
2280 precision. */
2281 if (fs == opOK)
2282 fs = addOrSubtract(addend, rounding_mode, false);
2283 }
2284
2285 return fs;
2286}
2287
2288/* Rounding-mode correct round to integral value. */
2290 if (isInfinity())
2291 // [IEEE Std 754-2008 6.1]:
2292 // The behavior of infinity in floating-point arithmetic is derived from the
2293 // limiting cases of real arithmetic with operands of arbitrarily
2294 // large magnitude, when such a limit exists.
2295 // ...
2296 // Operations on infinite operands are usually exact and therefore signal no
2297 // exceptions ...
2298 return opOK;
2299
2300 if (isNaN()) {
2301 if (isSignaling()) {
2302 // [IEEE Std 754-2008 6.2]:
2303 // Under default exception handling, any operation signaling an invalid
2304 // operation exception and for which a floating-point result is to be
2305 // delivered shall deliver a quiet NaN.
2306 makeQuiet();
2307 // [IEEE Std 754-2008 6.2]:
2308 // Signaling NaNs shall be reserved operands that, under default exception
2309 // handling, signal the invalid operation exception(see 7.2) for every
2310 // general-computational and signaling-computational operation except for
2311 // the conversions described in 5.12.
2312 return opInvalidOp;
2313 } else {
2314 // [IEEE Std 754-2008 6.2]:
2315 // For an operation with quiet NaN inputs, other than maximum and minimum
2316 // operations, if a floating-point result is to be delivered the result
2317 // shall be a quiet NaN which should be one of the input NaNs.
2318 // ...
2319 // Every general-computational and quiet-computational operation involving
2320 // one or more input NaNs, none of them signaling, shall signal no
2321 // exception, except fusedMultiplyAdd might signal the invalid operation
2322 // exception(see 7.2).
2323 return opOK;
2324 }
2325 }
2326
2327 if (isZero()) {
2328 // [IEEE Std 754-2008 6.3]:
2329 // ... the sign of the result of conversions, the quantize operation, the
2330 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2331 // the sign of the first or only operand.
2332 return opOK;
2333 }
2334
2335 // If the exponent is large enough, we know that this value is already
2336 // integral, and the arithmetic below would potentially cause it to saturate
2337 // to +/-Inf. Bail out early instead.
2338 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2339 return opOK;
2340
2341 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2342 // precision of our format, and then subtract it back off again. The choice
2343 // of rounding modes for the addition/subtraction determines the rounding mode
2344 // for our integral rounding as well.
2345 // NOTE: When the input value is negative, we do subtraction followed by
2346 // addition instead.
2347 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2348 1);
2349 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2350 IEEEFloat MagicConstant(*semantics);
2351 opStatus fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2353 assert(fs == opOK);
2354 MagicConstant.sign = sign;
2355
2356 // Preserve the input sign so that we can handle the case of zero result
2357 // correctly.
2358 bool inputSign = isNegative();
2359
2360 fs = add(MagicConstant, rounding_mode);
2361
2362 // Current value and 'MagicConstant' are both integers, so the result of the
2363 // subtraction is always exact according to Sterbenz' lemma.
2364 subtract(MagicConstant, rounding_mode);
2365
2366 // Restore the input sign.
2367 if (inputSign != isNegative())
2368 changeSign();
2369
2370 return fs;
2371}
2372
2373/* Comparison requires normalized numbers. */
2375 assert(semantics == rhs.semantics);
2376
2377 switch (PackCategoriesIntoKey(category, rhs.category)) {
2378 default:
2379 llvm_unreachable(nullptr);
2380
2388 return cmpUnordered;
2389
2393 if (sign)
2394 return cmpLessThan;
2395 else
2396 return cmpGreaterThan;
2397
2401 if (rhs.sign)
2402 return cmpGreaterThan;
2403 else
2404 return cmpLessThan;
2405
2407 if (sign == rhs.sign)
2408 return cmpEqual;
2409 else if (sign)
2410 return cmpLessThan;
2411 else
2412 return cmpGreaterThan;
2413
2415 return cmpEqual;
2416
2418 break;
2419 }
2420
2421 cmpResult result;
2422 /* Two normal numbers. Do they have the same sign? */
2423 if (sign != rhs.sign) {
2424 if (sign)
2425 result = cmpLessThan;
2426 else
2427 result = cmpGreaterThan;
2428 } else {
2429 /* Compare absolute values; invert result if negative. */
2430 result = compareAbsoluteValue(rhs);
2431
2432 if (sign) {
2433 if (result == cmpLessThan)
2434 result = cmpGreaterThan;
2435 else if (result == cmpGreaterThan)
2436 result = cmpLessThan;
2437 }
2438 }
2439
2440 return result;
2441}
2442
2443/// IEEEFloat::convert - convert a value of one floating point type to another.
2444/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2445/// records whether the transformation lost information, i.e. whether
2446/// converting the result back to the original type will produce the
2447/// original value (this is almost the same as return value==fsOK, but there
2448/// are edge cases where this is not so).
2449
2451 roundingMode rounding_mode,
2452 bool *losesInfo) {
2453 opStatus fs;
2454 const fltSemantics &fromSemantics = *semantics;
2455 bool is_signaling = isSignaling();
2456
2458 unsigned newPartCount = partCountForBits(toSemantics.precision + 1);
2459 unsigned oldPartCount = partCount();
2460 int shift = toSemantics.precision - fromSemantics.precision;
2461
2462 bool X86SpecialNan = false;
2463 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2464 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2465 (!(*significandParts() & 0x8000000000000000ULL) ||
2466 !(*significandParts() & 0x4000000000000000ULL))) {
2467 // x86 has some unusual NaNs which cannot be represented in any other
2468 // format; note them here.
2469 X86SpecialNan = true;
2470 }
2471
2472 // If this is a truncation of a denormal number, and the target semantics
2473 // has larger exponent range than the source semantics (this can happen
2474 // when truncating from PowerPC double-double to double format), the
2475 // right shift could lose result mantissa bits. Adjust exponent instead
2476 // of performing excessive shift.
2477 // Also do a similar trick in case shifting denormal would produce zero
2478 // significand as this case isn't handled correctly by normalize.
2479 if (shift < 0 && isFiniteNonZero()) {
2480 int omsb = significandMSB() + 1;
2481 int exponentChange = omsb - fromSemantics.precision;
2482 if (exponent + exponentChange < toSemantics.minExponent)
2483 exponentChange = toSemantics.minExponent - exponent;
2484 exponentChange = std::max(exponentChange, shift);
2485 if (exponentChange < 0) {
2486 shift -= exponentChange;
2487 exponent += exponentChange;
2488 } else if (omsb <= -shift) {
2489 exponentChange = omsb + shift - 1; // leave at least one bit set
2490 shift -= exponentChange;
2491 exponent += exponentChange;
2492 }
2493 }
2494
2495 // If this is a truncation, perform the shift before we narrow the storage.
2496 if (shift < 0 && (isFiniteNonZero() ||
2497 (category == fcNaN && semantics->nonFiniteBehavior !=
2499 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2500
2501 // Fix the storage so it can hold to new value.
2502 if (newPartCount > oldPartCount) {
2503 // The new type requires more storage; make it available.
2504 integerPart *newParts;
2505 newParts = new integerPart[newPartCount];
2506 APInt::tcSet(newParts, 0, newPartCount);
2507 if (isFiniteNonZero() || category==fcNaN)
2508 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2509 freeSignificand();
2510 significand.parts = newParts;
2511 } else if (newPartCount == 1 && oldPartCount != 1) {
2512 // Switch to built-in storage for a single part.
2513 integerPart newPart = 0;
2514 if (isFiniteNonZero() || category==fcNaN)
2515 newPart = significandParts()[0];
2516 freeSignificand();
2517 significand.part = newPart;
2518 }
2519
2520 // Now that we have the right storage, switch the semantics.
2521 semantics = &toSemantics;
2522
2523 // If this is an extension, perform the shift now that the storage is
2524 // available.
2525 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2526 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2527
2528 if (isFiniteNonZero()) {
2529 fs = normalize(rounding_mode, lostFraction);
2530 *losesInfo = (fs != opOK);
2531 } else if (category == fcNaN) {
2532 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2533 *losesInfo =
2535 makeNaN(false, sign);
2536 return is_signaling ? opInvalidOp : opOK;
2537 }
2538
2539 // If NaN is negative zero, we need to create a new NaN to avoid converting
2540 // NaN to -Inf.
2541 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2542 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2543 makeNaN(false, false);
2544
2545 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2546
2547 // For x87 extended precision, we want to make a NaN, not a special NaN if
2548 // the input wasn't special either.
2549 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2550 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2551
2552 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2553 // This also guarantees that a sNaN does not become Inf on a truncation
2554 // that loses all payload bits.
2555 if (is_signaling) {
2556 makeQuiet();
2557 fs = opInvalidOp;
2558 } else {
2559 fs = opOK;
2560 }
2561 } else if (category == fcInfinity &&
2562 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2563 makeNaN(false, sign);
2564 *losesInfo = true;
2565 fs = opInexact;
2566 } else if (category == fcZero &&
2567 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2568 // Negative zero loses info, but positive zero doesn't.
2569 *losesInfo =
2570 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2571 fs = *losesInfo ? opInexact : opOK;
2572 // NaN is negative zero means -0 -> +0, which can lose information
2573 sign = false;
2574 } else {
2575 *losesInfo = false;
2576 fs = opOK;
2577 }
2578
2579 if (category == fcZero && !semantics->hasZero)
2581 return fs;
2582}
2583
2584/* Convert a floating point number to an integer according to the
2585 rounding mode. If the rounded integer value is out of range this
2586 returns an invalid operation exception and the contents of the
2587 destination parts are unspecified. If the rounded value is in
2588 range but the floating point number is not the exact integer, the C
2589 standard doesn't require an inexact exception to be raised. IEEE
2590 854 does require it so we do that.
2591
2592 Note that for conversions to integer type the C standard requires
2593 round-to-zero to always be used. */
2594APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2595 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2596 roundingMode rounding_mode, bool *isExact) const {
2597 *isExact = false;
2598
2599 /* Handle the three special cases first. */
2600 if (category == fcInfinity || category == fcNaN)
2601 return opInvalidOp;
2602
2603 unsigned dstPartsCount = partCountForBits(width);
2604 assert(dstPartsCount <= parts.size() && "Integer too big");
2605
2606 if (category == fcZero) {
2607 APInt::tcSet(parts.data(), 0, dstPartsCount);
2608 // Negative zero can't be represented as an int.
2609 *isExact = !sign;
2610 return opOK;
2611 }
2612
2613 const integerPart *src = significandParts();
2614
2615 unsigned truncatedBits;
2616 /* Step 1: place our absolute value, with any fraction truncated, in
2617 the destination. */
2618 if (exponent < 0) {
2619 /* Our absolute value is less than one; truncate everything. */
2620 APInt::tcSet(parts.data(), 0, dstPartsCount);
2621 /* For exponent -1 the integer bit represents .5, look at that.
2622 For smaller exponents leftmost truncated bit is 0. */
2623 truncatedBits = semantics->precision -1U - exponent;
2624 } else {
2625 /* We want the most significant (exponent + 1) bits; the rest are
2626 truncated. */
2627 unsigned int bits = exponent + 1U;
2628
2629 /* Hopelessly large in magnitude? */
2630 if (bits > width)
2631 return opInvalidOp;
2632
2633 if (bits < semantics->precision) {
2634 /* We truncate (semantics->precision - bits) bits. */
2635 truncatedBits = semantics->precision - bits;
2636 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2637 } else {
2638 /* We want at least as many bits as are available. */
2639 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2640 0);
2641 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2642 bits - semantics->precision);
2643 truncatedBits = 0;
2644 }
2645 }
2646
2647 /* Step 2: work out any lost fraction, and increment the absolute
2648 value if we would round away from zero. */
2649 lostFraction lost_fraction;
2650 if (truncatedBits) {
2651 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2652 truncatedBits);
2653 if (lost_fraction != lfExactlyZero &&
2654 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2655 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2656 return opInvalidOp; /* Overflow. */
2657 }
2658 } else {
2659 lost_fraction = lfExactlyZero;
2660 }
2661
2662 /* Step 3: check if we fit in the destination. */
2663 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2664
2665 if (sign) {
2666 if (!isSigned) {
2667 /* Negative numbers cannot be represented as unsigned. */
2668 if (omsb != 0)
2669 return opInvalidOp;
2670 } else {
2671 /* It takes omsb bits to represent the unsigned integer value.
2672 We lose a bit for the sign, but care is needed as the
2673 maximally negative integer is a special case. */
2674 if (omsb == width &&
2675 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2676 return opInvalidOp;
2677
2678 /* This case can happen because of rounding. */
2679 if (omsb > width)
2680 return opInvalidOp;
2681 }
2682
2683 APInt::tcNegate (parts.data(), dstPartsCount);
2684 } else {
2685 if (omsb >= width + !isSigned)
2686 return opInvalidOp;
2687 }
2688
2689 if (lost_fraction == lfExactlyZero) {
2690 *isExact = true;
2691 return opOK;
2692 }
2693 return opInexact;
2694}
2695
2696/* Same as convertToSignExtendedInteger, except we provide
2697 deterministic values in case of an invalid operation exception,
2698 namely zero for NaNs and the minimal or maximal value respectively
2699 for underflow or overflow.
2700 The *isExact output tells whether the result is exact, in the sense
2701 that converting it back to the original floating point type produces
2702 the original value. This is almost equivalent to result==opOK,
2703 except for negative zeroes.
2704*/
2707 unsigned int width, bool isSigned,
2708 roundingMode rounding_mode, bool *isExact) const {
2709 opStatus fs = convertToSignExtendedInteger(parts, width, isSigned,
2710 rounding_mode, isExact);
2711
2712 if (fs == opInvalidOp) {
2713 unsigned int bits, dstPartsCount;
2714
2715 dstPartsCount = partCountForBits(width);
2716 assert(dstPartsCount <= parts.size() && "Integer too big");
2717
2718 if (category == fcNaN)
2719 bits = 0;
2720 else if (sign)
2721 bits = isSigned;
2722 else
2723 bits = width - isSigned;
2724
2725 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2726 if (sign && isSigned)
2727 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2728 }
2729
2730 return fs;
2731}
2732
2733/* Convert an unsigned integer SRC to a floating point number,
2734 rounding according to ROUNDING_MODE. The sign of the floating
2735 point number is not modified. */
2736APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2737 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2738 category = fcNormal;
2739 unsigned omsb = APInt::tcMSB(src, srcCount) + 1;
2740 integerPart *dst = significandParts();
2741 unsigned dstCount = partCount();
2742 unsigned precision = semantics->precision;
2743
2744 /* We want the most significant PRECISION bits of SRC. There may not
2745 be that many; extract what we can. */
2746 lostFraction lost_fraction;
2747 if (precision <= omsb) {
2748 exponent = omsb - 1;
2749 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2750 omsb - precision);
2751 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2752 } else {
2753 exponent = precision - 1;
2754 lost_fraction = lfExactlyZero;
2755 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2756 }
2757
2758 return normalize(rounding_mode, lost_fraction);
2759}
2760
2762 roundingMode rounding_mode) {
2763 unsigned int partCount = Val.getNumWords();
2764 APInt api = Val;
2765
2766 sign = false;
2767 if (isSigned && api.isNegative()) {
2768 sign = true;
2769 api = -api;
2770 }
2771
2772 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2773}
2774
2776IEEEFloat::convertFromHexadecimalString(StringRef s,
2777 roundingMode rounding_mode) {
2778 lostFraction lost_fraction = lfExactlyZero;
2779
2780 category = fcNormal;
2781 zeroSignificand();
2782 exponent = 0;
2783
2784 integerPart *significand = significandParts();
2785 unsigned partsCount = partCount();
2786 unsigned bitPos = partsCount * integerPartWidth;
2787 bool computedTrailingFraction = false;
2788
2789 // Skip leading zeroes and any (hexa)decimal point.
2790 StringRef::iterator begin = s.begin();
2791 StringRef::iterator end = s.end();
2793 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2794 if (!PtrOrErr)
2795 return PtrOrErr.takeError();
2796 StringRef::iterator p = *PtrOrErr;
2797 StringRef::iterator firstSignificantDigit = p;
2798
2799 while (p != end) {
2800 integerPart hex_value;
2801
2802 if (*p == '.') {
2803 if (dot != end)
2804 return createError("String contains multiple dots");
2805 dot = p++;
2806 continue;
2807 }
2808
2809 hex_value = hexDigitValue(*p);
2810 if (hex_value == UINT_MAX)
2811 break;
2812
2813 p++;
2814
2815 // Store the number while we have space.
2816 if (bitPos) {
2817 bitPos -= 4;
2818 hex_value <<= bitPos % integerPartWidth;
2819 significand[bitPos / integerPartWidth] |= hex_value;
2820 } else if (!computedTrailingFraction) {
2821 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2822 if (!FractOrErr)
2823 return FractOrErr.takeError();
2824 lost_fraction = *FractOrErr;
2825 computedTrailingFraction = true;
2826 }
2827 }
2828
2829 /* Hex floats require an exponent but not a hexadecimal point. */
2830 if (p == end)
2831 return createError("Hex strings require an exponent");
2832 if (*p != 'p' && *p != 'P')
2833 return createError("Invalid character in significand");
2834 if (p == begin)
2835 return createError("Significand has no digits");
2836 if (dot != end && p - begin == 1)
2837 return createError("Significand has no digits");
2838
2839 /* Ignore the exponent if we are zero. */
2840 if (p != firstSignificantDigit) {
2841 int expAdjustment;
2842
2843 /* Implicit hexadecimal point? */
2844 if (dot == end)
2845 dot = p;
2846
2847 /* Calculate the exponent adjustment implicit in the number of
2848 significant digits. */
2849 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2850 if (expAdjustment < 0)
2851 expAdjustment++;
2852 expAdjustment = expAdjustment * 4 - 1;
2853
2854 /* Adjust for writing the significand starting at the most
2855 significant nibble. */
2856 expAdjustment += semantics->precision;
2857 expAdjustment -= partsCount * integerPartWidth;
2858
2859 /* Adjust for the given exponent. */
2860 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2861 if (!ExpOrErr)
2862 return ExpOrErr.takeError();
2863 exponent = *ExpOrErr;
2864 }
2865
2866 return normalize(rounding_mode, lost_fraction);
2867}
2868
2870IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2871 unsigned sigPartCount, int exp,
2872 roundingMode rounding_mode) {
2873 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2875
2876 bool isNearest = rounding_mode == rmNearestTiesToEven ||
2877 rounding_mode == rmNearestTiesToAway;
2878
2879 unsigned parts = partCountForBits(semantics->precision + 11);
2880
2881 /* Calculate pow(5, abs(exp)). */
2882 unsigned pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp : -exp);
2883
2884 for (;; parts *= 2) {
2885 unsigned int excessPrecision, truncatedBits;
2886
2887 calcSemantics.precision = parts * integerPartWidth - 1;
2888 excessPrecision = calcSemantics.precision - semantics->precision;
2889 truncatedBits = excessPrecision;
2890
2891 IEEEFloat decSig(calcSemantics, uninitialized);
2892 decSig.makeZero(sign);
2893 IEEEFloat pow5(calcSemantics);
2894
2895 opStatus sigStatus = decSig.convertFromUnsignedParts(
2896 decSigParts, sigPartCount, rmNearestTiesToEven);
2897 opStatus powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2899 /* Add exp, as 10^n = 5^n * 2^n. */
2900 decSig.exponent += exp;
2901
2902 lostFraction calcLostFraction;
2903 integerPart HUerr, HUdistance;
2904 unsigned int powHUerr;
2905
2906 if (exp >= 0) {
2907 /* multiplySignificand leaves the precision-th bit set to 1. */
2908 calcLostFraction = decSig.multiplySignificand(pow5);
2909 powHUerr = powStatus != opOK;
2910 } else {
2911 calcLostFraction = decSig.divideSignificand(pow5);
2912 /* Denormal numbers have less precision. */
2913 if (decSig.exponent < semantics->minExponent) {
2914 excessPrecision += (semantics->minExponent - decSig.exponent);
2915 truncatedBits = excessPrecision;
2916 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
2917 }
2918 /* Extra half-ulp lost in reciprocal of exponent. */
2919 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2920 }
2921
2922 /* Both multiplySignificand and divideSignificand return the
2923 result with the integer bit set. */
2925 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2926
2927 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2928 powHUerr);
2929 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2930 excessPrecision, isNearest);
2931
2932 /* Are we guaranteed to round correctly if we truncate? */
2933 if (HUdistance >= HUerr) {
2934 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2935 calcSemantics.precision - excessPrecision,
2936 excessPrecision);
2937 /* Take the exponent of decSig. If we tcExtract-ed less bits
2938 above we must adjust our exponent to compensate for the
2939 implicit right shift. */
2940 exponent = (decSig.exponent + semantics->precision
2941 - (calcSemantics.precision - excessPrecision));
2942 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2943 decSig.partCount(),
2944 truncatedBits);
2945 return static_cast<opStatus>(normalize(rounding_mode, calcLostFraction) |
2946 ((sigStatus | powStatus) & opInexact));
2947 }
2948 }
2949}
2950
2951Expected<APFloat::opStatus>
2952IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2953 decimalInfo D;
2954 opStatus fs;
2955
2956 /* Scan the text. */
2957 StringRef::iterator p = str.begin();
2958 if (Error Err = interpretDecimal(p, str.end(), &D))
2959 return std::move(Err);
2960
2961 /* Handle the quick cases. First the case of no significant digits,
2962 i.e. zero, and then exponents that are obviously too large or too
2963 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2964 definitely overflows if
2965
2966 (exp - 1) * L >= maxExponent
2967
2968 and definitely underflows to zero where
2969
2970 (exp + 1) * L <= minExponent - precision
2971
2972 With integer arithmetic the tightest bounds for L are
2973
2974 93/28 < L < 196/59 [ numerator <= 256 ]
2975 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2976 */
2977
2978 // Test if we have a zero number allowing for strings with no null terminators
2979 // and zero decimals with non-zero exponents.
2980 //
2981 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2982 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2983 // be at most one dot. On the other hand, if we have a zero with a non-zero
2984 // exponent, then we know that D.firstSigDigit will be non-numeric.
2985 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2986 category = fcZero;
2987 fs = opOK;
2988 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2989 sign = false;
2990 if (!semantics->hasZero)
2992
2993 /* Check whether the normalized exponent is high enough to overflow
2994 max during the log-rebasing in the max-exponent check below. */
2995 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2996 fs = handleOverflow(rounding_mode);
2997
2998 /* If it wasn't, then it also wasn't high enough to overflow max
2999 during the log-rebasing in the min-exponent check. Check that it
3000 won't overflow min in either check, then perform the min-exponent
3001 check. */
3002 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3003 (D.normalizedExponent + 1) * 28738 <=
3004 8651 * (semantics->minExponent - (int) semantics->precision)) {
3005 /* Underflow to zero and round. */
3006 category = fcNormal;
3007 zeroSignificand();
3008 fs = normalize(rounding_mode, lfLessThanHalf);
3009
3010 /* We can finally safely perform the max-exponent check. */
3011 } else if ((D.normalizedExponent - 1) * 42039
3012 >= 12655 * semantics->maxExponent) {
3013 /* Overflow and round. */
3014 fs = handleOverflow(rounding_mode);
3015 } else {
3016 integerPart *decSignificand;
3017 unsigned int partCount;
3018
3019 /* A tight upper bound on number of bits required to hold an
3020 N-digit decimal integer is N * 196 / 59. Allocate enough space
3021 to hold the full significand, and an extra part required by
3022 tcMultiplyPart. */
3023 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3024 partCount = partCountForBits(1 + 196 * partCount / 59);
3025 decSignificand = new integerPart[partCount + 1];
3026 partCount = 0;
3027
3028 /* Convert to binary efficiently - we do almost all multiplication
3029 in an integerPart. When this would overflow do we do a single
3030 bignum multiplication, and then revert again to multiplication
3031 in an integerPart. */
3032 do {
3033 integerPart decValue, val, multiplier;
3034
3035 val = 0;
3036 multiplier = 1;
3037
3038 do {
3039 if (*p == '.') {
3040 p++;
3041 if (p == str.end()) {
3042 break;
3043 }
3044 }
3045 decValue = decDigitValue(*p++);
3046 if (decValue >= 10U) {
3047 delete[] decSignificand;
3048 return createError("Invalid character in significand");
3049 }
3050 multiplier *= 10;
3051 val = val * 10 + decValue;
3052 /* The maximum number that can be multiplied by ten with any
3053 digit added without overflowing an integerPart. */
3054 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3055
3056 /* Multiply out the current part. */
3057 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3058 partCount, partCount + 1, false);
3059
3060 /* If we used another part (likely but not guaranteed), increase
3061 the count. */
3062 if (decSignificand[partCount])
3063 partCount++;
3064 } while (p <= D.lastSigDigit);
3065
3066 category = fcNormal;
3067 fs = roundSignificandWithExponent(decSignificand, partCount,
3068 D.exponent, rounding_mode);
3069
3070 delete [] decSignificand;
3071 }
3072
3073 return fs;
3074}
3075
3076bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3077 const size_t MIN_NAME_SIZE = 3;
3078
3079 if (str.size() < MIN_NAME_SIZE)
3080 return false;
3081
3082 if (str == "inf" || str == "INFINITY" || str == "+Inf" || str == "+inf") {
3083 makeInf(false);
3084 return true;
3085 }
3086
3087 bool IsNegative = str.consume_front("-");
3088 if (IsNegative) {
3089 if (str.size() < MIN_NAME_SIZE)
3090 return false;
3091
3092 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3093 makeInf(true);
3094 return true;
3095 }
3096 }
3097
3098 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3099 bool IsSignaling = str.consume_front_insensitive("s");
3100 if (IsSignaling) {
3101 if (str.size() < MIN_NAME_SIZE)
3102 return false;
3103 }
3104
3105 if (str.consume_front("nan") || str.consume_front("NaN")) {
3106 // A NaN without payload.
3107 if (str.empty()) {
3108 makeNaN(IsSignaling, IsNegative);
3109 return true;
3110 }
3111
3112 // Allow the payload to be inside parentheses.
3113 if (str.front() == '(') {
3114 // Parentheses should be balanced (and not empty).
3115 if (str.size() <= 2 || str.back() != ')')
3116 return false;
3117
3118 str = str.slice(1, str.size() - 1);
3119 }
3120
3121 // Determine the payload number's radix.
3122 unsigned Radix = 10;
3123 if (str[0] == '0') {
3124 if (str.size() > 1 && tolower(str[1]) == 'x') {
3125 str = str.drop_front(2);
3126 Radix = 16;
3127 } else {
3128 Radix = 8;
3129 }
3130 }
3131
3132 // Parse the payload and make the NaN.
3133 APInt Payload;
3134 if (!str.getAsInteger(Radix, Payload)) {
3135 makeNaN(IsSignaling, IsNegative, &Payload);
3136 return true;
3137 }
3138 }
3139
3140 return false;
3141}
3142
3143Expected<APFloat::opStatus>
3145 if (str.empty())
3146 return createError("Invalid string length");
3147
3148 // Handle special cases.
3149 if (convertFromStringSpecials(str))
3150 return opOK;
3151
3152 /* Handle a leading minus sign. */
3153 StringRef::iterator p = str.begin();
3154 size_t slen = str.size();
3155 sign = *p == '-' ? 1 : 0;
3156 if (sign && !semantics->hasSignedRepr)
3158 "This floating point format does not support signed values");
3159
3160 if (*p == '-' || *p == '+') {
3161 p++;
3162 slen--;
3163 if (!slen)
3164 return createError("String has no digits");
3165 }
3166
3167 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3168 if (slen == 2)
3169 return createError("Invalid string");
3170 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3171 rounding_mode);
3172 }
3173
3174 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3175}
3176
3177/* Write out a hexadecimal representation of the floating point value
3178 to DST, which must be of sufficient size, in the C99 form
3179 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3180 excluding the terminating NUL.
3181
3182 If UPPERCASE, the output is in upper case, otherwise in lower case.
3183
3184 HEXDIGITS digits appear altogether, rounding the value if
3185 necessary. If HEXDIGITS is 0, the minimal precision to display the
3186 number precisely is used instead. If nothing would appear after
3187 the decimal point it is suppressed.
3188
3189 The decimal exponent is always printed and has at least one digit.
3190 Zero values display an exponent of zero. Infinities and NaNs
3191 appear as "infinity" or "nan" respectively.
3192
3193 The above rules are as specified by C99. There is ambiguity about
3194 what the leading hexadecimal digit should be. This implementation
3195 uses whatever is necessary so that the exponent is displayed as
3196 stored. This implies the exponent will fall within the IEEE format
3197 range, and the leading hexadecimal digit will be 0 (for denormals),
3198 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3199 any other digits zero).
3200*/
3201unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3202 bool upperCase,
3203 roundingMode rounding_mode) const {
3204 char *p = dst;
3205 if (sign)
3206 *dst++ = '-';
3207
3208 switch (category) {
3209 case fcInfinity:
3210 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3211 dst += sizeof infinityL - 1;
3212 break;
3213
3214 case fcNaN:
3215 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3216 dst += sizeof NaNU - 1;
3217 break;
3218
3219 case fcZero:
3220 *dst++ = '0';
3221 *dst++ = upperCase ? 'X': 'x';
3222 *dst++ = '0';
3223 if (hexDigits > 1) {
3224 *dst++ = '.';
3225 memset (dst, '0', hexDigits - 1);
3226 dst += hexDigits - 1;
3227 }
3228 *dst++ = upperCase ? 'P': 'p';
3229 *dst++ = '0';
3230 break;
3231
3232 case fcNormal:
3233 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3234 break;
3235 }
3236
3237 *dst = 0;
3238
3239 return static_cast<unsigned int>(dst - p);
3240}
3241
3242/* Does the hard work of outputting the correctly rounded hexadecimal
3243 form of a normal floating point number with the specified number of
3244 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3245 digits necessary to print the value precisely is output. */
3246char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3247 bool upperCase,
3248 roundingMode rounding_mode) const {
3249 *dst++ = '0';
3250 *dst++ = upperCase ? 'X': 'x';
3251
3252 bool roundUp = false;
3253 const char *hexDigitChars = upperCase ? hexDigitsUpper : hexDigitsLower;
3254
3255 const integerPart *significand = significandParts();
3256 unsigned partsCount = partCount();
3257
3258 /* +3 because the first digit only uses the single integer bit, so
3259 we have 3 virtual zero most-significant-bits. */
3260 unsigned valueBits = semantics->precision + 3;
3261 unsigned shift = integerPartWidth - valueBits % integerPartWidth;
3262
3263 /* The natural number of digits required ignoring trailing
3264 insignificant zeroes. */
3265 unsigned outputDigits = (valueBits - significandLSB() + 3) / 4;
3266
3267 /* hexDigits of zero means use the required number for the
3268 precision. Otherwise, see if we are truncating. If we are,
3269 find out if we need to round away from zero. */
3270 if (hexDigits) {
3271 if (hexDigits < outputDigits) {
3272 /* We are dropping non-zero bits, so need to check how to round.
3273 "bits" is the number of dropped bits. */
3274 unsigned int bits;
3275 lostFraction fraction;
3276
3277 bits = valueBits - hexDigits * 4;
3278 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3279 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3280 }
3281 outputDigits = hexDigits;
3282 }
3283
3284 /* Write the digits consecutively, and start writing in the location
3285 of the hexadecimal point. We move the most significant digit
3286 left and add the hexadecimal point later. */
3287 char *p = ++dst;
3288
3289 unsigned count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3290
3291 while (outputDigits && count) {
3292 integerPart part;
3293
3294 /* Put the most significant integerPartWidth bits in "part". */
3295 if (--count == partsCount)
3296 part = 0; /* An imaginary higher zero part. */
3297 else
3298 part = significand[count] << shift;
3299
3300 if (count && shift)
3301 part |= significand[count - 1] >> (integerPartWidth - shift);
3302
3303 /* Convert as much of "part" to hexdigits as we can. */
3304 unsigned int curDigits = integerPartWidth / 4;
3305
3306 curDigits = std::min(curDigits, outputDigits);
3307 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3308 outputDigits -= curDigits;
3309 }
3310
3311 if (roundUp) {
3312 char *q = dst;
3313
3314 /* Note that hexDigitChars has a trailing '0'. */
3315 do {
3316 q--;
3317 *q = hexDigitChars[hexDigitValue (*q) + 1];
3318 } while (*q == '0');
3319 assert(q >= p);
3320 } else {
3321 /* Add trailing zeroes. */
3322 memset (dst, '0', outputDigits);
3323 dst += outputDigits;
3324 }
3325
3326 /* Move the most significant digit to before the point, and if there
3327 is something after the decimal point add it. This must come
3328 after rounding above. */
3329 p[-1] = p[0];
3330 if (dst -1 == p)
3331 dst--;
3332 else
3333 p[0] = '.';
3334
3335 /* Finally output the exponent. */
3336 *dst++ = upperCase ? 'P': 'p';
3337
3338 return writeSignedDecimal (dst, exponent);
3339}
3340
3342 if (!Arg.isFiniteNonZero())
3343 return hash_combine((uint8_t)Arg.category,
3344 // NaN has no sign, fix it at zero.
3345 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3346 Arg.semantics->precision);
3347
3348 // Normal floats need their exponent and significand hashed.
3349 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3350 Arg.semantics->precision, Arg.exponent,
3352 Arg.significandParts(),
3353 Arg.significandParts() + Arg.partCount()));
3354}
3355
3356// Conversion from APFloat to/from host float/double. It may eventually be
3357// possible to eliminate these and have everybody deal with APFloats, but that
3358// will take a while. This approach will not easily extend to long double.
3359// Current implementation requires integerPartWidth==64, which is correct at
3360// the moment but could be made more general.
3361
3362// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3363// the actual IEEE representations. We compensate for that here.
3364
3365APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3366 assert(semantics ==
3367 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3368 assert(partCount()==2);
3369
3370 uint64_t myexponent, mysignificand;
3371
3372 if (isFiniteNonZero()) {
3373 myexponent = exponent+16383; //bias
3374 mysignificand = significandParts()[0];
3375 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3376 myexponent = 0; // denormal
3377 } else if (category==fcZero) {
3378 myexponent = 0;
3379 mysignificand = 0;
3380 } else if (category==fcInfinity) {
3381 myexponent = 0x7fff;
3382 mysignificand = 0x8000000000000000ULL;
3383 } else {
3384 assert(category == fcNaN && "Unknown category");
3385 myexponent = 0x7fff;
3386 mysignificand = significandParts()[0];
3387 }
3388
3389 uint64_t words[2];
3390 words[0] = mysignificand;
3391 words[1] = ((uint64_t)(sign & 1) << 15) |
3392 (myexponent & 0x7fffLL);
3393 return APInt(80, words);
3394}
3395
3396APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3397 assert(semantics ==
3398 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3399 assert(partCount()==2);
3400
3401 uint64_t words[2];
3402 bool losesInfo;
3403
3404 // Convert number to double. To avoid spurious underflows, we re-
3405 // normalize against the "double" minExponent first, and only *then*
3406 // truncate the mantissa. The result of that second conversion
3407 // may be inexact, but should never underflow.
3408 // Declare fltSemantics before APFloat that uses it (and
3409 // saves pointer to it) to ensure correct destruction order.
3410 fltSemantics extendedSemantics = *semantics;
3411 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3412 IEEEFloat extended(*this);
3413 [[maybe_unused]] opStatus fs =
3414 extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3415 assert(fs == opOK && !losesInfo);
3416
3417 IEEEFloat u(extended);
3418 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3419 assert(fs == opOK || fs == opInexact);
3420 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3421
3422 // If conversion was exact or resulted in a special case, we're done;
3423 // just set the second double to zero. Otherwise, re-convert back to
3424 // the extended format and compute the difference. This now should
3425 // convert exactly to double.
3426 if (u.isFiniteNonZero() && losesInfo) {
3427 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3428 assert(fs == opOK && !losesInfo);
3429
3430 IEEEFloat v(extended);
3431 v.subtract(u, rmNearestTiesToEven);
3432 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3433 assert(fs == opOK && !losesInfo);
3434 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3435 } else {
3436 words[1] = 0;
3437 }
3438
3439 return APInt(128, words);
3440}
3441
3442template <const fltSemantics &S>
3443APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3444 assert(semantics == &S);
3445 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3446 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3447 constexpr integerPart integer_bit =
3448 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3449 constexpr uint64_t significand_mask = integer_bit - 1;
3450 constexpr unsigned int exponent_bits =
3451 S.sizeInBits - (S.hasSignedRepr ? 1 : 0) - trailing_significand_bits;
3452 static_assert(exponent_bits < 64);
3453 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3454 constexpr bool is_zero_exp_reserved = S.hasDenormals || S.hasZero;
3455 constexpr int bias = -(S.minExponent - (is_zero_exp_reserved ? 1 : 0));
3456
3457 uint64_t myexponent;
3458 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3459 mysignificand;
3460
3461 if (isFiniteNonZero()) {
3462 myexponent = exponent + bias;
3463 std::copy_n(significandParts(), mysignificand.size(),
3464 mysignificand.begin());
3465 if (myexponent == 1 &&
3466 !(significandParts()[integer_bit_part] & integer_bit))
3467 myexponent = 0; // denormal
3468 } else if (category == fcZero) {
3469 if (!S.hasZero)
3470 llvm_unreachable("semantics does not support zero!");
3471 myexponent = ::exponentZero(S) + bias;
3472 mysignificand.fill(0);
3473 } else if (category == fcInfinity) {
3474 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3475 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3476 llvm_unreachable("semantics don't support inf!");
3477 myexponent = ::exponentInf(S) + bias;
3478 mysignificand.fill(0);
3479 } else {
3480 assert(category == fcNaN && "Unknown category!");
3481 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3482 llvm_unreachable("semantics don't support NaN!");
3483 myexponent = ::exponentNaN(S) + bias;
3484 std::copy_n(significandParts(), mysignificand.size(),
3485 mysignificand.begin());
3486 }
3487 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3488 auto words_iter =
3489 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3490 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3491 // Clear the integer bit.
3492 words[mysignificand.size() - 1] &= significand_mask;
3493 }
3494 std::fill(words_iter, words.end(), uint64_t{0});
3495 constexpr size_t last_word = words.size() - 1;
3496 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3497 << ((S.sizeInBits - 1) % 64);
3498 words[last_word] |= shifted_sign;
3499 uint64_t shifted_exponent = (myexponent & exponent_mask)
3500 << (trailing_significand_bits % 64);
3501 words[last_word] |= shifted_exponent;
3502 if constexpr (last_word == 0) {
3503 return APInt(S.sizeInBits, words[0]);
3504 }
3505 return APInt(S.sizeInBits, words);
3506}
3507
3508APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3509 assert(partCount() == 2);
3510 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3511}
3512
3513APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3514 assert(partCount()==1);
3515 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3516}
3517
3518APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3519 assert(partCount()==1);
3520 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3521}
3522
3523APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3524 assert(partCount() == 1);
3525 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3526}
3527
3528APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3529 assert(partCount()==1);
3530 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3531}
3532
3533APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3534 assert(partCount() == 1);
3535 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3536}
3537
3538APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3539 assert(partCount() == 1);
3540 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3541}
3542
3543APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3544 assert(partCount() == 1);
3545 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3546}
3547
3548APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3549 assert(partCount() == 1);
3550 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3551}
3552
3553APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3554 assert(partCount() == 1);
3555 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3556}
3557
3558APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3559 assert(partCount() == 1);
3560 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3561}
3562
3563APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3564 assert(partCount() == 1);
3565 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3566}
3567
3568APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3569 assert(partCount() == 1);
3570 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3571}
3572
3573APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3574 assert(partCount() == 1);
3575 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3576}
3577
3578APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3579 assert(partCount() == 1);
3580 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3581}
3582
3583APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3584 assert(partCount() == 1);
3585 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3586}
3587
3588APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3589 assert(partCount() == 1);
3590 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3591}
3592
3593// This function creates an APInt that is just a bit map of the floating
3594// point constant as it would appear in memory. It is not a conversion,
3595// and treating the result as a normal integer is unlikely to be useful.
3596
3598 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3599 return convertHalfAPFloatToAPInt();
3600
3601 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3602 return convertBFloatAPFloatToAPInt();
3603
3604 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3605 return convertFloatAPFloatToAPInt();
3606
3607 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3608 return convertDoubleAPFloatToAPInt();
3609
3610 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3611 return convertQuadrupleAPFloatToAPInt();
3612
3613 if (semantics ==
3614 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3615 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3616
3617 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3618 return convertFloat8E5M2APFloatToAPInt();
3619
3620 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3621 return convertFloat8E5M2FNUZAPFloatToAPInt();
3622
3623 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3624 return convertFloat8E4M3APFloatToAPInt();
3625
3626 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3627 return convertFloat8E4M3FNAPFloatToAPInt();
3628
3629 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3630 return convertFloat8E4M3FNUZAPFloatToAPInt();
3631
3632 if (semantics ==
3633 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3634 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3635
3636 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3637 return convertFloat8E3M4APFloatToAPInt();
3638
3639 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3640 return convertFloatTF32APFloatToAPInt();
3641
3642 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3643 return convertFloat8E8M0FNUAPFloatToAPInt();
3644
3645 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3646 return convertFloat6E3M2FNAPFloatToAPInt();
3647
3648 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3649 return convertFloat6E2M3FNAPFloatToAPInt();
3650
3651 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3652 return convertFloat4E2M1FNAPFloatToAPInt();
3653
3654 assert(semantics ==
3655 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3656 "unknown format!");
3657 return convertF80LongDoubleAPFloatToAPInt();
3658}
3659
3661 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3662 "Float semantics are not IEEEsingle");
3663 APInt api = bitcastToAPInt();
3664 return api.bitsToFloat();
3665}
3666
3668 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3669 "Float semantics are not IEEEdouble");
3670 APInt api = bitcastToAPInt();
3671 return api.bitsToDouble();
3672}
3673
3674#ifdef HAS_IEE754_FLOAT128
3675float128 IEEEFloat::convertToQuad() const {
3676 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
3677 "Float semantics are not IEEEquads");
3678 APInt api = bitcastToAPInt();
3679 return api.bitsToQuad();
3680}
3681#endif
3682
3683void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3684 return initFromIEEEAPInt<APFloatBase::semX87DoubleExtended>(api);
3685}
3686
3687void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3688 uint64_t i1 = api.getRawData()[0];
3689 uint64_t i2 = api.getRawData()[1];
3690 bool losesInfo;
3691
3692 // Get the first double and convert to our format.
3693 initFromDoubleAPInt(APInt(64, i1));
3694 [[maybe_unused]] opStatus fs = convert(APFloatBase::semPPCDoubleDoubleLegacy,
3695 rmNearestTiesToEven, &losesInfo);
3696 // (convert may return opInvalidOp if i1 is an sNaN).
3697 assert((fs == opOK || fs == opInvalidOp) && !losesInfo);
3698
3699 // Unless we have a special case, add in second double.
3700 if (isFiniteNonZero()) {
3701 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3702 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3703 &losesInfo);
3704 assert(fs == opOK && !losesInfo);
3705
3707 }
3708}
3709
3710// The E8M0 format has the following characteristics:
3711// It is an 8-bit unsigned format with only exponents (no actual significand).
3712// No encodings for {zero, infinities or denorms}.
3713// NaN is represented by all 1's.
3714// Bias is 127.
3715void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3716 initFromIEEEAPInt<APFloatBase::semFloat8E8M0FNU>(api);
3717}
3718
3719template <const fltSemantics &S>
3720void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3721 assert(api.getBitWidth() == S.sizeInBits);
3722
3723 constexpr unsigned int trailing_significand_bits =
3724 S.precision - 1 + S.hasExplicitIntegerBit;
3725 constexpr integerPart integer_bit =
3726 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3727 constexpr uint64_t significand_mask = integer_bit - 1;
3728 constexpr unsigned int exponent_bits =
3729 S.sizeInBits - (S.hasSignedRepr ? 1 : 0) - trailing_significand_bits;
3730 static_assert(exponent_bits < 64);
3731 constexpr unsigned int stored_significand_parts =
3732 partCountForBits(trailing_significand_bits + 1);
3733 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3734 constexpr bool is_zero_exp_reserved = S.hasDenormals || S.hasZero;
3735 constexpr int bias = -(S.minExponent - (is_zero_exp_reserved ? 1 : 0));
3736 constexpr bool has_significand = trailing_significand_bits > 0;
3737
3738 // Copy the bits of the significand. We need to clear out the exponent and
3739 // sign bit in the last word.
3740 std::array<integerPart, stored_significand_parts> mysignificand;
3741 if constexpr (has_significand) {
3742 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3743 if constexpr (significand_mask != 0 || S.precision >= integerPartWidth) {
3744 mysignificand[mysignificand.size() - 1] &= significand_mask;
3745 }
3746 } else {
3747 std::fill_n(mysignificand.begin(), mysignificand.size(), 0);
3748 // Always set integer bit to 1 for consistency in APFloat's internal
3749 // representation.
3750 mysignificand[0] = 1;
3751 }
3752
3753 // We assume the last word holds the sign bit, the exponent, and potentially
3754 // some of the trailing significand field.
3755 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3756 uint64_t myexponent =
3757 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3758
3759 initialize(&S);
3760 assert(partCount() == mysignificand.size());
3761
3762 sign = S.hasSignedRepr
3763 ? static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64))
3764 : 0;
3765
3766 bool all_zero_significand =
3767 has_significand && llvm::all_of(mysignificand, equal_to(0));
3768
3769 bool is_zero = myexponent == 0 && all_zero_significand && S.hasZero;
3770
3771 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3772 bool is_inf = false;
3773
3774 if constexpr (S.hasExplicitIntegerBit) {
3775 // This is only used and tested for x87DoubleExtended
3776 static_assert(S.precision == 64);
3777 constexpr integerPart significand_mask_no_int_bit =
3778 (uint64_t{1} << (trailing_significand_bits - 1)) - 1;
3779 const integerPart myintegerbit =
3780 mysignificand[0] >> (trailing_significand_bits - 1);
3781
3782 is_inf = myexponent - bias == ::exponentInf(S) && myintegerbit == 1 &&
3783 (mysignificand[0] & significand_mask_no_int_bit) == 0;
3784 } else {
3785 is_inf = myexponent - bias == ::exponentInf(S) && all_zero_significand;
3786 }
3787
3788 if (is_inf) {
3789 makeInf(sign);
3790 return;
3791 }
3792 }
3793
3794 bool is_nan = false;
3795
3796 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3797 if constexpr (S.hasExplicitIntegerBit) {
3798 // This is only used and tested for x87DoubleExtended
3799 static_assert(S.precision == 64);
3800 const integerPart myintegerbit =
3801 mysignificand[0] >> (trailing_significand_bits - 1);
3802 constexpr integerPart significand_mask_no_int_bit =
3803 (uint64_t{1} << (trailing_significand_bits - 1)) - 1;
3804
3805 if (myexponent - bias == ::exponentNaN(S) &&
3806 (mysignificand[0] & significand_mask_no_int_bit) != 0) {
3807 // regular NaN and pseudoNaN
3808 is_nan = true;
3809 } else if (myexponent - bias == ::exponentNaN(S) &&
3810 (mysignificand[0] & significand_mask_no_int_bit) == 0) {
3811 // pseudoinfinity
3812 is_nan = true;
3813 } else if (myexponent - bias != ::exponentNaN(S) && myexponent != 0 &&
3814 myintegerbit == 0) {
3815 // unnormal
3816 is_nan = true;
3817 }
3818 } else {
3819 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3820 }
3821 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3822 bool all_ones_significand =
3823 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3824 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3825 (!significand_mask ||
3826 mysignificand[mysignificand.size() - 1] == significand_mask);
3827 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3828 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3829 is_nan = is_zero && sign;
3830 }
3831
3832 if (is_nan) {
3833 category = fcNaN;
3834 exponent = ::exponentNaN(S);
3835 std::copy_n(mysignificand.begin(), mysignificand.size(),
3836 significandParts());
3837 return;
3838 }
3839
3840 if (is_zero) {
3841 makeZero(sign);
3842 return;
3843 }
3844
3845 category = fcNormal;
3846 exponent = myexponent - bias;
3847 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3848 if (myexponent == 0 && S.hasDenormals) // denormal
3849 exponent = S.minExponent;
3850 else {
3851 if constexpr (!S.hasExplicitIntegerBit) {
3852 significandParts()[mysignificand.size() - 1] |= integer_bit;
3853 }
3854 }
3855}
3856
3857void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3858 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3859}
3860
3861void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3862 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
3863}
3864
3865void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3866 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
3867}
3868
3869void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3870 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
3871}
3872
3873void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3874 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
3875}
3876
3877void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3878 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
3879}
3880
3881void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3882 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
3883}
3884
3885void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3886 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
3887}
3888
3889void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3890 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
3891}
3892
3893void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3894 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
3895}
3896
3897void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3898 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
3899}
3900
3901void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3902 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
3903}
3904
3905void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3906 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
3907}
3908
3909void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3910 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
3911}
3912
3913void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3914 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
3915}
3916
3917void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3918 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
3919}
3920
3921/// Treat api as containing the bits of a floating point number.
3922void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3923 assert(api.getBitWidth() == Sem->sizeInBits);
3924 if (Sem == &APFloatBase::semIEEEhalf)
3925 return initFromHalfAPInt(api);
3926 if (Sem == &APFloatBase::semBFloat)
3927 return initFromBFloatAPInt(api);
3928 if (Sem == &APFloatBase::semIEEEsingle)
3929 return initFromFloatAPInt(api);
3930 if (Sem == &APFloatBase::semIEEEdouble)
3931 return initFromDoubleAPInt(api);
3932 if (Sem == &APFloatBase::semX87DoubleExtended)
3933 return initFromF80LongDoubleAPInt(api);
3934 if (Sem == &APFloatBase::semIEEEquad)
3935 return initFromQuadrupleAPInt(api);
3936 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
3937 return initFromPPCDoubleDoubleLegacyAPInt(api);
3938 if (Sem == &APFloatBase::semFloat8E5M2)
3939 return initFromFloat8E5M2APInt(api);
3940 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
3941 return initFromFloat8E5M2FNUZAPInt(api);
3942 if (Sem == &APFloatBase::semFloat8E4M3)
3943 return initFromFloat8E4M3APInt(api);
3944 if (Sem == &APFloatBase::semFloat8E4M3FN)
3945 return initFromFloat8E4M3FNAPInt(api);
3946 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
3947 return initFromFloat8E4M3FNUZAPInt(api);
3948 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
3949 return initFromFloat8E4M3B11FNUZAPInt(api);
3950 if (Sem == &APFloatBase::semFloat8E3M4)
3951 return initFromFloat8E3M4APInt(api);
3952 if (Sem == &APFloatBase::semFloatTF32)
3953 return initFromFloatTF32APInt(api);
3954 if (Sem == &APFloatBase::semFloat8E8M0FNU)
3955 return initFromFloat8E8M0FNUAPInt(api);
3956 if (Sem == &APFloatBase::semFloat6E3M2FN)
3957 return initFromFloat6E3M2FNAPInt(api);
3958 if (Sem == &APFloatBase::semFloat6E2M3FN)
3959 return initFromFloat6E2M3FNAPInt(api);
3960 if (Sem == &APFloatBase::semFloat4E2M1FN)
3961 return initFromFloat4E2M1FNAPInt(api);
3962
3963 llvm_unreachable("unsupported semantics");
3964}
3965
3966/// Make this number the largest magnitude normal number in the given
3967/// semantics.
3968void IEEEFloat::makeLargest(bool Negative) {
3969 if (Negative && !semantics->hasSignedRepr)
3971 "This floating point format does not support signed values");
3972 // We want (in interchange format):
3973 // sign = {Negative}
3974 // exponent = 1..10
3975 // significand = 1..1
3976 category = fcNormal;
3977 sign = Negative;
3978 exponent = semantics->maxExponent;
3979
3980 // Use memset to set all but the highest integerPart to all ones.
3981 integerPart *significand = significandParts();
3982 unsigned PartCount = partCount();
3983 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3984
3985 // Set the high integerPart especially setting all unused top bits for
3986 // internal consistency.
3987 const unsigned NumUnusedHighBits =
3988 PartCount*integerPartWidth - semantics->precision;
3989 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3990 ? (~integerPart(0) >> NumUnusedHighBits)
3991 : 0;
3992 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3993 semantics->nanEncoding == fltNanEncoding::AllOnes &&
3994 (semantics->precision > 1))
3995 significand[0] &= ~integerPart(1);
3996}
3997
3998/// Make this number the smallest magnitude denormal number in the given
3999/// semantics.
4000void IEEEFloat::makeSmallest(bool Negative) {
4001 if (Negative && !semantics->hasSignedRepr)
4003 "This floating point format does not support signed values");
4004 // We want (in interchange format):
4005 // sign = {Negative}
4006 // exponent = 0..0
4007 // significand = 0..01
4008 category = fcNormal;
4009 sign = Negative;
4010 exponent = semantics->minExponent;
4011 APInt::tcSet(significandParts(), 1, partCount());
4012}
4013
4015 if (Negative && !semantics->hasSignedRepr)
4017 "This floating point format does not support signed values");
4018 // We want (in interchange format):
4019 // sign = {Negative}
4020 // exponent = 0..0
4021 // significand = 10..0
4022
4023 category = fcNormal;
4024 zeroSignificand();
4025 sign = Negative;
4026 exponent = semantics->minExponent;
4027 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4028}
4029
4030IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4031 initFromAPInt(&Sem, API);
4032}
4033
4035 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4036}
4037
4039 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4040}
4041
4042namespace {
4043 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4044 Buffer.append(Str.begin(), Str.end());
4045 }
4046
4047 /// Removes data from the given significand until it is no more
4048 /// precise than is required for the desired precision.
4049 void AdjustToPrecision(APInt &significand,
4050 int &exp, unsigned FormatPrecision) {
4051 unsigned bits = significand.getActiveBits();
4052
4053 // 196/59 is a very slight overestimate of lg_2(10).
4054 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4055
4056 if (bits <= bitsRequired) return;
4057
4058 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4059 if (!tensRemovable) return;
4060
4061 exp += tensRemovable;
4062
4063 APInt divisor(significand.getBitWidth(), 1);
4064 APInt powten(significand.getBitWidth(), 10);
4065 while (true) {
4066 if (tensRemovable & 1)
4067 divisor *= powten;
4068 tensRemovable >>= 1;
4069 if (!tensRemovable) break;
4070 powten *= powten;
4071 }
4072
4073 significand = significand.udiv(divisor);
4074
4075 // Truncate the significand down to its active bit count.
4076 significand = significand.trunc(significand.getActiveBits());
4077 }
4078
4079
4080 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4081 int &exp, unsigned FormatPrecision) {
4082 unsigned N = buffer.size();
4083 if (N <= FormatPrecision) return;
4084
4085 // The most significant figures are the last ones in the buffer.
4086 unsigned FirstSignificant = N - FormatPrecision;
4087
4088 // Round.
4089 // FIXME: this probably shouldn't use 'round half up'.
4090
4091 // Rounding down is just a truncation, except we also want to drop
4092 // trailing zeros from the new result.
4093 if (buffer[FirstSignificant - 1] < '5') {
4094 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4095 FirstSignificant++;
4096
4097 exp += FirstSignificant;
4098 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4099 return;
4100 }
4101
4102 // Rounding up requires a decimal add-with-carry. If we continue
4103 // the carry, the newly-introduced zeros will just be truncated.
4104 for (unsigned I = FirstSignificant; I != N; ++I) {
4105 if (buffer[I] == '9') {
4106 FirstSignificant++;
4107 } else {
4108 buffer[I]++;
4109 break;
4110 }
4111 }
4112
4113 // If we carried through, we have exactly one digit of precision.
4114 if (FirstSignificant == N) {
4115 exp += FirstSignificant;
4116 buffer.clear();
4117 buffer.push_back('1');
4118 return;
4119 }
4120
4121 exp += FirstSignificant;
4122 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4123 }
4124
4125 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4126 APInt significand, unsigned FormatPrecision,
4127 unsigned FormatMaxPadding, bool TruncateZero) {
4128 const int semanticsPrecision = significand.getBitWidth();
4129
4130 if (isNeg)
4131 Str.push_back('-');
4132
4133 // Set FormatPrecision if zero. We want to do this before we
4134 // truncate trailing zeros, as those are part of the precision.
4135 if (!FormatPrecision) {
4136 // We use enough digits so the number can be round-tripped back to an
4137 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4138 // Accurately" by Steele and White.
4139 // FIXME: Using a formula based purely on the precision is conservative;
4140 // we can print fewer digits depending on the actual value being printed.
4141
4142 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4143 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4144 }
4145
4146 // Ignore trailing binary zeros.
4147 int trailingZeros = significand.countr_zero();
4148 exp += trailingZeros;
4149 significand.lshrInPlace(trailingZeros);
4150
4151 // Change the exponent from 2^e to 10^e.
4152 if (exp == 0) {
4153 // Nothing to do.
4154 } else if (exp > 0) {
4155 // Just shift left.
4156 significand = significand.zext(semanticsPrecision + exp);
4157 significand <<= exp;
4158 exp = 0;
4159 } else { /* exp < 0 */
4160 int texp = -exp;
4161
4162 // We transform this using the identity:
4163 // (N)(2^-e) == (N)(5^e)(10^-e)
4164 // This means we have to multiply N (the significand) by 5^e.
4165 // To avoid overflow, we have to operate on numbers large
4166 // enough to store N * 5^e:
4167 // log2(N * 5^e) == log2(N) + e * log2(5)
4168 // <= semantics->precision + e * 137 / 59
4169 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4170
4171 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4172
4173 // Multiply significand by 5^e.
4174 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4175 significand = significand.zext(precision);
4176 APInt five_to_the_i(precision, 5);
4177 while (true) {
4178 if (texp & 1)
4179 significand *= five_to_the_i;
4180
4181 texp >>= 1;
4182 if (!texp)
4183 break;
4184 five_to_the_i *= five_to_the_i;
4185 }
4186 }
4187
4188 AdjustToPrecision(significand, exp, FormatPrecision);
4189
4191
4192 // Fill the buffer.
4193 unsigned precision = significand.getBitWidth();
4194 if (precision < 4) {
4195 // We need enough precision to store the value 10.
4196 precision = 4;
4197 significand = significand.zext(precision);
4198 }
4199 APInt ten(precision, 10);
4200 APInt digit(precision, 0);
4201
4202 bool inTrail = true;
4203 while (significand != 0) {
4204 // digit <- significand % 10
4205 // significand <- significand / 10
4206 APInt::udivrem(significand, ten, significand, digit);
4207
4208 unsigned d = digit.getZExtValue();
4209
4210 // Drop trailing zeros.
4211 if (inTrail && !d)
4212 exp++;
4213 else {
4214 buffer.push_back((char) ('0' + d));
4215 inTrail = false;
4216 }
4217 }
4218
4219 assert(!buffer.empty() && "no characters in buffer!");
4220
4221 // Drop down to FormatPrecision.
4222 // TODO: don't do more precise calculations above than are required.
4223 AdjustToPrecision(buffer, exp, FormatPrecision);
4224
4225 unsigned NDigits = buffer.size();
4226
4227 // Check whether we should use scientific notation.
4228 bool FormatScientific;
4229 if (!FormatMaxPadding) {
4230 FormatScientific = true;
4231 } else {
4232 if (exp >= 0) {
4233 // 765e3 --> 765000
4234 // ^^^
4235 // But we shouldn't make the number look more precise than it is.
4236 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4237 NDigits + (unsigned) exp > FormatPrecision);
4238 } else {
4239 // Power of the most significant digit.
4240 int MSD = exp + (int) (NDigits - 1);
4241 if (MSD >= 0) {
4242 // 765e-2 == 7.65
4243 FormatScientific = false;
4244 } else {
4245 // 765e-5 == 0.00765
4246 // ^ ^^
4247 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4248 }
4249 }
4250 }
4251
4252 // Scientific formatting is pretty straightforward.
4253 if (FormatScientific) {
4254 exp += (NDigits - 1);
4255
4256 Str.push_back(buffer[NDigits-1]);
4257 Str.push_back('.');
4258 if (NDigits == 1 && TruncateZero)
4259 Str.push_back('0');
4260 else
4261 for (unsigned I = 1; I != NDigits; ++I)
4262 Str.push_back(buffer[NDigits-1-I]);
4263 // Fill with zeros up to FormatPrecision.
4264 if (!TruncateZero && FormatPrecision > NDigits - 1)
4265 Str.append(FormatPrecision - NDigits + 1, '0');
4266 // For !TruncateZero we use lower 'e'.
4267 Str.push_back(TruncateZero ? 'E' : 'e');
4268
4269 Str.push_back(exp >= 0 ? '+' : '-');
4270 if (exp < 0)
4271 exp = -exp;
4272 SmallVector<char, 6> expbuf;
4273 do {
4274 expbuf.push_back((char) ('0' + (exp % 10)));
4275 exp /= 10;
4276 } while (exp);
4277 // Exponent always at least two digits if we do not truncate zeros.
4278 if (!TruncateZero && expbuf.size() < 2)
4279 expbuf.push_back('0');
4280 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4281 Str.push_back(expbuf[E-1-I]);
4282 return;
4283 }
4284
4285 // Non-scientific, positive exponents.
4286 if (exp >= 0) {
4287 for (unsigned I = 0; I != NDigits; ++I)
4288 Str.push_back(buffer[NDigits-1-I]);
4289 for (unsigned I = 0; I != (unsigned) exp; ++I)
4290 Str.push_back('0');
4291 return;
4292 }
4293
4294 // Non-scientific, negative exponents.
4295
4296 // The number of digits to the left of the decimal point.
4297 int NWholeDigits = exp + (int) NDigits;
4298
4299 unsigned I = 0;
4300 if (NWholeDigits > 0) {
4301 for (; I != (unsigned) NWholeDigits; ++I)
4302 Str.push_back(buffer[NDigits-I-1]);
4303 Str.push_back('.');
4304 } else {
4305 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4306
4307 Str.push_back('0');
4308 Str.push_back('.');
4309 for (unsigned Z = 1; Z != NZeros; ++Z)
4310 Str.push_back('0');
4311 }
4312
4313 for (; I != NDigits; ++I)
4314 Str.push_back(buffer[NDigits-I-1]);
4315
4316 }
4317} // namespace
4318
4319void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4320 unsigned FormatMaxPadding, bool TruncateZero) const {
4321 switch (category) {
4322 case fcInfinity:
4323 if (isNegative())
4324 return append(Str, "-Inf");
4325 else
4326 return append(Str, "+Inf");
4327
4328 case fcNaN: return append(Str, "NaN");
4329
4330 case fcZero:
4331 if (isNegative())
4332 Str.push_back('-');
4333
4334 if (!FormatMaxPadding) {
4335 if (TruncateZero)
4336 append(Str, "0.0E+0");
4337 else {
4338 append(Str, "0.0");
4339 if (FormatPrecision > 1)
4340 Str.append(FormatPrecision - 1, '0');
4341 append(Str, "e+00");
4342 }
4343 } else {
4344 Str.push_back('0');
4345 }
4346 return;
4347
4348 case fcNormal:
4349 break;
4350 }
4351
4352 // Decompose the number into an APInt and an exponent.
4353 int exp = exponent - ((int) semantics->precision - 1);
4354 APInt significand(
4355 semantics->precision,
4356 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4357
4358 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4359 FormatMaxPadding, TruncateZero);
4360
4361}
4362
4364 if (!isFinite() || isZero())
4365 return INT_MIN;
4366
4367 const integerPart *Parts = significandParts();
4368 const int PartCount = partCountForBits(semantics->precision);
4369
4370 int PopCount = 0;
4371 for (int i = 0; i < PartCount; ++i) {
4372 PopCount += llvm::popcount(Parts[i]);
4373 if (PopCount > 1)
4374 return INT_MIN;
4375 }
4376
4377 if (exponent != semantics->minExponent)
4378 return exponent;
4379
4380 int CountrParts = 0;
4381 for (int i = 0; i < PartCount;
4382 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4383 if (Parts[i] != 0) {
4384 return exponent - semantics->precision + CountrParts +
4385 llvm::countr_zero(Parts[i]) + 1;
4386 }
4387 }
4388
4389 llvm_unreachable("didn't find the set bit");
4390}
4391
4393 if (!isNaN())
4394 return false;
4395 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4396 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4397 return false;
4398
4399 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4400 // first bit of the trailing significand being 0.
4401 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4402}
4403
4404/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4405///
4406/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4407/// appropriate sign switching before/after the computation.
4409 // If we are performing nextDown, swap sign so we have -x.
4410 if (nextDown)
4411 changeSign();
4412
4413 // Compute nextUp(x)
4414 opStatus result = opOK;
4415
4416 // Handle each float category separately.
4417 switch (category) {
4418 case fcInfinity:
4419 // nextUp(+inf) = +inf
4420 if (!isNegative())
4421 break;
4422 // nextUp(-inf) = -getLargest()
4423 makeLargest(true);
4424 break;
4425 case fcNaN:
4426 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4427 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4428 // change the payload.
4429 if (isSignaling()) {
4430 result = opInvalidOp;
4431 // For consistency, propagate the sign of the sNaN to the qNaN.
4432 makeNaN(false, isNegative(), nullptr);
4433 }
4434 break;
4435 case fcZero:
4436 // nextUp(pm 0) = +getSmallest()
4437 makeSmallest(false);
4438 break;
4439 case fcNormal:
4440 // nextUp(-getSmallest()) = -0
4441 if (isSmallest() && isNegative()) {
4442 APInt::tcSet(significandParts(), 0, partCount());
4443 category = fcZero;
4444 exponent = 0;
4445 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4446 sign = false;
4447 if (!semantics->hasZero)
4449 break;
4450 }
4451
4452 if (isLargest() && !isNegative()) {
4453 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4454 // nextUp(getLargest()) == NAN
4455 makeNaN();
4456 break;
4457 } else if (semantics->nonFiniteBehavior ==
4459 // nextUp(getLargest()) == getLargest()
4460 break;
4461 } else {
4462 // nextUp(getLargest()) == INFINITY
4463 APInt::tcSet(significandParts(), 0, partCount());
4464 category = fcInfinity;
4465 exponent = semantics->maxExponent + 1;
4466 break;
4467 }
4468 }
4469
4470 // nextUp(normal) == normal + inc.
4471 if (isNegative()) {
4472 // If we are negative, we need to decrement the significand.
4473
4474 // We only cross a binade boundary that requires adjusting the exponent
4475 // if:
4476 // 1. exponent != semantics->minExponent. This implies we are not in the
4477 // smallest binade or are dealing with denormals.
4478 // 2. Our significand excluding the integral bit is all zeros.
4479 bool WillCrossBinadeBoundary =
4480 exponent != semantics->minExponent && isSignificandAllZeros();
4481
4482 // Decrement the significand.
4483 //
4484 // We always do this since:
4485 // 1. If we are dealing with a non-binade decrement, by definition we
4486 // just decrement the significand.
4487 // 2. If we are dealing with a normal -> normal binade decrement, since
4488 // we have an explicit integral bit the fact that all bits but the
4489 // integral bit are zero implies that subtracting one will yield a
4490 // significand with 0 integral bit and 1 in all other spots. Thus we
4491 // must just adjust the exponent and set the integral bit to 1.
4492 // 3. If we are dealing with a normal -> denormal binade decrement,
4493 // since we set the integral bit to 0 when we represent denormals, we
4494 // just decrement the significand.
4495 integerPart *Parts = significandParts();
4496 APInt::tcDecrement(Parts, partCount());
4497
4498 if (WillCrossBinadeBoundary) {
4499 // Our result is a normal number. Do the following:
4500 // 1. Set the integral bit to 1.
4501 // 2. Decrement the exponent.
4502 APInt::tcSetBit(Parts, semantics->precision - 1);
4503 exponent--;
4504 }
4505 } else {
4506 // If we are positive, we need to increment the significand.
4507
4508 // We only cross a binade boundary that requires adjusting the exponent if
4509 // the input is not a denormal and all of said input's significand bits
4510 // are set. If all of said conditions are true: clear the significand, set
4511 // the integral bit to 1, and increment the exponent. If we have a
4512 // denormal always increment since moving denormals and the numbers in the
4513 // smallest normal binade have the same exponent in our representation.
4514 // If there are only exponents, any increment always crosses the
4515 // BinadeBoundary.
4516 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4517 (!isDenormal() && isSignificandAllOnes());
4518
4519 if (WillCrossBinadeBoundary) {
4520 integerPart *Parts = significandParts();
4521 APInt::tcSet(Parts, 0, partCount());
4522 APInt::tcSetBit(Parts, semantics->precision - 1);
4523 assert(exponent != semantics->maxExponent &&
4524 "We can not increment an exponent beyond the maxExponent allowed"
4525 " by the given floating point semantics.");
4526 exponent++;
4527 } else {
4528 incrementSignificand();
4529 }
4530 }
4531 break;
4532 }
4533
4534 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4535 if (nextDown)
4536 changeSign();
4537
4538 return result;
4539}
4540
4542 assert(isNaN() && "Can only be called on NaN values");
4543 // Number of bits in the payload, excluding the (maybe implied) integer bit.
4544 unsigned Bits = semantics->precision - 1;
4545 return APInt(Bits, ArrayRef(significandParts(), partCountForBits(Bits)));
4546}
4547
4548APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4549 return ::exponentNaN(*semantics);
4550}
4551
4552APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4553 return ::exponentInf(*semantics);
4554}
4555
4556APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4557 return ::exponentZero(*semantics);
4558}
4559
4560void IEEEFloat::makeInf(bool Negative) {
4561 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4562 llvm_unreachable("This floating point format does not support Inf");
4563
4564 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4565 // There is no Inf, so make NaN instead.
4566 makeNaN(false, Negative);
4567 return;
4568 }
4569 category = fcInfinity;
4570 sign = Negative;
4571 exponent = exponentInf();
4572 APInt::tcSet(significandParts(), 0, partCount());
4573}
4574
4575void IEEEFloat::makeZero(bool Negative) {
4576 if (!semantics->hasZero)
4577 llvm_unreachable("This floating point format does not support Zero");
4578
4579 category = fcZero;
4580 sign = Negative;
4581 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4582 // Merge negative zero to positive because 0b10000...000 is used for NaN
4583 sign = false;
4584 }
4585 exponent = exponentZero();
4586 APInt::tcSet(significandParts(), 0, partCount());
4587}
4588
4590 assert(isNaN());
4591 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4592 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4593}
4594
4595int ilogb(const IEEEFloat &Arg) {
4596 if (Arg.isNaN())
4597 return APFloat::IEK_NaN;
4598 if (Arg.isZero())
4599 return APFloat::IEK_Zero;
4600 if (Arg.isInfinity())
4601 return APFloat::IEK_Inf;
4602 if (!Arg.isDenormal())
4603 return Arg.exponent;
4604
4605 IEEEFloat Normalized(Arg);
4606 int SignificandBits = Arg.getSemantics().precision - 1;
4607
4608 Normalized.exponent += SignificandBits;
4609 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4610 return Normalized.exponent - SignificandBits;
4611}
4612
4614 auto MaxExp = X.getSemantics().maxExponent;
4615 auto MinExp = X.getSemantics().minExponent;
4616
4617 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4618 // overflow; clamp it to a safe range before adding, but ensure that the range
4619 // is large enough that the clamp does not change the result. The range we
4620 // need to support is the difference between the largest possible exponent and
4621 // the normalized exponent of half the smallest denormal.
4622
4623 int SignificandBits = X.getSemantics().precision - 1;
4624 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4625
4626 // Clamp to one past the range ends to let normalize handle overlflow.
4627 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4628 X.normalize(RoundingMode, lfExactlyZero);
4629 if (X.isNaN())
4630 X.makeQuiet();
4631 return X;
4632}
4633
4634IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4635 Exp = ilogb(Val);
4636
4637 // Quiet signalling nans.
4638 if (Exp == APFloat::IEK_NaN) {
4639 IEEEFloat Quiet(Val);
4640 Quiet.makeQuiet();
4641 return Quiet;
4642 }
4643
4644 if (Exp == APFloat::IEK_Inf)
4645 return Val;
4646
4647 // 1 is added because frexp is defined to return a normalized fraction in
4648 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4649 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4650 return scalbn(Val, -Exp, RM);
4651}
4652
4654 : Semantics(&S),
4655 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4656 APFloat(APFloatBase::semIEEEdouble)}) {
4657 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4658}
4659
4661 : Semantics(&S), Floats(new APFloat[2]{
4662 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4663 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4664 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4665}
4666
4668 : Semantics(&S),
4669 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4670 APFloat(APFloatBase::semIEEEdouble)}) {
4671 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4672}
4673
4675 : Semantics(&S),
4676 Floats(new APFloat[2]{
4677 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4678 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4679 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4680}
4681
4683 APFloat &&Second)
4684 : Semantics(&S),
4685 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4686 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4687 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4688 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4689}
4690
4692 : Semantics(RHS.Semantics),
4693 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4694 APFloat(RHS.Floats[1])}
4695 : nullptr) {
4696 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4697}
4698
4700 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4701 RHS.Semantics = &APFloatBase::semBogus;
4702 RHS.Floats = nullptr;
4703 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4704}
4705
4707 if (Semantics == RHS.Semantics && RHS.Floats) {
4708 Floats[0] = RHS.Floats[0];
4709 Floats[1] = RHS.Floats[1];
4710 } else if (this != &RHS) {
4711 this->~DoubleAPFloat();
4712 new (this) DoubleAPFloat(RHS);
4713 }
4714 return *this;
4715}
4716
4717// Returns a result such that:
4718// 1. abs(Lo) <= ulp(Hi)/2
4719// 2. Hi == RTNE(Hi + Lo)
4720// 3. Hi + Lo == X + Y
4721//
4722// Requires that log2(X) >= log2(Y).
4723static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4724 if (!X.isFinite())
4725 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4726 APFloat Hi = X + Y;
4727 APFloat Delta = Hi - X;
4728 APFloat Lo = Y - Delta;
4729 return {Hi, Lo};
4730}
4731
4732// Implement addition, subtraction, multiplication and division based on:
4733// "Software for Doubled-Precision Floating-Point Computations",
4734// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4735APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4736 const APFloat &c, const APFloat &cc,
4737 roundingMode RM) {
4738 int Status = opOK;
4739 APFloat z = a;
4740 Status |= z.add(c, RM);
4741 if (!z.isFinite()) {
4742 if (!z.isInfinity()) {
4743 Floats[0] = std::move(z);
4744 Floats[1].makeZero(/* Neg = */ false);
4745 return (opStatus)Status;
4746 }
4747 Status = opOK;
4748 auto AComparedToC = a.compareAbsoluteValue(c);
4749 z = cc;
4750 Status |= z.add(aa, RM);
4751 if (AComparedToC == APFloat::cmpGreaterThan) {
4752 // z = cc + aa + c + a;
4753 Status |= z.add(c, RM);
4754 Status |= z.add(a, RM);
4755 } else {
4756 // z = cc + aa + a + c;
4757 Status |= z.add(a, RM);
4758 Status |= z.add(c, RM);
4759 }
4760 if (!z.isFinite()) {
4761 Floats[0] = std::move(z);
4762 Floats[1].makeZero(/* Neg = */ false);
4763 return (opStatus)Status;
4764 }
4765 Floats[0] = z;
4766 APFloat zz = aa;
4767 Status |= zz.add(cc, RM);
4768 if (AComparedToC == APFloat::cmpGreaterThan) {
4769 // Floats[1] = a - z + c + zz;
4770 Floats[1] = a;
4771 Status |= Floats[1].subtract(z, RM);
4772 Status |= Floats[1].add(c, RM);
4773 Status |= Floats[1].add(zz, RM);
4774 } else {
4775 // Floats[1] = c - z + a + zz;
4776 Floats[1] = c;
4777 Status |= Floats[1].subtract(z, RM);
4778 Status |= Floats[1].add(a, RM);
4779 Status |= Floats[1].add(zz, RM);
4780 }
4781 } else {
4782 // q = a - z;
4783 APFloat q = a;
4784 Status |= q.subtract(z, RM);
4785
4786 // zz = q + c + (a - (q + z)) + aa + cc;
4787 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4788 auto zz = q;
4789 Status |= zz.add(c, RM);
4790 Status |= q.add(z, RM);
4791 Status |= q.subtract(a, RM);
4792 q.changeSign();
4793 Status |= zz.add(q, RM);
4794 Status |= zz.add(aa, RM);
4795 Status |= zz.add(cc, RM);
4796 if (zz.isZero() && !zz.isNegative()) {
4797 Floats[0] = std::move(z);
4798 Floats[1].makeZero(/* Neg = */ false);
4799 return opOK;
4800 }
4801 Floats[0] = z;
4802 Status |= Floats[0].add(zz, RM);
4803 if (!Floats[0].isFinite()) {
4804 Floats[1].makeZero(/* Neg = */ false);
4805 return (opStatus)Status;
4806 }
4807 Floats[1] = std::move(z);
4808 Status |= Floats[1].subtract(Floats[0], RM);
4809 Status |= Floats[1].add(zz, RM);
4810 }
4811 return (opStatus)Status;
4812}
4813
4814APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4815 const DoubleAPFloat &RHS,
4816 DoubleAPFloat &Out,
4817 roundingMode RM) {
4818 if (LHS.getCategory() == fcNaN) {
4819 Out = LHS;
4820 return opOK;
4821 }
4822 if (RHS.getCategory() == fcNaN) {
4823 Out = RHS;
4824 return opOK;
4825 }
4826 if (LHS.getCategory() == fcZero) {
4827 Out = RHS;
4828 return opOK;
4829 }
4830 if (RHS.getCategory() == fcZero) {
4831 Out = LHS;
4832 return opOK;
4833 }
4834 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4835 LHS.isNegative() != RHS.isNegative()) {
4836 Out.makeNaN(false, Out.isNegative(), nullptr);
4837 return opInvalidOp;
4838 }
4839 if (LHS.getCategory() == fcInfinity) {
4840 Out = LHS;
4841 return opOK;
4842 }
4843 if (RHS.getCategory() == fcInfinity) {
4844 Out = RHS;
4845 return opOK;
4846 }
4847 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4848
4849 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4850 CC(RHS.Floats[1]);
4851 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4852 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4853 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4854 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4855 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4856 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4857 return Out.addImpl(A, AA, C, CC, RM);
4858}
4859
4861 roundingMode RM) {
4862 return addWithSpecial(*this, RHS, *this, RM);
4863}
4864
4866 roundingMode RM) {
4867 changeSign();
4868 auto Ret = add(RHS, RM);
4869 changeSign();
4870 return Ret;
4871}
4872
4875 const auto &LHS = *this;
4876 auto &Out = *this;
4877 /* Interesting observation: For special categories, finding the lowest
4878 common ancestor of the following layered graph gives the correct
4879 return category:
4880
4881 NaN
4882 / \
4883 Zero Inf
4884 \ /
4885 Normal
4886
4887 e.g. NaN * NaN = NaN
4888 Zero * Inf = NaN
4889 Normal * Zero = Zero
4890 Normal * Inf = Inf
4891 */
4892 if (LHS.getCategory() == fcNaN) {
4893 Out = LHS;
4894 return opOK;
4895 }
4896 if (RHS.getCategory() == fcNaN) {
4897 Out = RHS;
4898 return opOK;
4899 }
4900 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4901 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4902 Out.makeNaN(false, false, nullptr);
4903 return opOK;
4904 }
4905 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4906 Out = LHS;
4907 return opOK;
4908 }
4909 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4910 Out = RHS;
4911 return opOK;
4912 }
4913 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4914 "Special cases not handled exhaustively");
4915
4916 int Status = opOK;
4917 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4918 // t = a * c
4919 APFloat T = A;
4920 Status |= T.multiply(C, RM);
4921 if (!T.isFiniteNonZero()) {
4922 Floats[0] = std::move(T);
4923 Floats[1].makeZero(/* Neg = */ false);
4924 return (opStatus)Status;
4925 }
4926
4927 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4928 APFloat Tau = A;
4929 T.changeSign();
4930 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4931 T.changeSign();
4932 {
4933 // v = a * d
4934 APFloat V = A;
4935 Status |= V.multiply(D, RM);
4936 // w = b * c
4937 APFloat W = B;
4938 Status |= W.multiply(C, RM);
4939 Status |= V.add(W, RM);
4940 // tau += v + w
4941 Status |= Tau.add(V, RM);
4942 }
4943 // u = t + tau
4944 APFloat U = T;
4945 Status |= U.add(Tau, RM);
4946
4947 Floats[0] = U;
4948 if (!U.isFinite()) {
4949 Floats[1].makeZero(/* Neg = */ false);
4950 } else {
4951 // Floats[1] = (t - u) + tau
4952 Status |= T.subtract(U, RM);
4953 Status |= T.add(Tau, RM);
4954 Floats[1] = std::move(T);
4955 }
4956 return (opStatus)Status;
4957}
4958
4961 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4962 "Unexpected Semantics");
4963 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4964 auto Ret = Tmp.divide(
4965 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4966 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4967 return Ret;
4968}
4969
4971 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4972 "Unexpected Semantics");
4973 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4974 auto Ret = Tmp.remainder(
4975 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4976 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4977 return Ret;
4978}
4979
4981 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4982 "Unexpected Semantics");
4983 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4984 auto Ret = Tmp.mod(
4985 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4986 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4987 return Ret;
4988}
4989
4992 const DoubleAPFloat &Addend,
4994 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4995 "Unexpected Semantics");
4996 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4997 auto Ret = Tmp.fusedMultiplyAdd(
4998 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
4999 Multiplicand.bitcastToAPInt()),
5000 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
5001 RM);
5002 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5003 return Ret;
5004}
5005
5007 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5008 "Unexpected Semantics");
5009 const APFloat &Hi = getFirst();
5010 const APFloat &Lo = getSecond();
5011
5012 APFloat RoundedHi = Hi;
5013 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
5014
5015 // We can reduce the problem to just the high part if the input:
5016 // 1. Represents a non-finite value.
5017 // 2. Has a component which is zero.
5018 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5019 Floats[0] = std::move(RoundedHi);
5020 Floats[1].makeZero(/*Neg=*/false);
5021 return HiStatus;
5022 }
5023
5024 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5025 // halfway point.
5026 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5027 APFloat TieBreaker) {
5028 // RoundingError tells us which direction we rounded:
5029 // - RoundingError > 0: we rounded up.
5030 // - RoundingError < 0: we rounded down.
5031 // Sterbenz' lemma ensures that RoundingError is exact.
5032 const APFloat RoundingError = Rounded - ToRound;
5033 if (TieBreaker.isNonZero() &&
5034 TieBreaker.isNegative() != RoundingError.isNegative() &&
5035 abs(RoundingError).isExactlyValue(0.5))
5036 Rounded.add(
5037 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5039 return Rounded;
5040 };
5041
5042 // Case 1: Hi is not an integer.
5043 // Special cases are for rounding modes that are sensitive to ties.
5044 if (RoundedHi != Hi) {
5045 // We need to consider the case where Hi was between two integers and the
5046 // rounding mode broke the tie when, in fact, Lo may have had a different
5047 // sign than Hi.
5048 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5049 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5050
5051 Floats[0] = std::move(RoundedHi);
5052 Floats[1].makeZero(/*Neg=*/false);
5053 return HiStatus;
5054 }
5055
5056 // Case 2: Hi is an integer.
5057 // Special cases are for rounding modes which are rounding towards or away from zero.
5058 RoundingMode LoRoundingMode;
5059 if (RM == rmTowardZero)
5060 // When our input is positive, we want the Lo component rounded toward
5061 // negative infinity to get the smallest result magnitude. Likewise,
5062 // negative inputs want the Lo component rounded toward positive infinity.
5063 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5064 else
5065 LoRoundingMode = RM;
5066
5067 APFloat RoundedLo = Lo;
5068 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5069 if (LoRoundingMode == rmNearestTiesToAway)
5070 // We need to consider the case where Lo was between two integers and the
5071 // rounding mode broke the tie when, in fact, Hi may have had a different
5072 // sign than Lo.
5073 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5074
5075 // We must ensure that the final result has no overlap between the two APFloat values.
5076 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5077
5078 Floats[0] = std::move(RoundedHi);
5079 Floats[1] = std::move(RoundedLo);
5080 return LoStatus;
5081}
5082
5084 Floats[0].changeSign();
5085 Floats[1].changeSign();
5086}
5087
5090 // Compare absolute values of the high parts.
5091 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5092 if (HiPartCmp != cmpEqual)
5093 return HiPartCmp;
5094
5095 // Zero, regardless of sign, is equal.
5096 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5097 return cmpEqual;
5098
5099 // At this point, |this->Hi| == |RHS.Hi|.
5100 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5101 // same, and Hi-|Lo| if signs are different.
5102 const bool ThisIsSubtractive =
5103 Floats[0].isNegative() != Floats[1].isNegative();
5104 const bool RHSIsSubtractive =
5105 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5106
5107 // Case 1: The low part of 'this' is zero.
5108 if (Floats[1].isZero())
5109 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5110 // If RHS is subtractive, its magnitude is smaller.
5111 // If RHS is additive, its magnitude is larger.
5112 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5113
5114 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5115 if (RHS.Floats[1].isZero())
5116 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5117 // If 'this' is subtractive, its magnitude is smaller.
5118 // If 'this' is additive, its magnitude is larger.
5119 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5120
5121 // If their natures differ, the additive one is larger.
5122 if (ThisIsSubtractive != RHSIsSubtractive)
5123 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5124
5125 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5126 // The comparison now depends on the magnitude of the low parts.
5127 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5128
5129 if (ThisIsSubtractive) {
5130 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5131 if (LoPartCmp == cmpLessThan)
5132 return cmpGreaterThan;
5133 if (LoPartCmp == cmpGreaterThan)
5134 return cmpLessThan;
5135 }
5136
5137 // If additive, the comparison of |Lo| is direct.
5138 // If equal, they are equal.
5139 return LoPartCmp;
5140}
5141
5143 return Floats[0].getCategory();
5144}
5145
5146bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5147
5149 Floats[0].makeInf(Neg);
5150 Floats[1].makeZero(/* Neg = */ false);
5151}
5152
5154 Floats[0].makeZero(Neg);
5155 Floats[1].makeZero(/* Neg = */ false);
5156}
5157
5159 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5160 "Unexpected Semantics");
5161 Floats[0] =
5162 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5163 Floats[1] =
5164 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5165 if (Neg)
5166 changeSign();
5167}
5168
5170 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5171 "Unexpected Semantics");
5172 Floats[0].makeSmallest(Neg);
5173 Floats[1].makeZero(/* Neg = */ false);
5174}
5175
5177 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5178 "Unexpected Semantics");
5179 Floats[0] =
5180 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5181 if (Neg)
5182 Floats[0].changeSign();
5183 Floats[1].makeZero(/* Neg = */ false);
5184}
5185
5186void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5187 Floats[0].makeNaN(SNaN, Neg, fill);
5188 Floats[1].makeZero(/* Neg = */ false);
5189}
5190
5192 auto Result = Floats[0].compare(RHS.Floats[0]);
5193 // |Float[0]| > |Float[1]|
5194 if (Result == APFloat::cmpEqual)
5195 return Floats[1].compare(RHS.Floats[1]);
5196 return Result;
5197}
5198
5200 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5201 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5202}
5203
5205 if (Arg.Floats)
5206 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5207 return hash_combine(Arg.Semantics);
5208}
5209
5211 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5212 "Unexpected Semantics");
5213 uint64_t Data[] = {
5214 Floats[0].bitcastToAPInt().getRawData()[0],
5215 Floats[1].bitcastToAPInt().getRawData()[0],
5216 };
5217 return APInt(128, Data);
5218}
5219
5221 roundingMode RM) {
5222 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5223 "Unexpected Semantics");
5224 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5225 auto Ret = Tmp.convertFromString(S, RM);
5226 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5227 return Ret;
5228}
5229
5230// The double-double lattice of values corresponds to numbers which obey:
5231// - abs(lo) <= 1/2 * ulp(hi)
5232// - roundTiesToEven(hi + lo) == hi
5233//
5234// nextUp must choose the smallest output > input that follows these rules.
5235// nextDown must choose the largest output < input that follows these rules.
5237 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5238 "Unexpected Semantics");
5239 // nextDown(x) = -nextUp(-x)
5240 if (nextDown) {
5241 changeSign();
5242 APFloat::opStatus Result = next(/*nextDown=*/false);
5243 changeSign();
5244 return Result;
5245 }
5246 switch (getCategory()) {
5247 case fcInfinity:
5248 // nextUp(+inf) = +inf
5249 // nextUp(-inf) = -getLargest()
5250 if (isNegative())
5251 makeLargest(true);
5252 return opOK;
5253
5254 case fcNaN:
5255 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5256 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5257 // change the payload.
5258 if (getFirst().isSignaling()) {
5259 // For consistency, propagate the sign of the sNaN to the qNaN.
5260 makeNaN(false, isNegative(), nullptr);
5261 return opInvalidOp;
5262 }
5263 return opOK;
5264
5265 case fcZero:
5266 // nextUp(pm 0) = +getSmallest()
5267 makeSmallest(false);
5268 return opOK;
5269
5270 case fcNormal:
5271 break;
5272 }
5273
5274 const APFloat &HiOld = getFirst();
5275 const APFloat &LoOld = getSecond();
5276
5277 APFloat NextLo = LoOld;
5278 NextLo.next(/*nextDown=*/false);
5279
5280 // We want to admit values where:
5281 // 1. abs(Lo) <= ulp(Hi)/2
5282 // 2. Hi == RTNE(Hi + lo)
5283 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5284 return Hi + Lo == Hi;
5285 };
5286
5287 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5288 if (InLattice(HiOld, NextLo)) {
5289 // Yes, the result is (HiOld, nextUp(LoOld)).
5290 Floats[1] = std::move(NextLo);
5291
5292 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5293 // value is defined to have exactly 106 bits of precision. This limitation
5294 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5295 // value.
5296 DoubleAPFloat Largest{*Semantics, uninitialized};
5297 Largest.makeLargest(/*Neg=*/false);
5298 if (compare(Largest) == cmpGreaterThan)
5299 makeInf(/*Neg=*/false);
5300
5301 return opOK;
5302 }
5303
5304 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5305 // correct result. We know the new hi component will be nextUp(HiOld) but our
5306 // lattice rules make it a little ambiguous what the correct NextLo must be.
5307 APFloat NextHi = HiOld;
5308 NextHi.next(/*nextDown=*/false);
5309
5310 // nextUp(getLargest()) == INFINITY
5311 if (NextHi.isInfinity()) {
5312 makeInf(/*Neg=*/false);
5313 return opOK;
5314 }
5315
5316 // IEEE 754-2019 5.3.1:
5317 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5318 // -0."
5319 if (NextHi.isZero()) {
5320 makeZero(/*Neg=*/true);
5321 return opOK;
5322 }
5323
5324 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5325 // negative infinity as possible.
5326 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5327 if (!InLattice(NextHi, NextLo))
5328 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5329 NextLo.next(/*nextDown=*/false);
5330
5331 Floats[0] = std::move(NextHi);
5332 Floats[1] = std::move(NextLo);
5333
5334 return opOK;
5335}
5336
5337APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5338 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5339 roundingMode RM, bool *IsExact) const {
5340 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5341 "Unexpected Semantics");
5342
5343 // If Hi is not finite, or Lo is zero, the value is entirely represented
5344 // by Hi. Delegate to the simpler single-APFloat conversion.
5345 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5346 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5347
5348 // First, round the full double-double value to an integral value. This
5349 // simplifies the rest of the function, as we no longer need to consider
5350 // fractional parts.
5351 *IsExact = false;
5352 DoubleAPFloat Integral = *this;
5353 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5354 if (RoundStatus == opInvalidOp)
5355 return opInvalidOp;
5356 const APFloat &IntegralHi = Integral.getFirst();
5357 const APFloat &IntegralLo = Integral.getSecond();
5358
5359 // If rounding results in either component being zero, the sum is trivial.
5360 // Delegate to the simpler single-APFloat conversion.
5361 bool HiIsExact;
5362 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5363 const opStatus HiStatus =
5364 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5365 // The conversion from an integer-valued float to an APInt may fail if the
5366 // result would be out of range. Regardless, taking this path is only
5367 // possible if rounding occurred during the initial `roundToIntegral`.
5368 return HiStatus == opOK ? opInexact : HiStatus;
5369 }
5370
5371 // A negative number cannot be represented by an unsigned integer.
5372 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5373 if (!IsSigned && IntegralHi.isNegative())
5374 return opInvalidOp;
5375
5376 // Handle the special boundary case where |Hi| is exactly the power of two
5377 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5378 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5379 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5380 // signed, N for unsigned).
5381 bool LoIsExact;
5382 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5383 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5384 if (HiExactLog2 >= 0 &&
5385 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5386 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5387 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5388 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5389 return opInvalidOp;
5390
5391 // If the signs differ, the sum will fit. We can compute the result using
5392 // properties of two's complement arithmetic without a wide intermediate
5393 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5394 const opStatus LoStatus = IntegralLo.convertToInteger(
5395 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5396 if (LoStatus == opInvalidOp)
5397 return opInvalidOp;
5398
5399 // Adjust the bit pattern of Lo to account for Hi's value:
5400 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5401 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5402 // already produced the correct final bit pattern.
5403 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5404 // can be computed by taking the two's complement pattern for `Lo` and
5405 // clearing the sign bit.
5406 if (IsSigned && !IntegralHi.isNegative())
5407 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5408 *IsExact = RoundStatus == opOK;
5409 return RoundStatus;
5410 }
5411
5412 // Convert Hi into an integer. This may not fit but that is OK: we know that
5413 // Hi + Lo would not fit either in this situation.
5414 const opStatus HiStatus = IntegralHi.convertToInteger(
5415 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5416 if (HiStatus == opInvalidOp)
5417 return HiStatus;
5418
5419 // Convert Lo into a temporary integer of the same width.
5420 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5421 const opStatus LoStatus =
5422 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5423 if (LoStatus == opInvalidOp)
5424 return LoStatus;
5425
5426 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5427 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5428 // where the sum could cross the integer type's boundary is when Hi is a
5429 // power of two, which is handled by the special case block above.
5430 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5431
5432 *IsExact = RoundStatus == opOK;
5433 return RoundStatus;
5434}
5435
5438 unsigned int Width, bool IsSigned,
5439 roundingMode RM, bool *IsExact) const {
5440 opStatus FS =
5441 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5442
5443 if (FS == opInvalidOp) {
5444 const unsigned DstPartsCount = partCountForBits(Width);
5445 assert(DstPartsCount <= Input.size() && "Integer too big");
5446
5447 unsigned Bits;
5448 if (getCategory() == fcNaN)
5449 Bits = 0;
5450 else if (isNegative())
5451 Bits = IsSigned;
5452 else
5453 Bits = Width - IsSigned;
5454
5455 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5456 if (isNegative() && IsSigned)
5457 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5458 }
5459
5460 return FS;
5461}
5462
/// Replace this value with the result of an overflowing operation under
/// rounding mode \p RM. Depending on the mode and on the current sign, the
/// value becomes either the largest finite value or an infinity, keeping the
/// current sign.
///
/// \returns opInexact, with opOverflow added when the stored result is not
/// finite.
///
/// NOTE(review): the `case` labels of the switch are absent from this listing
/// (the embedded line numbers skip 5465, 5468, 5474 and 5480-5481), so which
/// rounding mode selects which arm cannot be confirmed from here; check the
/// original file before editing this function.
5463APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5464 switch (RM) {
5466 makeLargest(/*Neg=*/isNegative());
5467 break;
5469 if (isNegative())
5470 makeInf(/*Neg=*/true);
5471 else
5472 makeLargest(/*Neg=*/false);
5473 break;
5475 if (isNegative())
5476 makeLargest(/*Neg=*/true);
5477 else
5478 makeInf(/*Neg=*/false);
5479 break;
5482 makeInf(/*Neg=*/isNegative());
5483 break;
5484 default:
5485 llvm_unreachable("Invalid rounding mode found");
5486 }
 // Overflow is always inexact; it is additionally flagged as opOverflow only
 // when the arm above produced an infinity rather than the largest value.
5487 opStatus S = opInexact;
5488 if (!getFirst().isFinite())
5489 S = static_cast<opStatus>(S | opOverflow);
5490 return S;
5491}
5492
/// Set this value from the unsigned multi-word integer \p Src (\p SrcCount
/// words), rounding according to \p RM.
///
/// The conversion works in stages: a round-to-nearest approximation becomes
/// the high component, the exact integer error of that approximation is
/// converted into the low component, and the pair is renormalized with
/// fastTwoSum. Overflow is routed through handleOverflow(RM).
///
/// \returns opOK for a zero source, the result of handleOverflow(RM) on
/// overflow, and otherwise the status of converting the error term into the
/// low component.
5493APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5494 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5495 // Find the most significant bit of the source integer. APInt::tcMSB returns
5496 // UINT_MAX for a zero value.
5497 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5498 if (SrcMSB == UINT_MAX) {
5499 // The source integer is 0.
5500 makeZero(/*Neg=*/false);
5501 return opOK;
5502 }
5503
5504 // Create a minimally-sized APInt to represent the source value.
5505 const unsigned SrcBitWidth = SrcMSB + 1;
5506 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5507 /*isUnsigned=*/true};
5508
5509 // Stage 1: Initial Approximation.
5510 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5511 // We use round-to-nearest because it minimizes the initial error, which is
5512 // crucial for the subsequent steps.
 // NOTE(review): the declaration of `Hi` (listing line 5513) is missing from
 // this listing; confirm against the original file.
5514 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5515
5516 // If the first approximation already overflows, the number is too large.
5517 // NOTE: The underlying semantics are *more* conservative when choosing to
5518 // overflow because their notion of ULP is much larger. As such, it is always
5519 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5520 if (!Hi.isFinite())
5521 return handleOverflow(RM);
5522
5523 // Stage 2: Exact Error Calculation.
5524 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5525 // This is done by converting Hi back to an integer and subtracting it from
5526 // the original source.
5527 bool HiAsIntIsExact;
5528 // Create an integer representation of Hi. Its width is determined by the
5529 // exponent of Hi, ensuring it's just large enough. This width can exceed
5530 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5531 // Sizing it from ilogb(Hi) lets Hi be held in full when converted back.
5532 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5533 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5534 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5535
5536 // Stage 3: Error Approximation and Rounding.
5537 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5538 // captures the remainder of the original number. The rounding mode for this
5539 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5540 // ensure the final sum (Hi + Lo) rounds correctly.
5541 roundingMode LoRM = RM;
5542 // Adjustments are only necessary when the initial approximation Hi was an
5543 // overestimate, making the Error negative.
5544 if (Error.isNegative()) {
5545 if (RM == rmNearestTiesToAway) {
5546 // For rmNearestTiesToAway, a tie should round away from zero. Since
5547 // SrcInt is positive, this means rounding toward +infinity.
5548 // A standard conversion of a negative Error would round ties toward
5549 // -infinity, causing the final sum Hi + Lo to be smaller. To
5550 // counteract this, we detect the tie case and override the rounding
5551 // mode for Lo to rmTowardPositive.
5552 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5553 const unsigned LoPrecision = getSecond().getSemantics().precision;
5554 if (ErrorActiveBits > LoPrecision) {
5555 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5556 // A tie occurs when the bits to be truncated are of the form 100...0.
5557 // This is detected by checking if the number of trailing zeros is
5558 // exactly one less than the number of bits being truncated.
5559 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5560 LoRM = rmTowardPositive;
5561 }
5562 } else if (RM == rmTowardZero) {
5563 // For rmTowardZero, the final positive result must be truncated (rounded
5564 // down). When Hi is an overestimate, Error is negative. A standard
5565 // rmTowardZero conversion of Error would make it *less* negative,
5566 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5567 // rounds down correctly, we force Lo to round toward -infinity.
5568 LoRM = rmTowardNegative;
5569 }
5570 }
5571
 // NOTE(review): the declaration of `Lo` (listing line 5572) is missing from
 // this listing; confirm against the original file.
5573 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5574
5575 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5576 // components do not overlap. fastTwoSum performs this operation.
5577 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5578 Floats[0] = std::move(Hi);
5579 Floats[1] = std::move(Lo);
5580
5581 // A final check for overflow is needed because fastTwoSum can cause a
5582 // carry-out from Lo that pushes Hi to infinity.
5583 if (!getFirst().isFinite())
5584 return handleOverflow(RM);
5585
5586 // The largest DoubleAPFloat must be canonical. Values which are larger are
5587 // not canonical and are equivalent to overflow.
5588 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5589 DoubleAPFloat Largest{*Semantics};
5590 Largest.makeLargest(/*Neg=*/false);
5591 if (compare(Largest) == APFloat::cmpGreaterThan)
5592 return handleOverflow(RM);
5593 }
5594
5595 // The final status of the operation is determined by the conversion of the
5596 // error term. If Lo could represent Error exactly, the entire conversion
5597 // is exact. Otherwise, it's inexact.
5598 return Status;
5599}
5600
5602 bool IsSigned,
5603 roundingMode RM) {
5604 const bool NegateInput = IsSigned && Input.isNegative();
5605 APInt API = Input;
5606 if (NegateInput)
5607 API.negate();
5608
5610 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5611 if (NegateInput)
5612 changeSign();
5613 return Status;
5614}
5615
5617 unsigned int HexDigits,
5618 bool UpperCase,
5619 roundingMode RM) const {
5620 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5621 "Unexpected Semantics");
5622 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5623 .convertToHexString(DST, HexDigits, UpperCase, RM);
5624}
5625
5627 return getCategory() == fcNormal &&
5628 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5629 // (double)(Hi + Lo) == Hi defines a normal number.
5630 Floats[0] != Floats[0] + Floats[1]);
5631}
5632
5634 if (getCategory() != fcNormal)
5635 return false;
5636 DoubleAPFloat Tmp(*this);
5637 Tmp.makeSmallest(this->isNegative());
5638 return Tmp.compare(*this) == cmpEqual;
5639}
5640
5642 if (getCategory() != fcNormal)
5643 return false;
5644
5645 DoubleAPFloat Tmp(*this);
5647 return Tmp.compare(*this) == cmpEqual;
5648}
5649
5651 if (getCategory() != fcNormal)
5652 return false;
5653 DoubleAPFloat Tmp(*this);
5654 Tmp.makeLargest(this->isNegative());
5655 return Tmp.compare(*this) == cmpEqual;
5656}
5657
5659 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5660 "Unexpected Semantics");
5661 return Floats[0].isInteger() && Floats[1].isInteger();
5662}
5663
5665 unsigned FormatPrecision,
5666 unsigned FormatMaxPadding,
5667 bool TruncateZero) const {
5668 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5669 "Unexpected Semantics");
5670 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5671 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5672}
5673
5675 // In order for Hi + Lo to be a power of two, the following must be true:
5676 // 1. Hi must be a power of two.
5677 // 2. Lo must be zero.
5678 if (getSecond().isNonZero())
5679 return INT_MIN;
5680 return getFirst().getExactLog2Abs();
5681}
5682
5683int ilogb(const DoubleAPFloat &Arg) {
5684 const APFloat &Hi = Arg.getFirst();
5685 const APFloat &Lo = Arg.getSecond();
5686 int IlogbResult = ilogb(Hi);
5687 // Zero and non-finite values can delegate to ilogb(Hi).
5688 if (Arg.getCategory() != fcNormal)
5689 return IlogbResult;
5690 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5691 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5692 return IlogbResult;
5693 if (Hi.getExactLog2Abs() == INT_MIN)
5694 return IlogbResult;
5695 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5696 // get nudged out of the binade by the low component.
5697 return IlogbResult - 1;
5698}
5699
5702 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5703 "Unexpected Semantics");
5705 scalbn(Arg.Floats[0], Exp, RM),
5706 scalbn(Arg.Floats[1], Exp, RM));
5707}
5708
/// Split \p Arg into a fraction and a power-of-two exponent.
///
/// \p Exp receives the exponent: 0 for zero, the ilogb sentinel for NaN and
/// infinity, and otherwise ilogb(Arg) + 1 (incremented once more in the
/// power-of-two edge case handled near the end of the function).
///
/// Zero and infinity are returned unchanged and a NaN is returned quieted.
/// For other inputs both components are scaled by 2^-Exp; the low component
/// may use an adjusted rounding mode so that the combined value rounds the
/// way \p RM requires.
///
/// NOTE(review): listing lines 5710 (the rest of the signature) and 5808 (the
/// start of the final return statement) are missing from this listing;
/// presumably the result is built from `First` and `Second`; confirm against
/// the original file.
5709DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5711 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5712 "Unexpected Semantics");
5713
5714 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5715 // [1.0, 2.0).
5716 Exp = ilogb(Arg);
5717
5718 // For NaNs, quiet any signaling NaN and return the result, as per standard
5719 // practice.
5720 if (Exp == APFloat::IEK_NaN) {
5721 DoubleAPFloat Quiet{Arg};
5722 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5723 return Quiet;
5724 }
5725
5726 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5727 if (Exp == APFloat::IEK_Inf)
5728 return Arg;
5729
5730 // For zero, the fraction is zero and the standard requires the exponent be 0.
5731 if (Exp == APFloat::IEK_Zero) {
5732 Exp = 0;
5733 return Arg;
5734 }
5735
5736 const APFloat &Hi = Arg.getFirst();
5737 const APFloat &Lo = Arg.getSecond();
5738
5739 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5740 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5741 // Increment the exponent to ensure the fraction is in the correct range.
5742 ++Exp;
5743
5744 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5745 APFloat Second = Lo;
5746 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5747 roundingMode LoRoundingMode;
5748 // The interpretation of rmTowardZero depends on the sign of the combined
5749 // Arg rather than the sign of the component.
5750 if (RM == rmTowardZero)
5751 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5752 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5753 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5754 // "away from zero" based on its own sign would move the value in the
5755 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5756 // direction-agnostic. We only need to bother with this if Lo is scaled
5757 // down.
5758 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5759 LoRoundingMode = rmNearestTiesToEven;
5760 else
5761 LoRoundingMode = RM;
5762 Second = scalbn(Lo, -Exp, LoRoundingMode);
5763 // The rmNearestTiesToEven proxy is correct most of the time, but it
5764 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5765 // exact midpoint.
5766 // NOTE: This is morally equivalent to roundTiesTowardZero.
5767 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5768 // Re-scale the result back to check if rounding occurred.
5769 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5770 if (RecomposedLo != Lo) {
5771 // RoundingError tells us which direction we rounded:
5772 // - RoundingError > 0: we rounded up.
5773 // - RoundingError < 0: we rounded down.
5774 const APFloat RoundingError = RecomposedLo - Lo;
5775 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5776 // We do this by checking if the absolute rounding error is exactly
5777 // half a ULP of the result.
5778 const APFloat UlpOfSecond = harrisonUlp(Second);
5779 const APFloat ScaledUlpOfSecond =
5780 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5781 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5782 const bool RoundedLoAway =
5783 Second.isNegative() == RoundingError.isNegative();
5784 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5785 // decrease the magnitude of Second to increase the magnitude
5786 // First+Second.
5787 if (IsMidpoint && RoundedLoAway)
5788 Second.next(/*nextDown=*/!Second.isNegative());
5789 }
5790 }
5791 // Handle a tricky edge case where Arg is slightly less than a power of two
5792 // (e.g., Arg = 2^k - epsilon). In this situation:
5793 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5794 // 2. ilogb(Arg) correctly returns k-1.
5795 // 3. Our initial Exp becomes (k-1) + 1 = k.
5796 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5797 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5798 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5799 // We detect this specific case by checking if Hi is a power of two and if
5800 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5801 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5802 // valid fraction.
5803 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5804 ++Exp;
5805 }
5806
5807 APFloat First = scalbn(Hi, -Exp, RM);
5809 std::move(Second));
5810}
5811
5812APInt DoubleAPFloat::getNaNPayload() const { return Floats[0].getNaNPayload(); }
5813} // namespace detail
5814
/// Construct the storage from an IEEEFloat \p F for the given \p Semantics.
///
/// When \p Semantics uses the IEEEFloat layout, \p F is moved into the IEEE
/// member with placement new. When it uses the DoubleAPFloat layout, a
/// DoubleAPFloat is placement-constructed in the Double member with \p F
/// (wrapped in an APFloat of F's own semantics) as its first component. Any
/// other layout is unreachable.
///
/// NOTE(review): listing line 5823 (the remaining constructor argument(s) of
/// the DoubleAPFloat) is missing from this listing; confirm against the
/// original file.
5815APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5816 if (usesLayout<IEEEFloat>(Semantics)) {
5817 new (&IEEE) IEEEFloat(std::move(F));
5818 return;
5819 }
5820 if (usesLayout<DoubleAPFloat>(Semantics)) {
 // Capture F's semantics before F is moved from on the next line.
5821 const fltSemantics& S = F.getSemantics();
5822 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5824 return;
5825 }
5826 llvm_unreachable("Unexpected semantics");
5827}
5828
5833
5834hash_code hash_value(const APFloat &Arg) {
5835 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5836 return hash_value(Arg.U.IEEE);
5837 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5838 return hash_value(Arg.U.Double);
5839 llvm_unreachable("Unexpected semantics");
5840}
5841
5843 : APFloat(Semantics) {
5844 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5845 assert(StatusOrErr && "Invalid floating point representation");
5846 consumeError(StatusOrErr.takeError());
5847}
5848
5850 if (isZero())
5851 return isNegative() ? fcNegZero : fcPosZero;
5852 if (isNormal())
5853 return isNegative() ? fcNegNormal : fcPosNormal;
5854 if (isDenormal())
5856 if (isInfinity())
5857 return isNegative() ? fcNegInf : fcPosInf;
5858 assert(isNaN() && "Other class of FP constant");
5859 return isSignaling() ? fcSNan : fcQNan;
5860}
5861
5862bool APFloat::getExactInverse(APFloat *Inv) const {
5863 // Only finite, non-zero numbers can have a useful, representable inverse.
5864 // This check filters out +/- zero, +/- infinity, and NaN.
5865 if (!isFiniteNonZero())
5866 return false;
5867
5868 // Historically, this function rejects subnormal inputs. One reason why this
5869 // might be important is that subnormals may behave differently under FTZ/DAZ
5870 // runtime behavior.
5871 if (isDenormal())
5872 return false;
5873
5874 // A number has an exact, representable inverse if and only if it is a power
5875 // of two.
5876 //
5877 // Mathematical Rationale:
5878 // 1. A binary floating-point number x is a dyadic rational, meaning it can
5879 // be written as x = M / 2^k for integers M (the significand) and k.
5880 // 2. The inverse is 1/x = 2^k / M.
5881 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
5882 // in binary), its denominator M must also be a power of two.
5883 // Let's say M = 2^m.
5884 // 4. Substituting this back into the formula for x, we get
5885 // x = (2^m) / (2^k) = 2^(m-k).
5886 //
5887 // This proves that x must be a power of two.
5888
5889 // getExactLog2Abs() returns the integer exponent if the number is a power of
5890 // two or INT_MIN if it is not.
5891 const int Exp = getExactLog2Abs();
5892 if (Exp == INT_MIN)
5893 return false;
5894
5895 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5896 // scaling 1.0 by the negated exponent.
5897 APFloat Reciprocal =
5898 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
5899 rmTowardZero);
5900
5901 // scalbn might round if the resulting exponent -Exp is outside the
5902 // representable range, causing overflow (to infinity) or underflow. We
5903 // must verify that the result is still the exact power of two we expect.
5904 if (Reciprocal.getExactLog2Abs() != -Exp)
5905 return false;
5906
5907 // Avoid multiplication with a subnormal, it is not safe on all platforms and
5908 // may be slower than a normal division.
5909 if (Reciprocal.isDenormal())
5910 return false;
5911
5912 assert(Reciprocal.isFiniteNonZero());
5913
5914 if (Inv)
5915 *Inv = std::move(Reciprocal);
5916
5917 return true;
5918}
5919
5921 roundingMode RM, bool *losesInfo) {
5922 if (&getSemantics() == &ToSemantics) {
5923 *losesInfo = false;
5924 return opOK;
5925 }
5926 if (usesLayout<IEEEFloat>(getSemantics()) &&
5927 usesLayout<IEEEFloat>(ToSemantics))
5928 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5929 if (usesLayout<IEEEFloat>(getSemantics()) &&
5930 usesLayout<DoubleAPFloat>(ToSemantics)) {
5931 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
5932 auto Ret =
5933 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
5934 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5935 return Ret;
5936 }
5937 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5938 usesLayout<IEEEFloat>(ToSemantics)) {
5939 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5940 *this = APFloat(std::move(getIEEE()), ToSemantics);
5941 return Ret;
5942 }
5943 llvm_unreachable("Unexpected semantics");
5944}
5945
5949
5951 SmallVector<char, 16> Buffer;
5952 toString(Buffer);
5953 OS << Buffer;
5954}
5955
5956#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5958 print(dbgs());
5959 dbgs() << '\n';
5960}
5961#endif
5962
5964 NID.Add(bitcastToAPInt());
5965}
5966
5968 roundingMode rounding_mode,
5969 bool *isExact) const {
5970 unsigned bitWidth = result.getBitWidth();
5971 SmallVector<uint64_t, 4> parts(result.getNumWords());
5972 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5973 rounding_mode, isExact);
5974 // Keeps the original signed-ness.
5975 result = APInt(bitWidth, parts);
5976 return status;
5977}
5978
5980 if (&getSemantics() == &APFloatBase::semIEEEdouble)
5981 return getIEEE().convertToDouble();
5982 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
5983 "Float semantics is not representable by IEEEdouble");
5984 APFloat Temp = *this;
5985 bool LosesInfo;
5986 [[maybe_unused]] opStatus St =
5987 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5988 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5989 return Temp.getIEEE().convertToDouble();
5990}
5991
#ifdef HAS_IEE754_FLOAT128
/// Converts this value to a host float128.
///
/// Values that already use IEEEquad semantics are converted directly. Any
/// other semantics must be representable by IEEEquad (asserted); the value is
/// then converted to IEEEquad on a copy, and that conversion is asserted to be
/// exact.
float128 APFloat::convertToQuad() const {
  const bool AlreadyQuad = &getSemantics() == &APFloatBase::semIEEEquad;
  if (AlreadyQuad)
    return getIEEE().convertToQuad();

  assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");

  // Convert a copy so that *this is left unmodified.
  APFloat Widened(*this);
  bool Lossy;
  [[maybe_unused]] opStatus Status =
      Widened.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &Lossy);
  assert(!(Status & opInexact) && !Lossy && "Unexpected imprecision");

  return Widened.getIEEE().convertToQuad();
}
#endif
6006
6008 if (&getSemantics() == &APFloatBase::semIEEEsingle)
6009 return getIEEE().convertToFloat();
6010 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
6011 "Float semantics is not representable by IEEEsingle");
6012 APFloat Temp = *this;
6013 bool LosesInfo;
6014 [[maybe_unused]] opStatus St =
6015 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
6016 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6017 return Temp.getIEEE().convertToFloat();
6018}
6019
6021 static constexpr StringLiteral ValidFormats[] = {
6022 "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
6023 "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
6024 "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
6025 return llvm::is_contained(ValidFormats, Format);
6026}
6027
6029 // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
6030 // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
6032 .Case("Float8E5M2", &semFloat8E5M2)
6033 .Case("Float8E4M3FN", &semFloat8E4M3FN)
6034 .Case("Float4E2M1FN", &semFloat4E2M1FN)
6035 .Case("Float6E3M2FN", &semFloat6E3M2FN)
6036 .Case("Float6E2M3FN", &semFloat6E2M3FN)
6037 .Default(nullptr);
6038}
6039
6040APFloat::Storage::~Storage() {
6041 if (usesLayout<IEEEFloat>(*semantics)) {
6042 IEEE.~IEEEFloat();
6043 return;
6044 }
6045 if (usesLayout<DoubleAPFloat>(*semantics)) {
6046 Double.~DoubleAPFloat();
6047 return;
6048 }
6049 llvm_unreachable("Unexpected semantics");
6050}
6051
6052APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6053 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6054 new (this) IEEEFloat(RHS.IEEE);
6055 return;
6056 }
6057 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6058 new (this) DoubleAPFloat(RHS.Double);
6059 return;
6060 }
6061 llvm_unreachable("Unexpected semantics");
6062}
6063
6064APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6065 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6066 new (this) IEEEFloat(std::move(RHS.IEEE));
6067 return;
6068 }
6069 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6070 new (this) DoubleAPFloat(std::move(RHS.Double));
6071 return;
6072 }
6073 llvm_unreachable("Unexpected semantics");
6074}
6075
6076APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6077 if (usesLayout<IEEEFloat>(*semantics) &&
6078 usesLayout<IEEEFloat>(*RHS.semantics)) {
6079 IEEE = RHS.IEEE;
6080 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6081 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6082 Double = RHS.Double;
6083 } else if (this != &RHS) {
6084 this->~Storage();
6085 new (this) Storage(RHS);
6086 }
6087 return *this;
6088}
6089
6090APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6091 if (usesLayout<IEEEFloat>(*semantics) &&
6092 usesLayout<IEEEFloat>(*RHS.semantics)) {
6093 IEEE = std::move(RHS.IEEE);
6094 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6095 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6096 Double = std::move(RHS.Double);
6097 } else if (this != &RHS) {
6098 this->~Storage();
6099 new (this) Storage(std::move(RHS));
6100 }
6101 return *this;
6102}
6103
6104namespace {
6105
6106APFloat::opStatus getOpStatusFromLibc(int libc_exceptions) {
6108 if (libc_exceptions & FE_INVALID)
6110 if (libc_exceptions & FE_DIVBYZERO)
6112 if (libc_exceptions & FE_OVERFLOW)
6114 if (libc_exceptions & FE_UNDERFLOW)
6116 if (libc_exceptions & FE_INEXACT)
6118 return status;
6119}
6120
6121} // namespace
6122
6123// TODO: Support other rounding modes once LLVM libc math implements static
6124// rounding modes.
6125std::optional<APFloat> exp(const APFloat &x, RoundingMode rounding_mode,
6126 APFloat::opStatus *status) {
6127
6128 if (rounding_mode == APFloatBase::rmNearestTiesToEven) {
6129 if (APFloat::SemanticsToEnum(x.getSemantics()) ==
6131 float x_val = x.convertToFloat();
6132 int exc =
6133 LIBC_NAMESPACE::shared::check::exp_exceptions(x_val, FE_TONEAREST);
6134 if (status) {
6135 *status = getOpStatusFromLibc(exc);
6136 if (x.isSignaling()) {
6137 // 32-bit x86 will silence sNaN when loading floats, so we explicitly
6138 // add the INVALID exception here.
6139 *status =
6140 static_cast<APFloat::opStatus>(*status | APFloat::opInvalidOp);
6141 }
6142 }
6143 float result = LIBC_NAMESPACE::shared::expf(x_val);
6144 return APFloat(result);
6145 }
6146 if (APFloat::SemanticsToEnum(x.getSemantics()) ==
6148 double x_val = x.convertToDouble();
6149 int exc =
6150 LIBC_NAMESPACE::shared::check::exp_exceptions(x_val, FE_TONEAREST);
6151 if (status) {
6152 *status = getOpStatusFromLibc(exc);
6153 if (x.isSignaling()) {
6154 // 32-bit x86 will silence sNaN when loading floats, so we explicitly
6155 // add the INVALID exception here.
6156 *status =
6157 static_cast<APFloat::opStatus>(*status | APFloat::opInvalidOp);
6158 }
6159 }
6160 double result = LIBC_NAMESPACE::shared::exp(x_val);
6161 return APFloat(result);
6162 }
6163 }
6164 return std::nullopt;
6165}
6166
6167} // namespace llvm
6168
6169#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:63
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:27
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
static bool isSigned(unsigned Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:297
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:307
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:123
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:272
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:335
static constexpr roundingMode rmTowardZero
Definition APFloat.h:349
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:247
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:343
static const fltSemantics & BFloat()
Definition APFloat.h:296
static const fltSemantics & IEEEquad()
Definition APFloat.h:299
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:250
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:314
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:268
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:303
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:318
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static LLVM_ABI bool isValidArbitraryFPFormat(StringRef Format)
Returns true if the given string is a valid arbitrary floating-point format interpretation for llvm....
Definition APFloat.cpp:6020
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:285
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:243
friend class APFloat
Definition APFloat.h:292
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:239
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:276
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:170
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:156
static constexpr unsigned integerPartWidth
Definition APFloat.h:153
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:301
APInt::WordType integerPart
Definition APFloat.h:152
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:264
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:289
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:308
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:347
static const fltSemantics & IEEEhalf()
Definition APFloat.h:295
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:317
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:316
static const fltSemantics & Float8E4M3()
Definition APFloat.h:306
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:309
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:215
static const fltSemantics & Float8E3M4()
Definition APFloat.h:312
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:280
static const fltSemantics & Float8E5M2()
Definition APFloat.h:304
fltCategory
Category of internally-represented number.
Definition APFloat.h:371
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:350
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:300
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:315
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:361
static LLVM_ABI const fltSemantics * getArbitraryFPSemantics(StringRef Format)
Returns the fltSemantics for a given arbitrary FP format string, or nullptr if invalid.
Definition APFloat.cpp:6028
static const fltSemantics & FloatTF32()
Definition APFloat.h:313
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:253
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1200
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:5963
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1288
bool isFiniteNonZero() const
Definition APFloat.h:1569
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5920
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1615
bool isNegative() const
Definition APFloat.h:1559
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5862
cmpResult compareAbsoluteValue(const APFloat &RHS) const
Definition APFloat.h:1514
friend DoubleAPFloat
Definition APFloat.h:1647
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5979
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1596
bool isNormal() const
Definition APFloat.h:1563
bool isDenormal() const
Definition APFloat.h:1560
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1261
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:5946
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5834
const fltSemantics & getSemantics() const
Definition APFloat.h:1567
bool isFinite() const
Definition APFloat.h:1564
bool isNaN() const
Definition APFloat.h:1557
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1168
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1549
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6007
bool isSignaling() const
Definition APFloat.h:1561
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1315
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1297
bool isZero() const
Definition APFloat.h:1555
APInt bitcastToAPInt() const
Definition APFloat.h:1451
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1412
opStatus next(bool nextDown)
Definition APFloat.h:1334
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1178
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1228
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5849
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1306
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Fill this APFloat with the result of a string conversion.
Definition APFloat.cpp:5829
friend IEEEFloat
Definition APFloat.h:1646
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:5957
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:5950
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1328
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1253
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1159
bool isInfinity() const
Definition APFloat.h:1556
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1599
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2403
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2375
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1793
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2398
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2477
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2447
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2787
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1775
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2383
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2761
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2667
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1518
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2408
void negate()
Negate this APInt in place.
Definition APInt.h:1491
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1941
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2414
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2734
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2389
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2427
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1759
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2565
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2512
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2551
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1767
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1936
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:576
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:208
void Add(const T &x)
Definition FoldingSet.h:248
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:888
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
const char * iterator
Definition StringRef.h:60
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:635
iterator begin() const
Definition StringRef.h:114
char back() const
Get the last character in the string.
Definition StringRef.h:153
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:720
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
char front() const
Get the first character in the string.
Definition StringRef.h:147
iterator end() const
Definition StringRef.h:116
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:661
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:681
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5176
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4706
LLVM_ABI void changeSign()
Definition APFloat.cpp:5083
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5650
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4970
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4873
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5142
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5199
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5674
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5601
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5210
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5220
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5633
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4865
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5204
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5089
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5626
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5437
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5169
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5683
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5236
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5148
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5658
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5153
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4959
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5641
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4980
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4653
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5664
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5158
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5191
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:5006
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:4991
LLVM_ABI APInt getNaNPayload() const
Definition APFloat.cpp:5812
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5616
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5146
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4860
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5186
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3201
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1450
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2206
fltCategory getCategory() const
Definition APFloat.h:583
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2761
LLVM_ABI APInt getNaNPayload() const
Definition APFloat.cpp:4541
bool isFiniteNonZero() const
Definition APFloat.h:586
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:473
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:3968
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4363
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3597
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4613
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2374
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:548
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2080
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:573
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2098
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3667
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3660
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2056
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4319
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:4000
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4560
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:971
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4589
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current semantics.
Definition APFloat.cpp:1073
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2050
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:560
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3144
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:859
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2062
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2289
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:931
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1098
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Make this number the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:4014
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1090
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1125
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2243
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4595
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4408
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:570
const fltSemantics & getSemantics() const
Definition APFloat.h:584
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:563
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4392
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4575
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2450
LLVM_ABI void changeSign()
Definition APFloat.cpp:2008
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:956
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2706
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition APFloat.cpp:963
An opaque object representing a hash code.
Definition Hashing.h:77
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:449
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of friend declarations above to make them discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:451
static constexpr opStatus opDivByZero
Definition APFloat.h:446
static constexpr opStatus opOverflow
Definition APFloat.h:447
static constexpr cmpResult cmpLessThan
Definition APFloat.h:441
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1473
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:437
static constexpr uninitializedTag uninitialized
Definition APFloat.h:431
static constexpr fltCategory fcZero
Definition APFloat.h:453
static constexpr opStatus opOK
Definition APFloat.h:444
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:442
static constexpr unsigned integerPartWidth
Definition APFloat.h:439
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3341
APFloatBase::ExponentType ExponentType
Definition APFloat.h:430
static constexpr fltCategory fcNormal
Definition APFloat.h:452
static constexpr opStatus opInvalidOp
Definition APFloat.h:445
APFloatBase::opStatus opStatus
Definition APFloat.h:427
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4634
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:425
static constexpr cmpResult cmpUnordered
Definition APFloat.h:443
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:436
APFloatBase::roundingMode roundingMode
Definition APFloat.h:426
APFloatBase::cmpResult cmpResult
Definition APFloat.h:428
static constexpr fltCategory fcInfinity
Definition APFloat.h:450
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:434
static constexpr roundingMode rmTowardZero
Definition APFloat.h:438
static constexpr opStatus opUnderflow
Definition APFloat.h:448
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:432
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4595
static constexpr cmpResult cmpEqual
Definition APFloat.h:440
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4613
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4723
APFloatBase::integerPart integerPart
Definition APFloat.h:424
FormattedNumber decValue(uint64_t N, unsigned Width=DEC_WIDTH)
Definition LVSupport.h:123
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:756
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1759
static const char infinityL[]
Definition APFloat.cpp:747
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:334
static const char NaNU[]
Definition APFloat.cpp:750
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:632
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:691
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:803
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:308
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:391
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:94
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:234
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1668
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:773
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
Definition STLExtras.h:2173
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
const unsigned int maxPrecision
Definition APFloat.cpp:233
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1689
static const char NaNL[]
Definition APFloat.cpp:749
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
Definition bit.h:204
static const char infinityU[]
Definition APFloat.cpp:748
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:51
@ lfMoreThanHalf
Definition APFloat.h:55
@ lfLessThanHalf
Definition APFloat.h:53
@ lfExactlyHalf
Definition APFloat.h:54
@ lfExactlyZero
Definition APFloat.h:52
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:481
LLVM_READONLY LLVM_ABI std::optional< APFloat > exp(const APFloat &X, RoundingMode RM=APFloat::rmNearestTiesToEven, APFloat::opStatus *Status=nullptr)
Implement IEEE 754-2019 exp functions.
Definition APFloat.cpp:6125
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:235
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1677
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:318
static Error createError(const Twine &Err)
Definition APFloat.cpp:330
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:600
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
static const char hexDigitsUpper[]
Definition APFloat.cpp:746
const unsigned int maxExponent
Definition APFloat.cpp:232
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:341
fltNonfiniteBehavior
Definition APFloat.h:953
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range Range.
Definition STLExtras.h:2012
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:611
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:441
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:313
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:580
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1703
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:646
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:789
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:551
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1106
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:351
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:745
#define N
const char * lastSigDigit
Definition APFloat.cpp:476
const char * firstSigDigit
Definition APFloat.cpp:475
APFloatBase::ExponentType maxExponent
Definition APFloat.h:1002
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1015
APFloatBase::ExponentType minExponent
Definition APFloat.h:1006
unsigned int sizeInBits
Definition APFloat.h:1013
unsigned int precision
Definition APFloat.h:1010
fltNanEncoding nanEncoding
Definition APFloat.h:1017