LLVM 23.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Error.h"
29#include <cstring>
30#include <limits.h>
31
32#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
33 do { \
34 if (usesLayout<IEEEFloat>(getSemantics())) \
35 return U.IEEE.METHOD_CALL; \
36 if (usesLayout<DoubleAPFloat>(getSemantics())) \
37 return U.Double.METHOD_CALL; \
38 llvm_unreachable("Unexpected semantics"); \
39 } while (false)
40
41using namespace llvm;
42
43/// A macro used to combine two fcCategory enums into one key which can be used
44/// in a switch statement to classify how the interaction of two APFloat's
45/// categories affects an operation.
46///
47/// TODO: If clang source code is ever allowed to use constexpr in its own
48/// codebase, change this into a static inline function.
49#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
50
51/* Assumed in hexadecimal significand parsing, and conversion to
52 hexadecimal strings. */
53static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
54
55namespace llvm {
56
57constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
58constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
59constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
60constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
61constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
62constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
63constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
65constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
66constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
68constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
70constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
72constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
73constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
74constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
75 127,
76 -127,
77 1,
78 8,
81 false,
82 false,
83 false};
84
85constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
87constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
89constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
91constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
92 80};
93constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
94constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
95constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
96 1023, -1022 + 53, 53 + 53, 128};
97
99 switch (S) {
100 case S_IEEEhalf:
101 return IEEEhalf();
102 case S_BFloat:
103 return BFloat();
104 case S_IEEEsingle:
105 return IEEEsingle();
106 case S_IEEEdouble:
107 return IEEEdouble();
108 case S_IEEEquad:
109 return IEEEquad();
111 return PPCDoubleDouble();
113 return PPCDoubleDoubleLegacy();
114 case S_Float8E5M2:
115 return Float8E5M2();
116 case S_Float8E5M2FNUZ:
117 return Float8E5M2FNUZ();
118 case S_Float8E4M3:
119 return Float8E4M3();
120 case S_Float8E4M3FN:
121 return Float8E4M3FN();
122 case S_Float8E4M3FNUZ:
123 return Float8E4M3FNUZ();
125 return Float8E4M3B11FNUZ();
126 case S_Float8E3M4:
127 return Float8E3M4();
128 case S_FloatTF32:
129 return FloatTF32();
130 case S_Float8E8M0FNU:
131 return Float8E8M0FNU();
132 case S_Float6E3M2FN:
133 return Float6E3M2FN();
134 case S_Float6E2M3FN:
135 return Float6E2M3FN();
136 case S_Float4E2M1FN:
137 return Float4E2M1FN();
139 return x87DoubleExtended();
140 }
141 llvm_unreachable("Unrecognised floating semantics");
142}
143
146 if (&Sem == &llvm::APFloat::IEEEhalf())
147 return S_IEEEhalf;
148 else if (&Sem == &llvm::APFloat::BFloat())
149 return S_BFloat;
150 else if (&Sem == &llvm::APFloat::IEEEsingle())
151 return S_IEEEsingle;
152 else if (&Sem == &llvm::APFloat::IEEEdouble())
153 return S_IEEEdouble;
154 else if (&Sem == &llvm::APFloat::IEEEquad())
155 return S_IEEEquad;
156 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
157 return S_PPCDoubleDouble;
158 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
160 else if (&Sem == &llvm::APFloat::Float8E5M2())
161 return S_Float8E5M2;
162 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
163 return S_Float8E5M2FNUZ;
164 else if (&Sem == &llvm::APFloat::Float8E4M3())
165 return S_Float8E4M3;
166 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
167 return S_Float8E4M3FN;
168 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
169 return S_Float8E4M3FNUZ;
170 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
171 return S_Float8E4M3B11FNUZ;
172 else if (&Sem == &llvm::APFloat::Float8E3M4())
173 return S_Float8E3M4;
174 else if (&Sem == &llvm::APFloat::FloatTF32())
175 return S_FloatTF32;
176 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
177 return S_Float8E8M0FNU;
178 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
179 return S_Float6E3M2FN;
180 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
181 return S_Float6E2M3FN;
182 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
183 return S_Float4E2M1FN;
184 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
185 return S_x87DoubleExtended;
186 else
187 llvm_unreachable("Unknown floating semantics");
188}
189
191 const fltSemantics &B) {
192 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
193 A.precision <= B.precision;
194}
195
196/* A tight upper bound on number of parts required to hold the value
197 pow(5, power) is
198
199 power * 815 / (351 * integerPartWidth) + 1
200
201 However, whilst the result may require only this many parts,
202 because we are multiplying two values to get it, the
203 multiplication may require an extra part with the excess part
204 being zero (consider the trivial case of 1 * 1, tcFullMultiply
205 requires two parts to hold the single-part result). So we add an
206 extra one to guarantee enough space whilst multiplying. */
207const unsigned int maxExponent = 16383;
208const unsigned int maxPrecision = 113;
210const unsigned int maxPowerOfFiveParts =
211 2 +
213
214unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
215 return semantics.precision;
216}
219 return semantics.maxExponent;
220}
223 return semantics.minExponent;
224}
225unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
226 return semantics.sizeInBits;
227}
229 bool isSigned) {
230 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
231 // at least one more bit than the MaxExponent to hold the max FP value.
232 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
233 // Extra sign bit needed.
234 if (isSigned)
235 ++MinBitWidth;
236 return MinBitWidth;
237}
238
240 return semantics.hasZero;
241}
242
244 return semantics.hasSignedRepr;
245}
246
250
254
256 // Keep in sync with Type::isIEEELikeFPTy
257 return SemanticsToEnum(semantics) <= S_IEEEquad;
258}
259
261 return semantics.hasSignBitInMSB;
262}
263
265 const fltSemantics &Dst) {
266 // Exponent range must be larger.
267 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
268 return false;
269
270 // If the mantissa is long enough, the result value could still be denormal
271 // with a larger exponent range.
272 //
273 // FIXME: This condition is probably not accurate but also shouldn't be a
274 // practical concern with existing types.
275 return Dst.precision >= Src.precision;
276}
277
279 return Sem.sizeInBits;
280}
281
282static constexpr APFloatBase::ExponentType
283exponentZero(const fltSemantics &semantics) {
284 return semantics.minExponent - 1;
285}
286
287static constexpr APFloatBase::ExponentType
288exponentInf(const fltSemantics &semantics) {
289 return semantics.maxExponent + 1;
290}
291
292static constexpr APFloatBase::ExponentType
293exponentNaN(const fltSemantics &semantics) {
296 return exponentZero(semantics);
297 if (semantics.hasSignedRepr)
298 return semantics.maxExponent;
299 }
300 return semantics.maxExponent + 1;
301}
302
303/* A bunch of private, handy routines. */
304
305static inline Error createError(const Twine &Err) {
307}
308
309static constexpr inline unsigned int partCountForBits(unsigned int bits) {
310 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
312}
313
/* Map the character code C onto its decimal digit value.  For '0'..'9'
   the result is 0U-9U.  Any other input produces a value >= 10U (codes
   below '0' wrap around in unsigned arithmetic), which callers treat as
   "not a digit". */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zero = '0';
  return c - zero;
}
320
321/* Return the value of a decimal exponent of the form
322 [+-]ddddddd.
323
324 If the exponent overflows, returns a large exponent with the
325 appropriate sign. */
328 const unsigned int overlargeExponent = 24000; /* FIXME. */
329 StringRef::iterator p = begin;
330
331 // Treat no exponent as 0 to match binutils
332 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end))
333 return 0;
334
335 bool isNegative = *p == '-';
336 if (*p == '-' || *p == '+') {
337 p++;
338 if (p == end)
339 return createError("Exponent has no digits");
340 }
341
342 unsigned absExponent = decDigitValue(*p++);
343 if (absExponent >= 10U)
344 return createError("Invalid character in exponent");
345
346 for (; p != end; ++p) {
347 unsigned value = decDigitValue(*p);
348 if (value >= 10U)
349 return createError("Invalid character in exponent");
350
351 absExponent = absExponent * 10U + value;
352 if (absExponent >= overlargeExponent) {
353 absExponent = overlargeExponent;
354 break;
355 }
356 }
357
358 if (isNegative)
359 return -(int) absExponent;
360 else
361 return (int) absExponent;
362}
363
364/* This is ugly and needs cleaning up, but I don't immediately see
365 how whilst remaining safe. */
368 int exponentAdjustment) {
369 int exponent = 0;
370
371 if (p == end)
372 return createError("Exponent has no digits");
373
374 bool negative = *p == '-';
375 if (*p == '-' || *p == '+') {
376 p++;
377 if (p == end)
378 return createError("Exponent has no digits");
379 }
380
381 int unsignedExponent = 0;
382 bool overflow = false;
383 for (; p != end; ++p) {
384 unsigned int value;
385
386 value = decDigitValue(*p);
387 if (value >= 10U)
388 return createError("Invalid character in exponent");
389
390 unsignedExponent = unsignedExponent * 10 + value;
391 if (unsignedExponent > 32767) {
392 overflow = true;
393 break;
394 }
395 }
396
397 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
398 overflow = true;
399
400 if (!overflow) {
401 exponent = unsignedExponent;
402 if (negative)
403 exponent = -exponent;
404 exponent += exponentAdjustment;
405 if (exponent > 32767 || exponent < -32768)
406 overflow = true;
407 }
408
409 if (overflow)
410 exponent = negative ? -32768: 32767;
411
412 return exponent;
413}
414
417 StringRef::iterator *dot) {
418 StringRef::iterator p = begin;
419 *dot = end;
420 while (p != end && *p == '0')
421 p++;
422
423 if (p != end && *p == '.') {
424 *dot = p++;
425
426 if (end - begin == 1)
427 return createError("Significand has no digits");
428
429 while (p != end && *p == '0')
430 p++;
431 }
432
433 return p;
434}
435
436/* Given a normal decimal floating point number of the form
437
438 dddd.dddd[eE][+-]ddd
439
440 where the decimal point and exponent are optional, fill out the
441 structure D. Exponent is appropriate if the significand is
442 treated as an integer, and normalizedExponent if the significand
443 is taken to have the decimal point after a single leading
444 non-zero digit.
445
446 If the value is zero, D->firstSigDigit points to a non-digit, and
447 the return exponent is zero.
448*/
450 const char *firstSigDigit;
451 const char *lastSigDigit;
454};
455
458 StringRef::iterator dot = end;
459
460 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
461 if (!PtrOrErr)
462 return PtrOrErr.takeError();
463 StringRef::iterator p = *PtrOrErr;
464
465 D->firstSigDigit = p;
466 D->exponent = 0;
467 D->normalizedExponent = 0;
468
469 for (; p != end; ++p) {
470 if (*p == '.') {
471 if (dot != end)
472 return createError("String contains multiple dots");
473 dot = p++;
474 if (p == end)
475 break;
476 }
477 if (decDigitValue(*p) >= 10U)
478 break;
479 }
480
481 if (p != end) {
482 if (*p != 'e' && *p != 'E')
483 return createError("Invalid character in significand");
484 if (p == begin)
485 return createError("Significand has no digits");
486 if (dot != end && p - begin == 1)
487 return createError("Significand has no digits");
488
489 /* p points to the first non-digit in the string */
490 auto ExpOrErr = readExponent(p + 1, end);
491 if (!ExpOrErr)
492 return ExpOrErr.takeError();
493 D->exponent = *ExpOrErr;
494
495 /* Implied decimal point? */
496 if (dot == end)
497 dot = p;
498 }
499
500 /* If number is all zeroes accept any exponent. */
501 if (p != D->firstSigDigit) {
502 /* Drop insignificant trailing zeroes. */
503 if (p != begin) {
504 do
505 do
506 p--;
507 while (p != begin && *p == '0');
508 while (p != begin && *p == '.');
509 }
510
511 /* Adjust the exponents for any decimal point. */
512 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
513 D->normalizedExponent = (D->exponent +
514 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
515 - (dot > D->firstSigDigit && dot < p)));
516 }
517
518 D->lastSigDigit = p;
519 return Error::success();
520}
521
522/* Return the trailing fraction of a hexadecimal number.
523 DIGITVALUE is the first hex digit of the fraction, P points to
524 the next digit. */
527 unsigned int digitValue) {
528 /* If the first trailing digit isn't 0 or 8 we can work out the
529 fraction immediately. */
530 if (digitValue > 8)
531 return lfMoreThanHalf;
532 else if (digitValue < 8 && digitValue > 0)
533 return lfLessThanHalf;
534
535 // Otherwise we need to find the first non-zero digit.
536 while (p != end && (*p == '0' || *p == '.'))
537 p++;
538
539 if (p == end)
540 return createError("Invalid trailing hexadecimal fraction!");
541
542 unsigned hexDigit = hexDigitValue(*p);
543
544 /* If we ran off the end it is exactly zero or one-half, otherwise
545 a little more. */
546 if (hexDigit == UINT_MAX)
547 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
548 else
549 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
550}
551
552/* Return the fraction lost were a bignum truncated losing the least
553 significant BITS bits. */
554static lostFraction
556 unsigned int partCount,
557 unsigned int bits)
558{
559 unsigned lsb = APInt::tcLSB(parts, partCount);
560
561 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
562 if (bits <= lsb)
563 return lfExactlyZero;
564 if (bits == lsb + 1)
565 return lfExactlyHalf;
566 if (bits <= partCount * APFloatBase::integerPartWidth &&
567 APInt::tcExtractBit(parts, bits - 1))
568 return lfMoreThanHalf;
569
570 return lfLessThanHalf;
571}
572
573/* Shift DST right BITS bits noting lost fraction. */
574static lostFraction
575shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
576{
577 lostFraction lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
578
579 APInt::tcShiftRight(dst, parts, bits);
580
581 return lost_fraction;
582}
583
584/* Combine the effect of two lost fractions. */
585static lostFraction
587 lostFraction lessSignificant)
588{
589 if (lessSignificant != lfExactlyZero) {
590 if (moreSignificant == lfExactlyZero)
591 moreSignificant = lfLessThanHalf;
592 else if (moreSignificant == lfExactlyHalf)
593 moreSignificant = lfMoreThanHalf;
594 }
595
596 return moreSignificant;
597}
598
/* Bound, in half-ulps, on the error from the true value when two
   floating point numbers are multiplied.  The operands differ from
   the values they approximate by at most HUERR1 and HUERR2 half-ulps;
   the actual error is strictly less than the returned value.
   INEXACTMULTIPLY adds one further half-ulp, or two when both operand
   errors are zero.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1,
                               unsigned int HUerr2) {
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int operandError = HUerr1 + HUerr2;
  const unsigned int inexact = inexactMultiply ? 1U : 0U;

  /* With exact operands only the multiplication itself contributes:
     <= inexactMultiply half-ulps. */
  if (operandError == 0)
    return inexact * 2;

  return inexact + 2 * operandError;
}
616
617/* The number of ulps from the boundary (zero, or half if ISNEAREST)
618 when the least significant BITS are truncated. BITS cannot be
619 zero. */
621ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
622 bool isNearest) {
623 assert(bits != 0);
624
625 bits--;
626 unsigned count = bits / APFloatBase::integerPartWidth;
627 unsigned partBits = bits % APFloatBase::integerPartWidth + 1;
628
630 parts[count] & (~(APFloatBase::integerPart)0 >>
631 (APFloatBase::integerPartWidth - partBits));
632
634 if (isNearest)
635 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
636 else
637 boundary = 0;
638
639 if (count == 0) {
640 if (part - boundary <= boundary - part)
641 return part - boundary;
642 else
643 return boundary - part;
644 }
645
646 if (part == boundary) {
647 while (--count)
648 if (parts[count])
649 return ~(APFloatBase::integerPart) 0; /* A lot. */
650
651 return parts[0];
652 } else if (part == boundary - 1) {
653 while (--count)
654 if (~parts[count])
655 return ~(APFloatBase::integerPart) 0; /* A lot. */
656
657 return -parts[0];
658 }
659
660 return ~(APFloatBase::integerPart) 0; /* A lot. */
661}
662
663/* Place pow(5, power) in DST, and return the number of parts used.
664 DST must be at least one part larger than size of the answer. */
665static unsigned int
666powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
667 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
669 pow5s[0] = 78125 * 5;
670
671 unsigned int partsCount = 1;
672 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
673 assert(power <= maxExponent);
674
675 p1 = dst;
676 p2 = scratch;
677
678 *p1 = firstEightPowers[power & 7];
679 power >>= 3;
680
681 unsigned result = 1;
682 pow5 = pow5s;
683
684 for (unsigned int n = 0; power; power >>= 1, n++) {
685 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
686 if (n != 0) {
687 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
688 partsCount, partsCount);
689 partsCount *= 2;
690 if (pow5[partsCount - 1] == 0)
691 partsCount--;
692 }
693
694 if (power & 1) {
696
697 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
698 result += partsCount;
699 if (p2[result - 1] == 0)
700 result--;
701
702 /* Now result is in p1 with partsCount parts and p2 is scratch
703 space. */
704 tmp = p1;
705 p1 = p2;
706 p2 = tmp;
707 }
708
709 pow5 += partsCount;
710 }
711
712 if (p1 != dst)
713 APInt::tcAssign(dst, p1, result);
714
715 return result;
716}
717
718/* Zero at the end to avoid modular arithmetic when adding one; used
719 when rounding up during hexadecimal output. */
720static const char hexDigitsLower[] = "0123456789abcdef0";
721static const char hexDigitsUpper[] = "0123456789ABCDEF0";
722static const char infinityL[] = "infinity";
723static const char infinityU[] = "INFINITY";
724static const char NaNL[] = "nan";
725static const char NaNU[] = "NAN";
726
727/* Write out an integerPart in hexadecimal, starting with the most
728 significant nibble. Write out exactly COUNT hexdigits, return
729 COUNT. */
730static unsigned int
731partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
732 const char *hexDigitChars)
733{
734 unsigned int result = count;
735
737
738 part >>= (APFloatBase::integerPartWidth - 4 * count);
739 while (count--) {
740 dst[count] = hexDigitChars[part & 0xf];
741 part >>= 4;
742 }
743
744 return result;
745}
746
/* Write N in decimal starting at DST, without a terminator, and return
   the position just past the last digit written. */
static char *writeUnsignedDecimal(char *dst, unsigned int n) {
  char digits[40];
  unsigned int len = 0;

  // Generate digits least-significant first; zero still yields one digit.
  do {
    digits[len++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  // Emit them in reverse so the most significant digit comes first.
  while (len != 0)
    *dst++ = digits[--len];

  return dst;
}
762
763/* Write out a signed decimal integer. */
764static char *writeSignedDecimal(char *dst, int value) {
765 if (value < 0) {
766 *dst++ = '-';
767 dst = writeUnsignedDecimal(dst, -(unsigned) value);
768 } else {
769 dst = writeUnsignedDecimal(dst, value);
770 }
771
772 return dst;
773}
774
775// Compute the ULP of the input using a definition from:
776// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
777// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
778static APFloat harrisonUlp(const APFloat &X) {
779 const fltSemantics &Sem = X.getSemantics();
780 switch (X.getCategory()) {
781 case APFloat::fcNaN:
782 return APFloat::getQNaN(Sem);
784 return APFloat::getInf(Sem);
785 case APFloat::fcZero:
786 return APFloat::getSmallest(Sem);
788 break;
789 }
790 if (X.isDenormal() || X.isSmallestNormalized())
791 return APFloat::getSmallest(Sem);
792 int Exp = ilogb(X);
793 if (X.getExactLog2() != INT_MIN)
794 Exp -= 1;
795 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
797}
798
799namespace detail {
800/* Constructors. */
801void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
802 semantics = ourSemantics;
803 unsigned count = partCount();
804 if (count > 1)
805 significand.parts = new integerPart[count];
806}
807
808void IEEEFloat::freeSignificand() {
809 if (needsCleanup())
810 delete [] significand.parts;
811}
812
813void IEEEFloat::assign(const IEEEFloat &rhs) {
814 assert(semantics == rhs.semantics);
815
816 sign = rhs.sign;
817 category = rhs.category;
818 exponent = rhs.exponent;
819 if (isFiniteNonZero() || category == fcNaN)
820 copySignificand(rhs);
821}
822
823void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
824 assert(isFiniteNonZero() || category == fcNaN);
825 assert(rhs.partCount() >= partCount());
826
827 APInt::tcAssign(significandParts(), rhs.significandParts(),
828 partCount());
829}
830
831/* Make this number a NaN, with an arbitrary but deterministic value
832 for the significand. If double or longer, this is a signalling NaN,
833 which may not be ideal. If float, this is QNaN(0). */
834void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
835 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
836 llvm_unreachable("This floating point format does not support NaN");
837
838 if (Negative && !semantics->hasSignedRepr)
840 "This floating point format does not support signed values");
841
842 category = fcNaN;
843 sign = Negative;
844 exponent = exponentNaN();
845
846 integerPart *significand = significandParts();
847 unsigned numParts = partCount();
848
849 APInt fill_storage;
850 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
851 // NaN-only types do not distinguish signalling and quiet NaN, so
852 // treat them all as quiet.
853 SNaN = false;
854 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
855 sign = true;
856 fill_storage = APInt::getZero(semantics->precision - 1);
857 } else {
858 fill_storage = APInt::getAllOnes(semantics->precision - 1);
859 }
860 fill = &fill_storage;
861 }
862
863 // Set the significand bits to the fill.
864 if (!fill || fill->getNumWords() < numParts)
865 APInt::tcSet(significand, 0, numParts);
866 if (fill) {
867 APInt::tcAssign(significand, fill->getRawData(),
868 std::min(fill->getNumWords(), numParts));
869
870 // Zero out the excess bits of the significand.
871 unsigned bitsToPreserve = semantics->precision - 1;
872 unsigned part = bitsToPreserve / 64;
873 bitsToPreserve %= 64;
874 significand[part] &= ((1ULL << bitsToPreserve) - 1);
875 for (part++; part != numParts; ++part)
876 significand[part] = 0;
877 }
878
879 unsigned QNaNBit =
880 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
881
882 if (SNaN) {
883 // We always have to clear the QNaN bit to make it an SNaN.
884 APInt::tcClearBit(significand, QNaNBit);
885
886 // If there are no bits set in the payload, we have to set
887 // *something* to make it a NaN instead of an infinity;
888 // conventionally, this is the next bit down from the QNaN bit.
889 if (APInt::tcIsZero(significand, numParts))
890 APInt::tcSetBit(significand, QNaNBit - 1);
891 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
892 // The only NaN is a quiet NaN, and it has no bits set in the significand.
893 // Do nothing.
894 } else {
895 // We always have to set the QNaN bit to make it a QNaN.
896 APInt::tcSetBit(significand, QNaNBit);
897 }
898
899 // For x87 extended precision, we want to make a NaN, not a
900 // pseudo-NaN. Maybe we should expose the ability to make
901 // pseudo-NaNs?
902 if (semantics == &APFloatBase::semX87DoubleExtended)
903 APInt::tcSetBit(significand, QNaNBit + 1);
904}
905
907 if (this != &rhs) {
908 if (semantics != rhs.semantics) {
909 freeSignificand();
910 initialize(rhs.semantics);
911 }
912 assign(rhs);
913 }
914
915 return *this;
916}
917
919 freeSignificand();
920
921 semantics = rhs.semantics;
922 significand = rhs.significand;
923 exponent = rhs.exponent;
924 category = rhs.category;
925 sign = rhs.sign;
926
927 rhs.semantics = &APFloatBase::semBogus;
928 return *this;
929}
930
932 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
933 (APInt::tcExtractBit(significandParts(),
934 semantics->precision - 1) == 0);
935}
936
938 // The smallest number by magnitude in our format will be the smallest
939 // denormal, i.e. the floating point number with exponent being minimum
940 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
941 return isFiniteNonZero() && exponent == semantics->minExponent &&
942 significandMSB() == 0;
943}
944
946 return getCategory() == fcNormal && exponent == semantics->minExponent &&
947 isSignificandAllZerosExceptMSB();
948}
949
950unsigned int IEEEFloat::getNumHighBits() const {
951 const unsigned int PartCount = partCountForBits(semantics->precision);
952 const unsigned int Bits = PartCount * integerPartWidth;
953
954 // Compute how many bits are used in the final word.
955 // When precision is just 1, it represents the 'Pth'
956 // Precision bit and not the actual significand bit.
957 const unsigned int NumHighBits = (semantics->precision > 1)
958 ? (Bits - semantics->precision + 1)
959 : (Bits - semantics->precision);
960 return NumHighBits;
961}
962
963bool IEEEFloat::isSignificandAllOnes() const {
964 // Test if the significand excluding the integral bit is all ones. This allows
965 // us to test for binade boundaries.
966 const integerPart *Parts = significandParts();
967 const unsigned PartCount = partCountForBits(semantics->precision);
968 for (unsigned i = 0; i < PartCount - 1; i++)
969 if (~Parts[i])
970 return false;
971
972 // Set the unused high bits to all ones when we compare.
973 const unsigned NumHighBits = getNumHighBits();
974 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
975 "Can not have more high bits to fill than integerPartWidth");
976 const integerPart HighBitFill =
977 ~integerPart(0) << (integerPartWidth - NumHighBits);
978 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
979 return false;
980
981 return true;
982}
983
984bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
985 // Test if the significand excluding the integral bit is all ones except for
986 // the least significant bit.
987 const integerPart *Parts = significandParts();
988
989 if (Parts[0] & 1)
990 return false;
991
992 const unsigned PartCount = partCountForBits(semantics->precision);
993 for (unsigned i = 0; i < PartCount - 1; i++) {
994 if (~Parts[i] & ~unsigned{!i})
995 return false;
996 }
997
998 // Set the unused high bits to all ones when we compare.
999 const unsigned NumHighBits = getNumHighBits();
1000 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1001 "Can not have more high bits to fill than integerPartWidth");
1002 const integerPart HighBitFill = ~integerPart(0)
1003 << (integerPartWidth - NumHighBits);
1004 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1005 return false;
1006
1007 return true;
1008}
1009
1010bool IEEEFloat::isSignificandAllZeros() const {
1011 // Test if the significand excluding the integral bit is all zeros. This
1012 // allows us to test for binade boundaries.
1013 const integerPart *Parts = significandParts();
1014 const unsigned PartCount = partCountForBits(semantics->precision);
1015
1016 for (unsigned i = 0; i < PartCount - 1; i++)
1017 if (Parts[i])
1018 return false;
1019
1020 // Compute how many bits are used in the final word.
1021 const unsigned NumHighBits = getNumHighBits();
1022 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1023 "clear than integerPartWidth");
1024 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1025
1026 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1027 return false;
1028
1029 return true;
1030}
1031
1032bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1033 const integerPart *Parts = significandParts();
1034 const unsigned PartCount = partCountForBits(semantics->precision);
1035
1036 for (unsigned i = 0; i < PartCount - 1; i++) {
1037 if (Parts[i])
1038 return false;
1039 }
1040
1041 const unsigned NumHighBits = getNumHighBits();
1042 const integerPart MSBMask = integerPart(1)
1043 << (integerPartWidth - NumHighBits);
1044 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1045}
1046
1048 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1049 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1050 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1051 // The largest number by magnitude in our format will be the floating point
1052 // number with maximum exponent and with significand that is all ones except
1053 // the LSB.
1054 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1055 ? isSignificandAllOnesExceptLSB()
1056 : IsMaxExp;
1057 } else {
1058 // The largest number by magnitude in our format will be the floating point
1059 // number with maximum exponent and with significand that is all ones.
1060 return IsMaxExp && isSignificandAllOnes();
1061 }
1062}
1063
1065 // This could be made more efficient; I'm going for obviously correct.
1066 if (!isFinite()) return false;
1067 IEEEFloat truncated = *this;
1068 truncated.roundToIntegral(rmTowardZero);
1069 return compare(truncated) == cmpEqual;
1070}
1071
1072bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1073 if (this == &rhs)
1074 return true;
1075 if (semantics != rhs.semantics ||
1076 category != rhs.category ||
1077 sign != rhs.sign)
1078 return false;
1079 if (category==fcZero || category==fcInfinity)
1080 return true;
1081
1082 if (isFiniteNonZero() && exponent != rhs.exponent)
1083 return false;
1084
1085 return std::equal(significandParts(), significandParts() + partCount(),
1086 rhs.significandParts());
1087}
1088
1090 initialize(&ourSemantics);
1091 sign = 0;
1092 category = fcNormal;
1093 zeroSignificand();
1094 exponent = ourSemantics.precision - 1;
1095 significandParts()[0] = value;
1097}
1098
1100 initialize(&ourSemantics);
1101 // The Float8E8M0FNU format does not have a representation
1102 // for zero. So, use the closest representation instead.
1103 // Moreover, the all-zero encoding represents a valid
1104 // normal value (which is the smallestNormalized here).
1105 // Hence, we call makeSmallestNormalized (where category is
1106 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1107 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1108}
1109
1110// Delegate to the previous constructor, because later copy constructor may
1111// actually inspects category, which can't be garbage.
1113 : IEEEFloat(ourSemantics) {}
1114
1116 initialize(rhs.semantics);
1117 assign(rhs);
1118}
1119
1120IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1121 *this = std::move(rhs);
1122}
1123
1124IEEEFloat::~IEEEFloat() { freeSignificand(); }
1125
1126unsigned int IEEEFloat::partCount() const {
1127 return partCountForBits(semantics->precision + 1);
1128}
1129
1130const APFloat::integerPart *IEEEFloat::significandParts() const {
1131 return const_cast<IEEEFloat *>(this)->significandParts();
1132}
1133
1134APFloat::integerPart *IEEEFloat::significandParts() {
1135 if (partCount() > 1)
1136 return significand.parts;
1137 else
1138 return &significand.part;
1139}
1140
1141void IEEEFloat::zeroSignificand() {
1142 APInt::tcSet(significandParts(), 0, partCount());
1143}
1144
1145/* Increment an fcNormal floating point number's significand. */
1146void IEEEFloat::incrementSignificand() {
1147 [[maybe_unused]] integerPart carry =
1148 APInt::tcIncrement(significandParts(), partCount());
1149
1150 /* Our callers should never cause us to overflow. */
1151 assert(carry == 0);
1152}
1153
1154/* Add the significand of the RHS. Returns the carry flag. */
1155APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1156 integerPart *parts = significandParts();
1157
1158 assert(semantics == rhs.semantics);
1159 assert(exponent == rhs.exponent);
1160
1161 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1162}
1163
1164/* Subtract the significand of the RHS with a borrow flag. Returns
1165 the borrow flag. */
1166APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1167 integerPart borrow) {
1168 integerPart *parts = significandParts();
1169
1170 assert(semantics == rhs.semantics);
1171 assert(exponent == rhs.exponent);
1172
1173 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1174 partCount());
1175}
1176
// Core of multiply and fused-multiply-add on significands.
//   rhs          - the multiplier; must share our semantics.
//   addend       - value added to the full-precision product (taken by value).
//   ignoreAddend - when true the addend is skipped even if it is non-zero.
// The exponent is updated here, but the result is only normalized when the
// product's MSB lies above `precision`; see the note near the end.
1177/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1178 on to the full-precision result of the multiplication. Returns the
1179 lost fraction. */
1180lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1181 IEEEFloat addend,
1182 bool ignoreAddend) {
1183 integerPart scratch[4];
1184 bool ignored;
1185
1186 assert(semantics == rhs.semantics);
1187
1188 unsigned precision = semantics->precision;
1189
1190 // Allocate space for twice as many bits as the original significand, plus one
1191 // extra bit for the addition to overflow into.
1192 unsigned newPartsCount = partCountForBits(precision * 2 + 1);
1193
1194 // FIXME: Replace with SmallVector<4>.
// The on-stack scratch buffer is used for up to four parts; larger products
// are heap-allocated and released at the end of the function.
1195 integerPart *fullSignificand =
1196 newPartsCount > 4 ? new integerPart[newPartsCount] : scratch;
1197
1198 integerPart *lhsSignificand = significandParts();
1199 unsigned partsCount = partCount();
1200
1201 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1202 rhs.significandParts(), partsCount, partsCount);
1203
1204 lostFraction lost_fraction = lfExactlyZero;
1205 // One, not zero, based MSB.
1206 unsigned omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1207 exponent += rhs.exponent;
1208
1209 // Assume the operands involved in the multiplication are single-precision
1210 // FP, and the two multiplicands are:
1211 // *this = a23 . a22 ... a0 * 2^e1
1212 // rhs = b23 . b22 ... b0 * 2^e2
1213 // the result of multiplication is:
1214 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1215 // Note that there are three significant bits at the left-hand side of the
1216 // radix point: two for the multiplication, and an overflow bit for the
1217 // addition (that will always be zero at this point). Move the radix point
1218 // toward left by two bits, and adjust exponent accordingly.
1219 exponent += 2;
1220
1221 if (!ignoreAddend && addend.isNonZero()) {
1222 // The intermediate result of the multiplication has "2 * precision"
1223 // significant bits; adjust the addend to be consistent with mul result.
1224 //
// Our significand and semantics are swapped out temporarily so that
// addOrSubtractSignificand operates on the wide product; both are restored
// below.
1225 Significand savedSignificand = significand;
1226 const fltSemantics *savedSemantics = semantics;
1227
1228 // Normalize our MSB to one below the top bit to allow for overflow.
1229 unsigned extendedPrecision = 2 * precision + 1;
1230 if (omsb != extendedPrecision - 1) {
1231 assert(extendedPrecision > omsb);
1232 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1233 (extendedPrecision - 1) - omsb);
1234 exponent -= (extendedPrecision - 1) - omsb;
1235 }
1236
1237 /* Create new semantics. */
1238 fltSemantics extendedSemantics = *semantics;
1239 extendedSemantics.precision = extendedPrecision;
1240
1241 if (newPartsCount == 1)
1242 significand.part = fullSignificand[0];
1243 else
1244 significand.parts = fullSignificand;
1245 semantics = &extendedSemantics;
1246
1247 // Make a copy so we can convert it to the extended semantics.
1248 // Note that we cannot convert the addend directly, as the extendedSemantics
1249 // is a local variable (which we take a reference to).
1250 IEEEFloat extendedAddend(addend);
1251 [[maybe_unused]] opStatus status = extendedAddend.convert(
1252 extendedSemantics, APFloat::rmTowardZero, &ignored);
1253 assert(status == APFloat::opOK);
1254
1255 // Shift the significand of the addend right by one bit. This guarantees
1256 // that the high bit of the significand is zero (same as fullSignificand),
1257 // so the addition will overflow (if it does overflow at all) into the top bit.
1258 lost_fraction = extendedAddend.shiftSignificandRight(1);
1259 assert(lost_fraction == lfExactlyZero &&
1260 "Lost precision while shifting addend for fused-multiply-add.");
1261
1262 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1263
1264 /* Restore our state. */
// In the single-part case the sum lives in significand.part, so copy it back
// into the working buffer before restoring.
1265 if (newPartsCount == 1)
1266 fullSignificand[0] = significand.part;
1267 significand = savedSignificand;
1268 semantics = savedSemantics;
1269
1270 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1271 }
1272
1273 // Convert the result having "2 * precision" significant-bits back to the one
1274 // having "precision" significant-bits. First, move the radix point from
1275 // position "2*precision - 1" to "precision - 1". The exponent needs to be
1276 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1277 exponent -= precision + 1;
1278
1279 // In case MSB resides at the left-hand side of radix point, shift the
1280 // mantissa right by some amount to make sure the MSB reside right before
1281 // the radix point (i.e. "MSB . rest-significant-bits").
1282 //
1283 // Note that the result is not normalized when "omsb < precision". So, the
1284 // caller needs to call IEEEFloat::normalize() if normalized value is
1285 // expected.
1286 if (omsb > precision) {
1287 unsigned int bits, significantParts;
1288 lostFraction lf;
1289
1290 bits = omsb - precision;
1291 significantParts = partCountForBits(omsb);
1292 lf = shiftRight(fullSignificand, significantParts, bits);
1293 lost_fraction = combineLostFractions(lf, lost_fraction);
1294 exponent += bits;
1295 }
1296
1297 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1298
1299 if (newPartsCount > 4)
1300 delete [] fullSignificand;
1301
1302 return lost_fraction;
1303}
1304
1305lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1306 // When the given semantics has zero, the addend here is a zero.
1307 // i.e . it belongs to the 'fcZero' category.
1308 // But when the semantics does not support zero, we need to
1309 // explicitly convey that this addend should be ignored
1310 // for multiplication.
1311 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1312}
1313
1314/* Divide our significand by the significand of RHS using bit-at-a-time
   long division, storing the quotient in our significand and adjusting our
   exponent.  Returns the lost fraction, derived from the final remainder. */
1315lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1316 integerPart scratch[4];
1317
1318 assert(semantics == rhs.semantics);
1319
1320 integerPart *lhsSignificand = significandParts();
1321 const integerPart *rhsSignificand = rhs.significandParts();
1322 unsigned partsCount = partCount();
1323
// A single buffer holds both working copies: the dividend in the first
// partsCount words and the divisor in the next partsCount words.  The stack
// scratch suffices for up to two parts each.
1324 integerPart *dividend =
1325 partsCount > 2 ? new integerPart[partsCount * 2] : scratch;
1326 integerPart *divisor = dividend + partsCount;
1327
1328 /* Copy the dividend and divisor as they will be modified in-place. */
// Our own significand is zeroed in the same pass; the quotient bits are set
// into it by the long-division loop below.
1329 for (unsigned i = 0; i < partsCount; i++) {
1330 dividend[i] = lhsSignificand[i];
1331 divisor[i] = rhsSignificand[i];
1332 lhsSignificand[i] = 0;
1333 }
1334
1335 exponent -= rhs.exponent;
1336
1337 unsigned int precision = semantics->precision;
1338
1339 /* Normalize the divisor. */
1340 unsigned bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1341 if (bit) {
1342 exponent += bit;
1343 APInt::tcShiftLeft(divisor, partsCount, bit);
1344 }
1345
1346 /* Normalize the dividend. */
1347 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1348 if (bit) {
1349 exponent -= bit;
1350 APInt::tcShiftLeft(dividend, partsCount, bit);
1351 }
1352
1353 /* Ensure the dividend >= divisor initially for the loop below.
1354 Incidentally, this means that the division loop below is
1355 guaranteed to set the integer bit to one. */
1356 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1357 exponent--;
1358 APInt::tcShiftLeft(dividend, partsCount, 1);
1359 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1360 }
1361
1362 /* Long division. */
// Produces one quotient bit per iteration, from bit (precision - 1) down to
// bit 0.
1363 for (bit = precision; bit; bit -= 1) {
1364 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1365 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1366 APInt::tcSetBit(lhsSignificand, bit - 1);
1367 }
1368
1369 APInt::tcShiftLeft(dividend, partsCount, 1);
1370 }
1371
1372 /* Figure out the lost fraction. */
// `dividend` now holds twice the remainder (after the loop's final shift), so
// comparing it with the divisor classifies the remainder relative to half.
1373 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1374
1375 lostFraction lost_fraction;
1376 if (cmp > 0)
1377 lost_fraction = lfMoreThanHalf;
1378 else if (cmp == 0)
1379 lost_fraction = lfExactlyHalf;
1380 else if (APInt::tcIsZero(dividend, partsCount))
1381 lost_fraction = lfExactlyZero;
1382 else
1383 lost_fraction = lfLessThanHalf;
1384
1385 if (partsCount > 2)
1386 delete [] dividend;
1387
1388 return lost_fraction;
1389}
1390
1391unsigned int IEEEFloat::significandMSB() const {
1392 return APInt::tcMSB(significandParts(), partCount());
1393}
1394
1395unsigned int IEEEFloat::significandLSB() const {
1396 return APInt::tcLSB(significandParts(), partCount());
1397}
1398
1399/* Note that a zero result is NOT normalized to fcZero. */
1400lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1401 /* Our exponent should not overflow. */
1402 assert((ExponentType) (exponent + bits) >= exponent);
1403
1404 exponent += bits;
1405
1406 return shiftRight(significandParts(), partCount(), bits);
1407}
1408
1409/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1410void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1411 assert(bits < semantics->precision ||
1412 (semantics->precision == 1 && bits <= 1));
1413
1414 if (bits) {
1415 unsigned int partsCount = partCount();
1416
1417 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1418 exponent -= bits;
1419
1420 assert(!APInt::tcIsZero(significandParts(), partsCount));
1421 }
1422}
1423
1425 assert(semantics == rhs.semantics);
1427 assert(rhs.isFiniteNonZero());
1428
1429 int compare = exponent - rhs.exponent;
1430
1431 /* If exponents are equal, do an unsigned bignum comparison of the
1432 significands. */
1433 if (compare == 0)
1434 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1435 partCount());
1436
1437 if (compare > 0)
1438 return cmpGreaterThan;
1439 else if (compare < 0)
1440 return cmpLessThan;
1441 else
1442 return cmpEqual;
1443}
1444
1445/* Set the least significant BITS bits of a bignum, clear the
1446 rest. */
1447static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1448 unsigned bits) {
1449 unsigned i = 0;
1450 while (bits > APInt::APINT_BITS_PER_WORD) {
1451 dst[i++] = ~(APInt::WordType)0;
1453 }
1454
1455 if (bits)
1456 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1457
1458 while (i < parts)
1459 dst[i++] = 0;
1460}
1461
1462/* Handle overflow. Sign is preserved. We either become infinity or
1463 the largest finite number. */
1464APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1466 /* Infinity? */
1467 if (rounding_mode == rmNearestTiesToEven ||
1468 rounding_mode == rmNearestTiesToAway ||
1469 (rounding_mode == rmTowardPositive && !sign) ||
1470 (rounding_mode == rmTowardNegative && sign)) {
1472 makeNaN(false, sign);
1473 else
1474 category = fcInfinity;
1475 return static_cast<opStatus>(opOverflow | opInexact);
1476 }
1477 }
1478
1479 /* Otherwise we become the largest finite number. */
1480 category = fcNormal;
1481 exponent = semantics->maxExponent;
1482 tcSetLeastSignificantBits(significandParts(), partCount(),
1483 semantics->precision);
1484 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1485 semantics->nanEncoding == fltNanEncoding::AllOnes)
1486 APInt::tcClearBit(significandParts(), 0);
1487
1488 return opInexact;
1489}
1490
1491/* Returns TRUE if, when truncating the current number, with BIT the
1492 new LSB, with the given lost fraction and rounding mode, the result
1493 would need to be rounded away from zero (i.e., by increasing the
1494 signficand). This routine must work for fcZero of both signs, and
1495 fcNormal numbers. */
1496bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1497 lostFraction lost_fraction,
1498 unsigned int bit) const {
1499 /* NaNs and infinities should not have lost fractions. */
1500 assert(isFiniteNonZero() || category == fcZero);
1501
1502 /* Current callers never pass this so we don't handle it. */
1503 assert(lost_fraction != lfExactlyZero);
1504
1505 switch (rounding_mode) {
1507 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1508
1510 if (lost_fraction == lfMoreThanHalf)
1511 return true;
1512
1513 /* Our zeroes don't have a significand to test. */
1514 if (lost_fraction == lfExactlyHalf && category != fcZero)
1515 return APInt::tcExtractBit(significandParts(), bit);
1516
1517 return false;
1518
1519 case rmTowardZero:
1520 return false;
1521
1522 case rmTowardPositive:
1523 return !sign;
1524
1525 case rmTowardNegative:
1526 return sign;
1527
1528 default:
1529 break;
1530 }
1531 llvm_unreachable("Invalid rounding mode found");
1532}
1533
1534APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1535 lostFraction lost_fraction) {
1536 if (!isFiniteNonZero())
1537 return opOK;
1538
1539 /* Before rounding normalize the exponent of fcNormal numbers. */
1540 /* One, not zero, based MSB. */
1541 unsigned omsb = significandMSB() + 1;
1542
1543 // Only skip this `if` if the value is exactly zero.
1544 if (omsb || lost_fraction != lfExactlyZero) {
1545 /* OMSB is numbered from 1. We want to place it in the integer
1546 bit numbered PRECISION if possible, with a compensating change in
1547 the exponent. */
1548 int exponentChange = omsb - semantics->precision;
1549
1550 /* If the resulting exponent is too high, overflow according to
1551 the rounding mode. */
1552 if (exponent + exponentChange > semantics->maxExponent)
1553 return handleOverflow(rounding_mode);
1554
1555 /* Subnormal numbers have exponent minExponent, and their MSB
1556 is forced based on that. */
1557 if (exponent + exponentChange < semantics->minExponent)
1558 exponentChange = semantics->minExponent - exponent;
1559
1560 /* Shifting left is easy as we don't lose precision. */
1561 if (exponentChange < 0) {
1562 assert(lost_fraction == lfExactlyZero);
1563
1564 shiftSignificandLeft(-exponentChange);
1565
1566 return opOK;
1567 }
1568
1569 if (exponentChange > 0) {
1570 lostFraction lf;
1571
1572 /* Shift right and capture any new lost fraction. */
1573 lf = shiftSignificandRight(exponentChange);
1574
1575 lost_fraction = combineLostFractions(lf, lost_fraction);
1576
1577 /* Keep OMSB up-to-date. */
1578 if (omsb > (unsigned) exponentChange)
1579 omsb -= exponentChange;
1580 else
1581 omsb = 0;
1582 }
1583 }
1584
1585 // The all-ones values is an overflow if NaN is all ones. If NaN is
1586 // represented by negative zero, then it is a valid finite value.
1587 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1588 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1589 exponent == semantics->maxExponent && isSignificandAllOnes())
1590 return handleOverflow(rounding_mode);
1591
1592 /* Now round the number according to rounding_mode given the lost
1593 fraction. */
1594
1595 /* As specified in IEEE 754, since we do not trap we do not report
1596 underflow for exact results. */
1597 if (lost_fraction == lfExactlyZero) {
1598 /* Canonicalize zeroes. */
1599 if (omsb == 0) {
1600 category = fcZero;
1601 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1602 sign = false;
1603 if (!semantics->hasZero)
1605 }
1606
1607 return opOK;
1608 }
1609
1610 /* Increment the significand if we're rounding away from zero. */
1611 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1612 if (omsb == 0)
1613 exponent = semantics->minExponent;
1614
1615 incrementSignificand();
1616 omsb = significandMSB() + 1;
1617
1618 /* Did the significand increment overflow? */
1619 if (omsb == (unsigned) semantics->precision + 1) {
1620 /* Renormalize by incrementing the exponent and shifting our
1621 significand right one. However if we already have the
1622 maximum exponent we overflow to infinity. */
1623 if (exponent == semantics->maxExponent)
1624 // Invoke overflow handling with a rounding mode that will guarantee
1625 // that the result gets turned into the correct infinity representation.
1626 // This is needed instead of just setting the category to infinity to
1627 // account for 8-bit floating point types that have no inf, only NaN.
1628 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1629
1630 shiftSignificandRight(1);
1631
1632 return opInexact;
1633 }
1634
1635 // The all-ones values is an overflow if NaN is all ones. If NaN is
1636 // represented by negative zero, then it is a valid finite value.
1637 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1638 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1639 exponent == semantics->maxExponent && isSignificandAllOnes())
1640 return handleOverflow(rounding_mode);
1641 }
1642
1643 /* The normal case - we were and are not denormal, and any
1644 significand increment above didn't overflow. */
1645 if (omsb == semantics->precision)
1646 return opInexact;
1647
1648 /* We have a non-zero denormal. */
1649 assert(omsb < semantics->precision);
1650
1651 /* Canonicalize zeroes. */
1652 if (omsb == 0) {
1653 category = fcZero;
1654 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1655 sign = false;
1656 // This condition handles the case where the semantics
1657 // does not have zero but uses the all-zero encoding
1658 // to represent the smallest normal value.
1659 if (!semantics->hasZero)
1661 }
1662
1663 /* The fcZero case is a denormal that underflowed to zero. */
1664 return (opStatus) (opUnderflow | opInexact);
1665}
1666
1667APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1668 bool subtract) {
1669 switch (PackCategoriesIntoKey(category, rhs.category)) {
1670 default:
1671 llvm_unreachable(nullptr);
1672
1676 assign(rhs);
1677 [[fallthrough]];
1682 if (isSignaling()) {
1683 makeQuiet();
1684 return opInvalidOp;
1685 }
1686 return rhs.isSignaling() ? opInvalidOp : opOK;
1687
1691 return opOK;
1692
1695 category = fcInfinity;
1696 sign = rhs.sign ^ subtract;
1697 return opOK;
1698
1700 assign(rhs);
1701 sign = rhs.sign ^ subtract;
1702 return opOK;
1703
1705 /* Sign depends on rounding mode; handled by caller. */
1706 return opOK;
1707
1709 /* Differently signed infinities can only be validly
1710 subtracted. */
1711 if (((sign ^ rhs.sign)!=0) != subtract) {
1712 makeNaN();
1713 return opInvalidOp;
1714 }
1715
1716 return opOK;
1717
1719 return opDivByZero;
1720 }
1721}
1722
1723/* Add or subtract two normal numbers. */
1724lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1725 bool subtract) {
1726 [[maybe_unused]] integerPart carry = 0;
1727 lostFraction lost_fraction;
1728
1729 /* Determine if the operation on the absolute values is effectively
1730 an addition or subtraction. */
1731 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1732
1733 /* Are we bigger exponent-wise than the RHS? */
1734 int bits = exponent - rhs.exponent;
1735
1736 /* Subtraction is more subtle than one might naively expect. */
1737 if (subtract) {
1738 if ((bits < 0) && !semantics->hasSignedRepr)
1740 "This floating point format does not support signed values");
1741
1742 IEEEFloat temp_rhs(rhs);
1743 bool lost_fraction_is_from_rhs = false;
1744
1745 if (bits == 0)
1746 lost_fraction = lfExactlyZero;
1747 else if (bits > 0) {
1748 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1749 lost_fraction_is_from_rhs = true;
1750 shiftSignificandLeft(1);
1751 } else {
1752 lost_fraction = shiftSignificandRight(-bits - 1);
1753 temp_rhs.shiftSignificandLeft(1);
1754 }
1755
1756 // Should we reverse the subtraction.
1757 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1758 if (cmp_result == cmpLessThan) {
1759 bool borrow =
1760 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1761 if (borrow) {
1762 // The lost fraction is being subtracted, borrow from the significand
1763 // and invert `lost_fraction`.
1764 if (lost_fraction == lfLessThanHalf)
1765 lost_fraction = lfMoreThanHalf;
1766 else if (lost_fraction == lfMoreThanHalf)
1767 lost_fraction = lfLessThanHalf;
1768 }
1769 carry = temp_rhs.subtractSignificand(*this, borrow);
1770 copySignificand(temp_rhs);
1771 sign = !sign;
1772 } else if (cmp_result == cmpGreaterThan) {
1773 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1774 if (borrow) {
1775 // The lost fraction is being subtracted, borrow from the significand
1776 // and invert `lost_fraction`.
1777 if (lost_fraction == lfLessThanHalf)
1778 lost_fraction = lfMoreThanHalf;
1779 else if (lost_fraction == lfMoreThanHalf)
1780 lost_fraction = lfLessThanHalf;
1781 }
1782 carry = subtractSignificand(temp_rhs, borrow);
1783 } else { // cmpEqual
1784 zeroSignificand();
1785 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1786 // rhs is slightly larger due to the lost fraction, flip the sign.
1787 sign = !sign;
1788 }
1789 }
1790
1791 /* The code above is intended to ensure that no borrow is
1792 necessary. */
1793 assert(!carry);
1794 } else {
1795 if (bits > 0) {
1796 IEEEFloat temp_rhs(rhs);
1797
1798 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1799 carry = addSignificand(temp_rhs);
1800 } else {
1801 lost_fraction = shiftSignificandRight(-bits);
1802 carry = addSignificand(rhs);
1803 }
1804
1805 /* We have a guard bit; generating a carry cannot happen. */
1806 assert(!carry);
1807 }
1808
1809 return lost_fraction;
1810}
1811
1812APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1813 switch (PackCategoriesIntoKey(category, rhs.category)) {
1814 default:
1815 llvm_unreachable(nullptr);
1816
1820 assign(rhs);
1821 sign = false;
1822 [[fallthrough]];
1827 sign ^= rhs.sign; // restore the original sign
1828 if (isSignaling()) {
1829 makeQuiet();
1830 return opInvalidOp;
1831 }
1832 return rhs.isSignaling() ? opInvalidOp : opOK;
1833
1837 category = fcInfinity;
1838 return opOK;
1839
1843 category = fcZero;
1844 return opOK;
1845
1848 makeNaN();
1849 return opInvalidOp;
1850
1852 return opOK;
1853 }
1854}
1855
1856APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1857 switch (PackCategoriesIntoKey(category, rhs.category)) {
1858 default:
1859 llvm_unreachable(nullptr);
1860
1864 assign(rhs);
1865 sign = false;
1866 [[fallthrough]];
1871 sign ^= rhs.sign; // restore the original sign
1872 if (isSignaling()) {
1873 makeQuiet();
1874 return opInvalidOp;
1875 }
1876 return rhs.isSignaling() ? opInvalidOp : opOK;
1877
1882 return opOK;
1883
1885 category = fcZero;
1886 return opOK;
1887
1889 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1890 makeNaN(false, sign);
1891 else
1892 category = fcInfinity;
1893 return opDivByZero;
1894
1897 makeNaN();
1898 return opInvalidOp;
1899
1901 return opOK;
1902 }
1903}
1904
1905APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1906 switch (PackCategoriesIntoKey(category, rhs.category)) {
1907 default:
1908 llvm_unreachable(nullptr);
1909
1913 assign(rhs);
1914 [[fallthrough]];
1919 if (isSignaling()) {
1920 makeQuiet();
1921 return opInvalidOp;
1922 }
1923 return rhs.isSignaling() ? opInvalidOp : opOK;
1924
1928 return opOK;
1929
1935 makeNaN();
1936 return opInvalidOp;
1937
1939 return opOK;
1940 }
1941}
1942
1943APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1944 switch (PackCategoriesIntoKey(category, rhs.category)) {
1945 default:
1946 llvm_unreachable(nullptr);
1947
1951 assign(rhs);
1952 [[fallthrough]];
1957 if (isSignaling()) {
1958 makeQuiet();
1959 return opInvalidOp;
1960 }
1961 return rhs.isSignaling() ? opInvalidOp : opOK;
1962
1966 return opOK;
1967
1973 makeNaN();
1974 return opInvalidOp;
1975
1977 return opDivByZero; // fake status, indicating this is not a special case
1978 }
1979}
1980
1981/* Change sign. */
1983 // With NaN-as-negative-zero, neither NaN or negative zero can change
1984 // their signs.
1985 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
1986 (isZero() || isNaN()))
1987 return;
1988 /* Look mummy, this one's easy. */
1989 sign = !sign;
1990}
1991
1992/* Normalized addition or subtraction. */
1993APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1994 roundingMode rounding_mode,
1995 bool subtract) {
1996 opStatus fs = addOrSubtractSpecials(rhs, subtract);
1997
1998 /* This return code means it was not a simple case. */
1999 if (fs == opDivByZero) {
2000 lostFraction lost_fraction;
2001
2002 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2003 fs = normalize(rounding_mode, lost_fraction);
2004
2005 /* Can only be zero if we lost no fraction. */
2006 assert(category != fcZero || lost_fraction == lfExactlyZero);
2007 }
2008
2009 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2010 positive zero unless rounding to minus infinity, except that
2011 adding two like-signed zeroes gives that zero. */
2012 if (category == fcZero) {
2013 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2014 sign = (rounding_mode == rmTowardNegative);
2015 // NaN-in-negative-zero means zeros need to be normalized to +0.
2016 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2017 sign = false;
2018 }
2019
2020 return fs;
2021}
2022
2023/* Normalized addition. */
2025 roundingMode rounding_mode) {
2026 return addOrSubtract(rhs, rounding_mode, false);
2027}
2028
2029/* Normalized subtraction. */
2031 roundingMode rounding_mode) {
2032 return addOrSubtract(rhs, rounding_mode, true);
2033}
2034
2035/* Normalized multiply. */
2037 roundingMode rounding_mode) {
2038 sign ^= rhs.sign;
2039 opStatus fs = multiplySpecials(rhs);
2040
2041 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2042 sign = false;
2043 if (isFiniteNonZero()) {
2044 lostFraction lost_fraction = multiplySignificand(rhs);
2045 fs = normalize(rounding_mode, lost_fraction);
2046 if (lost_fraction != lfExactlyZero)
2047 fs = (opStatus) (fs | opInexact);
2048 }
2049
2050 return fs;
2051}
2052
2053/* Normalized divide. */
2055 roundingMode rounding_mode) {
2056 sign ^= rhs.sign;
2057 opStatus fs = divideSpecials(rhs);
2058
2059 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2060 sign = false;
2061 if (isFiniteNonZero()) {
2062 lostFraction lost_fraction = divideSignificand(rhs);
2063 fs = normalize(rounding_mode, lost_fraction);
2064 if (lost_fraction != lfExactlyZero)
2065 fs = (opStatus) (fs | opInexact);
2066 }
2067
2068 return fs;
2069}
2070
2071/* Normalized remainder. */
2073 unsigned int origSign = sign;
2074
2075 // First handle the special cases.
2076 opStatus fs = remainderSpecials(rhs);
2077 if (fs != opDivByZero)
2078 return fs;
2079
2080 fs = opOK;
2081
2082 // Make sure the current value is less than twice the denom. If the addition
2083 // did not succeed (an overflow has happened), which means that the finite
2084 // value we currently posses must be less than twice the denom (as we are
2085 // using the same semantics).
2086 IEEEFloat P2 = rhs;
2087 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2088 fs = mod(P2);
2089 assert(fs == opOK);
2090 }
2091
2092 // Lets work with absolute numbers.
2093 IEEEFloat P = rhs;
2094 P.sign = false;
2095 sign = false;
2096
2097 //
2098 // To calculate the remainder we use the following scheme.
2099 //
2100 // The remainder is defained as follows:
2101 //
2102 // remainder = numer - rquot * denom = x - r * p
2103 //
2104 // Where r is the result of: x/p, rounded toward the nearest integral value
2105 // (with halfway cases rounded toward the even number).
2106 //
2107 // Currently, (after x mod 2p):
2108 // r is the number of 2p's present inside x, which is inherently, an even
2109 // number of p's.
2110 //
2111 // We may split the remaining calculation into 4 options:
2112 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2113 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2114 // are done as well.
2115 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2116 // to subtract 1p at least once.
2117 // - if x >= p then we must subtract p at least once, as x must be a
2118 // remainder.
2119 //
2120 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2121 //
2122 // We can now split the remaining calculation to the following 3 options:
2123 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2124 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2125 // must round up to the next even number. so we must subtract p once more.
2126 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2127 // integral, and subtract p once more.
2128 //
2129
2130 // Extend the semantics to prevent an overflow/underflow or inexact result.
2131 bool losesInfo;
2132 fltSemantics extendedSemantics = *semantics;
2133 extendedSemantics.maxExponent++;
2134 extendedSemantics.minExponent--;
2135 extendedSemantics.precision += 2;
2136
2137 IEEEFloat VEx = *this;
2138 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2139 assert(fs == opOK && !losesInfo);
2140 IEEEFloat PEx = P;
2141 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2142 assert(fs == opOK && !losesInfo);
2143
2144 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2145 // any fraction.
2146 fs = VEx.add(VEx, rmNearestTiesToEven);
2147 assert(fs == opOK);
2148
2149 if (VEx.compare(PEx) == cmpGreaterThan) {
2151 assert(fs == opOK);
2152
2153 // Make VEx = this.add(this), but because we have different semantics, we do
2154 // not want to `convert` again, so we just subtract PEx twice (which equals
2155 // to the desired value).
2156 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2157 assert(fs == opOK);
2158 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2159 assert(fs == opOK);
2160
2161 cmpResult result = VEx.compare(PEx);
2162 if (result == cmpGreaterThan || result == cmpEqual) {
2164 assert(fs == opOK);
2165 }
2166 }
2167
2168 if (isZero()) {
2169 sign = origSign; // IEEE754 requires this
2170 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2171 // But some 8-bit floats only have positive 0.
2172 sign = false;
2173 } else {
2174 sign ^= origSign;
2175 }
2176 return fs;
2177}
2178
2179/* Normalized llvm frem (C fmod). */
2181 opStatus fs = modSpecials(rhs);
2182 unsigned int origSign = sign;
2183
2184 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2186 int Exp = ilogb(*this) - ilogb(rhs);
2187 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2188 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2189 // check for it.
2190 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2191 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2192 V.sign = sign;
2193
2195
2196 // When the semantics supports zero, this loop's
2197 // exit-condition is handled by the 'isFiniteNonZero'
2198 // category check above. However, when the semantics
2199 // does not have 'fcZero' and we have reached the
2200 // minimum possible value, (and any further subtract
2201 // will underflow to the same value) explicitly
2202 // provide an exit-path here.
2203 if (!semantics->hasZero && this->isSmallest())
2204 break;
2205
2206 assert(fs==opOK);
2207 }
2208 if (isZero()) {
2209 sign = origSign; // fmod requires this
2210 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2211 sign = false;
2212 }
2213 return fs;
2214}
2215
2216/* Normalized fused-multiply-add. */
2218 const IEEEFloat &addend,
2219 roundingMode rounding_mode) {
2220 opStatus fs;
2221
2222 /* Post-multiplication sign, before addition. */
2223 sign ^= multiplicand.sign;
2224
2225 /* If and only if all arguments are normal do we need to do an
2226 extended-precision calculation. */
2227 if (isFiniteNonZero() &&
2228 multiplicand.isFiniteNonZero() &&
2229 addend.isFinite()) {
2230 lostFraction lost_fraction;
2231
2232 lost_fraction = multiplySignificand(multiplicand, addend);
2233 fs = normalize(rounding_mode, lost_fraction);
2234 if (lost_fraction != lfExactlyZero)
2235 fs = (opStatus) (fs | opInexact);
2236
2237 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2238 positive zero unless rounding to minus infinity, except that
2239 adding two like-signed zeroes gives that zero. */
2240 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2241 sign = (rounding_mode == rmTowardNegative);
2242 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2243 sign = false;
2244 }
2245 } else {
2246 fs = multiplySpecials(multiplicand);
2247
2248 /* FS can only be opOK or opInvalidOp. There is no more work
2249 to do in the latter case. The IEEE-754R standard says it is
2250 implementation-defined in this case whether, if ADDEND is a
2251 quiet NaN, we raise invalid op; this implementation does so.
2252
2253 If we need to do the addition we can do so with normal
2254 precision. */
2255 if (fs == opOK)
2256 fs = addOrSubtract(addend, rounding_mode, false);
2257 }
2258
2259 return fs;
2260}
2261
2262/* Rounding-mode correct round to integral value. */
2264 if (isInfinity())
2265 // [IEEE Std 754-2008 6.1]:
2266 // The behavior of infinity in floating-point arithmetic is derived from the
2267 // limiting cases of real arithmetic with operands of arbitrarily
2268 // large magnitude, when such a limit exists.
2269 // ...
2270 // Operations on infinite operands are usually exact and therefore signal no
2271 // exceptions ...
2272 return opOK;
2273
2274 if (isNaN()) {
2275 if (isSignaling()) {
2276 // [IEEE Std 754-2008 6.2]:
2277 // Under default exception handling, any operation signaling an invalid
2278 // operation exception and for which a floating-point result is to be
2279 // delivered shall deliver a quiet NaN.
2280 makeQuiet();
2281 // [IEEE Std 754-2008 6.2]:
2282 // Signaling NaNs shall be reserved operands that, under default exception
2283 // handling, signal the invalid operation exception(see 7.2) for every
2284 // general-computational and signaling-computational operation except for
2285 // the conversions described in 5.12.
2286 return opInvalidOp;
2287 } else {
2288 // [IEEE Std 754-2008 6.2]:
2289 // For an operation with quiet NaN inputs, other than maximum and minimum
2290 // operations, if a floating-point result is to be delivered the result
2291 // shall be a quiet NaN which should be one of the input NaNs.
2292 // ...
2293 // Every general-computational and quiet-computational operation involving
2294 // one or more input NaNs, none of them signaling, shall signal no
2295 // exception, except fusedMultiplyAdd might signal the invalid operation
2296 // exception(see 7.2).
2297 return opOK;
2298 }
2299 }
2300
2301 if (isZero()) {
2302 // [IEEE Std 754-2008 6.3]:
2303 // ... the sign of the result of conversions, the quantize operation, the
2304 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2305 // the sign of the first or only operand.
2306 return opOK;
2307 }
2308
2309 // If the exponent is large enough, we know that this value is already
2310 // integral, and the arithmetic below would potentially cause it to saturate
2311 // to +/-Inf. Bail out early instead.
2312 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2313 return opOK;
2314
2315 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2316 // precision of our format, and then subtract it back off again. The choice
2317 // of rounding modes for the addition/subtraction determines the rounding mode
2318 // for our integral rounding as well.
2319 // NOTE: When the input value is negative, we do subtraction followed by
2320 // addition instead.
2321 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2322 1);
2323 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2324 IEEEFloat MagicConstant(*semantics);
2325 opStatus fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2327 assert(fs == opOK);
2328 MagicConstant.sign = sign;
2329
2330 // Preserve the input sign so that we can handle the case of zero result
2331 // correctly.
2332 bool inputSign = isNegative();
2333
2334 fs = add(MagicConstant, rounding_mode);
2335
2336 // Current value and 'MagicConstant' are both integers, so the result of the
2337 // subtraction is always exact according to Sterbenz' lemma.
2338 subtract(MagicConstant, rounding_mode);
2339
2340 // Restore the input sign.
2341 if (inputSign != isNegative())
2342 changeSign();
2343
2344 return fs;
2345}
2346
2347/* Comparison requires normalized numbers. */
2349 assert(semantics == rhs.semantics);
2350
2351 switch (PackCategoriesIntoKey(category, rhs.category)) {
2352 default:
2353 llvm_unreachable(nullptr);
2354
2362 return cmpUnordered;
2363
2367 if (sign)
2368 return cmpLessThan;
2369 else
2370 return cmpGreaterThan;
2371
2375 if (rhs.sign)
2376 return cmpGreaterThan;
2377 else
2378 return cmpLessThan;
2379
2381 if (sign == rhs.sign)
2382 return cmpEqual;
2383 else if (sign)
2384 return cmpLessThan;
2385 else
2386 return cmpGreaterThan;
2387
2389 return cmpEqual;
2390
2392 break;
2393 }
2394
2395 cmpResult result;
2396 /* Two normal numbers. Do they have the same sign? */
2397 if (sign != rhs.sign) {
2398 if (sign)
2399 result = cmpLessThan;
2400 else
2401 result = cmpGreaterThan;
2402 } else {
2403 /* Compare absolute values; invert result if negative. */
2404 result = compareAbsoluteValue(rhs);
2405
2406 if (sign) {
2407 if (result == cmpLessThan)
2408 result = cmpGreaterThan;
2409 else if (result == cmpGreaterThan)
2410 result = cmpLessThan;
2411 }
2412 }
2413
2414 return result;
2415}
2416
2417/// IEEEFloat::convert - convert a value of one floating point type to another.
2418/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2419/// records whether the transformation lost information, i.e. whether
2420/// converting the result back to the original type will produce the
2421/// original value (this is almost the same as return value==fsOK, but there
2422/// are edge cases where this is not so).
2423
2425 roundingMode rounding_mode,
2426 bool *losesInfo) {
2427 opStatus fs;
2428 const fltSemantics &fromSemantics = *semantics;
2429 bool is_signaling = isSignaling();
2430
2432 unsigned newPartCount = partCountForBits(toSemantics.precision + 1);
2433 unsigned oldPartCount = partCount();
2434 int shift = toSemantics.precision - fromSemantics.precision;
2435
2436 bool X86SpecialNan = false;
2437 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2438 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2439 (!(*significandParts() & 0x8000000000000000ULL) ||
2440 !(*significandParts() & 0x4000000000000000ULL))) {
2441 // x86 has some unusual NaNs which cannot be represented in any other
2442 // format; note them here.
2443 X86SpecialNan = true;
2444 }
2445
2446 // If this is a truncation of a denormal number, and the target semantics
2447 // has larger exponent range than the source semantics (this can happen
2448 // when truncating from PowerPC double-double to double format), the
2449 // right shift could lose result mantissa bits. Adjust exponent instead
2450 // of performing excessive shift.
2451 // Also do a similar trick in case shifting denormal would produce zero
2452 // significand as this case isn't handled correctly by normalize.
2453 if (shift < 0 && isFiniteNonZero()) {
2454 int omsb = significandMSB() + 1;
2455 int exponentChange = omsb - fromSemantics.precision;
2456 if (exponent + exponentChange < toSemantics.minExponent)
2457 exponentChange = toSemantics.minExponent - exponent;
2458 exponentChange = std::max(exponentChange, shift);
2459 if (exponentChange < 0) {
2460 shift -= exponentChange;
2461 exponent += exponentChange;
2462 } else if (omsb <= -shift) {
2463 exponentChange = omsb + shift - 1; // leave at least one bit set
2464 shift -= exponentChange;
2465 exponent += exponentChange;
2466 }
2467 }
2468
2469 // If this is a truncation, perform the shift before we narrow the storage.
2470 if (shift < 0 && (isFiniteNonZero() ||
2471 (category == fcNaN && semantics->nonFiniteBehavior !=
2473 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2474
2475 // Fix the storage so it can hold to new value.
2476 if (newPartCount > oldPartCount) {
2477 // The new type requires more storage; make it available.
2478 integerPart *newParts;
2479 newParts = new integerPart[newPartCount];
2480 APInt::tcSet(newParts, 0, newPartCount);
2481 if (isFiniteNonZero() || category==fcNaN)
2482 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2483 freeSignificand();
2484 significand.parts = newParts;
2485 } else if (newPartCount == 1 && oldPartCount != 1) {
2486 // Switch to built-in storage for a single part.
2487 integerPart newPart = 0;
2488 if (isFiniteNonZero() || category==fcNaN)
2489 newPart = significandParts()[0];
2490 freeSignificand();
2491 significand.part = newPart;
2492 }
2493
2494 // Now that we have the right storage, switch the semantics.
2495 semantics = &toSemantics;
2496
2497 // If this is an extension, perform the shift now that the storage is
2498 // available.
2499 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2500 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2501
2502 if (isFiniteNonZero()) {
2503 fs = normalize(rounding_mode, lostFraction);
2504 *losesInfo = (fs != opOK);
2505 } else if (category == fcNaN) {
2506 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2507 *losesInfo =
2509 makeNaN(false, sign);
2510 return is_signaling ? opInvalidOp : opOK;
2511 }
2512
2513 // If NaN is negative zero, we need to create a new NaN to avoid converting
2514 // NaN to -Inf.
2515 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2516 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2517 makeNaN(false, false);
2518
2519 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2520
2521 // For x87 extended precision, we want to make a NaN, not a special NaN if
2522 // the input wasn't special either.
2523 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2524 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2525
2526 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2527 // This also guarantees that a sNaN does not become Inf on a truncation
2528 // that loses all payload bits.
2529 if (is_signaling) {
2530 makeQuiet();
2531 fs = opInvalidOp;
2532 } else {
2533 fs = opOK;
2534 }
2535 } else if (category == fcInfinity &&
2536 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2537 makeNaN(false, sign);
2538 *losesInfo = true;
2539 fs = opInexact;
2540 } else if (category == fcZero &&
2541 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2542 // Negative zero loses info, but positive zero doesn't.
2543 *losesInfo =
2544 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2545 fs = *losesInfo ? opInexact : opOK;
2546 // NaN is negative zero means -0 -> +0, which can lose information
2547 sign = false;
2548 } else {
2549 *losesInfo = false;
2550 fs = opOK;
2551 }
2552
2553 if (category == fcZero && !semantics->hasZero)
2555 return fs;
2556}
2557
2558/* Convert a floating point number to an integer according to the
2559 rounding mode. If the rounded integer value is out of range this
2560 returns an invalid operation exception and the contents of the
2561 destination parts are unspecified. If the rounded value is in
2562 range but the floating point number is not the exact integer, the C
2563 standard doesn't require an inexact exception to be raised. IEEE
2564 854 does require it so we do that.
2565
2566 Note that for conversions to integer type the C standard requires
2567 round-to-zero to always be used. */
2568APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2569 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2570 roundingMode rounding_mode, bool *isExact) const {
2571 *isExact = false;
2572
2573 /* Handle the three special cases first. */
2574 if (category == fcInfinity || category == fcNaN)
2575 return opInvalidOp;
2576
2577 unsigned dstPartsCount = partCountForBits(width);
2578 assert(dstPartsCount <= parts.size() && "Integer too big");
2579
2580 if (category == fcZero) {
2581 APInt::tcSet(parts.data(), 0, dstPartsCount);
2582 // Negative zero can't be represented as an int.
2583 *isExact = !sign;
2584 return opOK;
2585 }
2586
2587 const integerPart *src = significandParts();
2588
2589 unsigned truncatedBits;
2590 /* Step 1: place our absolute value, with any fraction truncated, in
2591 the destination. */
2592 if (exponent < 0) {
2593 /* Our absolute value is less than one; truncate everything. */
2594 APInt::tcSet(parts.data(), 0, dstPartsCount);
2595 /* For exponent -1 the integer bit represents .5, look at that.
2596 For smaller exponents leftmost truncated bit is 0. */
2597 truncatedBits = semantics->precision -1U - exponent;
2598 } else {
2599 /* We want the most significant (exponent + 1) bits; the rest are
2600 truncated. */
2601 unsigned int bits = exponent + 1U;
2602
2603 /* Hopelessly large in magnitude? */
2604 if (bits > width)
2605 return opInvalidOp;
2606
2607 if (bits < semantics->precision) {
2608 /* We truncate (semantics->precision - bits) bits. */
2609 truncatedBits = semantics->precision - bits;
2610 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2611 } else {
2612 /* We want at least as many bits as are available. */
2613 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2614 0);
2615 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2616 bits - semantics->precision);
2617 truncatedBits = 0;
2618 }
2619 }
2620
2621 /* Step 2: work out any lost fraction, and increment the absolute
2622 value if we would round away from zero. */
2623 lostFraction lost_fraction;
2624 if (truncatedBits) {
2625 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2626 truncatedBits);
2627 if (lost_fraction != lfExactlyZero &&
2628 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2629 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2630 return opInvalidOp; /* Overflow. */
2631 }
2632 } else {
2633 lost_fraction = lfExactlyZero;
2634 }
2635
2636 /* Step 3: check if we fit in the destination. */
2637 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2638
2639 if (sign) {
2640 if (!isSigned) {
2641 /* Negative numbers cannot be represented as unsigned. */
2642 if (omsb != 0)
2643 return opInvalidOp;
2644 } else {
2645 /* It takes omsb bits to represent the unsigned integer value.
2646 We lose a bit for the sign, but care is needed as the
2647 maximally negative integer is a special case. */
2648 if (omsb == width &&
2649 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2650 return opInvalidOp;
2651
2652 /* This case can happen because of rounding. */
2653 if (omsb > width)
2654 return opInvalidOp;
2655 }
2656
2657 APInt::tcNegate (parts.data(), dstPartsCount);
2658 } else {
2659 if (omsb >= width + !isSigned)
2660 return opInvalidOp;
2661 }
2662
2663 if (lost_fraction == lfExactlyZero) {
2664 *isExact = true;
2665 return opOK;
2666 }
2667 return opInexact;
2668}
2669
2670/* Same as convertToSignExtendedInteger, except we provide
2671 deterministic values in case of an invalid operation exception,
2672 namely zero for NaNs and the minimal or maximal value respectively
2673 for underflow or overflow.
2674 The *isExact output tells whether the result is exact, in the sense
2675 that converting it back to the original floating point type produces
2676 the original value. This is almost equivalent to result==opOK,
2677 except for negative zeroes.
2678*/
2681 unsigned int width, bool isSigned,
2682 roundingMode rounding_mode, bool *isExact) const {
2683 opStatus fs = convertToSignExtendedInteger(parts, width, isSigned,
2684 rounding_mode, isExact);
2685
2686 if (fs == opInvalidOp) {
2687 unsigned int bits, dstPartsCount;
2688
2689 dstPartsCount = partCountForBits(width);
2690 assert(dstPartsCount <= parts.size() && "Integer too big");
2691
2692 if (category == fcNaN)
2693 bits = 0;
2694 else if (sign)
2695 bits = isSigned;
2696 else
2697 bits = width - isSigned;
2698
2699 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2700 if (sign && isSigned)
2701 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2702 }
2703
2704 return fs;
2705}
2706
2707/* Convert an unsigned integer SRC to a floating point number,
2708 rounding according to ROUNDING_MODE. The sign of the floating
2709 point number is not modified. */
2710APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2711 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2712 category = fcNormal;
2713 unsigned omsb = APInt::tcMSB(src, srcCount) + 1;
2714 integerPart *dst = significandParts();
2715 unsigned dstCount = partCount();
2716 unsigned precision = semantics->precision;
2717
2718 /* We want the most significant PRECISION bits of SRC. There may not
2719 be that many; extract what we can. */
2720 lostFraction lost_fraction;
2721 if (precision <= omsb) {
2722 exponent = omsb - 1;
2723 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2724 omsb - precision);
2725 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2726 } else {
2727 exponent = precision - 1;
2728 lost_fraction = lfExactlyZero;
2729 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2730 }
2731
2732 return normalize(rounding_mode, lost_fraction);
2733}
2734
2736 roundingMode rounding_mode) {
2737 unsigned int partCount = Val.getNumWords();
2738 APInt api = Val;
2739
2740 sign = false;
2741 if (isSigned && api.isNegative()) {
2742 sign = true;
2743 api = -api;
2744 }
2745
2746 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2747}
2748
2750IEEEFloat::convertFromHexadecimalString(StringRef s,
2751 roundingMode rounding_mode) {
2752 lostFraction lost_fraction = lfExactlyZero;
2753
2754 category = fcNormal;
2755 zeroSignificand();
2756 exponent = 0;
2757
2758 integerPart *significand = significandParts();
2759 unsigned partsCount = partCount();
2760 unsigned bitPos = partsCount * integerPartWidth;
2761 bool computedTrailingFraction = false;
2762
2763 // Skip leading zeroes and any (hexa)decimal point.
2764 StringRef::iterator begin = s.begin();
2765 StringRef::iterator end = s.end();
2767 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2768 if (!PtrOrErr)
2769 return PtrOrErr.takeError();
2770 StringRef::iterator p = *PtrOrErr;
2771 StringRef::iterator firstSignificantDigit = p;
2772
2773 while (p != end) {
2774 integerPart hex_value;
2775
2776 if (*p == '.') {
2777 if (dot != end)
2778 return createError("String contains multiple dots");
2779 dot = p++;
2780 continue;
2781 }
2782
2783 hex_value = hexDigitValue(*p);
2784 if (hex_value == UINT_MAX)
2785 break;
2786
2787 p++;
2788
2789 // Store the number while we have space.
2790 if (bitPos) {
2791 bitPos -= 4;
2792 hex_value <<= bitPos % integerPartWidth;
2793 significand[bitPos / integerPartWidth] |= hex_value;
2794 } else if (!computedTrailingFraction) {
2795 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2796 if (!FractOrErr)
2797 return FractOrErr.takeError();
2798 lost_fraction = *FractOrErr;
2799 computedTrailingFraction = true;
2800 }
2801 }
2802
2803 /* Hex floats require an exponent but not a hexadecimal point. */
2804 if (p == end)
2805 return createError("Hex strings require an exponent");
2806 if (*p != 'p' && *p != 'P')
2807 return createError("Invalid character in significand");
2808 if (p == begin)
2809 return createError("Significand has no digits");
2810 if (dot != end && p - begin == 1)
2811 return createError("Significand has no digits");
2812
2813 /* Ignore the exponent if we are zero. */
2814 if (p != firstSignificantDigit) {
2815 int expAdjustment;
2816
2817 /* Implicit hexadecimal point? */
2818 if (dot == end)
2819 dot = p;
2820
2821 /* Calculate the exponent adjustment implicit in the number of
2822 significant digits. */
2823 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2824 if (expAdjustment < 0)
2825 expAdjustment++;
2826 expAdjustment = expAdjustment * 4 - 1;
2827
2828 /* Adjust for writing the significand starting at the most
2829 significant nibble. */
2830 expAdjustment += semantics->precision;
2831 expAdjustment -= partsCount * integerPartWidth;
2832
2833 /* Adjust for the given exponent. */
2834 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2835 if (!ExpOrErr)
2836 return ExpOrErr.takeError();
2837 exponent = *ExpOrErr;
2838 }
2839
2840 return normalize(rounding_mode, lost_fraction);
2841}
2842
2844IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2845 unsigned sigPartCount, int exp,
2846 roundingMode rounding_mode) {
2847 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2849
2850 bool isNearest = rounding_mode == rmNearestTiesToEven ||
2851 rounding_mode == rmNearestTiesToAway;
2852
2853 unsigned parts = partCountForBits(semantics->precision + 11);
2854
2855 /* Calculate pow(5, abs(exp)). */
2856 unsigned pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp : -exp);
2857
2858 for (;; parts *= 2) {
2859 opStatus sigStatus, powStatus;
2860 unsigned int excessPrecision, truncatedBits;
2861
2862 calcSemantics.precision = parts * integerPartWidth - 1;
2863 excessPrecision = calcSemantics.precision - semantics->precision;
2864 truncatedBits = excessPrecision;
2865
2866 IEEEFloat decSig(calcSemantics, uninitialized);
2867 decSig.makeZero(sign);
2868 IEEEFloat pow5(calcSemantics);
2869
2870 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2872 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2874 /* Add exp, as 10^n = 5^n * 2^n. */
2875 decSig.exponent += exp;
2876
2877 lostFraction calcLostFraction;
2878 integerPart HUerr, HUdistance;
2879 unsigned int powHUerr;
2880
2881 if (exp >= 0) {
2882 /* multiplySignificand leaves the precision-th bit set to 1. */
2883 calcLostFraction = decSig.multiplySignificand(pow5);
2884 powHUerr = powStatus != opOK;
2885 } else {
2886 calcLostFraction = decSig.divideSignificand(pow5);
2887 /* Denormal numbers have less precision. */
2888 if (decSig.exponent < semantics->minExponent) {
2889 excessPrecision += (semantics->minExponent - decSig.exponent);
2890 truncatedBits = excessPrecision;
2891 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
2892 }
2893 /* Extra half-ulp lost in reciprocal of exponent. */
2894 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2895 }
2896
2897 /* Both multiplySignificand and divideSignificand return the
2898 result with the integer bit set. */
2900 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2901
2902 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2903 powHUerr);
2904 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2905 excessPrecision, isNearest);
2906
2907 /* Are we guaranteed to round correctly if we truncate? */
2908 if (HUdistance >= HUerr) {
2909 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2910 calcSemantics.precision - excessPrecision,
2911 excessPrecision);
2912 /* Take the exponent of decSig. If we tcExtract-ed less bits
2913 above we must adjust our exponent to compensate for the
2914 implicit right shift. */
2915 exponent = (decSig.exponent + semantics->precision
2916 - (calcSemantics.precision - excessPrecision));
2917 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2918 decSig.partCount(),
2919 truncatedBits);
2920 return normalize(rounding_mode, calcLostFraction);
2921 }
2922 }
2923}
2924
2925Expected<APFloat::opStatus>
2926IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2927 decimalInfo D;
2928 opStatus fs;
2929
2930 /* Scan the text. */
2931 StringRef::iterator p = str.begin();
2932 if (Error Err = interpretDecimal(p, str.end(), &D))
2933 return std::move(Err);
2934
2935 /* Handle the quick cases. First the case of no significant digits,
2936 i.e. zero, and then exponents that are obviously too large or too
2937 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2938 definitely overflows if
2939
2940 (exp - 1) * L >= maxExponent
2941
2942 and definitely underflows to zero where
2943
2944 (exp + 1) * L <= minExponent - precision
2945
2946 With integer arithmetic the tightest bounds for L are
2947
2948 93/28 < L < 196/59 [ numerator <= 256 ]
2949 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2950 */
2951
2952 // Test if we have a zero number allowing for strings with no null terminators
2953 // and zero decimals with non-zero exponents.
2954 //
2955 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2956 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2957 // be at most one dot. On the other hand, if we have a zero with a non-zero
2958 // exponent, then we know that D.firstSigDigit will be non-numeric.
2959 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2960 category = fcZero;
2961 fs = opOK;
2962 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2963 sign = false;
2964 if (!semantics->hasZero)
2966
2967 /* Check whether the normalized exponent is high enough to overflow
2968 max during the log-rebasing in the max-exponent check below. */
2969 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2970 fs = handleOverflow(rounding_mode);
2971
2972 /* If it wasn't, then it also wasn't high enough to overflow max
2973 during the log-rebasing in the min-exponent check. Check that it
2974 won't overflow min in either check, then perform the min-exponent
2975 check. */
2976 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2977 (D.normalizedExponent + 1) * 28738 <=
2978 8651 * (semantics->minExponent - (int) semantics->precision)) {
2979 /* Underflow to zero and round. */
2980 category = fcNormal;
2981 zeroSignificand();
2982 fs = normalize(rounding_mode, lfLessThanHalf);
2983
2984 /* We can finally safely perform the max-exponent check. */
2985 } else if ((D.normalizedExponent - 1) * 42039
2986 >= 12655 * semantics->maxExponent) {
2987 /* Overflow and round. */
2988 fs = handleOverflow(rounding_mode);
2989 } else {
2990 integerPart *decSignificand;
2991 unsigned int partCount;
2992
2993 /* A tight upper bound on number of bits required to hold an
2994 N-digit decimal integer is N * 196 / 59. Allocate enough space
2995 to hold the full significand, and an extra part required by
2996 tcMultiplyPart. */
2997 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2998 partCount = partCountForBits(1 + 196 * partCount / 59);
2999 decSignificand = new integerPart[partCount + 1];
3000 partCount = 0;
3001
3002 /* Convert to binary efficiently - we do almost all multiplication
3003 in an integerPart. When this would overflow do we do a single
3004 bignum multiplication, and then revert again to multiplication
3005 in an integerPart. */
3006 do {
3007 integerPart decValue, val, multiplier;
3008
3009 val = 0;
3010 multiplier = 1;
3011
3012 do {
3013 if (*p == '.') {
3014 p++;
3015 if (p == str.end()) {
3016 break;
3017 }
3018 }
3019 decValue = decDigitValue(*p++);
3020 if (decValue >= 10U) {
3021 delete[] decSignificand;
3022 return createError("Invalid character in significand");
3023 }
3024 multiplier *= 10;
3025 val = val * 10 + decValue;
3026 /* The maximum number that can be multiplied by ten with any
3027 digit added without overflowing an integerPart. */
3028 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3029
3030 /* Multiply out the current part. */
3031 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3032 partCount, partCount + 1, false);
3033
3034 /* If we used another part (likely but not guaranteed), increase
3035 the count. */
3036 if (decSignificand[partCount])
3037 partCount++;
3038 } while (p <= D.lastSigDigit);
3039
3040 category = fcNormal;
3041 fs = roundSignificandWithExponent(decSignificand, partCount,
3042 D.exponent, rounding_mode);
3043
3044 delete [] decSignificand;
3045 }
3046
3047 return fs;
3048}
3049
3050bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3051 const size_t MIN_NAME_SIZE = 3;
3052
3053 if (str.size() < MIN_NAME_SIZE)
3054 return false;
3055
3056 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3057 makeInf(false);
3058 return true;
3059 }
3060
3061 bool IsNegative = str.consume_front("-");
3062 if (IsNegative) {
3063 if (str.size() < MIN_NAME_SIZE)
3064 return false;
3065
3066 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3067 makeInf(true);
3068 return true;
3069 }
3070 }
3071
3072 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3073 bool IsSignaling = str.consume_front_insensitive("s");
3074 if (IsSignaling) {
3075 if (str.size() < MIN_NAME_SIZE)
3076 return false;
3077 }
3078
3079 if (str.consume_front("nan") || str.consume_front("NaN")) {
3080 // A NaN without payload.
3081 if (str.empty()) {
3082 makeNaN(IsSignaling, IsNegative);
3083 return true;
3084 }
3085
3086 // Allow the payload to be inside parentheses.
3087 if (str.front() == '(') {
3088 // Parentheses should be balanced (and not empty).
3089 if (str.size() <= 2 || str.back() != ')')
3090 return false;
3091
3092 str = str.slice(1, str.size() - 1);
3093 }
3094
3095 // Determine the payload number's radix.
3096 unsigned Radix = 10;
3097 if (str[0] == '0') {
3098 if (str.size() > 1 && tolower(str[1]) == 'x') {
3099 str = str.drop_front(2);
3100 Radix = 16;
3101 } else {
3102 Radix = 8;
3103 }
3104 }
3105
3106 // Parse the payload and make the NaN.
3107 APInt Payload;
3108 if (!str.getAsInteger(Radix, Payload)) {
3109 makeNaN(IsSignaling, IsNegative, &Payload);
3110 return true;
3111 }
3112 }
3113
3114 return false;
3115}
3116
3117Expected<APFloat::opStatus>
3119 if (str.empty())
3120 return createError("Invalid string length");
3121
3122 // Handle special cases.
3123 if (convertFromStringSpecials(str))
3124 return opOK;
3125
3126 /* Handle a leading minus sign. */
3127 StringRef::iterator p = str.begin();
3128 size_t slen = str.size();
3129 sign = *p == '-' ? 1 : 0;
3130 if (sign && !semantics->hasSignedRepr)
3132 "This floating point format does not support signed values");
3133
3134 if (*p == '-' || *p == '+') {
3135 p++;
3136 slen--;
3137 if (!slen)
3138 return createError("String has no digits");
3139 }
3140
3141 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3142 if (slen == 2)
3143 return createError("Invalid string");
3144 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3145 rounding_mode);
3146 }
3147
3148 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3149}
3150
3151/* Write out a hexadecimal representation of the floating point value
3152 to DST, which must be of sufficient size, in the C99 form
3153 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3154 excluding the terminating NUL.
3155
3156 If UPPERCASE, the output is in upper case, otherwise in lower case.
3157
3158 HEXDIGITS digits appear altogether, rounding the value if
3159 necessary. If HEXDIGITS is 0, the minimal precision to display the
3160 number precisely is used instead. If nothing would appear after
3161 the decimal point it is suppressed.
3162
3163 The decimal exponent is always printed and has at least one digit.
3164 Zero values display an exponent of zero. Infinities and NaNs
3165 appear as "infinity" or "nan" respectively.
3166
3167 The above rules are as specified by C99. There is ambiguity about
3168 what the leading hexadecimal digit should be. This implementation
3169 uses whatever is necessary so that the exponent is displayed as
3170 stored. This implies the exponent will fall within the IEEE format
3171 range, and the leading hexadecimal digit will be 0 (for denormals),
3172 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3173 any other digits zero).
3174*/
3175unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3176 bool upperCase,
3177 roundingMode rounding_mode) const {
3178 char *p = dst;
3179 if (sign)
3180 *dst++ = '-';
3181
3182 switch (category) {
3183 case fcInfinity:
3184 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3185 dst += sizeof infinityL - 1;
3186 break;
3187
3188 case fcNaN:
3189 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3190 dst += sizeof NaNU - 1;
3191 break;
3192
3193 case fcZero:
3194 *dst++ = '0';
3195 *dst++ = upperCase ? 'X': 'x';
3196 *dst++ = '0';
3197 if (hexDigits > 1) {
3198 *dst++ = '.';
3199 memset (dst, '0', hexDigits - 1);
3200 dst += hexDigits - 1;
3201 }
3202 *dst++ = upperCase ? 'P': 'p';
3203 *dst++ = '0';
3204 break;
3205
3206 case fcNormal:
3207 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3208 break;
3209 }
3210
3211 *dst = 0;
3212
3213 return static_cast<unsigned int>(dst - p);
3214}
3215
3216/* Does the hard work of outputting the correctly rounded hexadecimal
3217 form of a normal floating point number with the specified number of
3218 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3219 digits necessary to print the value precisely is output. */
3220char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3221 bool upperCase,
3222 roundingMode rounding_mode) const {
3223 *dst++ = '0';
3224 *dst++ = upperCase ? 'X': 'x';
3225
3226 bool roundUp = false;
3227 const char *hexDigitChars = upperCase ? hexDigitsUpper : hexDigitsLower;
3228
3229 const integerPart *significand = significandParts();
3230 unsigned partsCount = partCount();
3231
3232 /* +3 because the first digit only uses the single integer bit, so
3233 we have 3 virtual zero most-significant-bits. */
3234 unsigned valueBits = semantics->precision + 3;
3235 unsigned shift = integerPartWidth - valueBits % integerPartWidth;
3236
3237 /* The natural number of digits required ignoring trailing
3238 insignificant zeroes. */
3239 unsigned outputDigits = (valueBits - significandLSB() + 3) / 4;
3240
3241 /* hexDigits of zero means use the required number for the
3242 precision. Otherwise, see if we are truncating. If we are,
3243 find out if we need to round away from zero. */
3244 if (hexDigits) {
3245 if (hexDigits < outputDigits) {
3246 /* We are dropping non-zero bits, so need to check how to round.
3247 "bits" is the number of dropped bits. */
3248 unsigned int bits;
3249 lostFraction fraction;
3250
3251 bits = valueBits - hexDigits * 4;
3252 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3253 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3254 }
3255 outputDigits = hexDigits;
3256 }
3257
3258 /* Write the digits consecutively, and start writing in the location
3259 of the hexadecimal point. We move the most significant digit
3260 left and add the hexadecimal point later. */
3261 char *p = ++dst;
3262
3263 unsigned count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3264
3265 while (outputDigits && count) {
3266 integerPart part;
3267
3268 /* Put the most significant integerPartWidth bits in "part". */
3269 if (--count == partsCount)
3270 part = 0; /* An imaginary higher zero part. */
3271 else
3272 part = significand[count] << shift;
3273
3274 if (count && shift)
3275 part |= significand[count - 1] >> (integerPartWidth - shift);
3276
3277 /* Convert as much of "part" to hexdigits as we can. */
3278 unsigned int curDigits = integerPartWidth / 4;
3279
3280 curDigits = std::min(curDigits, outputDigits);
3281 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3282 outputDigits -= curDigits;
3283 }
3284
3285 if (roundUp) {
3286 char *q = dst;
3287
3288 /* Note that hexDigitChars has a trailing '0'. */
3289 do {
3290 q--;
3291 *q = hexDigitChars[hexDigitValue (*q) + 1];
3292 } while (*q == '0');
3293 assert(q >= p);
3294 } else {
3295 /* Add trailing zeroes. */
3296 memset (dst, '0', outputDigits);
3297 dst += outputDigits;
3298 }
3299
3300 /* Move the most significant digit to before the point, and if there
3301 is something after the decimal point add it. This must come
3302 after rounding above. */
3303 p[-1] = p[0];
3304 if (dst -1 == p)
3305 dst--;
3306 else
3307 p[0] = '.';
3308
3309 /* Finally output the exponent. */
3310 *dst++ = upperCase ? 'P': 'p';
3311
3312 return writeSignedDecimal (dst, exponent);
3313}
3314
3316 if (!Arg.isFiniteNonZero())
3317 return hash_combine((uint8_t)Arg.category,
3318 // NaN has no sign, fix it at zero.
3319 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3320 Arg.semantics->precision);
3321
3322 // Normal floats need their exponent and significand hashed.
3323 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3324 Arg.semantics->precision, Arg.exponent,
3326 Arg.significandParts(),
3327 Arg.significandParts() + Arg.partCount()));
3328}
3329
3330// Conversion from APFloat to/from host float/double. It may eventually be
3331// possible to eliminate these and have everybody deal with APFloats, but that
3332// will take a while. This approach will not easily extend to long double.
3333// Current implementation requires integerPartWidth==64, which is correct at
3334// the moment but could be made more general.
3335
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3338
3339APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3340 assert(semantics ==
3341 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3342 assert(partCount()==2);
3343
3344 uint64_t myexponent, mysignificand;
3345
3346 if (isFiniteNonZero()) {
3347 myexponent = exponent+16383; //bias
3348 mysignificand = significandParts()[0];
3349 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3350 myexponent = 0; // denormal
3351 } else if (category==fcZero) {
3352 myexponent = 0;
3353 mysignificand = 0;
3354 } else if (category==fcInfinity) {
3355 myexponent = 0x7fff;
3356 mysignificand = 0x8000000000000000ULL;
3357 } else {
3358 assert(category == fcNaN && "Unknown category");
3359 myexponent = 0x7fff;
3360 mysignificand = significandParts()[0];
3361 }
3362
3363 uint64_t words[2];
3364 words[0] = mysignificand;
3365 words[1] = ((uint64_t)(sign & 1) << 15) |
3366 (myexponent & 0x7fffLL);
3367 return APInt(80, words);
3368}
3369
3370APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3371 assert(semantics ==
3372 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3373 assert(partCount()==2);
3374
3375 uint64_t words[2];
3376 bool losesInfo;
3377
3378 // Convert number to double. To avoid spurious underflows, we re-
3379 // normalize against the "double" minExponent first, and only *then*
3380 // truncate the mantissa. The result of that second conversion
3381 // may be inexact, but should never underflow.
3382 // Declare fltSemantics before APFloat that uses it (and
3383 // saves pointer to it) to ensure correct destruction order.
3384 fltSemantics extendedSemantics = *semantics;
3385 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3386 IEEEFloat extended(*this);
3387 [[maybe_unused]] opStatus fs =
3388 extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3389 assert(fs == opOK && !losesInfo);
3390
3391 IEEEFloat u(extended);
3392 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3393 assert(fs == opOK || fs == opInexact);
3394 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3395
3396 // If conversion was exact or resulted in a special case, we're done;
3397 // just set the second double to zero. Otherwise, re-convert back to
3398 // the extended format and compute the difference. This now should
3399 // convert exactly to double.
3400 if (u.isFiniteNonZero() && losesInfo) {
3401 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3402 assert(fs == opOK && !losesInfo);
3403
3404 IEEEFloat v(extended);
3405 v.subtract(u, rmNearestTiesToEven);
3406 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3407 assert(fs == opOK && !losesInfo);
3408 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3409 } else {
3410 words[1] = 0;
3411 }
3412
3413 return APInt(128, words);
3414}
3415
3416template <const fltSemantics &S>
3417APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3418 assert(semantics == &S);
3419 const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
3420 ? -S.minExponent
3421 : -(S.minExponent - 1);
3422 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3423 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3424 constexpr integerPart integer_bit =
3425 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3426 constexpr uint64_t significand_mask = integer_bit - 1;
3427 constexpr unsigned int exponent_bits =
3428 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3429 : S.sizeInBits;
3430 static_assert(exponent_bits < 64);
3431 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3432
3433 uint64_t myexponent;
3434 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3435 mysignificand;
3436
3437 if (isFiniteNonZero()) {
3438 myexponent = exponent + bias;
3439 std::copy_n(significandParts(), mysignificand.size(),
3440 mysignificand.begin());
3441 if (myexponent == 1 &&
3442 !(significandParts()[integer_bit_part] & integer_bit))
3443 myexponent = 0; // denormal
3444 } else if (category == fcZero) {
3445 if (!S.hasZero)
3446 llvm_unreachable("semantics does not support zero!");
3447 myexponent = ::exponentZero(S) + bias;
3448 mysignificand.fill(0);
3449 } else if (category == fcInfinity) {
3450 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3451 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3452 llvm_unreachable("semantics don't support inf!");
3453 myexponent = ::exponentInf(S) + bias;
3454 mysignificand.fill(0);
3455 } else {
3456 assert(category == fcNaN && "Unknown category!");
3457 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3458 llvm_unreachable("semantics don't support NaN!");
3459 myexponent = ::exponentNaN(S) + bias;
3460 std::copy_n(significandParts(), mysignificand.size(),
3461 mysignificand.begin());
3462 }
3463 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3464 auto words_iter =
3465 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3466 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3467 // Clear the integer bit.
3468 words[mysignificand.size() - 1] &= significand_mask;
3469 }
3470 std::fill(words_iter, words.end(), uint64_t{0});
3471 constexpr size_t last_word = words.size() - 1;
3472 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3473 << ((S.sizeInBits - 1) % 64);
3474 words[last_word] |= shifted_sign;
3475 uint64_t shifted_exponent = (myexponent & exponent_mask)
3476 << (trailing_significand_bits % 64);
3477 words[last_word] |= shifted_exponent;
3478 if constexpr (last_word == 0) {
3479 return APInt(S.sizeInBits, words[0]);
3480 }
3481 return APInt(S.sizeInBits, words);
3482}
3483
3484APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3485 assert(partCount() == 2);
3486 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3487}
3488
3489APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3490 assert(partCount()==1);
3491 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3492}
3493
3494APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3495 assert(partCount()==1);
3496 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3497}
3498
3499APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3500 assert(partCount() == 1);
3501 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3502}
3503
3504APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3505 assert(partCount()==1);
3506 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3507}
3508
3509APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3510 assert(partCount() == 1);
3511 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3512}
3513
3514APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3515 assert(partCount() == 1);
3516 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3517}
3518
3519APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3520 assert(partCount() == 1);
3521 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3522}
3523
3524APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3525 assert(partCount() == 1);
3526 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3527}
3528
3529APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3530 assert(partCount() == 1);
3531 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3532}
3533
3534APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3535 assert(partCount() == 1);
3536 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3537}
3538
3539APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3540 assert(partCount() == 1);
3541 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3542}
3543
3544APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3545 assert(partCount() == 1);
3546 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3547}
3548
3549APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3550 assert(partCount() == 1);
3551 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3552}
3553
3554APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3555 assert(partCount() == 1);
3556 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3557}
3558
3559APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3560 assert(partCount() == 1);
3561 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3562}
3563
3564APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3565 assert(partCount() == 1);
3566 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3567}
3568
3569// This function creates an APInt that is just a bit map of the floating
3570// point constant as it would appear in memory. It is not a conversion,
3571// and treating the result as a normal integer is unlikely to be useful.
3572
3574 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3575 return convertHalfAPFloatToAPInt();
3576
3577 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3578 return convertBFloatAPFloatToAPInt();
3579
3580 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3581 return convertFloatAPFloatToAPInt();
3582
3583 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3584 return convertDoubleAPFloatToAPInt();
3585
3586 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3587 return convertQuadrupleAPFloatToAPInt();
3588
3589 if (semantics ==
3590 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3591 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3592
3593 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3594 return convertFloat8E5M2APFloatToAPInt();
3595
3596 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3597 return convertFloat8E5M2FNUZAPFloatToAPInt();
3598
3599 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3600 return convertFloat8E4M3APFloatToAPInt();
3601
3602 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3603 return convertFloat8E4M3FNAPFloatToAPInt();
3604
3605 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3606 return convertFloat8E4M3FNUZAPFloatToAPInt();
3607
3608 if (semantics ==
3609 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3610 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3611
3612 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3613 return convertFloat8E3M4APFloatToAPInt();
3614
3615 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3616 return convertFloatTF32APFloatToAPInt();
3617
3618 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3619 return convertFloat8E8M0FNUAPFloatToAPInt();
3620
3621 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3622 return convertFloat6E3M2FNAPFloatToAPInt();
3623
3624 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3625 return convertFloat6E2M3FNAPFloatToAPInt();
3626
3627 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3628 return convertFloat4E2M1FNAPFloatToAPInt();
3629
3630 assert(semantics ==
3631 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3632 "unknown format!");
3633 return convertF80LongDoubleAPFloatToAPInt();
3634}
3635
3637 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3638 "Float semantics are not IEEEsingle");
3639 APInt api = bitcastToAPInt();
3640 return api.bitsToFloat();
3641}
3642
3644 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3645 "Float semantics are not IEEEdouble");
3646 APInt api = bitcastToAPInt();
3647 return api.bitsToDouble();
3648}
3649
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128. Only valid for semIEEEquad.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == &APFloatBase::semIEEEquad &&
         "Float semantics are not IEEEquads");
  const APInt Bits = bitcastToAPInt();
  return Bits.bitsToQuad();
}
#endif
3658
3659/// Integer bit is explicit in this format. Intel hardware (387 and later)
3660/// does not support these bit patterns:
3661/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3662/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3663/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3664/// exponent = 0, integer bit 1 ("pseudodenormal")
3665/// At the moment, the first three are treated as NaNs, the last one as Normal.
3666void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3667 uint64_t i1 = api.getRawData()[0];
3668 uint64_t i2 = api.getRawData()[1];
3669 uint64_t myexponent = (i2 & 0x7fff);
3670 uint64_t mysignificand = i1;
3671 uint8_t myintegerbit = mysignificand >> 63;
3672
3673 initialize(&APFloatBase::semX87DoubleExtended);
3674 assert(partCount()==2);
3675
3676 sign = static_cast<unsigned int>(i2>>15);
3677 if (myexponent == 0 && mysignificand == 0) {
3678 makeZero(sign);
3679 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3680 makeInf(sign);
3681 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3682 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3683 category = fcNaN;
3684 exponent = exponentNaN();
3685 significandParts()[0] = mysignificand;
3686 significandParts()[1] = 0;
3687 } else {
3688 category = fcNormal;
3689 exponent = myexponent - 16383;
3690 significandParts()[0] = mysignificand;
3691 significandParts()[1] = 0;
3692 if (myexponent==0) // denormal
3693 exponent = -16382;
3694 }
3695}
3696
3697void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3698 uint64_t i1 = api.getRawData()[0];
3699 uint64_t i2 = api.getRawData()[1];
3700 bool losesInfo;
3701
3702 // Get the first double and convert to our format.
3703 initFromDoubleAPInt(APInt(64, i1));
3704 [[maybe_unused]] opStatus fs = convert(APFloatBase::semPPCDoubleDoubleLegacy,
3705 rmNearestTiesToEven, &losesInfo);
3706 assert(fs == opOK && !losesInfo);
3707
3708 // Unless we have a special case, add in second double.
3709 if (isFiniteNonZero()) {
3710 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3711 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3712 &losesInfo);
3713 assert(fs == opOK && !losesInfo);
3714
3716 }
3717}
3718
3719// The E8M0 format has the following characteristics:
3720// It is an 8-bit unsigned format with only exponents (no actual significand).
3721// No encodings for {zero, infinities or denorms}.
3722// NaN is represented by all 1's.
3723// Bias is 127.
3724void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3725 const uint64_t exponent_mask = 0xff;
3726 uint64_t val = api.getRawData()[0];
3727 uint64_t myexponent = val & exponent_mask;
3728
3729 initialize(&APFloatBase::semFloat8E8M0FNU);
3730 assert(partCount() == 1);
3731
3732 // This format has unsigned representation only
3733 sign = 0;
3734
3735 // Set the significand
3736 // This format does not have any significand but the 'Pth' precision bit is
3737 // always set to 1 for consistency in APFloat's internal representation.
3738 uint64_t mysignificand = 1;
3739 significandParts()[0] = mysignificand;
3740
3741 // This format can either have a NaN or fcNormal
3742 // All 1's i.e. 255 is a NaN
3743 if (val == exponent_mask) {
3744 category = fcNaN;
3745 exponent = exponentNaN();
3746 return;
3747 }
3748 // Handle fcNormal...
3749 category = fcNormal;
3750 exponent = myexponent - 127; // 127 is bias
3751}
3752
3753template <const fltSemantics &S>
3754void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3755 assert(api.getBitWidth() == S.sizeInBits);
3756 constexpr integerPart integer_bit = integerPart{1}
3757 << ((S.precision - 1) % integerPartWidth);
3758 constexpr uint64_t significand_mask = integer_bit - 1;
3759 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3760 constexpr unsigned int stored_significand_parts =
3761 partCountForBits(trailing_significand_bits);
3762 constexpr unsigned int exponent_bits =
3763 S.sizeInBits - 1 - trailing_significand_bits;
3764 static_assert(exponent_bits < 64);
3765 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3766 constexpr int bias = -(S.minExponent - 1);
3767
3768 // Copy the bits of the significand. We need to clear out the exponent and
3769 // sign bit in the last word.
3770 std::array<integerPart, stored_significand_parts> mysignificand;
3771 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3772 if constexpr (significand_mask != 0) {
3773 mysignificand[mysignificand.size() - 1] &= significand_mask;
3774 }
3775
3776 // We assume the last word holds the sign bit, the exponent, and potentially
3777 // some of the trailing significand field.
3778 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3779 uint64_t myexponent =
3780 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3781
3782 initialize(&S);
3783 assert(partCount() == mysignificand.size());
3784
3785 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3786
3787 bool all_zero_significand = llvm::all_of(mysignificand, equal_to(0));
3788
3789 bool is_zero = myexponent == 0 && all_zero_significand;
3790
3791 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3792 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3793 makeInf(sign);
3794 return;
3795 }
3796 }
3797
3798 bool is_nan = false;
3799
3800 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3801 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3802 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3803 bool all_ones_significand =
3804 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3805 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3806 (!significand_mask ||
3807 mysignificand[mysignificand.size() - 1] == significand_mask);
3808 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3809 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3810 is_nan = is_zero && sign;
3811 }
3812
3813 if (is_nan) {
3814 category = fcNaN;
3815 exponent = ::exponentNaN(S);
3816 std::copy_n(mysignificand.begin(), mysignificand.size(),
3817 significandParts());
3818 return;
3819 }
3820
3821 if (is_zero) {
3822 makeZero(sign);
3823 return;
3824 }
3825
3826 category = fcNormal;
3827 exponent = myexponent - bias;
3828 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3829 if (myexponent == 0) // denormal
3830 exponent = S.minExponent;
3831 else
3832 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3833}
3834
3835void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3836 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3837}
3838
3839void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3840 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
3841}
3842
3843void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3844 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
3845}
3846
3847void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3848 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
3849}
3850
3851void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3852 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
3853}
3854
3855void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3856 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
3857}
3858
3859void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3860 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
3861}
3862
3863void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3864 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
3865}
3866
3867void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3868 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
3869}
3870
3871void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3872 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
3873}
3874
3875void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3876 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
3877}
3878
3879void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3880 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
3881}
3882
3883void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3884 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
3885}
3886
3887void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3888 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
3889}
3890
3891void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3892 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
3893}
3894
3895void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3896 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
3897}
3898
3899/// Treat api as containing the bits of a floating point number.
3900void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3901 assert(api.getBitWidth() == Sem->sizeInBits);
3902 if (Sem == &APFloatBase::semIEEEhalf)
3903 return initFromHalfAPInt(api);
3904 if (Sem == &APFloatBase::semBFloat)
3905 return initFromBFloatAPInt(api);
3906 if (Sem == &APFloatBase::semIEEEsingle)
3907 return initFromFloatAPInt(api);
3908 if (Sem == &APFloatBase::semIEEEdouble)
3909 return initFromDoubleAPInt(api);
3910 if (Sem == &APFloatBase::semX87DoubleExtended)
3911 return initFromF80LongDoubleAPInt(api);
3912 if (Sem == &APFloatBase::semIEEEquad)
3913 return initFromQuadrupleAPInt(api);
3914 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
3915 return initFromPPCDoubleDoubleLegacyAPInt(api);
3916 if (Sem == &APFloatBase::semFloat8E5M2)
3917 return initFromFloat8E5M2APInt(api);
3918 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
3919 return initFromFloat8E5M2FNUZAPInt(api);
3920 if (Sem == &APFloatBase::semFloat8E4M3)
3921 return initFromFloat8E4M3APInt(api);
3922 if (Sem == &APFloatBase::semFloat8E4M3FN)
3923 return initFromFloat8E4M3FNAPInt(api);
3924 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
3925 return initFromFloat8E4M3FNUZAPInt(api);
3926 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
3927 return initFromFloat8E4M3B11FNUZAPInt(api);
3928 if (Sem == &APFloatBase::semFloat8E3M4)
3929 return initFromFloat8E3M4APInt(api);
3930 if (Sem == &APFloatBase::semFloatTF32)
3931 return initFromFloatTF32APInt(api);
3932 if (Sem == &APFloatBase::semFloat8E8M0FNU)
3933 return initFromFloat8E8M0FNUAPInt(api);
3934 if (Sem == &APFloatBase::semFloat6E3M2FN)
3935 return initFromFloat6E3M2FNAPInt(api);
3936 if (Sem == &APFloatBase::semFloat6E2M3FN)
3937 return initFromFloat6E2M3FNAPInt(api);
3938 if (Sem == &APFloatBase::semFloat4E2M1FN)
3939 return initFromFloat4E2M1FNAPInt(api);
3940
3941 llvm_unreachable("unsupported semantics");
3942}
3943
3944/// Make this number the largest magnitude normal number in the given
3945/// semantics.
3946void IEEEFloat::makeLargest(bool Negative) {
3947 if (Negative && !semantics->hasSignedRepr)
3949 "This floating point format does not support signed values");
3950 // We want (in interchange format):
3951 // sign = {Negative}
3952 // exponent = 1..10
3953 // significand = 1..1
3954 category = fcNormal;
3955 sign = Negative;
3956 exponent = semantics->maxExponent;
3957
3958 // Use memset to set all but the highest integerPart to all ones.
3959 integerPart *significand = significandParts();
3960 unsigned PartCount = partCount();
3961 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3962
3963 // Set the high integerPart especially setting all unused top bits for
3964 // internal consistency.
3965 const unsigned NumUnusedHighBits =
3966 PartCount*integerPartWidth - semantics->precision;
3967 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3968 ? (~integerPart(0) >> NumUnusedHighBits)
3969 : 0;
3970 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3971 semantics->nanEncoding == fltNanEncoding::AllOnes &&
3972 (semantics->precision > 1))
3973 significand[0] &= ~integerPart(1);
3974}
3975
3976/// Make this number the smallest magnitude denormal number in the given
3977/// semantics.
3978void IEEEFloat::makeSmallest(bool Negative) {
3979 if (Negative && !semantics->hasSignedRepr)
3981 "This floating point format does not support signed values");
3982 // We want (in interchange format):
3983 // sign = {Negative}
3984 // exponent = 0..0
3985 // significand = 0..01
3986 category = fcNormal;
3987 sign = Negative;
3988 exponent = semantics->minExponent;
3989 APInt::tcSet(significandParts(), 1, partCount());
3990}
3991
3993 if (Negative && !semantics->hasSignedRepr)
3995 "This floating point format does not support signed values");
3996 // We want (in interchange format):
3997 // sign = {Negative}
3998 // exponent = 0..0
3999 // significand = 10..0
4000
4001 category = fcNormal;
4002 zeroSignificand();
4003 sign = Negative;
4004 exponent = semantics->minExponent;
4005 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4006}
4007
4008IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4009 initFromAPInt(&Sem, API);
4010}
4011
4013 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4014}
4015
4017 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4018}
4019
4020namespace {
4021 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4022 Buffer.append(Str.begin(), Str.end());
4023 }
4024
4025 /// Removes data from the given significand until it is no more
4026 /// precise than is required for the desired precision.
4027 void AdjustToPrecision(APInt &significand,
4028 int &exp, unsigned FormatPrecision) {
4029 unsigned bits = significand.getActiveBits();
4030
4031 // 196/59 is a very slight overestimate of lg_2(10).
4032 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4033
4034 if (bits <= bitsRequired) return;
4035
4036 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4037 if (!tensRemovable) return;
4038
4039 exp += tensRemovable;
4040
4041 APInt divisor(significand.getBitWidth(), 1);
4042 APInt powten(significand.getBitWidth(), 10);
4043 while (true) {
4044 if (tensRemovable & 1)
4045 divisor *= powten;
4046 tensRemovable >>= 1;
4047 if (!tensRemovable) break;
4048 powten *= powten;
4049 }
4050
4051 significand = significand.udiv(divisor);
4052
4053 // Truncate the significand down to its active bit count.
4054 significand = significand.trunc(significand.getActiveBits());
4055 }
4056
4057
4058 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4059 int &exp, unsigned FormatPrecision) {
4060 unsigned N = buffer.size();
4061 if (N <= FormatPrecision) return;
4062
4063 // The most significant figures are the last ones in the buffer.
4064 unsigned FirstSignificant = N - FormatPrecision;
4065
4066 // Round.
4067 // FIXME: this probably shouldn't use 'round half up'.
4068
4069 // Rounding down is just a truncation, except we also want to drop
4070 // trailing zeros from the new result.
4071 if (buffer[FirstSignificant - 1] < '5') {
4072 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4073 FirstSignificant++;
4074
4075 exp += FirstSignificant;
4076 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4077 return;
4078 }
4079
4080 // Rounding up requires a decimal add-with-carry. If we continue
4081 // the carry, the newly-introduced zeros will just be truncated.
4082 for (unsigned I = FirstSignificant; I != N; ++I) {
4083 if (buffer[I] == '9') {
4084 FirstSignificant++;
4085 } else {
4086 buffer[I]++;
4087 break;
4088 }
4089 }
4090
4091 // If we carried through, we have exactly one digit of precision.
4092 if (FirstSignificant == N) {
4093 exp += FirstSignificant;
4094 buffer.clear();
4095 buffer.push_back('1');
4096 return;
4097 }
4098
4099 exp += FirstSignificant;
4100 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4101 }
4102
4103 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4104 APInt significand, unsigned FormatPrecision,
4105 unsigned FormatMaxPadding, bool TruncateZero) {
4106 const int semanticsPrecision = significand.getBitWidth();
4107
4108 if (isNeg)
4109 Str.push_back('-');
4110
4111 // Set FormatPrecision if zero. We want to do this before we
4112 // truncate trailing zeros, as those are part of the precision.
4113 if (!FormatPrecision) {
4114 // We use enough digits so the number can be round-tripped back to an
4115 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4116 // Accurately" by Steele and White.
4117 // FIXME: Using a formula based purely on the precision is conservative;
4118 // we can print fewer digits depending on the actual value being printed.
4119
4120 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4121 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4122 }
4123
4124 // Ignore trailing binary zeros.
4125 int trailingZeros = significand.countr_zero();
4126 exp += trailingZeros;
4127 significand.lshrInPlace(trailingZeros);
4128
4129 // Change the exponent from 2^e to 10^e.
4130 if (exp == 0) {
4131 // Nothing to do.
4132 } else if (exp > 0) {
4133 // Just shift left.
4134 significand = significand.zext(semanticsPrecision + exp);
4135 significand <<= exp;
4136 exp = 0;
4137 } else { /* exp < 0 */
4138 int texp = -exp;
4139
4140 // We transform this using the identity:
4141 // (N)(2^-e) == (N)(5^e)(10^-e)
4142 // This means we have to multiply N (the significand) by 5^e.
4143 // To avoid overflow, we have to operate on numbers large
4144 // enough to store N * 5^e:
4145 // log2(N * 5^e) == log2(N) + e * log2(5)
4146 // <= semantics->precision + e * 137 / 59
4147 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4148
4149 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4150
4151 // Multiply significand by 5^e.
4152 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4153 significand = significand.zext(precision);
4154 APInt five_to_the_i(precision, 5);
4155 while (true) {
4156 if (texp & 1)
4157 significand *= five_to_the_i;
4158
4159 texp >>= 1;
4160 if (!texp)
4161 break;
4162 five_to_the_i *= five_to_the_i;
4163 }
4164 }
4165
4166 AdjustToPrecision(significand, exp, FormatPrecision);
4167
4169
4170 // Fill the buffer.
4171 unsigned precision = significand.getBitWidth();
4172 if (precision < 4) {
4173 // We need enough precision to store the value 10.
4174 precision = 4;
4175 significand = significand.zext(precision);
4176 }
4177 APInt ten(precision, 10);
4178 APInt digit(precision, 0);
4179
4180 bool inTrail = true;
4181 while (significand != 0) {
4182 // digit <- significand % 10
4183 // significand <- significand / 10
4184 APInt::udivrem(significand, ten, significand, digit);
4185
4186 unsigned d = digit.getZExtValue();
4187
4188 // Drop trailing zeros.
4189 if (inTrail && !d)
4190 exp++;
4191 else {
4192 buffer.push_back((char) ('0' + d));
4193 inTrail = false;
4194 }
4195 }
4196
4197 assert(!buffer.empty() && "no characters in buffer!");
4198
4199 // Drop down to FormatPrecision.
4200 // TODO: don't do more precise calculations above than are required.
4201 AdjustToPrecision(buffer, exp, FormatPrecision);
4202
4203 unsigned NDigits = buffer.size();
4204
4205 // Check whether we should use scientific notation.
4206 bool FormatScientific;
4207 if (!FormatMaxPadding) {
4208 FormatScientific = true;
4209 } else {
4210 if (exp >= 0) {
4211 // 765e3 --> 765000
4212 // ^^^
4213 // But we shouldn't make the number look more precise than it is.
4214 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4215 NDigits + (unsigned) exp > FormatPrecision);
4216 } else {
4217 // Power of the most significant digit.
4218 int MSD = exp + (int) (NDigits - 1);
4219 if (MSD >= 0) {
4220 // 765e-2 == 7.65
4221 FormatScientific = false;
4222 } else {
4223 // 765e-5 == 0.00765
4224 // ^ ^^
4225 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4226 }
4227 }
4228 }
4229
4230 // Scientific formatting is pretty straightforward.
4231 if (FormatScientific) {
4232 exp += (NDigits - 1);
4233
4234 Str.push_back(buffer[NDigits-1]);
4235 Str.push_back('.');
4236 if (NDigits == 1 && TruncateZero)
4237 Str.push_back('0');
4238 else
4239 for (unsigned I = 1; I != NDigits; ++I)
4240 Str.push_back(buffer[NDigits-1-I]);
4241 // Fill with zeros up to FormatPrecision.
4242 if (!TruncateZero && FormatPrecision > NDigits - 1)
4243 Str.append(FormatPrecision - NDigits + 1, '0');
4244 // For !TruncateZero we use lower 'e'.
4245 Str.push_back(TruncateZero ? 'E' : 'e');
4246
4247 Str.push_back(exp >= 0 ? '+' : '-');
4248 if (exp < 0)
4249 exp = -exp;
4250 SmallVector<char, 6> expbuf;
4251 do {
4252 expbuf.push_back((char) ('0' + (exp % 10)));
4253 exp /= 10;
4254 } while (exp);
4255 // Exponent always at least two digits if we do not truncate zeros.
4256 if (!TruncateZero && expbuf.size() < 2)
4257 expbuf.push_back('0');
4258 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4259 Str.push_back(expbuf[E-1-I]);
4260 return;
4261 }
4262
4263 // Non-scientific, positive exponents.
4264 if (exp >= 0) {
4265 for (unsigned I = 0; I != NDigits; ++I)
4266 Str.push_back(buffer[NDigits-1-I]);
4267 for (unsigned I = 0; I != (unsigned) exp; ++I)
4268 Str.push_back('0');
4269 return;
4270 }
4271
4272 // Non-scientific, negative exponents.
4273
4274 // The number of digits to the left of the decimal point.
4275 int NWholeDigits = exp + (int) NDigits;
4276
4277 unsigned I = 0;
4278 if (NWholeDigits > 0) {
4279 for (; I != (unsigned) NWholeDigits; ++I)
4280 Str.push_back(buffer[NDigits-I-1]);
4281 Str.push_back('.');
4282 } else {
4283 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4284
4285 Str.push_back('0');
4286 Str.push_back('.');
4287 for (unsigned Z = 1; Z != NZeros; ++Z)
4288 Str.push_back('0');
4289 }
4290
4291 for (; I != NDigits; ++I)
4292 Str.push_back(buffer[NDigits-I-1]);
4293
4294 }
4295} // namespace
4296
4297void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4298 unsigned FormatMaxPadding, bool TruncateZero) const {
4299 switch (category) {
4300 case fcInfinity:
4301 if (isNegative())
4302 return append(Str, "-Inf");
4303 else
4304 return append(Str, "+Inf");
4305
4306 case fcNaN: return append(Str, "NaN");
4307
4308 case fcZero:
4309 if (isNegative())
4310 Str.push_back('-');
4311
4312 if (!FormatMaxPadding) {
4313 if (TruncateZero)
4314 append(Str, "0.0E+0");
4315 else {
4316 append(Str, "0.0");
4317 if (FormatPrecision > 1)
4318 Str.append(FormatPrecision - 1, '0');
4319 append(Str, "e+00");
4320 }
4321 } else {
4322 Str.push_back('0');
4323 }
4324 return;
4325
4326 case fcNormal:
4327 break;
4328 }
4329
4330 // Decompose the number into an APInt and an exponent.
4331 int exp = exponent - ((int) semantics->precision - 1);
4332 APInt significand(
4333 semantics->precision,
4334 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4335
4336 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4337 FormatMaxPadding, TruncateZero);
4338
4339}
4340
4342 if (!isFinite() || isZero())
4343 return INT_MIN;
4344
4345 const integerPart *Parts = significandParts();
4346 const int PartCount = partCountForBits(semantics->precision);
4347
4348 int PopCount = 0;
4349 for (int i = 0; i < PartCount; ++i) {
4350 PopCount += llvm::popcount(Parts[i]);
4351 if (PopCount > 1)
4352 return INT_MIN;
4353 }
4354
4355 if (exponent != semantics->minExponent)
4356 return exponent;
4357
4358 int CountrParts = 0;
4359 for (int i = 0; i < PartCount;
4360 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4361 if (Parts[i] != 0) {
4362 return exponent - semantics->precision + CountrParts +
4363 llvm::countr_zero(Parts[i]) + 1;
4364 }
4365 }
4366
4367 llvm_unreachable("didn't find the set bit");
4368}
4369
4371 if (!isNaN())
4372 return false;
4373 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4374 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4375 return false;
4376
4377 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4378 // first bit of the trailing significand being 0.
4379 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4380}
4381
4382/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4383///
4384/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4385/// appropriate sign switching before/after the computation.
4387 // If we are performing nextDown, swap sign so we have -x.
4388 if (nextDown)
4389 changeSign();
4390
4391 // Compute nextUp(x)
4392 opStatus result = opOK;
4393
4394 // Handle each float category separately.
4395 switch (category) {
4396 case fcInfinity:
4397 // nextUp(+inf) = +inf
4398 if (!isNegative())
4399 break;
4400 // nextUp(-inf) = -getLargest()
4401 makeLargest(true);
4402 break;
4403 case fcNaN:
4404 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4405 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4406 // change the payload.
4407 if (isSignaling()) {
4408 result = opInvalidOp;
4409 // For consistency, propagate the sign of the sNaN to the qNaN.
4410 makeNaN(false, isNegative(), nullptr);
4411 }
4412 break;
4413 case fcZero:
4414 // nextUp(pm 0) = +getSmallest()
4415 makeSmallest(false);
4416 break;
4417 case fcNormal:
4418 // nextUp(-getSmallest()) = -0
4419 if (isSmallest() && isNegative()) {
4420 APInt::tcSet(significandParts(), 0, partCount());
4421 category = fcZero;
4422 exponent = 0;
4423 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4424 sign = false;
4425 if (!semantics->hasZero)
4427 break;
4428 }
4429
4430 if (isLargest() && !isNegative()) {
4431 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4432 // nextUp(getLargest()) == NAN
4433 makeNaN();
4434 break;
4435 } else if (semantics->nonFiniteBehavior ==
4437 // nextUp(getLargest()) == getLargest()
4438 break;
4439 } else {
4440 // nextUp(getLargest()) == INFINITY
4441 APInt::tcSet(significandParts(), 0, partCount());
4442 category = fcInfinity;
4443 exponent = semantics->maxExponent + 1;
4444 break;
4445 }
4446 }
4447
4448 // nextUp(normal) == normal + inc.
4449 if (isNegative()) {
4450 // If we are negative, we need to decrement the significand.
4451
4452 // We only cross a binade boundary that requires adjusting the exponent
4453 // if:
4454 // 1. exponent != semantics->minExponent. This implies we are not in the
4455 // smallest binade or are dealing with denormals.
4456 // 2. Our significand excluding the integral bit is all zeros.
4457 bool WillCrossBinadeBoundary =
4458 exponent != semantics->minExponent && isSignificandAllZeros();
4459
4460 // Decrement the significand.
4461 //
4462 // We always do this since:
4463 // 1. If we are dealing with a non-binade decrement, by definition we
4464 // just decrement the significand.
4465 // 2. If we are dealing with a normal -> normal binade decrement, since
4466 // we have an explicit integral bit the fact that all bits but the
4467 // integral bit are zero implies that subtracting one will yield a
4468 // significand with 0 integral bit and 1 in all other spots. Thus we
4469 // must just adjust the exponent and set the integral bit to 1.
4470 // 3. If we are dealing with a normal -> denormal binade decrement,
4471 // since we set the integral bit to 0 when we represent denormals, we
4472 // just decrement the significand.
4473 integerPart *Parts = significandParts();
4474 APInt::tcDecrement(Parts, partCount());
4475
4476 if (WillCrossBinadeBoundary) {
4477 // Our result is a normal number. Do the following:
4478 // 1. Set the integral bit to 1.
4479 // 2. Decrement the exponent.
4480 APInt::tcSetBit(Parts, semantics->precision - 1);
4481 exponent--;
4482 }
4483 } else {
4484 // If we are positive, we need to increment the significand.
4485
4486 // We only cross a binade boundary that requires adjusting the exponent if
4487 // the input is not a denormal and all of said input's significand bits
4488 // are set. If all of said conditions are true: clear the significand, set
4489 // the integral bit to 1, and increment the exponent. If we have a
4490 // denormal always increment since moving denormals and the numbers in the
4491 // smallest normal binade have the same exponent in our representation.
4492 // If there are only exponents, any increment always crosses the
4493 // BinadeBoundary.
4494 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4495 (!isDenormal() && isSignificandAllOnes());
4496
4497 if (WillCrossBinadeBoundary) {
4498 integerPart *Parts = significandParts();
4499 APInt::tcSet(Parts, 0, partCount());
4500 APInt::tcSetBit(Parts, semantics->precision - 1);
4501 assert(exponent != semantics->maxExponent &&
4502 "We can not increment an exponent beyond the maxExponent allowed"
4503 " by the given floating point semantics.");
4504 exponent++;
4505 } else {
4506 incrementSignificand();
4507 }
4508 }
4509 break;
4510 }
4511
4512 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4513 if (nextDown)
4514 changeSign();
4515
4516 return result;
4517}
4518
4519APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4520 return ::exponentNaN(*semantics);
4521}
4522
4523APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4524 return ::exponentInf(*semantics);
4525}
4526
4527APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4528 return ::exponentZero(*semantics);
4529}
4530
4531void IEEEFloat::makeInf(bool Negative) {
4532 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4533 llvm_unreachable("This floating point format does not support Inf");
4534
4535 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4536 // There is no Inf, so make NaN instead.
4537 makeNaN(false, Negative);
4538 return;
4539 }
4540 category = fcInfinity;
4541 sign = Negative;
4542 exponent = exponentInf();
4543 APInt::tcSet(significandParts(), 0, partCount());
4544}
4545
4546void IEEEFloat::makeZero(bool Negative) {
4547 if (!semantics->hasZero)
4548 llvm_unreachable("This floating point format does not support Zero");
4549
4550 category = fcZero;
4551 sign = Negative;
4552 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4553 // Merge negative zero to positive because 0b10000...000 is used for NaN
4554 sign = false;
4555 }
4556 exponent = exponentZero();
4557 APInt::tcSet(significandParts(), 0, partCount());
4558}
4559
4561 assert(isNaN());
4562 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4563 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4564}
4565
4566int ilogb(const IEEEFloat &Arg) {
4567 if (Arg.isNaN())
4568 return APFloat::IEK_NaN;
4569 if (Arg.isZero())
4570 return APFloat::IEK_Zero;
4571 if (Arg.isInfinity())
4572 return APFloat::IEK_Inf;
4573 if (!Arg.isDenormal())
4574 return Arg.exponent;
4575
4576 IEEEFloat Normalized(Arg);
4577 int SignificandBits = Arg.getSemantics().precision - 1;
4578
4579 Normalized.exponent += SignificandBits;
4580 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4581 return Normalized.exponent - SignificandBits;
4582}
4583
4585 auto MaxExp = X.getSemantics().maxExponent;
4586 auto MinExp = X.getSemantics().minExponent;
4587
4588 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4589 // overflow; clamp it to a safe range before adding, but ensure that the range
4590 // is large enough that the clamp does not change the result. The range we
4591 // need to support is the difference between the largest possible exponent and
4592 // the normalized exponent of half the smallest denormal.
4593
4594 int SignificandBits = X.getSemantics().precision - 1;
4595 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4596
4597 // Clamp to one past the range ends to let normalize handle overlflow.
4598 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4599 X.normalize(RoundingMode, lfExactlyZero);
4600 if (X.isNaN())
4601 X.makeQuiet();
4602 return X;
4603}
4604
4605IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4606 Exp = ilogb(Val);
4607
4608 // Quiet signalling nans.
4609 if (Exp == APFloat::IEK_NaN) {
4610 IEEEFloat Quiet(Val);
4611 Quiet.makeQuiet();
4612 return Quiet;
4613 }
4614
4615 if (Exp == APFloat::IEK_Inf)
4616 return Val;
4617
4618 // 1 is added because frexp is defined to return a normalized fraction in
4619 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4620 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4621 return scalbn(Val, -Exp, RM);
4622}
4623
4625 : Semantics(&S),
4626 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4627 APFloat(APFloatBase::semIEEEdouble)}) {
4628 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4629}
4630
4632 : Semantics(&S), Floats(new APFloat[2]{
4633 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4634 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4635 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4636}
4637
4639 : Semantics(&S),
4640 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4641 APFloat(APFloatBase::semIEEEdouble)}) {
4642 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4643}
4644
4646 : Semantics(&S),
4647 Floats(new APFloat[2]{
4648 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4649 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4650 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4651}
4652
4654 APFloat &&Second)
4655 : Semantics(&S),
4656 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4657 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4658 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4659 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4660}
4661
4663 : Semantics(RHS.Semantics),
4664 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4665 APFloat(RHS.Floats[1])}
4666 : nullptr) {
4667 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4668}
4669
4671 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4672 RHS.Semantics = &APFloatBase::semBogus;
4673 RHS.Floats = nullptr;
4674 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4675}
4676
4678 if (Semantics == RHS.Semantics && RHS.Floats) {
4679 Floats[0] = RHS.Floats[0];
4680 Floats[1] = RHS.Floats[1];
4681 } else if (this != &RHS) {
4682 this->~DoubleAPFloat();
4683 new (this) DoubleAPFloat(RHS);
4684 }
4685 return *this;
4686}
4687
4688// Returns a result such that:
4689// 1. abs(Lo) <= ulp(Hi)/2
4690// 2. Hi == RTNE(Hi + Lo)
4691// 3. Hi + Lo == X + Y
4692//
4693// Requires that log2(X) >= log2(Y).
4694static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4695 if (!X.isFinite())
4696 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4697 APFloat Hi = X + Y;
4698 APFloat Delta = Hi - X;
4699 APFloat Lo = Y - Delta;
4700 return {Hi, Lo};
4701}
4702
4703// Implement addition, subtraction, multiplication and division based on:
4704// "Software for Doubled-Precision Floating-Point Computations",
4705// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4706APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4707 const APFloat &c, const APFloat &cc,
4708 roundingMode RM) {
4709 int Status = opOK;
4710 APFloat z = a;
4711 Status |= z.add(c, RM);
4712 if (!z.isFinite()) {
4713 if (!z.isInfinity()) {
4714 Floats[0] = std::move(z);
4715 Floats[1].makeZero(/* Neg = */ false);
4716 return (opStatus)Status;
4717 }
4718 Status = opOK;
4719 auto AComparedToC = a.compareAbsoluteValue(c);
4720 z = cc;
4721 Status |= z.add(aa, RM);
4722 if (AComparedToC == APFloat::cmpGreaterThan) {
4723 // z = cc + aa + c + a;
4724 Status |= z.add(c, RM);
4725 Status |= z.add(a, RM);
4726 } else {
4727 // z = cc + aa + a + c;
4728 Status |= z.add(a, RM);
4729 Status |= z.add(c, RM);
4730 }
4731 if (!z.isFinite()) {
4732 Floats[0] = std::move(z);
4733 Floats[1].makeZero(/* Neg = */ false);
4734 return (opStatus)Status;
4735 }
4736 Floats[0] = z;
4737 APFloat zz = aa;
4738 Status |= zz.add(cc, RM);
4739 if (AComparedToC == APFloat::cmpGreaterThan) {
4740 // Floats[1] = a - z + c + zz;
4741 Floats[1] = a;
4742 Status |= Floats[1].subtract(z, RM);
4743 Status |= Floats[1].add(c, RM);
4744 Status |= Floats[1].add(zz, RM);
4745 } else {
4746 // Floats[1] = c - z + a + zz;
4747 Floats[1] = c;
4748 Status |= Floats[1].subtract(z, RM);
4749 Status |= Floats[1].add(a, RM);
4750 Status |= Floats[1].add(zz, RM);
4751 }
4752 } else {
4753 // q = a - z;
4754 APFloat q = a;
4755 Status |= q.subtract(z, RM);
4756
4757 // zz = q + c + (a - (q + z)) + aa + cc;
4758 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4759 auto zz = q;
4760 Status |= zz.add(c, RM);
4761 Status |= q.add(z, RM);
4762 Status |= q.subtract(a, RM);
4763 q.changeSign();
4764 Status |= zz.add(q, RM);
4765 Status |= zz.add(aa, RM);
4766 Status |= zz.add(cc, RM);
4767 if (zz.isZero() && !zz.isNegative()) {
4768 Floats[0] = std::move(z);
4769 Floats[1].makeZero(/* Neg = */ false);
4770 return opOK;
4771 }
4772 Floats[0] = z;
4773 Status |= Floats[0].add(zz, RM);
4774 if (!Floats[0].isFinite()) {
4775 Floats[1].makeZero(/* Neg = */ false);
4776 return (opStatus)Status;
4777 }
4778 Floats[1] = std::move(z);
4779 Status |= Floats[1].subtract(Floats[0], RM);
4780 Status |= Floats[1].add(zz, RM);
4781 }
4782 return (opStatus)Status;
4783}
4784
4785APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4786 const DoubleAPFloat &RHS,
4787 DoubleAPFloat &Out,
4788 roundingMode RM) {
4789 if (LHS.getCategory() == fcNaN) {
4790 Out = LHS;
4791 return opOK;
4792 }
4793 if (RHS.getCategory() == fcNaN) {
4794 Out = RHS;
4795 return opOK;
4796 }
4797 if (LHS.getCategory() == fcZero) {
4798 Out = RHS;
4799 return opOK;
4800 }
4801 if (RHS.getCategory() == fcZero) {
4802 Out = LHS;
4803 return opOK;
4804 }
4805 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4806 LHS.isNegative() != RHS.isNegative()) {
4807 Out.makeNaN(false, Out.isNegative(), nullptr);
4808 return opInvalidOp;
4809 }
4810 if (LHS.getCategory() == fcInfinity) {
4811 Out = LHS;
4812 return opOK;
4813 }
4814 if (RHS.getCategory() == fcInfinity) {
4815 Out = RHS;
4816 return opOK;
4817 }
4818 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4819
4820 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4821 CC(RHS.Floats[1]);
4822 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4823 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4824 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4825 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4826 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4827 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4828 return Out.addImpl(A, AA, C, CC, RM);
4829}
4830
4832 roundingMode RM) {
4833 return addWithSpecial(*this, RHS, *this, RM);
4834}
4835
4837 roundingMode RM) {
4838 changeSign();
4839 auto Ret = add(RHS, RM);
4840 changeSign();
4841 return Ret;
4842}
4843
4846 const auto &LHS = *this;
4847 auto &Out = *this;
4848 /* Interesting observation: For special categories, finding the lowest
4849 common ancestor of the following layered graph gives the correct
4850 return category:
4851
4852 NaN
4853 / \
4854 Zero Inf
4855 \ /
4856 Normal
4857
4858 e.g. NaN * NaN = NaN
4859 Zero * Inf = NaN
4860 Normal * Zero = Zero
4861 Normal * Inf = Inf
4862 */
4863 if (LHS.getCategory() == fcNaN) {
4864 Out = LHS;
4865 return opOK;
4866 }
4867 if (RHS.getCategory() == fcNaN) {
4868 Out = RHS;
4869 return opOK;
4870 }
4871 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4872 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4873 Out.makeNaN(false, false, nullptr);
4874 return opOK;
4875 }
4876 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4877 Out = LHS;
4878 return opOK;
4879 }
4880 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4881 Out = RHS;
4882 return opOK;
4883 }
4884 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4885 "Special cases not handled exhaustively");
4886
4887 int Status = opOK;
4888 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4889 // t = a * c
4890 APFloat T = A;
4891 Status |= T.multiply(C, RM);
4892 if (!T.isFiniteNonZero()) {
4893 Floats[0] = std::move(T);
4894 Floats[1].makeZero(/* Neg = */ false);
4895 return (opStatus)Status;
4896 }
4897
4898 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4899 APFloat Tau = A;
4900 T.changeSign();
4901 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4902 T.changeSign();
4903 {
4904 // v = a * d
4905 APFloat V = A;
4906 Status |= V.multiply(D, RM);
4907 // w = b * c
4908 APFloat W = B;
4909 Status |= W.multiply(C, RM);
4910 Status |= V.add(W, RM);
4911 // tau += v + w
4912 Status |= Tau.add(V, RM);
4913 }
4914 // u = t + tau
4915 APFloat U = T;
4916 Status |= U.add(Tau, RM);
4917
4918 Floats[0] = U;
4919 if (!U.isFinite()) {
4920 Floats[1].makeZero(/* Neg = */ false);
4921 } else {
4922 // Floats[1] = (t - u) + tau
4923 Status |= T.subtract(U, RM);
4924 Status |= T.add(Tau, RM);
4925 Floats[1] = std::move(T);
4926 }
4927 return (opStatus)Status;
4928}
4929
4932 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4933 "Unexpected Semantics");
4934 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4935 auto Ret = Tmp.divide(
4936 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4937 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4938 return Ret;
4939}
4940
4942 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4943 "Unexpected Semantics");
4944 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4945 auto Ret = Tmp.remainder(
4946 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4947 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4948 return Ret;
4949}
4950
4952 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4953 "Unexpected Semantics");
4954 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4955 auto Ret = Tmp.mod(
4956 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4957 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4958 return Ret;
4959}
4960
4963 const DoubleAPFloat &Addend,
4965 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4966 "Unexpected Semantics");
4967 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4968 auto Ret = Tmp.fusedMultiplyAdd(
4969 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
4970 Multiplicand.bitcastToAPInt()),
4971 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
4972 RM);
4973 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4974 return Ret;
4975}
4976
4978 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4979 "Unexpected Semantics");
4980 const APFloat &Hi = getFirst();
4981 const APFloat &Lo = getSecond();
4982
4983 APFloat RoundedHi = Hi;
4984 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
4985
4986 // We can reduce the problem to just the high part if the input:
4987 // 1. Represents a non-finite value.
4988 // 2. Has a component which is zero.
4989 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
4990 Floats[0] = std::move(RoundedHi);
4991 Floats[1].makeZero(/*Neg=*/false);
4992 return HiStatus;
4993 }
4994
4995 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
4996 // halfway point.
4997 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
4998 APFloat TieBreaker) {
4999 // RoundingError tells us which direction we rounded:
5000 // - RoundingError > 0: we rounded up.
5001 // - RoundingError < 0: we rounded down.
5002 // Sterbenz' lemma ensures that RoundingError is exact.
5003 const APFloat RoundingError = Rounded - ToRound;
5004 if (TieBreaker.isNonZero() &&
5005 TieBreaker.isNegative() != RoundingError.isNegative() &&
5006 abs(RoundingError).isExactlyValue(0.5))
5007 Rounded.add(
5008 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5010 return Rounded;
5011 };
5012
5013 // Case 1: Hi is not an integer.
5014 // Special cases are for rounding modes that are sensitive to ties.
5015 if (RoundedHi != Hi) {
5016 // We need to consider the case where Hi was between two integers and the
5017 // rounding mode broke the tie when, in fact, Lo may have had a different
5018 // sign than Hi.
5019 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5020 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5021
5022 Floats[0] = std::move(RoundedHi);
5023 Floats[1].makeZero(/*Neg=*/false);
5024 return HiStatus;
5025 }
5026
5027 // Case 2: Hi is an integer.
5028 // Special cases are for rounding modes which are rounding towards or away from zero.
5029 RoundingMode LoRoundingMode;
5030 if (RM == rmTowardZero)
5031 // When our input is positive, we want the Lo component rounded toward
5032 // negative infinity to get the smallest result magnitude. Likewise,
5033 // negative inputs want the Lo component rounded toward positive infinity.
5034 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5035 else
5036 LoRoundingMode = RM;
5037
5038 APFloat RoundedLo = Lo;
5039 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5040 if (LoRoundingMode == rmNearestTiesToAway)
5041 // We need to consider the case where Lo was between two integers and the
5042 // rounding mode broke the tie when, in fact, Hi may have had a different
5043 // sign than Lo.
5044 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5045
5046 // We must ensure that the final result has no overlap between the two APFloat values.
5047 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5048
5049 Floats[0] = std::move(RoundedHi);
5050 Floats[1] = std::move(RoundedLo);
5051 return LoStatus;
5052}
5053
5055 Floats[0].changeSign();
5056 Floats[1].changeSign();
5057}
5058
5061 // Compare absolute values of the high parts.
5062 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5063 if (HiPartCmp != cmpEqual)
5064 return HiPartCmp;
5065
5066 // Zero, regardless of sign, is equal.
5067 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5068 return cmpEqual;
5069
5070 // At this point, |this->Hi| == |RHS.Hi|.
5071 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5072 // same, and Hi-|Lo| if signs are different.
5073 const bool ThisIsSubtractive =
5074 Floats[0].isNegative() != Floats[1].isNegative();
5075 const bool RHSIsSubtractive =
5076 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5077
5078 // Case 1: The low part of 'this' is zero.
5079 if (Floats[1].isZero())
5080 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5081 // If RHS is subtractive, its magnitude is smaller.
5082 // If RHS is additive, its magnitude is larger.
5083 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5084
5085 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5086 if (RHS.Floats[1].isZero())
5087 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5088 // If 'this' is subtractive, its magnitude is smaller.
5089 // If 'this' is additive, its magnitude is larger.
5090 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5091
5092 // If their natures differ, the additive one is larger.
5093 if (ThisIsSubtractive != RHSIsSubtractive)
5094 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5095
5096 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5097 // The comparison now depends on the magnitude of the low parts.
5098 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5099
5100 if (ThisIsSubtractive) {
5101 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5102 if (LoPartCmp == cmpLessThan)
5103 return cmpGreaterThan;
5104 if (LoPartCmp == cmpGreaterThan)
5105 return cmpLessThan;
5106 }
5107
5108 // If additive, the comparison of |Lo| is direct.
5109 // If equal, they are equal.
5110 return LoPartCmp;
5111}
5112
5114 return Floats[0].getCategory();
5115}
5116
5117bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5118
5120 Floats[0].makeInf(Neg);
5121 Floats[1].makeZero(/* Neg = */ false);
5122}
5123
5125 Floats[0].makeZero(Neg);
5126 Floats[1].makeZero(/* Neg = */ false);
5127}
5128
5130 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5131 "Unexpected Semantics");
5132 Floats[0] =
5133 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5134 Floats[1] =
5135 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5136 if (Neg)
5137 changeSign();
5138}
5139
5141 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5142 "Unexpected Semantics");
5143 Floats[0].makeSmallest(Neg);
5144 Floats[1].makeZero(/* Neg = */ false);
5145}
5146
5148 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5149 "Unexpected Semantics");
5150 Floats[0] =
5151 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5152 if (Neg)
5153 Floats[0].changeSign();
5154 Floats[1].makeZero(/* Neg = */ false);
5155}
5156
5157void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5158 Floats[0].makeNaN(SNaN, Neg, fill);
5159 Floats[1].makeZero(/* Neg = */ false);
5160}
5161
5163 auto Result = Floats[0].compare(RHS.Floats[0]);
5164 // |Float[0]| > |Float[1]|
5165 if (Result == APFloat::cmpEqual)
5166 return Floats[1].compare(RHS.Floats[1]);
5167 return Result;
5168}
5169
5171 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5172 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5173}
5174
5176 if (Arg.Floats)
5177 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5178 return hash_combine(Arg.Semantics);
5179}
5180
5182 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5183 "Unexpected Semantics");
5184 uint64_t Data[] = {
5185 Floats[0].bitcastToAPInt().getRawData()[0],
5186 Floats[1].bitcastToAPInt().getRawData()[0],
5187 };
5188 return APInt(128, Data);
5189}
5190
5192 roundingMode RM) {
5193 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5194 "Unexpected Semantics");
5195 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5196 auto Ret = Tmp.convertFromString(S, RM);
5197 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5198 return Ret;
5199}
5200
5201// The double-double lattice of values corresponds to numbers which obey:
5202// - abs(lo) <= 1/2 * ulp(hi)
5203// - roundTiesToEven(hi + lo) == hi
5204//
5205// nextUp must choose the smallest output > input that follows these rules.
5206 // nextDown must choose the largest output < input that follows these rules.
5208 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5209 "Unexpected Semantics");
5210 // nextDown(x) = -nextUp(-x)
5211 if (nextDown) {
5212 changeSign();
5213 APFloat::opStatus Result = next(/*nextDown=*/false);
5214 changeSign();
5215 return Result;
5216 }
5217 switch (getCategory()) {
5218 case fcInfinity:
5219 // nextUp(+inf) = +inf
5220 // nextUp(-inf) = -getLargest()
5221 if (isNegative())
5222 makeLargest(true);
5223 return opOK;
5224
5225 case fcNaN:
5226 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5227 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5228 // change the payload.
5229 if (getFirst().isSignaling()) {
5230 // For consistency, propagate the sign of the sNaN to the qNaN.
5231 makeNaN(false, isNegative(), nullptr);
5232 return opInvalidOp;
5233 }
5234 return opOK;
5235
5236 case fcZero:
5237 // nextUp(pm 0) = +getSmallest()
5238 makeSmallest(false);
5239 return opOK;
5240
5241 case fcNormal:
5242 break;
5243 }
5244
5245 const APFloat &HiOld = getFirst();
5246 const APFloat &LoOld = getSecond();
5247
5248 APFloat NextLo = LoOld;
5249 NextLo.next(/*nextDown=*/false);
5250
5251 // We want to admit values where:
5252 // 1. abs(Lo) <= ulp(Hi)/2
5253 // 2. Hi == RTNE(Hi + lo)
5254 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5255 return Hi + Lo == Hi;
5256 };
5257
5258 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5259 if (InLattice(HiOld, NextLo)) {
5260 // Yes, the result is (HiOld, nextUp(LoOld)).
5261 Floats[1] = std::move(NextLo);
5262
5263 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5264 // value is defined to have exactly 106 bits of precision. This limitation
5265 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5266 // value.
5267 DoubleAPFloat Largest{*Semantics, uninitialized};
5268 Largest.makeLargest(/*Neg=*/false);
5269 if (compare(Largest) == cmpGreaterThan)
5270 makeInf(/*Neg=*/false);
5271
5272 return opOK;
5273 }
5274
5275 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5276 // correct result. We know the new hi component will be nextUp(HiOld) but our
5277 // lattice rules make it a little ambiguous what the correct NextLo must be.
5278 APFloat NextHi = HiOld;
5279 NextHi.next(/*nextDown=*/false);
5280
5281 // nextUp(getLargest()) == INFINITY
5282 if (NextHi.isInfinity()) {
5283 makeInf(/*Neg=*/false);
5284 return opOK;
5285 }
5286
5287 // IEEE 754-2019 5.3.1:
5288 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5289 // -0."
5290 if (NextHi.isZero()) {
5291 makeZero(/*Neg=*/true);
5292 return opOK;
5293 }
5294
5295 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5296 // negative infinity as possible.
5297 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5298 if (!InLattice(NextHi, NextLo))
5299 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5300 NextLo.next(/*nextDown=*/false);
5301
5302 Floats[0] = std::move(NextHi);
5303 Floats[1] = std::move(NextLo);
5304
5305 return opOK;
5306}
5307
/// Converts this double-double value to an integer of \p Width bits and
/// writes the bit pattern into \p Input.
///
/// \param Input    destination words for the integer result.
/// \param Width    width, in bits, of the destination integer.
/// \param IsSigned whether the destination integer is signed.
/// \param RM       rounding mode used to round the value to an integer.
/// \param IsExact  out-parameter; set to true only when the rounding step
///                 reported opOK on the paths handled here. It is written
///                 unconditionally once the fast path below is not taken.
/// \returns opInvalidOp when the value cannot be represented (see the
///          individual checks below); otherwise the status of the rounding
///          step, or opInexact / the high part's status on the
///          single-component path.
5308APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5309 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5310 roundingMode RM, bool *IsExact) const {
5311 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5312 "Unexpected Semantics");
5313
5314 // If Hi is not finite, or Lo is zero, the value is entirely represented
5315 // by Hi. Delegate to the simpler single-APFloat conversion.
5316 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5317 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5318
5319 // First, round the full double-double value to an integral value. This
5320 // simplifies the rest of the function, as we no longer need to consider
5321 // fractional parts.
// Start pessimistic: every early return below leaves *IsExact == false.
5322 *IsExact = false;
5323 DoubleAPFloat Integral = *this;
5324 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5325 if (RoundStatus == opInvalidOp)
5326 return opInvalidOp;
5327 const APFloat &IntegralHi = Integral.getFirst();
5328 const APFloat &IntegralLo = Integral.getSecond();
5329
5330 // If rounding results in either component being zero, the sum is trivial.
5331 // Delegate to the simpler single-APFloat conversion.
5332 bool HiIsExact;
5333 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5334 const opStatus HiStatus =
5335 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5336 // The conversion from an integer-valued float to an APInt may fail if the
5337 // result would be out of range. Regardless, taking this path is only
5338 // possible if rounding occurred during the initial `roundToIntegral`.
5339 return HiStatus == opOK ? opInexact : HiStatus;
5340 }
5341
5342 // A negative number cannot be represented by an unsigned integer.
5343 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5344 if (!IsSigned && IntegralHi.isNegative())
5345 return opInvalidOp;
5346
5347 // Handle the special boundary case where |Hi| is exactly the power of two
5348 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5349 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5350 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5351 // signed, N for unsigned).
5352 bool LoIsExact;
5353 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5354 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5355 if (HiExactLog2 >= 0 &&
5356 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5357 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5358 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5359 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5360 return opInvalidOp;
5361
5362 // If the signs differ, the sum will fit. We can compute the result using
5363 // properties of two's complement arithmetic without a wide intermediate
5364 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5365 const opStatus LoStatus = IntegralLo.convertToInteger(
5366 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5367 if (LoStatus == opInvalidOp)
5368 return opInvalidOp;
5369
5370 // Adjust the bit pattern of Lo to account for Hi's value:
5371 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5372 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5373 // already produced the correct final bit pattern.
5374 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5375 // can be computed by taking the two's complement pattern for `Lo` and
5376 // clearing the sign bit.
// NOTE(review): the adjustment below only runs for a non-negative Hi. When
// IsSigned and Hi is negative (Hi == -2^(Width-1)) with a positive Lo, Input
// is left holding the converted Lo pattern unchanged -- confirm that this is
// the intended encoding of Hi + Lo for that case.
5377 if (IsSigned && !IntegralHi.isNegative())
5378 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5379 *IsExact = RoundStatus == opOK;
5380 return RoundStatus;
5381 }
5382
5383 // Convert Hi into an integer. This may not fit but that is OK: we know that
5384 // Hi + Lo would not fit either in this situation.
5385 const opStatus HiStatus = IntegralHi.convertToInteger(
5386 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5387 if (HiStatus == opInvalidOp)
5388 return HiStatus;
5389
5390 // Convert Lo into a temporary integer of the same width.
5391 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5392 const opStatus LoStatus =
5393 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5394 if (LoStatus == opInvalidOp)
5395 return LoStatus;
5396
5397 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5398 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5399 // where the sum could cross the integer type's boundary is when Hi is a
5400 // power of two, which is handled by the special case block above.
// NOTE(review): the addition spans Input.size() words while LoResult was
// created with Width bits -- presumably callers size Input to match Width;
// verify against callers.
5401 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5402
5403 *IsExact = RoundStatus == opOK;
5404 return RoundStatus;
5405}
5406
5409 unsigned int Width, bool IsSigned,
5410 roundingMode RM, bool *IsExact) const {
5411 opStatus FS =
5412 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5413
5414 if (FS == opInvalidOp) {
5415 const unsigned DstPartsCount = partCountForBits(Width);
5416 assert(DstPartsCount <= Input.size() && "Integer too big");
5417
5418 unsigned Bits;
5419 if (getCategory() == fcNaN)
5420 Bits = 0;
5421 else if (isNegative())
5422 Bits = IsSigned;
5423 else
5424 Bits = Width - IsSigned;
5425
5426 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5427 if (isNegative() && IsSigned)
5428 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5429 }
5430
5431 return FS;
5432}
5433
/// Replaces this value with the result of an overflow under rounding mode
/// \p RM: depending on the branch taken, either the largest finite value
/// (makeLargest) or infinity (makeInf), with the sign derived from
/// isNegative(). An unrecognised rounding mode hits llvm_unreachable.
///
/// \returns opInexact, with opOverflow additionally set when the resulting
///          high component is not finite.
///
/// NOTE(review): the `case` labels of the switch are not present in this
/// listing, so which rounding mode selects which branch cannot be stated
/// here -- confirm against the original source.
5434APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5435 switch (RM) {
5437 makeLargest(/*Neg=*/isNegative());
5438 break;
5440 if (isNegative())
5441 makeInf(/*Neg=*/true);
5442 else
5443 makeLargest(/*Neg=*/false);
5444 break;
5446 if (isNegative())
5447 makeLargest(/*Neg=*/true);
5448 else
5449 makeInf(/*Neg=*/false);
5450 break;
5453 makeInf(/*Neg=*/isNegative());
5454 break;
5455 default:
5456 llvm_unreachable("Invalid rounding mode found");
5457 }
// The result is always inexact; it is also an overflow only if we actually
// produced a non-finite value above.
5458 opStatus S = opInexact;
5459 if (!getFirst().isFinite())
5460 S = static_cast<opStatus>(S | opOverflow);
5461 return S;
5462}
5463
5464APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5465 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5466 // Find the most significant bit of the source integer. APInt::tcMSB returns
5467 // UINT_MAX for a zero value.
5468 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5469 if (SrcMSB == UINT_MAX) {
5470 // The source integer is 0.
5471 makeZero(/*Neg=*/false);
5472 return opOK;
5473 }
5474
5475 // Create a minimally-sized APInt to represent the source value.
5476 const unsigned SrcBitWidth = SrcMSB + 1;
5477 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5478 /*isUnsigned=*/true};
5479
5480 // Stage 1: Initial Approximation.
5481 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5482 // We use round-to-nearest because it minimizes the initial error, which is
5483 // crucial for the subsequent steps.
5485 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5486
5487 // If the first approximation already overflows, the number is too large.
5488 // NOTE: The underlying semantics are *more* conservative when choosing to
5489 // overflow because their notion of ULP is much larger. As such, it is always
5490 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5491 if (!Hi.isFinite())
5492 return handleOverflow(RM);
5493
5494 // Stage 2: Exact Error Calculation.
5495 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5496 // This is done by converting Hi back to an integer and subtracting it from
5497 // the original source.
5498 bool HiAsIntIsExact;
5499 // Create an integer representation of Hi. Its width is determined by the
5500 // exponent of Hi, ensuring it's just large enough to represent Hi
5501 // accurately when converted back to an integer. This width can exceed
5502 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5503 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5504 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5505 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5506
5507 // Stage 3: Error Approximation and Rounding.
5508 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5509 // captures the remainder of the original number. The rounding mode for this
5510 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5511 // ensure the final sum (Hi + Lo) rounds correctly.
5512 roundingMode LoRM = RM;
5513 // Adjustments are only necessary when the initial approximation Hi was an
5514 // overestimate, making the Error negative.
5515 if (Error.isNegative()) {
5516 if (RM == rmNearestTiesToAway) {
5517 // For rmNearestTiesToAway, a tie should round away from zero. Since
5518 // SrcInt is positive, this means rounding toward +infinity.
5519 // A standard conversion of a negative Error would round ties toward
5520 // -infinity, causing the final sum Hi + Lo to be smaller. To
5521 // counteract this, we detect the tie case and override the rounding
5522 // mode for Lo to rmTowardPositive.
5523 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5524 const unsigned LoPrecision = getSecond().getSemantics().precision;
5525 if (ErrorActiveBits > LoPrecision) {
5526 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5527 // A tie occurs when the bits to be truncated are of the form 100...0.
5528 // This is detected by checking if the number of trailing zeros is
5529 // exactly one less than the number of bits being truncated.
5530 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5531 LoRM = rmTowardPositive;
5532 }
5533 } else if (RM == rmTowardZero) {
5534 // For rmTowardZero, the final positive result must be truncated (rounded
5535 // down). When Hi is an overestimate, Error is negative. A standard
5536 // rmTowardZero conversion of Error would make it *less* negative,
5537 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5538 // rounds down correctly, we force Lo to round toward -infinity.
5539 LoRM = rmTowardNegative;
5540 }
5541 }
5542
5544 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5545
5546 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5547 // components do not overlap. fastTwoSum performs this operation.
5548 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5549 Floats[0] = std::move(Hi);
5550 Floats[1] = std::move(Lo);
5551
5552 // A final check for overflow is needed because fastTwoSum can cause a
5553 // carry-out from Lo that pushes Hi to infinity.
5554 if (!getFirst().isFinite())
5555 return handleOverflow(RM);
5556
5557 // The largest DoubleAPFloat must be canonical. Values which are larger are
5558 // not canonical and are equivalent to overflow.
5559 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5560 DoubleAPFloat Largest{*Semantics};
5561 Largest.makeLargest(/*Neg=*/false);
5562 if (compare(Largest) == APFloat::cmpGreaterThan)
5563 return handleOverflow(RM);
5564 }
5565
5566 // The final status of the operation is determined by the conversion of the
5567 // error term. If Lo could represent Error exactly, the entire conversion
5568 // is exact. Otherwise, it's inexact.
5569 return Status;
5570}
5571
5573 bool IsSigned,
5574 roundingMode RM) {
5575 const bool NegateInput = IsSigned && Input.isNegative();
5576 APInt API = Input;
5577 if (NegateInput)
5578 API.negate();
5579
5581 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5582 if (NegateInput)
5583 changeSign();
5584 return Status;
5585}
5586
5588 unsigned int HexDigits,
5589 bool UpperCase,
5590 roundingMode RM) const {
5591 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5592 "Unexpected Semantics");
5593 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5594 .convertToHexString(DST, HexDigits, UpperCase, RM);
5595}
5596
5598 return getCategory() == fcNormal &&
5599 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5600 // (double)(Hi + Lo) == Hi defines a normal number.
5601 Floats[0] != Floats[0] + Floats[1]);
5602}
5603
5605 if (getCategory() != fcNormal)
5606 return false;
5607 DoubleAPFloat Tmp(*this);
5608 Tmp.makeSmallest(this->isNegative());
5609 return Tmp.compare(*this) == cmpEqual;
5610}
5611
5613 if (getCategory() != fcNormal)
5614 return false;
5615
5616 DoubleAPFloat Tmp(*this);
5618 return Tmp.compare(*this) == cmpEqual;
5619}
5620
5622 if (getCategory() != fcNormal)
5623 return false;
5624 DoubleAPFloat Tmp(*this);
5625 Tmp.makeLargest(this->isNegative());
5626 return Tmp.compare(*this) == cmpEqual;
5627}
5628
5630 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5631 "Unexpected Semantics");
5632 return Floats[0].isInteger() && Floats[1].isInteger();
5633}
5634
5636 unsigned FormatPrecision,
5637 unsigned FormatMaxPadding,
5638 bool TruncateZero) const {
5639 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5640 "Unexpected Semantics");
5641 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5642 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5643}
5644
5646 // In order for Hi + Lo to be a power of two, the following must be true:
5647 // 1. Hi must be a power of two.
5648 // 2. Lo must be zero.
5649 if (getSecond().isNonZero())
5650 return INT_MIN;
5651 return getFirst().getExactLog2Abs();
5652}
5653
5654int ilogb(const DoubleAPFloat &Arg) {
5655 const APFloat &Hi = Arg.getFirst();
5656 const APFloat &Lo = Arg.getSecond();
5657 int IlogbResult = ilogb(Hi);
5658 // Zero and non-finite values can delegate to ilogb(Hi).
5659 if (Arg.getCategory() != fcNormal)
5660 return IlogbResult;
5661 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5662 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5663 return IlogbResult;
5664 if (Hi.getExactLog2Abs() == INT_MIN)
5665 return IlogbResult;
5666 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5667 // get nudged out of the binade by the low component.
5668 return IlogbResult - 1;
5669}
5670
5673 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5674 "Unexpected Semantics");
5676 scalbn(Arg.Floats[0], Exp, RM),
5677 scalbn(Arg.Floats[1], Exp, RM));
5678}
5679
5680DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5682 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5683 "Unexpected Semantics");
5684
5685 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5686 // [1.0, 2.0).
5687 Exp = ilogb(Arg);
5688
5689 // For NaNs, quiet any signaling NaN and return the result, as per standard
5690 // practice.
5691 if (Exp == APFloat::IEK_NaN) {
5692 DoubleAPFloat Quiet{Arg};
5693 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5694 return Quiet;
5695 }
5696
5697 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5698 if (Exp == APFloat::IEK_Inf)
5699 return Arg;
5700
5701 // For zero, the fraction is zero and the standard requires the exponent be 0.
5702 if (Exp == APFloat::IEK_Zero) {
5703 Exp = 0;
5704 return Arg;
5705 }
5706
5707 const APFloat &Hi = Arg.getFirst();
5708 const APFloat &Lo = Arg.getSecond();
5709
5710 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5711 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5712 // Increment the exponent to ensure the fraction is in the correct range.
5713 ++Exp;
5714
5715 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5716 APFloat Second = Lo;
5717 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5718 roundingMode LoRoundingMode;
5719 // The interpretation of rmTowardZero depends on the sign of the combined
5720 // Arg rather than the sign of the component.
5721 if (RM == rmTowardZero)
5722 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5723 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5724 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5725 // "away from zero" based on its own sign would move the value in the
5726 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5727 // direction-agnostic. We only need to bother with this if Lo is scaled
5728 // down.
5729 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5730 LoRoundingMode = rmNearestTiesToEven;
5731 else
5732 LoRoundingMode = RM;
5733 Second = scalbn(Lo, -Exp, LoRoundingMode);
5734 // The rmNearestTiesToEven proxy is correct most of the time, but it
5735 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5736 // exact midpoint.
5737 // NOTE: This is morally equivalent to roundTiesTowardZero.
5738 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5739 // Re-scale the result back to check if rounding occurred.
5740 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5741 if (RecomposedLo != Lo) {
5742 // RoundingError tells us which direction we rounded:
5743 // - RoundingError > 0: we rounded up.
5744 // - RoundingError < 0: we rounded down.
5745 const APFloat RoundingError = RecomposedLo - Lo;
5746 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5747 // We do this by checking if the absolute rounding error is exactly
5748 // half a ULP of the result.
5749 const APFloat UlpOfSecond = harrisonUlp(Second);
5750 const APFloat ScaledUlpOfSecond =
5751 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5752 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5753 const bool RoundedLoAway =
5754 Second.isNegative() == RoundingError.isNegative();
5755 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5756 // decrease the magnitude of Second to increase the magnitude
5757 // First+Second.
5758 if (IsMidpoint && RoundedLoAway)
5759 Second.next(/*nextDown=*/!Second.isNegative());
5760 }
5761 }
5762 // Handle a tricky edge case where Arg is slightly less than a power of two
5763 // (e.g., Arg = 2^k - epsilon). In this situation:
5764 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5765 // 2. ilogb(Arg) correctly returns k-1.
5766 // 3. Our initial Exp becomes (k-1) + 1 = k.
5767 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5768 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5769 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5770 // We detect this specific case by checking if Hi is a power of two and if
5771 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5772 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5773 // valid fraction.
5774 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5775 ++Exp;
5776 }
5777
5778 APFloat First = scalbn(Hi, -Exp, RM);
5780 std::move(Second));
5781}
5782
5783} // namespace detail
5784
5785APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5786 if (usesLayout<IEEEFloat>(Semantics)) {
5787 new (&IEEE) IEEEFloat(std::move(F));
5788 return;
5789 }
5790 if (usesLayout<DoubleAPFloat>(Semantics)) {
5791 const fltSemantics& S = F.getSemantics();
5792 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5794 return;
5795 }
5796 llvm_unreachable("Unexpected semantics");
5797}
5798
5803
5804hash_code hash_value(const APFloat &Arg) {
5805 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5806 return hash_value(Arg.U.IEEE);
5807 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5808 return hash_value(Arg.U.Double);
5809 llvm_unreachable("Unexpected semantics");
5810}
5811
5813 : APFloat(Semantics) {
5814 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5815 assert(StatusOrErr && "Invalid floating point representation");
5816 consumeError(StatusOrErr.takeError());
5817}
5818
5820 if (isZero())
5821 return isNegative() ? fcNegZero : fcPosZero;
5822 if (isNormal())
5823 return isNegative() ? fcNegNormal : fcPosNormal;
5824 if (isDenormal())
5826 if (isInfinity())
5827 return isNegative() ? fcNegInf : fcPosInf;
5828 assert(isNaN() && "Other class of FP constant");
5829 return isSignaling() ? fcSNan : fcQNan;
5830}
5831
5832bool APFloat::getExactInverse(APFloat *Inv) const {
5833 // Only finite, non-zero numbers can have a useful, representable inverse.
5834 // This check filters out +/- zero, +/- infinity, and NaN.
5835 if (!isFiniteNonZero())
5836 return false;
5837
5838 // Historically, this function rejects subnormal inputs. One reason why this
5839 // might be important is that subnormals may behave differently under FTZ/DAZ
5840 // runtime behavior.
5841 if (isDenormal())
5842 return false;
5843
5844 // A number has an exact, representable inverse if and only if it is a power
5845 // of two.
5846 //
5847 // Mathematical Rationale:
5848 // 1. A binary floating-point number x is a dyadic rational, meaning it can
5849 // be written as x = M / 2^k for integers M (the significand) and k.
5850 // 2. The inverse is 1/x = 2^k / M.
5851 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
5852 // in binary), its denominator M must also be a power of two.
5853 // Let's say M = 2^m.
5854 // 4. Substituting this back into the formula for x, we get
5855 // x = (2^m) / (2^k) = 2^(m-k).
5856 //
5857 // This proves that x must be a power of two.
5858
5859 // getExactLog2Abs() returns the integer exponent if the number is a power of
5860 // two or INT_MIN if it is not.
5861 const int Exp = getExactLog2Abs();
5862 if (Exp == INT_MIN)
5863 return false;
5864
5865 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5866 // scaling 1.0 by the negated exponent.
5867 APFloat Reciprocal =
5868 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
5869 rmTowardZero);
5870
5871 // scalbn might round if the resulting exponent -Exp is outside the
5872 // representable range, causing overflow (to infinity) or underflow. We
5873 // must verify that the result is still the exact power of two we expect.
5874 if (Reciprocal.getExactLog2Abs() != -Exp)
5875 return false;
5876
5877 // Avoid multiplication with a subnormal, it is not safe on all platforms and
5878 // may be slower than a normal division.
5879 if (Reciprocal.isDenormal())
5880 return false;
5881
5882 assert(Reciprocal.isFiniteNonZero());
5883
5884 if (Inv)
5885 *Inv = std::move(Reciprocal);
5886
5887 return true;
5888}
5889
5891 roundingMode RM, bool *losesInfo) {
// Converts this value in place to ToSemantics, dispatching on the storage
// layout of the current and the target semantics. (The first line of the
// signature is not visible in this extract.)
//
// losesInfo is written through unconditionally on the same-semantics path
// below, so callers must pass a valid pointer.

// Same semantics object: nothing to convert, nothing can be lost.
5892 if (&getSemantics() == &ToSemantics) {
5893 *losesInfo = false;
5894 return opOK;
5895 }
// IEEE layout -> IEEE layout: handled entirely by the IEEE member's convert().
5896 if (usesLayout<IEEEFloat>(getSemantics()) &&
5897 usesLayout<IEEEFloat>(ToSemantics))
5898 return U.IEEE.convert(ToSemantics, RM, losesInfo);
// IEEE layout -> double-double layout: the only target accepted here is
// semPPCDoubleDouble. The value is first converted, still in IEEE layout, to
// semPPCDoubleDoubleLegacy; *this is then rebuilt from the resulting bit
// pattern under ToSemantics. The status of the IEEE conversion is returned.
5899 if (usesLayout<IEEEFloat>(getSemantics()) &&
5900 usesLayout<DoubleAPFloat>(ToSemantics)) {
5901 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
5902 auto Ret =
5903 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
5904 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5905 return Ret;
5906 }
// Double-double layout -> IEEE layout: convert the object returned by
// getIEEE() in place, then move it into a fresh APFloat tagged with
// ToSemantics.
// NOTE(review): what getIEEE() exposes for a double-double value is defined
// outside this extract -- confirm against the header.
5907 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5908 usesLayout<IEEEFloat>(ToSemantics)) {
5909 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5910 *this = APFloat(std::move(getIEEE()), ToSemantics);
5911 return Ret;
5912 }
// No other layout combination is handled.
5913 llvm_unreachable("Unexpected semantics");
5914}
5915
5919
// Body of print (signature line not visible in this extract): formats the
// value with toString(), called with only the buffer argument, into a
// SmallVector with 16 inline chars, then streams the buffer to OS.
5921 SmallVector<char, 16> Buffer;
5922 toString(Buffer);
5923 OS << Buffer;
5924}
5925
5926#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Body of dump (signature line not visible in this extract): prints the value
// to the dbgs() stream and terminates the line.
5928 print(dbgs());
5929 dbgs() << '\n';
5930}
5931#endif
5932
// Body of Profile (signature line not visible in this extract): contributes
// the value's raw bit pattern, as returned by bitcastToAPInt(), to NID.
// Nothing else is added to the ID here.
5934 NID.Add(bitcastToAPInt());
5935}
5936
5938 roundingMode rounding_mode,
5939 bool *isExact) const {
// Wrapper that converts into `result`, taking the target width and signedness
// from `result` itself. (The first line of the signature is not visible in
// this extract.)

// Target integer width comes from the destination.
5940 unsigned bitWidth = result.getBitWidth();
// Scratch storage: one 64-bit word per word of the destination.
5941 SmallVector<uint64_t, 4> parts(result.getNumWords());
// Delegate to the word-array overload; rounding mode and isExact are passed
// straight through.
5942 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5943 rounding_mode, isExact);
5944 // Keeps the original signed-ness.
5945 result = APInt(bitWidth, parts);
5946 return status;
5947}
5948
// Body of convertToDouble (signature line not visible in this extract).
//
// Fast path: the value already uses semIEEEdouble, so read it out directly.
5950 if (&getSemantics() == &APFloatBase::semIEEEdouble)
5951 return getIEEE().convertToDouble();
// Any other semantics must be representable by IEEEdouble; this is only
// checked by the assertion.
5952 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
5953 "Float semantics is not representable by IEEEdouble");
// Convert a copy so that *this is left unmodified (the function is const in
// the other convertTo* definitions visible in this file).
5954 APFloat Temp = *this;
5955 bool LosesInfo;
// St is only consumed by the assertion below, hence [[maybe_unused]].
5956 [[maybe_unused]] opStatus St =
5957 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
// The conversion is expected to be exact: no inexact status, no lost info.
5958 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5959 return Temp.getIEEE().convertToDouble();
5960}
5961
5962#ifdef HAS_IEE754_FLOAT128
5963float128 APFloat::convertToQuad() const {
5964 if (&getSemantics() == &APFloatBase::semIEEEquad)
5965 return getIEEE().convertToQuad();
5966 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
5967 "Float semantics is not representable by IEEEquad");
5968 APFloat Temp = *this;
5969 bool LosesInfo;
5970 [[maybe_unused]] opStatus St =
5971 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
5972 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5973 return Temp.getIEEE().convertToQuad();
5974}
5975#endif
5976
// Body of convertToFloat (signature line not visible in this extract).
//
// Fast path: the value already uses semIEEEsingle, so read it out directly.
5978 if (&getSemantics() == &APFloatBase::semIEEEsingle)
5979 return getIEEE().convertToFloat();
// Any other semantics must be representable by IEEEsingle; this is only
// checked by the assertion.
5980 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
5981 "Float semantics is not representable by IEEEsingle");
// Convert a copy rather than *this.
5982 APFloat Temp = *this;
5983 bool LosesInfo;
// St is only consumed by the assertion below, hence [[maybe_unused]].
5984 [[maybe_unused]] opStatus St =
5985 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
// The conversion is expected to be exact: no inexact status, no lost info.
5986 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5987 return Temp.getIEEE().convertToFloat();
5988}
5989
// Body of isValidArbitraryFPFormat (signature line not visible in this
// extract): returns whether Format is one of the names in the table below.
//
// Note that this table lists more names than getArbitraryFPSemantics()
// currently maps to semantics (see the TODO there).
5991 static constexpr StringLiteral ValidFormats[] = {
5992 "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
5993 "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
5994 "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
// Membership test over the table.
5995 return llvm::is_contained(ValidFormats, Format);
5996}
5997
5999 // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
6000 // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
// Maps a format name to the address of its semantics object; any name without
// a case below, including the ones named in the TODO, yields nullptr.
// (The signature line and the line opening this expression are not visible in
// this extract.)
6002 .Case("Float8E5M2", &semFloat8E5M2)
6003 .Case("Float8E4M3FN", &semFloat8E4M3FN)
6004 .Case("Float4E2M1FN", &semFloat4E2M1FN)
6005 .Case("Float6E3M2FN", &semFloat6E3M2FN)
6006 .Case("Float6E2M3FN", &semFloat6E2M3FN)
6007 .Default(nullptr);
6008}
6009
6010APFloat::Storage::~Storage() {
6011 if (usesLayout<IEEEFloat>(*semantics)) {
6012 IEEE.~IEEEFloat();
6013 return;
6014 }
6015 if (usesLayout<DoubleAPFloat>(*semantics)) {
6016 Double.~DoubleAPFloat();
6017 return;
6018 }
6019 llvm_unreachable("Unexpected semantics");
6020}
6021
6022APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6023 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6024 new (this) IEEEFloat(RHS.IEEE);
6025 return;
6026 }
6027 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6028 new (this) DoubleAPFloat(RHS.Double);
6029 return;
6030 }
6031 llvm_unreachable("Unexpected semantics");
6032}
6033
6034APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6035 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6036 new (this) IEEEFloat(std::move(RHS.IEEE));
6037 return;
6038 }
6039 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6040 new (this) DoubleAPFloat(std::move(RHS.Double));
6041 return;
6042 }
6043 llvm_unreachable("Unexpected semantics");
6044}
6045
6046APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6047 if (usesLayout<IEEEFloat>(*semantics) &&
6048 usesLayout<IEEEFloat>(*RHS.semantics)) {
6049 IEEE = RHS.IEEE;
6050 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6051 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6052 Double = RHS.Double;
6053 } else if (this != &RHS) {
6054 this->~Storage();
6055 new (this) Storage(RHS);
6056 }
6057 return *this;
6058}
6059
6060APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6061 if (usesLayout<IEEEFloat>(*semantics) &&
6062 usesLayout<IEEEFloat>(*RHS.semantics)) {
6063 IEEE = std::move(RHS.IEEE);
6064 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6065 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6066 Double = std::move(RHS.Double);
6067 } else if (this != &RHS) {
6068 this->~Storage();
6069 new (this) Storage(std::move(RHS));
6070 }
6071 return *this;
6072}
6073
6074} // namespace llvm
6075
6076#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:49
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:26
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:306
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:98
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:247
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:334
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:222
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEquad()
Definition APFloat.h:298
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:225
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:313
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:243
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:278
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:317
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool isValidArbitraryFPFormat(StringRef Format)
Returns true if the given string is a valid arbitrary floating-point format interpretation for llvm....
Definition APFloat.cpp:5990
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:260
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:218
friend class APFloat
Definition APFloat.h:291
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:251
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:145
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:155
static constexpr unsigned integerPartWidth
Definition APFloat.h:152
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:300
APInt::WordType integerPart
Definition APFloat.h:151
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:239
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:264
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:304
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:307
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:316
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:315
static const fltSemantics & Float8E4M3()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:308
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:190
static const fltSemantics & Float8E3M4()
Definition APFloat.h:311
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
static const fltSemantics & Float8E5M2()
Definition APFloat.h:303
fltCategory
Category of internally-represented number.
Definition APFloat.h:370
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:314
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static LLVM_ABI const fltSemantics * getArbitraryFPSemantics(StringRef Format)
Returns the fltSemantics for a given arbitrary FP format string, or nullptr if invalid.
Definition APFloat.cpp:5998
static const fltSemantics & FloatTF32()
Definition APFloat.h:312
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:228
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1175
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:5933
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1263
bool isFiniteNonZero() const
Definition APFloat.h:1526
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5890
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1564
bool isNegative() const
Definition APFloat.h:1516
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5832
cmpResult compareAbsoluteValue(const APFloat &RHS) const
Definition APFloat.h:1471
friend DoubleAPFloat
Definition APFloat.h:1580
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5949
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1545
bool isNormal() const
Definition APFloat.h:1520
bool isDenormal() const
Definition APFloat.h:1517
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1236
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:5916
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5804
const fltSemantics & getSemantics() const
Definition APFloat.h:1524
bool isFinite() const
Definition APFloat.h:1521
bool isNaN() const
Definition APFloat.h:1514
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1143
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1506
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5977
bool isSignaling() const
Definition APFloat.h:1518
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1290
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1272
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1387
opStatus next(bool nextDown)
Definition APFloat.h:1309
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1153
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1203
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5819
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1281
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5799
friend IEEEFloat
Definition APFloat.h:1579
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:5927
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:5920
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1303
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1228
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1134
bool isInfinity() const
Definition APFloat.h:1513
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1584
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2379
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2351
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1769
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2374
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2453
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2423
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2763
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1767
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2359
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2737
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2643
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1510
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2384
void negate()
Negate this APInt in place.
Definition APInt.h:1483
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1933
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2390
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2710
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2365
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2403
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1751
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2541
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2488
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2527
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1759
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1928
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1737
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:576
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:209
void Add(const T &x)
Definition FoldingSet.h:249
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
const char * iterator
Definition StringRef.h:59
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
iterator begin() const
Definition StringRef.h:113
char back() const
back - Get the last character in the string.
Definition StringRef.h:152
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
char front() const
front - Get the first character in the string.
Definition StringRef.h:146
iterator end() const
Definition StringRef.h:115
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:675
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5147
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4677
LLVM_ABI void changeSign()
Definition APFloat.cpp:5054
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5621
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4941
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4844
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5113
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5170
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5645
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5572
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5181
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5191
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5604
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4836
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5175
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5060
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5597
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5408
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5140
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5654
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5207
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5119
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5629
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5124
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4930
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5612
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4951
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4624
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5635
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5129
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5162
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:4977
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:4962
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5587
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5117
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4831
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5157
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3175
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1424
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2180
fltCategory getCategory() const
Definition APFloat.h:582
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2735
bool isFiniteNonZero() const
Definition APFloat.h:585
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:472
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:3946
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4341
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3573
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4584
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2348
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:547
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2054
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:572
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2072
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3643
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3636
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2030
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4297
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:3978
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4531
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:945
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4560
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current seman...
Definition APFloat.cpp:1047
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2024
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:559
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3118
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:834
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2036
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2263
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:906
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1072
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:3992
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1064
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1099
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2217
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4566
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4386
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:569
const fltSemantics & getSemantics() const
Definition APFloat.h:583
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:562
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4370
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4546
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2424
LLVM_ABI void changeSign()
Definition APFloat.cpp:1982
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:931
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2680
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition APFloat.cpp:937
An opaque object representing a hash code.
Definition Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:448
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of friend declarations above to make them discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:450
static constexpr opStatus opDivByZero
Definition APFloat.h:445
static constexpr opStatus opOverflow
Definition APFloat.h:446
static constexpr cmpResult cmpLessThan
Definition APFloat.h:440
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1447
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:436
static constexpr uninitializedTag uninitialized
Definition APFloat.h:430
static constexpr fltCategory fcZero
Definition APFloat.h:452
static constexpr opStatus opOK
Definition APFloat.h:443
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:441
static constexpr unsigned integerPartWidth
Definition APFloat.h:438
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3315
APFloatBase::ExponentType ExponentType
Definition APFloat.h:429
static constexpr fltCategory fcNormal
Definition APFloat.h:451
static constexpr opStatus opInvalidOp
Definition APFloat.h:444
APFloatBase::opStatus opStatus
Definition APFloat.h:426
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4605
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:424
static constexpr cmpResult cmpUnordered
Definition APFloat.h:442
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:435
APFloatBase::roundingMode roundingMode
Definition APFloat.h:425
APFloatBase::cmpResult cmpResult
Definition APFloat.h:427
static constexpr fltCategory fcInfinity
Definition APFloat.h:449
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:433
static constexpr roundingMode rmTowardZero
Definition APFloat.h:437
static constexpr opStatus opUnderflow
Definition APFloat.h:447
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:431
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4566
static constexpr cmpResult cmpEqual
Definition APFloat.h:439
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4584
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4694
APFloatBase::integerPart integerPart
Definition APFloat.h:423
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:731
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1759
static const char infinityL[]
Definition APFloat.cpp:722
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:309
static const char NaNU[]
Definition APFloat.cpp:725
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:607
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:666
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:778
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:283
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:366
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:94
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:209
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1601
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:748
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
Definition STLExtras.h:2173
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
const unsigned int maxPrecision
Definition APFloat.cpp:208
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1622
static const char NaNL[]
Definition APFloat.cpp:724
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition bit.h:202
static const char infinityU[]
Definition APFloat.cpp:723
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:50
@ lfMoreThanHalf
Definition APFloat.h:54
@ lfLessThanHalf
Definition APFloat.h:52
@ lfExactlyHalf
Definition APFloat.h:53
@ lfExactlyZero
Definition APFloat.h:51
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:456
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:210
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1610
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:293
static Error createError(const Twine &Err)
Definition APFloat.cpp:305
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:575
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
static const char hexDigitsUpper[]
Definition APFloat.cpp:721
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
const unsigned int maxExponent
Definition APFloat.cpp:207
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:316
fltNonfiniteBehavior
Definition APFloat.h:948
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:2012
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:586
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:416
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:288
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:555
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1636
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:621
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:764
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:526
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1083
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:326
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:720
#define N
const char * lastSigDigit
Definition APFloat.cpp:451
const char * firstSigDigit
Definition APFloat.cpp:450
APFloatBase::ExponentType maxExponent
Definition APFloat.h:996
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1009
APFloatBase::ExponentType minExponent
Definition APFloat.h:1000
unsigned int sizeInBits
Definition APFloat.h:1007
unsigned int precision
Definition APFloat.h:1004
fltNanEncoding nanEncoding
Definition APFloat.h:1011