LLVM 23.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Error.h"
29#include <cstring>
30#include <limits.h>
31
32#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
33 do { \
34 if (usesLayout<IEEEFloat>(getSemantics())) \
35 return U.IEEE.METHOD_CALL; \
36 if (usesLayout<DoubleAPFloat>(getSemantics())) \
37 return U.Double.METHOD_CALL; \
38 llvm_unreachable("Unexpected semantics"); \
39 } while (false)
40
41using namespace llvm;
42
43/// A macro used to combine two fcCategory enums into one key which can be used
44/// in a switch statement to classify how the interaction of two APFloat's
45/// categories affects an operation.
46///
47/// TODO: If clang source code is ever allowed to use constexpr in its own
48/// codebase, change this into a static inline function.
49#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
50
51/* Assumed in hexadecimal significand parsing, and conversion to
52 hexadecimal strings. */
53static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
54
55namespace llvm {
56
57constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
58constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
59constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
60constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
61constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
62constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
63constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
65constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
66constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
68constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
70constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
72constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
73constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
74constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
75 127,
76 -127,
77 1,
78 8,
81 false,
82 false,
83 false};
84
85constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
87constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
89constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
91constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
92 80};
93constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
94constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
95constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
96 1023, -1022 + 53, 53 + 53, 128};
97
99 switch (S) {
100 case S_IEEEhalf:
101 return IEEEhalf();
102 case S_BFloat:
103 return BFloat();
104 case S_IEEEsingle:
105 return IEEEsingle();
106 case S_IEEEdouble:
107 return IEEEdouble();
108 case S_IEEEquad:
109 return IEEEquad();
111 return PPCDoubleDouble();
113 return PPCDoubleDoubleLegacy();
114 case S_Float8E5M2:
115 return Float8E5M2();
116 case S_Float8E5M2FNUZ:
117 return Float8E5M2FNUZ();
118 case S_Float8E4M3:
119 return Float8E4M3();
120 case S_Float8E4M3FN:
121 return Float8E4M3FN();
122 case S_Float8E4M3FNUZ:
123 return Float8E4M3FNUZ();
125 return Float8E4M3B11FNUZ();
126 case S_Float8E3M4:
127 return Float8E3M4();
128 case S_FloatTF32:
129 return FloatTF32();
130 case S_Float8E8M0FNU:
131 return Float8E8M0FNU();
132 case S_Float6E3M2FN:
133 return Float6E3M2FN();
134 case S_Float6E2M3FN:
135 return Float6E2M3FN();
136 case S_Float4E2M1FN:
137 return Float4E2M1FN();
139 return x87DoubleExtended();
140 }
141 llvm_unreachable("Unrecognised floating semantics");
142}
143
146 if (&Sem == &llvm::APFloat::IEEEhalf())
147 return S_IEEEhalf;
148 else if (&Sem == &llvm::APFloat::BFloat())
149 return S_BFloat;
150 else if (&Sem == &llvm::APFloat::IEEEsingle())
151 return S_IEEEsingle;
152 else if (&Sem == &llvm::APFloat::IEEEdouble())
153 return S_IEEEdouble;
154 else if (&Sem == &llvm::APFloat::IEEEquad())
155 return S_IEEEquad;
156 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
157 return S_PPCDoubleDouble;
158 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
160 else if (&Sem == &llvm::APFloat::Float8E5M2())
161 return S_Float8E5M2;
162 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
163 return S_Float8E5M2FNUZ;
164 else if (&Sem == &llvm::APFloat::Float8E4M3())
165 return S_Float8E4M3;
166 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
167 return S_Float8E4M3FN;
168 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
169 return S_Float8E4M3FNUZ;
170 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
171 return S_Float8E4M3B11FNUZ;
172 else if (&Sem == &llvm::APFloat::Float8E3M4())
173 return S_Float8E3M4;
174 else if (&Sem == &llvm::APFloat::FloatTF32())
175 return S_FloatTF32;
176 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
177 return S_Float8E8M0FNU;
178 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
179 return S_Float6E3M2FN;
180 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
181 return S_Float6E2M3FN;
182 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
183 return S_Float4E2M1FN;
184 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
185 return S_x87DoubleExtended;
186 else
187 llvm_unreachable("Unknown floating semantics");
188}
189
191 const fltSemantics &B) {
192 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
193 A.precision <= B.precision;
194}
195
196/* A tight upper bound on number of parts required to hold the value
197 pow(5, power) is
198
199 power * 815 / (351 * integerPartWidth) + 1
200
201 However, whilst the result may require only this many parts,
202 because we are multiplying two values to get it, the
203 multiplication may require an extra part with the excess part
204 being zero (consider the trivial case of 1 * 1, tcFullMultiply
205 requires two parts to hold the single-part result). So we add an
206 extra one to guarantee enough space whilst multiplying. */
207const unsigned int maxExponent = 16383;
208const unsigned int maxPrecision = 113;
210const unsigned int maxPowerOfFiveParts =
211 2 +
213
214unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
215 return semantics.precision;
216}
219 return semantics.maxExponent;
220}
223 return semantics.minExponent;
224}
225unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
226 return semantics.sizeInBits;
227}
229 bool isSigned) {
230 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
231 // at least one more bit than the MaxExponent to hold the max FP value.
232 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
233 // Extra sign bit needed.
234 if (isSigned)
235 ++MinBitWidth;
236 return MinBitWidth;
237}
238
240 return semantics.hasZero;
241}
242
244 return semantics.hasSignedRepr;
245}
246
250
254
256 // Keep in sync with Type::isIEEELikeFPTy
257 return SemanticsToEnum(semantics) <= S_IEEEquad;
258}
259
261 return semantics.hasSignBitInMSB;
262}
263
265 const fltSemantics &Dst) {
266 // Exponent range must be larger.
267 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
268 return false;
269
270 // If the mantissa is long enough, the result value could still be denormal
271 // with a larger exponent range.
272 //
273 // FIXME: This condition is probably not accurate but also shouldn't be a
274 // practical concern with existing types.
275 return Dst.precision >= Src.precision;
276}
277
279 return Sem.sizeInBits;
280}
281
282static constexpr APFloatBase::ExponentType
283exponentZero(const fltSemantics &semantics) {
284 return semantics.minExponent - 1;
285}
286
287static constexpr APFloatBase::ExponentType
288exponentInf(const fltSemantics &semantics) {
289 return semantics.maxExponent + 1;
290}
291
292static constexpr APFloatBase::ExponentType
293exponentNaN(const fltSemantics &semantics) {
296 return exponentZero(semantics);
297 if (semantics.hasSignedRepr)
298 return semantics.maxExponent;
299 }
300 return semantics.maxExponent + 1;
301}
302
303/* A bunch of private, handy routines. */
304
305static inline Error createError(const Twine &Err) {
307}
308
309static constexpr inline unsigned int partCountForBits(unsigned int bits) {
310 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
312}
313
/* Map the character C to its decimal digit value.  Digits yield 0U-9U;
   because the subtraction is unsigned, every other character wraps or
   lands at a value >= 10U, which callers treat as "not a digit". */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zero = '0';
  return c - zero;
}
320
321/* Return the value of a decimal exponent of the form
322 [+-]ddddddd.
323
324 If the exponent overflows, returns a large exponent with the
325 appropriate sign. */
328 bool isNegative;
329 unsigned int absExponent;
330 const unsigned int overlargeExponent = 24000; /* FIXME. */
331 StringRef::iterator p = begin;
332
333 // Treat no exponent as 0 to match binutils
334 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
335 return 0;
336 }
337
338 isNegative = (*p == '-');
339 if (*p == '-' || *p == '+') {
340 p++;
341 if (p == end)
342 return createError("Exponent has no digits");
343 }
344
345 absExponent = decDigitValue(*p++);
346 if (absExponent >= 10U)
347 return createError("Invalid character in exponent");
348
349 for (; p != end; ++p) {
350 unsigned int value;
351
352 value = decDigitValue(*p);
353 if (value >= 10U)
354 return createError("Invalid character in exponent");
355
356 absExponent = absExponent * 10U + value;
357 if (absExponent >= overlargeExponent) {
358 absExponent = overlargeExponent;
359 break;
360 }
361 }
362
363 if (isNegative)
364 return -(int) absExponent;
365 else
366 return (int) absExponent;
367}
368
369/* This is ugly and needs cleaning up, but I don't immediately see
370 how whilst remaining safe. */
373 int exponentAdjustment) {
374 int unsignedExponent;
375 bool negative, overflow;
376 int exponent = 0;
377
378 if (p == end)
379 return createError("Exponent has no digits");
380
381 negative = *p == '-';
382 if (*p == '-' || *p == '+') {
383 p++;
384 if (p == end)
385 return createError("Exponent has no digits");
386 }
387
388 unsignedExponent = 0;
389 overflow = false;
390 for (; p != end; ++p) {
391 unsigned int value;
392
393 value = decDigitValue(*p);
394 if (value >= 10U)
395 return createError("Invalid character in exponent");
396
397 unsignedExponent = unsignedExponent * 10 + value;
398 if (unsignedExponent > 32767) {
399 overflow = true;
400 break;
401 }
402 }
403
404 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
405 overflow = true;
406
407 if (!overflow) {
408 exponent = unsignedExponent;
409 if (negative)
410 exponent = -exponent;
411 exponent += exponentAdjustment;
412 if (exponent > 32767 || exponent < -32768)
413 overflow = true;
414 }
415
416 if (overflow)
417 exponent = negative ? -32768: 32767;
418
419 return exponent;
420}
421
424 StringRef::iterator *dot) {
425 StringRef::iterator p = begin;
426 *dot = end;
427 while (p != end && *p == '0')
428 p++;
429
430 if (p != end && *p == '.') {
431 *dot = p++;
432
433 if (end - begin == 1)
434 return createError("Significand has no digits");
435
436 while (p != end && *p == '0')
437 p++;
438 }
439
440 return p;
441}
442
443/* Given a normal decimal floating point number of the form
444
445 dddd.dddd[eE][+-]ddd
446
447 where the decimal point and exponent are optional, fill out the
448 structure D. Exponent is appropriate if the significand is
449 treated as an integer, and normalizedExponent if the significand
450 is taken to have the decimal point after a single leading
451 non-zero digit.
452
453 If the value is zero, V->firstSigDigit points to a non-digit, and
454 the return exponent is zero.
455*/
457 const char *firstSigDigit;
458 const char *lastSigDigit;
461};
462
465 StringRef::iterator dot = end;
466
467 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
468 if (!PtrOrErr)
469 return PtrOrErr.takeError();
470 StringRef::iterator p = *PtrOrErr;
471
472 D->firstSigDigit = p;
473 D->exponent = 0;
474 D->normalizedExponent = 0;
475
476 for (; p != end; ++p) {
477 if (*p == '.') {
478 if (dot != end)
479 return createError("String contains multiple dots");
480 dot = p++;
481 if (p == end)
482 break;
483 }
484 if (decDigitValue(*p) >= 10U)
485 break;
486 }
487
488 if (p != end) {
489 if (*p != 'e' && *p != 'E')
490 return createError("Invalid character in significand");
491 if (p == begin)
492 return createError("Significand has no digits");
493 if (dot != end && p - begin == 1)
494 return createError("Significand has no digits");
495
496 /* p points to the first non-digit in the string */
497 auto ExpOrErr = readExponent(p + 1, end);
498 if (!ExpOrErr)
499 return ExpOrErr.takeError();
500 D->exponent = *ExpOrErr;
501
502 /* Implied decimal point? */
503 if (dot == end)
504 dot = p;
505 }
506
507 /* If number is all zeroes accept any exponent. */
508 if (p != D->firstSigDigit) {
509 /* Drop insignificant trailing zeroes. */
510 if (p != begin) {
511 do
512 do
513 p--;
514 while (p != begin && *p == '0');
515 while (p != begin && *p == '.');
516 }
517
518 /* Adjust the exponents for any decimal point. */
519 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
520 D->normalizedExponent = (D->exponent +
521 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
522 - (dot > D->firstSigDigit && dot < p)));
523 }
524
525 D->lastSigDigit = p;
526 return Error::success();
527}
528
529/* Return the trailing fraction of a hexadecimal number.
530 DIGITVALUE is the first hex digit of the fraction, P points to
531 the next digit. */
534 unsigned int digitValue) {
535 unsigned int hexDigit;
536
537 /* If the first trailing digit isn't 0 or 8 we can work out the
538 fraction immediately. */
539 if (digitValue > 8)
540 return lfMoreThanHalf;
541 else if (digitValue < 8 && digitValue > 0)
542 return lfLessThanHalf;
543
544 // Otherwise we need to find the first non-zero digit.
545 while (p != end && (*p == '0' || *p == '.'))
546 p++;
547
548 if (p == end)
549 return createError("Invalid trailing hexadecimal fraction!");
550
551 hexDigit = hexDigitValue(*p);
552
553 /* If we ran off the end it is exactly zero or one-half, otherwise
554 a little more. */
555 if (hexDigit == UINT_MAX)
556 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
557 else
558 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
559}
560
561/* Return the fraction lost were a bignum truncated losing the least
562 significant BITS bits. */
563static lostFraction
565 unsigned int partCount,
566 unsigned int bits)
567{
568 unsigned int lsb;
569
570 lsb = APInt::tcLSB(parts, partCount);
571
572 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
573 if (bits <= lsb)
574 return lfExactlyZero;
575 if (bits == lsb + 1)
576 return lfExactlyHalf;
577 if (bits <= partCount * APFloatBase::integerPartWidth &&
578 APInt::tcExtractBit(parts, bits - 1))
579 return lfMoreThanHalf;
580
581 return lfLessThanHalf;
582}
583
584/* Shift DST right BITS bits noting lost fraction. */
585static lostFraction
586shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
587{
588 lostFraction lost_fraction;
589
590 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
591
592 APInt::tcShiftRight(dst, parts, bits);
593
594 return lost_fraction;
595}
596
597/* Combine the effect of two lost fractions. */
598static lostFraction
600 lostFraction lessSignificant)
601{
602 if (lessSignificant != lfExactlyZero) {
603 if (moreSignificant == lfExactlyZero)
604 moreSignificant = lfLessThanHalf;
605 else if (moreSignificant == lfExactlyHalf)
606 moreSignificant = lfMoreThanHalf;
607 }
608
609 return moreSignificant;
610}
611
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers whose inputs are themselves off from the values they
   approximate by at most HUERR1 and HUERR2 half-ulps.  The true error
   is strictly less than the returned value.  INEXACTMULTIPLY says
   whether the multiplication itself had to round.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  /* Exact inputs: only the multiplication's own rounding contributes. */
  if (inputError == 0)
    return 2U * multiplyError;

  return multiplyError + 2U * inputError;
}
629
630/* The number of ulps from the boundary (zero, or half if ISNEAREST)
631 when the least significant BITS are truncated. BITS cannot be
632 zero. */
634ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
635 bool isNearest) {
636 unsigned int count, partBits;
637 APFloatBase::integerPart part, boundary;
638
639 assert(bits != 0);
640
641 bits--;
643 partBits = bits % APFloatBase::integerPartWidth + 1;
644
645 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
646
647 if (isNearest)
648 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
649 else
650 boundary = 0;
651
652 if (count == 0) {
653 if (part - boundary <= boundary - part)
654 return part - boundary;
655 else
656 return boundary - part;
657 }
658
659 if (part == boundary) {
660 while (--count)
661 if (parts[count])
662 return ~(APFloatBase::integerPart) 0; /* A lot. */
663
664 return parts[0];
665 } else if (part == boundary - 1) {
666 while (--count)
667 if (~parts[count])
668 return ~(APFloatBase::integerPart) 0; /* A lot. */
669
670 return -parts[0];
671 }
672
673 return ~(APFloatBase::integerPart) 0; /* A lot. */
674}
675
676/* Place pow(5, power) in DST, and return the number of parts used.
677 DST must be at least one part larger than size of the answer. */
678static unsigned int
679powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
680 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
682 pow5s[0] = 78125 * 5;
683
684 unsigned int partsCount = 1;
685 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
686 unsigned int result;
687 assert(power <= maxExponent);
688
689 p1 = dst;
690 p2 = scratch;
691
692 *p1 = firstEightPowers[power & 7];
693 power >>= 3;
694
695 result = 1;
696 pow5 = pow5s;
697
698 for (unsigned int n = 0; power; power >>= 1, n++) {
699 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
700 if (n != 0) {
701 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
702 partsCount, partsCount);
703 partsCount *= 2;
704 if (pow5[partsCount - 1] == 0)
705 partsCount--;
706 }
707
708 if (power & 1) {
710
711 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
712 result += partsCount;
713 if (p2[result - 1] == 0)
714 result--;
715
716 /* Now result is in p1 with partsCount parts and p2 is scratch
717 space. */
718 tmp = p1;
719 p1 = p2;
720 p2 = tmp;
721 }
722
723 pow5 += partsCount;
724 }
725
726 if (p1 != dst)
727 APInt::tcAssign(dst, p1, result);
728
729 return result;
730}
731
732/* Zero at the end to avoid modular arithmetic when adding one; used
733 when rounding up during hexadecimal output. */
734static const char hexDigitsLower[] = "0123456789abcdef0";
735static const char hexDigitsUpper[] = "0123456789ABCDEF0";
736static const char infinityL[] = "infinity";
737static const char infinityU[] = "INFINITY";
738static const char NaNL[] = "nan";
739static const char NaNU[] = "NAN";
740
741/* Write out an integerPart in hexadecimal, starting with the most
742 significant nibble. Write out exactly COUNT hexdigits, return
743 COUNT. */
744static unsigned int
745partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
746 const char *hexDigitChars)
747{
748 unsigned int result = count;
749
751
752 part >>= (APFloatBase::integerPartWidth - 4 * count);
753 while (count--) {
754 dst[count] = hexDigitChars[part & 0xf];
755 part >>= 4;
756 }
757
758 return result;
759}
760
/* Write N in decimal at DST and return a pointer just past the last
   character written.  No terminating NUL is stored. */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char reversed[40];
  unsigned int length = 0;

  /* Collect digits least-significant first.  The loop body always runs
     once, so zero is written as "0". */
  do {
    reversed[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Emit them most-significant first. */
  while (length != 0)
    *dst++ = reversed[--length];

  return dst;
}
778
779/* Write out a signed decimal integer. */
780static char *
781writeSignedDecimal (char *dst, int value)
782{
783 if (value < 0) {
784 *dst++ = '-';
785 dst = writeUnsignedDecimal(dst, -(unsigned) value);
786 } else {
787 dst = writeUnsignedDecimal(dst, value);
788 }
789
790 return dst;
791}
792
793// Compute the ULP of the input using a definition from:
794// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
795// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
796static APFloat harrisonUlp(const APFloat &X) {
797 const fltSemantics &Sem = X.getSemantics();
798 switch (X.getCategory()) {
799 case APFloat::fcNaN:
800 return APFloat::getQNaN(Sem);
802 return APFloat::getInf(Sem);
803 case APFloat::fcZero:
804 return APFloat::getSmallest(Sem);
806 break;
807 }
808 if (X.isDenormal() || X.isSmallestNormalized())
809 return APFloat::getSmallest(Sem);
810 int Exp = ilogb(X);
811 if (X.getExactLog2() != INT_MIN)
812 Exp -= 1;
813 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
815}
816
817namespace detail {
818/* Constructors. */
819void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
820 unsigned int count;
821
822 semantics = ourSemantics;
823 count = partCount();
824 if (count > 1)
825 significand.parts = new integerPart[count];
826}
827
828void IEEEFloat::freeSignificand() {
829 if (needsCleanup())
830 delete [] significand.parts;
831}
832
833void IEEEFloat::assign(const IEEEFloat &rhs) {
834 assert(semantics == rhs.semantics);
835
836 sign = rhs.sign;
837 category = rhs.category;
838 exponent = rhs.exponent;
839 if (isFiniteNonZero() || category == fcNaN)
840 copySignificand(rhs);
841}
842
843void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
844 assert(isFiniteNonZero() || category == fcNaN);
845 assert(rhs.partCount() >= partCount());
846
847 APInt::tcAssign(significandParts(), rhs.significandParts(),
848 partCount());
849}
850
851/* Make this number a NaN, with an arbitrary but deterministic value
852 for the significand. If double or longer, this is a signalling NaN,
853 which may not be ideal. If float, this is QNaN(0). */
854void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
855 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
856 llvm_unreachable("This floating point format does not support NaN");
857
858 if (Negative && !semantics->hasSignedRepr)
860 "This floating point format does not support signed values");
861
862 category = fcNaN;
863 sign = Negative;
864 exponent = exponentNaN();
865
866 integerPart *significand = significandParts();
867 unsigned numParts = partCount();
868
869 APInt fill_storage;
870 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
871 // NanOnly formats do not distinguish signalling and quiet NaN, so the
872 // SNaN request is dropped and the non-signalling path below is taken.
873 SNaN = false;
874 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
875 sign = true;
876 fill_storage = APInt::getZero(semantics->precision - 1);
877 } else {
878 fill_storage = APInt::getAllOnes(semantics->precision - 1);
879 }
880 fill = &fill_storage;
881 }
882
883 // Set the significand bits to the fill.
884 if (!fill || fill->getNumWords() < numParts)
885 APInt::tcSet(significand, 0, numParts);
886 if (fill) {
887 APInt::tcAssign(significand, fill->getRawData(),
888 std::min(fill->getNumWords(), numParts));
889
890 // Zero out the excess bits of the significand.
891 unsigned bitsToPreserve = semantics->precision - 1;
892 unsigned part = bitsToPreserve / 64;
893 bitsToPreserve %= 64;
894 significand[part] &= ((1ULL << bitsToPreserve) - 1);
895 for (part++; part != numParts; ++part)
896 significand[part] = 0;
897 }
898
899 unsigned QNaNBit =
900 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
901
902 if (SNaN) {
903 // We always have to clear the QNaN bit to make it an SNaN.
904 APInt::tcClearBit(significand, QNaNBit);
905
906 // If there are no bits set in the payload, we have to set
907 // *something* to make it a NaN instead of an infinity;
908 // conventionally, this is the next bit down from the QNaN bit.
909 if (APInt::tcIsZero(significand, numParts))
910 APInt::tcSetBit(significand, QNaNBit - 1);
911 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
912 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
913 // Do nothing.
914 } else {
915 // We always have to set the QNaN bit to make it a QNaN.
916 APInt::tcSetBit(significand, QNaNBit);
917 }
918
919 // For x87 extended precision, we want to make a NaN, not a
920 // pseudo-NaN. Maybe we should expose the ability to make
921 // pseudo-NaNs?
922 if (semantics == &APFloatBase::semX87DoubleExtended)
923 APInt::tcSetBit(significand, QNaNBit + 1);
924}
925
927 if (this != &rhs) {
928 if (semantics != rhs.semantics) {
929 freeSignificand();
930 initialize(rhs.semantics);
931 }
932 assign(rhs);
933 }
934
935 return *this;
936}
937
939 freeSignificand();
940
941 semantics = rhs.semantics;
942 significand = rhs.significand;
943 exponent = rhs.exponent;
944 category = rhs.category;
945 sign = rhs.sign;
946
947 rhs.semantics = &APFloatBase::semBogus;
948 return *this;
949}
950
952 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
953 (APInt::tcExtractBit(significandParts(),
954 semantics->precision - 1) == 0);
955}
956
958 // The smallest number by magnitude in our format will be the smallest
959 // denormal, i.e. the floating point number with exponent being minimum
960 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
961 return isFiniteNonZero() && exponent == semantics->minExponent &&
962 significandMSB() == 0;
963}
964
966 return getCategory() == fcNormal && exponent == semantics->minExponent &&
967 isSignificandAllZerosExceptMSB();
968}
969
970unsigned int IEEEFloat::getNumHighBits() const {
971 const unsigned int PartCount = partCountForBits(semantics->precision);
972 const unsigned int Bits = PartCount * integerPartWidth;
973
974 // Compute how many bits are used in the final word.
975 // When precision is just 1, it represents the 'Pth'
976 // Precision bit and not the actual significand bit.
977 const unsigned int NumHighBits = (semantics->precision > 1)
978 ? (Bits - semantics->precision + 1)
979 : (Bits - semantics->precision);
980 return NumHighBits;
981}
982
983bool IEEEFloat::isSignificandAllOnes() const {
984 // Test if the significand excluding the integral bit is all ones. This allows
985 // us to test for binade boundaries.
986 const integerPart *Parts = significandParts();
987 const unsigned PartCount = partCountForBits(semantics->precision);
988 for (unsigned i = 0; i < PartCount - 1; i++)
989 if (~Parts[i])
990 return false;
991
992 // Set the unused high bits to all ones when we compare.
993 const unsigned NumHighBits = getNumHighBits();
994 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
995 "Can not have more high bits to fill than integerPartWidth");
996 const integerPart HighBitFill =
997 ~integerPart(0) << (integerPartWidth - NumHighBits);
998 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
999 return false;
1000
1001 return true;
1002}
1003
1004bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1005 // Test if the significand excluding the integral bit is all ones except for
1006 // the least significant bit.
1007 const integerPart *Parts = significandParts();
1008
1009 if (Parts[0] & 1)
1010 return false;
1011
1012 const unsigned PartCount = partCountForBits(semantics->precision);
1013 for (unsigned i = 0; i < PartCount - 1; i++) {
1014 if (~Parts[i] & ~unsigned{!i})
1015 return false;
1016 }
1017
1018 // Set the unused high bits to all ones when we compare.
1019 const unsigned NumHighBits = getNumHighBits();
1020 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1021 "Can not have more high bits to fill than integerPartWidth");
1022 const integerPart HighBitFill = ~integerPart(0)
1023 << (integerPartWidth - NumHighBits);
1024 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1025 return false;
1026
1027 return true;
1028}
1029
1030bool IEEEFloat::isSignificandAllZeros() const {
1031 // Test if the significand excluding the integral bit is all zeros. This
1032 // allows us to test for binade boundaries.
1033 const integerPart *Parts = significandParts();
1034 const unsigned PartCount = partCountForBits(semantics->precision);
1035
1036 for (unsigned i = 0; i < PartCount - 1; i++)
1037 if (Parts[i])
1038 return false;
1039
1040 // Compute how many bits are used in the final word.
1041 const unsigned NumHighBits = getNumHighBits();
1042 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1043 "clear than integerPartWidth");
1044 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1045
1046 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1047 return false;
1048
1049 return true;
1050}
1051
1052bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1053 const integerPart *Parts = significandParts();
1054 const unsigned PartCount = partCountForBits(semantics->precision);
1055
1056 for (unsigned i = 0; i < PartCount - 1; i++) {
1057 if (Parts[i])
1058 return false;
1059 }
1060
1061 const unsigned NumHighBits = getNumHighBits();
1062 const integerPart MSBMask = integerPart(1)
1063 << (integerPartWidth - NumHighBits);
1064 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1065}
1066
1068 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1069 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1070 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1071 // The largest number by magnitude in our format will be the floating point
1072 // number with maximum exponent and with significand that is all ones except
1073 // the LSB.
1074 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1075 ? isSignificandAllOnesExceptLSB()
1076 : IsMaxExp;
1077 } else {
1078 // The largest number by magnitude in our format will be the floating point
1079 // number with maximum exponent and with significand that is all ones.
1080 return IsMaxExp && isSignificandAllOnes();
1081 }
1082}
1083
1085 // This could be made more efficient; I'm going for obviously correct.
1086 if (!isFinite()) return false;
1087 IEEEFloat truncated = *this;
1088 truncated.roundToIntegral(rmTowardZero);
1089 return compare(truncated) == cmpEqual;
1090}
1091
1092bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1093 if (this == &rhs)
1094 return true;
1095 if (semantics != rhs.semantics ||
1096 category != rhs.category ||
1097 sign != rhs.sign)
1098 return false;
1099 if (category==fcZero || category==fcInfinity)
1100 return true;
1101
1102 if (isFiniteNonZero() && exponent != rhs.exponent)
1103 return false;
1104
1105 return std::equal(significandParts(), significandParts() + partCount(),
1106 rhs.significandParts());
1107}
1108
// NOTE(review): this listing is missing the constructor's signature line
// (numbering skips 1109) and one statement before the closing brace
// (numbering skips 1116).  Restore both from the upstream file.
//
// Builds a positive fcNormal value whose lowest significand part is `value`
// and whose exponent is precision - 1.
1110 initialize(&ourSemantics);
1111 sign = 0;
1112 category = fcNormal;
1113 zeroSignificand();
1114 exponent = ourSemantics.precision - 1;
1115 significandParts()[0] = value;
// NOTE(review): listing line 1116 is absent here.
1117}
1118
// NOTE(review): the constructor's signature line is absent from this listing
// (numbering skips 1119).
//
// Initializes storage for the given semantics and sets the value to positive
// zero, or to the smallest normalized value when the format has no zero.
1120 initialize(&ourSemantics);
1121 // The Float8E8MOFNU format does not have a representation
1122 // for zero. So, use the closest representation instead.
1123 // Moreover, the all-zero encoding represents a valid
1124 // normal value (which is the smallestNormalized here).
1125 // Hence, we call makeSmallestNormalized (where category is
1126 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1127 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1128}
1129
1130// Delegate to the previous constructor, because a later copy constructor may
1131// actually inspect category, which must not be garbage.
// NOTE(review): the line holding this constructor's signature is absent from
// this listing (numbering skips 1132); only the delegating initializer below
// survives.
1133 : IEEEFloat(ourSemantics) {}
1134
// NOTE(review): the constructor's signature line is absent from this listing
// (numbering skips 1135).
//
// Sets up storage for rhs's semantics, then copies rhs's value into it.
1136 initialize(rhs.semantics);
1137 assign(rhs);
1138}
1139
1140IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1141 *this = std::move(rhs);
1142}
1143
1144IEEEFloat::~IEEEFloat() { freeSignificand(); }
1145
1146unsigned int IEEEFloat::partCount() const {
1147 return partCountForBits(semantics->precision + 1);
1148}
1149
1150const APFloat::integerPart *IEEEFloat::significandParts() const {
1151 return const_cast<IEEEFloat *>(this)->significandParts();
1152}
1153
1154APFloat::integerPart *IEEEFloat::significandParts() {
1155 if (partCount() > 1)
1156 return significand.parts;
1157 else
1158 return &significand.part;
1159}
1160
1161void IEEEFloat::zeroSignificand() {
1162 APInt::tcSet(significandParts(), 0, partCount());
1163}
1164
1165/* Increment an fcNormal floating point number's significand. */
1166void IEEEFloat::incrementSignificand() {
1167 integerPart carry;
1168
1169 carry = APInt::tcIncrement(significandParts(), partCount());
1170
1171 /* Our callers should never cause us to overflow. */
1172 assert(carry == 0);
1173 (void)carry;
1174}
1175
1176/* Add the significand of the RHS. Returns the carry flag. */
1177APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1178 integerPart *parts;
1179
1180 parts = significandParts();
1181
1182 assert(semantics == rhs.semantics);
1183 assert(exponent == rhs.exponent);
1184
1185 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1186}
1187
1188/* Subtract the significand of the RHS with a borrow flag. Returns
1189 the borrow flag. */
1190APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1191 integerPart borrow) {
1192 integerPart *parts;
1193
1194 parts = significandParts();
1195
1196 assert(semantics == rhs.semantics);
1197 assert(exponent == rhs.exponent);
1198
1199 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1200 partCount());
1201}
1202
1203/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1204 on to the full-precision result of the multiplication. Returns the
1205 lost fraction. */
1206lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1207 IEEEFloat addend,
1208 bool ignoreAddend) {
1209 unsigned int omsb; // One, not zero, based MSB.
1210 unsigned int partsCount, newPartsCount, precision;
1211 integerPart *lhsSignificand;
1212 integerPart scratch[4];
1213 integerPart *fullSignificand;
1214 lostFraction lost_fraction;
1215 bool ignored;
1216
1217 assert(semantics == rhs.semantics);
1218
1219 precision = semantics->precision;
1220
1221 // Allocate space for twice as many bits as the original significand, plus one
1222 // extra bit for the addition to overflow into.
1223 newPartsCount = partCountForBits(precision * 2 + 1);
1224
1225 if (newPartsCount > 4)
1226 fullSignificand = new integerPart[newPartsCount];
1227 else
1228 fullSignificand = scratch;
1229
1230 lhsSignificand = significandParts();
1231 partsCount = partCount();
1232
1233 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1234 rhs.significandParts(), partsCount, partsCount);
1235
1236 lost_fraction = lfExactlyZero;
1237 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1238 exponent += rhs.exponent;
1239
1240 // Assume the operands involved in the multiplication are single-precision
1241 // FP, and the two multiplicants are:
1242 // *this = a23 . a22 ... a0 * 2^e1
1243 // rhs = b23 . b22 ... b0 * 2^e2
1244 // the result of multiplication is:
1245 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1246 // Note that there are three significant bits at the left-hand side of the
1247 // radix point: two for the multiplication, and an overflow bit for the
1248 // addition (that will always be zero at this point). Move the radix point
1249 // toward left by two bits, and adjust exponent accordingly.
1250 exponent += 2;
1251
1252 if (!ignoreAddend && addend.isNonZero()) {
1253 // The intermediate result of the multiplication has "2 * precision"
1254 // signicant bit; adjust the addend to be consistent with mul result.
1255 //
1256 Significand savedSignificand = significand;
1257 const fltSemantics *savedSemantics = semantics;
1258 fltSemantics extendedSemantics;
1260 unsigned int extendedPrecision;
1261
1262 // Normalize our MSB to one below the top bit to allow for overflow.
1263 extendedPrecision = 2 * precision + 1;
1264 if (omsb != extendedPrecision - 1) {
1265 assert(extendedPrecision > omsb);
1266 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1267 (extendedPrecision - 1) - omsb);
1268 exponent -= (extendedPrecision - 1) - omsb;
1269 }
1270
1271 /* Create new semantics. */
1272 extendedSemantics = *semantics;
1273 extendedSemantics.precision = extendedPrecision;
1274
1275 if (newPartsCount == 1)
1276 significand.part = fullSignificand[0];
1277 else
1278 significand.parts = fullSignificand;
1279 semantics = &extendedSemantics;
1280
1281 // Make a copy so we can convert it to the extended semantics.
1282 // Note that we cannot convert the addend directly, as the extendedSemantics
1283 // is a local variable (which we take a reference to).
1284 IEEEFloat extendedAddend(addend);
1285 status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
1286 &ignored);
1287 assert(status == APFloat::opOK);
1288 (void)status;
1289
1290 // Shift the significand of the addend right by one bit. This guarantees
1291 // that the high bit of the significand is zero (same as fullSignificand),
1292 // so the addition will overflow (if it does overflow at all) into the top bit.
1293 lost_fraction = extendedAddend.shiftSignificandRight(1);
1294 assert(lost_fraction == lfExactlyZero &&
1295 "Lost precision while shifting addend for fused-multiply-add.");
1296
1297 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1298
1299 /* Restore our state. */
1300 if (newPartsCount == 1)
1301 fullSignificand[0] = significand.part;
1302 significand = savedSignificand;
1303 semantics = savedSemantics;
1304
1305 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1306 }
1307
1308 // Convert the result having "2 * precision" significant-bits back to the one
1309 // having "precision" significant-bits. First, move the radix point from
1310 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1311 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1312 exponent -= precision + 1;
1313
1314 // In case MSB resides at the left-hand side of radix point, shift the
1315 // mantissa right by some amount to make sure the MSB reside right before
1316 // the radix point (i.e. "MSB . rest-significant-bits").
1317 //
1318 // Note that the result is not normalized when "omsb < precision". So, the
1319 // caller needs to call IEEEFloat::normalize() if normalized value is
1320 // expected.
1321 if (omsb > precision) {
1322 unsigned int bits, significantParts;
1323 lostFraction lf;
1324
1325 bits = omsb - precision;
1326 significantParts = partCountForBits(omsb);
1327 lf = shiftRight(fullSignificand, significantParts, bits);
1328 lost_fraction = combineLostFractions(lf, lost_fraction);
1329 exponent += bits;
1330 }
1331
1332 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1333
1334 if (newPartsCount > 4)
1335 delete [] fullSignificand;
1336
1337 return lost_fraction;
1338}
1339
1340lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1341 // When the given semantics has zero, the addend here is a zero.
1342 // i.e . it belongs to the 'fcZero' category.
1343 // But when the semantics does not support zero, we need to
1344 // explicitly convey that this addend should be ignored
1345 // for multiplication.
1346 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1347}
1348
1349/* Multiply the significands of LHS and RHS to DST. */
1350lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1351 unsigned int bit, i, partsCount;
1352 const integerPart *rhsSignificand;
1353 integerPart *lhsSignificand, *dividend, *divisor;
1354 integerPart scratch[4];
1355 lostFraction lost_fraction;
1356
1357 assert(semantics == rhs.semantics);
1358
1359 lhsSignificand = significandParts();
1360 rhsSignificand = rhs.significandParts();
1361 partsCount = partCount();
1362
1363 if (partsCount > 2)
1364 dividend = new integerPart[partsCount * 2];
1365 else
1366 dividend = scratch;
1367
1368 divisor = dividend + partsCount;
1369
1370 /* Copy the dividend and divisor as they will be modified in-place. */
1371 for (i = 0; i < partsCount; i++) {
1372 dividend[i] = lhsSignificand[i];
1373 divisor[i] = rhsSignificand[i];
1374 lhsSignificand[i] = 0;
1375 }
1376
1377 exponent -= rhs.exponent;
1378
1379 unsigned int precision = semantics->precision;
1380
1381 /* Normalize the divisor. */
1382 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1383 if (bit) {
1384 exponent += bit;
1385 APInt::tcShiftLeft(divisor, partsCount, bit);
1386 }
1387
1388 /* Normalize the dividend. */
1389 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1390 if (bit) {
1391 exponent -= bit;
1392 APInt::tcShiftLeft(dividend, partsCount, bit);
1393 }
1394
1395 /* Ensure the dividend >= divisor initially for the loop below.
1396 Incidentally, this means that the division loop below is
1397 guaranteed to set the integer bit to one. */
1398 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1399 exponent--;
1400 APInt::tcShiftLeft(dividend, partsCount, 1);
1401 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1402 }
1403
1404 /* Long division. */
1405 for (bit = precision; bit; bit -= 1) {
1406 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1407 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1408 APInt::tcSetBit(lhsSignificand, bit - 1);
1409 }
1410
1411 APInt::tcShiftLeft(dividend, partsCount, 1);
1412 }
1413
1414 /* Figure out the lost fraction. */
1415 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1416
1417 if (cmp > 0)
1418 lost_fraction = lfMoreThanHalf;
1419 else if (cmp == 0)
1420 lost_fraction = lfExactlyHalf;
1421 else if (APInt::tcIsZero(dividend, partsCount))
1422 lost_fraction = lfExactlyZero;
1423 else
1424 lost_fraction = lfLessThanHalf;
1425
1426 if (partsCount > 2)
1427 delete [] dividend;
1428
1429 return lost_fraction;
1430}
1431
1432unsigned int IEEEFloat::significandMSB() const {
1433 return APInt::tcMSB(significandParts(), partCount());
1434}
1435
1436unsigned int IEEEFloat::significandLSB() const {
1437 return APInt::tcLSB(significandParts(), partCount());
1438}
1439
1440/* Note that a zero result is NOT normalized to fcZero. */
1441lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1442 /* Our exponent should not overflow. */
1443 assert((ExponentType) (exponent + bits) >= exponent);
1444
1445 exponent += bits;
1446
1447 return shiftRight(significandParts(), partCount(), bits);
1448}
1449
1450/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1451void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1452 assert(bits < semantics->precision ||
1453 (semantics->precision == 1 && bits <= 1));
1454
1455 if (bits) {
1456 unsigned int partsCount = partCount();
1457
1458 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1459 exponent -= bits;
1460
1461 assert(!APInt::tcIsZero(significandParts(), partsCount));
1462 }
1463}
1464
// NOTE(review): this listing is missing the definition's signature line
// (numbering skips 1465) and one line between the two assertions
// (numbering skips 1469).
//
// Compares the magnitudes of this value and rhs, ignoring sign: exponents
// are compared first and, when they match, the significands are compared as
// unsigned bignums.  Returns cmpGreaterThan, cmpLessThan or cmpEqual.
1466 int compare;
1467
1468 assert(semantics == rhs.semantics);
// NOTE(review): listing line 1469 is absent here.
1470 assert(rhs.isFiniteNonZero());
1471
1472 compare = exponent - rhs.exponent;
1473
1474 /* If exponents are equal, do an unsigned bignum comparison of the
1475 significands. */
1476 if (compare == 0)
1477 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1478 partCount());
1479
1480 if (compare > 0)
1481 return cmpGreaterThan;
1482 else if (compare < 0)
1483 return cmpLessThan;
1484 else
1485 return cmpEqual;
1486}
1487
1488/* Set the least significant BITS bits of a bignum, clear the
1489 rest. */
1490static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1491 unsigned bits) {
1492 unsigned i = 0;
1493 while (bits > APInt::APINT_BITS_PER_WORD) {
1494 dst[i++] = ~(APInt::WordType)0;
1496 }
1497
1498 if (bits)
1499 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1500
1501 while (i < parts)
1502 dst[i++] = 0;
1503}
1504
1505/* Handle overflow. Sign is preserved. We either become infinity or
1506 the largest finite number. */
// Returns opOverflow | opInexact when the value becomes non-finite and
// opInexact when it saturates to the largest finite number.
//
// NOTE(review): two lines of this definition are absent from the listing
// (numbering skips 1508 and 1514).  As shown, the braces do not balance and
// the `else` at 1516 has no matching `if`; presumably the missing lines are
// conditions on semantics->nonFiniteBehavior -- confirm against upstream.
1507APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
// NOTE(review): listing line 1508 is absent here.
1509 /* Infinity? */
1510 if (rounding_mode == rmNearestTiesToEven ||
1511 rounding_mode == rmNearestTiesToAway ||
1512 (rounding_mode == rmTowardPositive && !sign) ||
1513 (rounding_mode == rmTowardNegative && sign)) {
// NOTE(review): listing line 1514 is absent here.
1515 makeNaN(false, sign);
1516 else
1517 category = fcInfinity;
1518 return static_cast<opStatus>(opOverflow | opInexact);
1519 }
1520 }
1521
1522 /* Otherwise we become the largest finite number. */
1523 category = fcNormal;
1524 exponent = semantics->maxExponent;
1525 tcSetLeastSignificantBits(significandParts(), partCount(),
1526 semantics->precision);
// For NanOnly formats with an all-ones NaN encoding the lowest significand
// bit is cleared, so the result is not the all-ones pattern.
1527 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1528 semantics->nanEncoding == fltNanEncoding::AllOnes)
1529 APInt::tcClearBit(significandParts(), 0);
1530
1531 return opInexact;
1532}
1533
1534/* Returns TRUE if, when truncating the current number, with BIT the
1535 new LSB, with the given lost fraction and rounding mode, the result
1536 would need to be rounded away from zero (i.e., by increasing the
1537 significand). This routine must work for fcZero of both signs, and
1538 fcNormal numbers. */
// NOTE(review): two `case` labels of the switch below are absent from this
// listing (numbering skips 1549 and 1552).  Judging by the bodies they are
// presumably the two round-to-nearest modes (ties-away first, then
// ties-to-even) -- confirm against upstream.
1539bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1540 lostFraction lost_fraction,
1541 unsigned int bit) const {
1542 /* NaNs and infinities should not have lost fractions. */
1543 assert(isFiniteNonZero() || category == fcZero);
1544
1545 /* Current callers never pass this so we don't handle it. */
1546 assert(lost_fraction != lfExactlyZero);
1547
1548 switch (rounding_mode) {
// NOTE(review): listing line 1549 (a case label) is absent here.
// Rounds away whenever at least half was lost.
1550 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1551
// NOTE(review): listing line 1552 (a case label) is absent here.
// Rounds away when more than half was lost; an exact tie is decided by the
// value of bit BIT of the significand.
1553 if (lost_fraction == lfMoreThanHalf)
1554 return true;
1555
1556 /* Our zeroes don't have a significand to test. */
1557 if (lost_fraction == lfExactlyHalf && category != fcZero)
1558 return APInt::tcExtractBit(significandParts(), bit);
1559
1560 return false;
1561
1562 case rmTowardZero:
1563 return false;
1564
1565 case rmTowardPositive:
1566 return !sign;
1567
1568 case rmTowardNegative:
1569 return sign;
1570
1571 default:
1572 break;
1573 }
1574 llvm_unreachable("Invalid rounding mode found");
1575}
1576
// Brings a finite non-zero value into canonical form.  The significand's MSB
// is moved to the position given by the format's precision (with a
// compensating exponent change), overflow is handed to handleOverflow(), the
// value is rounded according to rounding_mode and lost_fraction, and results
// with an empty significand become fcZero.  Values that are not finite
// non-zero are left untouched.  Returns the resulting status flags.
//
// NOTE(review): two lines of this definition are absent from the listing
// (numbering skips 1649 and 1705); in both places an
// `if (!semantics->hasZero)` is left without a statement.
1577APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1578 lostFraction lost_fraction) {
1579 unsigned int omsb; /* One, not zero, based MSB. */
1580 int exponentChange;
1581
1582 if (!isFiniteNonZero())
1583 return opOK;
1584
1585 /* Before rounding normalize the exponent of fcNormal numbers. */
1586 omsb = significandMSB() + 1;
1587
1588 // Only skip this `if` if the value is exactly zero.
1589 if (omsb || lost_fraction != lfExactlyZero) {
1590 /* OMSB is numbered from 1. We want to place it in the integer
1591 bit numbered PRECISION if possible, with a compensating change in
1592 the exponent. */
1593 exponentChange = omsb - semantics->precision;
1594
1595 /* If the resulting exponent is too high, overflow according to
1596 the rounding mode. */
1597 if (exponent + exponentChange > semantics->maxExponent)
1598 return handleOverflow(rounding_mode);
1599
1600 /* Subnormal numbers have exponent minExponent, and their MSB
1601 is forced based on that. */
1602 if (exponent + exponentChange < semantics->minExponent)
1603 exponentChange = semantics->minExponent - exponent;
1604
1605 /* Shifting left is easy as we don't lose precision. */
1606 if (exponentChange < 0) {
1607 assert(lost_fraction == lfExactlyZero);
1608
1609 shiftSignificandLeft(-exponentChange);
1610
1611 return opOK;
1612 }
1613
1614 if (exponentChange > 0) {
1615 lostFraction lf;
1616
1617 /* Shift right and capture any new lost fraction. */
1618 lf = shiftSignificandRight(exponentChange);
1619
1620 lost_fraction = combineLostFractions(lf, lost_fraction);
1621
1622 /* Keep OMSB up-to-date. */
1623 if (omsb > (unsigned) exponentChange)
1624 omsb -= exponentChange;
1625 else
1626 omsb = 0;
1627 }
1628 }
1629
1630 // The all-ones values is an overflow if NaN is all ones. If NaN is
1631 // represented by negative zero, then it is a valid finite value.
1632 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1633 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1634 exponent == semantics->maxExponent && isSignificandAllOnes())
1635 return handleOverflow(rounding_mode);
1636
1637 /* Now round the number according to rounding_mode given the lost
1638 fraction. */
1639
1640 /* As specified in IEEE 754, since we do not trap we do not report
1641 underflow for exact results. */
1642 if (lost_fraction == lfExactlyZero) {
1643 /* Canonicalize zeroes. */
1644 if (omsb == 0) {
1645 category = fcZero;
1646 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1647 sign = false;
1648 if (!semantics->hasZero)
// NOTE(review): listing line 1649 (the statement guarded by the `if` above)
// is absent here.
1650 }
1651
1652 return opOK;
1653 }
1654
1655 /* Increment the significand if we're rounding away from zero. */
1656 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1657 if (omsb == 0)
1658 exponent = semantics->minExponent;
1659
1660 incrementSignificand();
1661 omsb = significandMSB() + 1;
1662
1663 /* Did the significand increment overflow? */
1664 if (omsb == (unsigned) semantics->precision + 1) {
1665 /* Renormalize by incrementing the exponent and shifting our
1666 significand right one. However if we already have the
1667 maximum exponent we overflow to infinity. */
1668 if (exponent == semantics->maxExponent)
1669 // Invoke overflow handling with a rounding mode that will guarantee
1670 // that the result gets turned into the correct infinity representation.
1671 // This is needed instead of just setting the category to infinity to
1672 // account for 8-bit floating point types that have no inf, only NaN.
1673 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1674
1675 shiftSignificandRight(1);
1676
1677 return opInexact;
1678 }
1679
1680 // The all-ones values is an overflow if NaN is all ones. If NaN is
1681 // represented by negative zero, then it is a valid finite value.
1682 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1683 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1684 exponent == semantics->maxExponent && isSignificandAllOnes())
1685 return handleOverflow(rounding_mode);
1686 }
1687
1688 /* The normal case - we were and are not denormal, and any
1689 significand increment above didn't overflow. */
1690 if (omsb == semantics->precision)
1691 return opInexact;
1692
1693 /* We have a non-zero denormal. */
1694 assert(omsb < semantics->precision);
1695
1696 /* Canonicalize zeroes. */
1697 if (omsb == 0) {
1698 category = fcZero;
1699 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1700 sign = false;
1701 // This condition handles the case where the semantics
1702 // does not have zero but uses the all-zero encoding
1703 // to represent the smallest normal value.
1704 if (!semantics->hasZero)
// NOTE(review): listing line 1705 (the statement guarded by the `if` above)
// is absent here.
1706 }
1707
1708 /* The fcZero case is a denormal that underflowed to zero. */
1709 return (opStatus) (opUnderflow | opInexact);
1710}
1711
// Handles additions/subtractions that need no significand arithmetic,
// selected by the category pair of the two operands.  Returns opDivByZero
// as a marker when the operands need the general add/subtract path (see the
// caller's check for that code).
//
// NOTE(review): every `case` label of this switch is absent from the listing
// (numbering skips 1718-1720, 1723-1726, 1733-1735, 1738-1739, 1744, 1749,
// 1753 and 1763), so which category pairs reach which body cannot be read
// from here.
1712APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1713 bool subtract) {
1714 switch (PackCategoriesIntoKey(category, rhs.category)) {
1715 default:
1716 llvm_unreachable(nullptr);
1717
// Takes over rhs, then falls through to the signaling check below.
1721 assign(rhs);
1722 [[fallthrough]];
// A signaling value is quieted and reported as opInvalidOp.
1727 if (isSignaling()) {
1728 makeQuiet();
1729 return opInvalidOp;
1730 }
1731 return rhs.isSignaling() ? opInvalidOp : opOK;
1732
// Result is this value unchanged.
1736 return opOK;
1737
// Result is infinity with rhs's sign, flipped for subtraction.
1740 category = fcInfinity;
1741 sign = rhs.sign ^ subtract;
1742 return opOK;
1743
// Result is rhs, with its sign flipped for subtraction.
1745 assign(rhs);
1746 sign = rhs.sign ^ subtract;
1747 return opOK;
1748
1750 /* Sign depends on rounding mode; handled by caller. */
1751 return opOK;
1752
1754 /* Differently signed infinities can only be validly
1755 subtracted. */
1756 if (((sign ^ rhs.sign)!=0) != subtract) {
1757 makeNaN();
1758 return opInvalidOp;
1759 }
1760
1761 return opOK;
1762
// Marker value: not a special case.
1764 return opDivByZero;
1765 }
1766}
1767
1768/* Add or subtract two normal numbers. */
// Works on the significands only: aligns the operand with the smaller
// exponent, performs the effective addition or subtraction of magnitudes,
// flips `sign` when the subtraction had to be reversed, and returns the
// fraction lost during alignment.  The result is not normalized.
//
// NOTE(review): one line is absent from the listing (numbering skips 1785);
// as shown, the `if` at 1784 is followed directly by a string literal.
// Presumably the missing line opens a call that takes that message --
// confirm against upstream.
1769lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1770 bool subtract) {
1771 integerPart carry = 0;
1772 lostFraction lost_fraction;
1773 int bits;
1774
1775 /* Determine if the operation on the absolute values is effectively
1776 an addition or subtraction. */
1777 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1778
1779 /* Are we bigger exponent-wise than the RHS? */
1780 bits = exponent - rhs.exponent;
1781
1782 /* Subtraction is more subtle than one might naively expect. */
1783 if (subtract) {
1784 if ((bits < 0) && !semantics->hasSignedRepr)
// NOTE(review): listing line 1785 is absent here.
1786 "This floating point format does not support signed values");
1787
1788 IEEEFloat temp_rhs(rhs);
1789 bool lost_fraction_is_from_rhs = false;
1790
// The operand with the smaller exponent is shifted right by one bit less
// than the exponent difference and the other operand is shifted left by one.
1791 if (bits == 0)
1792 lost_fraction = lfExactlyZero;
1793 else if (bits > 0) {
1794 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1795 lost_fraction_is_from_rhs = true;
1796 shiftSignificandLeft(1);
1797 } else {
1798 lost_fraction = shiftSignificandRight(-bits - 1);
1799 temp_rhs.shiftSignificandLeft(1);
1800 }
1801
1802 // Should we reverse the subtraction.
1803 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1804 if (cmp_result == cmpLessThan) {
1805 bool borrow =
1806 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1807 if (borrow) {
1808 // The lost fraction is being subtracted, borrow from the significand
1809 // and invert `lost_fraction`.
1810 if (lost_fraction == lfLessThanHalf)
1811 lost_fraction = lfMoreThanHalf;
1812 else if (lost_fraction == lfMoreThanHalf)
1813 lost_fraction = lfLessThanHalf;
1814 }
1815 carry = temp_rhs.subtractSignificand(*this, borrow);
1816 copySignificand(temp_rhs);
1817 sign = !sign;
1818 } else if (cmp_result == cmpGreaterThan) {
1819 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1820 if (borrow) {
1821 // The lost fraction is being subtracted, borrow from the significand
1822 // and invert `lost_fraction`.
1823 if (lost_fraction == lfLessThanHalf)
1824 lost_fraction = lfMoreThanHalf;
1825 else if (lost_fraction == lfMoreThanHalf)
1826 lost_fraction = lfLessThanHalf;
1827 }
1828 carry = subtractSignificand(temp_rhs, borrow);
1829 } else { // cmpEqual
1830 zeroSignificand();
1831 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1832 // rhs is slightly larger due to the lost fraction, flip the sign.
1833 sign = !sign;
1834 }
1835 }
1836
1837 /* The code above is intended to ensure that no borrow is
1838 necessary. */
1839 assert(!carry);
1840 (void)carry;
1841 } else {
// Effective addition: align the operand with the smaller exponent, then add.
1842 if (bits > 0) {
1843 IEEEFloat temp_rhs(rhs);
1844
1845 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1846 carry = addSignificand(temp_rhs);
1847 } else {
1848 lost_fraction = shiftSignificandRight(-bits);
1849 carry = addSignificand(rhs);
1850 }
1851
1852 /* We have a guard bit; generating a carry cannot happen. */
1853 assert(!carry);
1854 (void)carry;
1855 }
1856
1857 return lost_fraction;
1858}
1859
// Handles multiplications selected by the category pair of the two operands
// without touching the significand arithmetic.
//
// NOTE(review): every `case` label of this switch is absent from the listing
// (numbering skips 1865-1867, 1871-1874, 1882-1884, 1888-1890, 1894-1895 and
// 1899), so which category pairs reach which body cannot be read from here.
1860APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1861 switch (PackCategoriesIntoKey(category, rhs.category)) {
1862 default:
1863 llvm_unreachable(nullptr);
1864
// Takes over rhs with a cleared sign, so the XOR below yields rhs's sign.
1868 assign(rhs);
1869 sign = false;
1870 [[fallthrough]];
1875 sign ^= rhs.sign; // restore the original sign
// A signaling value is quieted and reported as opInvalidOp.
1876 if (isSignaling()) {
1877 makeQuiet();
1878 return opInvalidOp;
1879 }
1880 return rhs.isSignaling() ? opInvalidOp : opOK;
1881
1885 category = fcInfinity;
1886 return opOK;
1887
1891 category = fcZero;
1892 return opOK;
1893
1896 makeNaN();
1897 return opInvalidOp;
1898
// No category change is made here.
1900 return opOK;
1901 }
1902}
1903
// Handles divisions selected by the category pair of the two operands
// without touching the significand arithmetic.
//
// NOTE(review): every `case` label of this switch is absent from the listing
// (numbering skips 1909-1911, 1915-1918, 1926-1929, 1932, 1936, 1943-1944
// and 1948), so which category pairs reach which body cannot be read from
// here.
1904APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1905 switch (PackCategoriesIntoKey(category, rhs.category)) {
1906 default:
1907 llvm_unreachable(nullptr);
1908
// Takes over rhs with a cleared sign, so the XOR below yields rhs's sign.
1912 assign(rhs);
1913 sign = false;
1914 [[fallthrough]];
1919 sign ^= rhs.sign; // restore the original sign
// A signaling value is quieted and reported as opInvalidOp.
1920 if (isSignaling()) {
1921 makeQuiet();
1922 return opInvalidOp;
1923 }
1924 return rhs.isSignaling() ? opInvalidOp : opOK;
1925
// Result keeps this value's category.
1930 return opOK;
1931
1933 category = fcZero;
1934 return opOK;
1935
// Reports opDivByZero; the result is a NaN in NanOnly formats and infinity
// otherwise.
1937 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1938 makeNaN(false, sign);
1939 else
1940 category = fcInfinity;
1941 return opDivByZero;
1942
1945 makeNaN();
1946 return opInvalidOp;
1947
// No category change is made here.
1949 return opOK;
1950 }
1951}
1952
// Handles the special operand categories for the mod operation, selected by
// the category pair of the two operands.
//
// NOTE(review): every `case` label of this switch is absent from the listing
// (numbering skips 1958-1960, 1963-1966, 1973-1975, 1978-1982 and 1986), so
// which category pairs reach which body cannot be read from here.
1953APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1954 switch (PackCategoriesIntoKey(category, rhs.category)) {
1955 default:
1956 llvm_unreachable(nullptr);
1957
// Takes over rhs, then falls through to the signaling check below.
1961 assign(rhs);
1962 [[fallthrough]];
// A signaling value is quieted and reported as opInvalidOp.
1967 if (isSignaling()) {
1968 makeQuiet();
1969 return opInvalidOp;
1970 }
1971 return rhs.isSignaling() ? opInvalidOp : opOK;
1972
// Result is this value unchanged.
1976 return opOK;
1977
1983 makeNaN();
1984 return opInvalidOp;
1985
// No change is made here.
1987 return opOK;
1988 }
1989}
1990
// Handles the special operand categories for the remainder operation,
// selected by the category pair of the two operands.  Returns opDivByZero
// as a marker when the operands are not a special case.
//
// NOTE(review): every `case` label of this switch is absent from the listing
// (numbering skips 1996-1998, 2001-2004, 2011-2013, 2016-2020 and 2024), so
// which category pairs reach which body cannot be read from here.
1991APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1992 switch (PackCategoriesIntoKey(category, rhs.category)) {
1993 default:
1994 llvm_unreachable(nullptr);
1995
// Takes over rhs, then falls through to the signaling check below.
1999 assign(rhs);
2000 [[fallthrough]];
// A signaling value is quieted and reported as opInvalidOp.
2005 if (isSignaling()) {
2006 makeQuiet();
2007 return opInvalidOp;
2008 }
2009 return rhs.isSignaling() ? opInvalidOp : opOK;
2010
// Result is this value unchanged.
2014 return opOK;
2015
2021 makeNaN();
2022 return opInvalidOp;
2023
2025 return opDivByZero; // fake status, indicating this is not a special case
2026 }
2027}
2028
2029/* Change sign. */
// NOTE(review): the line holding this definition's signature is absent from
// this listing (numbering skips 2030).
//
// Flips the sign bit, except for zeros and NaNs in formats whose NaN
// encoding is NegativeZero, which are left unchanged.
2031 // With NaN-as-negative-zero, neither NaN or negative zero can change
2032 // their signs.
2033 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2034 (isZero() || isNaN()))
2035 return;
2036 /* Look mummy, this one's easy. */
2037 sign = !sign;
2038}
2039
2040/* Normalized addition or subtraction. */
2041APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2042 roundingMode rounding_mode,
2043 bool subtract) {
2044 opStatus fs;
2045
2046 fs = addOrSubtractSpecials(rhs, subtract);
2047
2048 /* This return code means it was not a simple case. */
2049 if (fs == opDivByZero) {
2050 lostFraction lost_fraction;
2051
2052 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2053 fs = normalize(rounding_mode, lost_fraction);
2054
2055 /* Can only be zero if we lost no fraction. */
2056 assert(category != fcZero || lost_fraction == lfExactlyZero);
2057 }
2058
2059 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2060 positive zero unless rounding to minus infinity, except that
2061 adding two like-signed zeroes gives that zero. */
2062 if (category == fcZero) {
2063 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2064 sign = (rounding_mode == rmTowardNegative);
2065 // NaN-in-negative-zero means zeros need to be normalized to +0.
2066 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2067 sign = false;
2068 }
2069
2070 return fs;
2071}
2072
2073/* Normalized addition. */
// NOTE(review): the first line of this definition's signature is absent from
// this listing (numbering skips 2074).
// Forwards to addOrSubtract() with subtract == false.
2075 roundingMode rounding_mode) {
2076 return addOrSubtract(rhs, rounding_mode, false);
2077}
2078
2079/* Normalized subtraction. */
// NOTE(review): the first line of this definition's signature is absent from
// this listing (numbering skips 2080).
// Forwards to addOrSubtract() with subtract == true.
2081 roundingMode rounding_mode) {
2082 return addOrSubtract(rhs, rounding_mode, true);
2083}
2084
2085/* Normalized multiply. */
// NOTE(review): the first line of this definition's signature is absent from
// this listing (numbering skips 2086).
//
// Combines the signs, lets multiplySpecials() deal with special categories,
// and for finite non-zero results multiplies the significands and
// normalizes.  opInexact is added to the status whenever a fraction was lost.
2087 roundingMode rounding_mode) {
2088 opStatus fs;
2089
2090 sign ^= rhs.sign;
2091 fs = multiplySpecials(rhs);
2092
// Formats that encode NaN as negative zero only have a positive zero.
2093 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2094 sign = false;
2095 if (isFiniteNonZero()) {
2096 lostFraction lost_fraction = multiplySignificand(rhs);
2097 fs = normalize(rounding_mode, lost_fraction);
2098 if (lost_fraction != lfExactlyZero)
2099 fs = (opStatus) (fs | opInexact);
2100 }
2101
2102 return fs;
2103}
2104
2105/* Normalized divide. */
// NOTE(review): the first line of this definition's signature is absent from
// this listing (numbering skips 2106).
//
// Combines the signs, lets divideSpecials() deal with special categories,
// and for finite non-zero results divides the significands and normalizes.
// opInexact is added to the status whenever a fraction was lost.
2107 roundingMode rounding_mode) {
2108 opStatus fs;
2109
2110 sign ^= rhs.sign;
2111 fs = divideSpecials(rhs);
2112
// Formats that encode NaN as negative zero only have a positive zero.
2113 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2114 sign = false;
2115 if (isFiniteNonZero()) {
2116 lostFraction lost_fraction = divideSignificand(rhs);
2117 fs = normalize(rounding_mode, lost_fraction);
2118 if (lost_fraction != lfExactlyZero)
2119 fs = (opStatus) (fs | opInexact);
2120 }
2121
2122 return fs;
2123}
2124
2125/* Normalized remainder. */
// NOTE(review): three lines of this definition are absent from the listing:
// the signature (numbering skips 2126) and one statement each before the
// assertions at 2206 and 2219 (numbering skips 2205 and 2218).  Presumably
// the two statements subtract P from this value and store the status in
// `fs` -- confirm against upstream.
//
// Computes the remainder of this value with respect to rhs, as described in
// the comment block below, and returns the operation status.
2127 opStatus fs;
2128 unsigned int origSign = sign;
2129
2130 // First handle the special cases.
2131 fs = remainderSpecials(rhs);
2132 if (fs != opDivByZero)
2133 return fs;
2134
2135 fs = opOK;
2136
2137 // Make sure the current value is less than twice the denom. If the addition
2138 // did not succeed (an overflow has happened), it means that the finite
2139 // value we currently possess must be less than twice the denom (as we are
2140 // using the same semantics).
2141 IEEEFloat P2 = rhs;
2142 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2143 fs = mod(P2);
2144 assert(fs == opOK);
2145 }
2146
2147 // Let's work with absolute numbers.
2148 IEEEFloat P = rhs;
2149 P.sign = false;
2150 sign = false;
2151
2152 //
2153 // To calculate the remainder we use the following scheme.
2154 //
2155 // The remainder is defined as follows:
2156 //
2157 // remainder = numer - rquot * denom = x - r * p
2158 //
2159 // Where r is the result of: x/p, rounded toward the nearest integral value
2160 // (with halfway cases rounded toward the even number).
2161 //
2162 // Currently, (after x mod 2p):
2163 // r is the number of 2p's present inside x, which is inherently, an even
2164 // number of p's.
2165 //
2166 // We may split the remaining calculation into 4 options:
2167 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2168 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2169 // are done as well.
2170 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2171 // to subtract 1p at least once.
2172 // - if x >= p then we must subtract p at least once, as x must be a
2173 // remainder.
2174 //
2175 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2176 //
2177 // We can now split the remaining calculation to the following 3 options:
2178 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2179 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2180 // must round up to the next even number. so we must subtract p once more.
2181 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2182 // integral, and subtract p once more.
2183 //
2184
2185 // Extend the semantics to prevent an overflow/underflow or inexact result.
2186 bool losesInfo;
2187 fltSemantics extendedSemantics = *semantics;
2188 extendedSemantics.maxExponent++;
2189 extendedSemantics.minExponent--;
2190 extendedSemantics.precision += 2;
2191
2192 IEEEFloat VEx = *this;
2193 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2194 assert(fs == opOK && !losesInfo);
2195 IEEEFloat PEx = P;
2196 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2197 assert(fs == opOK && !losesInfo);
2198
2199 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2200 // any fraction.
2201 fs = VEx.add(VEx, rmNearestTiesToEven);
2202 assert(fs == opOK);
2203
2204 if (VEx.compare(PEx) == cmpGreaterThan) {
// NOTE(review): listing line 2205 is absent here.
2206 assert(fs == opOK);
2207
2208 // Make VEx = this.add(this), but because we have different semantics, we do
2209 // not want to `convert` again, so we just subtract PEx twice (which equals
2210 // to the desired value).
2211 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2212 assert(fs == opOK);
2213 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2214 assert(fs == opOK);
2215
2216 cmpResult result = VEx.compare(PEx);
2217 if (result == cmpGreaterThan || result == cmpEqual) {
// NOTE(review): listing line 2218 is absent here.
2219 assert(fs == opOK);
2220 }
2221 }
2222
2223 if (isZero()) {
2224 sign = origSign; // IEEE754 requires this
2225 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2226 // But some 8-bit floats only have positive 0.
2227 sign = false;
2228 }
2229
2230 else
2231 sign ^= origSign;
2232 return fs;
2233}
2234
2235/* Normalized llvm frem (C fmod). */
// NOTE(review): the function header (listing line 2236) is absent from this
// extract. The body reads a parameter named `rhs` and returns an opStatus;
// presumably this is IEEEFloat::mod(const IEEEFloat &rhs) -- confirm upstream.
2237 opStatus fs;
// Let modSpecials() deal with the operand categories first; its status is
// what is returned if the loop below never runs.
2238 fs = modSpecials(rhs);
// Remember the sign on entry; a zero result gets it back after the loop.
2239 unsigned int origSign = sign;
2240
// Reduce *this while both operands are finite and non-zero.
// NOTE(review): listing line 2242 (the rest of the loop condition) is missing.
2241 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
// Scale rhs by the difference of the two binary exponents to get V.
2243 int Exp = ilogb(*this) - ilogb(rhs);
2244 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2245 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2246 // check for it.
// If V is NaN or compares larger in magnitude than *this, scale by one
// power of two less.
2247 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2248 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
// Give V the sign of *this.
2249 V.sign = sign;
2250
// NOTE(review): listing line 2251 is missing here; presumably it is the
// statement that subtracts V from *this and assigns fs -- confirm upstream.
2252
2253 // When the semantics supports zero, this loop's
2254 // exit-condition is handled by the 'isFiniteNonZero'
2255 // category check above. However, when the semantics
2256 // does not have 'fcZero' and we have reached the
2257 // minimum possible value, (and any further subtract
2258 // will underflow to the same value) explicitly
2259 // provide an exit-path here.
2260 if (!semantics->hasZero && this->isSmallest())
2261 break;
2262
2263 assert(fs==opOK);
2264 }
// A zero result takes the sign recorded on entry, unless the format encodes
// NaN as negative zero, in which case the sign is cleared.
2265 if (isZero()) {
2266 sign = origSign; // fmod requires this
2267 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2268 sign = false;
2269 }
2270 return fs;
2271}
2272
2273/* Normalized fused-multiply-add. */
// NOTE(review): the first header line (listing line 2274) is absent from this
// extract. The body uses `multiplicand`, `addend` and `rounding_mode` and
// returns an opStatus; presumably IEEEFloat::fusedMultiplyAdd -- confirm
// upstream.
2275 const IEEEFloat &addend,
2276 roundingMode rounding_mode) {
2277 opStatus fs;
2278
2279 /* Post-multiplication sign, before addition. */
2280 sign ^= multiplicand.sign;
2281
2282 /* If and only if all arguments are normal do we need to do an
2283 extended-precision calculation. */
2284 if (isFiniteNonZero() &&
2285 multiplicand.isFiniteNonZero() &&
2286 addend.isFinite()) {
2287 lostFraction lost_fraction;
2288
// Multiply the significands with the addend folded in, then round once.
2289 lost_fraction = multiplySignificand(multiplicand, addend);
2290 fs = normalize(rounding_mode, lost_fraction);
// Any lost fraction makes the result inexact, whatever normalize() reported.
2291 if (lost_fraction != lfExactlyZero)
2292 fs = (opStatus) (fs | opInexact);
2293
2294 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2295 positive zero unless rounding to minus infinity, except that
2296 adding two like-signed zeroes gives that zero. */
2297 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2298 sign = (rounding_mode == rmTowardNegative);
// Formats that encode NaN as negative zero get a cleared sign here.
2299 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2300 sign = false;
2301 }
2302 } else {
// At least one operand is special: let multiplySpecials() handle the product.
2303 fs = multiplySpecials(multiplicand);
2304
2305 /* FS can only be opOK or opInvalidOp. There is no more work
2306 to do in the latter case. The IEEE-754R standard says it is
2307 implementation-defined in this case whether, if ADDEND is a
2308 quiet NaN, we raise invalid op; this implementation does so.
2309
2310 If we need to do the addition we can do so with normal
2311 precision. */
2312 if (fs == opOK)
2313 fs = addOrSubtract(addend, rounding_mode, false);
2314 }
2315
2316 return fs;
2317}
2318
2319/* Rounding-mode correct round to integral value. */
// NOTE(review): the function header (listing line 2320) is absent from this
// extract. The body uses `rounding_mode` and returns an opStatus; presumably
// IEEEFloat::roundToIntegral(roundingMode rounding_mode) -- confirm upstream.
2321 opStatus fs;
2322
2323 if (isInfinity())
2324 // [IEEE Std 754-2008 6.1]:
2325 // The behavior of infinity in floating-point arithmetic is derived from the
2326 // limiting cases of real arithmetic with operands of arbitrarily
2327 // large magnitude, when such a limit exists.
2328 // ...
2329 // Operations on infinite operands are usually exact and therefore signal no
2330 // exceptions ...
2331 return opOK;
2332
2333 if (isNaN()) {
2334 if (isSignaling()) {
2335 // [IEEE Std 754-2008 6.2]:
2336 // Under default exception handling, any operation signaling an invalid
2337 // operation exception and for which a floating-point result is to be
2338 // delivered shall deliver a quiet NaN.
2339 makeQuiet();
2340 // [IEEE Std 754-2008 6.2]:
2341 // Signaling NaNs shall be reserved operands that, under default exception
2342 // handling, signal the invalid operation exception(see 7.2) for every
2343 // general-computational and signaling-computational operation except for
2344 // the conversions described in 5.12.
2345 return opInvalidOp;
2346 } else {
2347 // [IEEE Std 754-2008 6.2]:
2348 // For an operation with quiet NaN inputs, other than maximum and minimum
2349 // operations, if a floating-point result is to be delivered the result
2350 // shall be a quiet NaN which should be one of the input NaNs.
2351 // ...
2352 // Every general-computational and quiet-computational operation involving
2353 // one or more input NaNs, none of them signaling, shall signal no
2354 // exception, except fusedMultiplyAdd might signal the invalid operation
2355 // exception(see 7.2).
2356 return opOK;
2357 }
2358 }
2359
2360 if (isZero()) {
2361 // [IEEE Std 754-2008 6.3]:
2362 // ... the sign of the result of conversions, the quantize operation, the
2363 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2364 // the sign of the first or only operand.
2365 return opOK;
2366 }
2367
2368 // If the exponent is large enough, we know that this value is already
2369 // integral, and the arithmetic below would potentially cause it to saturate
2370 // to +/-Inf. Bail out early instead.
2371 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2372 return opOK;
2373
2374 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2375 // precision of our format, and then subtract it back off again. The choice
2376 // of rounding modes for the addition/subtraction determines the rounding mode
2377 // for our integral rounding as well.
2378 // NOTE: When the input value is negative, we do subtraction followed by
2379 // addition instead.
// Build the integer 2^(p-1) in an APInt whose width is the next power of two
// above the precision, so the shifted bit fits.
2380 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2381 1);
2382 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2383 IEEEFloat MagicConstant(*semantics);
// NOTE(review): listing line 2385 (the remaining argument(s) of this call) is
// missing from the extract.
2384 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2386 assert(fs == opOK);
// Give the constant the sign of the input so the add/subtract pair moves
// away from zero and back.
2387 MagicConstant.sign = sign;
2388
2389 // Preserve the input sign so that we can handle the case of zero result
2390 // correctly.
2391 bool inputSign = isNegative();
2392
// The status of this addition is what the function returns.
2393 fs = add(MagicConstant, rounding_mode);
2394
2395 // Current value and 'MagicConstant' are both integers, so the result of the
2396 // subtraction is always exact according to Sterbenz' lemma.
2397 subtract(MagicConstant, rounding_mode);
2398
2399 // Restore the input sign.
2400 if (inputSign != isNegative())
2401 changeSign();
2402
2403 return fs;
2404}
2405
2406/* Comparison requires normalized numbers. */
// NOTE(review): the function header (listing line 2407) and every `case`
// label of the switch below are absent from this extract; only the bodies of
// the cases survive. Presumably this is IEEEFloat::compare(const IEEEFloat
// &rhs) -- confirm upstream before relying on which category pair maps to
// which return.
2408 cmpResult result;
2409
// Both operands must share the same semantics object.
2410 assert(semantics == rhs.semantics);
2411
// Dispatch on the pair (category, rhs.category) packed into a single key.
2412 switch (PackCategoriesIntoKey(category, rhs.category)) {
2413 default:
2414 llvm_unreachable(nullptr);
2415
// (case labels missing) This group yields cmpUnordered.
2423 return cmpUnordered;
2424
// (case labels missing) Result decided by the sign of *this alone.
2428 if (sign)
2429 return cmpLessThan;
2430 else
2431 return cmpGreaterThan;
2432
// (case labels missing) Result decided by the sign of rhs alone.
2436 if (rhs.sign)
2437 return cmpGreaterThan;
2438 else
2439 return cmpLessThan;
2440
// (case label missing) Equal when the signs agree, otherwise ordered by sign.
2442 if (sign == rhs.sign)
2443 return cmpEqual;
2444 else if (sign)
2445 return cmpLessThan;
2446 else
2447 return cmpGreaterThan;
2448
// (case label missing) This pair always compares equal.
2450 return cmpEqual;
2451
// (case label missing) Falls out of the switch to the general comparison.
2453 break;
2454 }
2455
2456 /* Two normal numbers. Do they have the same sign? */
2457 if (sign != rhs.sign) {
2458 if (sign)
2459 result = cmpLessThan;
2460 else
2461 result = cmpGreaterThan;
2462 } else {
2463 /* Compare absolute values; invert result if negative. */
2464 result = compareAbsoluteValue(rhs);
2465
2466 if (sign) {
2467 if (result == cmpLessThan)
2468 result = cmpGreaterThan;
2469 else if (result == cmpGreaterThan)
2470 result = cmpLessThan;
2471 }
2472 }
2473
2474 return result;
2475}
2476
2477/// IEEEFloat::convert - convert a value of one floating point type to another.
2478/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2479/// records whether the transformation lost information, i.e. whether
2480/// converting the result back to the original type will produce the
2481/// original value (this is almost the same as return value==opOK, but there
2482/// are edge cases where this is not so).
2483
// NOTE(review): the first header line (listing line 2484) is absent from this
// extract; the body refers to a `toSemantics` parameter that it would declare.
2485 roundingMode rounding_mode,
2486 bool *losesInfo) {
// NOTE(review): listing line 2487 is missing; presumably it declares the
// `lostFraction` local used below -- confirm upstream.
2488 unsigned int newPartCount, oldPartCount;
2489 opStatus fs;
2490 int shift;
2491 const fltSemantics &fromSemantics = *semantics;
// Sample the signaling state before the significand is modified.
2492 bool is_signaling = isSignaling();
2493
// NOTE(review): listing line 2494 is missing; presumably it initialises
// `lostFraction` -- confirm upstream.
2495 newPartCount = partCountForBits(toSemantics.precision + 1);
2496 oldPartCount = partCount();
// Positive shift means the target is wider, negative means narrower.
2497 shift = toSemantics.precision - fromSemantics.precision;
2498
2499 bool X86SpecialNan = false;
2500 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2501 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2502 (!(*significandParts() & 0x8000000000000000ULL) ||
2503 !(*significandParts() & 0x4000000000000000ULL))) {
2504 // x86 has some unusual NaNs which cannot be represented in any other
2505 // format; note them here.
2506 X86SpecialNan = true;
2507 }
2508
2509 // If this is a truncation of a denormal number, and the target semantics
2510 // has larger exponent range than the source semantics (this can happen
2511 // when truncating from PowerPC double-double to double format), the
2512 // right shift could lose result mantissa bits. Adjust exponent instead
2513 // of performing excessive shift.
2514 // Also do a similar trick in case shifting denormal would produce zero
2515 // significand as this case isn't handled correctly by normalize.
2516 if (shift < 0 && isFiniteNonZero()) {
2517 int omsb = significandMSB() + 1;
2518 int exponentChange = omsb - fromSemantics.precision;
2519 if (exponent + exponentChange < toSemantics.minExponent)
2520 exponentChange = toSemantics.minExponent - exponent;
2521 exponentChange = std::max(exponentChange, shift);
2522 if (exponentChange < 0) {
2523 shift -= exponentChange;
2524 exponent += exponentChange;
2525 } else if (omsb <= -shift) {
2526 exponentChange = omsb + shift - 1; // leave at least one bit set
2527 shift -= exponentChange;
2528 exponent += exponentChange;
2529 }
2530 }
2531
2532 // If this is a truncation, perform the shift before we narrow the storage.
// NOTE(review): listing line 2535 (the end of this condition) is missing.
2533 if (shift < 0 && (isFiniteNonZero() ||
2534 (category == fcNaN && semantics->nonFiniteBehavior !=
2536 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2537
2538 // Fix the storage so it can hold to new value.
2539 if (newPartCount > oldPartCount) {
2540 // The new type requires more storage; make it available.
2541 integerPart *newParts;
2542 newParts = new integerPart[newPartCount];
2543 APInt::tcSet(newParts, 0, newPartCount);
// Only finite non-zero values and NaNs have their significand copied across.
2544 if (isFiniteNonZero() || category==fcNaN)
2545 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2546 freeSignificand();
2547 significand.parts = newParts;
2548 } else if (newPartCount == 1 && oldPartCount != 1) {
2549 // Switch to built-in storage for a single part.
2550 integerPart newPart = 0;
2551 if (isFiniteNonZero() || category==fcNaN)
2552 newPart = significandParts()[0];
2553 freeSignificand();
2554 significand.part = newPart;
2555 }
2556
2557 // Now that we have the right storage, switch the semantics.
2558 semantics = &toSemantics;
2559
2560 // If this is an extension, perform the shift now that the storage is
2561 // available.
2562 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2563 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2564
// Finish per category: round finite values, fix up NaNs, and handle
// infinities and zeros the target format cannot represent as-is.
2565 if (isFiniteNonZero()) {
2566 fs = normalize(rounding_mode, lostFraction);
2567 *losesInfo = (fs != opOK);
2568 } else if (category == fcNaN) {
2569 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
// NOTE(review): listing line 2571 (the right-hand side of this assignment)
// is missing from the extract.
2570 *losesInfo =
2572 makeNaN(false, sign);
2573 return is_signaling ? opInvalidOp : opOK;
2574 }
2575
2576 // If NaN is negative zero, we need to create a new NaN to avoid converting
2577 // NaN to -Inf.
2578 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2579 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2580 makeNaN(false, false);
2581
2582 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2583
2584 // For x87 extended precision, we want to make a NaN, not a special NaN if
2585 // the input wasn't special either.
2586 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2587 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2588
2589 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2590 // This also guarantees that a sNaN does not become Inf on a truncation
2591 // that loses all payload bits.
2592 if (is_signaling) {
2593 makeQuiet();
2594 fs = opInvalidOp;
2595 } else {
2596 fs = opOK;
2597 }
2598 } else if (category == fcInfinity &&
2599 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
// The target has no infinity: the value becomes a NaN and the conversion is
// reported as inexact and lossy.
2600 makeNaN(false, sign);
2601 *losesInfo = true;
2602 fs = opInexact;
2603 } else if (category == fcZero &&
2604 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2605 // Negative zero loses info, but positive zero doesn't.
2606 *losesInfo =
2607 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2608 fs = *losesInfo ? opInexact : opOK;
2609 // NaN is negative zero means -0 -> +0, which can lose information
2610 sign = false;
2611 } else {
2612 *losesInfo = false;
2613 fs = opOK;
2614 }
2615
// NOTE(review): listing line 2617 (the statement controlled by this `if`) is
// missing; as extracted, the `if` appears to guard the return, which is
// presumably not the upstream intent -- confirm.
2616 if (category == fcZero && !semantics->hasZero)
2618 return fs;
2619}
2620
2621/* Convert a floating point number to an integer according to the
2622 rounding mode. If the rounded integer value is out of range this
2623 returns an invalid operation exception and the contents of the
2624 destination parts are unspecified. If the rounded value is in
2625 range but the floating point number is not the exact integer, the C
2626 standard doesn't require an inexact exception to be raised. IEEE
2627 854 does require it so we do that.
2628
2629 Note that for conversions to integer type the C standard requires
2630 round-to-zero to always be used. */
2631APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2632 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2633 roundingMode rounding_mode, bool *isExact) const {
2634 lostFraction lost_fraction;
2635 const integerPart *src;
2636 unsigned int dstPartsCount, truncatedBits;
2637
2638 *isExact = false;
2639
2640 /* Handle the three special cases first. */
2641 if (category == fcInfinity || category == fcNaN)
2642 return opInvalidOp;
2643
2644 dstPartsCount = partCountForBits(width);
2645 assert(dstPartsCount <= parts.size() && "Integer too big");
2646
2647 if (category == fcZero) {
2648 APInt::tcSet(parts.data(), 0, dstPartsCount);
2649 // Negative zero can't be represented as an int.
2650 *isExact = !sign;
2651 return opOK;
2652 }
2653
2654 src = significandParts();
2655
2656 /* Step 1: place our absolute value, with any fraction truncated, in
2657 the destination. */
2658 if (exponent < 0) {
2659 /* Our absolute value is less than one; truncate everything. */
2660 APInt::tcSet(parts.data(), 0, dstPartsCount);
2661 /* For exponent -1 the integer bit represents .5, look at that.
2662 For smaller exponents leftmost truncated bit is 0. */
2663 truncatedBits = semantics->precision -1U - exponent;
2664 } else {
2665 /* We want the most significant (exponent + 1) bits; the rest are
2666 truncated. */
2667 unsigned int bits = exponent + 1U;
2668
2669 /* Hopelessly large in magnitude? */
2670 if (bits > width)
2671 return opInvalidOp;
2672
2673 if (bits < semantics->precision) {
2674 /* We truncate (semantics->precision - bits) bits. */
2675 truncatedBits = semantics->precision - bits;
2676 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2677 } else {
2678 /* We want at least as many bits as are available. */
2679 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2680 0);
2681 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2682 bits - semantics->precision);
2683 truncatedBits = 0;
2684 }
2685 }
2686
2687 /* Step 2: work out any lost fraction, and increment the absolute
2688 value if we would round away from zero. */
2689 if (truncatedBits) {
2690 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2691 truncatedBits);
2692 if (lost_fraction != lfExactlyZero &&
2693 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2694 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2695 return opInvalidOp; /* Overflow. */
2696 }
2697 } else {
2698 lost_fraction = lfExactlyZero;
2699 }
2700
2701 /* Step 3: check if we fit in the destination. */
2702 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2703
2704 if (sign) {
2705 if (!isSigned) {
2706 /* Negative numbers cannot be represented as unsigned. */
2707 if (omsb != 0)
2708 return opInvalidOp;
2709 } else {
2710 /* It takes omsb bits to represent the unsigned integer value.
2711 We lose a bit for the sign, but care is needed as the
2712 maximally negative integer is a special case. */
2713 if (omsb == width &&
2714 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2715 return opInvalidOp;
2716
2717 /* This case can happen because of rounding. */
2718 if (omsb > width)
2719 return opInvalidOp;
2720 }
2721
2722 APInt::tcNegate (parts.data(), dstPartsCount);
2723 } else {
2724 if (omsb >= width + !isSigned)
2725 return opInvalidOp;
2726 }
2727
2728 if (lost_fraction == lfExactlyZero) {
2729 *isExact = true;
2730 return opOK;
2731 }
2732 return opInexact;
2733}
2734
2735/* Same as convertToSignExtendedInteger, except we provide
2736 deterministic values in case of an invalid operation exception,
2737 namely zero for NaNs and the minimal or maximal value respectively
2738 for underflow or overflow.
2739 The *isExact output tells whether the result is exact, in the sense
2740 that converting it back to the original floating point type produces
2741 the original value. This is almost equivalent to result==opOK,
2742 except for negative zeroes.
2743*/
// NOTE(review): the first header lines (listing lines 2744-2745) are absent
// from this extract; the body uses a `parts` parameter that they would
// declare. Presumably IEEEFloat::convertToInteger -- confirm upstream.
2746 unsigned int width, bool isSigned,
2747 roundingMode rounding_mode, bool *isExact) const {
2748 opStatus fs;
2749
// Do the real conversion; only the invalid-operation case needs fixing up.
2750 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2751 isExact);
2752
2753 if (fs == opInvalidOp) {
2754 unsigned int bits, dstPartsCount;
2755
2756 dstPartsCount = partCountForBits(width);
2757 assert(dstPartsCount <= parts.size() && "Integer too big");
2758
// Choose how many low bits to set: none for NaN, `isSigned` (0 or 1) for a
// negative value, and width minus the sign bit (if any) for a positive one.
2759 if (category == fcNaN)
2760 bits = 0;
2761 else if (sign)
2762 bits = isSigned;
2763 else
2764 bits = width - isSigned;
2765
2766 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
// For a negative value in a signed destination, move the single set bit up
// to the sign position.
2767 if (sign && isSigned)
2768 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2769 }
2770
2771 return fs;
2772}
2773
2774/* Convert an unsigned integer SRC to a floating point number,
2775 rounding according to ROUNDING_MODE. The sign of the floating
2776 point number is not modified. */
2777APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2778 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2779 unsigned int omsb, precision, dstCount;
2780 integerPart *dst;
2781 lostFraction lost_fraction;
2782
2783 category = fcNormal;
2784 omsb = APInt::tcMSB(src, srcCount) + 1;
2785 dst = significandParts();
2786 dstCount = partCount();
2787 precision = semantics->precision;
2788
2789 /* We want the most significant PRECISION bits of SRC. There may not
2790 be that many; extract what we can. */
2791 if (precision <= omsb) {
2792 exponent = omsb - 1;
2793 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2794 omsb - precision);
2795 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2796 } else {
2797 exponent = precision - 1;
2798 lost_fraction = lfExactlyZero;
2799 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2800 }
2801
2802 return normalize(rounding_mode, lost_fraction);
2803}
2804
// NOTE(review): the first header line (listing line 2805) is absent from this
// extract; the body uses `Val` and `isSigned`, which it would declare.
// Presumably IEEEFloat::convertFromAPInt -- confirm upstream.
2806 roundingMode rounding_mode) {
2807 unsigned int partCount = Val.getNumWords();
// Work on a copy so the value can be negated without touching the argument.
2808 APInt api = Val;
2809
// Record the sign separately and convert the magnitude as an unsigned value.
2810 sign = false;
2811 if (isSigned && api.isNegative()) {
2812 sign = true;
2813 api = -api;
2814 }
2815
2816 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2817}
2818
// Parse the significand and binary exponent of a hexadecimal floating point
// literal from `s` into this value and round with `rounding_mode`. The error
// returns below use createError() / takeError().
// NOTE(review): the return-type line (listing line 2819) is absent from this
// extract; the caller visible later in the file passes the text after the
// "0x" prefix and the sign.
2820IEEEFloat::convertFromHexadecimalString(StringRef s,
2821 roundingMode rounding_mode) {
2822 lostFraction lost_fraction = lfExactlyZero;
2823
2824 category = fcNormal;
2825 zeroSignificand();
2826 exponent = 0;
2827
2828 integerPart *significand = significandParts();
2829 unsigned partsCount = partCount();
// Digits are written from the most significant nibble of the significand
// storage downwards; bitPos is the position of the next free nibble.
2830 unsigned bitPos = partsCount * integerPartWidth;
2831 bool computedTrailingFraction = false;
2832
2833 // Skip leading zeroes and any (hexa)decimal point.
2834 StringRef::iterator begin = s.begin();
2835 StringRef::iterator end = s.end();
// NOTE(review): listing line 2836 is missing; presumably it declares the
// `dot` iterator used below -- confirm upstream.
2837 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2838 if (!PtrOrErr)
2839 return PtrOrErr.takeError();
2840 StringRef::iterator p = *PtrOrErr;
2841 StringRef::iterator firstSignificantDigit = p;
2842
2843 while (p != end) {
2844 integerPart hex_value;
2845
// `dot == end` is used as the "no dot seen yet" marker.
2846 if (*p == '.') {
2847 if (dot != end)
2848 return createError("String contains multiple dots");
2849 dot = p++;
2850 continue;
2851 }
2852
// Stop at the first character that is not a hex digit.
2853 hex_value = hexDigitValue(*p);
2854 if (hex_value == UINT_MAX)
2855 break;
2856
2857 p++;
2858
2859 // Store the number while we have space.
2860 if (bitPos) {
2861 bitPos -= 4;
2862 hex_value <<= bitPos % integerPartWidth;
2863 significand[bitPos / integerPartWidth] |= hex_value;
2864 } else if (!computedTrailingFraction) {
// Storage is full: classify the remaining digits once as a lost fraction.
2865 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2866 if (!FractOrErr)
2867 return FractOrErr.takeError();
2868 lost_fraction = *FractOrErr;
2869 computedTrailingFraction = true;
2870 }
2871 }
2872
2873 /* Hex floats require an exponent but not a hexadecimal point. */
2874 if (p == end)
2875 return createError("Hex strings require an exponent");
2876 if (*p != 'p' && *p != 'P')
2877 return createError("Invalid character in significand");
2878 if (p == begin)
2879 return createError("Significand has no digits");
2880 if (dot != end && p - begin == 1)
2881 return createError("Significand has no digits");
2882
2883 /* Ignore the exponent if we are zero. */
2884 if (p != firstSignificantDigit) {
2885 int expAdjustment;
2886
2887 /* Implicit hexadecimal point? */
2888 if (dot == end)
2889 dot = p;
2890
2891 /* Calculate the exponent adjustment implicit in the number of
2892 significant digits. */
2893 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2894 if (expAdjustment < 0)
2895 expAdjustment++;
// Each hex digit is worth four bits of exponent.
2896 expAdjustment = expAdjustment * 4 - 1;
2897
2898 /* Adjust for writing the significand starting at the most
2899 significant nibble. */
2900 expAdjustment += semantics->precision;
2901 expAdjustment -= partsCount * integerPartWidth;
2902
2903 /* Adjust for the given exponent. */
2904 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2905 if (!ExpOrErr)
2906 return ExpOrErr.takeError();
2907 exponent = *ExpOrErr;
2908 }
2909
2910 return normalize(rounding_mode, lost_fraction);
2911}
2912
// Combine the integer significand `decSigParts` (`sigPartCount` parts) with
// the decimal exponent `exp` and round the result into this value. The work
// is done in a scratch format whose precision is doubled each iteration until
// the error-bound test at the bottom of the loop passes.
// NOTE(review): the return-type line (listing line 2913) and listing lines
// 2919, 2943, 2945 and 2971 are absent from this extract.
2914IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2915 unsigned sigPartCount, int exp,
2916 roundingMode rounding_mode) {
2917 unsigned int parts, pow5PartCount;
// Scratch semantics with a very wide exponent range; its precision is filled
// in at the top of each loop iteration.
2918 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
// NOTE(review): listing line 2919 is missing; presumably it declares the
// `pow5Parts` buffer used below -- confirm upstream.
2920 bool isNearest;
2921
2922 isNearest = (rounding_mode == rmNearestTiesToEven ||
2923 rounding_mode == rmNearestTiesToAway);
2924
// Initial working size: the target precision plus 11 bits, rounded up to
// whole parts.
2925 parts = partCountForBits(semantics->precision + 11);
2926
2927 /* Calculate pow(5, abs(exp)). */
2928 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2929
2930 for (;; parts *= 2) {
2931 opStatus sigStatus, powStatus;
2932 unsigned int excessPrecision, truncatedBits;
2933
2934 calcSemantics.precision = parts * integerPartWidth - 1;
// Bits carried beyond the target precision during this attempt.
2935 excessPrecision = calcSemantics.precision - semantics->precision;
2936 truncatedBits = excessPrecision;
2937
2938 IEEEFloat decSig(calcSemantics, uninitialized);
2939 decSig.makeZero(sign);
2940 IEEEFloat pow5(calcSemantics);
2941
// NOTE(review): listing lines 2943 and 2945 (the trailing arguments of the
// next two calls) are missing from the extract.
2942 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2944 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2946 /* Add exp, as 10^n = 5^n * 2^n. */
2947 decSig.exponent += exp;
2948
2949 lostFraction calcLostFraction;
2950 integerPart HUerr, HUdistance;
2951 unsigned int powHUerr;
2952
// Non-negative exponent: multiply by 5^exp; negative: divide by 5^|exp|.
2953 if (exp >= 0) {
2954 /* multiplySignificand leaves the precision-th bit set to 1. */
2955 calcLostFraction = decSig.multiplySignificand(pow5);
2956 powHUerr = powStatus != opOK;
2957 } else {
2958 calcLostFraction = decSig.divideSignificand(pow5);
2959 /* Denormal numbers have less precision. */
2960 if (decSig.exponent < semantics->minExponent) {
2961 excessPrecision += (semantics->minExponent - decSig.exponent);
2962 truncatedBits = excessPrecision;
2963 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
2964 }
2965 /* Extra half-ulp lost in reciprocal of exponent. */
2966 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2967 }
2968
2969 /* Both multiplySignificand and divideSignificand return the
2970 result with the integer bit set. */
// NOTE(review): listing line 2971 (the start of this statement, presumably an
// assertion on the integer bit) is missing from the extract.
2972 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2973
// Accumulated error bound versus twice the distance to the nearest rounding
// boundary, as computed by the two helpers below.
2974 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2975 powHUerr);
2976 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2977 excessPrecision, isNearest);
2978
2979 /* Are we guaranteed to round correctly if we truncate? */
2980 if (HUdistance >= HUerr) {
2981 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2982 calcSemantics.precision - excessPrecision,
2983 excessPrecision);
2984 /* Take the exponent of decSig. If we tcExtract-ed less bits
2985 above we must adjust our exponent to compensate for the
2986 implicit right shift. */
2987 exponent = (decSig.exponent + semantics->precision
2988 - (calcSemantics.precision - excessPrecision));
2989 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2990 decSig.partCount(),
2991 truncatedBits);
2992 return normalize(rounding_mode, calcLostFraction);
2993 }
// Otherwise loop again with twice as many parts.
2994 }
2995}
2996
// Parse the decimal floating point text in `str` into this value, rounding
// with `rounding_mode`. Returns the resulting opStatus, or an Error when
// interpretDecimal() fails or a non-digit is found in the significand.
2997Expected<APFloat::opStatus>
2998IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2999 decimalInfo D;
3000 opStatus fs;
3001
3002 /* Scan the text. */
3003 StringRef::iterator p = str.begin();
3004 if (Error Err = interpretDecimal(p, str.end(), &D))
3005 return std::move(Err);
3006
3007 /* Handle the quick cases. First the case of no significant digits,
3008 i.e. zero, and then exponents that are obviously too large or too
3009 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3010 definitely overflows if
3011
3012 (exp - 1) * L >= maxExponent
3013
3014 and definitely underflows to zero where
3015
3016 (exp + 1) * L <= minExponent - precision
3017
3018 With integer arithmetic the tightest bounds for L are
3019
3020 93/28 < L < 196/59 [ numerator <= 256 ]
3021 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3022 */
3023
3024 // Test if we have a zero number allowing for strings with no null terminators
3025 // and zero decimals with non-zero exponents.
3026 //
3027 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3028 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3029 // be at most one dot. On the other hand, if we have a zero with a non-zero
3030 // exponent, then we know that D.firstSigDigit will be non-numeric.
3031 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3032 category = fcZero;
3033 fs = opOK;
// Formats that encode NaN as negative zero get a cleared sign here.
3034 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3035 sign = false;
// NOTE(review): listing line 3037 (the statement controlled by this `if`) is
// missing from the extract -- confirm upstream.
3036 if (!semantics->hasZero)
3038
3039 /* Check whether the normalized exponent is high enough to overflow
3040 max during the log-rebasing in the max-exponent check below. */
3041 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3042 fs = handleOverflow(rounding_mode);
3043
3044 /* If it wasn't, then it also wasn't high enough to overflow max
3045 during the log-rebasing in the min-exponent check. Check that it
3046 won't overflow min in either check, then perform the min-exponent
3047 check. */
3048 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3049 (D.normalizedExponent + 1) * 28738 <=
3050 8651 * (semantics->minExponent - (int) semantics->precision)) {
3051 /* Underflow to zero and round. */
// A zero significand with a "less than half" lost fraction lets normalize()
// apply the rounding mode.
3052 category = fcNormal;
3053 zeroSignificand();
3054 fs = normalize(rounding_mode, lfLessThanHalf);
3055
3056 /* We can finally safely perform the max-exponent check. */
3057 } else if ((D.normalizedExponent - 1) * 42039
3058 >= 12655 * semantics->maxExponent) {
3059 /* Overflow and round. */
3060 fs = handleOverflow(rounding_mode);
3061 } else {
3062 integerPart *decSignificand;
3063 unsigned int partCount;
3064
3065 /* A tight upper bound on number of bits required to hold an
3066 N-digit decimal integer is N * 196 / 59. Allocate enough space
3067 to hold the full significand, and an extra part required by
3068 tcMultiplyPart. */
3069 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3070 partCount = partCountForBits(1 + 196 * partCount / 59);
// Heap buffer; released with delete[] on the error return below and after
// the final rounding.
3071 decSignificand = new integerPart[partCount + 1];
// From here on partCount counts the parts of decSignificand actually in use.
3072 partCount = 0;
3073
3074 /* Convert to binary efficiently - we do almost all multiplication
3075 in an integerPart. When this would overflow do we do a single
3076 bignum multiplication, and then revert again to multiplication
3077 in an integerPart. */
3078 do {
3079 integerPart decValue, val, multiplier;
3080
3081 val = 0;
3082 multiplier = 1;
3083
// Gather as many digits as fit in one integerPart into val, tracking the
// matching power of ten in multiplier.
3084 do {
// Step over a decimal point; stop if it was the last character.
3085 if (*p == '.') {
3086 p++;
3087 if (p == str.end()) {
3088 break;
3089 }
3090 }
3091 decValue = decDigitValue(*p++);
3092 if (decValue >= 10U) {
3093 delete[] decSignificand;
3094 return createError("Invalid character in significand");
3095 }
3096 multiplier *= 10;
3097 val = val * 10 + decValue;
3098 /* The maximum number that can be multiplied by ten with any
3099 digit added without overflowing an integerPart. */
3100 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3101
3102 /* Multiply out the current part. */
3103 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3104 partCount, partCount + 1, false);
3105
3106 /* If we used another part (likely but not guaranteed), increase
3107 the count. */
3108 if (decSignificand[partCount])
3109 partCount++;
3110 } while (p <= D.lastSigDigit);
3111
// Scale the integer significand by 10^D.exponent and round.
3112 category = fcNormal;
3113 fs = roundSignificandWithExponent(decSignificand, partCount,
3114 D.exponent, rounding_mode);
3115
3116 delete [] decSignificand;
3117 }
3118
3119 return fs;
3120}
3121
3122bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3123 const size_t MIN_NAME_SIZE = 3;
3124
3125 if (str.size() < MIN_NAME_SIZE)
3126 return false;
3127
3128 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3129 makeInf(false);
3130 return true;
3131 }
3132
3133 bool IsNegative = str.consume_front("-");
3134 if (IsNegative) {
3135 if (str.size() < MIN_NAME_SIZE)
3136 return false;
3137
3138 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3139 makeInf(true);
3140 return true;
3141 }
3142 }
3143
3144 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3145 bool IsSignaling = str.consume_front_insensitive("s");
3146 if (IsSignaling) {
3147 if (str.size() < MIN_NAME_SIZE)
3148 return false;
3149 }
3150
3151 if (str.consume_front("nan") || str.consume_front("NaN")) {
3152 // A NaN without payload.
3153 if (str.empty()) {
3154 makeNaN(IsSignaling, IsNegative);
3155 return true;
3156 }
3157
3158 // Allow the payload to be inside parentheses.
3159 if (str.front() == '(') {
3160 // Parentheses should be balanced (and not empty).
3161 if (str.size() <= 2 || str.back() != ')')
3162 return false;
3163
3164 str = str.slice(1, str.size() - 1);
3165 }
3166
3167 // Determine the payload number's radix.
3168 unsigned Radix = 10;
3169 if (str[0] == '0') {
3170 if (str.size() > 1 && tolower(str[1]) == 'x') {
3171 str = str.drop_front(2);
3172 Radix = 16;
3173 } else {
3174 Radix = 8;
3175 }
3176 }
3177
3178 // Parse the payload and make the NaN.
3179 APInt Payload;
3180 if (!str.getAsInteger(Radix, Payload)) {
3181 makeNaN(IsSignaling, IsNegative, &Payload);
3182 return true;
3183 }
3184 }
3185
3186 return false;
3187}
3188
3189Expected<APFloat::opStatus>
3191 if (str.empty())
3192 return createError("Invalid string length");
3193
3194 // Handle special cases.
3195 if (convertFromStringSpecials(str))
3196 return opOK;
3197
3198 /* Handle a leading minus sign. */
3199 StringRef::iterator p = str.begin();
3200 size_t slen = str.size();
3201 sign = *p == '-' ? 1 : 0;
3202 if (sign && !semantics->hasSignedRepr)
3204 "This floating point format does not support signed values");
3205
3206 if (*p == '-' || *p == '+') {
3207 p++;
3208 slen--;
3209 if (!slen)
3210 return createError("String has no digits");
3211 }
3212
3213 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3214 if (slen == 2)
3215 return createError("Invalid string");
3216 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3217 rounding_mode);
3218 }
3219
3220 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3221}
3222
3223/* Write out a hexadecimal representation of the floating point value
3224 to DST, which must be of sufficient size, in the C99 form
3225 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3226 excluding the terminating NUL.
3227
3228 If UPPERCASE, the output is in upper case, otherwise in lower case.
3229
3230 HEXDIGITS digits appear altogether, rounding the value if
3231 necessary. If HEXDIGITS is 0, the minimal precision to display the
3232 number precisely is used instead. If nothing would appear after
3233 the decimal point it is suppressed.
3234
3235 The decimal exponent is always printed and has at least one digit.
3236 Zero values display an exponent of zero. Infinities and NaNs
3237 appear as "infinity" or "nan" respectively.
3238
3239 The above rules are as specified by C99. There is ambiguity about
3240 what the leading hexadecimal digit should be. This implementation
3241 uses whatever is necessary so that the exponent is displayed as
3242 stored. This implies the exponent will fall within the IEEE format
3243 range, and the leading hexadecimal digit will be 0 (for denormals),
3244 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3245 any other digits zero).
3246*/
3247unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3248 bool upperCase,
3249 roundingMode rounding_mode) const {
3250 char *p;
3251
3252 p = dst;
3253 if (sign)
3254 *dst++ = '-';
3255
3256 switch (category) {
3257 case fcInfinity:
3258 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3259 dst += sizeof infinityL - 1;
3260 break;
3261
3262 case fcNaN:
3263 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3264 dst += sizeof NaNU - 1;
3265 break;
3266
3267 case fcZero:
3268 *dst++ = '0';
3269 *dst++ = upperCase ? 'X': 'x';
3270 *dst++ = '0';
3271 if (hexDigits > 1) {
3272 *dst++ = '.';
3273 memset (dst, '0', hexDigits - 1);
3274 dst += hexDigits - 1;
3275 }
3276 *dst++ = upperCase ? 'P': 'p';
3277 *dst++ = '0';
3278 break;
3279
3280 case fcNormal:
3281 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3282 break;
3283 }
3284
3285 *dst = 0;
3286
3287 return static_cast<unsigned int>(dst - p);
3288}
3289
3290/* Does the hard work of outputting the correctly rounded hexadecimal
3291 form of a normal floating point number with the specified number of
3292 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3293 digits necessary to print the value precisely is output. */
3294char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3295 bool upperCase,
3296 roundingMode rounding_mode) const {
3297 unsigned int count, valueBits, shift, partsCount, outputDigits;
3298 const char *hexDigitChars;
3299 const integerPart *significand;
3300 char *p;
3301 bool roundUp;
3302
3303 *dst++ = '0';
3304 *dst++ = upperCase ? 'X': 'x';
3305
3306 roundUp = false;
3307 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3308
3309 significand = significandParts();
3310 partsCount = partCount();
3311
3312 /* +3 because the first digit only uses the single integer bit, so
3313 we have 3 virtual zero most-significant-bits. */
3314 valueBits = semantics->precision + 3;
3315 shift = integerPartWidth - valueBits % integerPartWidth;
3316
3317 /* The natural number of digits required ignoring trailing
3318 insignificant zeroes. */
3319 outputDigits = (valueBits - significandLSB () + 3) / 4;
3320
3321 /* hexDigits of zero means use the required number for the
3322 precision. Otherwise, see if we are truncating. If we are,
3323 find out if we need to round away from zero. */
3324 if (hexDigits) {
3325 if (hexDigits < outputDigits) {
3326 /* We are dropping non-zero bits, so need to check how to round.
3327 "bits" is the number of dropped bits. */
3328 unsigned int bits;
3329 lostFraction fraction;
3330
3331 bits = valueBits - hexDigits * 4;
3332 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3333 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3334 }
3335 outputDigits = hexDigits;
3336 }
3337
3338 /* Write the digits consecutively, and start writing in the location
3339 of the hexadecimal point. We move the most significant digit
3340 left and add the hexadecimal point later. */
3341 p = ++dst;
3342
3343 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3344
3345 while (outputDigits && count) {
3346 integerPart part;
3347
3348 /* Put the most significant integerPartWidth bits in "part". */
3349 if (--count == partsCount)
3350 part = 0; /* An imaginary higher zero part. */
3351 else
3352 part = significand[count] << shift;
3353
3354 if (count && shift)
3355 part |= significand[count - 1] >> (integerPartWidth - shift);
3356
3357 /* Convert as much of "part" to hexdigits as we can. */
3358 unsigned int curDigits = integerPartWidth / 4;
3359
3360 curDigits = std::min(curDigits, outputDigits);
3361 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3362 outputDigits -= curDigits;
3363 }
3364
3365 if (roundUp) {
3366 char *q = dst;
3367
3368 /* Note that hexDigitChars has a trailing '0'. */
3369 do {
3370 q--;
3371 *q = hexDigitChars[hexDigitValue (*q) + 1];
3372 } while (*q == '0');
3373 assert(q >= p);
3374 } else {
3375 /* Add trailing zeroes. */
3376 memset (dst, '0', outputDigits);
3377 dst += outputDigits;
3378 }
3379
3380 /* Move the most significant digit to before the point, and if there
3381 is something after the decimal point add it. This must come
3382 after rounding above. */
3383 p[-1] = p[0];
3384 if (dst -1 == p)
3385 dst--;
3386 else
3387 p[0] = '.';
3388
3389 /* Finally output the exponent. */
3390 *dst++ = upperCase ? 'P': 'p';
3391
3392 return writeSignedDecimal (dst, exponent);
3393}
3394
3396 if (!Arg.isFiniteNonZero())
3397 return hash_combine((uint8_t)Arg.category,
3398 // NaN has no sign, fix it at zero.
3399 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3400 Arg.semantics->precision);
3401
3402 // Normal floats need their exponent and significand hashed.
3403 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3404 Arg.semantics->precision, Arg.exponent,
3406 Arg.significandParts(),
3407 Arg.significandParts() + Arg.partCount()));
3408}
3409
3410// Conversion from APFloat to/from host float/double. It may eventually be
3411// possible to eliminate these and have everybody deal with APFloats, but that
3412// will take a while. This approach will not easily extend to long double.
3413// Current implementation requires integerPartWidth==64, which is correct at
3414// the moment but could be made more general.
3415
3416// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3417// the actual IEEE representations. We compensate for that here.
3418
3419APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3420 assert(semantics ==
3421 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3422 assert(partCount()==2);
3423
3424 uint64_t myexponent, mysignificand;
3425
3426 if (isFiniteNonZero()) {
3427 myexponent = exponent+16383; //bias
3428 mysignificand = significandParts()[0];
3429 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3430 myexponent = 0; // denormal
3431 } else if (category==fcZero) {
3432 myexponent = 0;
3433 mysignificand = 0;
3434 } else if (category==fcInfinity) {
3435 myexponent = 0x7fff;
3436 mysignificand = 0x8000000000000000ULL;
3437 } else {
3438 assert(category == fcNaN && "Unknown category");
3439 myexponent = 0x7fff;
3440 mysignificand = significandParts()[0];
3441 }
3442
3443 uint64_t words[2];
3444 words[0] = mysignificand;
3445 words[1] = ((uint64_t)(sign & 1) << 15) |
3446 (myexponent & 0x7fffLL);
3447 return APInt(80, words);
3448}
3449
3450APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3451 assert(semantics ==
3452 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3453 assert(partCount()==2);
3454
3455 uint64_t words[2];
3456 opStatus fs;
3457 bool losesInfo;
3458
3459 // Convert number to double. To avoid spurious underflows, we re-
3460 // normalize against the "double" minExponent first, and only *then*
3461 // truncate the mantissa. The result of that second conversion
3462 // may be inexact, but should never underflow.
3463 // Declare fltSemantics before APFloat that uses it (and
3464 // saves pointer to it) to ensure correct destruction order.
3465 fltSemantics extendedSemantics = *semantics;
3466 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3467 IEEEFloat extended(*this);
3468 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3469 assert(fs == opOK && !losesInfo);
3470 (void)fs;
3471
3472 IEEEFloat u(extended);
3473 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3474 assert(fs == opOK || fs == opInexact);
3475 (void)fs;
3476 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3477
3478 // If conversion was exact or resulted in a special case, we're done;
3479 // just set the second double to zero. Otherwise, re-convert back to
3480 // the extended format and compute the difference. This now should
3481 // convert exactly to double.
3482 if (u.isFiniteNonZero() && losesInfo) {
3483 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3484 assert(fs == opOK && !losesInfo);
3485 (void)fs;
3486
3487 IEEEFloat v(extended);
3488 v.subtract(u, rmNearestTiesToEven);
3489 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3490 assert(fs == opOK && !losesInfo);
3491 (void)fs;
3492 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3493 } else {
3494 words[1] = 0;
3495 }
3496
3497 return APInt(128, words);
3498}
3499
3500template <const fltSemantics &S>
3501APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3502 assert(semantics == &S);
3503 const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
3504 ? -S.minExponent
3505 : -(S.minExponent - 1);
3506 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3507 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3508 constexpr integerPart integer_bit =
3509 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3510 constexpr uint64_t significand_mask = integer_bit - 1;
3511 constexpr unsigned int exponent_bits =
3512 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3513 : S.sizeInBits;
3514 static_assert(exponent_bits < 64);
3515 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3516
3517 uint64_t myexponent;
3518 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3519 mysignificand;
3520
3521 if (isFiniteNonZero()) {
3522 myexponent = exponent + bias;
3523 std::copy_n(significandParts(), mysignificand.size(),
3524 mysignificand.begin());
3525 if (myexponent == 1 &&
3526 !(significandParts()[integer_bit_part] & integer_bit))
3527 myexponent = 0; // denormal
3528 } else if (category == fcZero) {
3529 if (!S.hasZero)
3530 llvm_unreachable("semantics does not support zero!");
3531 myexponent = ::exponentZero(S) + bias;
3532 mysignificand.fill(0);
3533 } else if (category == fcInfinity) {
3534 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3535 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3536 llvm_unreachable("semantics don't support inf!");
3537 myexponent = ::exponentInf(S) + bias;
3538 mysignificand.fill(0);
3539 } else {
3540 assert(category == fcNaN && "Unknown category!");
3541 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3542 llvm_unreachable("semantics don't support NaN!");
3543 myexponent = ::exponentNaN(S) + bias;
3544 std::copy_n(significandParts(), mysignificand.size(),
3545 mysignificand.begin());
3546 }
3547 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3548 auto words_iter =
3549 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3550 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3551 // Clear the integer bit.
3552 words[mysignificand.size() - 1] &= significand_mask;
3553 }
3554 std::fill(words_iter, words.end(), uint64_t{0});
3555 constexpr size_t last_word = words.size() - 1;
3556 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3557 << ((S.sizeInBits - 1) % 64);
3558 words[last_word] |= shifted_sign;
3559 uint64_t shifted_exponent = (myexponent & exponent_mask)
3560 << (trailing_significand_bits % 64);
3561 words[last_word] |= shifted_exponent;
3562 if constexpr (last_word == 0) {
3563 return APInt(S.sizeInBits, words[0]);
3564 }
3565 return APInt(S.sizeInBits, words);
3566}
3567
3568APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3569 assert(partCount() == 2);
3570 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3571}
3572
3573APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3574 assert(partCount()==1);
3575 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3576}
3577
3578APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3579 assert(partCount()==1);
3580 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3581}
3582
3583APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3584 assert(partCount() == 1);
3585 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3586}
3587
3588APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3589 assert(partCount()==1);
3590 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3591}
3592
3593APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3594 assert(partCount() == 1);
3595 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3596}
3597
3598APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3599 assert(partCount() == 1);
3600 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3601}
3602
3603APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3604 assert(partCount() == 1);
3605 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3606}
3607
3608APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3609 assert(partCount() == 1);
3610 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3611}
3612
3613APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3614 assert(partCount() == 1);
3615 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3616}
3617
3618APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3619 assert(partCount() == 1);
3620 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3621}
3622
3623APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3624 assert(partCount() == 1);
3625 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3626}
3627
3628APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3629 assert(partCount() == 1);
3630 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3631}
3632
3633APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3634 assert(partCount() == 1);
3635 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3636}
3637
3638APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3639 assert(partCount() == 1);
3640 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3641}
3642
3643APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3644 assert(partCount() == 1);
3645 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3646}
3647
3648APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3649 assert(partCount() == 1);
3650 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3651}
3652
3653// This function creates an APInt that is just a bit map of the floating
3654// point constant as it would appear in memory. It is not a conversion,
3655// and treating the result as a normal integer is unlikely to be useful.
3656
3658 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3659 return convertHalfAPFloatToAPInt();
3660
3661 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3662 return convertBFloatAPFloatToAPInt();
3663
3664 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3665 return convertFloatAPFloatToAPInt();
3666
3667 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3668 return convertDoubleAPFloatToAPInt();
3669
3670 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3671 return convertQuadrupleAPFloatToAPInt();
3672
3673 if (semantics ==
3674 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3675 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3676
3677 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3678 return convertFloat8E5M2APFloatToAPInt();
3679
3680 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3681 return convertFloat8E5M2FNUZAPFloatToAPInt();
3682
3683 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3684 return convertFloat8E4M3APFloatToAPInt();
3685
3686 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3687 return convertFloat8E4M3FNAPFloatToAPInt();
3688
3689 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3690 return convertFloat8E4M3FNUZAPFloatToAPInt();
3691
3692 if (semantics ==
3693 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3694 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3695
3696 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3697 return convertFloat8E3M4APFloatToAPInt();
3698
3699 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3700 return convertFloatTF32APFloatToAPInt();
3701
3702 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3703 return convertFloat8E8M0FNUAPFloatToAPInt();
3704
3705 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3706 return convertFloat6E3M2FNAPFloatToAPInt();
3707
3708 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3709 return convertFloat6E2M3FNAPFloatToAPInt();
3710
3711 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3712 return convertFloat4E2M1FNAPFloatToAPInt();
3713
3714 assert(semantics ==
3715 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3716 "unknown format!");
3717 return convertF80LongDoubleAPFloatToAPInt();
3718}
3719
3721 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3722 "Float semantics are not IEEEsingle");
3723 APInt api = bitcastToAPInt();
3724 return api.bitsToFloat();
3725}
3726
3728 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3729 "Float semantics are not IEEEdouble");
3730 APInt api = bitcastToAPInt();
3731 return api.bitsToDouble();
3732}
3733
#ifdef HAS_IEE754_FLOAT128
/// Reinterpret this value's bit pattern as a host float128.  Only valid when
/// the semantics are semIEEEquad (asserted).
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
         "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3742
3743/// Integer bit is explicit in this format. Intel hardware (387 and later)
3744/// does not support these bit patterns:
3745/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3746/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3747/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3748/// exponent = 0, integer bit 1 ("pseudodenormal")
3749/// At the moment, the first three are treated as NaNs, the last one as Normal.
3750void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3751 uint64_t i1 = api.getRawData()[0];
3752 uint64_t i2 = api.getRawData()[1];
3753 uint64_t myexponent = (i2 & 0x7fff);
3754 uint64_t mysignificand = i1;
3755 uint8_t myintegerbit = mysignificand >> 63;
3756
3757 initialize(&APFloatBase::semX87DoubleExtended);
3758 assert(partCount()==2);
3759
3760 sign = static_cast<unsigned int>(i2>>15);
3761 if (myexponent == 0 && mysignificand == 0) {
3762 makeZero(sign);
3763 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3764 makeInf(sign);
3765 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3766 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3767 category = fcNaN;
3768 exponent = exponentNaN();
3769 significandParts()[0] = mysignificand;
3770 significandParts()[1] = 0;
3771 } else {
3772 category = fcNormal;
3773 exponent = myexponent - 16383;
3774 significandParts()[0] = mysignificand;
3775 significandParts()[1] = 0;
3776 if (myexponent==0) // denormal
3777 exponent = -16382;
3778 }
3779}
3780
3781void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3782 uint64_t i1 = api.getRawData()[0];
3783 uint64_t i2 = api.getRawData()[1];
3784 opStatus fs;
3785 bool losesInfo;
3786
3787 // Get the first double and convert to our format.
3788 initFromDoubleAPInt(APInt(64, i1));
3789 fs = convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3790 &losesInfo);
3791 assert(fs == opOK && !losesInfo);
3792 (void)fs;
3793
3794 // Unless we have a special case, add in second double.
3795 if (isFiniteNonZero()) {
3796 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3797 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3798 &losesInfo);
3799 assert(fs == opOK && !losesInfo);
3800 (void)fs;
3801
3803 }
3804}
3805
3806// The E8M0 format has the following characteristics:
3807// It is an 8-bit unsigned format with only exponents (no actual significand).
3808// No encodings for {zero, infinities or denorms}.
3809// NaN is represented by all 1's.
3810// Bias is 127.
3811void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3812 const uint64_t exponent_mask = 0xff;
3813 uint64_t val = api.getRawData()[0];
3814 uint64_t myexponent = (val & exponent_mask);
3815
3816 initialize(&APFloatBase::semFloat8E8M0FNU);
3817 assert(partCount() == 1);
3818
3819 // This format has unsigned representation only
3820 sign = 0;
3821
3822 // Set the significand
3823 // This format does not have any significand but the 'Pth' precision bit is
3824 // always set to 1 for consistency in APFloat's internal representation.
3825 uint64_t mysignificand = 1;
3826 significandParts()[0] = mysignificand;
3827
3828 // This format can either have a NaN or fcNormal
3829 // All 1's i.e. 255 is a NaN
3830 if (val == exponent_mask) {
3831 category = fcNaN;
3832 exponent = exponentNaN();
3833 return;
3834 }
3835 // Handle fcNormal...
3836 category = fcNormal;
3837 exponent = myexponent - 127; // 127 is bias
3838}
3839template <const fltSemantics &S>
3840void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3841 assert(api.getBitWidth() == S.sizeInBits);
3842 constexpr integerPart integer_bit = integerPart{1}
3843 << ((S.precision - 1) % integerPartWidth);
3844 constexpr uint64_t significand_mask = integer_bit - 1;
3845 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3846 constexpr unsigned int stored_significand_parts =
3847 partCountForBits(trailing_significand_bits);
3848 constexpr unsigned int exponent_bits =
3849 S.sizeInBits - 1 - trailing_significand_bits;
3850 static_assert(exponent_bits < 64);
3851 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3852 constexpr int bias = -(S.minExponent - 1);
3853
3854 // Copy the bits of the significand. We need to clear out the exponent and
3855 // sign bit in the last word.
3856 std::array<integerPart, stored_significand_parts> mysignificand;
3857 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3858 if constexpr (significand_mask != 0) {
3859 mysignificand[mysignificand.size() - 1] &= significand_mask;
3860 }
3861
3862 // We assume the last word holds the sign bit, the exponent, and potentially
3863 // some of the trailing significand field.
3864 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3865 uint64_t myexponent =
3866 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3867
3868 initialize(&S);
3869 assert(partCount() == mysignificand.size());
3870
3871 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3872
3873 bool all_zero_significand = llvm::all_of(mysignificand, equal_to(0));
3874
3875 bool is_zero = myexponent == 0 && all_zero_significand;
3876
3877 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3878 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3879 makeInf(sign);
3880 return;
3881 }
3882 }
3883
3884 bool is_nan = false;
3885
3886 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3887 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3888 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3889 bool all_ones_significand =
3890 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3891 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3892 (!significand_mask ||
3893 mysignificand[mysignificand.size() - 1] == significand_mask);
3894 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3895 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3896 is_nan = is_zero && sign;
3897 }
3898
3899 if (is_nan) {
3900 category = fcNaN;
3901 exponent = ::exponentNaN(S);
3902 std::copy_n(mysignificand.begin(), mysignificand.size(),
3903 significandParts());
3904 return;
3905 }
3906
3907 if (is_zero) {
3908 makeZero(sign);
3909 return;
3910 }
3911
3912 category = fcNormal;
3913 exponent = myexponent - bias;
3914 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3915 if (myexponent == 0) // denormal
3916 exponent = S.minExponent;
3917 else
3918 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3919}
3920
3921void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3922 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3923}
3924
3925void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3926 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
3927}
3928
3929void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3930 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
3931}
3932
3933void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3934 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
3935}
3936
3937void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3938 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
3939}
3940
3941void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3942 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
3943}
3944
3945void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3946 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
3947}
3948
3949void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3950 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
3951}
3952
3953void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3954 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
3955}
3956
3957void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3958 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
3959}
3960
3961void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3962 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
3963}
3964
3965void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3966 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
3967}
3968
3969void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3970 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
3971}
3972
3973void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3974 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
3975}
3976
3977void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3978 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
3979}
3980
3981void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3982 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
3983}
3984
3985/// Treat api as containing the bits of a floating point number.
3986void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3987 assert(api.getBitWidth() == Sem->sizeInBits);
3988 if (Sem == &APFloatBase::semIEEEhalf)
3989 return initFromHalfAPInt(api);
3990 if (Sem == &APFloatBase::semBFloat)
3991 return initFromBFloatAPInt(api);
3992 if (Sem == &APFloatBase::semIEEEsingle)
3993 return initFromFloatAPInt(api);
3994 if (Sem == &APFloatBase::semIEEEdouble)
3995 return initFromDoubleAPInt(api);
3996 if (Sem == &APFloatBase::semX87DoubleExtended)
3997 return initFromF80LongDoubleAPInt(api);
3998 if (Sem == &APFloatBase::semIEEEquad)
3999 return initFromQuadrupleAPInt(api);
4000 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
4001 return initFromPPCDoubleDoubleLegacyAPInt(api);
4002 if (Sem == &APFloatBase::semFloat8E5M2)
4003 return initFromFloat8E5M2APInt(api);
4004 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
4005 return initFromFloat8E5M2FNUZAPInt(api);
4006 if (Sem == &APFloatBase::semFloat8E4M3)
4007 return initFromFloat8E4M3APInt(api);
4008 if (Sem == &APFloatBase::semFloat8E4M3FN)
4009 return initFromFloat8E4M3FNAPInt(api);
4010 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
4011 return initFromFloat8E4M3FNUZAPInt(api);
4012 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
4013 return initFromFloat8E4M3B11FNUZAPInt(api);
4014 if (Sem == &APFloatBase::semFloat8E3M4)
4015 return initFromFloat8E3M4APInt(api);
4016 if (Sem == &APFloatBase::semFloatTF32)
4017 return initFromFloatTF32APInt(api);
4018 if (Sem == &APFloatBase::semFloat8E8M0FNU)
4019 return initFromFloat8E8M0FNUAPInt(api);
4020 if (Sem == &APFloatBase::semFloat6E3M2FN)
4021 return initFromFloat6E3M2FNAPInt(api);
4022 if (Sem == &APFloatBase::semFloat6E2M3FN)
4023 return initFromFloat6E2M3FNAPInt(api);
4024 if (Sem == &APFloatBase::semFloat4E2M1FN)
4025 return initFromFloat4E2M1FNAPInt(api);
4026
4027 llvm_unreachable("unsupported semantics");
4028}
4029
4030/// Make this number the largest magnitude normal number in the given
4031/// semantics.
4032void IEEEFloat::makeLargest(bool Negative) {
4033 if (Negative && !semantics->hasSignedRepr)
4035 "This floating point format does not support signed values");
4036 // We want (in interchange format):
4037 // sign = {Negative}
4038 // exponent = 1..10
4039 // significand = 1..1
4040 category = fcNormal;
4041 sign = Negative;
4042 exponent = semantics->maxExponent;
4043
4044 // Use memset to set all but the highest integerPart to all ones.
4045 integerPart *significand = significandParts();
4046 unsigned PartCount = partCount();
4047 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4048
4049 // Set the high integerPart especially setting all unused top bits for
4050 // internal consistency.
4051 const unsigned NumUnusedHighBits =
4052 PartCount*integerPartWidth - semantics->precision;
4053 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4054 ? (~integerPart(0) >> NumUnusedHighBits)
4055 : 0;
4056 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4057 semantics->nanEncoding == fltNanEncoding::AllOnes &&
4058 (semantics->precision > 1))
4059 significand[0] &= ~integerPart(1);
4060}
4061
4062/// Make this number the smallest magnitude denormal number in the given
4063/// semantics.
4064void IEEEFloat::makeSmallest(bool Negative) {
4065 if (Negative && !semantics->hasSignedRepr)
4067 "This floating point format does not support signed values");
4068 // We want (in interchange format):
4069 // sign = {Negative}
4070 // exponent = 0..0
4071 // significand = 0..01
4072 category = fcNormal;
4073 sign = Negative;
4074 exponent = semantics->minExponent;
4075 APInt::tcSet(significandParts(), 1, partCount());
4076}
4077
4079 if (Negative && !semantics->hasSignedRepr)
4081 "This floating point format does not support signed values");
4082 // We want (in interchange format):
4083 // sign = {Negative}
4084 // exponent = 0..0
4085 // significand = 10..0
4086
4087 category = fcNormal;
4088 zeroSignificand();
4089 sign = Negative;
4090 exponent = semantics->minExponent;
4091 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4092}
4093
4094IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4095 initFromAPInt(&Sem, API);
4096}
4097
4099 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4100}
4101
4103 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4104}
4105
4106namespace {
4107 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4108 Buffer.append(Str.begin(), Str.end());
4109 }
4110
4111 /// Removes data from the given significand until it is no more
4112 /// precise than is required for the desired precision.
4113 void AdjustToPrecision(APInt &significand,
4114 int &exp, unsigned FormatPrecision) {
4115 unsigned bits = significand.getActiveBits();
4116
4117 // 196/59 is a very slight overestimate of lg_2(10).
4118 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4119
4120 if (bits <= bitsRequired) return;
4121
4122 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4123 if (!tensRemovable) return;
4124
4125 exp += tensRemovable;
4126
4127 APInt divisor(significand.getBitWidth(), 1);
4128 APInt powten(significand.getBitWidth(), 10);
4129 while (true) {
4130 if (tensRemovable & 1)
4131 divisor *= powten;
4132 tensRemovable >>= 1;
4133 if (!tensRemovable) break;
4134 powten *= powten;
4135 }
4136
4137 significand = significand.udiv(divisor);
4138
4139 // Truncate the significand down to its active bit count.
4140 significand = significand.trunc(significand.getActiveBits());
4141 }
4142
4143
4144 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4145 int &exp, unsigned FormatPrecision) {
4146 unsigned N = buffer.size();
4147 if (N <= FormatPrecision) return;
4148
4149 // The most significant figures are the last ones in the buffer.
4150 unsigned FirstSignificant = N - FormatPrecision;
4151
4152 // Round.
4153 // FIXME: this probably shouldn't use 'round half up'.
4154
4155 // Rounding down is just a truncation, except we also want to drop
4156 // trailing zeros from the new result.
4157 if (buffer[FirstSignificant - 1] < '5') {
4158 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4159 FirstSignificant++;
4160
4161 exp += FirstSignificant;
4162 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4163 return;
4164 }
4165
4166 // Rounding up requires a decimal add-with-carry. If we continue
4167 // the carry, the newly-introduced zeros will just be truncated.
4168 for (unsigned I = FirstSignificant; I != N; ++I) {
4169 if (buffer[I] == '9') {
4170 FirstSignificant++;
4171 } else {
4172 buffer[I]++;
4173 break;
4174 }
4175 }
4176
4177 // If we carried through, we have exactly one digit of precision.
4178 if (FirstSignificant == N) {
4179 exp += FirstSignificant;
4180 buffer.clear();
4181 buffer.push_back('1');
4182 return;
4183 }
4184
4185 exp += FirstSignificant;
4186 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4187 }
4188
4189 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4190 APInt significand, unsigned FormatPrecision,
4191 unsigned FormatMaxPadding, bool TruncateZero) {
4192 const int semanticsPrecision = significand.getBitWidth();
4193
4194 if (isNeg)
4195 Str.push_back('-');
4196
4197 // Set FormatPrecision if zero. We want to do this before we
4198 // truncate trailing zeros, as those are part of the precision.
4199 if (!FormatPrecision) {
4200 // We use enough digits so the number can be round-tripped back to an
4201 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4202 // Accurately" by Steele and White.
4203 // FIXME: Using a formula based purely on the precision is conservative;
4204 // we can print fewer digits depending on the actual value being printed.
4205
4206 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4207 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4208 }
4209
4210 // Ignore trailing binary zeros.
4211 int trailingZeros = significand.countr_zero();
4212 exp += trailingZeros;
4213 significand.lshrInPlace(trailingZeros);
4214
4215 // Change the exponent from 2^e to 10^e.
4216 if (exp == 0) {
4217 // Nothing to do.
4218 } else if (exp > 0) {
4219 // Just shift left.
4220 significand = significand.zext(semanticsPrecision + exp);
4221 significand <<= exp;
4222 exp = 0;
4223 } else { /* exp < 0 */
4224 int texp = -exp;
4225
4226 // We transform this using the identity:
4227 // (N)(2^-e) == (N)(5^e)(10^-e)
4228 // This means we have to multiply N (the significand) by 5^e.
4229 // To avoid overflow, we have to operate on numbers large
4230 // enough to store N * 5^e:
4231 // log2(N * 5^e) == log2(N) + e * log2(5)
4232 // <= semantics->precision + e * 137 / 59
4233 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4234
4235 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4236
4237 // Multiply significand by 5^e.
4238 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4239 significand = significand.zext(precision);
4240 APInt five_to_the_i(precision, 5);
4241 while (true) {
4242 if (texp & 1)
4243 significand *= five_to_the_i;
4244
4245 texp >>= 1;
4246 if (!texp)
4247 break;
4248 five_to_the_i *= five_to_the_i;
4249 }
4250 }
4251
4252 AdjustToPrecision(significand, exp, FormatPrecision);
4253
4255
4256 // Fill the buffer.
4257 unsigned precision = significand.getBitWidth();
4258 if (precision < 4) {
4259 // We need enough precision to store the value 10.
4260 precision = 4;
4261 significand = significand.zext(precision);
4262 }
4263 APInt ten(precision, 10);
4264 APInt digit(precision, 0);
4265
4266 bool inTrail = true;
4267 while (significand != 0) {
4268 // digit <- significand % 10
4269 // significand <- significand / 10
4270 APInt::udivrem(significand, ten, significand, digit);
4271
4272 unsigned d = digit.getZExtValue();
4273
4274 // Drop trailing zeros.
4275 if (inTrail && !d)
4276 exp++;
4277 else {
4278 buffer.push_back((char) ('0' + d));
4279 inTrail = false;
4280 }
4281 }
4282
4283 assert(!buffer.empty() && "no characters in buffer!");
4284
4285 // Drop down to FormatPrecision.
4286 // TODO: don't do more precise calculations above than are required.
4287 AdjustToPrecision(buffer, exp, FormatPrecision);
4288
4289 unsigned NDigits = buffer.size();
4290
4291 // Check whether we should use scientific notation.
4292 bool FormatScientific;
4293 if (!FormatMaxPadding)
4294 FormatScientific = true;
4295 else {
4296 if (exp >= 0) {
4297 // 765e3 --> 765000
4298 // ^^^
4299 // But we shouldn't make the number look more precise than it is.
4300 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4301 NDigits + (unsigned) exp > FormatPrecision);
4302 } else {
4303 // Power of the most significant digit.
4304 int MSD = exp + (int) (NDigits - 1);
4305 if (MSD >= 0) {
4306 // 765e-2 == 7.65
4307 FormatScientific = false;
4308 } else {
4309 // 765e-5 == 0.00765
4310 // ^ ^^
4311 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4312 }
4313 }
4314 }
4315
4316 // Scientific formatting is pretty straightforward.
4317 if (FormatScientific) {
4318 exp += (NDigits - 1);
4319
4320 Str.push_back(buffer[NDigits-1]);
4321 Str.push_back('.');
4322 if (NDigits == 1 && TruncateZero)
4323 Str.push_back('0');
4324 else
4325 for (unsigned I = 1; I != NDigits; ++I)
4326 Str.push_back(buffer[NDigits-1-I]);
4327 // Fill with zeros up to FormatPrecision.
4328 if (!TruncateZero && FormatPrecision > NDigits - 1)
4329 Str.append(FormatPrecision - NDigits + 1, '0');
4330 // For !TruncateZero we use lower 'e'.
4331 Str.push_back(TruncateZero ? 'E' : 'e');
4332
4333 Str.push_back(exp >= 0 ? '+' : '-');
4334 if (exp < 0)
4335 exp = -exp;
4336 SmallVector<char, 6> expbuf;
4337 do {
4338 expbuf.push_back((char) ('0' + (exp % 10)));
4339 exp /= 10;
4340 } while (exp);
4341 // Exponent always at least two digits if we do not truncate zeros.
4342 if (!TruncateZero && expbuf.size() < 2)
4343 expbuf.push_back('0');
4344 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4345 Str.push_back(expbuf[E-1-I]);
4346 return;
4347 }
4348
4349 // Non-scientific, positive exponents.
4350 if (exp >= 0) {
4351 for (unsigned I = 0; I != NDigits; ++I)
4352 Str.push_back(buffer[NDigits-1-I]);
4353 for (unsigned I = 0; I != (unsigned) exp; ++I)
4354 Str.push_back('0');
4355 return;
4356 }
4357
4358 // Non-scientific, negative exponents.
4359
4360 // The number of digits to the left of the decimal point.
4361 int NWholeDigits = exp + (int) NDigits;
4362
4363 unsigned I = 0;
4364 if (NWholeDigits > 0) {
4365 for (; I != (unsigned) NWholeDigits; ++I)
4366 Str.push_back(buffer[NDigits-I-1]);
4367 Str.push_back('.');
4368 } else {
4369 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4370
4371 Str.push_back('0');
4372 Str.push_back('.');
4373 for (unsigned Z = 1; Z != NZeros; ++Z)
4374 Str.push_back('0');
4375 }
4376
4377 for (; I != NDigits; ++I)
4378 Str.push_back(buffer[NDigits-I-1]);
4379
4380 }
4381} // namespace
4382
4383void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4384 unsigned FormatMaxPadding, bool TruncateZero) const {
4385 switch (category) {
4386 case fcInfinity:
4387 if (isNegative())
4388 return append(Str, "-Inf");
4389 else
4390 return append(Str, "+Inf");
4391
4392 case fcNaN: return append(Str, "NaN");
4393
4394 case fcZero:
4395 if (isNegative())
4396 Str.push_back('-');
4397
4398 if (!FormatMaxPadding) {
4399 if (TruncateZero)
4400 append(Str, "0.0E+0");
4401 else {
4402 append(Str, "0.0");
4403 if (FormatPrecision > 1)
4404 Str.append(FormatPrecision - 1, '0');
4405 append(Str, "e+00");
4406 }
4407 } else {
4408 Str.push_back('0');
4409 }
4410 return;
4411
4412 case fcNormal:
4413 break;
4414 }
4415
4416 // Decompose the number into an APInt and an exponent.
4417 int exp = exponent - ((int) semantics->precision - 1);
4418 APInt significand(
4419 semantics->precision,
4420 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4421
4422 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4423 FormatMaxPadding, TruncateZero);
4424
4425}
4426
4428 if (!isFinite() || isZero())
4429 return INT_MIN;
4430
4431 const integerPart *Parts = significandParts();
4432 const int PartCount = partCountForBits(semantics->precision);
4433
4434 int PopCount = 0;
4435 for (int i = 0; i < PartCount; ++i) {
4436 PopCount += llvm::popcount(Parts[i]);
4437 if (PopCount > 1)
4438 return INT_MIN;
4439 }
4440
4441 if (exponent != semantics->minExponent)
4442 return exponent;
4443
4444 int CountrParts = 0;
4445 for (int i = 0; i < PartCount;
4446 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4447 if (Parts[i] != 0) {
4448 return exponent - semantics->precision + CountrParts +
4449 llvm::countr_zero(Parts[i]) + 1;
4450 }
4451 }
4452
4453 llvm_unreachable("didn't find the set bit");
4454}
4455
4457 if (!isNaN())
4458 return false;
4459 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4460 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4461 return false;
4462
4463 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4464 // first bit of the trailing significand being 0.
4465 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4466}
4467
4468/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4469///
4470/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4471/// appropriate sign switching before/after the computation.
4473 // If we are performing nextDown, swap sign so we have -x.
4474 if (nextDown)
4475 changeSign();
4476
4477 // Compute nextUp(x)
4478 opStatus result = opOK;
4479
4480 // Handle each float category separately.
4481 switch (category) {
4482 case fcInfinity:
4483 // nextUp(+inf) = +inf
4484 if (!isNegative())
4485 break;
4486 // nextUp(-inf) = -getLargest()
4487 makeLargest(true);
4488 break;
4489 case fcNaN:
4490 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4491 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4492 // change the payload.
4493 if (isSignaling()) {
4494 result = opInvalidOp;
4495 // For consistency, propagate the sign of the sNaN to the qNaN.
4496 makeNaN(false, isNegative(), nullptr);
4497 }
4498 break;
4499 case fcZero:
4500 // nextUp(pm 0) = +getSmallest()
4501 makeSmallest(false);
4502 break;
4503 case fcNormal:
4504 // nextUp(-getSmallest()) = -0
4505 if (isSmallest() && isNegative()) {
4506 APInt::tcSet(significandParts(), 0, partCount());
4507 category = fcZero;
4508 exponent = 0;
4509 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4510 sign = false;
4511 if (!semantics->hasZero)
4513 break;
4514 }
4515
4516 if (isLargest() && !isNegative()) {
4517 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4518 // nextUp(getLargest()) == NAN
4519 makeNaN();
4520 break;
4521 } else if (semantics->nonFiniteBehavior ==
4523 // nextUp(getLargest()) == getLargest()
4524 break;
4525 } else {
4526 // nextUp(getLargest()) == INFINITY
4527 APInt::tcSet(significandParts(), 0, partCount());
4528 category = fcInfinity;
4529 exponent = semantics->maxExponent + 1;
4530 break;
4531 }
4532 }
4533
4534 // nextUp(normal) == normal + inc.
4535 if (isNegative()) {
4536 // If we are negative, we need to decrement the significand.
4537
4538 // We only cross a binade boundary that requires adjusting the exponent
4539 // if:
4540 // 1. exponent != semantics->minExponent. This implies we are not in the
4541 // smallest binade or are dealing with denormals.
4542 // 2. Our significand excluding the integral bit is all zeros.
4543 bool WillCrossBinadeBoundary =
4544 exponent != semantics->minExponent && isSignificandAllZeros();
4545
4546 // Decrement the significand.
4547 //
4548 // We always do this since:
4549 // 1. If we are dealing with a non-binade decrement, by definition we
4550 // just decrement the significand.
4551 // 2. If we are dealing with a normal -> normal binade decrement, since
4552 // we have an explicit integral bit the fact that all bits but the
4553 // integral bit are zero implies that subtracting one will yield a
4554 // significand with 0 integral bit and 1 in all other spots. Thus we
4555 // must just adjust the exponent and set the integral bit to 1.
4556 // 3. If we are dealing with a normal -> denormal binade decrement,
4557 // since we set the integral bit to 0 when we represent denormals, we
4558 // just decrement the significand.
4559 integerPart *Parts = significandParts();
4560 APInt::tcDecrement(Parts, partCount());
4561
4562 if (WillCrossBinadeBoundary) {
4563 // Our result is a normal number. Do the following:
4564 // 1. Set the integral bit to 1.
4565 // 2. Decrement the exponent.
4566 APInt::tcSetBit(Parts, semantics->precision - 1);
4567 exponent--;
4568 }
4569 } else {
4570 // If we are positive, we need to increment the significand.
4571
4572 // We only cross a binade boundary that requires adjusting the exponent if
4573 // the input is not a denormal and all of said input's significand bits
4574 // are set. If all of said conditions are true: clear the significand, set
4575 // the integral bit to 1, and increment the exponent. If we have a
4576 // denormal always increment since moving denormals and the numbers in the
4577 // smallest normal binade have the same exponent in our representation.
4578 // If there are only exponents, any increment always crosses the
4579 // BinadeBoundary.
4580 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4581 (!isDenormal() && isSignificandAllOnes());
4582
4583 if (WillCrossBinadeBoundary) {
4584 integerPart *Parts = significandParts();
4585 APInt::tcSet(Parts, 0, partCount());
4586 APInt::tcSetBit(Parts, semantics->precision - 1);
4587 assert(exponent != semantics->maxExponent &&
4588 "We can not increment an exponent beyond the maxExponent allowed"
4589 " by the given floating point semantics.");
4590 exponent++;
4591 } else {
4592 incrementSignificand();
4593 }
4594 }
4595 break;
4596 }
4597
4598 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4599 if (nextDown)
4600 changeSign();
4601
4602 return result;
4603}
4604
4605APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4606 return ::exponentNaN(*semantics);
4607}
4608
4609APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4610 return ::exponentInf(*semantics);
4611}
4612
4613APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4614 return ::exponentZero(*semantics);
4615}
4616
4617void IEEEFloat::makeInf(bool Negative) {
4618 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4619 llvm_unreachable("This floating point format does not support Inf");
4620
4621 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4622 // There is no Inf, so make NaN instead.
4623 makeNaN(false, Negative);
4624 return;
4625 }
4626 category = fcInfinity;
4627 sign = Negative;
4628 exponent = exponentInf();
4629 APInt::tcSet(significandParts(), 0, partCount());
4630}
4631
4632void IEEEFloat::makeZero(bool Negative) {
4633 if (!semantics->hasZero)
4634 llvm_unreachable("This floating point format does not support Zero");
4635
4636 category = fcZero;
4637 sign = Negative;
4638 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4639 // Merge negative zero to positive because 0b10000...000 is used for NaN
4640 sign = false;
4641 }
4642 exponent = exponentZero();
4643 APInt::tcSet(significandParts(), 0, partCount());
4644}
4645
4647 assert(isNaN());
4648 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4649 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4650}
4651
4652int ilogb(const IEEEFloat &Arg) {
4653 if (Arg.isNaN())
4654 return APFloat::IEK_NaN;
4655 if (Arg.isZero())
4656 return APFloat::IEK_Zero;
4657 if (Arg.isInfinity())
4658 return APFloat::IEK_Inf;
4659 if (!Arg.isDenormal())
4660 return Arg.exponent;
4661
4662 IEEEFloat Normalized(Arg);
4663 int SignificandBits = Arg.getSemantics().precision - 1;
4664
4665 Normalized.exponent += SignificandBits;
4666 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4667 return Normalized.exponent - SignificandBits;
4668}
4669
4671 auto MaxExp = X.getSemantics().maxExponent;
4672 auto MinExp = X.getSemantics().minExponent;
4673
4674 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4675 // overflow; clamp it to a safe range before adding, but ensure that the range
4676 // is large enough that the clamp does not change the result. The range we
4677 // need to support is the difference between the largest possible exponent and
4678 // the normalized exponent of half the smallest denormal.
4679
4680 int SignificandBits = X.getSemantics().precision - 1;
4681 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4682
4683 // Clamp to one past the range ends to let normalize handle overlflow.
4684 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4685 X.normalize(RoundingMode, lfExactlyZero);
4686 if (X.isNaN())
4687 X.makeQuiet();
4688 return X;
4689}
4690
4691IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4692 Exp = ilogb(Val);
4693
4694 // Quiet signalling nans.
4695 if (Exp == APFloat::IEK_NaN) {
4696 IEEEFloat Quiet(Val);
4697 Quiet.makeQuiet();
4698 return Quiet;
4699 }
4700
4701 if (Exp == APFloat::IEK_Inf)
4702 return Val;
4703
4704 // 1 is added because frexp is defined to return a normalized fraction in
4705 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4706 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4707 return scalbn(Val, -Exp, RM);
4708}
4709
4711 : Semantics(&S),
4712 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4713 APFloat(APFloatBase::semIEEEdouble)}) {
4714 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4715}
4716
4718 : Semantics(&S), Floats(new APFloat[2]{
4719 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4720 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4721 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4722}
4723
4725 : Semantics(&S),
4726 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4727 APFloat(APFloatBase::semIEEEdouble)}) {
4728 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4729}
4730
4732 : Semantics(&S),
4733 Floats(new APFloat[2]{
4734 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4735 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4736 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4737}
4738
4740 APFloat &&Second)
4741 : Semantics(&S),
4742 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4743 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4744 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4745 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4746}
4747
4749 : Semantics(RHS.Semantics),
4750 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4751 APFloat(RHS.Floats[1])}
4752 : nullptr) {
4753 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4754}
4755
4757 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4758 RHS.Semantics = &APFloatBase::semBogus;
4759 RHS.Floats = nullptr;
4760 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4761}
4762
4764 if (Semantics == RHS.Semantics && RHS.Floats) {
4765 Floats[0] = RHS.Floats[0];
4766 Floats[1] = RHS.Floats[1];
4767 } else if (this != &RHS) {
4768 this->~DoubleAPFloat();
4769 new (this) DoubleAPFloat(RHS);
4770 }
4771 return *this;
4772}
4773
4774// Returns a result such that:
4775// 1. abs(Lo) <= ulp(Hi)/2
4776// 2. Hi == RTNE(Hi + Lo)
4777// 3. Hi + Lo == X + Y
4778//
4779// Requires that log2(X) >= log2(Y).
4780static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4781 if (!X.isFinite())
4782 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4783 APFloat Hi = X + Y;
4784 APFloat Delta = Hi - X;
4785 APFloat Lo = Y - Delta;
4786 return {Hi, Lo};
4787}
4788
4789// Implement addition, subtraction, multiplication and division based on:
4790// "Software for Doubled-Precision Floating-Point Computations",
4791// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4792APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4793 const APFloat &c, const APFloat &cc,
4794 roundingMode RM) {
4795 int Status = opOK;
4796 APFloat z = a;
4797 Status |= z.add(c, RM);
4798 if (!z.isFinite()) {
4799 if (!z.isInfinity()) {
4800 Floats[0] = std::move(z);
4801 Floats[1].makeZero(/* Neg = */ false);
4802 return (opStatus)Status;
4803 }
4804 Status = opOK;
4805 auto AComparedToC = a.compareAbsoluteValue(c);
4806 z = cc;
4807 Status |= z.add(aa, RM);
4808 if (AComparedToC == APFloat::cmpGreaterThan) {
4809 // z = cc + aa + c + a;
4810 Status |= z.add(c, RM);
4811 Status |= z.add(a, RM);
4812 } else {
4813 // z = cc + aa + a + c;
4814 Status |= z.add(a, RM);
4815 Status |= z.add(c, RM);
4816 }
4817 if (!z.isFinite()) {
4818 Floats[0] = std::move(z);
4819 Floats[1].makeZero(/* Neg = */ false);
4820 return (opStatus)Status;
4821 }
4822 Floats[0] = z;
4823 APFloat zz = aa;
4824 Status |= zz.add(cc, RM);
4825 if (AComparedToC == APFloat::cmpGreaterThan) {
4826 // Floats[1] = a - z + c + zz;
4827 Floats[1] = a;
4828 Status |= Floats[1].subtract(z, RM);
4829 Status |= Floats[1].add(c, RM);
4830 Status |= Floats[1].add(zz, RM);
4831 } else {
4832 // Floats[1] = c - z + a + zz;
4833 Floats[1] = c;
4834 Status |= Floats[1].subtract(z, RM);
4835 Status |= Floats[1].add(a, RM);
4836 Status |= Floats[1].add(zz, RM);
4837 }
4838 } else {
4839 // q = a - z;
4840 APFloat q = a;
4841 Status |= q.subtract(z, RM);
4842
4843 // zz = q + c + (a - (q + z)) + aa + cc;
4844 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4845 auto zz = q;
4846 Status |= zz.add(c, RM);
4847 Status |= q.add(z, RM);
4848 Status |= q.subtract(a, RM);
4849 q.changeSign();
4850 Status |= zz.add(q, RM);
4851 Status |= zz.add(aa, RM);
4852 Status |= zz.add(cc, RM);
4853 if (zz.isZero() && !zz.isNegative()) {
4854 Floats[0] = std::move(z);
4855 Floats[1].makeZero(/* Neg = */ false);
4856 return opOK;
4857 }
4858 Floats[0] = z;
4859 Status |= Floats[0].add(zz, RM);
4860 if (!Floats[0].isFinite()) {
4861 Floats[1].makeZero(/* Neg = */ false);
4862 return (opStatus)Status;
4863 }
4864 Floats[1] = std::move(z);
4865 Status |= Floats[1].subtract(Floats[0], RM);
4866 Status |= Floats[1].add(zz, RM);
4867 }
4868 return (opStatus)Status;
4869}
4870
4871APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4872 const DoubleAPFloat &RHS,
4873 DoubleAPFloat &Out,
4874 roundingMode RM) {
4875 if (LHS.getCategory() == fcNaN) {
4876 Out = LHS;
4877 return opOK;
4878 }
4879 if (RHS.getCategory() == fcNaN) {
4880 Out = RHS;
4881 return opOK;
4882 }
4883 if (LHS.getCategory() == fcZero) {
4884 Out = RHS;
4885 return opOK;
4886 }
4887 if (RHS.getCategory() == fcZero) {
4888 Out = LHS;
4889 return opOK;
4890 }
4891 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4892 LHS.isNegative() != RHS.isNegative()) {
4893 Out.makeNaN(false, Out.isNegative(), nullptr);
4894 return opInvalidOp;
4895 }
4896 if (LHS.getCategory() == fcInfinity) {
4897 Out = LHS;
4898 return opOK;
4899 }
4900 if (RHS.getCategory() == fcInfinity) {
4901 Out = RHS;
4902 return opOK;
4903 }
4904 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4905
4906 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4907 CC(RHS.Floats[1]);
4908 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4909 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4910 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4911 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4912 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4913 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4914 return Out.addImpl(A, AA, C, CC, RM);
4915}
4916
4918 roundingMode RM) {
4919 return addWithSpecial(*this, RHS, *this, RM);
4920}
4921
4923 roundingMode RM) {
4924 changeSign();
4925 auto Ret = add(RHS, RM);
4926 changeSign();
4927 return Ret;
4928}
4929
4932 const auto &LHS = *this;
4933 auto &Out = *this;
4934 /* Interesting observation: For special categories, finding the lowest
4935 common ancestor of the following layered graph gives the correct
4936 return category:
4937
4938 NaN
4939 / \
4940 Zero Inf
4941 \ /
4942 Normal
4943
4944 e.g. NaN * NaN = NaN
4945 Zero * Inf = NaN
4946 Normal * Zero = Zero
4947 Normal * Inf = Inf
4948 */
4949 if (LHS.getCategory() == fcNaN) {
4950 Out = LHS;
4951 return opOK;
4952 }
4953 if (RHS.getCategory() == fcNaN) {
4954 Out = RHS;
4955 return opOK;
4956 }
4957 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4958 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4959 Out.makeNaN(false, false, nullptr);
4960 return opOK;
4961 }
4962 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4963 Out = LHS;
4964 return opOK;
4965 }
4966 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4967 Out = RHS;
4968 return opOK;
4969 }
4970 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4971 "Special cases not handled exhaustively");
4972
4973 int Status = opOK;
4974 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4975 // t = a * c
4976 APFloat T = A;
4977 Status |= T.multiply(C, RM);
4978 if (!T.isFiniteNonZero()) {
4979 Floats[0] = std::move(T);
4980 Floats[1].makeZero(/* Neg = */ false);
4981 return (opStatus)Status;
4982 }
4983
4984 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4985 APFloat Tau = A;
4986 T.changeSign();
4987 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4988 T.changeSign();
4989 {
4990 // v = a * d
4991 APFloat V = A;
4992 Status |= V.multiply(D, RM);
4993 // w = b * c
4994 APFloat W = B;
4995 Status |= W.multiply(C, RM);
4996 Status |= V.add(W, RM);
4997 // tau += v + w
4998 Status |= Tau.add(V, RM);
4999 }
5000 // u = t + tau
5001 APFloat U = T;
5002 Status |= U.add(Tau, RM);
5003
5004 Floats[0] = U;
5005 if (!U.isFinite()) {
5006 Floats[1].makeZero(/* Neg = */ false);
5007 } else {
5008 // Floats[1] = (t - u) + tau
5009 Status |= T.subtract(U, RM);
5010 Status |= T.add(Tau, RM);
5011 Floats[1] = std::move(T);
5012 }
5013 return (opStatus)Status;
5014}
5015
5018 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5019 "Unexpected Semantics");
5020 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5021 auto Ret = Tmp.divide(
5022 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5023 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5024 return Ret;
5025}
5026
5028 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5029 "Unexpected Semantics");
5030 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5031 auto Ret = Tmp.remainder(
5032 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5033 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5034 return Ret;
5035}
5036
5038 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5039 "Unexpected Semantics");
5040 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5041 auto Ret = Tmp.mod(
5042 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5043 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5044 return Ret;
5045}
5046
5049 const DoubleAPFloat &Addend,
5051 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5052 "Unexpected Semantics");
5053 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5054 auto Ret = Tmp.fusedMultiplyAdd(
5055 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
5056 Multiplicand.bitcastToAPInt()),
5057 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
5058 RM);
5059 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5060 return Ret;
5061}
5062
5064 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5065 "Unexpected Semantics");
5066 const APFloat &Hi = getFirst();
5067 const APFloat &Lo = getSecond();
5068
5069 APFloat RoundedHi = Hi;
5070 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
5071
5072 // We can reduce the problem to just the high part if the input:
5073 // 1. Represents a non-finite value.
5074 // 2. Has a component which is zero.
5075 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5076 Floats[0] = std::move(RoundedHi);
5077 Floats[1].makeZero(/*Neg=*/false);
5078 return HiStatus;
5079 }
5080
5081 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5082 // halfway point.
5083 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5084 APFloat TieBreaker) {
5085 // RoundingError tells us which direction we rounded:
5086 // - RoundingError > 0: we rounded up.
5087 // - RoundingError < 0: we rounded down.
5088 // Sterbenz' lemma ensures that RoundingError is exact.
5089 const APFloat RoundingError = Rounded - ToRound;
5090 if (TieBreaker.isNonZero() &&
5091 TieBreaker.isNegative() != RoundingError.isNegative() &&
5092 abs(RoundingError).isExactlyValue(0.5))
5093 Rounded.add(
5094 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5096 return Rounded;
5097 };
5098
5099 // Case 1: Hi is not an integer.
5100 // Special cases are for rounding modes that are sensitive to ties.
5101 if (RoundedHi != Hi) {
5102 // We need to consider the case where Hi was between two integers and the
5103 // rounding mode broke the tie when, in fact, Lo may have had a different
5104 // sign than Hi.
5105 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5106 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5107
5108 Floats[0] = std::move(RoundedHi);
5109 Floats[1].makeZero(/*Neg=*/false);
5110 return HiStatus;
5111 }
5112
5113 // Case 2: Hi is an integer.
5114 // Special cases are for rounding modes which are rounding towards or away from zero.
5115 RoundingMode LoRoundingMode;
5116 if (RM == rmTowardZero)
5117 // When our input is positive, we want the Lo component rounded toward
5118 // negative infinity to get the smallest result magnitude. Likewise,
5119 // negative inputs want the Lo component rounded toward positive infinity.
5120 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5121 else
5122 LoRoundingMode = RM;
5123
5124 APFloat RoundedLo = Lo;
5125 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5126 if (LoRoundingMode == rmNearestTiesToAway)
5127 // We need to consider the case where Lo was between two integers and the
5128 // rounding mode broke the tie when, in fact, Hi may have had a different
5129 // sign than Lo.
5130 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5131
5132 // We must ensure that the final result has no overlap between the two APFloat values.
5133 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5134
5135 Floats[0] = std::move(RoundedHi);
5136 Floats[1] = std::move(RoundedLo);
5137 return LoStatus;
5138}
5139
5141 Floats[0].changeSign();
5142 Floats[1].changeSign();
5143}
5144
5147 // Compare absolute values of the high parts.
5148 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5149 if (HiPartCmp != cmpEqual)
5150 return HiPartCmp;
5151
5152 // Zero, regardless of sign, is equal.
5153 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5154 return cmpEqual;
5155
5156 // At this point, |this->Hi| == |RHS.Hi|.
5157 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5158 // same, and Hi-|Lo| if signs are different.
5159 const bool ThisIsSubtractive =
5160 Floats[0].isNegative() != Floats[1].isNegative();
5161 const bool RHSIsSubtractive =
5162 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5163
5164 // Case 1: The low part of 'this' is zero.
5165 if (Floats[1].isZero())
5166 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5167 // If RHS is subtractive, its magnitude is smaller.
5168 // If RHS is additive, its magnitude is larger.
5169 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5170
5171 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5172 if (RHS.Floats[1].isZero())
5173 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5174 // If 'this' is subtractive, its magnitude is smaller.
5175 // If 'this' is additive, its magnitude is larger.
5176 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5177
5178 // If their natures differ, the additive one is larger.
5179 if (ThisIsSubtractive != RHSIsSubtractive)
5180 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5181
5182 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5183 // The comparison now depends on the magnitude of the low parts.
5184 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5185
5186 if (ThisIsSubtractive) {
5187 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5188 if (LoPartCmp == cmpLessThan)
5189 return cmpGreaterThan;
5190 if (LoPartCmp == cmpGreaterThan)
5191 return cmpLessThan;
5192 }
5193
5194 // If additive, the comparison of |Lo| is direct.
5195 // If equal, they are equal.
5196 return LoPartCmp;
5197}
5198
5200 return Floats[0].getCategory();
5201}
5202
5203bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5204
5206 Floats[0].makeInf(Neg);
5207 Floats[1].makeZero(/* Neg = */ false);
5208}
5209
5211 Floats[0].makeZero(Neg);
5212 Floats[1].makeZero(/* Neg = */ false);
5213}
5214
5216 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5217 "Unexpected Semantics");
5218 Floats[0] =
5219 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5220 Floats[1] =
5221 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5222 if (Neg)
5223 changeSign();
5224}
5225
5227 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5228 "Unexpected Semantics");
5229 Floats[0].makeSmallest(Neg);
5230 Floats[1].makeZero(/* Neg = */ false);
5231}
5232
5234 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5235 "Unexpected Semantics");
5236 Floats[0] =
5237 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5238 if (Neg)
5239 Floats[0].changeSign();
5240 Floats[1].makeZero(/* Neg = */ false);
5241}
5242
5243void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5244 Floats[0].makeNaN(SNaN, Neg, fill);
5245 Floats[1].makeZero(/* Neg = */ false);
5246}
5247
5249 auto Result = Floats[0].compare(RHS.Floats[0]);
5250 // |Float[0]| > |Float[1]|
5251 if (Result == APFloat::cmpEqual)
5252 return Floats[1].compare(RHS.Floats[1]);
5253 return Result;
5254}
5255
5257 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5258 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5259}
5260
5262 if (Arg.Floats)
5263 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5264 return hash_combine(Arg.Semantics);
5265}
5266
5268 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5269 "Unexpected Semantics");
5270 uint64_t Data[] = {
5271 Floats[0].bitcastToAPInt().getRawData()[0],
5272 Floats[1].bitcastToAPInt().getRawData()[0],
5273 };
5274 return APInt(128, Data);
5275}
5276
5278 roundingMode RM) {
5279 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5280 "Unexpected Semantics");
5281 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5282 auto Ret = Tmp.convertFromString(S, RM);
5283 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5284 return Ret;
5285}
5286
5287// The double-double lattice of values corresponds to numbers which obey:
5288// - abs(lo) <= 1/2 * ulp(hi)
5289// - roundTiesToEven(hi + lo) == hi
5290//
5291// nextUp must choose the smallest output > input that follows these rules.
5292// nextDown must choose the largest output < input that follows these rules.
5294 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5295 "Unexpected Semantics");
5296 // nextDown(x) = -nextUp(-x)
5297 if (nextDown) {
5298 changeSign();
5299 APFloat::opStatus Result = next(/*nextDown=*/false);
5300 changeSign();
5301 return Result;
5302 }
5303 switch (getCategory()) {
5304 case fcInfinity:
5305 // nextUp(+inf) = +inf
5306 // nextUp(-inf) = -getLargest()
5307 if (isNegative())
5308 makeLargest(true);
5309 return opOK;
5310
5311 case fcNaN:
5312 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5313 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5314 // change the payload.
5315 if (getFirst().isSignaling()) {
5316 // For consistency, propagate the sign of the sNaN to the qNaN.
5317 makeNaN(false, isNegative(), nullptr);
5318 return opInvalidOp;
5319 }
5320 return opOK;
5321
5322 case fcZero:
5323 // nextUp(pm 0) = +getSmallest()
5324 makeSmallest(false);
5325 return opOK;
5326
5327 case fcNormal:
5328 break;
5329 }
5330
5331 const APFloat &HiOld = getFirst();
5332 const APFloat &LoOld = getSecond();
5333
5334 APFloat NextLo = LoOld;
5335 NextLo.next(/*nextDown=*/false);
5336
5337 // We want to admit values where:
5338 // 1. abs(Lo) <= ulp(Hi)/2
5339 // 2. Hi == RTNE(Hi + lo)
5340 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5341 return Hi + Lo == Hi;
5342 };
5343
5344 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5345 if (InLattice(HiOld, NextLo)) {
5346 // Yes, the result is (HiOld, nextUp(LoOld)).
5347 Floats[1] = std::move(NextLo);
5348
5349 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5350 // value is defined to have exactly 106 bits of precision. This limitation
5351 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5352 // value.
5353 DoubleAPFloat Largest{*Semantics, uninitialized};
5354 Largest.makeLargest(/*Neg=*/false);
5355 if (compare(Largest) == cmpGreaterThan)
5356 makeInf(/*Neg=*/false);
5357
5358 return opOK;
5359 }
5360
5361 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5362 // correct result. We know the new hi component will be nextUp(HiOld) but our
5363 // lattice rules make it a little ambiguous what the correct NextLo must be.
5364 APFloat NextHi = HiOld;
5365 NextHi.next(/*nextDown=*/false);
5366
5367 // nextUp(getLargest()) == INFINITY
5368 if (NextHi.isInfinity()) {
5369 makeInf(/*Neg=*/false);
5370 return opOK;
5371 }
5372
5373 // IEEE 754-2019 5.3.1:
5374 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5375 // -0."
5376 if (NextHi.isZero()) {
5377 makeZero(/*Neg=*/true);
5378 return opOK;
5379 }
5380
5381 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5382 // negative infinity as possible.
5383 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5384 if (!InLattice(NextHi, NextLo))
5385 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5386 NextLo.next(/*nextDown=*/false);
5387
5388 Floats[0] = std::move(NextHi);
5389 Floats[1] = std::move(NextLo);
5390
5391 return opOK;
5392}
5393
/// Converts this double-double value to an integer of \p Width bits and stores
/// the resulting bits in \p Input.
///
/// When Hi is not a finite non-zero value, or Lo is zero, the conversion is
/// forwarded unchanged to Hi's own convertToInteger. Otherwise a copy of the
/// value is rounded to an integral double-double with \p RM, and the two
/// integral components are converted and combined in \p Input.
///
/// \param Input    Destination parts that receive the integer bits.
/// \param Width    Bit width of the destination integer.
/// \param IsSigned Whether the destination integer is signed.
/// \param RM       Rounding mode requested by the caller.
/// \param IsExact  Out-parameter. It is written unconditionally on the
///                 non-forwarding path, so it must not be null.
/// \returns opInvalidOp when rounding fails, when a negative value is
///          requested as unsigned, or when a component conversion reports
///          opInvalidOp; opInexact (or Hi's failing status) when rounding left
///          a zero component; otherwise the status of the roundToIntegral
///          step.
5394APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5395 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5396 roundingMode RM, bool *IsExact) const {
5397 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5398 "Unexpected Semantics");
5399
5400 // If Hi is not finite, or Lo is zero, the value is entirely represented
5401 // by Hi. Delegate to the simpler single-APFloat conversion.
5402 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5403 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5404
5405 // First, round the full double-double value to an integral value. This
5406 // simplifies the rest of the function, as we no longer need to consider
5407 // fractional parts.
// *IsExact stays false on every early return below; it is only updated on the
// two paths that return RoundStatus.
5408 *IsExact = false;
5409 DoubleAPFloat Integral = *this;
5410 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5411 if (RoundStatus == opInvalidOp)
5412 return opInvalidOp;
5413 const APFloat &IntegralHi = Integral.getFirst();
5414 const APFloat &IntegralLo = Integral.getSecond();
5415
5416 // If rounding results in either component being zero, the sum is trivial.
5417 // Delegate to the simpler single-APFloat conversion.
5418 bool HiIsExact;
5419 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5420 const opStatus HiStatus =
5421 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5422 // The conversion from an integer-valued float to an APInt may fail if the
5423 // result would be out of range. Regardless, taking this path is only
5424 // possible if rounding occurred during the initial `roundToIntegral`.
5425 return HiStatus == opOK ? opInexact : HiStatus;
5426 }
5427
5428 // A negative number cannot be represented by an unsigned integer.
5429 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5430 if (!IsSigned && IntegralHi.isNegative())
5431 return opInvalidOp;
5432
5433 // Handle the special boundary case where |Hi| is exactly the power of two
5434 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5435 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5436 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5437 // signed, N for unsigned).
5438 bool LoIsExact;
5439 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5440 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5441 if (HiExactLog2 >= 0 &&
5442 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5443 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5444 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5445 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5446 return opInvalidOp;
5447
5448 // If the signs differ, the sum will fit. We can compute the result using
5449 // properties of two's complement arithmetic without a wide intermediate
5450 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5451 const opStatus LoStatus = IntegralLo.convertToInteger(
5452 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5453 if (LoStatus == opInvalidOp)
5454 return opInvalidOp;
5455
5456 // Adjust the bit pattern of Lo to account for Hi's value:
5457 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5458 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5459 // already produced the correct final bit pattern.
5460 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5461 // can be computed by taking the two's complement pattern for `Lo` and
5462 // clearing the sign bit.
// NOTE(review): when IsSigned is true and Hi is negative (Hi = -2^(Width-1)
// with a positive Lo), no adjustment is applied and Input keeps Lo's bit
// pattern. Confirm that this is the intended encoding of Hi + Lo.
5463 if (IsSigned && !IntegralHi.isNegative())
5464 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5465 *IsExact = RoundStatus == opOK;
5466 return RoundStatus;
5467 }
5468
5469 // Convert Hi into an integer. This may not fit but that is OK: we know that
5470 // Hi + Lo would not fit either in this situation.
5471 const opStatus HiStatus = IntegralHi.convertToInteger(
5472 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5473 if (HiStatus == opInvalidOp)
5474 return HiStatus;
5475
5476 // Convert Lo into a temporary integer of the same width.
5477 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5478 const opStatus LoStatus =
5479 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5480 if (LoStatus == opInvalidOp)
5481 return LoStatus;
5482
5483 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5484 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5485 // where the sum could cross the integer type's boundary is when Hi is a
5486 // power of two, which is handled by the special case block above.
5487 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5488
5489 *IsExact = RoundStatus == opOK;
5490 return RoundStatus;
5491}
5492
5495 unsigned int Width, bool IsSigned,
5496 roundingMode RM, bool *IsExact) const {
5497 opStatus FS =
5498 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5499
5500 if (FS == opInvalidOp) {
5501 const unsigned DstPartsCount = partCountForBits(Width);
5502 assert(DstPartsCount <= Input.size() && "Integer too big");
5503
5504 unsigned Bits;
5505 if (getCategory() == fcNaN)
5506 Bits = 0;
5507 else if (isNegative())
5508 Bits = IsSigned;
5509 else
5510 Bits = Width - IsSigned;
5511
5512 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5513 if (isNegative() && IsSigned)
5514 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5515 }
5516
5517 return FS;
5518}
5519
5520APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5521 switch (RM) {
5523 makeLargest(/*Neg=*/isNegative());
5524 break;
5526 if (isNegative())
5527 makeInf(/*Neg=*/true);
5528 else
5529 makeLargest(/*Neg=*/false);
5530 break;
5532 if (isNegative())
5533 makeLargest(/*Neg=*/true);
5534 else
5535 makeInf(/*Neg=*/false);
5536 break;
5539 makeInf(/*Neg=*/isNegative());
5540 break;
5541 default:
5542 llvm_unreachable("Invalid rounding mode found");
5543 }
5544 opStatus S = opInexact;
5545 if (!getFirst().isFinite())
5546 S = static_cast<opStatus>(S | opOverflow);
5547 return S;
5548}
5549
5550APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5551 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5552 // Find the most significant bit of the source integer. APInt::tcMSB returns
5553 // UINT_MAX for a zero value.
5554 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5555 if (SrcMSB == UINT_MAX) {
5556 // The source integer is 0.
5557 makeZero(/*Neg=*/false);
5558 return opOK;
5559 }
5560
5561 // Create a minimally-sized APInt to represent the source value.
5562 const unsigned SrcBitWidth = SrcMSB + 1;
5563 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5564 /*isUnsigned=*/true};
5565
5566 // Stage 1: Initial Approximation.
5567 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5568 // We use round-to-nearest because it minimizes the initial error, which is
5569 // crucial for the subsequent steps.
5571 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5572
5573 // If the first approximation already overflows, the number is too large.
5574 // NOTE: The underlying semantics are *more* conservative when choosing to
5575 // overflow because their notion of ULP is much larger. As such, it is always
5576 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5577 if (!Hi.isFinite())
5578 return handleOverflow(RM);
5579
5580 // Stage 2: Exact Error Calculation.
5581 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5582 // This is done by converting Hi back to an integer and subtracting it from
5583 // the original source.
5584 bool HiAsIntIsExact;
5585 // Create an integer representation of Hi. Its width is determined by the
5586 // exponent of Hi, ensuring it's just large enough to represent Hi
5587 // accurately when converted back to an integer. This width can exceed
5588 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5589 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5590 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5591 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5592
5593 // Stage 3: Error Approximation and Rounding.
5594 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5595 // captures the remainder of the original number. The rounding mode for this
5596 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5597 // ensure the final sum (Hi + Lo) rounds correctly.
5598 roundingMode LoRM = RM;
5599 // Adjustments are only necessary when the initial approximation Hi was an
5600 // overestimate, making the Error negative.
5601 if (Error.isNegative()) {
5602 if (RM == rmNearestTiesToAway) {
5603 // For rmNearestTiesToAway, a tie should round away from zero. Since
5604 // SrcInt is positive, this means rounding toward +infinity.
5605 // A standard conversion of a negative Error would round ties toward
5606 // -infinity, causing the final sum Hi + Lo to be smaller. To
5607 // counteract this, we detect the tie case and override the rounding
5608 // mode for Lo to rmTowardPositive.
5609 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5610 const unsigned LoPrecision = getSecond().getSemantics().precision;
5611 if (ErrorActiveBits > LoPrecision) {
5612 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5613 // A tie occurs when the bits to be truncated are of the form 100...0.
5614 // This is detected by checking if the number of trailing zeros is
5615 // exactly one less than the number of bits being truncated.
5616 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5617 LoRM = rmTowardPositive;
5618 }
5619 } else if (RM == rmTowardZero) {
5620 // For rmTowardZero, the final positive result must be truncated (rounded
5621 // down). When Hi is an overestimate, Error is negative. A standard
5622 // rmTowardZero conversion of Error would make it *less* negative,
5623 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5624 // rounds down correctly, we force Lo to round toward -infinity.
5625 LoRM = rmTowardNegative;
5626 }
5627 }
5628
5630 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5631
5632 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5633 // components do not overlap. fastTwoSum performs this operation.
5634 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5635 Floats[0] = std::move(Hi);
5636 Floats[1] = std::move(Lo);
5637
5638 // A final check for overflow is needed because fastTwoSum can cause a
5639 // carry-out from Lo that pushes Hi to infinity.
5640 if (!getFirst().isFinite())
5641 return handleOverflow(RM);
5642
5643 // The largest DoubleAPFloat must be canonical. Values which are larger are
5644 // not canonical and are equivalent to overflow.
5645 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5646 DoubleAPFloat Largest{*Semantics};
5647 Largest.makeLargest(/*Neg=*/false);
5648 if (compare(Largest) == APFloat::cmpGreaterThan)
5649 return handleOverflow(RM);
5650 }
5651
5652 // The final status of the operation is determined by the conversion of the
5653 // error term. If Lo could represent Error exactly, the entire conversion
5654 // is exact. Otherwise, it's inexact.
5655 return Status;
5656}
5657
5659 bool IsSigned,
5660 roundingMode RM) {
5661 const bool NegateInput = IsSigned && Input.isNegative();
5662 APInt API = Input;
5663 if (NegateInput)
5664 API.negate();
5665
5667 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5668 if (NegateInput)
5669 changeSign();
5670 return Status;
5671}
5672
5674 unsigned int HexDigits,
5675 bool UpperCase,
5676 roundingMode RM) const {
5677 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5678 "Unexpected Semantics");
5679 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5680 .convertToHexString(DST, HexDigits, UpperCase, RM);
5681}
5682
5684 return getCategory() == fcNormal &&
5685 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5686 // (double)(Hi + Lo) == Hi defines a normal number.
5687 Floats[0] != Floats[0] + Floats[1]);
5688}
5689
5691 if (getCategory() != fcNormal)
5692 return false;
5693 DoubleAPFloat Tmp(*this);
5694 Tmp.makeSmallest(this->isNegative());
5695 return Tmp.compare(*this) == cmpEqual;
5696}
5697
5699 if (getCategory() != fcNormal)
5700 return false;
5701
5702 DoubleAPFloat Tmp(*this);
5704 return Tmp.compare(*this) == cmpEqual;
5705}
5706
5708 if (getCategory() != fcNormal)
5709 return false;
5710 DoubleAPFloat Tmp(*this);
5711 Tmp.makeLargest(this->isNegative());
5712 return Tmp.compare(*this) == cmpEqual;
5713}
5714
5716 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5717 "Unexpected Semantics");
5718 return Floats[0].isInteger() && Floats[1].isInteger();
5719}
5720
5722 unsigned FormatPrecision,
5723 unsigned FormatMaxPadding,
5724 bool TruncateZero) const {
5725 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5726 "Unexpected Semantics");
5727 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5728 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5729}
5730
5732 // In order for Hi + Lo to be a power of two, the following must be true:
5733 // 1. Hi must be a power of two.
5734 // 2. Lo must be zero.
5735 if (getSecond().isNonZero())
5736 return INT_MIN;
5737 return getFirst().getExactLog2Abs();
5738}
5739
5740int ilogb(const DoubleAPFloat &Arg) {
5741 const APFloat &Hi = Arg.getFirst();
5742 const APFloat &Lo = Arg.getSecond();
5743 int IlogbResult = ilogb(Hi);
5744 // Zero and non-finite values can delegate to ilogb(Hi).
5745 if (Arg.getCategory() != fcNormal)
5746 return IlogbResult;
5747 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5748 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5749 return IlogbResult;
5750 if (Hi.getExactLog2Abs() == INT_MIN)
5751 return IlogbResult;
5752 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5753 // get nudged out of the binade by the low component.
5754 return IlogbResult - 1;
5755}
5756
5759 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5760 "Unexpected Semantics");
5762 scalbn(Arg.Floats[0], Exp, RM),
5763 scalbn(Arg.Floats[1], Exp, RM));
5764}
5765
5766DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5768 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5769 "Unexpected Semantics");
5770
5771 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5772 // [1.0, 2.0).
5773 Exp = ilogb(Arg);
5774
5775 // For NaNs, quiet any signaling NaN and return the result, as per standard
5776 // practice.
5777 if (Exp == APFloat::IEK_NaN) {
5778 DoubleAPFloat Quiet{Arg};
5779 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5780 return Quiet;
5781 }
5782
5783 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5784 if (Exp == APFloat::IEK_Inf)
5785 return Arg;
5786
5787 // For zero, the fraction is zero and the standard requires the exponent be 0.
5788 if (Exp == APFloat::IEK_Zero) {
5789 Exp = 0;
5790 return Arg;
5791 }
5792
5793 const APFloat &Hi = Arg.getFirst();
5794 const APFloat &Lo = Arg.getSecond();
5795
5796 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5797 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5798 // Increment the exponent to ensure the fraction is in the correct range.
5799 ++Exp;
5800
5801 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5802 APFloat Second = Lo;
5803 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5804 roundingMode LoRoundingMode;
5805 // The interpretation of rmTowardZero depends on the sign of the combined
5806 // Arg rather than the sign of the component.
5807 if (RM == rmTowardZero)
5808 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5809 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5810 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5811 // "away from zero" based on its own sign would move the value in the
5812 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5813 // direction-agnostic. We only need to bother with this if Lo is scaled
5814 // down.
5815 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5816 LoRoundingMode = rmNearestTiesToEven;
5817 else
5818 LoRoundingMode = RM;
5819 Second = scalbn(Lo, -Exp, LoRoundingMode);
5820 // The rmNearestTiesToEven proxy is correct most of the time, but it
5821 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5822 // exact midpoint.
5823 // NOTE: This is morally equivalent to roundTiesTowardZero.
5824 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5825 // Re-scale the result back to check if rounding occurred.
5826 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5827 if (RecomposedLo != Lo) {
5828 // RoundingError tells us which direction we rounded:
5829 // - RoundingError > 0: we rounded up.
5830 // - RoundingError < 0: we rounded down.
5831 const APFloat RoundingError = RecomposedLo - Lo;
5832 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5833 // We do this by checking if the absolute rounding error is exactly
5834 // half a ULP of the result.
5835 const APFloat UlpOfSecond = harrisonUlp(Second);
5836 const APFloat ScaledUlpOfSecond =
5837 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5838 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5839 const bool RoundedLoAway =
5840 Second.isNegative() == RoundingError.isNegative();
5841 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5842 // decrease the magnitude of Second to increase the magnitude
5843 // First+Second.
5844 if (IsMidpoint && RoundedLoAway)
5845 Second.next(/*nextDown=*/!Second.isNegative());
5846 }
5847 }
5848 // Handle a tricky edge case where Arg is slightly less than a power of two
5849 // (e.g., Arg = 2^k - epsilon). In this situation:
5850 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5851 // 2. ilogb(Arg) correctly returns k-1.
5852 // 3. Our initial Exp becomes (k-1) + 1 = k.
5853 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5854 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5855 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5856 // We detect this specific case by checking if Hi is a power of two and if
5857 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5858 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5859 // valid fraction.
5860 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5861 ++Exp;
5862 }
5863
5864 APFloat First = scalbn(Hi, -Exp, RM);
5866 std::move(Second));
5867}
5868
5869} // namespace detail
5870
5871APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5872 if (usesLayout<IEEEFloat>(Semantics)) {
5873 new (&IEEE) IEEEFloat(std::move(F));
5874 return;
5875 }
5876 if (usesLayout<DoubleAPFloat>(Semantics)) {
5877 const fltSemantics& S = F.getSemantics();
5878 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5880 return;
5881 }
5882 llvm_unreachable("Unexpected semantics");
5883}
5884
5889
5890hash_code hash_value(const APFloat &Arg) {
5891 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5892 return hash_value(Arg.U.IEEE);
5893 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5894 return hash_value(Arg.U.Double);
5895 llvm_unreachable("Unexpected semantics");
5896}
5897
5899 : APFloat(Semantics) {
5900 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5901 assert(StatusOrErr && "Invalid floating point representation");
5902 consumeError(StatusOrErr.takeError());
5903}
5904
5906 if (isZero())
5907 return isNegative() ? fcNegZero : fcPosZero;
5908 if (isNormal())
5909 return isNegative() ? fcNegNormal : fcPosNormal;
5910 if (isDenormal())
5912 if (isInfinity())
5913 return isNegative() ? fcNegInf : fcPosInf;
5914 assert(isNaN() && "Other class of FP constant");
5915 return isSignaling() ? fcSNan : fcQNan;
5916}
5917
5918bool APFloat::getExactInverse(APFloat *Inv) const {
5919 // Only finite, non-zero numbers can have a useful, representable inverse.
5920 // This check filters out +/- zero, +/- infinity, and NaN.
5921 if (!isFiniteNonZero())
5922 return false;
5923
5924 // Historically, this function rejects subnormal inputs. One reason why this
5925 // might be important is that subnormals may behave differently under FTZ/DAZ
5926 // runtime behavior.
5927 if (isDenormal())
5928 return false;
5929
5930 // A number has an exact, representable inverse if and only if it is a power
5931 // of two.
5932 //
5933 // Mathematical Rationale:
5934 // 1. A binary floating-point number x is a dyadic rational, meaning it can
5935 // be written as x = M / 2^k for integers M (the significand) and k.
5936 // 2. The inverse is 1/x = 2^k / M.
5937 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
5938 // in binary), its denominator M must also be a power of two.
5939 // Let's say M = 2^m.
5940 // 4. Substituting this back into the formula for x, we get
5941 // x = (2^m) / (2^k) = 2^(m-k).
5942 //
5943 // This proves that x must be a power of two.
5944
5945 // getExactLog2Abs() returns the integer exponent if the number is a power of
5946 // two or INT_MIN if it is not.
5947 const int Exp = getExactLog2Abs();
5948 if (Exp == INT_MIN)
5949 return false;
5950
5951 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5952 // scaling 1.0 by the negated exponent.
5953 APFloat Reciprocal =
5954 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
5955 rmTowardZero);
5956
5957 // scalbn might round if the resulting exponent -Exp is outside the
5958 // representable range, causing overflow (to infinity) or underflow. We
5959 // must verify that the result is still the exact power of two we expect.
5960 if (Reciprocal.getExactLog2Abs() != -Exp)
5961 return false;
5962
5963 // Avoid multiplication with a subnormal, it is not safe on all platforms and
5964 // may be slower than a normal division.
5965 if (Reciprocal.isDenormal())
5966 return false;
5967
5968 assert(Reciprocal.isFiniteNonZero());
5969
5970 if (Inv)
5971 *Inv = std::move(Reciprocal);
5972
5973 return true;
5974}
5975
5977 roundingMode RM, bool *losesInfo) {
// Same semantics object: nothing to convert. losesInfo is written without a
// null check, so callers must pass a valid pointer.
5978 if (&getSemantics() == &ToSemantics) {
5979 *losesInfo = false;
5980 return opOK;
5981 }
// IEEE layout -> IEEE layout: delegate to the underlying IEEEFloat.
5982 if (usesLayout<IEEEFloat>(getSemantics()) &&
5983 usesLayout<IEEEFloat>(ToSemantics))
5984 return U.IEEE.convert(ToSemantics, RM, losesInfo);
// IEEE layout -> DoubleAPFloat layout: only semPPCDoubleDouble is accepted
// (asserted). The IEEE value is converted in place to
// semPPCDoubleDoubleLegacy, and *this is then rebuilt from the resulting bit
// pattern under ToSemantics.
5985 if (usesLayout<IEEEFloat>(getSemantics()) &&
5986 usesLayout<DoubleAPFloat>(ToSemantics)) {
5987 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
5988 auto Ret =
5989 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
5990 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5991 return Ret;
5992 }
// DoubleAPFloat layout -> IEEE layout: convert the value returned by
// getIEEE() in place, then move it into a fresh APFloat tagged with
// ToSemantics.
5993 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5994 usesLayout<IEEEFloat>(ToSemantics)) {
5995 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5996 *this = APFloat(std::move(getIEEE()), ToSemantics);
5997 return Ret;
5998 }
// No other layout combination is handled.
5999 llvm_unreachable("Unexpected semantics");
6000}
6001
6005
// Render the value with toString() (default arguments) into a small local
// buffer, then stream the buffer to OS.
6007 SmallVector<char, 16> Buffer;
6008 toString(Buffer);
6009 OS << Buffer;
6010}
6011
// The debug dump helper is only compiled when NDEBUG is not defined or when
// LLVM_ENABLE_DUMP is defined.
6012#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the value to the debug stream, followed by a newline.
6014 print(dbgs());
6015 dbgs() << '\n';
6016}
6017#endif
6018
// The value is identified in the FoldingSetNodeID by its raw bit pattern.
6020 NID.Add(bitcastToAPInt());
6021}
6022
6024 roundingMode rounding_mode,
6025 bool *isExact) const {
// Convert into a scratch buffer holding one word per word of `result`, using
// result's own bit width and signedness, via the raw-buffer overload.
6026 unsigned bitWidth = result.getBitWidth();
6027 SmallVector<uint64_t, 4> parts(result.getNumWords());
6028 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
6029 rounding_mode, isExact);
6030 // Keeps the original signed-ness.
6031 result = APInt(bitWidth, parts);
// The status reported by the underlying conversion is returned unchanged.
6032 return status;
6033}
6034
// Fast path: the value already uses IEEEdouble semantics.
6036 if (&getSemantics() ==
6037 (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
6038 return getIEEE().convertToDouble();
// Otherwise the source semantics must be representable by IEEEdouble
// (asserted). Convert a copy so *this is not modified, and assert that the
// conversion was exact.
6039 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
6040 "Float semantics is not representable by IEEEdouble");
6041 APFloat Temp = *this;
6042 bool LosesInfo;
6043 opStatus St =
6044 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
6045 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
// St is otherwise only read inside the assert above.
6046 (void)St;
6047 return Temp.getIEEE().convertToDouble();
6048}
6049
6050#ifdef HAS_IEE754_FLOAT128
6051float128 APFloat::convertToQuad() const {
6052 if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
6053 return getIEEE().convertToQuad();
6054 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
6055 "Float semantics is not representable by IEEEquad");
6056 APFloat Temp = *this;
6057 bool LosesInfo;
6058 opStatus St =
6059 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
6060 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6061 (void)St;
6062 return Temp.getIEEE().convertToQuad();
6063}
6064#endif
6065
// Fast path: the value already uses IEEEsingle semantics.
6067 if (&getSemantics() ==
6068 (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
6069 return getIEEE().convertToFloat();
// Otherwise the source semantics must be representable by IEEEsingle
// (asserted). Convert a copy so *this is not modified, and assert that the
// conversion was exact.
6070 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
6071 "Float semantics is not representable by IEEEsingle");
6072 APFloat Temp = *this;
6073 bool LosesInfo;
6074 opStatus St =
6075 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
6076 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
// St is otherwise only read inside the assert above.
6077 (void)St;
6078 return Temp.getIEEE().convertToFloat();
6079}
6080
// Names accepted as arbitrary FP formats. Format is valid if and only if
// llvm::is_contained finds it in this table.
6082 static constexpr StringLiteral ValidFormats[] = {
6083 "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
6084 "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
6085 "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
6086 return llvm::is_contained(ValidFormats, Format);
6087}
6088
6090 // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
6091 // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
// NOTE(review): the head of this return expression (listing line 6092) is
// missing here; only the .Case chain is visible. Restore it from upstream.
// Each supported format name maps to the address of its semantics object;
// any other name yields nullptr.
6093 .Case("Float8E5M2", &semFloat8E5M2)
6094 .Case("Float8E4M3FN", &semFloat8E4M3FN)
6095 .Case("Float4E2M1FN", &semFloat4E2M1FN)
6096 .Case("Float6E3M2FN", &semFloat6E3M2FN)
6097 .Case("Float6E2M3FN", &semFloat6E2M3FN)
6098 .Default(nullptr);
6099}
6100
6101APFloat::Storage::~Storage() {
6102 if (usesLayout<IEEEFloat>(*semantics)) {
6103 IEEE.~IEEEFloat();
6104 return;
6105 }
6106 if (usesLayout<DoubleAPFloat>(*semantics)) {
6107 Double.~DoubleAPFloat();
6108 return;
6109 }
6110 llvm_unreachable("Unexpected semantics");
6111}
6112
6113APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6114 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6115 new (this) IEEEFloat(RHS.IEEE);
6116 return;
6117 }
6118 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6119 new (this) DoubleAPFloat(RHS.Double);
6120 return;
6121 }
6122 llvm_unreachable("Unexpected semantics");
6123}
6124
6125APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6126 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6127 new (this) IEEEFloat(std::move(RHS.IEEE));
6128 return;
6129 }
6130 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6131 new (this) DoubleAPFloat(std::move(RHS.Double));
6132 return;
6133 }
6134 llvm_unreachable("Unexpected semantics");
6135}
6136
6137APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6138 if (usesLayout<IEEEFloat>(*semantics) &&
6139 usesLayout<IEEEFloat>(*RHS.semantics)) {
6140 IEEE = RHS.IEEE;
6141 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6142 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6143 Double = RHS.Double;
6144 } else if (this != &RHS) {
6145 this->~Storage();
6146 new (this) Storage(RHS);
6147 }
6148 return *this;
6149}
6150
6151APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6152 if (usesLayout<IEEEFloat>(*semantics) &&
6153 usesLayout<IEEEFloat>(*RHS.semantics)) {
6154 IEEE = std::move(RHS.IEEE);
6155 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6156 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6157 Double = std::move(RHS.Double);
6158 } else if (this != &RHS) {
6159 this->~Storage();
6160 new (this) Storage(std::move(RHS));
6161 }
6162 return *this;
6163}
6164
6165} // namespace llvm
6166
6167#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:49
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:26
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:306
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:98
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:247
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:334
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:222
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEquad()
Definition APFloat.h:298
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:225
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:313
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:243
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:278
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:317
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool isValidArbitraryFPFormat(StringRef Format)
Returns true if the given string is a valid arbitrary floating-point format interpretation for llvm....
Definition APFloat.cpp:6081
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:260
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:218
friend class APFloat
Definition APFloat.h:291
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:251
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:145
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:155
static constexpr unsigned integerPartWidth
Definition APFloat.h:152
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:300
APInt::WordType integerPart
Definition APFloat.h:151
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:239
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:264
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:304
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:307
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:316
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:315
static const fltSemantics & Float8E4M3()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:308
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:190
static const fltSemantics & Float8E3M4()
Definition APFloat.h:311
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:255
static const fltSemantics & Float8E5M2()
Definition APFloat.h:303
fltCategory
Category of internally-represented number.
Definition APFloat.h:370
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:314
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static LLVM_ABI const fltSemantics * getArbitraryFPSemantics(StringRef Format)
Returns the fltSemantics for a given arbitrary FP format string, or nullptr if invalid.
Definition APFloat.cpp:6089
static const fltSemantics & FloatTF32()
Definition APFloat.h:312
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:228
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1175
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:6019
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1263
bool isFiniteNonZero() const
Definition APFloat.h:1526
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5976
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1564
bool isNegative() const
Definition APFloat.h:1516
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5918
cmpResult compareAbsoluteValue(const APFloat &RHS) const
Definition APFloat.h:1471
friend DoubleAPFloat
Definition APFloat.h:1580
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:6035
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1545
bool isNormal() const
Definition APFloat.h:1520
bool isDenormal() const
Definition APFloat.h:1517
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1236
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:6002
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5890
const fltSemantics & getSemantics() const
Definition APFloat.h:1524
bool isFinite() const
Definition APFloat.h:1521
bool isNaN() const
Definition APFloat.h:1514
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1143
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1506
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6066
bool isSignaling() const
Definition APFloat.h:1518
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1290
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1272
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1387
opStatus next(bool nextDown)
Definition APFloat.h:1309
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1153
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1203
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5905
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1281
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5885
friend IEEEFloat
Definition APFloat.h:1579
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:6013
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:6006
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1303
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1228
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1134
bool isInfinity() const
Definition APFloat.h:1513
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1584
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2379
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2351
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1769
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2374
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2453
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2423
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2763
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1767
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2359
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2737
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2643
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1510
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2384
void negate()
Negate this APInt in place.
Definition APInt.h:1483
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1933
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2390
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2710
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2365
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2403
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1751
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2541
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2488
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2527
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1759
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1928
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1737
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:576
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:209
void Add(const T &x)
Definition FoldingSet.h:249
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
const char * iterator
Definition StringRef.h:59
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
iterator begin() const
Definition StringRef.h:113
char back() const
back - Get the last character in the string.
Definition StringRef.h:152
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
char front() const
front - Get the first character in the string.
Definition StringRef.h:146
iterator end() const
Definition StringRef.h:115
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:675
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5233
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4763
LLVM_ABI void changeSign()
Definition APFloat.cpp:5140
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5707
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:5027
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4930
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5199
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5256
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5731
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5658
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5267
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5277
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5690
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4922
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5261
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5146
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5683
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5494
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5226
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5740
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5293
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5205
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5715
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5210
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5016
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5698
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:5037
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4710
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5721
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5215
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5248
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:5063
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:5048
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5673
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5203
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4917
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5243
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3247
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1465
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2236
fltCategory getCategory() const
Definition APFloat.h:582
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2805
bool isFiniteNonZero() const
Definition APFloat.h:585
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:472
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:4032
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4427
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3657
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4670
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2407
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:547
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2106
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:572
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2126
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3727
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3720
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2080
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4383
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:4064
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4617
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:965
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4646
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current seman...
Definition APFloat.cpp:1067
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2074
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:559
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3190
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:854
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2086
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2320
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:926
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1092
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:4078
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1084
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1119
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2274
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4652
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4472
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:569
const fltSemantics & getSemantics() const
Definition APFloat.h:583
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:562
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4456
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4632
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2484
LLVM_ABI void changeSign()
Definition APFloat.cpp:2030
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:951
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2745
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition APFloat.cpp:957
An opaque object representing a hash code.
Definition Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:448
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of friend declarations above to make it discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:450
static constexpr opStatus opDivByZero
Definition APFloat.h:445
static constexpr opStatus opOverflow
Definition APFloat.h:446
static constexpr cmpResult cmpLessThan
Definition APFloat.h:440
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1490
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:436
static constexpr uninitializedTag uninitialized
Definition APFloat.h:430
static constexpr fltCategory fcZero
Definition APFloat.h:452
static constexpr opStatus opOK
Definition APFloat.h:443
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:441
static constexpr unsigned integerPartWidth
Definition APFloat.h:438
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3395
APFloatBase::ExponentType ExponentType
Definition APFloat.h:429
static constexpr fltCategory fcNormal
Definition APFloat.h:451
static constexpr opStatus opInvalidOp
Definition APFloat.h:444
APFloatBase::opStatus opStatus
Definition APFloat.h:426
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4691
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:424
static constexpr cmpResult cmpUnordered
Definition APFloat.h:442
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:435
APFloatBase::roundingMode roundingMode
Definition APFloat.h:425
APFloatBase::cmpResult cmpResult
Definition APFloat.h:427
static constexpr fltCategory fcInfinity
Definition APFloat.h:449
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:433
static constexpr roundingMode rmTowardZero
Definition APFloat.h:437
static constexpr opStatus opUnderflow
Definition APFloat.h:447
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:431
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4652
static constexpr cmpResult cmpEqual
Definition APFloat.h:439
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4670
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4780
APFloatBase::integerPart integerPart
Definition APFloat.h:423
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:745
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1759
static const char infinityL[]
Definition APFloat.cpp:736
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:309
static const char NaNU[]
Definition APFloat.cpp:739
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:620
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:679
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:796
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:283
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:371
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:94
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:209
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1601
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:763
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
Definition STLExtras.h:2173
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
const unsigned int maxPrecision
Definition APFloat.cpp:208
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1622
static const char NaNL[]
Definition APFloat.cpp:738
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
static const char infinityU[]
Definition APFloat.cpp:737
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:50
@ lfMoreThanHalf
Definition APFloat.h:54
@ lfLessThanHalf
Definition APFloat.h:52
@ lfExactlyHalf
Definition APFloat.h:53
@ lfExactlyZero
Definition APFloat.h:51
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:463
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:210
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1610
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:293
static Error createError(const Twine &Err)
Definition APFloat.cpp:305
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:586
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
static const char hexDigitsUpper[]
Definition APFloat.cpp:735
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
const unsigned int maxExponent
Definition APFloat.cpp:207
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:316
fltNonfiniteBehavior
Definition APFloat.h:948
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range Range.
Definition STLExtras.h:2012
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:599
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:423
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:288
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:564
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1636
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:634
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:781
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:533
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:326
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:734
#define N
const char * lastSigDigit
Definition APFloat.cpp:458
const char * firstSigDigit
Definition APFloat.cpp:457
APFloatBase::ExponentType maxExponent
Definition APFloat.h:996
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1009
APFloatBase::ExponentType minExponent
Definition APFloat.h:1000
unsigned int sizeInBits
Definition APFloat.h:1007
unsigned int precision
Definition APFloat.h:1004
fltNanEncoding nanEncoding
Definition APFloat.h:1011