LLVM 23.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/Error.h"
29#include <cstring>
30#include <limits.h>
31
32/// Shared headers from LLVM libc
33/// Make sure to add ${LLVM_SOURCE_DIR}/../libc to include directories.
34///
35/// Notes: So far it looks like APFloat does not check errnos or floating-point
36/// exceptions after calling the math functions, so we will configure LLVM libc
37/// math functions to skip setting errnos and floating-point exceptions
38/// explicitly. We also put them in a separate namespace so that the symbols
39/// do not clash with other libc math builds just in case.
40#define LIBC_NAMESPACE __llvm_libc_apfloat
41#define LIBC_MATH (LIBC_MATH_NO_ERRNO | LIBC_MATH_NO_EXCEPT)
42
43#include "shared/math.h"
44#include "shared/math_check_exceptions.h"
45
46#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
47 do { \
48 if (usesLayout<IEEEFloat>(getSemantics())) \
49 return U.IEEE.METHOD_CALL; \
50 if (usesLayout<DoubleAPFloat>(getSemantics())) \
51 return U.Double.METHOD_CALL; \
52 llvm_unreachable("Unexpected semantics"); \
53 } while (false)
54
55using namespace llvm;
56
57/// A macro used to combine two fcCategory enums into one key which can be used
58/// in a switch statement to classify how the interaction of two APFloat's
59/// categories affects an operation.
60///
61/// TODO: If clang source code is ever allowed to use constexpr in its own
62/// codebase, change this into a static inline function.
63#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
64
65/* Assumed in hexadecimal significand parsing, and conversion to
66 hexadecimal strings. */
67static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
68
69namespace llvm {
70
71constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
72constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
73constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
74constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
75constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
76constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
77constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
79constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
80constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
82constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
84constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
86constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
87constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
88constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
89 127,
90 -127,
91 1,
92 8,
95 false,
96 false,
97 false};
98
99constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
101constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
103constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
105constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
106 80};
107constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
108constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
109constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
110 1023, -1022 + 53, 53 + 53, 128};
111
113 switch (S) {
114 case S_IEEEhalf:
115 return IEEEhalf();
116 case S_BFloat:
117 return BFloat();
118 case S_IEEEsingle:
119 return IEEEsingle();
120 case S_IEEEdouble:
121 return IEEEdouble();
122 case S_IEEEquad:
123 return IEEEquad();
125 return PPCDoubleDouble();
127 return PPCDoubleDoubleLegacy();
128 case S_Float8E5M2:
129 return Float8E5M2();
130 case S_Float8E5M2FNUZ:
131 return Float8E5M2FNUZ();
132 case S_Float8E4M3:
133 return Float8E4M3();
134 case S_Float8E4M3FN:
135 return Float8E4M3FN();
136 case S_Float8E4M3FNUZ:
137 return Float8E4M3FNUZ();
139 return Float8E4M3B11FNUZ();
140 case S_Float8E3M4:
141 return Float8E3M4();
142 case S_FloatTF32:
143 return FloatTF32();
144 case S_Float8E8M0FNU:
145 return Float8E8M0FNU();
146 case S_Float6E3M2FN:
147 return Float6E3M2FN();
148 case S_Float6E2M3FN:
149 return Float6E2M3FN();
150 case S_Float4E2M1FN:
151 return Float4E2M1FN();
153 return x87DoubleExtended();
154 }
155 llvm_unreachable("Unrecognised floating semantics");
156}
157
160 if (&Sem == &llvm::APFloat::IEEEhalf())
161 return S_IEEEhalf;
162 else if (&Sem == &llvm::APFloat::BFloat())
163 return S_BFloat;
164 else if (&Sem == &llvm::APFloat::IEEEsingle())
165 return S_IEEEsingle;
166 else if (&Sem == &llvm::APFloat::IEEEdouble())
167 return S_IEEEdouble;
168 else if (&Sem == &llvm::APFloat::IEEEquad())
169 return S_IEEEquad;
170 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
171 return S_PPCDoubleDouble;
172 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
174 else if (&Sem == &llvm::APFloat::Float8E5M2())
175 return S_Float8E5M2;
176 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
177 return S_Float8E5M2FNUZ;
178 else if (&Sem == &llvm::APFloat::Float8E4M3())
179 return S_Float8E4M3;
180 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
181 return S_Float8E4M3FN;
182 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
183 return S_Float8E4M3FNUZ;
184 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
185 return S_Float8E4M3B11FNUZ;
186 else if (&Sem == &llvm::APFloat::Float8E3M4())
187 return S_Float8E3M4;
188 else if (&Sem == &llvm::APFloat::FloatTF32())
189 return S_FloatTF32;
190 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
191 return S_Float8E8M0FNU;
192 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
193 return S_Float6E3M2FN;
194 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
195 return S_Float6E2M3FN;
196 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
197 return S_Float4E2M1FN;
198 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
199 return S_x87DoubleExtended;
200 else
201 llvm_unreachable("Unknown floating semantics");
202}
203
205 const fltSemantics &B) {
206 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
207 A.precision <= B.precision;
208}
209
210/* A tight upper bound on number of parts required to hold the value
211 pow(5, power) is
212
213 power * 815 / (351 * integerPartWidth) + 1
214
215 However, whilst the result may require only this many parts,
216 because we are multiplying two values to get it, the
217 multiplication may require an extra part with the excess part
218 being zero (consider the trivial case of 1 * 1, tcFullMultiply
219 requires two parts to hold the single-part result). So we add an
220 extra one to guarantee enough space whilst multiplying. */
221const unsigned int maxExponent = 16383;
222const unsigned int maxPrecision = 113;
224const unsigned int maxPowerOfFiveParts =
225 2 +
227
228unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
229 return semantics.precision;
230}
233 return semantics.maxExponent;
234}
237 return semantics.minExponent;
238}
239unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
240 return semantics.sizeInBits;
241}
243 bool isSigned) {
244 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
245 // at least one more bit than the MaxExponent to hold the max FP value.
246 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
247 // Extra sign bit needed.
248 if (isSigned)
249 ++MinBitWidth;
250 return MinBitWidth;
251}
252
254 return semantics.hasZero;
255}
256
258 return semantics.hasSignedRepr;
259}
260
264
268
270 // Keep in sync with Type::isIEEELikeFPTy
271 return SemanticsToEnum(semantics) <= S_IEEEquad;
272}
273
275 return semantics.hasSignBitInMSB;
276}
277
279 const fltSemantics &Dst) {
280 // Exponent range must be larger.
281 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
282 return false;
283
284 // If the mantissa is long enough, the result value could still be denormal
285 // with a larger exponent range.
286 //
287 // FIXME: This condition is probably not accurate but also shouldn't be a
288 // practical concern with existing types.
289 return Dst.precision >= Src.precision;
290}
291
293 return Sem.sizeInBits;
294}
295
296static constexpr APFloatBase::ExponentType
297exponentZero(const fltSemantics &semantics) {
298 return semantics.minExponent - 1;
299}
300
301static constexpr APFloatBase::ExponentType
302exponentInf(const fltSemantics &semantics) {
303 return semantics.maxExponent + 1;
304}
305
306static constexpr APFloatBase::ExponentType
307exponentNaN(const fltSemantics &semantics) {
310 return exponentZero(semantics);
311 if (semantics.hasSignedRepr)
312 return semantics.maxExponent;
313 }
314 return semantics.maxExponent + 1;
315}
316
317/* A bunch of private, handy routines. */
318
319static inline Error createError(const Twine &Err) {
321}
322
323static constexpr inline unsigned int partCountForBits(unsigned int bits) {
324 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
326}
327
/* Map the character code C to its decimal digit value.  The digits
   '0'-'9' give 0U-9U; any other input gives a value >= 10U (codes below
   '0' wrap around as unsigned), which callers treat as "not a digit". */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
334
335/* Return the value of a decimal exponent of the form
336 [+-]ddddddd.
337
338 If the exponent overflows, returns a large exponent with the
339 appropriate sign. */
342 const unsigned int overlargeExponent = 24000; /* FIXME. */
343 StringRef::iterator p = begin;
344
345 // Treat no exponent as 0 to match binutils
346 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end))
347 return 0;
348
349 bool isNegative = *p == '-';
350 if (*p == '-' || *p == '+') {
351 p++;
352 if (p == end)
353 return createError("Exponent has no digits");
354 }
355
356 unsigned absExponent = decDigitValue(*p++);
357 if (absExponent >= 10U)
358 return createError("Invalid character in exponent");
359
360 for (; p != end; ++p) {
361 unsigned value = decDigitValue(*p);
362 if (value >= 10U)
363 return createError("Invalid character in exponent");
364
365 absExponent = absExponent * 10U + value;
366 if (absExponent >= overlargeExponent) {
367 absExponent = overlargeExponent;
368 break;
369 }
370 }
371
372 if (isNegative)
373 return -(int) absExponent;
374 else
375 return (int) absExponent;
376}
377
378/* This is ugly and needs cleaning up, but I don't immediately see
379 how whilst remaining safe. */
382 int exponentAdjustment) {
383 int exponent = 0;
384
385 if (p == end)
386 return createError("Exponent has no digits");
387
388 bool negative = *p == '-';
389 if (*p == '-' || *p == '+') {
390 p++;
391 if (p == end)
392 return createError("Exponent has no digits");
393 }
394
395 int unsignedExponent = 0;
396 bool overflow = false;
397 for (; p != end; ++p) {
398 unsigned int value;
399
400 value = decDigitValue(*p);
401 if (value >= 10U)
402 return createError("Invalid character in exponent");
403
404 unsignedExponent = unsignedExponent * 10 + value;
405 if (unsignedExponent > 32767) {
406 overflow = true;
407 break;
408 }
409 }
410
411 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
412 overflow = true;
413
414 if (!overflow) {
415 exponent = unsignedExponent;
416 if (negative)
417 exponent = -exponent;
418 exponent += exponentAdjustment;
419 if (exponent > 32767 || exponent < -32768)
420 overflow = true;
421 }
422
423 if (overflow)
424 exponent = negative ? -32768: 32767;
425
426 return exponent;
427}
428
431 StringRef::iterator *dot) {
432 StringRef::iterator p = begin;
433 *dot = end;
434 while (p != end && *p == '0')
435 p++;
436
437 if (p != end && *p == '.') {
438 *dot = p++;
439
440 if (end - begin == 1)
441 return createError("Significand has no digits");
442
443 while (p != end && *p == '0')
444 p++;
445 }
446
447 return p;
448}
449
450/* Given a normal decimal floating point number of the form
451
452 dddd.dddd[eE][+-]ddd
453
454 where the decimal point and exponent are optional, fill out the
455 structure D. Exponent is appropriate if the significand is
456 treated as an integer, and normalizedExponent if the significand
457 is taken to have the decimal point after a single leading
458 non-zero digit.
459
460 If the value is zero, D->firstSigDigit points to a non-digit, and
461 the return exponent is zero.
462*/
464 const char *firstSigDigit;
465 const char *lastSigDigit;
468};
469
472 StringRef::iterator dot = end;
473
474 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
475 if (!PtrOrErr)
476 return PtrOrErr.takeError();
477 StringRef::iterator p = *PtrOrErr;
478
479 D->firstSigDigit = p;
480 D->exponent = 0;
481 D->normalizedExponent = 0;
482
483 for (; p != end; ++p) {
484 if (*p == '.') {
485 if (dot != end)
486 return createError("String contains multiple dots");
487 dot = p++;
488 if (p == end)
489 break;
490 }
491 if (decDigitValue(*p) >= 10U)
492 break;
493 }
494
495 if (p != end) {
496 if (*p != 'e' && *p != 'E')
497 return createError("Invalid character in significand");
498 if (p == begin)
499 return createError("Significand has no digits");
500 if (dot != end && p - begin == 1)
501 return createError("Significand has no digits");
502
503 /* p points to the first non-digit in the string */
504 auto ExpOrErr = readExponent(p + 1, end);
505 if (!ExpOrErr)
506 return ExpOrErr.takeError();
507 D->exponent = *ExpOrErr;
508
509 /* Implied decimal point? */
510 if (dot == end)
511 dot = p;
512 }
513
514 /* If number is all zeroes accept any exponent. */
515 if (p != D->firstSigDigit) {
516 /* Drop insignificant trailing zeroes. */
517 if (p != begin) {
518 do
519 do
520 p--;
521 while (p != begin && *p == '0');
522 while (p != begin && *p == '.');
523 }
524
525 /* Adjust the exponents for any decimal point. */
526 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
527 D->normalizedExponent = (D->exponent +
528 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
529 - (dot > D->firstSigDigit && dot < p)));
530 }
531
532 D->lastSigDigit = p;
533 return Error::success();
534}
535
536/* Return the trailing fraction of a hexadecimal number.
537 DIGITVALUE is the first hex digit of the fraction, P points to
538 the next digit. */
541 unsigned int digitValue) {
542 /* If the first trailing digit isn't 0 or 8 we can work out the
543 fraction immediately. */
544 if (digitValue > 8)
545 return lfMoreThanHalf;
546 else if (digitValue < 8 && digitValue > 0)
547 return lfLessThanHalf;
548
549 // Otherwise we need to find the first non-zero digit.
550 while (p != end && (*p == '0' || *p == '.'))
551 p++;
552
553 if (p == end)
554 return createError("Invalid trailing hexadecimal fraction!");
555
556 unsigned hexDigit = hexDigitValue(*p);
557
558 /* If we ran off the end it is exactly zero or one-half, otherwise
559 a little more. */
560 if (hexDigit == UINT_MAX)
561 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
562 else
563 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
564}
565
566/* Return the fraction lost were a bignum truncated losing the least
567 significant BITS bits. */
568static lostFraction
570 unsigned int partCount,
571 unsigned int bits)
572{
573 unsigned lsb = APInt::tcLSB(parts, partCount);
574
575 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
576 if (bits <= lsb)
577 return lfExactlyZero;
578 if (bits == lsb + 1)
579 return lfExactlyHalf;
580 if (bits <= partCount * APFloatBase::integerPartWidth &&
581 APInt::tcExtractBit(parts, bits - 1))
582 return lfMoreThanHalf;
583
584 return lfLessThanHalf;
585}
586
587/* Shift DST right BITS bits noting lost fraction. */
588static lostFraction
589shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
590{
591 lostFraction lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
592
593 APInt::tcShiftRight(dst, parts, bits);
594
595 return lost_fraction;
596}
597
598/* Combine the effect of two lost fractions. */
599static lostFraction
601 lostFraction lessSignificant)
602{
603 if (lessSignificant != lfExactlyZero) {
604 if (moreSignificant == lfExactlyZero)
605 moreSignificant = lfLessThanHalf;
606 else if (moreSignificant == lfExactlyHalf)
607 moreSignificant = lfMoreThanHalf;
608 }
609
610 return moreSignificant;
611}
612
/* Bound on the error, in half-ulps, of a product of two floating point
   numbers.  The operands differ from the values they approximate by at
   most HUERR1 and HUERR2 half-ulps respectively, and INEXACTMULTIPLY says
   whether the multiplication itself was inexact.  The true error is
   strictly less than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1,
                               unsigned int HUerr2) {
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int operandError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1u : 0u;

  /* Exact operands: only the multiplication can have introduced error. */
  if (operandError == 0)
    return multiplyError * 2;

  return multiplyError + 2 * operandError;
}
630
631/* The number of ulps from the boundary (zero, or half if ISNEAREST)
632 when the least significant BITS are truncated. BITS cannot be
633 zero. */
635ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
636 bool isNearest) {
637 assert(bits != 0);
638
639 bits--;
640 unsigned count = bits / APFloatBase::integerPartWidth;
641 unsigned partBits = bits % APFloatBase::integerPartWidth + 1;
642
644 parts[count] & (~(APFloatBase::integerPart)0 >>
645 (APFloatBase::integerPartWidth - partBits));
646
648 if (isNearest)
649 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
650 else
651 boundary = 0;
652
653 if (count == 0) {
654 if (part - boundary <= boundary - part)
655 return part - boundary;
656 else
657 return boundary - part;
658 }
659
660 if (part == boundary) {
661 while (--count)
662 if (parts[count])
663 return ~(APFloatBase::integerPart) 0; /* A lot. */
664
665 return parts[0];
666 } else if (part == boundary - 1) {
667 while (--count)
668 if (~parts[count])
669 return ~(APFloatBase::integerPart) 0; /* A lot. */
670
671 return -parts[0];
672 }
673
674 return ~(APFloatBase::integerPart) 0; /* A lot. */
675}
676
677/* Place pow(5, power) in DST, and return the number of parts used.
678 DST must be at least one part larger than size of the answer. */
679static unsigned int
680powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
681 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
683 pow5s[0] = 78125 * 5;
684
685 unsigned int partsCount = 1;
686 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
687 assert(power <= maxExponent);
688
689 p1 = dst;
690 p2 = scratch;
691
692 *p1 = firstEightPowers[power & 7];
693 power >>= 3;
694
695 unsigned result = 1;
696 pow5 = pow5s;
697
698 for (unsigned int n = 0; power; power >>= 1, n++) {
699 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
700 if (n != 0) {
701 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
702 partsCount, partsCount);
703 partsCount *= 2;
704 if (pow5[partsCount - 1] == 0)
705 partsCount--;
706 }
707
708 if (power & 1) {
710
711 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
712 result += partsCount;
713 if (p2[result - 1] == 0)
714 result--;
715
716 /* Now result is in p1 with partsCount parts and p2 is scratch
717 space. */
718 tmp = p1;
719 p1 = p2;
720 p2 = tmp;
721 }
722
723 pow5 += partsCount;
724 }
725
726 if (p1 != dst)
727 APInt::tcAssign(dst, p1, result);
728
729 return result;
730}
731
732/* Zero at the end to avoid modular arithmetic when adding one; used
733 when rounding up during hexadecimal output. */
734static const char hexDigitsLower[] = "0123456789abcdef0";
735static const char hexDigitsUpper[] = "0123456789ABCDEF0";
736static const char infinityL[] = "infinity";
737static const char infinityU[] = "INFINITY";
738static const char NaNL[] = "nan";
739static const char NaNU[] = "NAN";
740
741/* Write out an integerPart in hexadecimal, starting with the most
742 significant nibble. Write out exactly COUNT hexdigits, return
743 COUNT. */
744static unsigned int
745partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
746 const char *hexDigitChars)
747{
748 unsigned int result = count;
749
751
752 part >>= (APFloatBase::integerPartWidth - 4 * count);
753 while (count--) {
754 dst[count] = hexDigitChars[part & 0xf];
755 part >>= 4;
756 }
757
758 return result;
759}
760
/* Write N to DST as an unsigned decimal integer with no terminator, and
   return a pointer just past the last character written.  At least one
   digit is always produced. */
static char *writeUnsignedDecimal(char *dst, unsigned int n) {
  char digits[40];
  char *top = digits;

  // Peel off digits least significant first.
  do {
    *top++ = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  // Copy them out in the opposite order.
  while (top != digits)
    *dst++ = *--top;

  return dst;
}
776
777/* Write out a signed decimal integer. */
778static char *writeSignedDecimal(char *dst, int value) {
779 if (value < 0) {
780 *dst++ = '-';
781 dst = writeUnsignedDecimal(dst, -(unsigned) value);
782 } else {
783 dst = writeUnsignedDecimal(dst, value);
784 }
785
786 return dst;
787}
788
789// Compute the ULP of the input using a definition from:
790// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
791// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
792static APFloat harrisonUlp(const APFloat &X) {
793 const fltSemantics &Sem = X.getSemantics();
794 switch (X.getCategory()) {
795 case APFloat::fcNaN:
796 return APFloat::getQNaN(Sem);
798 return APFloat::getInf(Sem);
799 case APFloat::fcZero:
800 return APFloat::getSmallest(Sem);
802 break;
803 }
804 if (X.isDenormal() || X.isSmallestNormalized())
805 return APFloat::getSmallest(Sem);
806 int Exp = ilogb(X);
807 if (X.getExactLog2() != INT_MIN)
808 Exp -= 1;
809 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
811}
812
813namespace detail {
814/* Constructors. */
815void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
816 semantics = ourSemantics;
817 unsigned count = partCount();
818 if (count > 1)
819 significand.parts = new integerPart[count];
820}
821
822void IEEEFloat::freeSignificand() {
823 if (needsCleanup())
824 delete [] significand.parts;
825}
826
827void IEEEFloat::assign(const IEEEFloat &rhs) {
828 assert(semantics == rhs.semantics);
829
830 sign = rhs.sign;
831 category = rhs.category;
832 exponent = rhs.exponent;
833 if (isFiniteNonZero() || category == fcNaN)
834 copySignificand(rhs);
835}
836
837void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
838 assert(isFiniteNonZero() || category == fcNaN);
839 assert(rhs.partCount() >= partCount());
840
841 APInt::tcAssign(significandParts(), rhs.significandParts(),
842 partCount());
843}
844
845/* Make this number a NaN, with an arbitrary but deterministic value
846 for the significand. If double or longer, this is a signalling NaN,
847 which may not be ideal. If float, this is QNaN(0). */
848void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
849 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
850 llvm_unreachable("This floating point format does not support NaN");
851
852 if (Negative && !semantics->hasSignedRepr)
854 "This floating point format does not support signed values");
855
856 category = fcNaN;
857 sign = Negative;
858 exponent = exponentNaN();
859
860 integerPart *significand = significandParts();
861 unsigned numParts = partCount();
862
863 APInt fill_storage;
864 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
865 // Finite-only types do not distinguish signalling and quiet NaN, so
866 // make them all signalling.
867 SNaN = false;
868 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
869 sign = true;
870 fill_storage = APInt::getZero(semantics->precision - 1);
871 } else {
872 fill_storage = APInt::getAllOnes(semantics->precision - 1);
873 }
874 fill = &fill_storage;
875 }
876
877 // Set the significand bits to the fill.
878 if (!fill || fill->getNumWords() < numParts)
879 APInt::tcSet(significand, 0, numParts);
880 if (fill) {
881 APInt::tcAssign(significand, fill->getRawData(),
882 std::min(fill->getNumWords(), numParts));
883
884 // Zero out the excess bits of the significand.
885 unsigned bitsToPreserve = semantics->precision - 1;
886 unsigned part = bitsToPreserve / 64;
887 bitsToPreserve %= 64;
888 significand[part] &= ((1ULL << bitsToPreserve) - 1);
889 for (part++; part != numParts; ++part)
890 significand[part] = 0;
891 }
892
893 unsigned QNaNBit =
894 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
895
896 if (SNaN) {
897 // We always have to clear the QNaN bit to make it an SNaN.
898 APInt::tcClearBit(significand, QNaNBit);
899
900 // If there are no bits set in the payload, we have to set
901 // *something* to make it a NaN instead of an infinity;
902 // conventionally, this is the next bit down from the QNaN bit.
903 if (APInt::tcIsZero(significand, numParts))
904 APInt::tcSetBit(significand, QNaNBit - 1);
905 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
906 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
907 // Do nothing.
908 } else {
909 // We always have to set the QNaN bit to make it a QNaN.
910 APInt::tcSetBit(significand, QNaNBit);
911 }
912
913 // For x87 extended precision, we want to make a NaN, not a
914 // pseudo-NaN. Maybe we should expose the ability to make
915 // pseudo-NaNs?
916 if (semantics == &APFloatBase::semX87DoubleExtended)
917 APInt::tcSetBit(significand, QNaNBit + 1);
918}
919
921 if (this != &rhs) {
922 if (semantics != rhs.semantics) {
923 freeSignificand();
924 initialize(rhs.semantics);
925 }
926 assign(rhs);
927 }
928
929 return *this;
930}
931
933 freeSignificand();
934
935 semantics = rhs.semantics;
936 significand = rhs.significand;
937 exponent = rhs.exponent;
938 category = rhs.category;
939 sign = rhs.sign;
940
941 rhs.semantics = &APFloatBase::semBogus;
942 return *this;
943}
944
946 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
947 (APInt::tcExtractBit(significandParts(),
948 semantics->precision - 1) == 0);
949}
950
952 // The smallest number by magnitude in our format will be the smallest
953 // denormal, i.e. the floating point number with exponent being minimum
954 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
955 return isFiniteNonZero() && exponent == semantics->minExponent &&
956 significandMSB() == 0;
957}
958
960 return getCategory() == fcNormal && exponent == semantics->minExponent &&
961 isSignificandAllZerosExceptMSB();
962}
963
964unsigned int IEEEFloat::getNumHighBits() const {
965 const unsigned int PartCount = partCountForBits(semantics->precision);
966 const unsigned int Bits = PartCount * integerPartWidth;
967
968 // Compute how many bits are used in the final word.
969 // When precision is just 1, it represents the 'Pth'
970 // Precision bit and not the actual significand bit.
971 const unsigned int NumHighBits = (semantics->precision > 1)
972 ? (Bits - semantics->precision + 1)
973 : (Bits - semantics->precision);
974 return NumHighBits;
975}
976
977bool IEEEFloat::isSignificandAllOnes() const {
978 // Test if the significand excluding the integral bit is all ones. This allows
979 // us to test for binade boundaries.
980 const integerPart *Parts = significandParts();
981 const unsigned PartCount = partCountForBits(semantics->precision);
982 for (unsigned i = 0; i < PartCount - 1; i++)
983 if (~Parts[i])
984 return false;
985
986 // Set the unused high bits to all ones when we compare.
987 const unsigned NumHighBits = getNumHighBits();
988 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
989 "Can not have more high bits to fill than integerPartWidth");
990 const integerPart HighBitFill =
991 ~integerPart(0) << (integerPartWidth - NumHighBits);
992 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
993 return false;
994
995 return true;
996}
997
998bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
999 // Test if the significand excluding the integral bit is all ones except for
1000 // the least significant bit.
1001 const integerPart *Parts = significandParts();
1002
1003 if (Parts[0] & 1)
1004 return false;
1005
1006 const unsigned PartCount = partCountForBits(semantics->precision);
1007 for (unsigned i = 0; i < PartCount - 1; i++) {
1008 if (~Parts[i] & ~unsigned{!i})
1009 return false;
1010 }
1011
1012 // Set the unused high bits to all ones when we compare.
1013 const unsigned NumHighBits = getNumHighBits();
1014 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1015 "Can not have more high bits to fill than integerPartWidth");
1016 const integerPart HighBitFill = ~integerPart(0)
1017 << (integerPartWidth - NumHighBits);
1018 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1019 return false;
1020
1021 return true;
1022}
1023
1024bool IEEEFloat::isSignificandAllZeros() const {
1025 // Test if the significand excluding the integral bit is all zeros. This
1026 // allows us to test for binade boundaries.
1027 const integerPart *Parts = significandParts();
1028 const unsigned PartCount = partCountForBits(semantics->precision);
1029
1030 for (unsigned i = 0; i < PartCount - 1; i++)
1031 if (Parts[i])
1032 return false;
1033
1034 // Compute how many bits are used in the final word.
1035 const unsigned NumHighBits = getNumHighBits();
1036 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1037 "clear than integerPartWidth");
1038 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1039
1040 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1041 return false;
1042
1043 return true;
1044}
1045
1046bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1047 const integerPart *Parts = significandParts();
1048 const unsigned PartCount = partCountForBits(semantics->precision);
1049
1050 for (unsigned i = 0; i < PartCount - 1; i++) {
1051 if (Parts[i])
1052 return false;
1053 }
1054
1055 const unsigned NumHighBits = getNumHighBits();
1056 const integerPart MSBMask = integerPart(1)
1057 << (integerPartWidth - NumHighBits);
1058 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1059}
1060
1062 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1063 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1064 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1065 // The largest number by magnitude in our format will be the floating point
1066 // number with maximum exponent and with significand that is all ones except
1067 // the LSB.
1068 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1069 ? isSignificandAllOnesExceptLSB()
1070 : IsMaxExp;
1071 } else {
1072 // The largest number by magnitude in our format will be the floating point
1073 // number with maximum exponent and with significand that is all ones.
1074 return IsMaxExp && isSignificandAllOnes();
1075 }
1076}
1077
1079 // This could be made more efficient; I'm going for obviously correct.
1080 if (!isFinite()) return false;
1081 IEEEFloat truncated = *this;
1082 truncated.roundToIntegral(rmTowardZero);
1083 return compare(truncated) == cmpEqual;
1084}
1085
1086bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1087 if (this == &rhs)
1088 return true;
1089 if (semantics != rhs.semantics ||
1090 category != rhs.category ||
1091 sign != rhs.sign)
1092 return false;
1093 if (category==fcZero || category==fcInfinity)
1094 return true;
1095
1096 if (isFiniteNonZero() && exponent != rhs.exponent)
1097 return false;
1098
1099 return std::equal(significandParts(), significandParts() + partCount(),
1100 rhs.significandParts());
1101}
1102
1104 initialize(&ourSemantics);
1105 sign = 0;
1106 category = fcNormal;
1107 zeroSignificand();
1108 exponent = ourSemantics.precision - 1;
1109 significandParts()[0] = value;
1111}
1112
1114 initialize(&ourSemantics);
1115 // The Float8E8MOFNU format does not have a representation
1116 // for zero. So, use the closest representation instead.
1117 // Moreover, the all-zero encoding represents a valid
1118 // normal value (which is the smallestNormalized here).
1119 // Hence, we call makeSmallestNormalized (where category is
1120 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1121 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1122}
1123
1124// Delegate to the previous constructor, because later copy constructor may
1125// actually inspects category, which can't be garbage.
1127 : IEEEFloat(ourSemantics) {}
1128
1130 initialize(rhs.semantics);
1131 assign(rhs);
1132}
1133
1134IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1135 *this = std::move(rhs);
1136}
1137
1138IEEEFloat::~IEEEFloat() { freeSignificand(); }
1139
1140unsigned int IEEEFloat::partCount() const {
1141 return partCountForBits(semantics->precision + 1);
1142}
1143
1144const APFloat::integerPart *IEEEFloat::significandParts() const {
1145 return const_cast<IEEEFloat *>(this)->significandParts();
1146}
1147
1148APFloat::integerPart *IEEEFloat::significandParts() {
1149 if (partCount() > 1)
1150 return significand.parts;
1151 else
1152 return &significand.part;
1153}
1154
1155void IEEEFloat::zeroSignificand() {
1156 APInt::tcSet(significandParts(), 0, partCount());
1157}
1158
1159/* Increment an fcNormal floating point number's significand. */
1160void IEEEFloat::incrementSignificand() {
1161 [[maybe_unused]] integerPart carry =
1162 APInt::tcIncrement(significandParts(), partCount());
1163
1164 /* Our callers should never cause us to overflow. */
1165 assert(carry == 0);
1166}
1167
1168/* Add the significand of the RHS. Returns the carry flag. */
1169APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1170 integerPart *parts = significandParts();
1171
1172 assert(semantics == rhs.semantics);
1173 assert(exponent == rhs.exponent);
1174
1175 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1176}
1177
1178/* Subtract the significand of the RHS with a borrow flag. Returns
1179 the borrow flag. */
1180APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1181 integerPart borrow) {
1182 integerPart *parts = significandParts();
1183
1184 assert(semantics == rhs.semantics);
1185 assert(exponent == rhs.exponent);
1186
1187 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1188 partCount());
1189}
1190
1191/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1192 on to the full-precision result of the multiplication. Returns the
1193 lost fraction. */
1194lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1195 IEEEFloat addend,
1196 bool ignoreAddend) {
1197 integerPart scratch[4];
1198 bool ignored;
1199
1200 assert(semantics == rhs.semantics);
1201
1202 unsigned precision = semantics->precision;
1203
1204 // Allocate space for twice as many bits as the original significand, plus one
1205 // extra bit for the addition to overflow into.
1206 unsigned newPartsCount = partCountForBits(precision * 2 + 1);
1207
1208 // FIXME: Replace with SmallVector<4>.
1209 integerPart *fullSignificand =
1210 newPartsCount > 4 ? new integerPart[newPartsCount] : scratch;
1211
1212 integerPart *lhsSignificand = significandParts();
1213 unsigned partsCount = partCount();
1214
1215 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1216 rhs.significandParts(), partsCount, partsCount);
1217
1218 lostFraction lost_fraction = lfExactlyZero;
1219 // One, not zero, based MSB.
1220 unsigned omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1221 exponent += rhs.exponent;
1222
1223 // Assume the operands involved in the multiplication are single-precision
1224 // FP, and the two multiplicants are:
1225 // *this = a23 . a22 ... a0 * 2^e1
1226 // rhs = b23 . b22 ... b0 * 2^e2
1227 // the result of multiplication is:
1228 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1229 // Note that there are three significant bits at the left-hand side of the
1230 // radix point: two for the multiplication, and an overflow bit for the
1231 // addition (that will always be zero at this point). Move the radix point
1232 // toward left by two bits, and adjust exponent accordingly.
1233 exponent += 2;
1234
1235 if (!ignoreAddend && addend.isNonZero()) {
1236 // The intermediate result of the multiplication has "2 * precision"
1237 // signicant bit; adjust the addend to be consistent with mul result.
1238 //
1239 Significand savedSignificand = significand;
1240 const fltSemantics *savedSemantics = semantics;
1241
1242 // Normalize our MSB to one below the top bit to allow for overflow.
1243 unsigned extendedPrecision = 2 * precision + 1;
1244 if (omsb != extendedPrecision - 1) {
1245 assert(extendedPrecision > omsb);
1246 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1247 (extendedPrecision - 1) - omsb);
1248 exponent -= (extendedPrecision - 1) - omsb;
1249 }
1250
1251 /* Create new semantics. */
1252 fltSemantics extendedSemantics = *semantics;
1253 extendedSemantics.precision = extendedPrecision;
1254
1255 if (newPartsCount == 1)
1256 significand.part = fullSignificand[0];
1257 else
1258 significand.parts = fullSignificand;
1259 semantics = &extendedSemantics;
1260
1261 // Make a copy so we can convert it to the extended semantics.
1262 // Note that we cannot convert the addend directly, as the extendedSemantics
1263 // is a local variable (which we take a reference to).
1264 IEEEFloat extendedAddend(addend);
1265 [[maybe_unused]] opStatus status = extendedAddend.convert(
1266 extendedSemantics, APFloat::rmTowardZero, &ignored);
1267 assert(status == APFloat::opOK);
1268
1269 // Shift the significand of the addend right by one bit. This guarantees
1270 // that the high bit of the significand is zero (same as fullSignificand),
1271 // so the addition will overflow (if it does overflow at all) into the top bit.
1272 lost_fraction = extendedAddend.shiftSignificandRight(1);
1273 assert(lost_fraction == lfExactlyZero &&
1274 "Lost precision while shifting addend for fused-multiply-add.");
1275
1276 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1277
1278 /* Restore our state. */
1279 if (newPartsCount == 1)
1280 fullSignificand[0] = significand.part;
1281 significand = savedSignificand;
1282 semantics = savedSemantics;
1283
1284 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1285 }
1286
1287 // Convert the result having "2 * precision" significant-bits back to the one
1288 // having "precision" significant-bits. First, move the radix point from
1289 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1290 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1291 exponent -= precision + 1;
1292
1293 // In case MSB resides at the left-hand side of radix point, shift the
1294 // mantissa right by some amount to make sure the MSB reside right before
1295 // the radix point (i.e. "MSB . rest-significant-bits").
1296 //
1297 // Note that the result is not normalized when "omsb < precision". So, the
1298 // caller needs to call IEEEFloat::normalize() if normalized value is
1299 // expected.
1300 if (omsb > precision) {
1301 unsigned int bits, significantParts;
1302 lostFraction lf;
1303
1304 bits = omsb - precision;
1305 significantParts = partCountForBits(omsb);
1306 lf = shiftRight(fullSignificand, significantParts, bits);
1307 lost_fraction = combineLostFractions(lf, lost_fraction);
1308 exponent += bits;
1309 }
1310
1311 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1312
1313 if (newPartsCount > 4)
1314 delete [] fullSignificand;
1315
1316 return lost_fraction;
1317}
1318
1319lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1320 // When the given semantics has zero, the addend here is a zero.
1321 // i.e . it belongs to the 'fcZero' category.
1322 // But when the semantics does not support zero, we need to
1323 // explicitly convey that this addend should be ignored
1324 // for multiplication.
1325 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1326}
1327
1328/* Multiply the significands of LHS and RHS to DST. */
1329lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1330 integerPart scratch[4];
1331
1332 assert(semantics == rhs.semantics);
1333
1334 integerPart *lhsSignificand = significandParts();
1335 const integerPart *rhsSignificand = rhs.significandParts();
1336 unsigned partsCount = partCount();
1337
1338 integerPart *dividend =
1339 partsCount > 2 ? new integerPart[partsCount * 2] : scratch;
1340 integerPart *divisor = dividend + partsCount;
1341
1342 /* Copy the dividend and divisor as they will be modified in-place. */
1343 for (unsigned i = 0; i < partsCount; i++) {
1344 dividend[i] = lhsSignificand[i];
1345 divisor[i] = rhsSignificand[i];
1346 lhsSignificand[i] = 0;
1347 }
1348
1349 exponent -= rhs.exponent;
1350
1351 unsigned int precision = semantics->precision;
1352
1353 /* Normalize the divisor. */
1354 unsigned bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1355 if (bit) {
1356 exponent += bit;
1357 APInt::tcShiftLeft(divisor, partsCount, bit);
1358 }
1359
1360 /* Normalize the dividend. */
1361 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1362 if (bit) {
1363 exponent -= bit;
1364 APInt::tcShiftLeft(dividend, partsCount, bit);
1365 }
1366
1367 /* Ensure the dividend >= divisor initially for the loop below.
1368 Incidentally, this means that the division loop below is
1369 guaranteed to set the integer bit to one. */
1370 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1371 exponent--;
1372 APInt::tcShiftLeft(dividend, partsCount, 1);
1373 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1374 }
1375
1376 /* Long division. */
1377 for (bit = precision; bit; bit -= 1) {
1378 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1379 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1380 APInt::tcSetBit(lhsSignificand, bit - 1);
1381 }
1382
1383 APInt::tcShiftLeft(dividend, partsCount, 1);
1384 }
1385
1386 /* Figure out the lost fraction. */
1387 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1388
1389 lostFraction lost_fraction;
1390 if (cmp > 0)
1391 lost_fraction = lfMoreThanHalf;
1392 else if (cmp == 0)
1393 lost_fraction = lfExactlyHalf;
1394 else if (APInt::tcIsZero(dividend, partsCount))
1395 lost_fraction = lfExactlyZero;
1396 else
1397 lost_fraction = lfLessThanHalf;
1398
1399 if (partsCount > 2)
1400 delete [] dividend;
1401
1402 return lost_fraction;
1403}
1404
1405unsigned int IEEEFloat::significandMSB() const {
1406 return APInt::tcMSB(significandParts(), partCount());
1407}
1408
1409unsigned int IEEEFloat::significandLSB() const {
1410 return APInt::tcLSB(significandParts(), partCount());
1411}
1412
1413/* Note that a zero result is NOT normalized to fcZero. */
1414lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1415 /* Our exponent should not overflow. */
1416 assert((ExponentType) (exponent + bits) >= exponent);
1417
1418 exponent += bits;
1419
1420 return shiftRight(significandParts(), partCount(), bits);
1421}
1422
1423/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1424void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1425 assert(bits < semantics->precision ||
1426 (semantics->precision == 1 && bits <= 1));
1427
1428 if (bits) {
1429 unsigned int partsCount = partCount();
1430
1431 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1432 exponent -= bits;
1433
1434 assert(!APInt::tcIsZero(significandParts(), partsCount));
1435 }
1436}
1437
1439 assert(semantics == rhs.semantics);
1441 assert(rhs.isFiniteNonZero());
1442
1443 int compare = exponent - rhs.exponent;
1444
1445 /* If exponents are equal, do an unsigned bignum comparison of the
1446 significands. */
1447 if (compare == 0)
1448 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1449 partCount());
1450
1451 if (compare > 0)
1452 return cmpGreaterThan;
1453 else if (compare < 0)
1454 return cmpLessThan;
1455 else
1456 return cmpEqual;
1457}
1458
1459/* Set the least significant BITS bits of a bignum, clear the
1460 rest. */
1461static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1462 unsigned bits) {
1463 unsigned i = 0;
1464 while (bits > APInt::APINT_BITS_PER_WORD) {
1465 dst[i++] = ~(APInt::WordType)0;
1467 }
1468
1469 if (bits)
1470 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1471
1472 while (i < parts)
1473 dst[i++] = 0;
1474}
1475
1476/* Handle overflow. Sign is preserved. We either become infinity or
1477 the largest finite number. */
1478APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1480 /* Infinity? */
1481 if (rounding_mode == rmNearestTiesToEven ||
1482 rounding_mode == rmNearestTiesToAway ||
1483 (rounding_mode == rmTowardPositive && !sign) ||
1484 (rounding_mode == rmTowardNegative && sign)) {
1486 makeNaN(false, sign);
1487 else
1488 category = fcInfinity;
1489 return static_cast<opStatus>(opOverflow | opInexact);
1490 }
1491 }
1492
1493 /* Otherwise we become the largest finite number. */
1494 category = fcNormal;
1495 exponent = semantics->maxExponent;
1496 tcSetLeastSignificantBits(significandParts(), partCount(),
1497 semantics->precision);
1498 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1499 semantics->nanEncoding == fltNanEncoding::AllOnes)
1500 APInt::tcClearBit(significandParts(), 0);
1501
1502 return opInexact;
1503}
1504
1505/* Returns TRUE if, when truncating the current number, with BIT the
1506 new LSB, with the given lost fraction and rounding mode, the result
1507 would need to be rounded away from zero (i.e., by increasing the
1508 signficand). This routine must work for fcZero of both signs, and
1509 fcNormal numbers. */
1510bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1511 lostFraction lost_fraction,
1512 unsigned int bit) const {
1513 /* NaNs and infinities should not have lost fractions. */
1514 assert(isFiniteNonZero() || category == fcZero);
1515
1516 /* Current callers never pass this so we don't handle it. */
1517 assert(lost_fraction != lfExactlyZero);
1518
1519 switch (rounding_mode) {
1521 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1522
1524 if (lost_fraction == lfMoreThanHalf)
1525 return true;
1526
1527 /* Our zeroes don't have a significand to test. */
1528 if (lost_fraction == lfExactlyHalf && category != fcZero)
1529 return APInt::tcExtractBit(significandParts(), bit);
1530
1531 return false;
1532
1533 case rmTowardZero:
1534 return false;
1535
1536 case rmTowardPositive:
1537 return !sign;
1538
1539 case rmTowardNegative:
1540 return sign;
1541
1542 default:
1543 break;
1544 }
1545 llvm_unreachable("Invalid rounding mode found");
1546}
1547
1548APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1549 lostFraction lost_fraction) {
1550 if (!isFiniteNonZero())
1551 return opOK;
1552
1553 /* Before rounding normalize the exponent of fcNormal numbers. */
1554 /* One, not zero, based MSB. */
1555 unsigned omsb = significandMSB() + 1;
1556
1557 // Only skip this `if` if the value is exactly zero.
1558 if (omsb || lost_fraction != lfExactlyZero) {
1559 /* OMSB is numbered from 1. We want to place it in the integer
1560 bit numbered PRECISION if possible, with a compensating change in
1561 the exponent. */
1562 int exponentChange = omsb - semantics->precision;
1563
1564 /* If the resulting exponent is too high, overflow according to
1565 the rounding mode. */
1566 if (exponent + exponentChange > semantics->maxExponent)
1567 return handleOverflow(rounding_mode);
1568
1569 /* Subnormal numbers have exponent minExponent, and their MSB
1570 is forced based on that. */
1571 if (exponent + exponentChange < semantics->minExponent)
1572 exponentChange = semantics->minExponent - exponent;
1573
1574 /* Shifting left is easy as we don't lose precision. */
1575 if (exponentChange < 0) {
1576 assert(lost_fraction == lfExactlyZero);
1577
1578 shiftSignificandLeft(-exponentChange);
1579
1580 return opOK;
1581 }
1582
1583 if (exponentChange > 0) {
1584 lostFraction lf;
1585
1586 /* Shift right and capture any new lost fraction. */
1587 lf = shiftSignificandRight(exponentChange);
1588
1589 lost_fraction = combineLostFractions(lf, lost_fraction);
1590
1591 /* Keep OMSB up-to-date. */
1592 if (omsb > (unsigned) exponentChange)
1593 omsb -= exponentChange;
1594 else
1595 omsb = 0;
1596 }
1597 }
1598
1599 // The all-ones values is an overflow if NaN is all ones. If NaN is
1600 // represented by negative zero, then it is a valid finite value.
1601 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1602 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1603 exponent == semantics->maxExponent && isSignificandAllOnes())
1604 return handleOverflow(rounding_mode);
1605
1606 /* Now round the number according to rounding_mode given the lost
1607 fraction. */
1608
1609 /* As specified in IEEE 754, since we do not trap we do not report
1610 underflow for exact results. */
1611 if (lost_fraction == lfExactlyZero) {
1612 /* Canonicalize zeroes. */
1613 if (omsb == 0) {
1614 category = fcZero;
1615 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1616 sign = false;
1617 if (!semantics->hasZero)
1619 }
1620
1621 return opOK;
1622 }
1623
1624 /* Increment the significand if we're rounding away from zero. */
1625 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1626 if (omsb == 0)
1627 exponent = semantics->minExponent;
1628
1629 incrementSignificand();
1630 omsb = significandMSB() + 1;
1631
1632 /* Did the significand increment overflow? */
1633 if (omsb == (unsigned) semantics->precision + 1) {
1634 /* Renormalize by incrementing the exponent and shifting our
1635 significand right one. However if we already have the
1636 maximum exponent we overflow to infinity. */
1637 if (exponent == semantics->maxExponent)
1638 // Invoke overflow handling with a rounding mode that will guarantee
1639 // that the result gets turned into the correct infinity representation.
1640 // This is needed instead of just setting the category to infinity to
1641 // account for 8-bit floating point types that have no inf, only NaN.
1642 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1643
1644 shiftSignificandRight(1);
1645
1646 return opInexact;
1647 }
1648
1649 // The all-ones values is an overflow if NaN is all ones. If NaN is
1650 // represented by negative zero, then it is a valid finite value.
1651 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1652 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1653 exponent == semantics->maxExponent && isSignificandAllOnes())
1654 return handleOverflow(rounding_mode);
1655 }
1656
1657 /* The normal case - we were and are not denormal, and any
1658 significand increment above didn't overflow. */
1659 if (omsb == semantics->precision)
1660 return opInexact;
1661
1662 /* We have a non-zero denormal. */
1663 assert(omsb < semantics->precision);
1664
1665 /* Canonicalize zeroes. */
1666 if (omsb == 0) {
1667 category = fcZero;
1668 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1669 sign = false;
1670 // This condition handles the case where the semantics
1671 // does not have zero but uses the all-zero encoding
1672 // to represent the smallest normal value.
1673 if (!semantics->hasZero)
1675 }
1676
1677 /* The fcZero case is a denormal that underflowed to zero. */
1678 return (opStatus) (opUnderflow | opInexact);
1679}
1680
1681APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1682 bool subtract) {
1683 switch (PackCategoriesIntoKey(category, rhs.category)) {
1684 default:
1685 llvm_unreachable(nullptr);
1686
1690 assign(rhs);
1691 [[fallthrough]];
1696 if (isSignaling()) {
1697 makeQuiet();
1698 return opInvalidOp;
1699 }
1700 return rhs.isSignaling() ? opInvalidOp : opOK;
1701
1705 return opOK;
1706
1709 category = fcInfinity;
1710 sign = rhs.sign ^ subtract;
1711 return opOK;
1712
1714 assign(rhs);
1715 sign = rhs.sign ^ subtract;
1716 return opOK;
1717
1719 /* Sign depends on rounding mode; handled by caller. */
1720 return opOK;
1721
1723 /* Differently signed infinities can only be validly
1724 subtracted. */
1725 if (((sign ^ rhs.sign)!=0) != subtract) {
1726 makeNaN();
1727 return opInvalidOp;
1728 }
1729
1730 return opOK;
1731
1733 return opDivByZero;
1734 }
1735}
1736
1737/* Add or subtract two normal numbers. */
1738lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1739 bool subtract) {
1740 [[maybe_unused]] integerPart carry = 0;
1741 lostFraction lost_fraction;
1742
1743 /* Determine if the operation on the absolute values is effectively
1744 an addition or subtraction. */
1745 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1746
1747 /* Are we bigger exponent-wise than the RHS? */
1748 int bits = exponent - rhs.exponent;
1749
1750 /* Subtraction is more subtle than one might naively expect. */
1751 if (subtract) {
1752 if ((bits < 0) && !semantics->hasSignedRepr)
1754 "This floating point format does not support signed values");
1755
1756 IEEEFloat temp_rhs(rhs);
1757 bool lost_fraction_is_from_rhs = false;
1758
1759 if (bits == 0)
1760 lost_fraction = lfExactlyZero;
1761 else if (bits > 0) {
1762 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1763 lost_fraction_is_from_rhs = true;
1764 shiftSignificandLeft(1);
1765 } else {
1766 lost_fraction = shiftSignificandRight(-bits - 1);
1767 temp_rhs.shiftSignificandLeft(1);
1768 }
1769
1770 // Should we reverse the subtraction.
1771 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1772 if (cmp_result == cmpLessThan) {
1773 bool borrow =
1774 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1775 if (borrow) {
1776 // The lost fraction is being subtracted, borrow from the significand
1777 // and invert `lost_fraction`.
1778 if (lost_fraction == lfLessThanHalf)
1779 lost_fraction = lfMoreThanHalf;
1780 else if (lost_fraction == lfMoreThanHalf)
1781 lost_fraction = lfLessThanHalf;
1782 }
1783 carry = temp_rhs.subtractSignificand(*this, borrow);
1784 copySignificand(temp_rhs);
1785 sign = !sign;
1786 } else if (cmp_result == cmpGreaterThan) {
1787 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1788 if (borrow) {
1789 // The lost fraction is being subtracted, borrow from the significand
1790 // and invert `lost_fraction`.
1791 if (lost_fraction == lfLessThanHalf)
1792 lost_fraction = lfMoreThanHalf;
1793 else if (lost_fraction == lfMoreThanHalf)
1794 lost_fraction = lfLessThanHalf;
1795 }
1796 carry = subtractSignificand(temp_rhs, borrow);
1797 } else { // cmpEqual
1798 zeroSignificand();
1799 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1800 // rhs is slightly larger due to the lost fraction, flip the sign.
1801 sign = !sign;
1802 }
1803 }
1804
1805 /* The code above is intended to ensure that no borrow is
1806 necessary. */
1807 assert(!carry);
1808 } else {
1809 if (bits > 0) {
1810 IEEEFloat temp_rhs(rhs);
1811
1812 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1813 carry = addSignificand(temp_rhs);
1814 } else {
1815 lost_fraction = shiftSignificandRight(-bits);
1816 carry = addSignificand(rhs);
1817 }
1818
1819 /* We have a guard bit; generating a carry cannot happen. */
1820 assert(!carry);
1821 }
1822
1823 return lost_fraction;
1824}
1825
1826APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1827 switch (PackCategoriesIntoKey(category, rhs.category)) {
1828 default:
1829 llvm_unreachable(nullptr);
1830
1834 assign(rhs);
1835 sign = false;
1836 [[fallthrough]];
1841 sign ^= rhs.sign; // restore the original sign
1842 if (isSignaling()) {
1843 makeQuiet();
1844 return opInvalidOp;
1845 }
1846 return rhs.isSignaling() ? opInvalidOp : opOK;
1847
1851 category = fcInfinity;
1852 return opOK;
1853
1857 category = fcZero;
1858 return opOK;
1859
1862 makeNaN();
1863 return opInvalidOp;
1864
1866 return opOK;
1867 }
1868}
1869
1870APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1871 switch (PackCategoriesIntoKey(category, rhs.category)) {
1872 default:
1873 llvm_unreachable(nullptr);
1874
1878 assign(rhs);
1879 sign = false;
1880 [[fallthrough]];
1885 sign ^= rhs.sign; // restore the original sign
1886 if (isSignaling()) {
1887 makeQuiet();
1888 return opInvalidOp;
1889 }
1890 return rhs.isSignaling() ? opInvalidOp : opOK;
1891
1896 return opOK;
1897
1899 category = fcZero;
1900 return opOK;
1901
1903 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1904 makeNaN(false, sign);
1905 else
1906 category = fcInfinity;
1907 return opDivByZero;
1908
1911 makeNaN();
1912 return opInvalidOp;
1913
1915 return opOK;
1916 }
1917}
1918
1919APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1920 switch (PackCategoriesIntoKey(category, rhs.category)) {
1921 default:
1922 llvm_unreachable(nullptr);
1923
1927 assign(rhs);
1928 [[fallthrough]];
1933 if (isSignaling()) {
1934 makeQuiet();
1935 return opInvalidOp;
1936 }
1937 return rhs.isSignaling() ? opInvalidOp : opOK;
1938
1942 return opOK;
1943
1949 makeNaN();
1950 return opInvalidOp;
1951
1953 return opOK;
1954 }
1955}
1956
1957APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1958 switch (PackCategoriesIntoKey(category, rhs.category)) {
1959 default:
1960 llvm_unreachable(nullptr);
1961
1965 assign(rhs);
1966 [[fallthrough]];
1971 if (isSignaling()) {
1972 makeQuiet();
1973 return opInvalidOp;
1974 }
1975 return rhs.isSignaling() ? opInvalidOp : opOK;
1976
1980 return opOK;
1981
1987 makeNaN();
1988 return opInvalidOp;
1989
1991 return opDivByZero; // fake status, indicating this is not a special case
1992 }
1993}
1994
1995/* Change sign. */
1997 // With NaN-as-negative-zero, neither NaN or negative zero can change
1998 // their signs.
1999 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2000 (isZero() || isNaN()))
2001 return;
2002 /* Look mummy, this one's easy. */
2003 sign = !sign;
2004}
2005
2006/* Normalized addition or subtraction. */
2007APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2008 roundingMode rounding_mode,
2009 bool subtract) {
2010 opStatus fs = addOrSubtractSpecials(rhs, subtract);
2011
2012 /* This return code means it was not a simple case. */
2013 if (fs == opDivByZero) {
2014 lostFraction lost_fraction;
2015
2016 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2017 fs = normalize(rounding_mode, lost_fraction);
2018
2019 /* Can only be zero if we lost no fraction. */
2020 assert(category != fcZero || lost_fraction == lfExactlyZero);
2021 }
2022
2023 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2024 positive zero unless rounding to minus infinity, except that
2025 adding two like-signed zeroes gives that zero. */
2026 if (category == fcZero) {
2027 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2028 sign = (rounding_mode == rmTowardNegative);
2029 // NaN-in-negative-zero means zeros need to be normalized to +0.
2030 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2031 sign = false;
2032 }
2033
2034 return fs;
2035}
2036
2037/* Normalized addition. */
2039 roundingMode rounding_mode) {
2040 return addOrSubtract(rhs, rounding_mode, false);
2041}
2042
2043/* Normalized subtraction. */
2045 roundingMode rounding_mode) {
2046 return addOrSubtract(rhs, rounding_mode, true);
2047}
2048
2049/* Normalized multiply. */
2051 roundingMode rounding_mode) {
2052 sign ^= rhs.sign;
2053 opStatus fs = multiplySpecials(rhs);
2054
2055 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2056 sign = false;
2057 if (isFiniteNonZero()) {
2058 lostFraction lost_fraction = multiplySignificand(rhs);
2059 fs = normalize(rounding_mode, lost_fraction);
2060 if (lost_fraction != lfExactlyZero)
2061 fs = (opStatus) (fs | opInexact);
2062 }
2063
2064 return fs;
2065}
2066
2067/* Normalized divide. */
2069 roundingMode rounding_mode) {
2070 sign ^= rhs.sign;
2071 opStatus fs = divideSpecials(rhs);
2072
2073 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2074 sign = false;
2075 if (isFiniteNonZero()) {
2076 lostFraction lost_fraction = divideSignificand(rhs);
2077 fs = normalize(rounding_mode, lost_fraction);
2078 if (lost_fraction != lfExactlyZero)
2079 fs = (opStatus) (fs | opInexact);
2080 }
2081
2082 return fs;
2083}
2084
2085/* Normalized remainder. */
2087 unsigned int origSign = sign;
2088
2089 // First handle the special cases.
2090 opStatus fs = remainderSpecials(rhs);
2091 if (fs != opDivByZero)
2092 return fs;
2093
2094 fs = opOK;
2095
2096 // Make sure the current value is less than twice the denom. If the addition
2097 // did not succeed (an overflow has happened), which means that the finite
2098 // value we currently posses must be less than twice the denom (as we are
2099 // using the same semantics).
2100 IEEEFloat P2 = rhs;
2101 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2102 fs = mod(P2);
2103 assert(fs == opOK);
2104 }
2105
2106 // Lets work with absolute numbers.
2107 IEEEFloat P = rhs;
2108 P.sign = false;
2109 sign = false;
2110
2111 //
2112 // To calculate the remainder we use the following scheme.
2113 //
2114 // The remainder is defained as follows:
2115 //
2116 // remainder = numer - rquot * denom = x - r * p
2117 //
2118 // Where r is the result of: x/p, rounded toward the nearest integral value
2119 // (with halfway cases rounded toward the even number).
2120 //
2121 // Currently, (after x mod 2p):
2122 // r is the number of 2p's present inside x, which is inherently, an even
2123 // number of p's.
2124 //
2125 // We may split the remaining calculation into 4 options:
2126 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2127 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2128 // are done as well.
2129 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2130 // to subtract 1p at least once.
2131 // - if x >= p then we must subtract p at least once, as x must be a
2132 // remainder.
2133 //
2134 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2135 //
2136 // We can now split the remaining calculation to the following 3 options:
2137 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2138 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2139 // must round up to the next even number. so we must subtract p once more.
2140 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2141 // integral, and subtract p once more.
2142 //
2143
2144 // Extend the semantics to prevent an overflow/underflow or inexact result.
2145 bool losesInfo;
2146 fltSemantics extendedSemantics = *semantics;
2147 extendedSemantics.maxExponent++;
2148 extendedSemantics.minExponent--;
2149 extendedSemantics.precision += 2;
2150
2151 IEEEFloat VEx = *this;
2152 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2153 assert(fs == opOK && !losesInfo);
2154 IEEEFloat PEx = P;
2155 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2156 assert(fs == opOK && !losesInfo);
2157
2158 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2159 // any fraction.
2160 fs = VEx.add(VEx, rmNearestTiesToEven);
2161 assert(fs == opOK);
2162
2163 if (VEx.compare(PEx) == cmpGreaterThan) {
2165 assert(fs == opOK);
2166
2167 // Make VEx = this.add(this), but because we have different semantics, we do
2168 // not want to `convert` again, so we just subtract PEx twice (which equals
2169 // to the desired value).
2170 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2171 assert(fs == opOK);
2172 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2173 assert(fs == opOK);
2174
2175 cmpResult result = VEx.compare(PEx);
2176 if (result == cmpGreaterThan || result == cmpEqual) {
2178 assert(fs == opOK);
2179 }
2180 }
2181
2182 if (isZero()) {
2183 sign = origSign; // IEEE754 requires this
2184 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2185 // But some 8-bit floats only have positive 0.
2186 sign = false;
2187 } else {
2188 sign ^= origSign;
2189 }
2190 return fs;
2191}
2192
2193/* Normalized llvm frem (C fmod). */
2195 opStatus fs = modSpecials(rhs);
2196 unsigned int origSign = sign;
2197
2198 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2200 int Exp = ilogb(*this) - ilogb(rhs);
2201 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2202 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2203 // check for it.
2204 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2205 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2206 V.sign = sign;
2207
2209
2210 // When the semantics supports zero, this loop's
2211 // exit-condition is handled by the 'isFiniteNonZero'
2212 // category check above. However, when the semantics
2213 // does not have 'fcZero' and we have reached the
2214 // minimum possible value, (and any further subtract
2215 // will underflow to the same value) explicitly
2216 // provide an exit-path here.
2217 if (!semantics->hasZero && this->isSmallest())
2218 break;
2219
2220 assert(fs==opOK);
2221 }
2222 if (isZero()) {
2223 sign = origSign; // fmod requires this
2224 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2225 sign = false;
2226 }
2227 return fs;
2228}
2229
2230/* Normalized fused-multiply-add. */
2232 const IEEEFloat &addend,
2233 roundingMode rounding_mode) {
2234 opStatus fs;
2235
2236 /* Post-multiplication sign, before addition. */
2237 sign ^= multiplicand.sign;
2238
2239 /* If and only if all arguments are normal do we need to do an
2240 extended-precision calculation. */
2241 if (isFiniteNonZero() &&
2242 multiplicand.isFiniteNonZero() &&
2243 addend.isFinite()) {
2244 lostFraction lost_fraction;
2245
2246 lost_fraction = multiplySignificand(multiplicand, addend);
2247 fs = normalize(rounding_mode, lost_fraction);
2248 if (lost_fraction != lfExactlyZero)
2249 fs = (opStatus) (fs | opInexact);
2250
2251 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2252 positive zero unless rounding to minus infinity, except that
2253 adding two like-signed zeroes gives that zero. */
2254 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2255 sign = (rounding_mode == rmTowardNegative);
2256 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2257 sign = false;
2258 }
2259 } else {
2260 fs = multiplySpecials(multiplicand);
2261
2262 /* FS can only be opOK or opInvalidOp. There is no more work
2263 to do in the latter case. The IEEE-754R standard says it is
2264 implementation-defined in this case whether, if ADDEND is a
2265 quiet NaN, we raise invalid op; this implementation does so.
2266
2267 If we need to do the addition we can do so with normal
2268 precision. */
2269 if (fs == opOK)
2270 fs = addOrSubtract(addend, rounding_mode, false);
2271 }
2272
2273 return fs;
2274}
2275
2276/* Rounding-mode correct round to integral value. */
2278 if (isInfinity())
2279 // [IEEE Std 754-2008 6.1]:
2280 // The behavior of infinity in floating-point arithmetic is derived from the
2281 // limiting cases of real arithmetic with operands of arbitrarily
2282 // large magnitude, when such a limit exists.
2283 // ...
2284 // Operations on infinite operands are usually exact and therefore signal no
2285 // exceptions ...
2286 return opOK;
2287
2288 if (isNaN()) {
2289 if (isSignaling()) {
2290 // [IEEE Std 754-2008 6.2]:
2291 // Under default exception handling, any operation signaling an invalid
2292 // operation exception and for which a floating-point result is to be
2293 // delivered shall deliver a quiet NaN.
2294 makeQuiet();
2295 // [IEEE Std 754-2008 6.2]:
2296 // Signaling NaNs shall be reserved operands that, under default exception
2297 // handling, signal the invalid operation exception(see 7.2) for every
2298 // general-computational and signaling-computational operation except for
2299 // the conversions described in 5.12.
2300 return opInvalidOp;
2301 } else {
2302 // [IEEE Std 754-2008 6.2]:
2303 // For an operation with quiet NaN inputs, other than maximum and minimum
2304 // operations, if a floating-point result is to be delivered the result
2305 // shall be a quiet NaN which should be one of the input NaNs.
2306 // ...
2307 // Every general-computational and quiet-computational operation involving
2308 // one or more input NaNs, none of them signaling, shall signal no
2309 // exception, except fusedMultiplyAdd might signal the invalid operation
2310 // exception(see 7.2).
2311 return opOK;
2312 }
2313 }
2314
2315 if (isZero()) {
2316 // [IEEE Std 754-2008 6.3]:
2317 // ... the sign of the result of conversions, the quantize operation, the
2318 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2319 // the sign of the first or only operand.
2320 return opOK;
2321 }
2322
2323 // If the exponent is large enough, we know that this value is already
2324 // integral, and the arithmetic below would potentially cause it to saturate
2325 // to +/-Inf. Bail out early instead.
2326 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2327 return opOK;
2328
2329 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2330 // precision of our format, and then subtract it back off again. The choice
2331 // of rounding modes for the addition/subtraction determines the rounding mode
2332 // for our integral rounding as well.
2333 // NOTE: When the input value is negative, we do subtraction followed by
2334 // addition instead.
2335 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2336 1);
2337 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2338 IEEEFloat MagicConstant(*semantics);
2339 opStatus fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2341 assert(fs == opOK);
2342 MagicConstant.sign = sign;
2343
2344 // Preserve the input sign so that we can handle the case of zero result
2345 // correctly.
2346 bool inputSign = isNegative();
2347
2348 fs = add(MagicConstant, rounding_mode);
2349
2350 // Current value and 'MagicConstant' are both integers, so the result of the
2351 // subtraction is always exact according to Sterbenz' lemma.
2352 subtract(MagicConstant, rounding_mode);
2353
2354 // Restore the input sign.
2355 if (inputSign != isNegative())
2356 changeSign();
2357
2358 return fs;
2359}
2360
2361/* Comparison requires normalized numbers. */
2363 assert(semantics == rhs.semantics);
2364
2365 switch (PackCategoriesIntoKey(category, rhs.category)) {
2366 default:
2367 llvm_unreachable(nullptr);
2368
2376 return cmpUnordered;
2377
2381 if (sign)
2382 return cmpLessThan;
2383 else
2384 return cmpGreaterThan;
2385
2389 if (rhs.sign)
2390 return cmpGreaterThan;
2391 else
2392 return cmpLessThan;
2393
2395 if (sign == rhs.sign)
2396 return cmpEqual;
2397 else if (sign)
2398 return cmpLessThan;
2399 else
2400 return cmpGreaterThan;
2401
2403 return cmpEqual;
2404
2406 break;
2407 }
2408
2409 cmpResult result;
2410 /* Two normal numbers. Do they have the same sign? */
2411 if (sign != rhs.sign) {
2412 if (sign)
2413 result = cmpLessThan;
2414 else
2415 result = cmpGreaterThan;
2416 } else {
2417 /* Compare absolute values; invert result if negative. */
2418 result = compareAbsoluteValue(rhs);
2419
2420 if (sign) {
2421 if (result == cmpLessThan)
2422 result = cmpGreaterThan;
2423 else if (result == cmpGreaterThan)
2424 result = cmpLessThan;
2425 }
2426 }
2427
2428 return result;
2429}
2430
2431/// IEEEFloat::convert - convert a value of one floating point type to another.
2432/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2433/// records whether the transformation lost information, i.e. whether
2434/// converting the result back to the original type will produce the
2435/// original value (this is almost the same as return value==opOK, but there
2436/// are edge cases where this is not so).
2437
2439 roundingMode rounding_mode,
2440 bool *losesInfo) {
2441 opStatus fs;
2442 const fltSemantics &fromSemantics = *semantics;
2443 bool is_signaling = isSignaling();
2444
2446 unsigned newPartCount = partCountForBits(toSemantics.precision + 1);
2447 unsigned oldPartCount = partCount();
2448 int shift = toSemantics.precision - fromSemantics.precision;
2449
2450 bool X86SpecialNan = false;
2451 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2452 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2453 (!(*significandParts() & 0x8000000000000000ULL) ||
2454 !(*significandParts() & 0x4000000000000000ULL))) {
2455 // x86 has some unusual NaNs which cannot be represented in any other
2456 // format; note them here.
2457 X86SpecialNan = true;
2458 }
2459
2460 // If this is a truncation of a denormal number, and the target semantics
2461 // has larger exponent range than the source semantics (this can happen
2462 // when truncating from PowerPC double-double to double format), the
2463 // right shift could lose result mantissa bits. Adjust exponent instead
2464 // of performing excessive shift.
2465 // Also do a similar trick in case shifting denormal would produce zero
2466 // significand as this case isn't handled correctly by normalize.
2467 if (shift < 0 && isFiniteNonZero()) {
2468 int omsb = significandMSB() + 1;
2469 int exponentChange = omsb - fromSemantics.precision;
2470 if (exponent + exponentChange < toSemantics.minExponent)
2471 exponentChange = toSemantics.minExponent - exponent;
2472 exponentChange = std::max(exponentChange, shift);
2473 if (exponentChange < 0) {
2474 shift -= exponentChange;
2475 exponent += exponentChange;
2476 } else if (omsb <= -shift) {
2477 exponentChange = omsb + shift - 1; // leave at least one bit set
2478 shift -= exponentChange;
2479 exponent += exponentChange;
2480 }
2481 }
2482
2483 // If this is a truncation, perform the shift before we narrow the storage.
2484 if (shift < 0 && (isFiniteNonZero() ||
2485 (category == fcNaN && semantics->nonFiniteBehavior !=
2487 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2488
2489 // Fix the storage so it can hold to new value.
2490 if (newPartCount > oldPartCount) {
2491 // The new type requires more storage; make it available.
2492 integerPart *newParts;
2493 newParts = new integerPart[newPartCount];
2494 APInt::tcSet(newParts, 0, newPartCount);
2495 if (isFiniteNonZero() || category==fcNaN)
2496 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2497 freeSignificand();
2498 significand.parts = newParts;
2499 } else if (newPartCount == 1 && oldPartCount != 1) {
2500 // Switch to built-in storage for a single part.
2501 integerPart newPart = 0;
2502 if (isFiniteNonZero() || category==fcNaN)
2503 newPart = significandParts()[0];
2504 freeSignificand();
2505 significand.part = newPart;
2506 }
2507
2508 // Now that we have the right storage, switch the semantics.
2509 semantics = &toSemantics;
2510
2511 // If this is an extension, perform the shift now that the storage is
2512 // available.
2513 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2514 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2515
2516 if (isFiniteNonZero()) {
2517 fs = normalize(rounding_mode, lostFraction);
2518 *losesInfo = (fs != opOK);
2519 } else if (category == fcNaN) {
2520 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2521 *losesInfo =
2523 makeNaN(false, sign);
2524 return is_signaling ? opInvalidOp : opOK;
2525 }
2526
2527 // If NaN is negative zero, we need to create a new NaN to avoid converting
2528 // NaN to -Inf.
2529 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2530 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2531 makeNaN(false, false);
2532
2533 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2534
2535 // For x87 extended precision, we want to make a NaN, not a special NaN if
2536 // the input wasn't special either.
2537 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2538 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2539
2540 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2541 // This also guarantees that a sNaN does not become Inf on a truncation
2542 // that loses all payload bits.
2543 if (is_signaling) {
2544 makeQuiet();
2545 fs = opInvalidOp;
2546 } else {
2547 fs = opOK;
2548 }
2549 } else if (category == fcInfinity &&
2550 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2551 makeNaN(false, sign);
2552 *losesInfo = true;
2553 fs = opInexact;
2554 } else if (category == fcZero &&
2555 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2556 // Negative zero loses info, but positive zero doesn't.
2557 *losesInfo =
2558 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2559 fs = *losesInfo ? opInexact : opOK;
2560 // NaN is negative zero means -0 -> +0, which can lose information
2561 sign = false;
2562 } else {
2563 *losesInfo = false;
2564 fs = opOK;
2565 }
2566
2567 if (category == fcZero && !semantics->hasZero)
2569 return fs;
2570}
2571
2572/* Convert a floating point number to an integer according to the
2573 rounding mode. If the rounded integer value is out of range this
2574 returns an invalid operation exception and the contents of the
2575 destination parts are unspecified. If the rounded value is in
2576 range but the floating point number is not the exact integer, the C
2577 standard doesn't require an inexact exception to be raised. IEEE
2578 854 does require it so we do that.
2579
2580 Note that for conversions to integer type the C standard requires
2581 round-to-zero to always be used. */
2582APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2583 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2584 roundingMode rounding_mode, bool *isExact) const {
2585 *isExact = false;
2586
2587 /* Handle the three special cases first. */
2588 if (category == fcInfinity || category == fcNaN)
2589 return opInvalidOp;
2590
2591 unsigned dstPartsCount = partCountForBits(width);
2592 assert(dstPartsCount <= parts.size() && "Integer too big");
2593
2594 if (category == fcZero) {
2595 APInt::tcSet(parts.data(), 0, dstPartsCount);
2596 // Negative zero can't be represented as an int.
2597 *isExact = !sign;
2598 return opOK;
2599 }
2600
2601 const integerPart *src = significandParts();
2602
2603 unsigned truncatedBits;
2604 /* Step 1: place our absolute value, with any fraction truncated, in
2605 the destination. */
2606 if (exponent < 0) {
2607 /* Our absolute value is less than one; truncate everything. */
2608 APInt::tcSet(parts.data(), 0, dstPartsCount);
2609 /* For exponent -1 the integer bit represents .5, look at that.
2610 For smaller exponents leftmost truncated bit is 0. */
2611 truncatedBits = semantics->precision -1U - exponent;
2612 } else {
2613 /* We want the most significant (exponent + 1) bits; the rest are
2614 truncated. */
2615 unsigned int bits = exponent + 1U;
2616
2617 /* Hopelessly large in magnitude? */
2618 if (bits > width)
2619 return opInvalidOp;
2620
2621 if (bits < semantics->precision) {
2622 /* We truncate (semantics->precision - bits) bits. */
2623 truncatedBits = semantics->precision - bits;
2624 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2625 } else {
2626 /* We want at least as many bits as are available. */
2627 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2628 0);
2629 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2630 bits - semantics->precision);
2631 truncatedBits = 0;
2632 }
2633 }
2634
2635 /* Step 2: work out any lost fraction, and increment the absolute
2636 value if we would round away from zero. */
2637 lostFraction lost_fraction;
2638 if (truncatedBits) {
2639 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2640 truncatedBits);
2641 if (lost_fraction != lfExactlyZero &&
2642 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2643 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2644 return opInvalidOp; /* Overflow. */
2645 }
2646 } else {
2647 lost_fraction = lfExactlyZero;
2648 }
2649
2650 /* Step 3: check if we fit in the destination. */
2651 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2652
2653 if (sign) {
2654 if (!isSigned) {
2655 /* Negative numbers cannot be represented as unsigned. */
2656 if (omsb != 0)
2657 return opInvalidOp;
2658 } else {
2659 /* It takes omsb bits to represent the unsigned integer value.
2660 We lose a bit for the sign, but care is needed as the
2661 maximally negative integer is a special case. */
2662 if (omsb == width &&
2663 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2664 return opInvalidOp;
2665
2666 /* This case can happen because of rounding. */
2667 if (omsb > width)
2668 return opInvalidOp;
2669 }
2670
2671 APInt::tcNegate (parts.data(), dstPartsCount);
2672 } else {
2673 if (omsb >= width + !isSigned)
2674 return opInvalidOp;
2675 }
2676
2677 if (lost_fraction == lfExactlyZero) {
2678 *isExact = true;
2679 return opOK;
2680 }
2681 return opInexact;
2682}
2683
2684/* Same as convertToSignExtendedInteger, except we provide
2685 deterministic values in case of an invalid operation exception,
2686 namely zero for NaNs and the minimal or maximal value respectively
2687 for underflow or overflow.
2688 The *isExact output tells whether the result is exact, in the sense
2689 that converting it back to the original floating point type produces
2690 the original value. This is almost equivalent to result==opOK,
2691 except for negative zeroes.
2692*/
2695 unsigned int width, bool isSigned,
2696 roundingMode rounding_mode, bool *isExact) const {
2697 opStatus fs = convertToSignExtendedInteger(parts, width, isSigned,
2698 rounding_mode, isExact);
2699
2700 if (fs == opInvalidOp) {
2701 unsigned int bits, dstPartsCount;
2702
2703 dstPartsCount = partCountForBits(width);
2704 assert(dstPartsCount <= parts.size() && "Integer too big");
2705
2706 if (category == fcNaN)
2707 bits = 0;
2708 else if (sign)
2709 bits = isSigned;
2710 else
2711 bits = width - isSigned;
2712
2713 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2714 if (sign && isSigned)
2715 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2716 }
2717
2718 return fs;
2719}
2720
2721/* Convert an unsigned integer SRC to a floating point number,
2722 rounding according to ROUNDING_MODE. The sign of the floating
2723 point number is not modified. */
2724APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2725 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2726 category = fcNormal;
2727 unsigned omsb = APInt::tcMSB(src, srcCount) + 1;
2728 integerPart *dst = significandParts();
2729 unsigned dstCount = partCount();
2730 unsigned precision = semantics->precision;
2731
2732 /* We want the most significant PRECISION bits of SRC. There may not
2733 be that many; extract what we can. */
2734 lostFraction lost_fraction;
2735 if (precision <= omsb) {
2736 exponent = omsb - 1;
2737 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2738 omsb - precision);
2739 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2740 } else {
2741 exponent = precision - 1;
2742 lost_fraction = lfExactlyZero;
2743 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2744 }
2745
2746 return normalize(rounding_mode, lost_fraction);
2747}
2748
2750 roundingMode rounding_mode) {
2751 unsigned int partCount = Val.getNumWords();
2752 APInt api = Val;
2753
2754 sign = false;
2755 if (isSigned && api.isNegative()) {
2756 sign = true;
2757 api = -api;
2758 }
2759
2760 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2761}
2762
2764IEEEFloat::convertFromHexadecimalString(StringRef s,
2765 roundingMode rounding_mode) {
2766 lostFraction lost_fraction = lfExactlyZero;
2767
2768 category = fcNormal;
2769 zeroSignificand();
2770 exponent = 0;
2771
2772 integerPart *significand = significandParts();
2773 unsigned partsCount = partCount();
2774 unsigned bitPos = partsCount * integerPartWidth;
2775 bool computedTrailingFraction = false;
2776
2777 // Skip leading zeroes and any (hexa)decimal point.
2778 StringRef::iterator begin = s.begin();
2779 StringRef::iterator end = s.end();
2781 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2782 if (!PtrOrErr)
2783 return PtrOrErr.takeError();
2784 StringRef::iterator p = *PtrOrErr;
2785 StringRef::iterator firstSignificantDigit = p;
2786
2787 while (p != end) {
2788 integerPart hex_value;
2789
2790 if (*p == '.') {
2791 if (dot != end)
2792 return createError("String contains multiple dots");
2793 dot = p++;
2794 continue;
2795 }
2796
2797 hex_value = hexDigitValue(*p);
2798 if (hex_value == UINT_MAX)
2799 break;
2800
2801 p++;
2802
2803 // Store the number while we have space.
2804 if (bitPos) {
2805 bitPos -= 4;
2806 hex_value <<= bitPos % integerPartWidth;
2807 significand[bitPos / integerPartWidth] |= hex_value;
2808 } else if (!computedTrailingFraction) {
2809 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2810 if (!FractOrErr)
2811 return FractOrErr.takeError();
2812 lost_fraction = *FractOrErr;
2813 computedTrailingFraction = true;
2814 }
2815 }
2816
2817 /* Hex floats require an exponent but not a hexadecimal point. */
2818 if (p == end)
2819 return createError("Hex strings require an exponent");
2820 if (*p != 'p' && *p != 'P')
2821 return createError("Invalid character in significand");
2822 if (p == begin)
2823 return createError("Significand has no digits");
2824 if (dot != end && p - begin == 1)
2825 return createError("Significand has no digits");
2826
2827 /* Ignore the exponent if we are zero. */
2828 if (p != firstSignificantDigit) {
2829 int expAdjustment;
2830
2831 /* Implicit hexadecimal point? */
2832 if (dot == end)
2833 dot = p;
2834
2835 /* Calculate the exponent adjustment implicit in the number of
2836 significant digits. */
2837 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2838 if (expAdjustment < 0)
2839 expAdjustment++;
2840 expAdjustment = expAdjustment * 4 - 1;
2841
2842 /* Adjust for writing the significand starting at the most
2843 significant nibble. */
2844 expAdjustment += semantics->precision;
2845 expAdjustment -= partsCount * integerPartWidth;
2846
2847 /* Adjust for the given exponent. */
2848 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2849 if (!ExpOrErr)
2850 return ExpOrErr.takeError();
2851 exponent = *ExpOrErr;
2852 }
2853
2854 return normalize(rounding_mode, lost_fraction);
2855}
2856
2858IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2859 unsigned sigPartCount, int exp,
2860 roundingMode rounding_mode) {
2861 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2863
2864 bool isNearest = rounding_mode == rmNearestTiesToEven ||
2865 rounding_mode == rmNearestTiesToAway;
2866
2867 unsigned parts = partCountForBits(semantics->precision + 11);
2868
2869 /* Calculate pow(5, abs(exp)). */
2870 unsigned pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp : -exp);
2871
2872 for (;; parts *= 2) {
2873 unsigned int excessPrecision, truncatedBits;
2874
2875 calcSemantics.precision = parts * integerPartWidth - 1;
2876 excessPrecision = calcSemantics.precision - semantics->precision;
2877 truncatedBits = excessPrecision;
2878
2879 IEEEFloat decSig(calcSemantics, uninitialized);
2880 decSig.makeZero(sign);
2881 IEEEFloat pow5(calcSemantics);
2882
2883 opStatus sigStatus = decSig.convertFromUnsignedParts(
2884 decSigParts, sigPartCount, rmNearestTiesToEven);
2885 opStatus powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2887 /* Add exp, as 10^n = 5^n * 2^n. */
2888 decSig.exponent += exp;
2889
2890 lostFraction calcLostFraction;
2891 integerPart HUerr, HUdistance;
2892 unsigned int powHUerr;
2893
2894 if (exp >= 0) {
2895 /* multiplySignificand leaves the precision-th bit set to 1. */
2896 calcLostFraction = decSig.multiplySignificand(pow5);
2897 powHUerr = powStatus != opOK;
2898 } else {
2899 calcLostFraction = decSig.divideSignificand(pow5);
2900 /* Denormal numbers have less precision. */
2901 if (decSig.exponent < semantics->minExponent) {
2902 excessPrecision += (semantics->minExponent - decSig.exponent);
2903 truncatedBits = excessPrecision;
2904 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
2905 }
2906 /* Extra half-ulp lost in reciprocal of exponent. */
2907 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2908 }
2909
2910 /* Both multiplySignificand and divideSignificand return the
2911 result with the integer bit set. */
2913 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2914
2915 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2916 powHUerr);
2917 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2918 excessPrecision, isNearest);
2919
2920 /* Are we guaranteed to round correctly if we truncate? */
2921 if (HUdistance >= HUerr) {
2922 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2923 calcSemantics.precision - excessPrecision,
2924 excessPrecision);
2925 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2926 above we must adjust our exponent to compensate for the
2927 implicit right shift. */
2928 exponent = (decSig.exponent + semantics->precision
2929 - (calcSemantics.precision - excessPrecision));
2930 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2931 decSig.partCount(),
2932 truncatedBits);
2933 return static_cast<opStatus>(normalize(rounding_mode, calcLostFraction) |
2934 ((sigStatus | powStatus) & opInexact));
2935 }
2936 }
2937}
2938
/* Parse STR as a decimal floating-point literal into this value, rounding
   according to ROUNDING_MODE.

   interpretDecimal() fills D from the text; an Error from it is returned
   unchanged.  The cases are then tried in this order:
     - no significant digit: the value becomes fcZero with opOK;
     - normalized exponent so large that the scaled comparison further
       down could overflow an int: handleOverflow();
     - exponent small enough to underflow for certain: a zero significand
       is normalized with lfLessThanHalf;
     - exponent large enough to overflow for certain: handleOverflow();
     - otherwise the digits from D.firstSigDigit to D.lastSigDigit are
       accumulated into a multi-part integer and handed, with D.exponent,
       to roundSignificandWithExponent().

   Returns the resulting status, or the Error "Invalid character in
   significand" when a non-digit is met while accumulating digits.  */
2939Expected<APFloat::opStatus>
2940IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2941 decimalInfo D;
2942 opStatus fs;
2943
2944 /* Scan the text. */
2945 StringRef::iterator p = str.begin();
2946 if (Error Err = interpretDecimal(p, str.end(), &D))
2947 return std::move(Err);
2948
2949 /* Handle the quick cases. First the case of no significant digits,
2950 i.e. zero, and then exponents that are obviously too large or too
2951 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2952 definitely overflows if
2953
2954 (exp - 1) * L >= maxExponent
2955
2956 and definitely underflows to zero where
2957
2958 (exp + 1) * L <= minExponent - precision
2959
2960 With integer arithmetic the tightest bounds for L are
2961
2962 93/28 < L < 196/59 [ numerator <= 256 ]
2963 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2964 */
2965
2966 // Test if we have a zero number allowing for strings with no null terminators
2967 // and zero decimals with non-zero exponents.
2968 //
2969 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2970 // D.firstSigDigit equals str.end(), every digit must be a zero and there can
2971 // be at most one dot. On the other hand, if we have a zero with a non-zero
2972 // exponent, then we know that D.firstSigDigit will be non-numeric.
2973 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2974 category = fcZero;
2975 fs = opOK;
2976 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2977 sign = false;
2978 if (!semantics->hasZero)
 // NOTE(review): the statement controlled by the `if` above is not visible
 // in this listing (the embedded numbering jumps from 2978 to 2980).
 // Restore it from the upstream source before building.
2980
2981 /* Check whether the normalized exponent is high enough to overflow
2982 max during the log-rebasing in the max-exponent check below. */
2983 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2984 fs = handleOverflow(rounding_mode);
2985
2986 /* If it wasn't, then it also wasn't high enough to overflow max
2987 during the log-rebasing in the min-exponent check. Check that it
2988 won't overflow min in either check, then perform the min-exponent
2989 check. */
2990 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2991 (D.normalizedExponent + 1) * 28738 <=
2992 8651 * (semantics->minExponent - (int) semantics->precision)) {
2993 /* Underflow to zero and round. */
2994 category = fcNormal;
2995 zeroSignificand();
2996 fs = normalize(rounding_mode, lfLessThanHalf);
2997
2998 /* We can finally safely perform the max-exponent check. */
2999 } else if ((D.normalizedExponent - 1) * 42039
3000 >= 12655 * semantics->maxExponent) {
3001 /* Overflow and round. */
3002 fs = handleOverflow(rounding_mode);
3003 } else {
 // Raw owning buffer for the decimal significand; it is released on the
 // invalid-character early return inside the loop and again after rounding.
3004 integerPart *decSignificand;
3005 unsigned int partCount;
3006
3007 /* A tight upper bound on number of bits required to hold an
3008 N-digit decimal integer is N * 196 / 59. Allocate enough space
3009 to hold the full significand, and an extra part required by
3010 tcMultiplyPart. */
3011 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3012 partCount = partCountForBits(1 + 196 * partCount / 59);
3013 decSignificand = new integerPart[partCount + 1];
 // From here on partCount counts the parts of decSignificand in use.
3014 partCount = 0;
3015
3016 /* Convert to binary efficiently - we do almost all multiplication
3017 in an integerPart. Only when this would overflow do we do a single
3018 bignum multiplication, and then revert again to multiplication
3019 in an integerPart. */
 // NOTE(review): the loop reads digits through p, so it presumably relies on
 // interpretDecimal() having left p at the first significant digit - confirm.
3020 do {
3021 integerPart decValue, val, multiplier;
3022
3023 val = 0;
3024 multiplier = 1;
3025
3026 do {
 // Step over the decimal point; stop if it was the last character.
3027 if (*p == '.') {
3028 p++;
3029 if (p == str.end()) {
3030 break;
3031 }
3032 }
3033 decValue = decDigitValue(*p++);
3034 if (decValue >= 10U) {
3035 delete[] decSignificand;
3036 return createError("Invalid character in significand");
3037 }
3038 multiplier *= 10;
3039 val = val * 10 + decValue;
3040 /* The maximum number that can be multiplied by ten with any
3041 digit added without overflowing an integerPart. */
3042 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3043
3044 /* Multiply out the current part. */
3045 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3046 partCount, partCount + 1, false);
3047
3048 /* If we used another part (likely but not guaranteed), increase
3049 the count. */
3050 if (decSignificand[partCount])
3051 partCount++;
3052 } while (p <= D.lastSigDigit);
3053
3054 category = fcNormal;
3055 fs = roundSignificandWithExponent(decSignificand, partCount,
3056 D.exponent, rounding_mode);
3057
3058 delete [] decSignificand;
3059 }
3060
3061 return fs;
3062}
3063
3064bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3065 const size_t MIN_NAME_SIZE = 3;
3066
3067 if (str.size() < MIN_NAME_SIZE)
3068 return false;
3069
3070 if (str == "inf" || str == "INFINITY" || str == "+Inf" || str == "+inf") {
3071 makeInf(false);
3072 return true;
3073 }
3074
3075 bool IsNegative = str.consume_front("-");
3076 if (IsNegative) {
3077 if (str.size() < MIN_NAME_SIZE)
3078 return false;
3079
3080 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3081 makeInf(true);
3082 return true;
3083 }
3084 }
3085
3086 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3087 bool IsSignaling = str.consume_front_insensitive("s");
3088 if (IsSignaling) {
3089 if (str.size() < MIN_NAME_SIZE)
3090 return false;
3091 }
3092
3093 if (str.consume_front("nan") || str.consume_front("NaN")) {
3094 // A NaN without payload.
3095 if (str.empty()) {
3096 makeNaN(IsSignaling, IsNegative);
3097 return true;
3098 }
3099
3100 // Allow the payload to be inside parentheses.
3101 if (str.front() == '(') {
3102 // Parentheses should be balanced (and not empty).
3103 if (str.size() <= 2 || str.back() != ')')
3104 return false;
3105
3106 str = str.slice(1, str.size() - 1);
3107 }
3108
3109 // Determine the payload number's radix.
3110 unsigned Radix = 10;
3111 if (str[0] == '0') {
3112 if (str.size() > 1 && tolower(str[1]) == 'x') {
3113 str = str.drop_front(2);
3114 Radix = 16;
3115 } else {
3116 Radix = 8;
3117 }
3118 }
3119
3120 // Parse the payload and make the NaN.
3121 APInt Payload;
3122 if (!str.getAsInteger(Radix, Payload)) {
3123 makeNaN(IsSignaling, IsNegative, &Payload);
3124 return true;
3125 }
3126 }
3127
3128 return false;
3129}
3130
3131Expected<APFloat::opStatus>
3133 if (str.empty())
3134 return createError("Invalid string length");
3135
3136 // Handle special cases.
3137 if (convertFromStringSpecials(str))
3138 return opOK;
3139
3140 /* Handle a leading minus sign. */
3141 StringRef::iterator p = str.begin();
3142 size_t slen = str.size();
3143 sign = *p == '-' ? 1 : 0;
3144 if (sign && !semantics->hasSignedRepr)
3146 "This floating point format does not support signed values");
3147
3148 if (*p == '-' || *p == '+') {
3149 p++;
3150 slen--;
3151 if (!slen)
3152 return createError("String has no digits");
3153 }
3154
3155 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3156 if (slen == 2)
3157 return createError("Invalid string");
3158 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3159 rounding_mode);
3160 }
3161
3162 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3163}
3164
3165/* Write out a hexadecimal representation of the floating point value
3166 to DST, which must be of sufficient size, in the C99 form
3167 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3168 excluding the terminating NUL.
3169
3170 If UPPERCASE, the output is in upper case, otherwise in lower case.
3171
3172 HEXDIGITS digits appear altogether, rounding the value if
3173 necessary. If HEXDIGITS is 0, the minimal precision to display the
3174 number precisely is used instead. If nothing would appear after
3175 the decimal point it is suppressed.
3176
3177 The decimal exponent is always printed and has at least one digit.
3178 Zero values display an exponent of zero. Infinities and NaNs
3179 appear as "infinity" or "nan" respectively.
3180
3181 The above rules are as specified by C99. There is ambiguity about
3182 what the leading hexadecimal digit should be. This implementation
3183 uses whatever is necessary so that the exponent is displayed as
3184 stored. This implies the exponent will fall within the IEEE format
3185 range, and the leading hexadecimal digit will be 0 (for denormals),
3186 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3187 any other digits zero).
3188*/
3189unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3190 bool upperCase,
3191 roundingMode rounding_mode) const {
3192 char *p = dst;
3193 if (sign)
3194 *dst++ = '-';
3195
3196 switch (category) {
3197 case fcInfinity:
3198 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3199 dst += sizeof infinityL - 1;
3200 break;
3201
3202 case fcNaN:
3203 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3204 dst += sizeof NaNU - 1;
3205 break;
3206
3207 case fcZero:
3208 *dst++ = '0';
3209 *dst++ = upperCase ? 'X': 'x';
3210 *dst++ = '0';
3211 if (hexDigits > 1) {
3212 *dst++ = '.';
3213 memset (dst, '0', hexDigits - 1);
3214 dst += hexDigits - 1;
3215 }
3216 *dst++ = upperCase ? 'P': 'p';
3217 *dst++ = '0';
3218 break;
3219
3220 case fcNormal:
3221 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3222 break;
3223 }
3224
3225 *dst = 0;
3226
3227 return static_cast<unsigned int>(dst - p);
3228}
3229
3230/* Does the hard work of outputting the correctly rounded hexadecimal
3231 form of a normal floating point number with the specified number of
3232 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3233 digits necessary to print the value precisely is output. */
3234char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3235 bool upperCase,
3236 roundingMode rounding_mode) const {
3237 *dst++ = '0';
3238 *dst++ = upperCase ? 'X': 'x';
3239
3240 bool roundUp = false;
3241 const char *hexDigitChars = upperCase ? hexDigitsUpper : hexDigitsLower;
3242
3243 const integerPart *significand = significandParts();
3244 unsigned partsCount = partCount();
3245
3246 /* +3 because the first digit only uses the single integer bit, so
3247 we have 3 virtual zero most-significant-bits. */
3248 unsigned valueBits = semantics->precision + 3;
3249 unsigned shift = integerPartWidth - valueBits % integerPartWidth;
3250
3251 /* The natural number of digits required ignoring trailing
3252 insignificant zeroes. */
3253 unsigned outputDigits = (valueBits - significandLSB() + 3) / 4;
3254
3255 /* hexDigits of zero means use the required number for the
3256 precision. Otherwise, see if we are truncating. If we are,
3257 find out if we need to round away from zero. */
3258 if (hexDigits) {
3259 if (hexDigits < outputDigits) {
3260 /* We are dropping non-zero bits, so need to check how to round.
3261 "bits" is the number of dropped bits. */
3262 unsigned int bits;
3263 lostFraction fraction;
3264
3265 bits = valueBits - hexDigits * 4;
3266 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3267 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3268 }
3269 outputDigits = hexDigits;
3270 }
3271
3272 /* Write the digits consecutively, and start writing in the location
3273 of the hexadecimal point. We move the most significant digit
3274 left and add the hexadecimal point later. */
3275 char *p = ++dst;
3276
3277 unsigned count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3278
3279 while (outputDigits && count) {
3280 integerPart part;
3281
3282 /* Put the most significant integerPartWidth bits in "part". */
3283 if (--count == partsCount)
3284 part = 0; /* An imaginary higher zero part. */
3285 else
3286 part = significand[count] << shift;
3287
3288 if (count && shift)
3289 part |= significand[count - 1] >> (integerPartWidth - shift);
3290
3291 /* Convert as much of "part" to hexdigits as we can. */
3292 unsigned int curDigits = integerPartWidth / 4;
3293
3294 curDigits = std::min(curDigits, outputDigits);
3295 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3296 outputDigits -= curDigits;
3297 }
3298
3299 if (roundUp) {
3300 char *q = dst;
3301
3302 /* Note that hexDigitChars has a trailing '0'. */
3303 do {
3304 q--;
3305 *q = hexDigitChars[hexDigitValue (*q) + 1];
3306 } while (*q == '0');
3307 assert(q >= p);
3308 } else {
3309 /* Add trailing zeroes. */
3310 memset (dst, '0', outputDigits);
3311 dst += outputDigits;
3312 }
3313
3314 /* Move the most significant digit to before the point, and if there
3315 is something after the decimal point add it. This must come
3316 after rounding above. */
3317 p[-1] = p[0];
3318 if (dst -1 == p)
3319 dst--;
3320 else
3321 p[0] = '.';
3322
3323 /* Finally output the exponent. */
3324 *dst++ = upperCase ? 'P': 'p';
3325
3326 return writeSignedDecimal (dst, exponent);
3327}
3328
3330 if (!Arg.isFiniteNonZero())
3331 return hash_combine((uint8_t)Arg.category,
3332 // NaN has no sign, fix it at zero.
3333 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3334 Arg.semantics->precision);
3335
3336 // Normal floats need their exponent and significand hashed.
3337 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3338 Arg.semantics->precision, Arg.exponent,
3340 Arg.significandParts(),
3341 Arg.significandParts() + Arg.partCount()));
3342}
3343
3344// Conversion from APFloat to/from host float/double. It may eventually be
3345// possible to eliminate these and have everybody deal with APFloats, but that
3346// will take a while. This approach will not easily extend to long double.
3347// Current implementation requires integerPartWidth==64, which is correct at
3348// the moment but could be made more general.
3349
3350// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3351// the actual IEEE representations. We compensate for that here.
3352
3353APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3354 assert(semantics ==
3355 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3356 assert(partCount()==2);
3357
3358 uint64_t myexponent, mysignificand;
3359
3360 if (isFiniteNonZero()) {
3361 myexponent = exponent+16383; //bias
3362 mysignificand = significandParts()[0];
3363 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3364 myexponent = 0; // denormal
3365 } else if (category==fcZero) {
3366 myexponent = 0;
3367 mysignificand = 0;
3368 } else if (category==fcInfinity) {
3369 myexponent = 0x7fff;
3370 mysignificand = 0x8000000000000000ULL;
3371 } else {
3372 assert(category == fcNaN && "Unknown category");
3373 myexponent = 0x7fff;
3374 mysignificand = significandParts()[0];
3375 }
3376
3377 uint64_t words[2];
3378 words[0] = mysignificand;
3379 words[1] = ((uint64_t)(sign & 1) << 15) |
3380 (myexponent & 0x7fffLL);
3381 return APInt(80, words);
3382}
3383
3384APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3385 assert(semantics ==
3386 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3387 assert(partCount()==2);
3388
3389 uint64_t words[2];
3390 bool losesInfo;
3391
3392 // Convert number to double. To avoid spurious underflows, we re-
3393 // normalize against the "double" minExponent first, and only *then*
3394 // truncate the mantissa. The result of that second conversion
3395 // may be inexact, but should never underflow.
3396 // Declare fltSemantics before APFloat that uses it (and
3397 // saves pointer to it) to ensure correct destruction order.
3398 fltSemantics extendedSemantics = *semantics;
3399 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3400 IEEEFloat extended(*this);
3401 [[maybe_unused]] opStatus fs =
3402 extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3403 assert(fs == opOK && !losesInfo);
3404
3405 IEEEFloat u(extended);
3406 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3407 assert(fs == opOK || fs == opInexact);
3408 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3409
3410 // If conversion was exact or resulted in a special case, we're done;
3411 // just set the second double to zero. Otherwise, re-convert back to
3412 // the extended format and compute the difference. This now should
3413 // convert exactly to double.
3414 if (u.isFiniteNonZero() && losesInfo) {
3415 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3416 assert(fs == opOK && !losesInfo);
3417
3418 IEEEFloat v(extended);
3419 v.subtract(u, rmNearestTiesToEven);
3420 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3421 assert(fs == opOK && !losesInfo);
3422 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3423 } else {
3424 words[1] = 0;
3425 }
3426
3427 return APInt(128, words);
3428}
3429
/// Pack this value into an APInt of S.sizeInBits bits for the semantics S.
/// The result holds the stored significand in the low bits, the biased
/// exponent field above it, and the sign at bit S.sizeInBits - 1.
/// Asserts that this object actually uses the semantics S.
3430template <const fltSemantics &S>
3431APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3432 assert(semantics == &S);
 // Amount added to the internal exponent to obtain the stored exponent
 // field. Float8E8M0FNU uses -minExponent; every other format uses
 // -(minExponent - 1).
3433 const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
3434 ? -S.minExponent
3435 : -(S.minExponent - 1);
 // Number of significand bits below the integer bit, and the position of the
 // integer bit expressed as (part index, mask within that part).
3436 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3437 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3438 constexpr integerPart integer_bit =
3439 integerPart{1} << (trailing_significand_bits % integerPartWidth);
 // Mask selecting the bits below the integer bit in the part that holds it.
3440 constexpr uint64_t significand_mask = integer_bit - 1;
 // With no trailing significand bits the exponent field is taken to span all
 // of sizeInBits; otherwise it is whatever remains after the sign bit and the
 // trailing significand.
3441 constexpr unsigned int exponent_bits =
3442 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3443 : S.sizeInBits;
3444 static_assert(exponent_bits < 64);
3445 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3446
3447 uint64_t myexponent;
3448 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3449 mysignificand;
3450
 // Pick the stored exponent and significand for each value category.
3451 if (isFiniteNonZero()) {
3452 myexponent = exponent + bias;
3453 std::copy_n(significandParts(), mysignificand.size(),
3454 mysignificand.begin());
 // Smallest stored exponent with the integer bit clear: encode as a
 // denormal, i.e. with an exponent field of zero.
3455 if (myexponent == 1 &&
3456 !(significandParts()[integer_bit_part] & integer_bit))
3457 myexponent = 0; // denormal
3458 } else if (category == fcZero) {
3459 if (!S.hasZero)
3460 llvm_unreachable("semantics does not support zero!");
3461 myexponent = ::exponentZero(S) + bias;
3462 mysignificand.fill(0);
3463 } else if (category == fcInfinity) {
3464 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3465 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3466 llvm_unreachable("semantics don't support inf!");
3467 myexponent = ::exponentInf(S) + bias;
3468 mysignificand.fill(0);
3469 } else {
3470 assert(category == fcNaN && "Unknown category!");
3471 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3472 llvm_unreachable("semantics don't support NaN!");
3473 myexponent = ::exponentNaN(S) + bias;
3474 std::copy_n(significandParts(), mysignificand.size(),
3475 mysignificand.begin());
3476 }
 // Assemble the output words: significand parts first, remaining words
 // zero-filled, then the sign and exponent OR-ed into the last word.
3477 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3478 auto words_iter =
3479 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3480 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3481 // Clear the integer bit.
3482 words[mysignificand.size() - 1] &= significand_mask;
3483 }
3484 std::fill(words_iter, words.end(), uint64_t{0});
3485 constexpr size_t last_word = words.size() - 1;
3486 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3487 << ((S.sizeInBits - 1) % 64);
3488 words[last_word] |= shifted_sign;
3489 uint64_t shifted_exponent = (myexponent & exponent_mask)
3490 << (trailing_significand_bits % 64);
3491 words[last_word] |= shifted_exponent;
 // A single-word result is built from the scalar directly; wider results are
 // built from the whole word array.
3492 if constexpr (last_word == 0) {
3493 return APInt(S.sizeInBits, words[0]);
3494 }
3495 return APInt(S.sizeInBits, words);
3496}
3497
/// Return the bit pattern of this value under semIEEEquad semantics.
/// Asserts that the significand spans exactly two parts, then defers to
/// convertIEEEFloatToAPInt.
3498APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3499 assert(partCount() == 2);
3500 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3501}
3502
/// Return the bit pattern of this value under semIEEEdouble semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3503APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3504 assert(partCount()==1);
3505 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3506}
3507
/// Return the bit pattern of this value under semIEEEsingle semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3508APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3509 assert(partCount()==1);
3510 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3511}
3512
/// Return the bit pattern of this value under semBFloat semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3513APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3514 assert(partCount() == 1);
3515 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3516}
3517
3518APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3519 assert(partCount()==1);
3520 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3521}
3522
/// Return the bit pattern of this value under semFloat8E5M2 semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3523APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3524 assert(partCount() == 1);
3525 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3526}
3527
/// Return the bit pattern of this value under semFloat8E5M2FNUZ semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3528APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3529 assert(partCount() == 1);
3530 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3531}
3532
/// Return the bit pattern of this value under semFloat8E4M3 semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3533APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3534 assert(partCount() == 1);
3535 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3536}
3537
/// Return the bit pattern of this value under semFloat8E4M3FN semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3538APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3539 assert(partCount() == 1);
3540 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3541}
3542
/// Return the bit pattern of this value under semFloat8E4M3FNUZ semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3543APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3544 assert(partCount() == 1);
3545 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3546}
3547
/// Return the bit pattern of this value under semFloat8E4M3B11FNUZ semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3548APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3549 assert(partCount() == 1);
3550 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3551}
3552
/// Return the bit pattern of this value under semFloat8E3M4 semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3553APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3554 assert(partCount() == 1);
3555 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3556}
3557
/// Return the bit pattern of this value under semFloatTF32 semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3558APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3559 assert(partCount() == 1);
3560 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3561}
3562
/// Return the bit pattern of this value under semFloat8E8M0FNU semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3563APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3564 assert(partCount() == 1);
3565 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3566}
3567
/// Return the bit pattern of this value under semFloat6E3M2FN semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3568APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3569 assert(partCount() == 1);
3570 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3571}
3572
/// Return the bit pattern of this value under semFloat6E2M3FN semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3573APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3574 assert(partCount() == 1);
3575 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3576}
3577
/// Return the bit pattern of this value under semFloat4E2M1FN semantics.
/// Asserts a single-part significand, then defers to convertIEEEFloatToAPInt.
3578APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3579 assert(partCount() == 1);
3580 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3581}
3582
3583// This function creates an APInt that is just a bit map of the floating
3584// point constant as it would appear in memory. It is not a conversion,
3585// and treating the result as a normal integer is unlikely to be useful.
3586
3588 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3589 return convertHalfAPFloatToAPInt();
3590
3591 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3592 return convertBFloatAPFloatToAPInt();
3593
3594 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3595 return convertFloatAPFloatToAPInt();
3596
3597 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3598 return convertDoubleAPFloatToAPInt();
3599
3600 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3601 return convertQuadrupleAPFloatToAPInt();
3602
3603 if (semantics ==
3604 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3605 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3606
3607 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3608 return convertFloat8E5M2APFloatToAPInt();
3609
3610 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3611 return convertFloat8E5M2FNUZAPFloatToAPInt();
3612
3613 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3614 return convertFloat8E4M3APFloatToAPInt();
3615
3616 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3617 return convertFloat8E4M3FNAPFloatToAPInt();
3618
3619 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3620 return convertFloat8E4M3FNUZAPFloatToAPInt();
3621
3622 if (semantics ==
3623 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3624 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3625
3626 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3627 return convertFloat8E3M4APFloatToAPInt();
3628
3629 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3630 return convertFloatTF32APFloatToAPInt();
3631
3632 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3633 return convertFloat8E8M0FNUAPFloatToAPInt();
3634
3635 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3636 return convertFloat6E3M2FNAPFloatToAPInt();
3637
3638 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3639 return convertFloat6E2M3FNAPFloatToAPInt();
3640
3641 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3642 return convertFloat4E2M1FNAPFloatToAPInt();
3643
3644 assert(semantics ==
3645 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3646 "unknown format!");
3647 return convertF80LongDoubleAPFloatToAPInt();
3648}
3649
3651 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3652 "Float semantics are not IEEEsingle");
3653 APInt api = bitcastToAPInt();
3654 return api.bitsToFloat();
3655}
3656
3658 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3659 "Float semantics are not IEEEdouble");
3660 APInt api = bitcastToAPInt();
3661 return api.bitsToDouble();
3662}
3663
3664#ifdef HAS_IEE754_FLOAT128
3665float128 IEEEFloat::convertToQuad() const {
3666 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
3667 "Float semantics are not IEEEquads");
3668 APInt api = bitcastToAPInt();
3669 return api.bitsToQuad();
3670}
3671#endif
3672
3673/// Integer bit is explicit in this format. Intel hardware (387 and later)
3674/// does not support these bit patterns:
3675/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3676/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3677/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3678/// exponent = 0, integer bit 1 ("pseudodenormal")
3679/// At the moment, the first three are treated as NaNs, the last one as Normal.
3680void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3681 uint64_t i1 = api.getRawData()[0];
3682 uint64_t i2 = api.getRawData()[1];
3683 uint64_t myexponent = (i2 & 0x7fff);
3684 uint64_t mysignificand = i1;
3685 uint8_t myintegerbit = mysignificand >> 63;
3686
3687 initialize(&APFloatBase::semX87DoubleExtended);
3688 assert(partCount()==2);
3689
3690 sign = static_cast<unsigned int>(i2>>15);
3691 if (myexponent == 0 && mysignificand == 0) {
3692 makeZero(sign);
3693 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3694 makeInf(sign);
3695 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3696 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3697 category = fcNaN;
3698 exponent = exponentNaN();
3699 significandParts()[0] = mysignificand;
3700 significandParts()[1] = 0;
3701 } else {
3702 category = fcNormal;
3703 exponent = myexponent - 16383;
3704 significandParts()[0] = mysignificand;
3705 significandParts()[1] = 0;
3706 if (myexponent==0) // denormal
3707 exponent = -16382;
3708 }
3709}
3710
3711void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3712 uint64_t i1 = api.getRawData()[0];
3713 uint64_t i2 = api.getRawData()[1];
3714 bool losesInfo;
3715
3716 // Get the first double and convert to our format.
3717 initFromDoubleAPInt(APInt(64, i1));
3718 [[maybe_unused]] opStatus fs = convert(APFloatBase::semPPCDoubleDoubleLegacy,
3719 rmNearestTiesToEven, &losesInfo);
3720 // (convert may return opInvalidOp if i1 is an sNaN).
3721 assert((fs == opOK || fs == opInvalidOp) && !losesInfo);
3722
3723 // Unless we have a special case, add in second double.
3724 if (isFiniteNonZero()) {
3725 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3726 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3727 &losesInfo);
3728 assert(fs == opOK && !losesInfo);
3729
3731 }
3732}
3733
3734// The E8M0 format has the following characteristics:
3735// It is an 8-bit unsigned format with only exponents (no actual significand).
3736// No encodings for {zero, infinities or denorms}.
3737// NaN is represented by all 1's.
3738// Bias is 127.
3739void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3740 const uint64_t exponent_mask = 0xff;
3741 uint64_t val = api.getRawData()[0];
3742 uint64_t myexponent = val & exponent_mask;
3743
3744 initialize(&APFloatBase::semFloat8E8M0FNU);
3745 assert(partCount() == 1);
3746
3747 // This format has unsigned representation only
3748 sign = 0;
3749
3750 // Set the significand
3751 // This format does not have any significand but the 'Pth' precision bit is
3752 // always set to 1 for consistency in APFloat's internal representation.
3753 uint64_t mysignificand = 1;
3754 significandParts()[0] = mysignificand;
3755
3756 // This format can either have a NaN or fcNormal
3757 // All 1's i.e. 255 is a NaN
3758 if (val == exponent_mask) {
3759 category = fcNaN;
3760 exponent = exponentNaN();
3761 return;
3762 }
3763 // Handle fcNormal...
3764 category = fcNormal;
3765 exponent = myexponent - 127; // 127 is bias
3766}
3767
/// Initialize this value from the bit pattern \p api, decoded according to
/// the semantics S. \p api must be exactly S.sizeInBits wide (asserted).
/// The pattern is classified as infinity, NaN, zero, denormal or normal
/// using S.nonFiniteBehavior and S.nanEncoding.
3768template <const fltSemantics &S>
3769void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3770 assert(api.getBitWidth() == S.sizeInBits);
 // Mask of the (implicit) integer bit within the top significand part, and
 // of the stored bits below it.
3771 constexpr integerPart integer_bit = integerPart{1}
3772 << ((S.precision - 1) % integerPartWidth);
3773 constexpr uint64_t significand_mask = integer_bit - 1;
3774 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3775 constexpr unsigned int stored_significand_parts =
3776 partCountForBits(trailing_significand_bits);
 // Whatever is left after the sign bit and the trailing significand is the
 // exponent field.
3777 constexpr unsigned int exponent_bits =
3778 S.sizeInBits - 1 - trailing_significand_bits;
3779 static_assert(exponent_bits < 64);
3780 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
 // Amount subtracted from the stored exponent field to get the internal
 // exponent.
3781 constexpr int bias = -(S.minExponent - 1);
3782
3783 // Copy the bits of the significand. We need to clear out the exponent and
3784 // sign bit in the last word.
3785 std::array<integerPart, stored_significand_parts> mysignificand;
3786 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3787 if constexpr (significand_mask != 0) {
3788 mysignificand[mysignificand.size() - 1] &= significand_mask;
3789 }
3790
3791 // We assume the last word holds the sign bit, the exponent, and potentially
3792 // some of the trailing significand field.
3793 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3794 uint64_t myexponent =
3795 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3796
3797 initialize(&S);
3798 assert(partCount() == mysignificand.size());
3799
 // The sign is the top bit of the encoding.
3800 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3801
3802 bool all_zero_significand = llvm::all_of(mysignificand, equal_to(0));
3803
3804 bool is_zero = myexponent == 0 && all_zero_significand;
3805
 // Only semantics with IEEE-754 non-finite behavior have an infinity
 // encoding.
3806 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3807 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3808 makeInf(sign);
3809 return;
3810 }
3811 }
3812
3813 bool is_nan = false;
3814
 // How a NaN is recognized depends on the format's NaN encoding:
 //  IEEE:         NaN exponent with a non-zero significand;
 //  AllOnes:      NaN exponent with every stored significand bit set;
 //  NegativeZero: the "negative zero" pattern (zero with the sign set).
3815 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3816 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3817 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3818 bool all_ones_significand =
3819 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3820 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3821 (!significand_mask ||
3822 mysignificand[mysignificand.size() - 1] == significand_mask);
3823 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3824 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3825 is_nan = is_zero && sign;
3826 }
3827
3828 if (is_nan) {
3829 category = fcNaN;
3830 exponent = ::exponentNaN(S);
3831 std::copy_n(mysignificand.begin(), mysignificand.size(),
3832 significandParts());
3833 return;
3834 }
3835
3836 if (is_zero) {
3837 makeZero(sign);
3838 return;
3839 }
3840
 // Finite non-zero value. A zero exponent field marks a denormal, which is
 // kept at minExponent without the integer bit; otherwise the implicit
 // integer bit is made explicit in the internal significand.
3841 category = fcNormal;
3842 exponent = myexponent - bias;
3843 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3844 if (myexponent == 0) // denormal
3845 exponent = S.minExponent;
3846 else
3847 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3848}
3849
/// Initialize this value from \p api decoded with semIEEEquad semantics;
/// forwards to initFromIEEEAPInt.
3850void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3851 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3852}
3853
/// Initialize this value from \p api decoded with semIEEEdouble semantics;
/// forwards to initFromIEEEAPInt.
3854void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3855 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
3856}
3857
/// Initialize this value from \p api decoded with semIEEEsingle semantics;
/// forwards to initFromIEEEAPInt.
3858void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3859 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
3860}
3861
/// Initialize this value from \p api decoded with semBFloat semantics;
/// forwards to initFromIEEEAPInt.
3862void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3863 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
3864}
3865
/// Initialize this value from \p api decoded with semIEEEhalf semantics;
/// forwards to initFromIEEEAPInt.
3866void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3867 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
3868}
3869
/// Initialize this value from \p api decoded with semFloat8E5M2 semantics;
/// forwards to initFromIEEEAPInt.
3870void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3871 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
3872}
3873
/// Initialize this value from \p api decoded with semFloat8E5M2FNUZ semantics;
/// forwards to initFromIEEEAPInt.
3874void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3875 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
3876}
3877
/// Initialize this value from \p api decoded with semFloat8E4M3 semantics;
/// forwards to initFromIEEEAPInt.
3878void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3879 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
3880}
3881
/// Initialize this value from \p api decoded with semFloat8E4M3FN semantics;
/// forwards to initFromIEEEAPInt.
3882void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3883 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
3884}
3885
/// Initialize this value from \p api decoded with semFloat8E4M3FNUZ semantics;
/// forwards to initFromIEEEAPInt.
3886void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3887 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
3888}
3889
/// Initialize this value from \p api decoded with semFloat8E4M3B11FNUZ
/// semantics; forwards to initFromIEEEAPInt.
3890void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3891 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
3892}
3893
/// Initialize this value from \p api decoded with semFloat8E3M4 semantics;
/// forwards to initFromIEEEAPInt.
3894void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3895 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
3896}
3897
/// Initialize this value from \p api decoded with semFloatTF32 semantics;
/// forwards to initFromIEEEAPInt.
3898void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3899 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
3900}
3901
/// Initialize this value from \p api decoded with semFloat6E3M2FN semantics;
/// forwards to initFromIEEEAPInt.
3902void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3903 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
3904}
3905
/// Initialize this value from \p api decoded with semFloat6E2M3FN semantics;
/// forwards to initFromIEEEAPInt.
3906void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3907 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
3908}
3909
/// Initialize this value from \p api decoded with semFloat4E2M1FN semantics;
/// forwards to initFromIEEEAPInt.
3910void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3911 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
3912}
3913
3914/// Treat api as containing the bits of a floating point number.
3915void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3916 assert(api.getBitWidth() == Sem->sizeInBits);
3917 if (Sem == &APFloatBase::semIEEEhalf)
3918 return initFromHalfAPInt(api);
3919 if (Sem == &APFloatBase::semBFloat)
3920 return initFromBFloatAPInt(api);
3921 if (Sem == &APFloatBase::semIEEEsingle)
3922 return initFromFloatAPInt(api);
3923 if (Sem == &APFloatBase::semIEEEdouble)
3924 return initFromDoubleAPInt(api);
3925 if (Sem == &APFloatBase::semX87DoubleExtended)
3926 return initFromF80LongDoubleAPInt(api);
3927 if (Sem == &APFloatBase::semIEEEquad)
3928 return initFromQuadrupleAPInt(api);
3929 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
3930 return initFromPPCDoubleDoubleLegacyAPInt(api);
3931 if (Sem == &APFloatBase::semFloat8E5M2)
3932 return initFromFloat8E5M2APInt(api);
3933 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
3934 return initFromFloat8E5M2FNUZAPInt(api);
3935 if (Sem == &APFloatBase::semFloat8E4M3)
3936 return initFromFloat8E4M3APInt(api);
3937 if (Sem == &APFloatBase::semFloat8E4M3FN)
3938 return initFromFloat8E4M3FNAPInt(api);
3939 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
3940 return initFromFloat8E4M3FNUZAPInt(api);
3941 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
3942 return initFromFloat8E4M3B11FNUZAPInt(api);
3943 if (Sem == &APFloatBase::semFloat8E3M4)
3944 return initFromFloat8E3M4APInt(api);
3945 if (Sem == &APFloatBase::semFloatTF32)
3946 return initFromFloatTF32APInt(api);
3947 if (Sem == &APFloatBase::semFloat8E8M0FNU)
3948 return initFromFloat8E8M0FNUAPInt(api);
3949 if (Sem == &APFloatBase::semFloat6E3M2FN)
3950 return initFromFloat6E3M2FNAPInt(api);
3951 if (Sem == &APFloatBase::semFloat6E2M3FN)
3952 return initFromFloat6E2M3FNAPInt(api);
3953 if (Sem == &APFloatBase::semFloat4E2M1FN)
3954 return initFromFloat4E2M1FNAPInt(api);
3955
3956 llvm_unreachable("unsupported semantics");
3957}
3958
3959/// Make this number the largest magnitude normal number in the given
3960/// semantics.
3961void IEEEFloat::makeLargest(bool Negative) {
3962 if (Negative && !semantics->hasSignedRepr)
3964 "This floating point format does not support signed values");
3965 // We want (in interchange format):
3966 // sign = {Negative}
3967 // exponent = 1..10
3968 // significand = 1..1
3969 category = fcNormal;
3970 sign = Negative;
3971 exponent = semantics->maxExponent;
3972
3973 // Use memset to set all but the highest integerPart to all ones.
3974 integerPart *significand = significandParts();
3975 unsigned PartCount = partCount();
3976 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3977
3978 // Set the high integerPart especially setting all unused top bits for
3979 // internal consistency.
3980 const unsigned NumUnusedHighBits =
3981 PartCount*integerPartWidth - semantics->precision;
3982 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3983 ? (~integerPart(0) >> NumUnusedHighBits)
3984 : 0;
3985 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3986 semantics->nanEncoding == fltNanEncoding::AllOnes &&
3987 (semantics->precision > 1))
3988 significand[0] &= ~integerPart(1);
3989}
3990
3991/// Make this number the smallest magnitude denormal number in the given
3992/// semantics.
3993void IEEEFloat::makeSmallest(bool Negative) {
3994 if (Negative && !semantics->hasSignedRepr)
3996 "This floating point format does not support signed values");
3997 // We want (in interchange format):
3998 // sign = {Negative}
3999 // exponent = 0..0
4000 // significand = 0..01
4001 category = fcNormal;
4002 sign = Negative;
4003 exponent = semantics->minExponent;
4004 APInt::tcSet(significandParts(), 1, partCount());
4005}
4006
// NOTE(review): the function header (listing line 4007) and the start of the
// call that takes the string literal below (listing line 4009) are missing
// from this extract. Judging by the body this sets the value to the smallest
// normalized number, and the missing call is presumably llvm_unreachable( as
// in makeInf/makeZero -- confirm against the upstream file.
4008 if (Negative && !semantics->hasSignedRepr)
4010 "This floating point format does not support signed values");
4011 // We want (in interchange format):
4012 // sign = {Negative}
4013 // exponent = 0..0
4014 // significand = 10..0
4015
4016 category = fcNormal;
4017 zeroSignificand();
4018 sign = Negative;
4019 exponent = semantics->minExponent;
// Set only the top bit of the significand (bit index precision - 1).
4020 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4021}
4022
4023IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4024 initFromAPInt(&Sem, API);
4025}
4026
// NOTE(review): the header (listing line 4027) is missing from this extract;
// the body reads a value named `f`, presumably a `float` constructor
// parameter -- confirm against the upstream file.
// Initializes this value with IEEE single semantics from the bits of `f`.
4028 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4029}
4030
// NOTE(review): the header (listing line 4031) is missing from this extract;
// the body reads a value named `d`, presumably a `double` constructor
// parameter -- confirm against the upstream file.
// Initializes this value with IEEE double semantics from the bits of `d`.
4032 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4033}
4034
4035namespace {
4036 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4037 Buffer.append(Str.begin(), Str.end());
4038 }
4039
4040 /// Removes data from the given significand until it is no more
4041 /// precise than is required for the desired precision.
4042 void AdjustToPrecision(APInt &significand,
4043 int &exp, unsigned FormatPrecision) {
4044 unsigned bits = significand.getActiveBits();
4045
4046 // 196/59 is a very slight overestimate of lg_2(10).
4047 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4048
4049 if (bits <= bitsRequired) return;
4050
4051 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4052 if (!tensRemovable) return;
4053
4054 exp += tensRemovable;
4055
4056 APInt divisor(significand.getBitWidth(), 1);
4057 APInt powten(significand.getBitWidth(), 10);
4058 while (true) {
4059 if (tensRemovable & 1)
4060 divisor *= powten;
4061 tensRemovable >>= 1;
4062 if (!tensRemovable) break;
4063 powten *= powten;
4064 }
4065
4066 significand = significand.udiv(divisor);
4067
4068 // Truncate the significand down to its active bit count.
4069 significand = significand.trunc(significand.getActiveBits());
4070 }
4071
4072
4073 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4074 int &exp, unsigned FormatPrecision) {
4075 unsigned N = buffer.size();
4076 if (N <= FormatPrecision) return;
4077
4078 // The most significant figures are the last ones in the buffer.
4079 unsigned FirstSignificant = N - FormatPrecision;
4080
4081 // Round.
4082 // FIXME: this probably shouldn't use 'round half up'.
4083
4084 // Rounding down is just a truncation, except we also want to drop
4085 // trailing zeros from the new result.
4086 if (buffer[FirstSignificant - 1] < '5') {
4087 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4088 FirstSignificant++;
4089
4090 exp += FirstSignificant;
4091 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4092 return;
4093 }
4094
4095 // Rounding up requires a decimal add-with-carry. If we continue
4096 // the carry, the newly-introduced zeros will just be truncated.
4097 for (unsigned I = FirstSignificant; I != N; ++I) {
4098 if (buffer[I] == '9') {
4099 FirstSignificant++;
4100 } else {
4101 buffer[I]++;
4102 break;
4103 }
4104 }
4105
4106 // If we carried through, we have exactly one digit of precision.
4107 if (FirstSignificant == N) {
4108 exp += FirstSignificant;
4109 buffer.clear();
4110 buffer.push_back('1');
4111 return;
4112 }
4113
4114 exp += FirstSignificant;
4115 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4116 }
4117
4118 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4119 APInt significand, unsigned FormatPrecision,
4120 unsigned FormatMaxPadding, bool TruncateZero) {
4121 const int semanticsPrecision = significand.getBitWidth();
4122
4123 if (isNeg)
4124 Str.push_back('-');
4125
4126 // Set FormatPrecision if zero. We want to do this before we
4127 // truncate trailing zeros, as those are part of the precision.
4128 if (!FormatPrecision) {
4129 // We use enough digits so the number can be round-tripped back to an
4130 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4131 // Accurately" by Steele and White.
4132 // FIXME: Using a formula based purely on the precision is conservative;
4133 // we can print fewer digits depending on the actual value being printed.
4134
4135 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4136 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4137 }
4138
4139 // Ignore trailing binary zeros.
4140 int trailingZeros = significand.countr_zero();
4141 exp += trailingZeros;
4142 significand.lshrInPlace(trailingZeros);
4143
4144 // Change the exponent from 2^e to 10^e.
4145 if (exp == 0) {
4146 // Nothing to do.
4147 } else if (exp > 0) {
4148 // Just shift left.
4149 significand = significand.zext(semanticsPrecision + exp);
4150 significand <<= exp;
4151 exp = 0;
4152 } else { /* exp < 0 */
4153 int texp = -exp;
4154
4155 // We transform this using the identity:
4156 // (N)(2^-e) == (N)(5^e)(10^-e)
4157 // This means we have to multiply N (the significand) by 5^e.
4158 // To avoid overflow, we have to operate on numbers large
4159 // enough to store N * 5^e:
4160 // log2(N * 5^e) == log2(N) + e * log2(5)
4161 // <= semantics->precision + e * 137 / 59
4162 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4163
4164 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4165
4166 // Multiply significand by 5^e.
4167 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4168 significand = significand.zext(precision);
4169 APInt five_to_the_i(precision, 5);
4170 while (true) {
4171 if (texp & 1)
4172 significand *= five_to_the_i;
4173
4174 texp >>= 1;
4175 if (!texp)
4176 break;
4177 five_to_the_i *= five_to_the_i;
4178 }
4179 }
4180
4181 AdjustToPrecision(significand, exp, FormatPrecision);
4182
4184
4185 // Fill the buffer.
4186 unsigned precision = significand.getBitWidth();
4187 if (precision < 4) {
4188 // We need enough precision to store the value 10.
4189 precision = 4;
4190 significand = significand.zext(precision);
4191 }
4192 APInt ten(precision, 10);
4193 APInt digit(precision, 0);
4194
4195 bool inTrail = true;
4196 while (significand != 0) {
4197 // digit <- significand % 10
4198 // significand <- significand / 10
4199 APInt::udivrem(significand, ten, significand, digit);
4200
4201 unsigned d = digit.getZExtValue();
4202
4203 // Drop trailing zeros.
4204 if (inTrail && !d)
4205 exp++;
4206 else {
4207 buffer.push_back((char) ('0' + d));
4208 inTrail = false;
4209 }
4210 }
4211
4212 assert(!buffer.empty() && "no characters in buffer!");
4213
4214 // Drop down to FormatPrecision.
4215 // TODO: don't do more precise calculations above than are required.
4216 AdjustToPrecision(buffer, exp, FormatPrecision);
4217
4218 unsigned NDigits = buffer.size();
4219
4220 // Check whether we should use scientific notation.
4221 bool FormatScientific;
4222 if (!FormatMaxPadding) {
4223 FormatScientific = true;
4224 } else {
4225 if (exp >= 0) {
4226 // 765e3 --> 765000
4227 // ^^^
4228 // But we shouldn't make the number look more precise than it is.
4229 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4230 NDigits + (unsigned) exp > FormatPrecision);
4231 } else {
4232 // Power of the most significant digit.
4233 int MSD = exp + (int) (NDigits - 1);
4234 if (MSD >= 0) {
4235 // 765e-2 == 7.65
4236 FormatScientific = false;
4237 } else {
4238 // 765e-5 == 0.00765
4239 // ^ ^^
4240 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4241 }
4242 }
4243 }
4244
4245 // Scientific formatting is pretty straightforward.
4246 if (FormatScientific) {
4247 exp += (NDigits - 1);
4248
4249 Str.push_back(buffer[NDigits-1]);
4250 Str.push_back('.');
4251 if (NDigits == 1 && TruncateZero)
4252 Str.push_back('0');
4253 else
4254 for (unsigned I = 1; I != NDigits; ++I)
4255 Str.push_back(buffer[NDigits-1-I]);
4256 // Fill with zeros up to FormatPrecision.
4257 if (!TruncateZero && FormatPrecision > NDigits - 1)
4258 Str.append(FormatPrecision - NDigits + 1, '0');
4259 // For !TruncateZero we use lower 'e'.
4260 Str.push_back(TruncateZero ? 'E' : 'e');
4261
4262 Str.push_back(exp >= 0 ? '+' : '-');
4263 if (exp < 0)
4264 exp = -exp;
4265 SmallVector<char, 6> expbuf;
4266 do {
4267 expbuf.push_back((char) ('0' + (exp % 10)));
4268 exp /= 10;
4269 } while (exp);
4270 // Exponent always at least two digits if we do not truncate zeros.
4271 if (!TruncateZero && expbuf.size() < 2)
4272 expbuf.push_back('0');
4273 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4274 Str.push_back(expbuf[E-1-I]);
4275 return;
4276 }
4277
4278 // Non-scientific, positive exponents.
4279 if (exp >= 0) {
4280 for (unsigned I = 0; I != NDigits; ++I)
4281 Str.push_back(buffer[NDigits-1-I]);
4282 for (unsigned I = 0; I != (unsigned) exp; ++I)
4283 Str.push_back('0');
4284 return;
4285 }
4286
4287 // Non-scientific, negative exponents.
4288
4289 // The number of digits to the left of the decimal point.
4290 int NWholeDigits = exp + (int) NDigits;
4291
4292 unsigned I = 0;
4293 if (NWholeDigits > 0) {
4294 for (; I != (unsigned) NWholeDigits; ++I)
4295 Str.push_back(buffer[NDigits-I-1]);
4296 Str.push_back('.');
4297 } else {
4298 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4299
4300 Str.push_back('0');
4301 Str.push_back('.');
4302 for (unsigned Z = 1; Z != NZeros; ++Z)
4303 Str.push_back('0');
4304 }
4305
4306 for (; I != NDigits; ++I)
4307 Str.push_back(buffer[NDigits-I-1]);
4308
4309 }
4310} // namespace
4311
4312void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4313 unsigned FormatMaxPadding, bool TruncateZero) const {
4314 switch (category) {
4315 case fcInfinity:
4316 if (isNegative())
4317 return append(Str, "-Inf");
4318 else
4319 return append(Str, "+Inf");
4320
4321 case fcNaN: return append(Str, "NaN");
4322
4323 case fcZero:
4324 if (isNegative())
4325 Str.push_back('-');
4326
4327 if (!FormatMaxPadding) {
4328 if (TruncateZero)
4329 append(Str, "0.0E+0");
4330 else {
4331 append(Str, "0.0");
4332 if (FormatPrecision > 1)
4333 Str.append(FormatPrecision - 1, '0');
4334 append(Str, "e+00");
4335 }
4336 } else {
4337 Str.push_back('0');
4338 }
4339 return;
4340
4341 case fcNormal:
4342 break;
4343 }
4344
4345 // Decompose the number into an APInt and an exponent.
4346 int exp = exponent - ((int) semantics->precision - 1);
4347 APInt significand(
4348 semantics->precision,
4349 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4350
4351 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4352 FormatMaxPadding, TruncateZero);
4353
4354}
4355
// NOTE(review): the function header (listing line 4356) is missing from this
// extract. The body returns an int: INT_MIN as a "no answer" value, otherwise
// an exponent.
// Non-finite values and zero yield INT_MIN.
4357 if (!isFinite() || isZero())
4358 return INT_MIN;
4359
4360 const integerPart *Parts = significandParts();
4361 const int PartCount = partCountForBits(semantics->precision);
4362
// Count set significand bits; more than one set bit yields INT_MIN.
4363 int PopCount = 0;
4364 for (int i = 0; i < PartCount; ++i) {
4365 PopCount += llvm::popcount(Parts[i]);
4366 if (PopCount > 1)
4367 return INT_MIN;
4368 }
4369
// Exactly one bit is set. Away from the minimum exponent the stored exponent
// is returned as-is.
4370 if (exponent != semantics->minExponent)
4371 return exponent;
4372
// At the minimum exponent the result is adjusted by the position of the set
// bit: CountrParts accumulates the bit offset of the part being inspected.
4373 int CountrParts = 0;
4374 for (int i = 0; i < PartCount;
4375 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4376 if (Parts[i] != 0) {
4377 return exponent - semantics->precision + CountrParts +
4378 llvm::countr_zero(Parts[i]) + 1;
4379 }
4380 }
4381
4382 llvm_unreachable("didn't find the set bit");
4383}
4384
// NOTE(review): the function header (listing line 4385) is missing from this
// extract. The body returns a bool.
// Only NaNs can be signaling.
4386 if (!isNaN())
4387 return false;
// NanOnly and FiniteOnly formats always report false here.
4388 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4389 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4390 return false;
4391
4392 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4393 // first bit of the trailing significand being 0.
4394 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4395}
4396
4397/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4398///
4399/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4400/// appropriate sign switching before/after the computation.
// NOTE(review): the function header (listing line 4401) is missing from this
// extract. The body reads a flag named `nextDown` and returns an opStatus.
4402 // If we are performing nextDown, swap sign so we have -x.
4403 if (nextDown)
4404 changeSign();
4405
4406 // Compute nextUp(x)
4407 opStatus result = opOK;
4408
4409 // Handle each float category separately.
4410 switch (category) {
4411 case fcInfinity:
4412 // nextUp(+inf) = +inf
4413 if (!isNegative())
4414 break;
4415 // nextUp(-inf) = -getLargest()
4416 makeLargest(true);
4417 break;
4418 case fcNaN:
4419 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4420 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4421 // change the payload.
4422 if (isSignaling()) {
4423 result = opInvalidOp;
4424 // For consistency, propagate the sign of the sNaN to the qNaN.
4425 makeNaN(false, isNegative(), nullptr);
4426 }
4427 break;
4428 case fcZero:
4429 // nextUp(pm 0) = +getSmallest()
4430 makeSmallest(false);
4431 break;
4432 case fcNormal:
4433 // nextUp(-getSmallest()) = -0
4434 if (isSmallest() && isNegative()) {
4435 APInt::tcSet(significandParts(), 0, partCount());
4436 category = fcZero;
4437 exponent = 0;
4438 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4439 sign = false;
// NOTE(review): listing line 4441 (the statement guarded by the `if` below)
// is missing from this extract; as shown, the `if` would appear to guard the
// `break`. Restore the missing statement from the upstream file.
4440 if (!semantics->hasZero)
4442 break;
4443 }
4444
4445 if (isLargest() && !isNegative()) {
4446 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4447 // nextUp(getLargest()) == NAN
4448 makeNaN();
4449 break;
// NOTE(review): listing line 4451 (the right-hand side of the comparison
// below and the opening brace) is missing from this extract; per the comment
// that follows, this branch leaves the largest value unchanged.
4450 } else if (semantics->nonFiniteBehavior ==
4452 // nextUp(getLargest()) == getLargest()
4453 break;
4454 } else {
4455 // nextUp(getLargest()) == INFINITY
4456 APInt::tcSet(significandParts(), 0, partCount());
4457 category = fcInfinity;
4458 exponent = semantics->maxExponent + 1;
4459 break;
4460 }
4461 }
4462
4463 // nextUp(normal) == normal + inc.
4464 if (isNegative()) {
4465 // If we are negative, we need to decrement the significand.
4466
4467 // We only cross a binade boundary that requires adjusting the exponent
4468 // if:
4469 // 1. exponent != semantics->minExponent. This implies we are not in the
4470 // smallest binade or are dealing with denormals.
4471 // 2. Our significand excluding the integral bit is all zeros.
4472 bool WillCrossBinadeBoundary =
4473 exponent != semantics->minExponent && isSignificandAllZeros();
4474
4475 // Decrement the significand.
4476 //
4477 // We always do this since:
4478 // 1. If we are dealing with a non-binade decrement, by definition we
4479 // just decrement the significand.
4480 // 2. If we are dealing with a normal -> normal binade decrement, since
4481 // we have an explicit integral bit the fact that all bits but the
4482 // integral bit are zero implies that subtracting one will yield a
4483 // significand with 0 integral bit and 1 in all other spots. Thus we
4484 // must just adjust the exponent and set the integral bit to 1.
4485 // 3. If we are dealing with a normal -> denormal binade decrement,
4486 // since we set the integral bit to 0 when we represent denormals, we
4487 // just decrement the significand.
4488 integerPart *Parts = significandParts();
4489 APInt::tcDecrement(Parts, partCount());
4490
4491 if (WillCrossBinadeBoundary) {
4492 // Our result is a normal number. Do the following:
4493 // 1. Set the integral bit to 1.
4494 // 2. Decrement the exponent.
4495 APInt::tcSetBit(Parts, semantics->precision - 1);
4496 exponent--;
4497 }
4498 } else {
4499 // If we are positive, we need to increment the significand.
4500
4501 // We only cross a binade boundary that requires adjusting the exponent if
4502 // the input is not a denormal and all of said input's significand bits
4503 // are set. If all of said conditions are true: clear the significand, set
4504 // the integral bit to 1, and increment the exponent. If we have a
4505 // denormal always increment since moving denormals and the numbers in the
4506 // smallest normal binade have the same exponent in our representation.
4507 // If there are only exponents, any increment always crosses the
4508 // BinadeBoundary.
4509 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4510 (!isDenormal() && isSignificandAllOnes());
4511
4512 if (WillCrossBinadeBoundary) {
4513 integerPart *Parts = significandParts();
4514 APInt::tcSet(Parts, 0, partCount());
4515 APInt::tcSetBit(Parts, semantics->precision - 1);
4516 assert(exponent != semantics->maxExponent &&
4517 "We can not increment an exponent beyond the maxExponent allowed"
4518 " by the given floating point semantics.");
4519 exponent++;
4520 } else {
4521 incrementSignificand();
4522 }
4523 }
4524 break;
4525 }
4526
4527 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4528 if (nextDown)
4529 changeSign();
4530
4531 return result;
4532}
4533
// NOTE(review): the function header (listing line 4534) is missing from this
// extract. The body returns an APInt built from the low (precision - 1) bits
// of the significand and must only be called on NaN values.
4535 assert(isNaN() && "Can only be called on NaN values");
4536 // Number of bits in the payload, excluding the (maybe implied) integer bit.
4537 unsigned Bits = semantics->precision - 1;
4538 return APInt(Bits, ArrayRef(significandParts(), partCountForBits(Bits)));
4539}
4540
4541APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4542 return ::exponentNaN(*semantics);
4543}
4544
4545APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4546 return ::exponentInf(*semantics);
4547}
4548
4549APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4550 return ::exponentZero(*semantics);
4551}
4552
4553void IEEEFloat::makeInf(bool Negative) {
4554 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4555 llvm_unreachable("This floating point format does not support Inf");
4556
4557 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4558 // There is no Inf, so make NaN instead.
4559 makeNaN(false, Negative);
4560 return;
4561 }
4562 category = fcInfinity;
4563 sign = Negative;
4564 exponent = exponentInf();
4565 APInt::tcSet(significandParts(), 0, partCount());
4566}
4567
4568void IEEEFloat::makeZero(bool Negative) {
4569 if (!semantics->hasZero)
4570 llvm_unreachable("This floating point format does not support Zero");
4571
4572 category = fcZero;
4573 sign = Negative;
4574 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4575 // Merge negative zero to positive because 0b10000...000 is used for NaN
4576 sign = false;
4577 }
4578 exponent = exponentZero();
4579 APInt::tcSet(significandParts(), 0, partCount());
4580}
4581
// NOTE(review): the function header (listing line 4582) is missing from this
// extract. Must only be called on NaN values (asserted below).
4583 assert(isNaN());
// Sets significand bit (precision - 2), the bit isSignaling tests, except in
// NanOnly formats, where the significand is left untouched.
4584 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4585 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4586}
4587
4588int ilogb(const IEEEFloat &Arg) {
4589 if (Arg.isNaN())
4590 return APFloat::IEK_NaN;
4591 if (Arg.isZero())
4592 return APFloat::IEK_Zero;
4593 if (Arg.isInfinity())
4594 return APFloat::IEK_Inf;
4595 if (!Arg.isDenormal())
4596 return Arg.exponent;
4597
4598 IEEEFloat Normalized(Arg);
4599 int SignificandBits = Arg.getSemantics().precision - 1;
4600
4601 Normalized.exponent += SignificandBits;
4602 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4603 return Normalized.exponent - SignificandBits;
4604}
4605
// NOTE(review): the function header (listing line 4606) is missing from this
// extract. The body adjusts the exponent of a value `X` by `Exp`, normalizes
// it with `RoundingMode`, and returns `X`.
4607 auto MaxExp = X.getSemantics().maxExponent;
4608 auto MinExp = X.getSemantics().minExponent;
4609
4610 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4611 // overflow; clamp it to a safe range before adding, but ensure that the range
4612 // is large enough that the clamp does not change the result. The range we
4613 // need to support is the difference between the largest possible exponent and
4614 // the normalized exponent of half the smallest denormal.
4615
4616 int SignificandBits = X.getSemantics().precision - 1;
4617 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4618
4619 // Clamp to one past the range ends to let normalize handle overflow.
4620 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4621 X.normalize(RoundingMode, lfExactlyZero);
// A NaN result is always returned quiet.
4622 if (X.isNaN())
4623 X.makeQuiet();
4624 return X;
4625}
4626
4627IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4628 Exp = ilogb(Val);
4629
4630 // Quiet signalling nans.
4631 if (Exp == APFloat::IEK_NaN) {
4632 IEEEFloat Quiet(Val);
4633 Quiet.makeQuiet();
4634 return Quiet;
4635 }
4636
4637 if (Exp == APFloat::IEK_Inf)
4638 return Val;
4639
4640 // 1 is added because frexp is defined to return a normalized fraction in
4641 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4642 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4643 return scalbn(Val, -Exp, RM);
4644}
4645
// NOTE(review): the constructor header (listing line 4646) is missing from
// this extract; it takes semantics `S`.
// Allocates two APFloats constructed with IEEE double semantics; only
// semPPCDoubleDouble is accepted (asserted).
4647 : Semantics(&S),
4648 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4649 APFloat(APFloatBase::semIEEEdouble)}) {
4650 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4651}
4652
// NOTE(review): the constructor header (listing line 4653) is missing from
// this extract; it takes semantics `S`.
// Same as the default form, but both components are constructed with the
// `uninitialized` tag.
4654 : Semantics(&S), Floats(new APFloat[2]{
4655 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4656 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4657 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4658}
4659
// NOTE(review): the constructor header (listing line 4660) is missing from
// this extract; it takes semantics `S` and a value `I` whose type is not
// visible here.
// `I` initializes the first component only; the second component is
// constructed from the semantics alone.
4661 : Semantics(&S),
4662 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4663 APFloat(APFloatBase::semIEEEdouble)}) {
4664 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4665}
4666
// NOTE(review): the constructor header (listing line 4667) is missing from
// this extract; it takes semantics `S` and `I`, presumably an APInt with at
// least two 64-bit words -- confirm against the upstream file.
// Raw word 0 of `I` becomes the bit pattern of the first component and raw
// word 1 that of the second.
4668 : Semantics(&S),
4669 Floats(new APFloat[2]{
4670 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4671 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4672 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4673}
4674
// NOTE(review): the first line of the constructor header (listing line 4675)
// is missing from this extract; the visible part shows a trailing
// `APFloat &&Second` parameter, and the initializers also use `S` and `First`.
// Takes ownership of both components by move; each must already have IEEE
// double semantics (asserted).
4676 APFloat &&Second)
4677 : Semantics(&S),
4678 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4679 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4680 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4681 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4682}
4683
// NOTE(review): the constructor header (listing line 4684) is missing from
// this extract; judging by the initializers this is the copy constructor
// from `RHS`.
// Copies both components into a fresh array, or leaves Floats null when
// RHS.Floats is null.
4685 : Semantics(RHS.Semantics),
4686 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4687 APFloat(RHS.Floats[1])}
4688 : nullptr) {
4689 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4690}
4691
// NOTE(review): the constructor header (listing line 4692) is missing from
// this extract; judging by the body this is the move constructor from `RHS`.
// Takes over RHS's Floats pointer and leaves RHS with semBogus semantics and
// a null Floats pointer.
4693 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4694 RHS.Semantics = &APFloatBase::semBogus;
4695 RHS.Floats = nullptr;
4696 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4697}
4698
// NOTE(review): the function header (listing line 4699) is missing from this
// extract; the body assigns from `RHS` and returns *this, so this is
// presumably the copy-assignment operator.
// Fast path: same semantics and RHS has components, so assign element-wise
// into the existing array (this also covers self-assignment).
4700 if (Semantics == RHS.Semantics && RHS.Floats) {
4701 Floats[0] = RHS.Floats[0];
4702 Floats[1] = RHS.Floats[1];
4703 } else if (this != &RHS) {
// Otherwise destroy this object and copy-construct it again in place.
4704 this->~DoubleAPFloat();
4705 new (this) DoubleAPFloat(RHS);
4706 }
4707 return *this;
4708}
4709
4710// Returns a result such that:
4711// 1. abs(Lo) <= ulp(Hi)/2
4712// 2. Hi == RTNE(Hi + Lo)
4713// 3. Hi + Lo == X + Y
4714//
4715// Requires that log2(X) >= log2(Y).
4716static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4717 if (!X.isFinite())
4718 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4719 APFloat Hi = X + Y;
4720 APFloat Delta = Hi - X;
4721 APFloat Lo = Y - Delta;
4722 return {Hi, Lo};
4723}
4724
4725// Implement addition, subtraction, multiplication and division based on:
4726// "Software for Doubled-Precision Floating-Point Computations",
4727// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl stores the sum of the pairs (a, aa) and (c, cc) into Floats[0]
// (high part) and Floats[1] (low part) and returns the OR of the statuses of
// the individual operations, except for the two paths noted below.
4728APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4729 const APFloat &c, const APFloat &cc,
4730 roundingMode RM) {
4731 int Status = opOK;
4732 APFloat z = a;
4733 Status |= z.add(c, RM);
4734 if (!z.isFinite()) {
// a + c is not finite. Anything other than infinity is returned directly
// with a zero low part.
4735 if (!z.isInfinity()) {
4736 Floats[0] = std::move(z);
4737 Floats[1].makeZero(/* Neg = */ false);
4738 return (opStatus)Status;
4739 }
// a + c gave infinity: discard the status so far and retry, adding the low
// parts first and the larger-magnitude high part last.
4740 Status = opOK;
4741 auto AComparedToC = a.compareAbsoluteValue(c);
4742 z = cc;
4743 Status |= z.add(aa, RM);
4744 if (AComparedToC == APFloat::cmpGreaterThan) {
4745 // z = cc + aa + c + a;
4746 Status |= z.add(c, RM);
4747 Status |= z.add(a, RM);
4748 } else {
4749 // z = cc + aa + a + c;
4750 Status |= z.add(a, RM);
4751 Status |= z.add(c, RM);
4752 }
4753 if (!z.isFinite()) {
4754 Floats[0] = std::move(z);
4755 Floats[1].makeZero(/* Neg = */ false);
4756 return (opStatus)Status;
4757 }
4758 Floats[0] = z;
4759 APFloat zz = aa;
4760 Status |= zz.add(cc, RM);
4761 if (AComparedToC == APFloat::cmpGreaterThan) {
4762 // Floats[1] = a - z + c + zz;
4763 Floats[1] = a;
4764 Status |= Floats[1].subtract(z, RM);
4765 Status |= Floats[1].add(c, RM);
4766 Status |= Floats[1].add(zz, RM);
4767 } else {
4768 // Floats[1] = c - z + a + zz;
4769 Floats[1] = c;
4770 Status |= Floats[1].subtract(z, RM);
4771 Status |= Floats[1].add(a, RM);
4772 Status |= Floats[1].add(zz, RM);
4773 }
4774 } else {
4775 // q = a - z;
4776 APFloat q = a;
4777 Status |= q.subtract(z, RM);
4778
4779 // zz = q + c + (a - (q + z)) + aa + cc;
4780 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4781 auto zz = q;
4782 Status |= zz.add(c, RM);
4783 Status |= q.add(z, RM);
4784 Status |= q.subtract(a, RM);
4785 q.changeSign();
4786 Status |= zz.add(q, RM);
4787 Status |= zz.add(aa, RM);
4788 Status |= zz.add(cc, RM);
// A +0 correction term: the result is z with a zero low part. This path
// returns opOK regardless of the accumulated status.
4789 if (zz.isZero() && !zz.isNegative()) {
4790 Floats[0] = std::move(z);
4791 Floats[1].makeZero(/* Neg = */ false);
4792 return opOK;
4793 }
4794 Floats[0] = z;
4795 Status |= Floats[0].add(zz, RM);
4796 if (!Floats[0].isFinite()) {
4797 Floats[1].makeZero(/* Neg = */ false);
4798 return (opStatus)Status;
4799 }
// Floats[1] = z - Floats[0] + zz
4800 Floats[1] = std::move(z);
4801 Status |= Floats[1].subtract(Floats[0], RM);
4802 Status |= Floats[1].add(zz, RM);
4803 }
4804 return (opStatus)Status;
4805}
4806
4807APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4808 const DoubleAPFloat &RHS,
4809 DoubleAPFloat &Out,
4810 roundingMode RM) {
4811 if (LHS.getCategory() == fcNaN) {
4812 Out = LHS;
4813 return opOK;
4814 }
4815 if (RHS.getCategory() == fcNaN) {
4816 Out = RHS;
4817 return opOK;
4818 }
4819 if (LHS.getCategory() == fcZero) {
4820 Out = RHS;
4821 return opOK;
4822 }
4823 if (RHS.getCategory() == fcZero) {
4824 Out = LHS;
4825 return opOK;
4826 }
4827 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4828 LHS.isNegative() != RHS.isNegative()) {
4829 Out.makeNaN(false, Out.isNegative(), nullptr);
4830 return opInvalidOp;
4831 }
4832 if (LHS.getCategory() == fcInfinity) {
4833 Out = LHS;
4834 return opOK;
4835 }
4836 if (RHS.getCategory() == fcInfinity) {
4837 Out = RHS;
4838 return opOK;
4839 }
4840 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4841
4842 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4843 CC(RHS.Floats[1]);
4844 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4845 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4846 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4847 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4848 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4849 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4850 return Out.addImpl(A, AA, C, CC, RM);
4851}
4852
// NOTE(review): the first line of the function header (listing line 4853) is
// missing from this extract; the body uses `RHS` and `RM`.
// In-place addition: *this is both the left operand and the destination.
4854 roundingMode RM) {
4855 return addWithSpecial(*this, RHS, *this, RM);
4856}
4857
// NOTE(review): the first line of the function header (listing line 4858) is
// missing from this extract; the body uses `RHS` and `RM`.
// Implemented as -((-*this) + RHS): flip the sign, add, flip the sign back.
4859 roundingMode RM) {
4860 changeSign();
4861 auto Ret = add(RHS, RM);
4862 changeSign();
4863 return Ret;
4864}
4865
// NOTE(review): the function header (listing lines 4866-4867) is missing from
// this extract; the body uses `RHS` and `RM` and returns an opStatus.
// In-place multiplication: LHS and Out both refer to *this.
4868 const auto &LHS = *this;
4869 auto &Out = *this;
4870 /* Interesting observation: For special categories, finding the lowest
4871 common ancestor of the following layered graph gives the correct
4872 return category:
4873
4874 NaN
4875 / \
4876 Zero Inf
4877 \ /
4878 Normal
4879
4880 e.g. NaN * NaN = NaN
4881 Zero * Inf = NaN
4882 Normal * Zero = Zero
4883 Normal * Inf = Inf
4884 */
4885 if (LHS.getCategory() == fcNaN) {
4886 Out = LHS;
4887 return opOK;
4888 }
4889 if (RHS.getCategory() == fcNaN) {
4890 Out = RHS;
4891 return opOK;
4892 }
// NOTE(review): Zero * Inf produces a NaN but reports opOK here, whereas
// addWithSpecial reports opInvalidOp for Inf + (-Inf) -- confirm this is
// intended.
4893 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4894 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4895 Out.makeNaN(false, false, nullptr);
4896 return opOK;
4897 }
// NOTE(review): in the two branches below the zero/infinite operand is
// copied as-is, so the sign of the other operand is not applied -- confirm
// this is intended.
4898 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4899 Out = LHS;
4900 return opOK;
4901 }
4902 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4903 Out = RHS;
4904 return opOK;
4905 }
4906 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4907 "Special cases not handled exhaustively");
4908
4909 int Status = opOK;
// (A, B) are the high/low parts of *this, (C, D) those of RHS.
4910 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4911 // t = a * c
4912 APFloat T = A;
4913 Status |= T.multiply(C, RM);
// If the leading product is zero or not finite, it is the result.
4914 if (!T.isFiniteNonZero()) {
4915 Floats[0] = std::move(T);
4916 Floats[1].makeZero(/* Neg = */ false);
4917 return (opStatus)Status;
4918 }
4919
4920 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4921 APFloat Tau = A;
4922 T.changeSign();
4923 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4924 T.changeSign();
4925 {
4926 // v = a * d
4927 APFloat V = A;
4928 Status |= V.multiply(D, RM);
4929 // w = b * c
4930 APFloat W = B;
4931 Status |= W.multiply(C, RM);
4932 Status |= V.add(W, RM);
4933 // tau += v + w
4934 Status |= Tau.add(V, RM);
4935 }
4936 // u = t + tau
4937 APFloat U = T;
4938 Status |= U.add(Tau, RM);
4939
4940 Floats[0] = U;
4941 if (!U.isFinite()) {
4942 Floats[1].makeZero(/* Neg = */ false);
4943 } else {
4944 // Floats[1] = (t - u) + tau
4945 Status |= T.subtract(U, RM);
4946 Status |= T.add(Tau, RM);
4947 Floats[1] = std::move(T);
4948 }
4949 return (opStatus)Status;
4950}
4951
// NOTE(review): the function header (listing lines 4952-4953) is missing from
// this extract; the body uses `RHS` and `RM`.
// Division is done in the semPPCDoubleDoubleLegacy representation: both
// operands are converted via their bit patterns, divided there, and the
// result bits are converted back into *this.
4954 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4955 "Unexpected Semantics");
4956 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4957 auto Ret = Tmp.divide(
4958 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4959 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4960 return Ret;
4961}
4962
// NOTE(review): the function header (listing line 4963) is missing from this
// extract; the body uses `RHS`.
// The remainder is computed in the semPPCDoubleDoubleLegacy representation:
// both operands are converted via their bit patterns and the result bits are
// converted back into *this.
4964 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4965 "Unexpected Semantics");
4966 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4967 auto Ret = Tmp.remainder(
4968 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4969 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4970 return Ret;
4971}
4972
// NOTE(review): the function header (listing line 4973) is missing from this
// extract; the body uses `RHS`.
// The modulo is computed in the semPPCDoubleDoubleLegacy representation:
// both operands are converted via their bit patterns and the result bits are
// converted back into *this.
4974 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4975 "Unexpected Semantics");
4976 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4977 auto Ret = Tmp.mod(
4978 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4979 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4980 return Ret;
4981}
4982
// NOTE(review): parts of the function header (listing lines 4983-4984 and
// 4986) are missing from this extract; the body uses `Multiplicand`,
// `Addend` and `RM`.
// The fused multiply-add is computed in the semPPCDoubleDoubleLegacy
// representation: all three operands are converted via their bit patterns
// and the result bits are converted back into *this.
4985 const DoubleAPFloat &Addend,
4987 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
4988 "Unexpected Semantics");
4989 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
4990 auto Ret = Tmp.fusedMultiplyAdd(
4991 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
4992 Multiplicand.bitcastToAPInt()),
4993 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
4994 RM);
4995 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
4996 return Ret;
4997}
4998
// Rounds the double-double value (Hi + Lo) to an integral value under RM.
//
// Three paths are taken:
//  - Hi is not a finite non-zero value, or Lo is zero: the result is Hi
//    rounded on its own with a zero Lo.
//  - Hi is not an integer: the result is the rounded Hi (corrected for ties in
//    the round-to-nearest modes using the sign of Lo) with a zero Lo.
//  - Hi is an integer: Lo is rounded (with an adjusted rounding mode for
//    rmTowardZero) and the pair is renormalized with fastTwoSum.
// The returned status is that of the component rounding which decided the
// result (Hi in the first two paths, Lo in the last).
5000 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5001 "Unexpected Semantics");
5002 const APFloat &Hi = getFirst();
5003 const APFloat &Lo = getSecond();
5004
5005 APFloat RoundedHi = Hi;
5006 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
5007
5008 // We can reduce the problem to just the high part if the input:
5009 // 1. Represents a non-finite value.
5010 // 2. Has a component which is zero.
5011 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5012 Floats[0] = std::move(RoundedHi);
5013 Floats[1].makeZero(/*Neg=*/false);
5014 return HiStatus;
5015 }
5016
5017 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5018 // halfway point.
5019 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5020 APFloat TieBreaker) {
5021 // RoundingError tells us which direction we rounded:
5022 // - RoundingError > 0: we rounded up.
5023 // - RoundingError < 0: we rounded down.
5024 // Sterbenz' lemma ensures that RoundingError is exact.
5025 const APFloat RoundingError = Rounded - ToRound;
// Only step by one when the tie was broken against the sign of TieBreaker,
// i.e. the rounding moved away from where the low-order information points.
5026 if (TieBreaker.isNonZero() &&
5027 TieBreaker.isNegative() != RoundingError.isNegative() &&
5028 abs(RoundingError).isExactlyValue(0.5))
5029 Rounded.add(
5030 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5032 return Rounded;
5033 };
5034
5035 // Case 1: Hi is not an integer.
5036 // Special cases are for rounding modes that are sensitive to ties.
5037 if (RoundedHi != Hi) {
5038 // We need to consider the case where Hi was between two integers and the
5039 // rounding mode broke the tie when, in fact, Lo may have had a different
5040 // sign than Hi.
5041 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5042 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5043
5044 Floats[0] = std::move(RoundedHi);
5045 Floats[1].makeZero(/*Neg=*/false);
5046 return HiStatus;
5047 }
5048
5049 // Case 2: Hi is an integer.
5050 // Special cases are for rounding modes which are rounding towards or away from zero.
5051 RoundingMode LoRoundingMode;
5052 if (RM == rmTowardZero)
5053 // When our input is positive, we want the Lo component rounded toward
5054 // negative infinity to get the smallest result magnitude. Likewise,
5055 // negative inputs want the Lo component rounded toward positive infinity.
5056 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5057 else
5058 LoRoundingMode = RM;
5059
5060 APFloat RoundedLo = Lo;
5061 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5062 if (LoRoundingMode == rmNearestTiesToAway)
5063 // We need to consider the case where Lo was between two integers and the
5064 // rounding mode broke the tie when, in fact, Hi may have had a different
5065 // sign than Lo.
5066 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5067
5068 // We must ensure that the final result has no overlap between the two APFloat values.
5069 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5070
5071 Floats[0] = std::move(RoundedHi);
5072 Floats[1] = std::move(RoundedLo);
5073 return LoStatus;
5074}
5075
// Negating the double-double value flips the sign of both components.
5077 Floats[0].changeSign();
5078 Floats[1].changeSign();
5079}
5080
// Compares |*this| against |RHS|. The high parts decide the result unless
// their magnitudes are equal; in that case the low parts are compared, taking
// into account whether each low part adds to or subtracts from the magnitude
// of its high part.
5083 // Compare absolute values of the high parts.
5084 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5085 if (HiPartCmp != cmpEqual)
5086 return HiPartCmp;
5087
5088 // Zero, regardless of sign, is equal.
5089 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5090 return cmpEqual;
5091
5092 // At this point, |this->Hi| == |RHS.Hi|.
5093 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5094 // same, and Hi-|Lo| if signs are different.
5095 const bool ThisIsSubtractive =
5096 Floats[0].isNegative() != Floats[1].isNegative();
5097 const bool RHSIsSubtractive =
5098 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5099
5100 // Case 1: The low part of 'this' is zero.
5101 if (Floats[1].isZero())
5102 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5103 // If RHS is subtractive, its magnitude is smaller.
5104 // If RHS is additive, its magnitude is larger.
5105 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5106
5107 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5108 if (RHS.Floats[1].isZero())
5109 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5110 // If 'this' is subtractive, its magnitude is smaller.
5111 // If 'this' is additive, its magnitude is larger.
5112 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5113
5114 // If their natures differ, the additive one is larger.
5115 if (ThisIsSubtractive != RHSIsSubtractive)
5116 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5117
5118 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5119 // The comparison now depends on the magnitude of the low parts.
5120 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5121
5122 if (ThisIsSubtractive) {
5123 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5124 if (LoPartCmp == cmpLessThan)
5125 return cmpGreaterThan;
5126 if (LoPartCmp == cmpGreaterThan)
5127 return cmpLessThan;
5128 }
5129
5130 // If additive, the comparison of |Lo| is direct.
5131 // If equal, they are equal.
5132 return LoPartCmp;
5133}
5134
// The category of the double-double value is the category of its high part.
5136 return Floats[0].getCategory();
5137}
5138
5139bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5140
// Infinity is stored as an infinite high part (with the requested sign) and a
// positive-zero low part.
5142 Floats[0].makeInf(Neg);
5143 Floats[1].makeZero(/* Neg = */ false);
5144}
5145
// Zero is stored as a zero high part (with the requested sign) and a
// positive-zero low part.
5147 Floats[0].makeZero(Neg);
5148 Floats[1].makeZero(/* Neg = */ false);
5149}
5150
// Sets *this to the largest-magnitude finite value, built from two fixed IEEE
// double bit patterns, and negates both parts when Neg is set.
5152 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5153 "Unexpected Semantics");
// High part: 0x7fefffffffffffff is the largest finite IEEE double.
5154 Floats[0] =
5155 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
// Low part: fixed positive bit pattern paired with the high part above.
5156 Floats[1] =
5157 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5158 if (Neg)
5159 changeSign();
5160}
5161
// Sets *this to the smallest-magnitude non-zero value: the high part becomes
// the smallest value of its own format (with the requested sign) and the low
// part becomes positive zero.
5163 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5164 "Unexpected Semantics");
5165 Floats[0].makeSmallest(Neg);
5166 Floats[1].makeZero(/* Neg = */ false);
5167}
5168
// Sets *this to the smallest normalized double-double value. The high part is
// loaded from a fixed IEEE double bit pattern (biased exponent 0x036, zero
// fraction, i.e. 2^-969) and the low part is positive zero.
5170 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5171 "Unexpected Semantics");
5172 Floats[0] =
5173 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
// Only the high part carries the sign; the low part is zero.
5174 if (Neg)
5175 Floats[0].changeSign();
5176 Floats[1].makeZero(/* Neg = */ false);
5177}
5178
5179void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5180 Floats[0].makeNaN(SNaN, Neg, fill);
5181 Floats[1].makeZero(/* Neg = */ false);
5182}
5183
// Compares *this with RHS component-wise: the high parts are compared first,
// and the low parts are consulted only when the high parts compare equal.
5185 auto Result = Floats[0].compare(RHS.Floats[0]);
5186 // |Float[0]| > |Float[1]|
5187 if (Result == APFloat::cmpEqual)
5188 return Floats[1].compare(RHS.Floats[1]);
5189 return Result;
5190}
5191
// Two double-double values are bitwise equal only when both the high parts
// and the low parts are bitwise equal.
5193 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5194 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5195}
5196
// When component storage is present, the hash combines the hashes of the high
// and low parts; otherwise only the semantics pointer is hashed.
5198 if (Arg.Floats)
5199 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5200 return hash_combine(Arg.Semantics);
5201}
5202
// Packs the value into a 128-bit APInt: word 0 holds the raw 64-bit pattern of
// the high part and word 1 holds the raw 64-bit pattern of the low part.
5204 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5205 "Unexpected Semantics");
5206 uint64_t Data[] = {
5207 Floats[0].bitcastToAPInt().getRawData()[0],
5208 Floats[1].bitcastToAPInt().getRawData()[0],
5209 };
5210 return APInt(128, Data);
5211}
5212
5214 roundingMode RM) {
// Parsing is delegated to the legacy double-double implementation: the string
// S is converted into a semPPCDoubleDoubleLegacy value under RM, and the
// resulting bits are copied into *this. The legacy result is returned as-is.
5215 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5216 "Unexpected Semantics");
5217 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5218 auto Ret = Tmp.convertFromString(S, RM);
5219 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5220 return Ret;
5221}
5222
5223// The double-double lattice of values corresponds to numbers which obey:
5224// - abs(lo) <= 1/2 * ulp(hi)
5225// - roundTiesToEven(hi + lo) == hi
5226//
5227// nextUp must choose the smallest output > input that follows these rules.
5228 // nextDown must choose the largest output < input that follows these rules.
// Steps *this to the adjacent representable double-double value: upward when
// nextDown is false, downward when it is true (implemented as
// -nextUp(-x)). Returns opInvalidOp only for a signaling NaN input; every
// other path returns opOK.
5230 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5231 "Unexpected Semantics");
5232 // nextDown(x) = -nextUp(-x)
5233 if (nextDown) {
5234 changeSign();
5235 APFloat::opStatus Result = next(/*nextDown=*/false);
5236 changeSign();
5237 return Result;
5238 }
5239 switch (getCategory()) {
5240 case fcInfinity:
5241 // nextUp(+inf) = +inf
5242 // nextUp(-inf) = -getLargest()
5243 if (isNegative())
5244 makeLargest(true);
5245 return opOK;
5246
5247 case fcNaN:
5248 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5249 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5250 // change the payload.
5251 if (getFirst().isSignaling()) {
5252 // For consistency, propagate the sign of the sNaN to the qNaN.
5253 makeNaN(false, isNegative(), nullptr);
5254 return opInvalidOp;
5255 }
5256 return opOK;
5257
5258 case fcZero:
5259 // nextUp(pm 0) = +getSmallest()
5260 makeSmallest(false);
5261 return opOK;
5262
5263 case fcNormal:
5264 break;
5265 }
5266
5267 const APFloat &HiOld = getFirst();
5268 const APFloat &LoOld = getSecond();
5269
// First candidate: keep Hi and bump only Lo to its next value upward.
5270 APFloat NextLo = LoOld;
5271 NextLo.next(/*nextDown=*/false);
5272
5273 // We want to admit values where:
5274 // 1. abs(Lo) <= ulp(Hi)/2
5275 // 2. Hi == RTNE(Hi + lo)
5276 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5277 return Hi + Lo == Hi;
5278 };
5279
5280 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5281 if (InLattice(HiOld, NextLo)) {
5282 // Yes, the result is (HiOld, nextUp(LoOld)).
5283 Floats[1] = std::move(NextLo);
5284
5285 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5286 // value is defined to have exactly 106 bits of precision. This limitation
5287 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5288 // value.
5289 DoubleAPFloat Largest{*Semantics, uninitialized};
5290 Largest.makeLargest(/*Neg=*/false);
5291 if (compare(Largest) == cmpGreaterThan)
5292 makeInf(/*Neg=*/false);
5293
5294 return opOK;
5295 }
5296
5297 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5298 // correct result. We know the new hi component will be nextUp(HiOld) but our
5299 // lattice rules make it a little ambiguous what the correct NextLo must be.
5300 APFloat NextHi = HiOld;
5301 NextHi.next(/*nextDown=*/false);
5302
5303 // nextUp(getLargest()) == INFINITY
5304 if (NextHi.isInfinity()) {
5305 makeInf(/*Neg=*/false);
5306 return opOK;
5307 }
5308
5309 // IEEE 754-2019 5.3.1:
5310 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5311 // -0."
5312 if (NextHi.isZero()) {
5313 makeZero(/*Neg=*/true);
5314 return opOK;
5315 }
5316
5317 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5318 // negative infinity as possible.
5319 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5320 if (!InLattice(NextHi, NextLo))
5321 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5322 NextLo.next(/*nextDown=*/false);
5323
5324 Floats[0] = std::move(NextHi);
5325 Floats[1] = std::move(NextLo);
5326
5327 return opOK;
5328}
5329
// Converts the double-double value to an integer of Width bits written into
// Input, rounding with RM.
//
// Input    - destination integer parts.
// Width    - bit width of the destination integer.
// IsSigned - whether the destination is interpreted as signed.
// RM       - rounding mode applied when rounding the value to an integer.
// IsExact  - set to true only when the conversion required no rounding.
//
// Returns opInvalidOp when the value cannot be represented; otherwise the
// status of the initial round-to-integral step (or opInexact when the result
// collapses to a single component after rounding).
5330APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5331 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5332 roundingMode RM, bool *IsExact) const {
5333 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5334 "Unexpected Semantics");
5335
5336 // If Hi is not finite, or Lo is zero, the value is entirely represented
5337 // by Hi. Delegate to the simpler single-APFloat conversion.
5338 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5339 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5340
5341 // First, round the full double-double value to an integral value. This
5342 // simplifies the rest of the function, as we no longer need to consider
5343 // fractional parts.
5344 *IsExact = false;
5345 DoubleAPFloat Integral = *this;
5346 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5347 if (RoundStatus == opInvalidOp)
5348 return opInvalidOp;
5349 const APFloat &IntegralHi = Integral.getFirst();
5350 const APFloat &IntegralLo = Integral.getSecond();
5351
5352 // If rounding results in either component being zero, the sum is trivial.
5353 // Delegate to the simpler single-APFloat conversion.
5354 bool HiIsExact;
5355 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5356 const opStatus HiStatus =
5357 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5358 // The conversion from an integer-valued float to an APInt may fail if the
5359 // result would be out of range. Regardless, taking this path is only
5360 // possible if rounding occurred during the initial `roundToIntegral`.
5361 return HiStatus == opOK ? opInexact : HiStatus;
5362 }
5363
5364 // A negative number cannot be represented by an unsigned integer.
5365 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5366 if (!IsSigned && IntegralHi.isNegative())
5367 return opInvalidOp;
5368
5369 // Handle the special boundary case where |Hi| is exactly the power of two
5370 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5371 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5372 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5373 // signed, N for unsigned).
5374 bool LoIsExact;
5375 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5376 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5377 if (HiExactLog2 >= 0 &&
5378 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5379 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5380 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5381 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5382 return opInvalidOp;
5383
5384 // If the signs differ, the sum will fit. We can compute the result using
5385 // properties of two's complement arithmetic without a wide intermediate
5386 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5387 const opStatus LoStatus = IntegralLo.convertToInteger(
5388 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5389 if (LoStatus == opInvalidOp)
5390 return opInvalidOp;
5391
5392 // Adjust the bit pattern of Lo to account for Hi's value:
5393 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5394 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5395 // already produced the correct final bit pattern.
5396 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5397 // can be computed by taking the two's complement pattern for `Lo` and
5398 // clearing the sign bit.
// NOTE(review): when Hi is -2^(Width-1) and Lo is positive (signed case), no
// adjustment is applied to the bits produced from Lo -- confirm that this
// path yields the intended bit pattern.
5399 if (IsSigned && !IntegralHi.isNegative())
5400 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5401 *IsExact = RoundStatus == opOK;
5402 return RoundStatus;
5403 }
5404
5405 // Convert Hi into an integer. This may not fit but that is OK: we know that
5406 // Hi + Lo would not fit either in this situation.
5407 const opStatus HiStatus = IntegralHi.convertToInteger(
5408 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5409 if (HiStatus == opInvalidOp)
5410 return HiStatus;
5411
5412 // Convert Lo into a temporary integer of the same width.
5413 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5414 const opStatus LoStatus =
5415 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5416 if (LoStatus == opInvalidOp)
5417 return LoStatus;
5418
5419 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5420 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5421 // where the sum could cross the integer type's boundary is when Hi is a
5422 // power of two, which is handled by the special case block above.
5423 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5424
5425 *IsExact = RoundStatus == opOK;
5426 return RoundStatus;
5427}
5428
5431 unsigned int Width, bool IsSigned,
5432 roundingMode RM, bool *IsExact) const {
// Performs the conversion via convertToSignExtendedInteger and, when that
// reports opInvalidOp, overwrites Input with a saturated pattern:
//  - NaN: all bits clear.
//  - negative value: only the sign bit set when signed, all bits clear when
//    unsigned.
//  - otherwise: the low (Width - IsSigned) bits set.
// The status from the underlying conversion is returned unchanged.
5433 opStatus FS =
5434 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5435
5436 if (FS == opInvalidOp) {
5437 const unsigned DstPartsCount = partCountForBits(Width);
5438 assert(DstPartsCount <= Input.size() && "Integer too big");
5439
5440 unsigned Bits;
5441 if (getCategory() == fcNaN)
5442 Bits = 0;
5443 else if (isNegative())
5444 Bits = IsSigned;
5445 else
5446 Bits = Width - IsSigned;
5447
5448 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
// For a negative signed result the single set bit is moved to the sign
// position.
5449 if (isNegative() && IsSigned)
5450 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5451 }
5452
5453 return FS;
5454}
5455
// Replaces *this with the result of an overflow under rounding mode RM: each
// handled rounding mode selects either the largest finite value or infinity,
// depending on the sign of *this. Returns opInexact, with opOverflow added
// when the chosen result is not finite.
5456APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5457 switch (RM) {
5459 makeLargest(/*Neg=*/isNegative());
5460 break;
5462 if (isNegative())
5463 makeInf(/*Neg=*/true);
5464 else
5465 makeLargest(/*Neg=*/false);
5466 break;
5468 if (isNegative())
5469 makeLargest(/*Neg=*/true);
5470 else
5471 makeInf(/*Neg=*/false);
5472 break;
5475 makeInf(/*Neg=*/isNegative());
5476 break;
5477 default:
5478 llvm_unreachable("Invalid rounding mode found");
5479 }
// The result is always inexact; it is additionally an overflow only when it
// was replaced by infinity.
5480 opStatus S = opInexact;
5481 if (!getFirst().isFinite())
5482 S = static_cast<opStatus>(S | opOverflow);
5483 return S;
5484}
5485
// Converts the unsigned multi-word integer Src (SrcCount parts) into a
// double-double value under rounding mode RM.
//
// The conversion proceeds in stages: Hi is a round-to-nearest approximation of
// the source, the exact integer error (source - Hi) is converted into Lo with
// a rounding mode derived from RM, and the pair is renormalized. Overflow is
// routed through handleOverflow. Returns opOK for a zero source; otherwise the
// status of the Lo conversion, or the handleOverflow status on overflow.
5486APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5487 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5488 // Find the most significant bit of the source integer. APInt::tcMSB returns
5489 // UINT_MAX for a zero value.
5490 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5491 if (SrcMSB == UINT_MAX) {
5492 // The source integer is 0.
5493 makeZero(/*Neg=*/false);
5494 return opOK;
5495 }
5496
5497 // Create a minimally-sized APInt to represent the source value.
5498 const unsigned SrcBitWidth = SrcMSB + 1;
5499 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5500 /*isUnsigned=*/true};
5501
5502 // Stage 1: Initial Approximation.
5503 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5504 // We use round-to-nearest because it minimizes the initial error, which is
5505 // crucial for the subsequent steps.
5507 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5508
5509 // If the first approximation already overflows, the number is too large.
5510 // NOTE: The underlying semantics are *more* conservative when choosing to
5511 // overflow because their notion of ULP is much larger. As such, it is always
5512 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5513 if (!Hi.isFinite())
5514 return handleOverflow(RM);
5515
5516 // Stage 2: Exact Error Calculation.
5517 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5518 // This is done by converting Hi back to an integer and subtracting it from
5519 // the original source.
5520 bool HiAsIntIsExact;
5521 // Create an integer representation of Hi. Its width is determined by the
5522 // exponent of Hi, ensuring it's just large enough. This width can exceed
5523 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5524 // The chosen width lets Hi be held accurately when converted back to an integer.
5525 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5526 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5527 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5528
5529 // Stage 3: Error Approximation and Rounding.
5530 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5531 // captures the remainder of the original number. The rounding mode for this
5532 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5533 // ensure the final sum (Hi + Lo) rounds correctly.
5534 roundingMode LoRM = RM;
5535 // Adjustments are only necessary when the initial approximation Hi was an
5536 // overestimate, making the Error negative.
5537 if (Error.isNegative()) {
5538 if (RM == rmNearestTiesToAway) {
5539 // For rmNearestTiesToAway, a tie should round away from zero. Since
5540 // SrcInt is positive, this means rounding toward +infinity.
5541 // A standard conversion of a negative Error would round ties toward
5542 // -infinity, causing the final sum Hi + Lo to be smaller. To
5543 // counteract this, we detect the tie case and override the rounding
5544 // mode for Lo to rmTowardPositive.
5545 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5546 const unsigned LoPrecision = getSecond().getSemantics().precision;
5547 if (ErrorActiveBits > LoPrecision) {
5548 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5549 // A tie occurs when the bits to be truncated are of the form 100...0.
5550 // This is detected by checking if the number of trailing zeros is
5551 // exactly one less than the number of bits being truncated.
5552 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5553 LoRM = rmTowardPositive;
5554 }
5555 } else if (RM == rmTowardZero) {
5556 // For rmTowardZero, the final positive result must be truncated (rounded
5557 // down). When Hi is an overestimate, Error is negative. A standard
5558 // rmTowardZero conversion of Error would make it *less* negative,
5559 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5560 // rounds down correctly, we force Lo to round toward -infinity.
5561 LoRM = rmTowardNegative;
5562 }
5563 }
5564
5566 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5567
5568 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5569 // components do not overlap. fastTwoSum performs this operation.
5570 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5571 Floats[0] = std::move(Hi);
5572 Floats[1] = std::move(Lo);
5573
5574 // A final check for overflow is needed because fastTwoSum can cause a
5575 // carry-out from Lo that pushes Hi to infinity.
5576 if (!getFirst().isFinite())
5577 return handleOverflow(RM);
5578
5579 // The largest DoubleAPFloat must be canonical. Values which are larger are
5580 // not canonical and are equivalent to overflow.
5581 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5582 DoubleAPFloat Largest{*Semantics};
5583 Largest.makeLargest(/*Neg=*/false);
5584 if (compare(Largest) == APFloat::cmpGreaterThan)
5585 return handleOverflow(RM);
5586 }
5587
5588 // The final status of the operation is determined by the conversion of the
5589 // error term. If Lo could represent Error exactly, the entire conversion
5590 // is exact. Otherwise, it's inexact.
5591 return Status;
5592}
5593
5595 bool IsSigned,
5596 roundingMode RM) {
// Signed negative inputs are converted by magnitude: the input is negated,
// converted through convertFromUnsignedParts under RM, and the sign of the
// result is flipped afterwards. Other inputs are converted directly.
5597 const bool NegateInput = IsSigned && Input.isNegative();
5598 APInt API = Input;
5599 if (NegateInput)
5600 API.negate();
5601
5603 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5604 if (NegateInput)
5605 changeSign();
5606 return Status;
5607}
5608
5610 unsigned int HexDigits,
5611 bool UpperCase,
5612 roundingMode RM) const {
// Formatting is delegated to the legacy double-double implementation: the
// value is reinterpreted bit-for-bit as a semPPCDoubleDoubleLegacy APFloat and
// that object's convertToHexString result is returned.
5613 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5614 "Unexpected Semantics");
5615 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5616 .convertToHexString(DST, HexDigits, UpperCase, RM);
5617}
5618
// A value in the fcNormal category counts as denormal when either component
// is denormal, or when Hi + Lo does not round back to Hi.
5620 return getCategory() == fcNormal &&
5621 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5622 // (double)(Hi + Lo) == Hi defines a normal number.
5623 Floats[0] != Floats[0] + Floats[1]);
5624}
5625
// True when *this compares equal to the smallest-magnitude value carrying the
// same sign. Values outside the fcNormal category never qualify.
5627 if (getCategory() != fcNormal)
5628 return false;
5629 DoubleAPFloat Tmp(*this);
5630 Tmp.makeSmallest(this->isNegative());
5631 return Tmp.compare(*this) == cmpEqual;
5632}
5633
// Values outside the fcNormal category never qualify. Otherwise *this is
// compared for equality against a reference value held in a temporary copy.
5635 if (getCategory() != fcNormal)
5636 return false;
5637
5638 DoubleAPFloat Tmp(*this);
5640 return Tmp.compare(*this) == cmpEqual;
5641}
5642
// True when *this compares equal to the largest-magnitude finite value
// carrying the same sign. Values outside the fcNormal category never qualify.
5644 if (getCategory() != fcNormal)
5645 return false;
5646 DoubleAPFloat Tmp(*this);
5647 Tmp.makeLargest(this->isNegative());
5648 return Tmp.compare(*this) == cmpEqual;
5649}
5650
// The value is treated as an integer when both the high and the low component
// are integers.
5652 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5653 "Unexpected Semantics");
5654 return Floats[0].isInteger() && Floats[1].isInteger();
5655}
5656
5658 unsigned FormatPrecision,
5659 unsigned FormatMaxPadding,
5660 bool TruncateZero) const {
// Formatting is delegated to the legacy double-double implementation: the
// value is reinterpreted bit-for-bit as a semPPCDoubleDoubleLegacy APFloat and
// that object's toString writes into Str with the same formatting options.
5661 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5662 "Unexpected Semantics");
5663 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5664 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5665}
5666
// Returns the exact base-2 logarithm of |Hi + Lo| as computed from Hi, or
// INT_MIN when the low part is non-zero.
5668 // In order for Hi + Lo to be a power of two, the following must be true:
5669 // 1. Hi must be a power of two.
5670 // 2. Lo must be zero.
5671 if (getSecond().isNonZero())
5672 return INT_MIN;
5673 return getFirst().getExactLog2Abs();
5674}
5675
5676int ilogb(const DoubleAPFloat &Arg) {
5677 const APFloat &Hi = Arg.getFirst();
5678 const APFloat &Lo = Arg.getSecond();
5679 int IlogbResult = ilogb(Hi);
5680 // Zero and non-finite values can delegate to ilogb(Hi).
5681 if (Arg.getCategory() != fcNormal)
5682 return IlogbResult;
5683 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5684 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5685 return IlogbResult;
5686 if (Hi.getExactLog2Abs() == INT_MIN)
5687 return IlogbResult;
5688 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5689 // get nudged out of the binade by the low component.
5690 return IlogbResult - 1;
5691}
5692
// The high and the low component of Arg are each scaled by Exp under RM.
5695 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5696 "Unexpected Semantics");
5698 scalbn(Arg.Floats[0], Exp, RM),
5699 scalbn(Arg.Floats[1], Exp, RM));
5700}
5701
// Decomposes Arg into a fraction and an exponent written to Exp.
//
// NaN inputs are returned quieted, infinities are returned unchanged, and
// zero is returned unchanged with Exp set to 0. For other values Exp is
// derived from ilogb(Arg) + 1 and both components are scaled by -Exp; the low
// component uses a rounding mode adjusted so that the combined value rounds
// as requested.
5702DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5704 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5705 "Unexpected Semantics");
5706
5707 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5708 // [1.0, 2.0).
5709 Exp = ilogb(Arg);
5710
5711 // For NaNs, quiet any signaling NaN and return the result, as per standard
5712 // practice.
5713 if (Exp == APFloat::IEK_NaN) {
5714 DoubleAPFloat Quiet{Arg};
5715 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5716 return Quiet;
5717 }
5718
5719 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5720 if (Exp == APFloat::IEK_Inf)
5721 return Arg;
5722
5723 // For zero, the fraction is zero and the standard requires the exponent be 0.
5724 if (Exp == APFloat::IEK_Zero) {
5725 Exp = 0;
5726 return Arg;
5727 }
5728
5729 const APFloat &Hi = Arg.getFirst();
5730 const APFloat &Lo = Arg.getSecond();
5731
5732 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5733 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5734 // Increment the exponent to ensure the fraction is in the correct range.
5735 ++Exp;
5736
5737 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5738 APFloat Second = Lo;
5739 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5740 roundingMode LoRoundingMode;
5741 // The interpretation of rmTowardZero depends on the sign of the combined
5742 // Arg rather than the sign of the component.
5743 if (RM == rmTowardZero)
5744 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5745 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5746 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5747 // "away from zero" based on its own sign would move the value in the
5748 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5749 // direction-agnostic. We only need to bother with this if Lo is scaled
5750 // down.
5751 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5752 LoRoundingMode = rmNearestTiesToEven;
5753 else
5754 LoRoundingMode = RM;
5755 Second = scalbn(Lo, -Exp, LoRoundingMode);
5756 // The rmNearestTiesToEven proxy is correct most of the time, but it
5757 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5758 // exact midpoint.
5759 // NOTE: This is morally equivalent to roundTiesTowardZero.
5760 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5761 // Re-scale the result back to check if rounding occurred.
5762 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5763 if (RecomposedLo != Lo) {
5764 // RoundingError tells us which direction we rounded:
5765 // - RoundingError > 0: we rounded up.
5766 // - RoundingError < 0: we rounded down.
5767 const APFloat RoundingError = RecomposedLo - Lo;
5768 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5769 // We do this by checking if the absolute rounding error is exactly
5770 // half a ULP of the result.
5771 const APFloat UlpOfSecond = harrisonUlp(Second);
5772 const APFloat ScaledUlpOfSecond =
5773 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5774 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5775 const bool RoundedLoAway =
5776 Second.isNegative() == RoundingError.isNegative();
5777 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5778 // decrease the magnitude of Second to increase the magnitude
5779 // First+Second.
5780 if (IsMidpoint && RoundedLoAway)
5781 Second.next(/*nextDown=*/!Second.isNegative());
5782 }
5783 }
5784 // Handle a tricky edge case where Arg is slightly less than a power of two
5785 // (e.g., Arg = 2^k - epsilon). In this situation:
5786 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5787 // 2. ilogb(Arg) correctly returns k-1.
5788 // 3. Our initial Exp becomes (k-1) + 1 = k.
5789 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5790 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5791 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5792 // We detect this specific case by checking if Hi is a power of two and if
5793 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5794 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5795 // valid fraction.
5796 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5797 ++Exp;
5798 }
5799
5800 APFloat First = scalbn(Hi, -Exp, RM);
5802 std::move(Second));
5803}
5804
5805APInt DoubleAPFloat::getNaNPayload() const { return Floats[0].getNaNPayload(); }
5806} // namespace detail
5807
// Constructs the storage from an IEEEFloat, choosing the active member from
// the layout required by Semantics: the IEEE member takes F directly, while
// the double-double member is built with F (wrapped as an APFloat carrying
// F's own semantics) as a component. Any other layout is unreachable.
5808APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5809 if (usesLayout<IEEEFloat>(Semantics)) {
5810 new (&IEEE) IEEEFloat(std::move(F));
5811 return;
5812 }
5813 if (usesLayout<DoubleAPFloat>(Semantics)) {
// Capture F's semantics before F is moved from.
5814 const fltSemantics& S = F.getSemantics();
5815 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5817 return;
5818 }
5819 llvm_unreachable("Unexpected semantics");
5820}
5821
5826
5827hash_code hash_value(const APFloat &Arg) {
5828 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5829 return hash_value(Arg.U.IEEE);
5830 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5831 return hash_value(Arg.U.Double);
5832 llvm_unreachable("Unexpected semantics");
5833}
5834
5836 : APFloat(Semantics) {
// Parse S with round-to-nearest-even. A parse failure trips the assertion in
// asserting builds; the error is consumed afterwards so that it is never left
// unhandled.
5837 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5838 assert(StatusOrErr && "Invalid floating point representation");
5839 consumeError(StatusOrErr.takeError());
5840}
5841
// Maps the value to a floating-point class: zero, normal, denormal and
// infinity are each split by sign; the remaining case must be a NaN, which is
// split into signaling and quiet.
5843 if (isZero())
5844 return isNegative() ? fcNegZero : fcPosZero;
5845 if (isNormal())
5846 return isNegative() ? fcNegNormal : fcPosNormal;
5847 if (isDenormal())
5849 if (isInfinity())
5850 return isNegative() ? fcNegInf : fcPosInf;
5851 assert(isNaN() && "Other class of FP constant");
5852 return isSignaling() ? fcSNan : fcQNan;
5853}
5854
5855bool APFloat::getExactInverse(APFloat *Inv) const {
5856 // Only finite, non-zero numbers can have a useful, representable inverse.
5857 // This check filters out +/- zero, +/- infinity, and NaN.
5858 if (!isFiniteNonZero())
5859 return false;
5860
5861 // Historically, this function rejects subnormal inputs. One reason why this
5862 // might be important is that subnormals may behave differently under FTZ/DAZ
5863 // runtime behavior.
5864 if (isDenormal())
5865 return false;
5866
5867 // A number has an exact, representable inverse if and only if it is a power
5868 // of two.
5869 //
5870 // Mathematical Rationale:
5871 // 1. A binary floating-point number x is a dyadic rational, meaning it can
5872 // be written as x = M / 2^k for integers M (the significand) and k.
5873 // 2. The inverse is 1/x = 2^k / M.
5874 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
5875 // in binary), its denominator M must also be a power of two.
5876 // Let's say M = 2^m.
5877 // 4. Substituting this back into the formula for x, we get
5878 // x = (2^m) / (2^k) = 2^(m-k).
5879 //
5880 // This proves that x must be a power of two.
5881
5882 // getExactLog2Abs() returns the integer exponent if the number is a power of
5883 // two or INT_MIN if it is not.
5884 const int Exp = getExactLog2Abs();
5885 if (Exp == INT_MIN)
5886 return false;
5887
5888 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
5889 // scaling 1.0 by the negated exponent.
5890 APFloat Reciprocal =
5891 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
5892 rmTowardZero);
5893
5894 // scalbn might round if the resulting exponent -Exp is outside the
5895 // representable range, causing overflow (to infinity) or underflow. We
5896 // must verify that the result is still the exact power of two we expect.
5897 if (Reciprocal.getExactLog2Abs() != -Exp)
5898 return false;
5899
5900 // Avoid multiplication with a subnormal, it is not safe on all platforms and
5901 // may be slower than a normal division.
5902 if (Reciprocal.isDenormal())
5903 return false;
5904
5905 assert(Reciprocal.isFiniteNonZero());
5906
5907 if (Inv)
5908 *Inv = std::move(Reciprocal);
5909
5910 return true;
5911}
5912
5914 roundingMode RM, bool *losesInfo) {
5915 if (&getSemantics() == &ToSemantics) {
5916 *losesInfo = false;
5917 return opOK;
5918 }
5919 if (usesLayout<IEEEFloat>(getSemantics()) &&
5920 usesLayout<IEEEFloat>(ToSemantics))
5921 return U.IEEE.convert(ToSemantics, RM, losesInfo);
5922 if (usesLayout<IEEEFloat>(getSemantics()) &&
5923 usesLayout<DoubleAPFloat>(ToSemantics)) {
5924 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
5925 auto Ret =
5926 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
5927 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5928 return Ret;
5929 }
5930 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5931 usesLayout<IEEEFloat>(ToSemantics)) {
5932 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5933 *this = APFloat(std::move(getIEEE()), ToSemantics);
5934 return Ret;
5935 }
5936 llvm_unreachable("Unexpected semantics");
5937}
5938
5942
5944 SmallVector<char, 16> Buffer;
5945 toString(Buffer);
5946 OS << Buffer;
5947}
5948
5949#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5951 print(dbgs());
5952 dbgs() << '\n';
5953}
5954#endif
5955
5957 NID.Add(bitcastToAPInt());
5958}
5959
5961 roundingMode rounding_mode,
5962 bool *isExact) const {
5963 unsigned bitWidth = result.getBitWidth();
5964 SmallVector<uint64_t, 4> parts(result.getNumWords());
5965 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5966 rounding_mode, isExact);
5967 // Keeps the original signed-ness.
5968 result = APInt(bitWidth, parts);
5969 return status;
5970}
5971
5973 if (&getSemantics() == &APFloatBase::semIEEEdouble)
5974 return getIEEE().convertToDouble();
5975 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
5976 "Float semantics is not representable by IEEEdouble");
5977 APFloat Temp = *this;
5978 bool LosesInfo;
5979 [[maybe_unused]] opStatus St =
5980 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5981 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5982 return Temp.getIEEE().convertToDouble();
5983}
5984
5985#ifdef HAS_IEE754_FLOAT128
5986float128 APFloat::convertToQuad() const {
5987 if (&getSemantics() == &APFloatBase::semIEEEquad)
5988 return getIEEE().convertToQuad();
5989 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
5990 "Float semantics is not representable by IEEEquad");
5991 APFloat Temp = *this;
5992 bool LosesInfo;
5993 [[maybe_unused]] opStatus St =
5994 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
5995 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5996 return Temp.getIEEE().convertToQuad();
5997}
5998#endif
5999
6001 if (&getSemantics() == &APFloatBase::semIEEEsingle)
6002 return getIEEE().convertToFloat();
6003 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
6004 "Float semantics is not representable by IEEEsingle");
6005 APFloat Temp = *this;
6006 bool LosesInfo;
6007 [[maybe_unused]] opStatus St =
6008 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
6009 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6010 return Temp.getIEEE().convertToFloat();
6011}
6012
6014 static constexpr StringLiteral ValidFormats[] = {
6015 "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
6016 "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
6017 "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
6018 return llvm::is_contained(ValidFormats, Format);
6019}
6020
6022 // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
6023 // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
6025 .Case("Float8E5M2", &semFloat8E5M2)
6026 .Case("Float8E4M3FN", &semFloat8E4M3FN)
6027 .Case("Float4E2M1FN", &semFloat4E2M1FN)
6028 .Case("Float6E3M2FN", &semFloat6E3M2FN)
6029 .Case("Float6E2M3FN", &semFloat6E2M3FN)
6030 .Default(nullptr);
6031}
6032
6033APFloat::Storage::~Storage() {
6034 if (usesLayout<IEEEFloat>(*semantics)) {
6035 IEEE.~IEEEFloat();
6036 return;
6037 }
6038 if (usesLayout<DoubleAPFloat>(*semantics)) {
6039 Double.~DoubleAPFloat();
6040 return;
6041 }
6042 llvm_unreachable("Unexpected semantics");
6043}
6044
6045APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6046 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6047 new (this) IEEEFloat(RHS.IEEE);
6048 return;
6049 }
6050 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6051 new (this) DoubleAPFloat(RHS.Double);
6052 return;
6053 }
6054 llvm_unreachable("Unexpected semantics");
6055}
6056
6057APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6058 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6059 new (this) IEEEFloat(std::move(RHS.IEEE));
6060 return;
6061 }
6062 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6063 new (this) DoubleAPFloat(std::move(RHS.Double));
6064 return;
6065 }
6066 llvm_unreachable("Unexpected semantics");
6067}
6068
6069APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6070 if (usesLayout<IEEEFloat>(*semantics) &&
6071 usesLayout<IEEEFloat>(*RHS.semantics)) {
6072 IEEE = RHS.IEEE;
6073 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6074 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6075 Double = RHS.Double;
6076 } else if (this != &RHS) {
6077 this->~Storage();
6078 new (this) Storage(RHS);
6079 }
6080 return *this;
6081}
6082
6083APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6084 if (usesLayout<IEEEFloat>(*semantics) &&
6085 usesLayout<IEEEFloat>(*RHS.semantics)) {
6086 IEEE = std::move(RHS.IEEE);
6087 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6088 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6089 Double = std::move(RHS.Double);
6090 } else if (this != &RHS) {
6091 this->~Storage();
6092 new (this) Storage(std::move(RHS));
6093 }
6094 return *this;
6095}
6096
6097namespace {
6098
6099APFloat::opStatus getOpStatusFromLibc(int libc_exceptions) {
6101 if (libc_exceptions & FE_INVALID)
6103 if (libc_exceptions & FE_DIVBYZERO)
6105 if (libc_exceptions & FE_OVERFLOW)
6107 if (libc_exceptions & FE_UNDERFLOW)
6109 if (libc_exceptions & FE_INEXACT)
6111 return status;
6112}
6113
6114} // namespace
6115
6116// TODO: Support other rounding modes when LLVM libc math implement static
6117// roundings.
6118std::optional<APFloat> exp(const APFloat &x, RoundingMode rounding_mode,
6119 APFloat::opStatus *status) {
6120
6121 if (rounding_mode == APFloatBase::rmNearestTiesToEven) {
6122 if (APFloat::SemanticsToEnum(x.getSemantics()) ==
6124 float x_val = x.convertToFloat();
6125 int exc =
6126 LIBC_NAMESPACE::shared::check::exp_exceptions(x_val, FE_TONEAREST);
6127 if (status)
6128 *status = getOpStatusFromLibc(exc);
6129 float result = LIBC_NAMESPACE::shared::expf(x_val);
6130 return APFloat(result);
6131 }
6132 if (APFloat::SemanticsToEnum(x.getSemantics()) ==
6134 double x_val = x.convertToDouble();
6135 int exc =
6136 LIBC_NAMESPACE::shared::check::exp_exceptions(x_val, FE_TONEAREST);
6137 if (status)
6138 *status = getOpStatusFromLibc(exc);
6139 double result = LIBC_NAMESPACE::shared::exp(x_val);
6140 return APFloat(result);
6141 }
6142 }
6143 return std::nullopt;
6144}
6145
6146} // namespace llvm
6147
6148#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:63
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:27
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
static bool isSigned(unsigned Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:297
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:307
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:112
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:261
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:335
static constexpr roundingMode rmTowardZero
Definition APFloat.h:349
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:236
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:343
static const fltSemantics & BFloat()
Definition APFloat.h:296
static const fltSemantics & IEEEquad()
Definition APFloat.h:299
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:239
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:314
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:257
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:292
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:318
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static LLVM_ABI bool isValidArbitraryFPFormat(StringRef Format)
Returns true if the given string is a valid arbitrary floating-point format interpretation for llvm....
Definition APFloat.cpp:6013
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:274
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:232
friend class APFloat
Definition APFloat.h:292
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:228
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:265
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:159
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:156
static constexpr unsigned integerPartWidth
Definition APFloat.h:153
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:301
APInt::WordType integerPart
Definition APFloat.h:152
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:253
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:278
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:308
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:347
static const fltSemantics & IEEEhalf()
Definition APFloat.h:295
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:317
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:316
static const fltSemantics & Float8E4M3()
Definition APFloat.h:306
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:309
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:204
static const fltSemantics & Float8E3M4()
Definition APFloat.h:312
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:269
static const fltSemantics & Float8E5M2()
Definition APFloat.h:304
fltCategory
Category of internally-represented number.
Definition APFloat.h:371
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:350
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:300
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:315
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:361
static LLVM_ABI const fltSemantics * getArbitraryFPSemantics(StringRef Format)
Returns the fltSemantics for a given arbitrary FP format string, or nullptr if invalid.
Definition APFloat.cpp:6021
static const fltSemantics & FloatTF32()
Definition APFloat.h:313
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:242
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1180
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:5956
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1268
bool isFiniteNonZero() const
Definition APFloat.h:1549
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5913
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1595
bool isNegative() const
Definition APFloat.h:1539
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5855
cmpResult compareAbsoluteValue(const APFloat &RHS) const
Definition APFloat.h:1494
friend DoubleAPFloat
Definition APFloat.h:1611
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5972
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1576
bool isNormal() const
Definition APFloat.h:1543
bool isDenormal() const
Definition APFloat.h:1540
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1241
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:5939
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5827
const fltSemantics & getSemantics() const
Definition APFloat.h:1547
bool isFinite() const
Definition APFloat.h:1544
bool isNaN() const
Definition APFloat.h:1537
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1148
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1529
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6000
bool isSignaling() const
Definition APFloat.h:1541
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1295
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1277
bool isZero() const
Definition APFloat.h:1535
APInt bitcastToAPInt() const
Definition APFloat.h:1431
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1392
opStatus next(bool nextDown)
Definition APFloat.h:1314
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1158
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1208
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5842
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1286
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Fill this APFloat with the result of a string conversion.
Definition APFloat.cpp:5822
friend IEEEFloat
Definition APFloat.h:1610
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:5950
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:5943
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1308
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1233
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1139
bool isInfinity() const
Definition APFloat.h:1536
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1599
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2403
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2375
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1793
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2398
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2477
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2447
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2787
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1775
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2383
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2761
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2667
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1518
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2408
void negate()
Negate this APInt in place.
Definition APInt.h:1491
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1941
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2414
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2734
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2389
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2427
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1759
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2565
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2512
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2551
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1767
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1936
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1745
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:576
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:208
void Add(const T &x)
Definition FoldingSet.h:248
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
const char * iterator
Definition StringRef.h:60
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
iterator begin() const
Definition StringRef.h:114
char back() const
Get the last character in the string.
Definition StringRef.h:153
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
char front() const
Get the first character in the string.
Definition StringRef.h:147
iterator end() const
Definition StringRef.h:116
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:675
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5169
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4699
LLVM_ABI void changeSign()
Definition APFloat.cpp:5076
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5643
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4963
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4866
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5135
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5192
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5667
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5594
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5203
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5213
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5626
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4858
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5197
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5082
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5619
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5430
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5162
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5676
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5229
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5141
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5651
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5146
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4952
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5634
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4973
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4646
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5657
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5151
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5184
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:4999
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:4984
LLVM_ABI APInt getNaNPayload() const
Definition APFloat.cpp:5805
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5609
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5139
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4853
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5179
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3189
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1438
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2194
fltCategory getCategory() const
Definition APFloat.h:583
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2749
LLVM_ABI APInt getNaNPayload() const
Definition APFloat.cpp:4534
bool isFiniteNonZero() const
Definition APFloat.h:586
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:473
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:3961
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4356
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3587
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4606
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2362
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:548
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2068
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:573
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2086
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3657
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3650
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2044
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4312
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:3993
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4553
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:959
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4582
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current semantics.
Definition APFloat.cpp:1061
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2038
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:560
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3132
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:848
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2050
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2277
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:920
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1086
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:4007
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1078
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1113
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2231
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4588
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4401
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:570
const fltSemantics & getSemantics() const
Definition APFloat.h:584
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:563
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4385
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4568
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2438
LLVM_ABI void changeSign()
Definition APFloat.cpp:1996
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:945
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2694
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition APFloat.cpp:951
An opaque object representing a hash code.
Definition Hashing.h:77
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:449
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of friend declarations above to make it discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:451
static constexpr opStatus opDivByZero
Definition APFloat.h:446
static constexpr opStatus opOverflow
Definition APFloat.h:447
static constexpr cmpResult cmpLessThan
Definition APFloat.h:441
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1461
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:437
static constexpr uninitializedTag uninitialized
Definition APFloat.h:431
static constexpr fltCategory fcZero
Definition APFloat.h:453
static constexpr opStatus opOK
Definition APFloat.h:444
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:442
static constexpr unsigned integerPartWidth
Definition APFloat.h:439
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3329
APFloatBase::ExponentType ExponentType
Definition APFloat.h:430
static constexpr fltCategory fcNormal
Definition APFloat.h:452
static constexpr opStatus opInvalidOp
Definition APFloat.h:445
APFloatBase::opStatus opStatus
Definition APFloat.h:427
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4627
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:425
static constexpr cmpResult cmpUnordered
Definition APFloat.h:443
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:436
APFloatBase::roundingMode roundingMode
Definition APFloat.h:426
APFloatBase::cmpResult cmpResult
Definition APFloat.h:428
static constexpr fltCategory fcInfinity
Definition APFloat.h:450
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:434
static constexpr roundingMode rmTowardZero
Definition APFloat.h:438
static constexpr opStatus opUnderflow
Definition APFloat.h:448
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:432
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4588
static constexpr cmpResult cmpEqual
Definition APFloat.h:440
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4606
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4716
APFloatBase::integerPart integerPart
Definition APFloat.h:424
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:745
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
static const char infinityL[]
Definition APFloat.cpp:736
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:323
static const char NaNU[]
Definition APFloat.cpp:739
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:621
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:680
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:792
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:297
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:380
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:94
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:223
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1632
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:762
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
Definition STLExtras.h:2172
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
const unsigned int maxPrecision
Definition APFloat.cpp:222
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1653
static const char NaNL[]
Definition APFloat.cpp:738
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
static const char infinityU[]
Definition APFloat.cpp:737
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:51
@ lfMoreThanHalf
Definition APFloat.h:55
@ lfLessThanHalf
Definition APFloat.h:53
@ lfExactlyHalf
Definition APFloat.h:54
@ lfExactlyZero
Definition APFloat.h:52
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:470
LLVM_READONLY LLVM_ABI std::optional< APFloat > exp(const APFloat &X, RoundingMode RM=APFloat::rmNearestTiesToEven, APFloat::opStatus *Status=nullptr)
Implement IEEE 754-2019 exp functions.
Definition APFloat.cpp:6118
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:224
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1641
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:307
static Error createError(const Twine &Err)
Definition APFloat.cpp:319
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:589
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
static const char hexDigitsUpper[]
Definition APFloat.cpp:735
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
const unsigned int maxExponent
Definition APFloat.cpp:221
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:330
fltNonfiniteBehavior
Definition APFloat.h:953
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:2011
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:600
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:430
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:302
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:569
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1667
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:635
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:778
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:540
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1106
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:340
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:734
#define N
const char * lastSigDigit
Definition APFloat.cpp:465
const char * firstSigDigit
Definition APFloat.cpp:464
APFloatBase::ExponentType maxExponent
Definition APFloat.h:1001
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1014
APFloatBase::ExponentType minExponent
Definition APFloat.h:1005
unsigned int sizeInBits
Definition APFloat.h:1012
unsigned int precision
Definition APFloat.h:1009
fltNanEncoding nanEncoding
Definition APFloat.h:1016