LLVM 19.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Error.h"
28#include <cstring>
29#include <limits.h>
30
31#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
32 do { \
33 if (usesLayout<IEEEFloat>(getSemantics())) \
34 return U.IEEE.METHOD_CALL; \
35 if (usesLayout<DoubleAPFloat>(getSemantics())) \
36 return U.Double.METHOD_CALL; \
37 llvm_unreachable("Unexpected semantics"); \
38 } while (false)
39
40using namespace llvm;
41
42/// A macro used to combine two fcCategory enums into one key which can be used
43/// in a switch statement to classify how the interaction of two APFloat's
44/// categories affects an operation.
45///
46/// TODO: If clang source code is ever allowed to use constexpr in its own
47/// codebase, change this into a static inline function.
48#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
50/* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54namespace llvm {
55
56// How the nonfinite values Inf and NaN are represented.
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
61 IEEE754,
62
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
70 NanOnly,
71};
72
// How NaN values are represented. This is currently only used in combination
74// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
75// while having IEEE non-finite behavior is liable to lead to unexpected
76// results.
77enum class fltNanEncoding {
78 // Represents the standard IEEE behavior where a value is NaN if its
79 // exponent is all 1s and the significand is non-zero.
80 IEEE,
81
82 // Represents the behavior in the Float8E4M3 floating point type where NaN is
83 // represented by having the exponent and mantissa set to all 1s.
84 // This behavior matches the FP8 E4M3 type described in
85 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
86 // as non-signalling, although the paper does not state whether the NaN
87 // values are signalling or not.
88 AllOnes,
89
90 // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types
91 // where NaN is represented by a sign bit of 1 and all 0s in the exponent
92 // and mantissa (i.e. the negative zero encoding in a IEEE float). Since
93 // there is only one NaN value, it is treated as quiet NaN. This matches the
94 // behavior described in https://arxiv.org/abs/2206.02915 .
96};
97
98/* Represents floating point arithmetic semantics. */
100 /* The largest E such that 2^E is representable; this matches the
101 definition of IEEE 754. */
103
104 /* The smallest E such that 2^E is a normalized number; this
105 matches the definition of IEEE 754. */
107
108 /* Number of bits in the significand. This includes the integer
109 bit. */
110 unsigned int precision;
111
112 /* Number of bits actually used in the semantics. */
113 unsigned int sizeInBits;
114
116
118 // Returns true if any number described by this semantics can be precisely
119 // represented by the specified semantics. Does not take into account
120 // the value of fltNonfiniteBehavior.
121 bool isRepresentableBy(const fltSemantics &S) const {
122 return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
123 precision <= S.precision;
124 }
125};
126
127static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
128static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
129static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
130static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
131static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
132static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
133static constexpr fltSemantics semFloat8E5M2FNUZ = {
135static constexpr fltSemantics semFloat8E4M3FN = {
137static constexpr fltSemantics semFloat8E4M3FNUZ = {
141static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
142static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
143static constexpr fltSemantics semBogus = {0, 0, 0, 0};
144
145/* The IBM double-double semantics. Such a number consists of a pair of IEEE
146 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
147 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
148 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
149 to each other, and two 11-bit exponents.
150
151 Note: we need to make the value different from semBogus as otherwise
152 an unsafe optimization may collapse both values to a single address,
153 and we heavily rely on them having distinct addresses. */
154static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
155
/* These are legacy semantics for the fallback, inaccurate implementation of
157 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
158 operation. It's equivalent to having an IEEE number with consecutive 106
159 bits of mantissa and 11 bits of exponent.
160
161 It's not equivalent to IBM double-double. For example, a legit IBM
162 double-double, 1 + epsilon:
163
164 1 + epsilon = 1 + (1 >> 1076)
165
166 is not representable by a consecutive 106 bits of mantissa.
167
168 Currently, these semantics are used in the following way:
169
170 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
171 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
172 semPPCDoubleDoubleLegacy -> IEEE operations
173
174 We use bitcastToAPInt() to get the bit representation (in APInt) of the
175 underlying IEEEdouble, then use the APInt constructor to construct the
176 legacy IEEE float.
177
178 TODO: Implement all operations in semPPCDoubleDouble, and delete these
179 semantics. */
180static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
181 53 + 53, 128};
182
184 switch (S) {
185 case S_IEEEhalf:
186 return IEEEhalf();
187 case S_BFloat:
188 return BFloat();
189 case S_IEEEsingle:
190 return IEEEsingle();
191 case S_IEEEdouble:
192 return IEEEdouble();
193 case S_IEEEquad:
194 return IEEEquad();
196 return PPCDoubleDouble();
197 case S_Float8E5M2:
198 return Float8E5M2();
199 case S_Float8E5M2FNUZ:
200 return Float8E5M2FNUZ();
201 case S_Float8E4M3FN:
202 return Float8E4M3FN();
203 case S_Float8E4M3FNUZ:
204 return Float8E4M3FNUZ();
206 return Float8E4M3B11FNUZ();
207 case S_FloatTF32:
208 return FloatTF32();
210 return x87DoubleExtended();
211 }
212 llvm_unreachable("Unrecognised floating semantics");
213}
214
217 if (&Sem == &llvm::APFloat::IEEEhalf())
218 return S_IEEEhalf;
219 else if (&Sem == &llvm::APFloat::BFloat())
220 return S_BFloat;
221 else if (&Sem == &llvm::APFloat::IEEEsingle())
222 return S_IEEEsingle;
223 else if (&Sem == &llvm::APFloat::IEEEdouble())
224 return S_IEEEdouble;
225 else if (&Sem == &llvm::APFloat::IEEEquad())
226 return S_IEEEquad;
227 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
228 return S_PPCDoubleDouble;
229 else if (&Sem == &llvm::APFloat::Float8E5M2())
230 return S_Float8E5M2;
231 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
232 return S_Float8E5M2FNUZ;
233 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
234 return S_Float8E4M3FN;
235 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
236 return S_Float8E4M3FNUZ;
237 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
238 return S_Float8E4M3B11FNUZ;
239 else if (&Sem == &llvm::APFloat::FloatTF32())
240 return S_FloatTF32;
241 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
242 return S_x87DoubleExtended;
243 else
244 llvm_unreachable("Unknown floating semantics");
245}
246
253 return semPPCDoubleDouble;
254}
261}
265}
267
273
274/* A tight upper bound on number of parts required to hold the value
275 pow(5, power) is
276
277 power * 815 / (351 * integerPartWidth) + 1
278
279 However, whilst the result may require only this many parts,
280 because we are multiplying two values to get it, the
281 multiplication may require an extra part with the excess part
282 being zero (consider the trivial case of 1 * 1, tcFullMultiply
283 requires two parts to hold the single-part result). So we add an
284 extra one to guarantee enough space whilst multiplying. */
285const unsigned int maxExponent = 16383;
286const unsigned int maxPrecision = 113;
288const unsigned int maxPowerOfFiveParts =
289 2 +
291
292unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
293 return semantics.precision;
294}
297 return semantics.maxExponent;
298}
301 return semantics.minExponent;
302}
303unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
304 return semantics.sizeInBits;
305}
307 bool isSigned) {
308 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
309 // at least one more bit than the MaxExponent to hold the max FP value.
310 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
311 // Extra sign bit needed.
312 if (isSigned)
313 ++MinBitWidth;
314 return MinBitWidth;
315}
316
318 const fltSemantics &Dst) {
319 // Exponent range must be larger.
320 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
321 return false;
322
323 // If the mantissa is long enough, the result value could still be denormal
324 // with a larger exponent range.
325 //
326 // FIXME: This condition is probably not accurate but also shouldn't be a
327 // practical concern with existing types.
328 return Dst.precision >= Src.precision;
329}
330
332 return Sem.sizeInBits;
333}
334
335static constexpr APFloatBase::ExponentType
336exponentZero(const fltSemantics &semantics) {
337 return semantics.minExponent - 1;
338}
339
340static constexpr APFloatBase::ExponentType
341exponentInf(const fltSemantics &semantics) {
342 return semantics.maxExponent + 1;
343}
344
345static constexpr APFloatBase::ExponentType
346exponentNaN(const fltSemantics &semantics) {
349 return exponentZero(semantics);
350 return semantics.maxExponent;
351 }
352 return semantics.maxExponent + 1;
353}
354
355/* A bunch of private, handy routines. */
356
357static inline Error createError(const Twine &Err) {
358 return make_error<StringError>(Err, inconvertibleErrorCode());
359}
360
361static constexpr inline unsigned int partCountForBits(unsigned int bits) {
363}
364
/* Map the character code C to its decimal digit value.  The codes for
   '0'..'9' yield 0U-9U; every other code yields a value >= 10U (the
   subtraction is unsigned, so codes below '0' wrap around to large
   values), which callers check for to reject non-digits. */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
371
372/* Return the value of a decimal exponent of the form
373 [+-]ddddddd.
374
375 If the exponent overflows, returns a large exponent with the
376 appropriate sign. */
379 bool isNegative;
380 unsigned int absExponent;
381 const unsigned int overlargeExponent = 24000; /* FIXME. */
382 StringRef::iterator p = begin;
383
384 // Treat no exponent as 0 to match binutils
385 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
386 return 0;
387 }
388
389 isNegative = (*p == '-');
390 if (*p == '-' || *p == '+') {
391 p++;
392 if (p == end)
393 return createError("Exponent has no digits");
394 }
395
396 absExponent = decDigitValue(*p++);
397 if (absExponent >= 10U)
398 return createError("Invalid character in exponent");
399
400 for (; p != end; ++p) {
401 unsigned int value;
402
403 value = decDigitValue(*p);
404 if (value >= 10U)
405 return createError("Invalid character in exponent");
406
407 absExponent = absExponent * 10U + value;
408 if (absExponent >= overlargeExponent) {
409 absExponent = overlargeExponent;
410 break;
411 }
412 }
413
414 if (isNegative)
415 return -(int) absExponent;
416 else
417 return (int) absExponent;
418}
419
420/* This is ugly and needs cleaning up, but I don't immediately see
421 how whilst remaining safe. */
424 int exponentAdjustment) {
425 int unsignedExponent;
426 bool negative, overflow;
427 int exponent = 0;
428
429 if (p == end)
430 return createError("Exponent has no digits");
431
432 negative = *p == '-';
433 if (*p == '-' || *p == '+') {
434 p++;
435 if (p == end)
436 return createError("Exponent has no digits");
437 }
438
439 unsignedExponent = 0;
440 overflow = false;
441 for (; p != end; ++p) {
442 unsigned int value;
443
444 value = decDigitValue(*p);
445 if (value >= 10U)
446 return createError("Invalid character in exponent");
447
448 unsignedExponent = unsignedExponent * 10 + value;
449 if (unsignedExponent > 32767) {
450 overflow = true;
451 break;
452 }
453 }
454
455 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
456 overflow = true;
457
458 if (!overflow) {
459 exponent = unsignedExponent;
460 if (negative)
461 exponent = -exponent;
462 exponent += exponentAdjustment;
463 if (exponent > 32767 || exponent < -32768)
464 overflow = true;
465 }
466
467 if (overflow)
468 exponent = negative ? -32768: 32767;
469
470 return exponent;
471}
472
475 StringRef::iterator *dot) {
476 StringRef::iterator p = begin;
477 *dot = end;
478 while (p != end && *p == '0')
479 p++;
480
481 if (p != end && *p == '.') {
482 *dot = p++;
483
484 if (end - begin == 1)
485 return createError("Significand has no digits");
486
487 while (p != end && *p == '0')
488 p++;
489 }
490
491 return p;
492}
493
494/* Given a normal decimal floating point number of the form
495
496 dddd.dddd[eE][+-]ddd
497
498 where the decimal point and exponent are optional, fill out the
499 structure D. Exponent is appropriate if the significand is
500 treated as an integer, and normalizedExponent if the significand
501 is taken to have the decimal point after a single leading
502 non-zero digit.
503
   If the value is zero, D->firstSigDigit points to a non-digit, and
505 the return exponent is zero.
506*/
508 const char *firstSigDigit;
509 const char *lastSigDigit;
512};
513
516 StringRef::iterator dot = end;
517
518 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
519 if (!PtrOrErr)
520 return PtrOrErr.takeError();
521 StringRef::iterator p = *PtrOrErr;
522
523 D->firstSigDigit = p;
524 D->exponent = 0;
525 D->normalizedExponent = 0;
526
527 for (; p != end; ++p) {
528 if (*p == '.') {
529 if (dot != end)
530 return createError("String contains multiple dots");
531 dot = p++;
532 if (p == end)
533 break;
534 }
535 if (decDigitValue(*p) >= 10U)
536 break;
537 }
538
539 if (p != end) {
540 if (*p != 'e' && *p != 'E')
541 return createError("Invalid character in significand");
542 if (p == begin)
543 return createError("Significand has no digits");
544 if (dot != end && p - begin == 1)
545 return createError("Significand has no digits");
546
547 /* p points to the first non-digit in the string */
548 auto ExpOrErr = readExponent(p + 1, end);
549 if (!ExpOrErr)
550 return ExpOrErr.takeError();
551 D->exponent = *ExpOrErr;
552
553 /* Implied decimal point? */
554 if (dot == end)
555 dot = p;
556 }
557
558 /* If number is all zeroes accept any exponent. */
559 if (p != D->firstSigDigit) {
560 /* Drop insignificant trailing zeroes. */
561 if (p != begin) {
562 do
563 do
564 p--;
565 while (p != begin && *p == '0');
566 while (p != begin && *p == '.');
567 }
568
569 /* Adjust the exponents for any decimal point. */
570 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
571 D->normalizedExponent = (D->exponent +
572 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
573 - (dot > D->firstSigDigit && dot < p)));
574 }
575
576 D->lastSigDigit = p;
577 return Error::success();
578}
579
580/* Return the trailing fraction of a hexadecimal number.
581 DIGITVALUE is the first hex digit of the fraction, P points to
582 the next digit. */
585 unsigned int digitValue) {
586 unsigned int hexDigit;
587
588 /* If the first trailing digit isn't 0 or 8 we can work out the
589 fraction immediately. */
590 if (digitValue > 8)
591 return lfMoreThanHalf;
592 else if (digitValue < 8 && digitValue > 0)
593 return lfLessThanHalf;
594
595 // Otherwise we need to find the first non-zero digit.
596 while (p != end && (*p == '0' || *p == '.'))
597 p++;
598
599 if (p == end)
600 return createError("Invalid trailing hexadecimal fraction!");
601
602 hexDigit = hexDigitValue(*p);
603
604 /* If we ran off the end it is exactly zero or one-half, otherwise
605 a little more. */
606 if (hexDigit == UINT_MAX)
607 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
608 else
609 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
610}
611
612/* Return the fraction lost were a bignum truncated losing the least
613 significant BITS bits. */
614static lostFraction
616 unsigned int partCount,
617 unsigned int bits)
618{
619 unsigned int lsb;
620
621 lsb = APInt::tcLSB(parts, partCount);
622
623 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
624 if (bits <= lsb)
625 return lfExactlyZero;
626 if (bits == lsb + 1)
627 return lfExactlyHalf;
628 if (bits <= partCount * APFloatBase::integerPartWidth &&
629 APInt::tcExtractBit(parts, bits - 1))
630 return lfMoreThanHalf;
631
632 return lfLessThanHalf;
633}
634
635/* Shift DST right BITS bits noting lost fraction. */
636static lostFraction
637shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
638{
639 lostFraction lost_fraction;
640
641 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
642
643 APInt::tcShiftRight(dst, parts, bits);
644
645 return lost_fraction;
646}
647
648/* Combine the effect of two lost fractions. */
649static lostFraction
651 lostFraction lessSignificant)
652{
653 if (lessSignificant != lfExactlyZero) {
654 if (moreSignificant == lfExactlyZero)
655 moreSignificant = lfLessThanHalf;
656 else if (moreSignificant == lfExactlyHalf)
657 moreSignificant = lfMoreThanHalf;
658 }
659
660 return moreSignificant;
661}
662
/* Error bound, in half-ulps, for the product of two floating point
   numbers whose own errors are at most HUERR1 and HUERR2 half-ulps.
   The true error is strictly less than the value returned.
   INEXACTMULTIPLY says whether the multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;

  /* Inexact inputs: the multiply contributes at most one more half-ulp
     on top of twice the combined input error. */
  if (inputError != 0)
    return inexactMultiply + 2 * inputError;

  /* Exact inputs: <= inexactMultiply half-ulps. */
  return inexactMultiply ? 2U : 0U;
}
680
681/* The number of ulps from the boundary (zero, or half if ISNEAREST)
682 when the least significant BITS are truncated. BITS cannot be
683 zero. */
685ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
686 bool isNearest) {
687 unsigned int count, partBits;
688 APFloatBase::integerPart part, boundary;
689
690 assert(bits != 0);
691
692 bits--;
694 partBits = bits % APFloatBase::integerPartWidth + 1;
695
696 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
697
698 if (isNearest)
699 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
700 else
701 boundary = 0;
702
703 if (count == 0) {
704 if (part - boundary <= boundary - part)
705 return part - boundary;
706 else
707 return boundary - part;
708 }
709
710 if (part == boundary) {
711 while (--count)
712 if (parts[count])
713 return ~(APFloatBase::integerPart) 0; /* A lot. */
714
715 return parts[0];
716 } else if (part == boundary - 1) {
717 while (--count)
718 if (~parts[count])
719 return ~(APFloatBase::integerPart) 0; /* A lot. */
720
721 return -parts[0];
722 }
723
724 return ~(APFloatBase::integerPart) 0; /* A lot. */
725}
726
727/* Place pow(5, power) in DST, and return the number of parts used.
728 DST must be at least one part larger than size of the answer. */
729static unsigned int
730powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
731 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
733 pow5s[0] = 78125 * 5;
734
735 unsigned int partsCount = 1;
736 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
737 unsigned int result;
738 assert(power <= maxExponent);
739
740 p1 = dst;
741 p2 = scratch;
742
743 *p1 = firstEightPowers[power & 7];
744 power >>= 3;
745
746 result = 1;
747 pow5 = pow5s;
748
749 for (unsigned int n = 0; power; power >>= 1, n++) {
750 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
751 if (n != 0) {
752 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
753 partsCount, partsCount);
754 partsCount *= 2;
755 if (pow5[partsCount - 1] == 0)
756 partsCount--;
757 }
758
759 if (power & 1) {
761
762 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
763 result += partsCount;
764 if (p2[result - 1] == 0)
765 result--;
766
767 /* Now result is in p1 with partsCount parts and p2 is scratch
768 space. */
769 tmp = p1;
770 p1 = p2;
771 p2 = tmp;
772 }
773
774 pow5 += partsCount;
775 }
776
777 if (p1 != dst)
778 APInt::tcAssign(dst, p1, result);
779
780 return result;
781}
782
/* Hexadecimal digit tables.  Each has a seventeenth entry, '0', so that
   "digit + 1" can be looked up without modular arithmetic; this is used
   when rounding up during hexadecimal output. */
static constexpr char hexDigitsLower[] = "0123456789abcdef0";
static constexpr char hexDigitsUpper[] = "0123456789ABCDEF0";

/* Spellings of the non-finite values, lower and upper case. */
static constexpr char infinityL[] = "infinity";
static constexpr char infinityU[] = "INFINITY";
static constexpr char NaNL[] = "nan";
static constexpr char NaNU[] = "NAN";
791
792/* Write out an integerPart in hexadecimal, starting with the most
793 significant nibble. Write out exactly COUNT hexdigits, return
794 COUNT. */
795static unsigned int
796partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
797 const char *hexDigitChars)
798{
799 unsigned int result = count;
800
802
803 part >>= (APFloatBase::integerPartWidth - 4 * count);
804 while (count--) {
805 dst[count] = hexDigitChars[part & 0xf];
806 part >>= 4;
807 }
808
809 return result;
810}
811
/* Write the decimal representation of N to DST, most significant digit
   first, with no terminating NUL.  Returns a pointer one past the last
   character written.  At least one digit is always produced, so zero
   is written as "0". */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int length = 0;

  /* Peel the digits off least significant first ... */
  do {
    digits[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* ... then emit them in the opposite order. */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
829
830/* Write out a signed decimal integer. */
831static char *
832writeSignedDecimal (char *dst, int value)
833{
834 if (value < 0) {
835 *dst++ = '-';
836 dst = writeUnsignedDecimal(dst, -(unsigned) value);
837 } else
838 dst = writeUnsignedDecimal(dst, value);
839
840 return dst;
841}
842
843namespace detail {
844/* Constructors. */
845void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
846 unsigned int count;
847
848 semantics = ourSemantics;
849 count = partCount();
850 if (count > 1)
851 significand.parts = new integerPart[count];
852}
853
854void IEEEFloat::freeSignificand() {
855 if (needsCleanup())
856 delete [] significand.parts;
857}
858
859void IEEEFloat::assign(const IEEEFloat &rhs) {
860 assert(semantics == rhs.semantics);
861
862 sign = rhs.sign;
863 category = rhs.category;
864 exponent = rhs.exponent;
865 if (isFiniteNonZero() || category == fcNaN)
866 copySignificand(rhs);
867}
868
869void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
870 assert(isFiniteNonZero() || category == fcNaN);
871 assert(rhs.partCount() >= partCount());
872
873 APInt::tcAssign(significandParts(), rhs.significandParts(),
874 partCount());
875}
876
877/* Make this number a NaN, with an arbitrary but deterministic value
878 for the significand. If double or longer, this is a signalling NaN,
879 which may not be ideal. If float, this is QNaN(0). */
880void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
881 category = fcNaN;
882 sign = Negative;
883 exponent = exponentNaN();
884
885 integerPart *significand = significandParts();
886 unsigned numParts = partCount();
887
888 APInt fill_storage;
    // Finite-only types do not distinguish signalling and quiet NaN, so
    // make them all quiet.
892 SNaN = false;
893 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
894 sign = true;
895 fill_storage = APInt::getZero(semantics->precision - 1);
896 } else {
897 fill_storage = APInt::getAllOnes(semantics->precision - 1);
898 }
899 fill = &fill_storage;
900 }
901
902 // Set the significand bits to the fill.
903 if (!fill || fill->getNumWords() < numParts)
904 APInt::tcSet(significand, 0, numParts);
905 if (fill) {
906 APInt::tcAssign(significand, fill->getRawData(),
907 std::min(fill->getNumWords(), numParts));
908
909 // Zero out the excess bits of the significand.
910 unsigned bitsToPreserve = semantics->precision - 1;
911 unsigned part = bitsToPreserve / 64;
912 bitsToPreserve %= 64;
913 significand[part] &= ((1ULL << bitsToPreserve) - 1);
914 for (part++; part != numParts; ++part)
915 significand[part] = 0;
916 }
917
918 unsigned QNaNBit = semantics->precision - 2;
919
920 if (SNaN) {
921 // We always have to clear the QNaN bit to make it an SNaN.
922 APInt::tcClearBit(significand, QNaNBit);
923
924 // If there are no bits set in the payload, we have to set
925 // *something* to make it a NaN instead of an infinity;
926 // conventionally, this is the next bit down from the QNaN bit.
927 if (APInt::tcIsZero(significand, numParts))
928 APInt::tcSetBit(significand, QNaNBit - 1);
929 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
930 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
931 // Do nothing.
932 } else {
933 // We always have to set the QNaN bit to make it a QNaN.
934 APInt::tcSetBit(significand, QNaNBit);
935 }
936
937 // For x87 extended precision, we want to make a NaN, not a
938 // pseudo-NaN. Maybe we should expose the ability to make
939 // pseudo-NaNs?
940 if (semantics == &semX87DoubleExtended)
941 APInt::tcSetBit(significand, QNaNBit + 1);
942}
943
945 if (this != &rhs) {
946 if (semantics != rhs.semantics) {
947 freeSignificand();
948 initialize(rhs.semantics);
949 }
950 assign(rhs);
951 }
952
953 return *this;
954}
955
957 freeSignificand();
958
959 semantics = rhs.semantics;
960 significand = rhs.significand;
961 exponent = rhs.exponent;
962 category = rhs.category;
963 sign = rhs.sign;
964
965 rhs.semantics = &semBogus;
966 return *this;
967}
968
970 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
971 (APInt::tcExtractBit(significandParts(),
972 semantics->precision - 1) == 0);
973}
974
976 // The smallest number by magnitude in our format will be the smallest
977 // denormal, i.e. the floating point number with exponent being minimum
978 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
979 return isFiniteNonZero() && exponent == semantics->minExponent &&
980 significandMSB() == 0;
981}
982
984 return getCategory() == fcNormal && exponent == semantics->minExponent &&
985 isSignificandAllZerosExceptMSB();
986}
987
988bool IEEEFloat::isSignificandAllOnes() const {
989 // Test if the significand excluding the integral bit is all ones. This allows
990 // us to test for binade boundaries.
991 const integerPart *Parts = significandParts();
992 const unsigned PartCount = partCountForBits(semantics->precision);
993 for (unsigned i = 0; i < PartCount - 1; i++)
994 if (~Parts[i])
995 return false;
996
997 // Set the unused high bits to all ones when we compare.
998 const unsigned NumHighBits =
999 PartCount*integerPartWidth - semantics->precision + 1;
1000 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1001 "Can not have more high bits to fill than integerPartWidth");
1002 const integerPart HighBitFill =
1003 ~integerPart(0) << (integerPartWidth - NumHighBits);
1004 if (~(Parts[PartCount - 1] | HighBitFill))
1005 return false;
1006
1007 return true;
1008}
1009
1010bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1011 // Test if the significand excluding the integral bit is all ones except for
1012 // the least significant bit.
1013 const integerPart *Parts = significandParts();
1014
1015 if (Parts[0] & 1)
1016 return false;
1017
1018 const unsigned PartCount = partCountForBits(semantics->precision);
1019 for (unsigned i = 0; i < PartCount - 1; i++) {
1020 if (~Parts[i] & ~unsigned{!i})
1021 return false;
1022 }
1023
1024 // Set the unused high bits to all ones when we compare.
1025 const unsigned NumHighBits =
1026 PartCount * integerPartWidth - semantics->precision + 1;
1027 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1028 "Can not have more high bits to fill than integerPartWidth");
1029 const integerPart HighBitFill = ~integerPart(0)
1030 << (integerPartWidth - NumHighBits);
1031 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1032 return false;
1033
1034 return true;
1035}
1036
1037bool IEEEFloat::isSignificandAllZeros() const {
1038 // Test if the significand excluding the integral bit is all zeros. This
1039 // allows us to test for binade boundaries.
1040 const integerPart *Parts = significandParts();
1041 const unsigned PartCount = partCountForBits(semantics->precision);
1042
1043 for (unsigned i = 0; i < PartCount - 1; i++)
1044 if (Parts[i])
1045 return false;
1046
1047 // Compute how many bits are used in the final word.
1048 const unsigned NumHighBits =
1049 PartCount*integerPartWidth - semantics->precision + 1;
1050 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1051 "clear than integerPartWidth");
1052 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1053
1054 if (Parts[PartCount - 1] & HighBitMask)
1055 return false;
1056
1057 return true;
1058}
1059
1060bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1061 const integerPart *Parts = significandParts();
1062 const unsigned PartCount = partCountForBits(semantics->precision);
1063
1064 for (unsigned i = 0; i < PartCount - 1; i++) {
1065 if (Parts[i])
1066 return false;
1067 }
1068
1069 const unsigned NumHighBits =
1070 PartCount * integerPartWidth - semantics->precision + 1;
1071 return Parts[PartCount - 1] == integerPart(1)
1072 << (integerPartWidth - NumHighBits);
1073}
1074
1077 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1078 // The largest number by magnitude in our format will be the floating point
1079 // number with maximum exponent and with significand that is all ones except
1080 // the LSB.
1081 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1082 isSignificandAllOnesExceptLSB();
1083 } else {
1084 // The largest number by magnitude in our format will be the floating point
1085 // number with maximum exponent and with significand that is all ones.
1086 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1087 isSignificandAllOnes();
1088 }
1089}
1090
1092 // This could be made more efficient; I'm going for obviously correct.
1093 if (!isFinite()) return false;
1094 IEEEFloat truncated = *this;
1095 truncated.roundToIntegral(rmTowardZero);
1096 return compare(truncated) == cmpEqual;
1097}
1098
1099bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1100 if (this == &rhs)
1101 return true;
1102 if (semantics != rhs.semantics ||
1103 category != rhs.category ||
1104 sign != rhs.sign)
1105 return false;
1106 if (category==fcZero || category==fcInfinity)
1107 return true;
1108
1109 if (isFiniteNonZero() && exponent != rhs.exponent)
1110 return false;
1111
1112 return std::equal(significandParts(), significandParts() + partCount(),
1113 rhs.significandParts());
1114}
1115
1117 initialize(&ourSemantics);
1118 sign = 0;
1119 category = fcNormal;
1120 zeroSignificand();
1121 exponent = ourSemantics.precision - 1;
1122 significandParts()[0] = value;
1124}
1125
1127 initialize(&ourSemantics);
1128 makeZero(false);
1129}
1130
1131// Delegate to the previous constructor, because later copy constructor may
1132// actually inspects category, which can't be garbage.
1134 : IEEEFloat(ourSemantics) {}
1135
1137 initialize(rhs.semantics);
1138 assign(rhs);
1139}
1140
1142 *this = std::move(rhs);
1143}
1144
1145IEEEFloat::~IEEEFloat() { freeSignificand(); }
1146
1147unsigned int IEEEFloat::partCount() const {
1148 return partCountForBits(semantics->precision + 1);
1149}
1150
1151const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1152 return const_cast<IEEEFloat *>(this)->significandParts();
1153}
1154
1155IEEEFloat::integerPart *IEEEFloat::significandParts() {
1156 if (partCount() > 1)
1157 return significand.parts;
1158 else
1159 return &significand.part;
1160}
1161
1162void IEEEFloat::zeroSignificand() {
1163 APInt::tcSet(significandParts(), 0, partCount());
1164}
1165
1166/* Increment an fcNormal floating point number's significand. */
1167void IEEEFloat::incrementSignificand() {
1168 integerPart carry;
1169
1170 carry = APInt::tcIncrement(significandParts(), partCount());
1171
1172 /* Our callers should never cause us to overflow. */
1173 assert(carry == 0);
1174 (void)carry;
1175}
1176
1177/* Add the significand of the RHS. Returns the carry flag. */
1178IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1179 integerPart *parts;
1180
1181 parts = significandParts();
1182
1183 assert(semantics == rhs.semantics);
1184 assert(exponent == rhs.exponent);
1185
1186 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1187}
1188
1189/* Subtract the significand of the RHS with a borrow flag. Returns
1190 the borrow flag. */
1191IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1192 integerPart borrow) {
1193 integerPart *parts;
1194
1195 parts = significandParts();
1196
1197 assert(semantics == rhs.semantics);
1198 assert(exponent == rhs.exponent);
1199
1200 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1201 partCount());
1202}
1203
1204/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1205 on to the full-precision result of the multiplication. Returns the
1206 lost fraction. */
1207lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1208 IEEEFloat addend) {
1209 unsigned int omsb; // One, not zero, based MSB.
1210 unsigned int partsCount, newPartsCount, precision;
1211 integerPart *lhsSignificand;
1212 integerPart scratch[4];
1213 integerPart *fullSignificand;
1214 lostFraction lost_fraction;
1215 bool ignored;
1216
1217 assert(semantics == rhs.semantics);
1218
1219 precision = semantics->precision;
1220
1221 // Allocate space for twice as many bits as the original significand, plus one
1222 // extra bit for the addition to overflow into.
1223 newPartsCount = partCountForBits(precision * 2 + 1);
1224
1225 if (newPartsCount > 4)
1226 fullSignificand = new integerPart[newPartsCount];
1227 else
1228 fullSignificand = scratch;
1229
1230 lhsSignificand = significandParts();
1231 partsCount = partCount();
1232
1233 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1234 rhs.significandParts(), partsCount, partsCount);
1235
1236 lost_fraction = lfExactlyZero;
1237 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1238 exponent += rhs.exponent;
1239
1240 // Assume the operands involved in the multiplication are single-precision
1241 // FP, and the two multiplicants are:
1242 // *this = a23 . a22 ... a0 * 2^e1
1243 // rhs = b23 . b22 ... b0 * 2^e2
1244 // the result of multiplication is:
1245 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1246 // Note that there are three significant bits at the left-hand side of the
1247 // radix point: two for the multiplication, and an overflow bit for the
1248 // addition (that will always be zero at this point). Move the radix point
1249 // toward left by two bits, and adjust exponent accordingly.
1250 exponent += 2;
1251
1252 if (addend.isNonZero()) {
1253 // The intermediate result of the multiplication has "2 * precision"
1254 // signicant bit; adjust the addend to be consistent with mul result.
1255 //
1256 Significand savedSignificand = significand;
1257 const fltSemantics *savedSemantics = semantics;
1258 fltSemantics extendedSemantics;
1260 unsigned int extendedPrecision;
1261
1262 // Normalize our MSB to one below the top bit to allow for overflow.
1263 extendedPrecision = 2 * precision + 1;
1264 if (omsb != extendedPrecision - 1) {
1265 assert(extendedPrecision > omsb);
1266 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1267 (extendedPrecision - 1) - omsb);
1268 exponent -= (extendedPrecision - 1) - omsb;
1269 }
1270
1271 /* Create new semantics. */
1272 extendedSemantics = *semantics;
1273 extendedSemantics.precision = extendedPrecision;
1274
1275 if (newPartsCount == 1)
1276 significand.part = fullSignificand[0];
1277 else
1278 significand.parts = fullSignificand;
1279 semantics = &extendedSemantics;
1280
1281 // Make a copy so we can convert it to the extended semantics.
1282 // Note that we cannot convert the addend directly, as the extendedSemantics
1283 // is a local variable (which we take a reference to).
1284 IEEEFloat extendedAddend(addend);
1285 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1286 assert(status == opOK);
1287 (void)status;
1288
1289 // Shift the significand of the addend right by one bit. This guarantees
1290 // that the high bit of the significand is zero (same as fullSignificand),
1291 // so the addition will overflow (if it does overflow at all) into the top bit.
1292 lost_fraction = extendedAddend.shiftSignificandRight(1);
1293 assert(lost_fraction == lfExactlyZero &&
1294 "Lost precision while shifting addend for fused-multiply-add.");
1295
1296 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1297
1298 /* Restore our state. */
1299 if (newPartsCount == 1)
1300 fullSignificand[0] = significand.part;
1301 significand = savedSignificand;
1302 semantics = savedSemantics;
1303
1304 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1305 }
1306
1307 // Convert the result having "2 * precision" significant-bits back to the one
1308 // having "precision" significant-bits. First, move the radix point from
1309 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1310 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1311 exponent -= precision + 1;
1312
1313 // In case MSB resides at the left-hand side of radix point, shift the
1314 // mantissa right by some amount to make sure the MSB reside right before
1315 // the radix point (i.e. "MSB . rest-significant-bits").
1316 //
1317 // Note that the result is not normalized when "omsb < precision". So, the
1318 // caller needs to call IEEEFloat::normalize() if normalized value is
1319 // expected.
1320 if (omsb > precision) {
1321 unsigned int bits, significantParts;
1322 lostFraction lf;
1323
1324 bits = omsb - precision;
1325 significantParts = partCountForBits(omsb);
1326 lf = shiftRight(fullSignificand, significantParts, bits);
1327 lost_fraction = combineLostFractions(lf, lost_fraction);
1328 exponent += bits;
1329 }
1330
1331 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1332
1333 if (newPartsCount > 4)
1334 delete [] fullSignificand;
1335
1336 return lost_fraction;
1337}
1338
1339lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1340 return multiplySignificand(rhs, IEEEFloat(*semantics));
1341}
1342
1343/* Multiply the significands of LHS and RHS to DST. */
1344lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1345 unsigned int bit, i, partsCount;
1346 const integerPart *rhsSignificand;
1347 integerPart *lhsSignificand, *dividend, *divisor;
1348 integerPart scratch[4];
1349 lostFraction lost_fraction;
1350
1351 assert(semantics == rhs.semantics);
1352
1353 lhsSignificand = significandParts();
1354 rhsSignificand = rhs.significandParts();
1355 partsCount = partCount();
1356
1357 if (partsCount > 2)
1358 dividend = new integerPart[partsCount * 2];
1359 else
1360 dividend = scratch;
1361
1362 divisor = dividend + partsCount;
1363
1364 /* Copy the dividend and divisor as they will be modified in-place. */
1365 for (i = 0; i < partsCount; i++) {
1366 dividend[i] = lhsSignificand[i];
1367 divisor[i] = rhsSignificand[i];
1368 lhsSignificand[i] = 0;
1369 }
1370
1371 exponent -= rhs.exponent;
1372
1373 unsigned int precision = semantics->precision;
1374
1375 /* Normalize the divisor. */
1376 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1377 if (bit) {
1378 exponent += bit;
1379 APInt::tcShiftLeft(divisor, partsCount, bit);
1380 }
1381
1382 /* Normalize the dividend. */
1383 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1384 if (bit) {
1385 exponent -= bit;
1386 APInt::tcShiftLeft(dividend, partsCount, bit);
1387 }
1388
1389 /* Ensure the dividend >= divisor initially for the loop below.
1390 Incidentally, this means that the division loop below is
1391 guaranteed to set the integer bit to one. */
1392 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1393 exponent--;
1394 APInt::tcShiftLeft(dividend, partsCount, 1);
1395 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1396 }
1397
1398 /* Long division. */
1399 for (bit = precision; bit; bit -= 1) {
1400 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1401 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1402 APInt::tcSetBit(lhsSignificand, bit - 1);
1403 }
1404
1405 APInt::tcShiftLeft(dividend, partsCount, 1);
1406 }
1407
1408 /* Figure out the lost fraction. */
1409 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1410
1411 if (cmp > 0)
1412 lost_fraction = lfMoreThanHalf;
1413 else if (cmp == 0)
1414 lost_fraction = lfExactlyHalf;
1415 else if (APInt::tcIsZero(dividend, partsCount))
1416 lost_fraction = lfExactlyZero;
1417 else
1418 lost_fraction = lfLessThanHalf;
1419
1420 if (partsCount > 2)
1421 delete [] dividend;
1422
1423 return lost_fraction;
1424}
1425
1426unsigned int IEEEFloat::significandMSB() const {
1427 return APInt::tcMSB(significandParts(), partCount());
1428}
1429
1430unsigned int IEEEFloat::significandLSB() const {
1431 return APInt::tcLSB(significandParts(), partCount());
1432}
1433
1434/* Note that a zero result is NOT normalized to fcZero. */
1435lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1436 /* Our exponent should not overflow. */
1437 assert((ExponentType) (exponent + bits) >= exponent);
1438
1439 exponent += bits;
1440
1441 return shiftRight(significandParts(), partCount(), bits);
1442}
1443
1444/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1445void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1446 assert(bits < semantics->precision);
1447
1448 if (bits) {
1449 unsigned int partsCount = partCount();
1450
1451 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1452 exponent -= bits;
1453
1454 assert(!APInt::tcIsZero(significandParts(), partsCount));
1455 }
1456}
1457
1460 int compare;
1461
1462 assert(semantics == rhs.semantics);
1464 assert(rhs.isFiniteNonZero());
1465
1466 compare = exponent - rhs.exponent;
1467
1468 /* If exponents are equal, do an unsigned bignum comparison of the
1469 significands. */
1470 if (compare == 0)
1471 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1472 partCount());
1473
1474 if (compare > 0)
1475 return cmpGreaterThan;
1476 else if (compare < 0)
1477 return cmpLessThan;
1478 else
1479 return cmpEqual;
1480}
1481
1482/* Set the least significant BITS bits of a bignum, clear the
1483 rest. */
1484static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1485 unsigned bits) {
1486 unsigned i = 0;
1487 while (bits > APInt::APINT_BITS_PER_WORD) {
1488 dst[i++] = ~(APInt::WordType)0;
1490 }
1491
1492 if (bits)
1493 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1494
1495 while (i < parts)
1496 dst[i++] = 0;
1497}
1498
1499/* Handle overflow. Sign is preserved. We either become infinity or
1500 the largest finite number. */
1501IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1502 /* Infinity? */
1503 if (rounding_mode == rmNearestTiesToEven ||
1504 rounding_mode == rmNearestTiesToAway ||
1505 (rounding_mode == rmTowardPositive && !sign) ||
1506 (rounding_mode == rmTowardNegative && sign)) {
1508 makeNaN(false, sign);
1509 else
1510 category = fcInfinity;
1511 return (opStatus) (opOverflow | opInexact);
1512 }
1513
1514 /* Otherwise we become the largest finite number. */
1515 category = fcNormal;
1516 exponent = semantics->maxExponent;
1517 tcSetLeastSignificantBits(significandParts(), partCount(),
1518 semantics->precision);
1521 APInt::tcClearBit(significandParts(), 0);
1522
1523 return opInexact;
1524}
1525
1526/* Returns TRUE if, when truncating the current number, with BIT the
1527 new LSB, with the given lost fraction and rounding mode, the result
1528 would need to be rounded away from zero (i.e., by increasing the
1529 signficand). This routine must work for fcZero of both signs, and
1530 fcNormal numbers. */
1531bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1532 lostFraction lost_fraction,
1533 unsigned int bit) const {
1534 /* NaNs and infinities should not have lost fractions. */
1535 assert(isFiniteNonZero() || category == fcZero);
1536
1537 /* Current callers never pass this so we don't handle it. */
1538 assert(lost_fraction != lfExactlyZero);
1539
1540 switch (rounding_mode) {
1542 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1543
1545 if (lost_fraction == lfMoreThanHalf)
1546 return true;
1547
1548 /* Our zeroes don't have a significand to test. */
1549 if (lost_fraction == lfExactlyHalf && category != fcZero)
1550 return APInt::tcExtractBit(significandParts(), bit);
1551
1552 return false;
1553
1554 case rmTowardZero:
1555 return false;
1556
1557 case rmTowardPositive:
1558 return !sign;
1559
1560 case rmTowardNegative:
1561 return sign;
1562
1563 default:
1564 break;
1565 }
1566 llvm_unreachable("Invalid rounding mode found");
1567}
1568
1569IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1570 lostFraction lost_fraction) {
1571 unsigned int omsb; /* One, not zero, based MSB. */
1572 int exponentChange;
1573
1574 if (!isFiniteNonZero())
1575 return opOK;
1576
1577 /* Before rounding normalize the exponent of fcNormal numbers. */
1578 omsb = significandMSB() + 1;
1579
1580 if (omsb) {
1581 /* OMSB is numbered from 1. We want to place it in the integer
1582 bit numbered PRECISION if possible, with a compensating change in
1583 the exponent. */
1584 exponentChange = omsb - semantics->precision;
1585
1586 /* If the resulting exponent is too high, overflow according to
1587 the rounding mode. */
1588 if (exponent + exponentChange > semantics->maxExponent)
1589 return handleOverflow(rounding_mode);
1590
1591 /* Subnormal numbers have exponent minExponent, and their MSB
1592 is forced based on that. */
1593 if (exponent + exponentChange < semantics->minExponent)
1594 exponentChange = semantics->minExponent - exponent;
1595
1596 /* Shifting left is easy as we don't lose precision. */
1597 if (exponentChange < 0) {
1598 assert(lost_fraction == lfExactlyZero);
1599
1600 shiftSignificandLeft(-exponentChange);
1601
1602 return opOK;
1603 }
1604
1605 if (exponentChange > 0) {
1606 lostFraction lf;
1607
1608 /* Shift right and capture any new lost fraction. */
1609 lf = shiftSignificandRight(exponentChange);
1610
1611 lost_fraction = combineLostFractions(lf, lost_fraction);
1612
1613 /* Keep OMSB up-to-date. */
1614 if (omsb > (unsigned) exponentChange)
1615 omsb -= exponentChange;
1616 else
1617 omsb = 0;
1618 }
1619 }
1620
1621 // The all-ones values is an overflow if NaN is all ones. If NaN is
1622 // represented by negative zero, then it is a valid finite value.
1624 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1625 exponent == semantics->maxExponent && isSignificandAllOnes())
1626 return handleOverflow(rounding_mode);
1627
1628 /* Now round the number according to rounding_mode given the lost
1629 fraction. */
1630
1631 /* As specified in IEEE 754, since we do not trap we do not report
1632 underflow for exact results. */
1633 if (lost_fraction == lfExactlyZero) {
1634 /* Canonicalize zeroes. */
1635 if (omsb == 0) {
1636 category = fcZero;
1637 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1638 sign = false;
1639 }
1640
1641 return opOK;
1642 }
1643
1644 /* Increment the significand if we're rounding away from zero. */
1645 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1646 if (omsb == 0)
1647 exponent = semantics->minExponent;
1648
1649 incrementSignificand();
1650 omsb = significandMSB() + 1;
1651
1652 /* Did the significand increment overflow? */
1653 if (omsb == (unsigned) semantics->precision + 1) {
1654 /* Renormalize by incrementing the exponent and shifting our
1655 significand right one. However if we already have the
1656 maximum exponent we overflow to infinity. */
1657 if (exponent == semantics->maxExponent)
1658 // Invoke overflow handling with a rounding mode that will guarantee
1659 // that the result gets turned into the correct infinity representation.
1660 // This is needed instead of just setting the category to infinity to
1661 // account for 8-bit floating point types that have no inf, only NaN.
1662 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1663
1664 shiftSignificandRight(1);
1665
1666 return opInexact;
1667 }
1668
1669 // The all-ones values is an overflow if NaN is all ones. If NaN is
1670 // represented by negative zero, then it is a valid finite value.
1672 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1673 exponent == semantics->maxExponent && isSignificandAllOnes())
1674 return handleOverflow(rounding_mode);
1675 }
1676
1677 /* The normal case - we were and are not denormal, and any
1678 significand increment above didn't overflow. */
1679 if (omsb == semantics->precision)
1680 return opInexact;
1681
1682 /* We have a non-zero denormal. */
1683 assert(omsb < semantics->precision);
1684
1685 /* Canonicalize zeroes. */
1686 if (omsb == 0) {
1687 category = fcZero;
1688 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1689 sign = false;
1690 }
1691
1692 /* The fcZero case is a denormal that underflowed to zero. */
1693 return (opStatus) (opUnderflow | opInexact);
1694}
1695
1696IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1697 bool subtract) {
1698 switch (PackCategoriesIntoKey(category, rhs.category)) {
1699 default:
1700 llvm_unreachable(nullptr);
1701
1705 assign(rhs);
1706 [[fallthrough]];
1711 if (isSignaling()) {
1712 makeQuiet();
1713 return opInvalidOp;
1714 }
1715 return rhs.isSignaling() ? opInvalidOp : opOK;
1716
1720 return opOK;
1721
1724 category = fcInfinity;
1725 sign = rhs.sign ^ subtract;
1726 return opOK;
1727
1729 assign(rhs);
1730 sign = rhs.sign ^ subtract;
1731 return opOK;
1732
1734 /* Sign depends on rounding mode; handled by caller. */
1735 return opOK;
1736
1738 /* Differently signed infinities can only be validly
1739 subtracted. */
1740 if (((sign ^ rhs.sign)!=0) != subtract) {
1741 makeNaN();
1742 return opInvalidOp;
1743 }
1744
1745 return opOK;
1746
1748 return opDivByZero;
1749 }
1750}
1751
1752/* Add or subtract two normal numbers. */
1753lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1754 bool subtract) {
1755 integerPart carry;
1756 lostFraction lost_fraction;
1757 int bits;
1758
1759 /* Determine if the operation on the absolute values is effectively
1760 an addition or subtraction. */
1761 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1762
1763 /* Are we bigger exponent-wise than the RHS? */
1764 bits = exponent - rhs.exponent;
1765
1766 /* Subtraction is more subtle than one might naively expect. */
1767 if (subtract) {
1768 IEEEFloat temp_rhs(rhs);
1769
1770 if (bits == 0)
1771 lost_fraction = lfExactlyZero;
1772 else if (bits > 0) {
1773 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1774 shiftSignificandLeft(1);
1775 } else {
1776 lost_fraction = shiftSignificandRight(-bits - 1);
1777 temp_rhs.shiftSignificandLeft(1);
1778 }
1779
1780 // Should we reverse the subtraction.
1781 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1782 carry = temp_rhs.subtractSignificand
1783 (*this, lost_fraction != lfExactlyZero);
1784 copySignificand(temp_rhs);
1785 sign = !sign;
1786 } else {
1787 carry = subtractSignificand
1788 (temp_rhs, lost_fraction != lfExactlyZero);
1789 }
1790
1791 /* Invert the lost fraction - it was on the RHS and
1792 subtracted. */
1793 if (lost_fraction == lfLessThanHalf)
1794 lost_fraction = lfMoreThanHalf;
1795 else if (lost_fraction == lfMoreThanHalf)
1796 lost_fraction = lfLessThanHalf;
1797
1798 /* The code above is intended to ensure that no borrow is
1799 necessary. */
1800 assert(!carry);
1801 (void)carry;
1802 } else {
1803 if (bits > 0) {
1804 IEEEFloat temp_rhs(rhs);
1805
1806 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1807 carry = addSignificand(temp_rhs);
1808 } else {
1809 lost_fraction = shiftSignificandRight(-bits);
1810 carry = addSignificand(rhs);
1811 }
1812
1813 /* We have a guard bit; generating a carry cannot happen. */
1814 assert(!carry);
1815 (void)carry;
1816 }
1817
1818 return lost_fraction;
1819}
1820
1821IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1822 switch (PackCategoriesIntoKey(category, rhs.category)) {
1823 default:
1824 llvm_unreachable(nullptr);
1825
1829 assign(rhs);
1830 sign = false;
1831 [[fallthrough]];
1836 sign ^= rhs.sign; // restore the original sign
1837 if (isSignaling()) {
1838 makeQuiet();
1839 return opInvalidOp;
1840 }
1841 return rhs.isSignaling() ? opInvalidOp : opOK;
1842
1846 category = fcInfinity;
1847 return opOK;
1848
1852 category = fcZero;
1853 return opOK;
1854
1857 makeNaN();
1858 return opInvalidOp;
1859
1861 return opOK;
1862 }
1863}
1864
1865IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1866 switch (PackCategoriesIntoKey(category, rhs.category)) {
1867 default:
1868 llvm_unreachable(nullptr);
1869
1873 assign(rhs);
1874 sign = false;
1875 [[fallthrough]];
1880 sign ^= rhs.sign; // restore the original sign
1881 if (isSignaling()) {
1882 makeQuiet();
1883 return opInvalidOp;
1884 }
1885 return rhs.isSignaling() ? opInvalidOp : opOK;
1886
1891 return opOK;
1892
1894 category = fcZero;
1895 return opOK;
1896
1898 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1899 makeNaN(false, sign);
1900 else
1901 category = fcInfinity;
1902 return opDivByZero;
1903
1906 makeNaN();
1907 return opInvalidOp;
1908
1910 return opOK;
1911 }
1912}
1913
1914IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1915 switch (PackCategoriesIntoKey(category, rhs.category)) {
1916 default:
1917 llvm_unreachable(nullptr);
1918
1922 assign(rhs);
1923 [[fallthrough]];
1928 if (isSignaling()) {
1929 makeQuiet();
1930 return opInvalidOp;
1931 }
1932 return rhs.isSignaling() ? opInvalidOp : opOK;
1933
1937 return opOK;
1938
1944 makeNaN();
1945 return opInvalidOp;
1946
1948 return opOK;
1949 }
1950}
1951
1952IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1953 switch (PackCategoriesIntoKey(category, rhs.category)) {
1954 default:
1955 llvm_unreachable(nullptr);
1956
1960 assign(rhs);
1961 [[fallthrough]];
1966 if (isSignaling()) {
1967 makeQuiet();
1968 return opInvalidOp;
1969 }
1970 return rhs.isSignaling() ? opInvalidOp : opOK;
1971
1975 return opOK;
1976
1982 makeNaN();
1983 return opInvalidOp;
1984
1986 return opDivByZero; // fake status, indicating this is not a special case
1987 }
1988}
1989
1990/* Change sign. */
1992 // With NaN-as-negative-zero, neither NaN or negative zero can change
1993 // their signs.
1994 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
1995 (isZero() || isNaN()))
1996 return;
1997 /* Look mummy, this one's easy. */
1998 sign = !sign;
1999}
2000
2001/* Normalized addition or subtraction. */
2002IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2003 roundingMode rounding_mode,
2004 bool subtract) {
2005 opStatus fs;
2006
2007 fs = addOrSubtractSpecials(rhs, subtract);
2008
2009 /* This return code means it was not a simple case. */
2010 if (fs == opDivByZero) {
2011 lostFraction lost_fraction;
2012
2013 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2014 fs = normalize(rounding_mode, lost_fraction);
2015
2016 /* Can only be zero if we lost no fraction. */
2017 assert(category != fcZero || lost_fraction == lfExactlyZero);
2018 }
2019
2020 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2021 positive zero unless rounding to minus infinity, except that
2022 adding two like-signed zeroes gives that zero. */
2023 if (category == fcZero) {
2024 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2025 sign = (rounding_mode == rmTowardNegative);
2026 // NaN-in-negative-zero means zeros need to be normalized to +0.
2027 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2028 sign = false;
2029 }
2030
2031 return fs;
2032}
2033
2034/* Normalized addition. */
2036 roundingMode rounding_mode) {
2037 return addOrSubtract(rhs, rounding_mode, false);
2038}
2039
2040/* Normalized subtraction. */
2042 roundingMode rounding_mode) {
2043 return addOrSubtract(rhs, rounding_mode, true);
2044}
2045
2046/* Normalized multiply. */
2048 roundingMode rounding_mode) {
2049 opStatus fs;
2050
2051 sign ^= rhs.sign;
2052 fs = multiplySpecials(rhs);
2053
2054 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2055 sign = false;
2056 if (isFiniteNonZero()) {
2057 lostFraction lost_fraction = multiplySignificand(rhs);
2058 fs = normalize(rounding_mode, lost_fraction);
2059 if (lost_fraction != lfExactlyZero)
2060 fs = (opStatus) (fs | opInexact);
2061 }
2062
2063 return fs;
2064}
2065
2066/* Normalized divide. */
2068 roundingMode rounding_mode) {
2069 opStatus fs;
2070
2071 sign ^= rhs.sign;
2072 fs = divideSpecials(rhs);
2073
2074 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2075 sign = false;
2076 if (isFiniteNonZero()) {
2077 lostFraction lost_fraction = divideSignificand(rhs);
2078 fs = normalize(rounding_mode, lost_fraction);
2079 if (lost_fraction != lfExactlyZero)
2080 fs = (opStatus) (fs | opInexact);
2081 }
2082
2083 return fs;
2084}
2085
2086/* Normalized remainder. */
2088 opStatus fs;
2089 unsigned int origSign = sign;
2090
2091 // First handle the special cases.
2092 fs = remainderSpecials(rhs);
2093 if (fs != opDivByZero)
2094 return fs;
2095
2096 fs = opOK;
2097
2098 // Make sure the current value is less than twice the denom. If the addition
2099 // did not succeed (an overflow has happened), which means that the finite
2100 // value we currently posses must be less than twice the denom (as we are
2101 // using the same semantics).
2102 IEEEFloat P2 = rhs;
2103 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2104 fs = mod(P2);
2105 assert(fs == opOK);
2106 }
2107
2108 // Lets work with absolute numbers.
2109 IEEEFloat P = rhs;
2110 P.sign = false;
2111 sign = false;
2112
2113 //
2114 // To calculate the remainder we use the following scheme.
2115 //
2116 // The remainder is defained as follows:
2117 //
2118 // remainder = numer - rquot * denom = x - r * p
2119 //
2120 // Where r is the result of: x/p, rounded toward the nearest integral value
2121 // (with halfway cases rounded toward the even number).
2122 //
2123 // Currently, (after x mod 2p):
2124 // r is the number of 2p's present inside x, which is inherently, an even
2125 // number of p's.
2126 //
2127 // We may split the remaining calculation into 4 options:
2128 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2129 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2130 // are done as well.
2131 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2132 // to subtract 1p at least once.
2133 // - if x >= p then we must subtract p at least once, as x must be a
2134 // remainder.
2135 //
2136 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2137 //
2138 // We can now split the remaining calculation to the following 3 options:
2139 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2140 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2141 // must round up to the next even number. so we must subtract p once more.
2142 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2143 // integral, and subtract p once more.
2144 //
2145
2146 // Extend the semantics to prevent an overflow/underflow or inexact result.
2147 bool losesInfo;
2148 fltSemantics extendedSemantics = *semantics;
2149 extendedSemantics.maxExponent++;
2150 extendedSemantics.minExponent--;
2151 extendedSemantics.precision += 2;
2152
2153 IEEEFloat VEx = *this;
2154 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2155 assert(fs == opOK && !losesInfo);
2156 IEEEFloat PEx = P;
2157 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2158 assert(fs == opOK && !losesInfo);
2159
2160 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2161 // any fraction.
2162 fs = VEx.add(VEx, rmNearestTiesToEven);
2163 assert(fs == opOK);
2164
2165 if (VEx.compare(PEx) == cmpGreaterThan) {
2167 assert(fs == opOK);
2168
2169 // Make VEx = this.add(this), but because we have different semantics, we do
2170 // not want to `convert` again, so we just subtract PEx twice (which equals
2171 // to the desired value).
2172 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2173 assert(fs == opOK);
2174 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2175 assert(fs == opOK);
2176
2177 cmpResult result = VEx.compare(PEx);
2178 if (result == cmpGreaterThan || result == cmpEqual) {
2180 assert(fs == opOK);
2181 }
2182 }
2183
2184 if (isZero()) {
2185 sign = origSign; // IEEE754 requires this
2186 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2187 // But some 8-bit floats only have positive 0.
2188 sign = false;
2189 }
2190
2191 else
2192 sign ^= origSign;
2193 return fs;
2194}
2195
2196/* Normalized llvm frem (C fmod). */
// NOTE(review): this listing has gaps in this function: the signature line,
// the tail of the `while` condition, and the statement that assigns `fs`
// right before the assert inside the loop are not visible. The comments
// below describe only the lines that are present.
2198 opStatus fs;
// Category-level special cases are delegated to modSpecials().
2199 fs = modSpecials(rhs);
// Saved so that a zero result can be given the sign the value had on entry.
2200 unsigned int origSign = sign;
2201
// Each iteration builds V = rhs * 2^Exp, where Exp is the difference of the
// two binary exponents as reported by ilogb().
2202 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2204 int Exp = ilogb(*this) - ilogb(rhs);
2205 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2206 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2207 // check for it.
// If V is unusable (NaN) or compareAbsoluteValue(V) reports cmpLessThan,
// retry with rhs scaled by one power of two less.
2208 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2209 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
// V is given the same sign as *this.
2210 V.sign = sign;
2211
2213 assert(fs==opOK);
2214 }
2215 if (isZero()) {
2216 sign = origSign; // fmod requires this
// Formats whose NaN encoding is NegativeZero get a positive zero instead.
2217 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2218 sign = false;
2219 }
2220 return fs;
2221}
2222
2223/* Normalized fused-multiply-add. */
// NOTE(review): the first line of the signature (function name and the
// `multiplicand` parameter) is not visible in this listing; only the
// `addend` and `rounding_mode` parameters appear below.
//
// Visible behaviour: when *this and `multiplicand` are finite non-zero and
// `addend` is finite, the product and sum are formed in one step by
// multiplySignificand(multiplicand, addend) and then normalized once.
// Otherwise the multiplication special cases are applied and, if they
// return opOK, the addend is added via addOrSubtract().
2225 const IEEEFloat &addend,
2226 roundingMode rounding_mode) {
2227 opStatus fs;
2228
2229 /* Post-multiplication sign, before addition. */
2230 sign ^= multiplicand.sign;
2231
2232 /* If and only if all arguments are normal do we need to do an
2233 extended-precision calculation. */
2234 if (isFiniteNonZero() &&
2235 multiplicand.isFiniteNonZero() &&
2236 addend.isFinite()) {
2237 lostFraction lost_fraction;
2238
2239 lost_fraction = multiplySignificand(multiplicand, addend);
2240 fs = normalize(rounding_mode, lost_fraction);
// Any lost fraction makes the result inexact, whatever normalize() reported.
2241 if (lost_fraction != lfExactlyZero)
2242 fs = (opStatus) (fs | opInexact);
2243
2244 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2245 positive zero unless rounding to minus infinity, except that
2246 adding two like-signed zeroes gives that zero. */
2247 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2248 sign = (rounding_mode == rmTowardNegative);
// Formats whose NaN encoding is NegativeZero get a positive zero instead.
2249 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2250 sign = false;
2251 }
2252 } else {
2253 fs = multiplySpecials(multiplicand);
2254
2255 /* FS can only be opOK or opInvalidOp. There is no more work
2256 to do in the latter case. The IEEE-754R standard says it is
2257 implementation-defined in this case whether, if ADDEND is a
2258 quiet NaN, we raise invalid op; this implementation does so.
2259
2260 If we need to do the addition we can do so with normal
2261 precision. */
2262 if (fs == opOK)
2263 fs = addOrSubtract(addend, rounding_mode, false);
2264 }
2265
2266 return fs;
2267}
2268
2269/* Rounding-mode correct round to integral value. */
// NOTE(review): the signature line is not visible in this listing, and the
// second line of the convertFromAPInt() call below (its remaining
// arguments) is missing as well.
//
// Visible behaviour: infinities, quiet NaNs, zeroes and values whose
// exponent already implies an integer are returned unchanged with opOK;
// a signaling NaN is quieted and reported as opInvalidOp; everything else
// is rounded by adding and then subtracting a power-of-two constant.
// The status returned in that last case is the one from the addition.
2271 opStatus fs;
2272
2273 if (isInfinity())
2274 // [IEEE Std 754-2008 6.1]:
2275 // The behavior of infinity in floating-point arithmetic is derived from the
2276 // limiting cases of real arithmetic with operands of arbitrarily
2277 // large magnitude, when such a limit exists.
2278 // ...
2279 // Operations on infinite operands are usually exact and therefore signal no
2280 // exceptions ...
2281 return opOK;
2282
2283 if (isNaN()) {
2284 if (isSignaling()) {
2285 // [IEEE Std 754-2008 6.2]:
2286 // Under default exception handling, any operation signaling an invalid
2287 // operation exception and for which a floating-point result is to be
2288 // delivered shall deliver a quiet NaN.
2289 makeQuiet();
2290 // [IEEE Std 754-2008 6.2]:
2291 // Signaling NaNs shall be reserved operands that, under default exception
2292 // handling, signal the invalid operation exception(see 7.2) for every
2293 // general-computational and signaling-computational operation except for
2294 // the conversions described in 5.12.
2295 return opInvalidOp;
2296 } else {
2297 // [IEEE Std 754-2008 6.2]:
2298 // For an operation with quiet NaN inputs, other than maximum and minimum
2299 // operations, if a floating-point result is to be delivered the result
2300 // shall be a quiet NaN which should be one of the input NaNs.
2301 // ...
2302 // Every general-computational and quiet-computational operation involving
2303 // one or more input NaNs, none of them signaling, shall signal no
2304 // exception, except fusedMultiplyAdd might signal the invalid operation
2305 // exception(see 7.2).
2306 return opOK;
2307 }
2308 }
2309
2310 if (isZero()) {
2311 // [IEEE Std 754-2008 6.3]:
2312 // ... the sign of the result of conversions, the quantize operation, the
2313 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2314 // the sign of the first or only operand.
2315 return opOK;
2316 }
2317
2318 // If the exponent is large enough, we know that this value is already
2319 // integral, and the arithmetic below would potentially cause it to saturate
2320 // to +/-Inf. Bail out early instead.
2321 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2322 return opOK;
2323
2324 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2325 // precision of our format, and then subtract it back off again. The choice
2326 // of rounding modes for the addition/subtraction determines the rounding mode
2327 // for our integral rounding as well.
2328 // NOTE: When the input value is negative, we do subtraction followed by
2329 // addition instead.
// Build the integer 1 << (p-1) in an APInt wide enough to hold it, then
// convert it to this format; the assert expects that conversion to be exact.
2330 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2331 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2332 IEEEFloat MagicConstant(*semantics);
2333 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2335 assert(fs == opOK);
// Giving the constant the operand's sign is what turns add-then-subtract
// into subtract-then-add for negative inputs.
2336 MagicConstant.sign = sign;
2337
2338 // Preserve the input sign so that we can handle the case of zero result
2339 // correctly.
2340 bool inputSign = isNegative();
2341
2342 fs = add(MagicConstant, rounding_mode);
2343
2344 // Current value and 'MagicConstant' are both integers, so the result of the
2345 // subtraction is always exact according to Sterbenz' lemma.
// The status of this subtraction is deliberately not stored in `fs`.
2346 subtract(MagicConstant, rounding_mode);
2347
2348 // Restore the input sign.
2349 if (inputSign != isNegative())
2350 changeSign();
2351
2352 return fs;
2353}
2354
2355
2356/* Comparison requires normalized numbers. */
// NOTE(review): the signature line and every `case PackCategoriesIntoKey(...)`
// label of the switch below are not visible in this listing, so which
// category pairs select which branch cannot be read from here. The bodies
// of the branches are intact.
//
// Both operands must share the same semantics (asserted). The result is one
// of cmpUnordered, cmpLessThan, cmpEqual or cmpGreaterThan.
2358 cmpResult result;
2359
2360 assert(semantics == rhs.semantics);
2361
2362 switch (PackCategoriesIntoKey(category, rhs.category)) {
2363 default:
2364 llvm_unreachable(nullptr);
2365
// Branch returning "unordered".
2373 return cmpUnordered;
2374
// Branch decided solely by the sign of *this.
2378 if (sign)
2379 return cmpLessThan;
2380 else
2381 return cmpGreaterThan;
2382
// Branch decided solely by the sign of rhs.
2386 if (rhs.sign)
2387 return cmpGreaterThan;
2388 else
2389 return cmpLessThan;
2390
// Branch where equal signs mean equal values; otherwise the negative operand
// is the smaller one.
2392 if (sign == rhs.sign)
2393 return cmpEqual;
2394 else if (sign)
2395 return cmpLessThan;
2396 else
2397 return cmpGreaterThan;
2398
// Branch where the operands compare equal regardless of sign.
2400 return cmpEqual;
2401
// Branch that falls out of the switch into the magnitude comparison below.
2403 break;
2404 }
2405
2406 /* Two normal numbers. Do they have the same sign? */
2407 if (sign != rhs.sign) {
2408 if (sign)
2409 result = cmpLessThan;
2410 else
2411 result = cmpGreaterThan;
2412 } else {
2413 /* Compare absolute values; invert result if negative. */
2414 result = compareAbsoluteValue(rhs);
2415
2416 if (sign) {
2417 if (result == cmpLessThan)
2418 result = cmpGreaterThan;
2419 else if (result == cmpGreaterThan)
2420 result = cmpLessThan;
2421 }
2422 }
2423
2424 return result;
2425}
2426
2427/// IEEEFloat::convert - convert a value of one floating point type to another.
2428/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2429/// records whether the transformation lost information, i.e. whether
2430/// converting the result back to the original type would fail to reproduce
2431/// the original value (this is almost the same as return value!=opOK, but
2432/// there are edge cases where this is not so).
2433
// NOTE(review): several lines of this function are not visible in this
// listing: the first signature line (name and the `toSemantics` parameter),
// the declaration of `lostFraction`, the tail of a few multi-line
// conditions, and the condition guarding the first makeNaN() in the NaN
// branch. Comments below describe only what is visible.
2435 roundingMode rounding_mode,
2436 bool *losesInfo) {
2438 unsigned int newPartCount, oldPartCount;
2439 opStatus fs;
2440 int shift;
2441 const fltSemantics &fromSemantics = *semantics;
// Captured before any mutation so the sNaN handling at the end still knows
// what the input was.
2442 bool is_signaling = isSignaling();
2443
2445 newPartCount = partCountForBits(toSemantics.precision + 1);
2446 oldPartCount = partCount();
// Positive shift: the target has more precision (extension); negative shift:
// the target has less (truncation).
2447 shift = toSemantics.precision - fromSemantics.precision;
2448
2449 bool X86SpecialNan = false;
2450 if (&fromSemantics == &semX87DoubleExtended &&
2451 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2452 (!(*significandParts() & 0x8000000000000000ULL) ||
2453 !(*significandParts() & 0x4000000000000000ULL))) {
2454 // x86 has some unusual NaNs which cannot be represented in any other
2455 // format; note them here.
2456 X86SpecialNan = true;
2457 }
2458
2459 // If this is a truncation of a denormal number, and the target semantics
2460 // has larger exponent range than the source semantics (this can happen
2461 // when truncating from PowerPC double-double to double format), the
2462 // right shift could lose result mantissa bits. Adjust exponent instead
2463 // of performing excessive shift.
2464 // Also do a similar trick in case shifting denormal would produce zero
2465 // significand as this case isn't handled correctly by normalize.
2466 if (shift < 0 && isFiniteNonZero()) {
2467 int omsb = significandMSB() + 1;
2468 int exponentChange = omsb - fromSemantics.precision;
2469 if (exponent + exponentChange < toSemantics.minExponent)
2470 exponentChange = toSemantics.minExponent - exponent;
2471 if (exponentChange < shift)
2472 exponentChange = shift;
2473 if (exponentChange < 0) {
2474 shift -= exponentChange;
2475 exponent += exponentChange;
2476 } else if (omsb <= -shift) {
2477 exponentChange = omsb + shift - 1; // leave at least one bit set
2478 shift -= exponentChange;
2479 exponent += exponentChange;
2480 }
2481 }
2482
2483 // If this is a truncation, perform the shift before we narrow the storage.
2484 if (shift < 0 && (isFiniteNonZero() ||
2485 (category == fcNaN && semantics->nonFiniteBehavior !=
2487 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2488
2489 // Fix the storage so it can hold to new value.
2490 if (newPartCount > oldPartCount) {
2491 // The new type requires more storage; make it available.
2492 integerPart *newParts;
2493 newParts = new integerPart[newPartCount];
2494 APInt::tcSet(newParts, 0, newPartCount);
// Only finite non-zero values and NaNs carry significand bits worth copying.
2495 if (isFiniteNonZero() || category==fcNaN)
2496 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2497 freeSignificand();
2498 significand.parts = newParts;
2499 } else if (newPartCount == 1 && oldPartCount != 1) {
2500 // Switch to built-in storage for a single part.
2501 integerPart newPart = 0;
2502 if (isFiniteNonZero() || category==fcNaN)
2503 newPart = significandParts()[0];
2504 freeSignificand();
2505 significand.part = newPart;
2506 }
2507
2508 // Now that we have the right storage, switch the semantics.
2509 semantics = &toSemantics;
2510
2511 // If this is an extension, perform the shift now that the storage is
2512 // available.
2513 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2514 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2515
2516 if (isFiniteNonZero()) {
// Finite non-zero: round into the new format; any non-opOK status counts as
// information loss.
2517 fs = normalize(rounding_mode, lostFraction);
2518 *losesInfo = (fs != opOK);
2519 } else if (category == fcNaN) {
2521 *losesInfo =
2523 makeNaN(false, sign);
2524 return is_signaling ? opInvalidOp : opOK;
2525 }
2526
2527 // If NaN is negative zero, we need to create a new NaN to avoid converting
2528 // NaN to -Inf.
2529 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2531 makeNaN(false, false);
2532
2533 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2534
2535 // For x87 extended precision, we want to make a NaN, not a special NaN if
2536 // the input wasn't special either.
2537 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2538 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2539
2540 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2541 // This also guarantees that a sNaN does not become Inf on a truncation
2542 // that loses all payload bits.
2543 if (is_signaling) {
2544 makeQuiet();
2545 fs = opInvalidOp;
2546 } else {
2547 fs = opOK;
2548 }
2549 } else if (category == fcInfinity &&
// Infinity branch (second half of the condition is not visible): the value
// becomes a NaN and the conversion is reported as lossy and inexact.
2551 makeNaN(false, sign);
2552 *losesInfo = true;
2553 fs = opInexact;
2554 } else if (category == fcZero &&
2556 // Negative zero loses info, but positive zero doesn't.
2557 *losesInfo =
2558 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2559 fs = *losesInfo ? opInexact : opOK;
2560 // NaN is negative zero means -0 -> +0, which can lose information
2561 sign = false;
2562 } else {
2563 *losesInfo = false;
2564 fs = opOK;
2565 }
2566
2567 return fs;
2568}
2569
2570/* Convert a floating point number to an integer according to the
2571 rounding mode. If the rounded integer value is out of range this
2572 returns an invalid operation exception and the contents of the
2573 destination parts are unspecified. If the rounded value is in
2574 range but the floating point number is not the exact integer, the C
2575 standard doesn't require an inexact exception to be raised. IEEE
2576 854 does require it so we do that.
2577
2578 Note that for conversions to integer type the C standard requires
2579 round-to-zero to always be used. */
2580IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2581 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2582 roundingMode rounding_mode, bool *isExact) const {
2583 lostFraction lost_fraction;
2584 const integerPart *src;
2585 unsigned int dstPartsCount, truncatedBits;
2586
2587 *isExact = false;
2588
2589 /* Handle the three special cases first. */
2590 if (category == fcInfinity || category == fcNaN)
2591 return opInvalidOp;
2592
2593 dstPartsCount = partCountForBits(width);
2594 assert(dstPartsCount <= parts.size() && "Integer too big");
2595
2596 if (category == fcZero) {
2597 APInt::tcSet(parts.data(), 0, dstPartsCount);
2598 // Negative zero can't be represented as an int.
2599 *isExact = !sign;
2600 return opOK;
2601 }
2602
2603 src = significandParts();
2604
2605 /* Step 1: place our absolute value, with any fraction truncated, in
2606 the destination. */
2607 if (exponent < 0) {
2608 /* Our absolute value is less than one; truncate everything. */
2609 APInt::tcSet(parts.data(), 0, dstPartsCount);
2610 /* For exponent -1 the integer bit represents .5, look at that.
2611 For smaller exponents leftmost truncated bit is 0. */
2612 truncatedBits = semantics->precision -1U - exponent;
2613 } else {
2614 /* We want the most significant (exponent + 1) bits; the rest are
2615 truncated. */
2616 unsigned int bits = exponent + 1U;
2617
2618 /* Hopelessly large in magnitude? */
2619 if (bits > width)
2620 return opInvalidOp;
2621
2622 if (bits < semantics->precision) {
2623 /* We truncate (semantics->precision - bits) bits. */
2624 truncatedBits = semantics->precision - bits;
2625 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2626 } else {
2627 /* We want at least as many bits as are available. */
2628 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2629 0);
2630 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2631 bits - semantics->precision);
2632 truncatedBits = 0;
2633 }
2634 }
2635
2636 /* Step 2: work out any lost fraction, and increment the absolute
2637 value if we would round away from zero. */
2638 if (truncatedBits) {
2639 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2640 truncatedBits);
2641 if (lost_fraction != lfExactlyZero &&
2642 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2643 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2644 return opInvalidOp; /* Overflow. */
2645 }
2646 } else {
2647 lost_fraction = lfExactlyZero;
2648 }
2649
2650 /* Step 3: check if we fit in the destination. */
2651 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2652
2653 if (sign) {
2654 if (!isSigned) {
2655 /* Negative numbers cannot be represented as unsigned. */
2656 if (omsb != 0)
2657 return opInvalidOp;
2658 } else {
2659 /* It takes omsb bits to represent the unsigned integer value.
2660 We lose a bit for the sign, but care is needed as the
2661 maximally negative integer is a special case. */
2662 if (omsb == width &&
2663 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2664 return opInvalidOp;
2665
2666 /* This case can happen because of rounding. */
2667 if (omsb > width)
2668 return opInvalidOp;
2669 }
2670
2671 APInt::tcNegate (parts.data(), dstPartsCount);
2672 } else {
2673 if (omsb >= width + !isSigned)
2674 return opInvalidOp;
2675 }
2676
2677 if (lost_fraction == lfExactlyZero) {
2678 *isExact = true;
2679 return opOK;
2680 } else
2681 return opInexact;
2682}
2683
2684/* Same as convertToSignExtendedInteger, except we provide
2685 deterministic values in case of an invalid operation exception,
2686 namely zero for NaNs and the minimal or maximal value respectively
2687 for underflow or overflow.
2688 The *isExact output tells whether the result is exact, in the sense
2689 that converting it back to the original floating point type produces
2690 the original value. This is almost equivalent to result==opOK,
2691 except for negative zeroes.
2692*/
// NOTE(review): the first lines of the signature (return type, function name
// and the `parts` parameter) are not visible in this listing.
2695 unsigned int width, bool isSigned,
2696 roundingMode rounding_mode, bool *isExact) const {
2697 opStatus fs;
2698
2699 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2700 isExact);
2701
2702 if (fs == opInvalidOp) {
2703 unsigned int bits, dstPartsCount;
2704
2705 dstPartsCount = partCountForBits(width);
2706 assert(dstPartsCount <= parts.size() && "Integer too big");
2707
// `bits` is the number of low-order one bits to write: none for a NaN,
// `isSigned` (0 or 1) for a negative value, and width - isSigned for a
// positive one.
2708 if (category == fcNaN)
2709 bits = 0;
2710 else if (sign)
2711 bits = isSigned;
2712 else
2713 bits = width - isSigned;
2714
2715 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
// For a negative value with a signed destination, move the single set bit up
// to position width - 1.
2716 if (sign && isSigned)
2717 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2718 }
2719
2720 return fs;
2721}
2722
2723/* Convert an unsigned integer SRC to a floating point number,
2724 rounding according to ROUNDING_MODE. The sign of the floating
2725 point number is not modified. */
2726IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2727 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2728 unsigned int omsb, precision, dstCount;
2729 integerPart *dst;
2730 lostFraction lost_fraction;
2731
2732 category = fcNormal;
2733 omsb = APInt::tcMSB(src, srcCount) + 1;
2734 dst = significandParts();
2735 dstCount = partCount();
2736 precision = semantics->precision;
2737
2738 /* We want the most significant PRECISION bits of SRC. There may not
2739 be that many; extract what we can. */
2740 if (precision <= omsb) {
2741 exponent = omsb - 1;
2742 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2743 omsb - precision);
2744 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2745 } else {
2746 exponent = precision - 1;
2747 lost_fraction = lfExactlyZero;
2748 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2749 }
2750
2751 return normalize(rounding_mode, lost_fraction);
2752}
2753
// NOTE(review): the first line of the signature (return type, function name
// and the `Val` / `isSigned` parameters used below) is not visible in this
// listing.
//
// Sets this value from the integer `Val`: the sign is taken from `Val` when
// `isSigned` is set and `Val` is negative, and the magnitude is handed to
// convertFromUnsignedParts(), whose status is returned.
2755 roundingMode rounding_mode) {
2756 unsigned int partCount = Val.getNumWords();
// Work on a copy so that a negative input can be negated.
2757 APInt api = Val;
2758
2759 sign = false;
2760 if (isSigned && api.isNegative()) {
2761 sign = true;
2762 api = -api;
2763 }
2764
2765 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2766}
2767
2768/* Convert a two's complement integer SRC to a floating point number,
2769 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2770 integer is signed, in which case it must be sign-extended. */
// NOTE(review): the first lines of the signature (return type, function name
// and the `src` parameter) and the declaration of `copy` are not visible in
// this listing.
2773 unsigned int srcCount, bool isSigned,
2774 roundingMode rounding_mode) {
2775 opStatus status;
2776
// The input is negative when it is signed and its topmost bit (the highest
// bit of the last part) is set.
2777 if (isSigned &&
2778 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2780
2781 /* If we're signed and negative negate a copy. */
2782 sign = true;
// The temporary buffer is released right after the conversion below.
2783 copy = new integerPart[srcCount];
2784 APInt::tcAssign(copy, src, srcCount);
2785 APInt::tcNegate(copy, srcCount);
2786 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2787 delete [] copy;
2788 } else {
2789 sign = false;
2790 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2791 }
2792
2793 return status;
2794}
2795
2796/* FIXME: should this just take a const APInt reference? */
// NOTE(review): the first lines of the signature (return type, function name
// and the `parts` parameter) are not visible in this listing.
//
// Builds a `width`-bit APInt from `parts`, negates it when `isSigned` is set
// and bit width - 1 is set, and converts the magnitude with
// convertFromUnsignedParts().
2799 unsigned int width, bool isSigned,
2800 roundingMode rounding_mode) {
2801 unsigned int partCount = partCountForBits(width);
2802 APInt api = APInt(width, ArrayRef(parts, partCount));
2803
2804 sign = false;
2805 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2806 sign = true;
2807 api = -api;
2808 }
2809
2810 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2811}
2812
// NOTE(review): the return-type line above this signature and the
// declaration of `dot` (used below) are not visible in this listing.
//
// Parses a hexadecimal significand followed by a mandatory 'p'/'P' exponent
// from `s` and stores the value in *this, rounding with `rounding_mode`.
// Malformed input is reported through createError(...) / the error carried
// by a failed helper; on success the status from normalize() is returned.
2814IEEEFloat::convertFromHexadecimalString(StringRef s,
2815 roundingMode rounding_mode) {
2816 lostFraction lost_fraction = lfExactlyZero;
2817
2818 category = fcNormal;
2819 zeroSignificand();
2820 exponent = 0;
2821
2822 integerPart *significand = significandParts();
2823 unsigned partsCount = partCount();
// Nibbles are written from the most significant end of the significand
// storage downwards; bitPos is the position of the next free nibble's top.
2824 unsigned bitPos = partsCount * integerPartWidth;
2825 bool computedTrailingFraction = false;
2826
2827 // Skip leading zeroes and any (hexa)decimal point.
2828 StringRef::iterator begin = s.begin();
2829 StringRef::iterator end = s.end();
2831 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2832 if (!PtrOrErr)
2833 return PtrOrErr.takeError();
2834 StringRef::iterator p = *PtrOrErr;
2835 StringRef::iterator firstSignificantDigit = p;
2836
2837 while (p != end) {
2838 integerPart hex_value;
2839
// `dot == end` means no point has been seen yet.
2840 if (*p == '.') {
2841 if (dot != end)
2842 return createError("String contains multiple dots");
2843 dot = p++;
2844 continue;
2845 }
2846
// Any non-hex character ends the significand.
2847 hex_value = hexDigitValue(*p);
2848 if (hex_value == UINT_MAX)
2849 break;
2850
2851 p++;
2852
2853 // Store the number while we have space.
2854 if (bitPos) {
2855 bitPos -= 4;
2856 hex_value <<= bitPos % integerPartWidth;
2857 significand[bitPos / integerPartWidth] |= hex_value;
2858 } else if (!computedTrailingFraction) {
// Storage is full: the remaining digits are summarised once as a lost
// fraction; later digits are consumed without further processing.
2859 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2860 if (!FractOrErr)
2861 return FractOrErr.takeError();
2862 lost_fraction = *FractOrErr;
2863 computedTrailingFraction = true;
2864 }
2865 }
2866
2867 /* Hex floats require an exponent but not a hexadecimal point. */
2868 if (p == end)
2869 return createError("Hex strings require an exponent");
2870 if (*p != 'p' && *p != 'P')
2871 return createError("Invalid character in significand");
2872 if (p == begin)
2873 return createError("Significand has no digits");
// A lone "." before the exponent marker is not a significand either.
2874 if (dot != end && p - begin == 1)
2875 return createError("Significand has no digits");
2876
2877 /* Ignore the exponent if we are zero. */
2878 if (p != firstSignificantDigit) {
2879 int expAdjustment;
2880
2881 /* Implicit hexadecimal point? */
2882 if (dot == end)
2883 dot = p;
2884
2885 /* Calculate the exponent adjustment implicit in the number of
2886 significant digits. */
2887 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2888 if (expAdjustment < 0)
2889 expAdjustment++;
2890 expAdjustment = expAdjustment * 4 - 1;
2891
2892 /* Adjust for writing the significand starting at the most
2893 significant nibble. */
2894 expAdjustment += semantics->precision;
2895 expAdjustment -= partsCount * integerPartWidth;
2896
2897 /* Adjust for the given exponent. */
2898 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2899 if (!ExpOrErr)
2900 return ExpOrErr.takeError();
2901 exponent = *ExpOrErr;
2902 }
2903
2904 return normalize(rounding_mode, lost_fraction);
2905}
2906
// NOTE(review): several lines of this function are not visible in this
// listing: the return-type line, the declaration of `pow5Parts`, the
// trailing arguments of both convertFromUnsignedParts() calls, and the
// opening of the assertion that checks the integer bit. Comments below
// describe only what is visible.
//
// Computes decSigParts * 10^exp in progressively wider working precision
// until the accumulated error bound shows that truncating to this format's
// precision rounds correctly, then stores the result in *this and returns
// the status from normalize().
2908IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2909 unsigned sigPartCount, int exp,
2910 roundingMode rounding_mode) {
2911 unsigned int parts, pow5PartCount;
// Scratch semantics for the wide intermediate values; its precision is set
// on every pass of the loop below.
2912 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2914 bool isNearest;
2915
2916 isNearest = (rounding_mode == rmNearestTiesToEven ||
2917 rounding_mode == rmNearestTiesToAway);
2918
// Starting width of the working significand, in parts.
2919 parts = partCountForBits(semantics->precision + 11);
2920
2921 /* Calculate pow(5, abs(exp)). */
2922 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2923
// Each failed attempt doubles the working width; the loop only exits through
// the return inside it.
2924 for (;; parts *= 2) {
2925 opStatus sigStatus, powStatus;
2926 unsigned int excessPrecision, truncatedBits;
2927
2928 calcSemantics.precision = parts * integerPartWidth - 1;
2929 excessPrecision = calcSemantics.precision - semantics->precision;
2930 truncatedBits = excessPrecision;
2931
2932 IEEEFloat decSig(calcSemantics, uninitialized);
2933 decSig.makeZero(sign);
2934 IEEEFloat pow5(calcSemantics);
2935
2936 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2938 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2940 /* Add exp, as 10^n = 5^n * 2^n. */
2941 decSig.exponent += exp;
2942
2943 lostFraction calcLostFraction;
2944 integerPart HUerr, HUdistance;
2945 unsigned int powHUerr;
2946
2947 if (exp >= 0) {
2948 /* multiplySignificand leaves the precision-th bit set to 1. */
2949 calcLostFraction = decSig.multiplySignificand(pow5);
2950 powHUerr = powStatus != opOK;
2951 } else {
2952 calcLostFraction = decSig.divideSignificand(pow5);
2953 /* Denormal numbers have less precision. */
2954 if (decSig.exponent < semantics->minExponent) {
2955 excessPrecision += (semantics->minExponent - decSig.exponent);
2956 truncatedBits = excessPrecision;
2957 if (excessPrecision > calcSemantics.precision)
2958 excessPrecision = calcSemantics.precision;
2959 }
2960 /* Extra half-ulp lost in reciprocal of exponent. */
2961 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2962 }
2963
2964 /* Both multiplySignificand and divideSignificand return the
2965 result with the integer bit set. */
2967 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2968
// HUerr: error bound from HUerrBound(); HUdistance: twice the value returned
// by ulpsFromBoundary() for the bits that will be dropped.
2969 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2970 powHUerr);
2971 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2972 excessPrecision, isNearest);
2973
2974 /* Are we guaranteed to round correctly if we truncate? */
2975 if (HUdistance >= HUerr) {
2976 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2977 calcSemantics.precision - excessPrecision,
2978 excessPrecision);
2979 /* Take the exponent of decSig. If we tcExtract-ed less bits
2980 above we must adjust our exponent to compensate for the
2981 implicit right shift. */
2982 exponent = (decSig.exponent + semantics->precision
2983 - (calcSemantics.precision - excessPrecision));
2984 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2985 decSig.partCount(),
2986 truncatedBits);
2987 return normalize(rounding_mode, calcLostFraction);
2988 }
2989 }
2990}
2991
// NOTE(review): the return-type line above this signature is not visible in
// this listing.
//
// Parses the decimal floating point literal in `str` and stores the value in
// *this, rounding with `rounding_mode`. Errors from interpretDecimal() and
// an invalid significand character are returned as errors; otherwise an
// opStatus is returned. Zero and obviously out-of-range exponents are
// handled by quick paths; everything else is converted to a binary integer
// significand and finished by roundSignificandWithExponent().
2993IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2994 decimalInfo D;
2995 opStatus fs;
2996
2997 /* Scan the text. */
2998 StringRef::iterator p = str.begin();
2999 if (Error Err = interpretDecimal(p, str.end(), &D))
3000 return std::move(Err);
3001
3002 /* Handle the quick cases. First the case of no significant digits,
3003 i.e. zero, and then exponents that are obviously too large or too
3004 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3005 definitely overflows if
3006
3007 (exp - 1) * L >= maxExponent
3008
3009 and definitely underflows to zero where
3010
3011 (exp + 1) * L <= minExponent - precision
3012
3013 With integer arithmetic the tightest bounds for L are
3014
3015 93/28 < L < 196/59 [ numerator <= 256 ]
3016 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3017 */
3018
3019 // Test if we have a zero number allowing for strings with no null terminators
3020 // and zero decimals with non-zero exponents.
3021 //
3022 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3023 // D.firstSigDigit equals str.end(), every digit must be a zero and there can
3024 // be at most one dot. On the other hand, if we have a zero with a non-zero
3025 // exponent, then we know that D.firstSigDigit will be non-numeric.
3026 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3027 category = fcZero;
3028 fs = opOK;
// Formats whose NaN encoding is NegativeZero get a positive zero.
3029 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3030 sign = false;
3031
3032 /* Check whether the normalized exponent is high enough to overflow
3033 max during the log-rebasing in the max-exponent check below. */
3034 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3035 fs = handleOverflow(rounding_mode);
3036
3037 /* If it wasn't, then it also wasn't high enough to overflow max
3038 during the log-rebasing in the min-exponent check. Check that it
3039 won't overflow min in either check, then perform the min-exponent
3040 check. */
3041 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3042 (D.normalizedExponent + 1) * 28738 <=
3043 8651 * (semantics->minExponent - (int) semantics->precision)) {
3044 /* Underflow to zero and round. */
3045 category = fcNormal;
3046 zeroSignificand();
3047 fs = normalize(rounding_mode, lfLessThanHalf);
3048
3049 /* We can finally safely perform the max-exponent check. */
3050 } else if ((D.normalizedExponent - 1) * 42039
3051 >= 12655 * semantics->maxExponent) {
3052 /* Overflow and round. */
3053 fs = handleOverflow(rounding_mode);
3054 } else {
3055 integerPart *decSignificand;
3056 unsigned int partCount;
3057
3058 /* A tight upper bound on number of bits required to hold an
3059 N-digit decimal integer is N * 196 / 59. Allocate enough space
3060 to hold the full significand, and an extra part required by
3061 tcMultiplyPart. */
3062 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3063 partCount = partCountForBits(1 + 196 * partCount / 59);
3064 decSignificand = new integerPart[partCount + 1];
// From here on partCount counts the parts of decSignificand in use.
3065 partCount = 0;
3066
3067 /* Convert to binary efficiently - we do almost all multiplication
3068 in an integerPart. When this would overflow do we do a single
3069 bignum multiplication, and then revert again to multiplication
3070 in an integerPart. */
3071 do {
3072 integerPart decValue, val, multiplier;
3073
3074 val = 0;
3075 multiplier = 1;
3076
3077 do {
// Step over the decimal point; stop if it was the last character.
3078 if (*p == '.') {
3079 p++;
3080 if (p == str.end()) {
3081 break;
3082 }
3083 }
3084 decValue = decDigitValue(*p++);
3085 if (decValue >= 10U) {
// Release the scratch buffer before reporting the error.
3086 delete[] decSignificand;
3087 return createError("Invalid character in significand");
3088 }
3089 multiplier *= 10;
3090 val = val * 10 + decValue;
3091 /* The maximum number that can be multiplied by ten with any
3092 digit added without overflowing an integerPart. */
3093 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3094
3095 /* Multiply out the current part. */
3096 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3097 partCount, partCount + 1, false);
3098
3099 /* If we used another part (likely but not guaranteed), increase
3100 the count. */
3101 if (decSignificand[partCount])
3102 partCount++;
3103 } while (p <= D.lastSigDigit);
3104
3105 category = fcNormal;
3106 fs = roundSignificandWithExponent(decSignificand, partCount,
3107 D.exponent, rounding_mode);
3108
3109 delete [] decSignificand;
3110 }
3111
3112 return fs;
3113}
3114
3115bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3116 const size_t MIN_NAME_SIZE = 3;
3117
3118 if (str.size() < MIN_NAME_SIZE)
3119 return false;
3120
3121 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3122 makeInf(false);
3123 return true;
3124 }
3125
3126 bool IsNegative = str.front() == '-';
3127 if (IsNegative) {
3128 str = str.drop_front();
3129 if (str.size() < MIN_NAME_SIZE)
3130 return false;
3131
3132 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3133 makeInf(true);
3134 return true;
3135 }
3136 }
3137
3138 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3139 bool IsSignaling = str.front() == 's' || str.front() == 'S';
3140 if (IsSignaling) {
3141 str = str.drop_front();
3142 if (str.size() < MIN_NAME_SIZE)
3143 return false;
3144 }
3145
3146 if (str.starts_with("nan") || str.starts_with("NaN")) {
3147 str = str.drop_front(3);
3148
3149 // A NaN without payload.
3150 if (str.empty()) {
3151 makeNaN(IsSignaling, IsNegative);
3152 return true;
3153 }
3154
3155 // Allow the payload to be inside parentheses.
3156 if (str.front() == '(') {
3157 // Parentheses should be balanced (and not empty).
3158 if (str.size() <= 2 || str.back() != ')')
3159 return false;
3160
3161 str = str.slice(1, str.size() - 1);
3162 }
3163
3164 // Determine the payload number's radix.
3165 unsigned Radix = 10;
3166 if (str[0] == '0') {
3167 if (str.size() > 1 && tolower(str[1]) == 'x') {
3168 str = str.drop_front(2);
3169 Radix = 16;
3170 } else
3171 Radix = 8;
3172 }
3173
3174 // Parse the payload and make the NaN.
3175 APInt Payload;
3176 if (!str.getAsInteger(Radix, Payload)) {
3177 makeNaN(IsSignaling, IsNegative, &Payload);
3178 return true;
3179 }
3180 }
3181
3182 return false;
3183}
3184
// NOTE(review): the signature lines of this function are not visible in this
// listing; the body uses `str` and `rounding_mode`, presumably its
// parameters.
//
// Visible behaviour: rejects an empty string, tries the special spellings
// first, then strips one optional leading '+'/'-' and dispatches to the
// hexadecimal parser when the rest starts with "0x"/"0X", or to the decimal
// parser otherwise.
3187 if (str.empty())
3188 return createError("Invalid string length");
3189
3190 // Handle special cases.
3191 if (convertFromStringSpecials(str))
3192 return opOK;
3193
3194 /* Handle a leading minus sign. */
3195 StringRef::iterator p = str.begin();
3196 size_t slen = str.size();
3197 sign = *p == '-' ? 1 : 0;
3198 if (*p == '-' || *p == '+') {
3199 p++;
3200 slen--;
// A sign with nothing after it is not a number.
3201 if (!slen)
3202 return createError("String has no digits");
3203 }
3204
3205 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
// "0x" with nothing after it is rejected here.
3206 if (slen == 2)
3207 return createError("Invalid string");
3208 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3209 rounding_mode);
3210 }
3211
3212 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3213}
3214
3215/* Write out a hexadecimal representation of the floating point value
3216 to DST, which must be of sufficient size, in the C99 form
3217 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3218 excluding the terminating NUL.
3219
3220 If UPPERCASE, the output is in upper case, otherwise in lower case.
3221
3222 HEXDIGITS digits appear altogether, rounding the value if
3223 necessary. If HEXDIGITS is 0, the minimal precision to display the
3224 number precisely is used instead. If nothing would appear after
3225 the decimal point it is suppressed.
3226
3227 The decimal exponent is always printed and has at least one digit.
3228 Zero values display an exponent of zero. Infinities and NaNs
3229 appear as "infinity" or "nan" respectively.
3230
3231 The above rules are as specified by C99. There is ambiguity about
3232 what the leading hexadecimal digit should be. This implementation
3233 uses whatever is necessary so that the exponent is displayed as
3234 stored. This implies the exponent will fall within the IEEE format
3235 range, and the leading hexadecimal digit will be 0 (for denormals),
3236 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3237 any other digits zero).
3238*/
3239unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3240 bool upperCase,
3241 roundingMode rounding_mode) const {
3242 char *p;
3243
3244 p = dst;
3245 if (sign)
3246 *dst++ = '-';
3247
3248 switch (category) {
3249 case fcInfinity:
3250 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3251 dst += sizeof infinityL - 1;
3252 break;
3253
3254 case fcNaN:
3255 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3256 dst += sizeof NaNU - 1;
3257 break;
3258
3259 case fcZero:
3260 *dst++ = '0';
3261 *dst++ = upperCase ? 'X': 'x';
3262 *dst++ = '0';
3263 if (hexDigits > 1) {
3264 *dst++ = '.';
3265 memset (dst, '0', hexDigits - 1);
3266 dst += hexDigits - 1;
3267 }
3268 *dst++ = upperCase ? 'P': 'p';
3269 *dst++ = '0';
3270 break;
3271
3272 case fcNormal:
3273 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3274 break;
3275 }
3276
3277 *dst = 0;
3278
3279 return static_cast<unsigned int>(dst - p);
3280}
3281
3282/* Does the hard work of outputting the correctly rounded hexadecimal
3283 form of a normal floating point number with the specified number of
3284 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3285 digits necessary to print the value precisely is output. */
3286char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3287 bool upperCase,
3288 roundingMode rounding_mode) const {
3289 unsigned int count, valueBits, shift, partsCount, outputDigits;
3290 const char *hexDigitChars;
3291 const integerPart *significand;
3292 char *p;
3293 bool roundUp;
3294
3295 *dst++ = '0';
3296 *dst++ = upperCase ? 'X': 'x';
3297
3298 roundUp = false;
3299 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3300
3301 significand = significandParts();
3302 partsCount = partCount();
3303
3304 /* +3 because the first digit only uses the single integer bit, so
3305 we have 3 virtual zero most-significant-bits. */
3306 valueBits = semantics->precision + 3;
3307 shift = integerPartWidth - valueBits % integerPartWidth;
3308
3309 /* The natural number of digits required ignoring trailing
3310 insignificant zeroes. */
3311 outputDigits = (valueBits - significandLSB () + 3) / 4;
3312
3313 /* hexDigits of zero means use the required number for the
3314 precision. Otherwise, see if we are truncating. If we are,
3315 find out if we need to round away from zero. */
3316 if (hexDigits) {
3317 if (hexDigits < outputDigits) {
3318 /* We are dropping non-zero bits, so need to check how to round.
3319 "bits" is the number of dropped bits. */
3320 unsigned int bits;
3321 lostFraction fraction;
3322
3323 bits = valueBits - hexDigits * 4;
3324 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3325 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3326 }
3327 outputDigits = hexDigits;
3328 }
3329
3330 /* Write the digits consecutively, and start writing in the location
3331 of the hexadecimal point. We move the most significant digit
3332 left and add the hexadecimal point later. */
3333 p = ++dst;
3334
3335 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3336
3337 while (outputDigits && count) {
3338 integerPart part;
3339
3340 /* Put the most significant integerPartWidth bits in "part". */
3341 if (--count == partsCount)
3342 part = 0; /* An imaginary higher zero part. */
3343 else
3344 part = significand[count] << shift;
3345
3346 if (count && shift)
3347 part |= significand[count - 1] >> (integerPartWidth - shift);
3348
3349 /* Convert as much of "part" to hexdigits as we can. */
3350 unsigned int curDigits = integerPartWidth / 4;
3351
3352 if (curDigits > outputDigits)
3353 curDigits = outputDigits;
3354 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3355 outputDigits -= curDigits;
3356 }
3357
3358 if (roundUp) {
3359 char *q = dst;
3360
3361 /* Note that hexDigitChars has a trailing '0'. */
3362 do {
3363 q--;
3364 *q = hexDigitChars[hexDigitValue (*q) + 1];
3365 } while (*q == '0');
3366 assert(q >= p);
3367 } else {
3368 /* Add trailing zeroes. */
3369 memset (dst, '0', outputDigits);
3370 dst += outputDigits;
3371 }
3372
3373 /* Move the most significant digit to before the point, and if there
3374 is something after the decimal point add it. This must come
3375 after rounding above. */
3376 p[-1] = p[0];
3377 if (dst -1 == p)
3378 dst--;
3379 else
3380 p[0] = '.';
3381
3382 /* Finally output the exponent. */
3383 *dst++ = upperCase ? 'P': 'p';
3384
3385 return writeSignedDecimal (dst, exponent);
3386}
3387
3389 if (!Arg.isFiniteNonZero())
3390 return hash_combine((uint8_t)Arg.category,
3391 // NaN has no sign, fix it at zero.
3392 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3393 Arg.semantics->precision);
3394
3395 // Normal floats need their exponent and significand hashed.
3396 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3397 Arg.semantics->precision, Arg.exponent,
3399 Arg.significandParts(),
3400 Arg.significandParts() + Arg.partCount()));
3401}
3402
// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.

// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3411
3412APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3413 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3414 assert(partCount()==2);
3415
3416 uint64_t myexponent, mysignificand;
3417
3418 if (isFiniteNonZero()) {
3419 myexponent = exponent+16383; //bias
3420 mysignificand = significandParts()[0];
3421 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3422 myexponent = 0; // denormal
3423 } else if (category==fcZero) {
3424 myexponent = 0;
3425 mysignificand = 0;
3426 } else if (category==fcInfinity) {
3427 myexponent = 0x7fff;
3428 mysignificand = 0x8000000000000000ULL;
3429 } else {
3430 assert(category == fcNaN && "Unknown category");
3431 myexponent = 0x7fff;
3432 mysignificand = significandParts()[0];
3433 }
3434
3435 uint64_t words[2];
3436 words[0] = mysignificand;
3437 words[1] = ((uint64_t)(sign & 1) << 15) |
3438 (myexponent & 0x7fffLL);
3439 return APInt(80, words);
3440}
3441
3442APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3443 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3444 assert(partCount()==2);
3445
3446 uint64_t words[2];
3447 opStatus fs;
3448 bool losesInfo;
3449
3450 // Convert number to double. To avoid spurious underflows, we re-
3451 // normalize against the "double" minExponent first, and only *then*
3452 // truncate the mantissa. The result of that second conversion
3453 // may be inexact, but should never underflow.
3454 // Declare fltSemantics before APFloat that uses it (and
3455 // saves pointer to it) to ensure correct destruction order.
3456 fltSemantics extendedSemantics = *semantics;
3457 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3458 IEEEFloat extended(*this);
3459 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3460 assert(fs == opOK && !losesInfo);
3461 (void)fs;
3462
3463 IEEEFloat u(extended);
3464 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3465 assert(fs == opOK || fs == opInexact);
3466 (void)fs;
3467 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3468
3469 // If conversion was exact or resulted in a special case, we're done;
3470 // just set the second double to zero. Otherwise, re-convert back to
3471 // the extended format and compute the difference. This now should
3472 // convert exactly to double.
3473 if (u.isFiniteNonZero() && losesInfo) {
3474 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3475 assert(fs == opOK && !losesInfo);
3476 (void)fs;
3477
3478 IEEEFloat v(extended);
3479 v.subtract(u, rmNearestTiesToEven);
3480 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3481 assert(fs == opOK && !losesInfo);
3482 (void)fs;
3483 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3484 } else {
3485 words[1] = 0;
3486 }
3487
3488 return APInt(128, words);
3489}
3490
3491template <const fltSemantics &S>
3492APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3493 assert(semantics == &S);
3494
3495 constexpr int bias = -(S.minExponent - 1);
3496 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3497 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3498 constexpr integerPart integer_bit =
3499 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3500 constexpr uint64_t significand_mask = integer_bit - 1;
3501 constexpr unsigned int exponent_bits =
3502 S.sizeInBits - 1 - trailing_significand_bits;
3503 static_assert(exponent_bits < 64);
3504 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3505
3506 uint64_t myexponent;
3507 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3508 mysignificand;
3509
3510 if (isFiniteNonZero()) {
3511 myexponent = exponent + bias;
3512 std::copy_n(significandParts(), mysignificand.size(),
3513 mysignificand.begin());
3514 if (myexponent == 1 &&
3515 !(significandParts()[integer_bit_part] & integer_bit))
3516 myexponent = 0; // denormal
3517 } else if (category == fcZero) {
3518 myexponent = ::exponentZero(S) + bias;
3519 mysignificand.fill(0);
3520 } else if (category == fcInfinity) {
3521 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
3522 llvm_unreachable("semantics don't support inf!");
3523 }
3524 myexponent = ::exponentInf(S) + bias;
3525 mysignificand.fill(0);
3526 } else {
3527 assert(category == fcNaN && "Unknown category!");
3528 myexponent = ::exponentNaN(S) + bias;
3529 std::copy_n(significandParts(), mysignificand.size(),
3530 mysignificand.begin());
3531 }
3532 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3533 auto words_iter =
3534 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3535 if constexpr (significand_mask != 0) {
3536 // Clear the integer bit.
3537 words[mysignificand.size() - 1] &= significand_mask;
3538 }
3539 std::fill(words_iter, words.end(), uint64_t{0});
3540 constexpr size_t last_word = words.size() - 1;
3541 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3542 << ((S.sizeInBits - 1) % 64);
3543 words[last_word] |= shifted_sign;
3544 uint64_t shifted_exponent = (myexponent & exponent_mask)
3545 << (trailing_significand_bits % 64);
3546 words[last_word] |= shifted_exponent;
3547 if constexpr (last_word == 0) {
3548 return APInt(S.sizeInBits, words[0]);
3549 }
3550 return APInt(S.sizeInBits, words);
3551}
3552
3553APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3554 assert(partCount() == 2);
3555 return convertIEEEFloatToAPInt<semIEEEquad>();
3556}
3557
3558APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3559 assert(partCount()==1);
3560 return convertIEEEFloatToAPInt<semIEEEdouble>();
3561}
3562
3563APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3564 assert(partCount()==1);
3565 return convertIEEEFloatToAPInt<semIEEEsingle>();
3566}
3567
3568APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3569 assert(partCount() == 1);
3570 return convertIEEEFloatToAPInt<semBFloat>();
3571}
3572
3573APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3574 assert(partCount()==1);
3575 return convertIEEEFloatToAPInt<semIEEEhalf>();
3576}
3577
3578APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3579 assert(partCount() == 1);
3580 return convertIEEEFloatToAPInt<semFloat8E5M2>();
3581}
3582
3583APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3584 assert(partCount() == 1);
3585 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3586}
3587
3588APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3589 assert(partCount() == 1);
3590 return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3591}
3592
3593APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3594 assert(partCount() == 1);
3595 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3596}
3597
3598APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3599 assert(partCount() == 1);
3600 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3601}
3602
3603APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3604 assert(partCount() == 1);
3605 return convertIEEEFloatToAPInt<semFloatTF32>();
3606}
3607
3608// This function creates an APInt that is just a bit map of the floating
3609// point constant as it would appear in memory. It is not a conversion,
3610// and treating the result as a normal integer is unlikely to be useful.
3611
3613 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3614 return convertHalfAPFloatToAPInt();
3615
3616 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3617 return convertBFloatAPFloatToAPInt();
3618
3619 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3620 return convertFloatAPFloatToAPInt();
3621
3622 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3623 return convertDoubleAPFloatToAPInt();
3624
3625 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3626 return convertQuadrupleAPFloatToAPInt();
3627
3628 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3629 return convertPPCDoubleDoubleAPFloatToAPInt();
3630
3631 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3632 return convertFloat8E5M2APFloatToAPInt();
3633
3634 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3635 return convertFloat8E5M2FNUZAPFloatToAPInt();
3636
3637 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3638 return convertFloat8E4M3FNAPFloatToAPInt();
3639
3640 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3641 return convertFloat8E4M3FNUZAPFloatToAPInt();
3642
3643 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3644 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3645
3646 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3647 return convertFloatTF32APFloatToAPInt();
3648
3649 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3650 "unknown format!");
3651 return convertF80LongDoubleAPFloatToAPInt();
3652}
3653
3655 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3656 "Float semantics are not IEEEsingle");
3657 APInt api = bitcastToAPInt();
3658 return api.bitsToFloat();
3659}
3660
3662 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3663 "Float semantics are not IEEEdouble");
3664 APInt api = bitcastToAPInt();
3665 return api.bitsToDouble();
3666}
3667
3668/// Integer bit is explicit in this format. Intel hardware (387 and later)
3669/// does not support these bit patterns:
3670/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3671/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3672/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3673/// exponent = 0, integer bit 1 ("pseudodenormal")
3674/// At the moment, the first three are treated as NaNs, the last one as Normal.
3675void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3676 uint64_t i1 = api.getRawData()[0];
3677 uint64_t i2 = api.getRawData()[1];
3678 uint64_t myexponent = (i2 & 0x7fff);
3679 uint64_t mysignificand = i1;
3680 uint8_t myintegerbit = mysignificand >> 63;
3681
3682 initialize(&semX87DoubleExtended);
3683 assert(partCount()==2);
3684
3685 sign = static_cast<unsigned int>(i2>>15);
3686 if (myexponent == 0 && mysignificand == 0) {
3687 makeZero(sign);
3688 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3689 makeInf(sign);
3690 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3691 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3692 category = fcNaN;
3693 exponent = exponentNaN();
3694 significandParts()[0] = mysignificand;
3695 significandParts()[1] = 0;
3696 } else {
3697 category = fcNormal;
3698 exponent = myexponent - 16383;
3699 significandParts()[0] = mysignificand;
3700 significandParts()[1] = 0;
3701 if (myexponent==0) // denormal
3702 exponent = -16382;
3703 }
3704}
3705
3706void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3707 uint64_t i1 = api.getRawData()[0];
3708 uint64_t i2 = api.getRawData()[1];
3709 opStatus fs;
3710 bool losesInfo;
3711
3712 // Get the first double and convert to our format.
3713 initFromDoubleAPInt(APInt(64, i1));
3715 assert(fs == opOK && !losesInfo);
3716 (void)fs;
3717
3718 // Unless we have a special case, add in second double.
3719 if (isFiniteNonZero()) {
3720 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3721 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3722 assert(fs == opOK && !losesInfo);
3723 (void)fs;
3724
3726 }
3727}
3728
3729template <const fltSemantics &S>
3730void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3731 assert(api.getBitWidth() == S.sizeInBits);
3732 constexpr integerPart integer_bit = integerPart{1}
3733 << ((S.precision - 1) % integerPartWidth);
3734 constexpr uint64_t significand_mask = integer_bit - 1;
3735 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3736 constexpr unsigned int stored_significand_parts =
3737 partCountForBits(trailing_significand_bits);
3738 constexpr unsigned int exponent_bits =
3739 S.sizeInBits - 1 - trailing_significand_bits;
3740 static_assert(exponent_bits < 64);
3741 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3742 constexpr int bias = -(S.minExponent - 1);
3743
3744 // Copy the bits of the significand. We need to clear out the exponent and
3745 // sign bit in the last word.
3746 std::array<integerPart, stored_significand_parts> mysignificand;
3747 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3748 if constexpr (significand_mask != 0) {
3749 mysignificand[mysignificand.size() - 1] &= significand_mask;
3750 }
3751
3752 // We assume the last word holds the sign bit, the exponent, and potentially
3753 // some of the trailing significand field.
3754 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3755 uint64_t myexponent =
3756 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3757
3758 initialize(&S);
3759 assert(partCount() == mysignificand.size());
3760
3761 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3762
3763 bool all_zero_significand =
3764 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3765
3766 bool is_zero = myexponent == 0 && all_zero_significand;
3767
3768 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3769 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3770 makeInf(sign);
3771 return;
3772 }
3773 }
3774
3775 bool is_nan = false;
3776
3777 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3778 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3779 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3780 bool all_ones_significand =
3781 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3782 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3783 (!significand_mask ||
3784 mysignificand[mysignificand.size() - 1] == significand_mask);
3785 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3786 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3787 is_nan = is_zero && sign;
3788 }
3789
3790 if (is_nan) {
3791 category = fcNaN;
3792 exponent = ::exponentNaN(S);
3793 std::copy_n(mysignificand.begin(), mysignificand.size(),
3794 significandParts());
3795 return;
3796 }
3797
3798 if (is_zero) {
3799 makeZero(sign);
3800 return;
3801 }
3802
3803 category = fcNormal;
3804 exponent = myexponent - bias;
3805 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3806 if (myexponent == 0) // denormal
3807 exponent = S.minExponent;
3808 else
3809 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3810}
3811
3812void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3813 initFromIEEEAPInt<semIEEEquad>(api);
3814}
3815
3816void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3817 initFromIEEEAPInt<semIEEEdouble>(api);
3818}
3819
3820void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3821 initFromIEEEAPInt<semIEEEsingle>(api);
3822}
3823
3824void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3825 initFromIEEEAPInt<semBFloat>(api);
3826}
3827
3828void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3829 initFromIEEEAPInt<semIEEEhalf>(api);
3830}
3831
3832void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3833 initFromIEEEAPInt<semFloat8E5M2>(api);
3834}
3835
3836void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3837 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3838}
3839
3840void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3841 initFromIEEEAPInt<semFloat8E4M3FN>(api);
3842}
3843
3844void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3845 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3846}
3847
3848void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3849 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3850}
3851
3852void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3853 initFromIEEEAPInt<semFloatTF32>(api);
3854}
3855
3856/// Treat api as containing the bits of a floating point number.
3857void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3858 assert(api.getBitWidth() == Sem->sizeInBits);
3859 if (Sem == &semIEEEhalf)
3860 return initFromHalfAPInt(api);
3861 if (Sem == &semBFloat)
3862 return initFromBFloatAPInt(api);
3863 if (Sem == &semIEEEsingle)
3864 return initFromFloatAPInt(api);
3865 if (Sem == &semIEEEdouble)
3866 return initFromDoubleAPInt(api);
3867 if (Sem == &semX87DoubleExtended)
3868 return initFromF80LongDoubleAPInt(api);
3869 if (Sem == &semIEEEquad)
3870 return initFromQuadrupleAPInt(api);
3871 if (Sem == &semPPCDoubleDoubleLegacy)
3872 return initFromPPCDoubleDoubleAPInt(api);
3873 if (Sem == &semFloat8E5M2)
3874 return initFromFloat8E5M2APInt(api);
3875 if (Sem == &semFloat8E5M2FNUZ)
3876 return initFromFloat8E5M2FNUZAPInt(api);
3877 if (Sem == &semFloat8E4M3FN)
3878 return initFromFloat8E4M3FNAPInt(api);
3879 if (Sem == &semFloat8E4M3FNUZ)
3880 return initFromFloat8E4M3FNUZAPInt(api);
3881 if (Sem == &semFloat8E4M3B11FNUZ)
3882 return initFromFloat8E4M3B11FNUZAPInt(api);
3883 if (Sem == &semFloatTF32)
3884 return initFromFloatTF32APInt(api);
3885
3886 llvm_unreachable(nullptr);
3887}
3888
3889/// Make this number the largest magnitude normal number in the given
3890/// semantics.
3891void IEEEFloat::makeLargest(bool Negative) {
3892 // We want (in interchange format):
3893 // sign = {Negative}
3894 // exponent = 1..10
3895 // significand = 1..1
3896 category = fcNormal;
3897 sign = Negative;
3898 exponent = semantics->maxExponent;
3899
3900 // Use memset to set all but the highest integerPart to all ones.
3901 integerPart *significand = significandParts();
3902 unsigned PartCount = partCount();
3903 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3904
3905 // Set the high integerPart especially setting all unused top bits for
3906 // internal consistency.
3907 const unsigned NumUnusedHighBits =
3908 PartCount*integerPartWidth - semantics->precision;
3909 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3910 ? (~integerPart(0) >> NumUnusedHighBits)
3911 : 0;
3912
3913 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3914 semantics->nanEncoding == fltNanEncoding::AllOnes)
3915 significand[0] &= ~integerPart(1);
3916}
3917
3918/// Make this number the smallest magnitude denormal number in the given
3919/// semantics.
3920void IEEEFloat::makeSmallest(bool Negative) {
3921 // We want (in interchange format):
3922 // sign = {Negative}
3923 // exponent = 0..0
3924 // significand = 0..01
3925 category = fcNormal;
3926 sign = Negative;
3927 exponent = semantics->minExponent;
3928 APInt::tcSet(significandParts(), 1, partCount());
3929}
3930
3931void IEEEFloat::makeSmallestNormalized(bool Negative) {
3932 // We want (in interchange format):
3933 // sign = {Negative}
3934 // exponent = 0..0
3935 // significand = 10..0
3936
3937 category = fcNormal;
3938 zeroSignificand();
3939 sign = Negative;
3940 exponent = semantics->minExponent;
3941 APInt::tcSetBit(significandParts(), semantics->precision - 1);
3942}
3943
3944IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3945 initFromAPInt(&Sem, API);
3946}
3947
3948IEEEFloat::IEEEFloat(float f) {
3949 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3950}
3951
3952IEEEFloat::IEEEFloat(double d) {
3953 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3954}
3955
3956namespace {
3957 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3958 Buffer.append(Str.begin(), Str.end());
3959 }
3960
3961 /// Removes data from the given significand until it is no more
3962 /// precise than is required for the desired precision.
3963 void AdjustToPrecision(APInt &significand,
3964 int &exp, unsigned FormatPrecision) {
3965 unsigned bits = significand.getActiveBits();
3966
3967 // 196/59 is a very slight overestimate of lg_2(10).
3968 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3969
3970 if (bits <= bitsRequired) return;
3971
3972 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3973 if (!tensRemovable) return;
3974
3975 exp += tensRemovable;
3976
3977 APInt divisor(significand.getBitWidth(), 1);
3978 APInt powten(significand.getBitWidth(), 10);
3979 while (true) {
3980 if (tensRemovable & 1)
3981 divisor *= powten;
3982 tensRemovable >>= 1;
3983 if (!tensRemovable) break;
3984 powten *= powten;
3985 }
3986
3987 significand = significand.udiv(divisor);
3988
3989 // Truncate the significand down to its active bit count.
3990 significand = significand.trunc(significand.getActiveBits());
3991 }
3992
3993
3994 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3995 int &exp, unsigned FormatPrecision) {
3996 unsigned N = buffer.size();
3997 if (N <= FormatPrecision) return;
3998
3999 // The most significant figures are the last ones in the buffer.
4000 unsigned FirstSignificant = N - FormatPrecision;
4001
4002 // Round.
4003 // FIXME: this probably shouldn't use 'round half up'.
4004
4005 // Rounding down is just a truncation, except we also want to drop
4006 // trailing zeros from the new result.
4007 if (buffer[FirstSignificant - 1] < '5') {
4008 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4009 FirstSignificant++;
4010
4011 exp += FirstSignificant;
4012 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4013 return;
4014 }
4015
4016 // Rounding up requires a decimal add-with-carry. If we continue
4017 // the carry, the newly-introduced zeros will just be truncated.
4018 for (unsigned I = FirstSignificant; I != N; ++I) {
4019 if (buffer[I] == '9') {
4020 FirstSignificant++;
4021 } else {
4022 buffer[I]++;
4023 break;
4024 }
4025 }
4026
4027 // If we carried through, we have exactly one digit of precision.
4028 if (FirstSignificant == N) {
4029 exp += FirstSignificant;
4030 buffer.clear();
4031 buffer.push_back('1');
4032 return;
4033 }
4034
4035 exp += FirstSignificant;
4036 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4037 }
4038} // namespace
4039
4040void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4041 unsigned FormatMaxPadding, bool TruncateZero) const {
4042 switch (category) {
4043 case fcInfinity:
4044 if (isNegative())
4045 return append(Str, "-Inf");
4046 else
4047 return append(Str, "+Inf");
4048
4049 case fcNaN: return append(Str, "NaN");
4050
4051 case fcZero:
4052 if (isNegative())
4053 Str.push_back('-');
4054
4055 if (!FormatMaxPadding) {
4056 if (TruncateZero)
4057 append(Str, "0.0E+0");
4058 else {
4059 append(Str, "0.0");
4060 if (FormatPrecision > 1)
4061 Str.append(FormatPrecision - 1, '0');
4062 append(Str, "e+00");
4063 }
4064 } else
4065 Str.push_back('0');
4066 return;
4067
4068 case fcNormal:
4069 break;
4070 }
4071
4072 if (isNegative())
4073 Str.push_back('-');
4074
4075 // Decompose the number into an APInt and an exponent.
4076 int exp = exponent - ((int) semantics->precision - 1);
4077 APInt significand(
4078 semantics->precision,
4079 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4080
4081 // Set FormatPrecision if zero. We want to do this before we
4082 // truncate trailing zeros, as those are part of the precision.
4083 if (!FormatPrecision) {
4084 // We use enough digits so the number can be round-tripped back to an
4085 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4086 // Accurately" by Steele and White.
4087 // FIXME: Using a formula based purely on the precision is conservative;
4088 // we can print fewer digits depending on the actual value being printed.
4089
4090 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4091 FormatPrecision = 2 + semantics->precision * 59 / 196;
4092 }
4093
4094 // Ignore trailing binary zeros.
4095 int trailingZeros = significand.countr_zero();
4096 exp += trailingZeros;
4097 significand.lshrInPlace(trailingZeros);
4098
4099 // Change the exponent from 2^e to 10^e.
4100 if (exp == 0) {
4101 // Nothing to do.
4102 } else if (exp > 0) {
4103 // Just shift left.
4104 significand = significand.zext(semantics->precision + exp);
4105 significand <<= exp;
4106 exp = 0;
4107 } else { /* exp < 0 */
4108 int texp = -exp;
4109
4110 // We transform this using the identity:
4111 // (N)(2^-e) == (N)(5^e)(10^-e)
4112 // This means we have to multiply N (the significand) by 5^e.
4113 // To avoid overflow, we have to operate on numbers large
4114 // enough to store N * 5^e:
4115 // log2(N * 5^e) == log2(N) + e * log2(5)
4116 // <= semantics->precision + e * 137 / 59
4117 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4118
4119 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
4120
4121 // Multiply significand by 5^e.
4122 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4123 significand = significand.zext(precision);
4124 APInt five_to_the_i(precision, 5);
4125 while (true) {
4126 if (texp & 1) significand *= five_to_the_i;
4127
4128 texp >>= 1;
4129 if (!texp) break;
4130 five_to_the_i *= five_to_the_i;
4131 }
4132 }
4133
4134 AdjustToPrecision(significand, exp, FormatPrecision);
4135
4137
4138 // Fill the buffer.
4139 unsigned precision = significand.getBitWidth();
4140 if (precision < 4) {
4141 // We need enough precision to store the value 10.
4142 precision = 4;
4143 significand = significand.zext(precision);
4144 }
4145 APInt ten(precision, 10);
4146 APInt digit(precision, 0);
4147
4148 bool inTrail = true;
4149 while (significand != 0) {
4150 // digit <- significand % 10
4151 // significand <- significand / 10
4152 APInt::udivrem(significand, ten, significand, digit);
4153
4154 unsigned d = digit.getZExtValue();
4155
4156 // Drop trailing zeros.
4157 if (inTrail && !d) exp++;
4158 else {
4159 buffer.push_back((char) ('0' + d));
4160 inTrail = false;
4161 }
4162 }
4163
4164 assert(!buffer.empty() && "no characters in buffer!");
4165
4166 // Drop down to FormatPrecision.
4167 // TODO: don't do more precise calculations above than are required.
4168 AdjustToPrecision(buffer, exp, FormatPrecision);
4169
4170 unsigned NDigits = buffer.size();
4171
4172 // Check whether we should use scientific notation.
4173 bool FormatScientific;
4174 if (!FormatMaxPadding)
4175 FormatScientific = true;
4176 else {
4177 if (exp >= 0) {
4178 // 765e3 --> 765000
4179 // ^^^
4180 // But we shouldn't make the number look more precise than it is.
4181 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4182 NDigits + (unsigned) exp > FormatPrecision);
4183 } else {
4184 // Power of the most significant digit.
4185 int MSD = exp + (int) (NDigits - 1);
4186 if (MSD >= 0) {
4187 // 765e-2 == 7.65
4188 FormatScientific = false;
4189 } else {
4190 // 765e-5 == 0.00765
4191 // ^ ^^
4192 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4193 }
4194 }
4195 }
4196
4197 // Scientific formatting is pretty straightforward.
4198 if (FormatScientific) {
4199 exp += (NDigits - 1);
4200
4201 Str.push_back(buffer[NDigits-1]);
4202 Str.push_back('.');
4203 if (NDigits == 1 && TruncateZero)
4204 Str.push_back('0');
4205 else
4206 for (unsigned I = 1; I != NDigits; ++I)
4207 Str.push_back(buffer[NDigits-1-I]);
4208 // Fill with zeros up to FormatPrecision.
4209 if (!TruncateZero && FormatPrecision > NDigits - 1)
4210 Str.append(FormatPrecision - NDigits + 1, '0');
4211 // For !TruncateZero we use lower 'e'.
4212 Str.push_back(TruncateZero ? 'E' : 'e');
4213
4214 Str.push_back(exp >= 0 ? '+' : '-');
4215 if (exp < 0) exp = -exp;
4216 SmallVector<char, 6> expbuf;
4217 do {
4218 expbuf.push_back((char) ('0' + (exp % 10)));
4219 exp /= 10;
4220 } while (exp);
4221 // Exponent always at least two digits if we do not truncate zeros.
4222 if (!TruncateZero && expbuf.size() < 2)
4223 expbuf.push_back('0');
4224 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4225 Str.push_back(expbuf[E-1-I]);
4226 return;
4227 }
4228
4229 // Non-scientific, positive exponents.
4230 if (exp >= 0) {
4231 for (unsigned I = 0; I != NDigits; ++I)
4232 Str.push_back(buffer[NDigits-1-I]);
4233 for (unsigned I = 0; I != (unsigned) exp; ++I)
4234 Str.push_back('0');
4235 return;
4236 }
4237
4238 // Non-scientific, negative exponents.
4239
4240 // The number of digits to the left of the decimal point.
4241 int NWholeDigits = exp + (int) NDigits;
4242
4243 unsigned I = 0;
4244 if (NWholeDigits > 0) {
4245 for (; I != (unsigned) NWholeDigits; ++I)
4246 Str.push_back(buffer[NDigits-I-1]);
4247 Str.push_back('.');
4248 } else {
4249 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4250
4251 Str.push_back('0');
4252 Str.push_back('.');
4253 for (unsigned Z = 1; Z != NZeros; ++Z)
4254 Str.push_back('0');
4255 }
4256
4257 for (; I != NDigits; ++I)
4258 Str.push_back(buffer[NDigits-I-1]);
4259}
4260
4261bool IEEEFloat::getExactInverse(APFloat *inv) const {
4262 // Special floats and denormals have no exact inverse.
4263 if (!isFiniteNonZero())
4264 return false;
4265
4266 // Check that the number is a power of two by making sure that only the
4267 // integer bit is set in the significand.
4268 if (significandLSB() != semantics->precision - 1)
4269 return false;
4270
4271 // Get the inverse.
4272 IEEEFloat reciprocal(*semantics, 1ULL);
4273 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4274 return false;
4275
4276 // Avoid multiplication with a denormal, it is not safe on all platforms and
4277 // may be slower than a normal division.
4278 if (reciprocal.isDenormal())
4279 return false;
4280
4281 assert(reciprocal.isFiniteNonZero() &&
4282 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4283
4284 if (inv)
4285 *inv = APFloat(reciprocal, *semantics);
4286
4287 return true;
4288}
4289
4290int IEEEFloat::getExactLog2Abs() const {
4291 if (!isFinite() || isZero())
4292 return INT_MIN;
4293
4294 const integerPart *Parts = significandParts();
4295 const int PartCount = partCountForBits(semantics->precision);
4296
4297 int PopCount = 0;
4298 for (int i = 0; i < PartCount; ++i) {
4299 PopCount += llvm::popcount(Parts[i]);
4300 if (PopCount > 1)
4301 return INT_MIN;
4302 }
4303
4304 if (exponent != semantics->minExponent)
4305 return exponent;
4306
4307 int CountrParts = 0;
4308 for (int i = 0; i < PartCount;
4309 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4310 if (Parts[i] != 0) {
4311 return exponent - semantics->precision + CountrParts +
4312 llvm::countr_zero(Parts[i]) + 1;
4313 }
4314 }
4315
4316 llvm_unreachable("didn't find the set bit");
4317}
4318
4319bool IEEEFloat::isSignaling() const {
4320 if (!isNaN())
4321 return false;
4322 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4323 return false;
4324
4325 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4326 // first bit of the trailing significand being 0.
4327 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4328}
4329
4330/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4331///
4332/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4333/// appropriate sign switching before/after the computation.
///
/// The value is modified in place.
///
/// \param nextDown when true, the sign is flipped before and after the
/// nextUp computation so that the overall effect is nextDown.
/// \returns opInvalidOp when the input is a signaling NaN, opOK otherwise.
4334IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4335 // If we are performing nextDown, swap sign so we have -x.
4336 if (nextDown)
4337 changeSign();
4338
4339 // Compute nextUp(x)
4340 opStatus result = opOK;
4341
4342 // Handle each float category separately.
4343 switch (category) {
4344 case fcInfinity:
4345 // nextUp(+inf) = +inf
4346 if (!isNegative())
4347 break;
4348 // nextUp(-inf) = -getLargest()
4349 makeLargest(true);
4350 break;
4351 case fcNaN:
4352 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4353 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4354 // change the payload.
4355 if (isSignaling()) {
4356 result = opInvalidOp;
4357 // For consistency, propagate the sign of the sNaN to the qNaN.
4358 makeNaN(false, isNegative(), nullptr);
4359 }
4360 break;
4361 case fcZero:
4362 // nextUp(pm 0) = +getSmallest()
4363 makeSmallest(false);
4364 break;
4365 case fcNormal:
4366 // nextUp(-getSmallest()) = -0
4367 if (isSmallest() && isNegative()) {
4368 APInt::tcSet(significandParts(), 0, partCount());
4369 category = fcZero;
4370 exponent = 0;
 // With the NegativeZero NaN encoding the sign is cleared, so the result
 // is +0 rather than -0.
4371 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4372 sign = false;
4373 break;
4374 }
4375
 // The largest positive finite value has no finite successor: it steps to
 // NaN for NanOnly semantics and to +infinity otherwise.
4376 if (isLargest() && !isNegative()) {
4377 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4378 // nextUp(getLargest()) == NAN
4379 makeNaN();
4380 break;
4381 } else {
4382 // nextUp(getLargest()) == INFINITY
4383 APInt::tcSet(significandParts(), 0, partCount());
4384 category = fcInfinity;
4385 exponent = semantics->maxExponent + 1;
4386 break;
4387 }
4388 }
4389
4390 // nextUp(normal) == normal + inc.
4391 if (isNegative()) {
4392 // If we are negative, we need to decrement the significand.
4393
4394 // We only cross a binade boundary that requires adjusting the exponent
4395 // if:
4396 // 1. exponent != semantics->minExponent. This implies we are not in the
4397 // smallest binade or are dealing with denormals.
4398 // 2. Our significand excluding the integral bit is all zeros.
 // This flag must be computed before the decrement below, which changes
 // the significand that isSignificandAllZeros() inspects.
4399 bool WillCrossBinadeBoundary =
4400 exponent != semantics->minExponent && isSignificandAllZeros();
4401
4402 // Decrement the significand.
4403 //
4404 // We always do this since:
4405 // 1. If we are dealing with a non-binade decrement, by definition we
4406 // just decrement the significand.
4407 // 2. If we are dealing with a normal -> normal binade decrement, since
4408 // we have an explicit integral bit the fact that all bits but the
4409 // integral bit are zero implies that subtracting one will yield a
4410 // significand with 0 integral bit and 1 in all other spots. Thus we
4411 // must just adjust the exponent and set the integral bit to 1.
4412 // 3. If we are dealing with a normal -> denormal binade decrement,
4413 // since we set the integral bit to 0 when we represent denormals, we
4414 // just decrement the significand.
4415 integerPart *Parts = significandParts();
4416 APInt::tcDecrement(Parts, partCount());
4417
4418 if (WillCrossBinadeBoundary) {
4419 // Our result is a normal number. Do the following:
4420 // 1. Set the integral bit to 1.
4421 // 2. Decrement the exponent.
4422 APInt::tcSetBit(Parts, semantics->precision - 1);
4423 exponent--;
4424 }
4425 } else {
4426 // If we are positive, we need to increment the significand.
4427
4428 // We only cross a binade boundary that requires adjusting the exponent if
4429 // the input is not a denormal and all of said input's significand bits
4430 // are set. If all of said conditions are true: clear the significand, set
4431 // the integral bit to 1, and increment the exponent. If we have a
4432 // denormal always increment since moving denormals and the numbers in the
4433 // smallest normal binade have the same exponent in our representation.
4434 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4435
4436 if (WillCrossBinadeBoundary) {
4437 integerPart *Parts = significandParts();
4438 APInt::tcSet(Parts, 0, partCount());
4439 APInt::tcSetBit(Parts, semantics->precision - 1);
 // The isLargest() case was handled above, so the exponent is expected
 // to have room to grow here.
4440 assert(exponent != semantics->maxExponent &&
4441 "We can not increment an exponent beyond the maxExponent allowed"
4442 " by the given floating point semantics.");
4443 exponent++;
4444 } else {
4445 incrementSignificand();
4446 }
4447 }
4448 break;
4449 }
4450
4451 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4452 if (nextDown)
4453 changeSign();
4454
4455 return result;
4456}
4457
4458APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4459 return ::exponentNaN(*semantics);
4460}
4461
4462APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4463 return ::exponentInf(*semantics);
4464}
4465
4466APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4467 return ::exponentZero(*semantics);
4468}
4469
4470void IEEEFloat::makeInf(bool Negative) {
4471 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4472 // There is no Inf, so make NaN instead.
4473 makeNaN(false, Negative);
4474 return;
4475 }
4476 category = fcInfinity;
4477 sign = Negative;
4478 exponent = exponentInf();
4479 APInt::tcSet(significandParts(), 0, partCount());
4480}
4481
4482void IEEEFloat::makeZero(bool Negative) {
4483 category = fcZero;
4484 sign = Negative;
4485 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4486 // Merge negative zero to positive because 0b10000...000 is used for NaN
4487 sign = false;
4488 }
4489 exponent = exponentZero();
4490 APInt::tcSet(significandParts(), 0, partCount());
4491}
4492
4493void IEEEFloat::makeQuiet() {
4494 assert(isNaN());
4495 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4496 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4497}
4498
4499int ilogb(const IEEEFloat &Arg) {
4500 if (Arg.isNaN())
4501 return IEEEFloat::IEK_NaN;
4502 if (Arg.isZero())
4503 return IEEEFloat::IEK_Zero;
4504 if (Arg.isInfinity())
4505 return IEEEFloat::IEK_Inf;
4506 if (!Arg.isDenormal())
4507 return Arg.exponent;
4508
4509 IEEEFloat Normalized(Arg);
4510 int SignificandBits = Arg.getSemantics().precision - 1;
4511
4512 Normalized.exponent += SignificandBits;
4513 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4514 return Normalized.exponent - SignificandBits;
4515}
4516
4518 auto MaxExp = X.getSemantics().maxExponent;
4519 auto MinExp = X.getSemantics().minExponent;
4520
4521 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4522 // overflow; clamp it to a safe range before adding, but ensure that the range
4523 // is large enough that the clamp does not change the result. The range we
4524 // need to support is the difference between the largest possible exponent and
4525 // the normalized exponent of half the smallest denormal.
4526
4527 int SignificandBits = X.getSemantics().precision - 1;
4528 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4529
4530 // Clamp to one past the range ends to let normalize handle overflow.
4531 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4532 X.normalize(RoundingMode, lfExactlyZero);
4533 if (X.isNaN())
4534 X.makeQuiet();
4535 return X;
4536}
4537
4539 Exp = ilogb(Val);
4540
4541 // Quiet signalling nans.
4542 if (Exp == IEEEFloat::IEK_NaN) {
4543 IEEEFloat Quiet(Val);
4544 Quiet.makeQuiet();
4545 return Quiet;
4546 }
4547
4548 if (Exp == IEEEFloat::IEK_Inf)
4549 return Val;
4550
4551 // 1 is added because frexp is defined to return a normalized fraction in
4552 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4553 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4554 return scalbn(Val, -Exp, RM);
4555}
4556
4557DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4558 : Semantics(&S),
4560 assert(Semantics == &semPPCDoubleDouble);
4561}
4562
4564 : Semantics(&S),
4567 assert(Semantics == &semPPCDoubleDouble);
4568}
4569
4571 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4573 assert(Semantics == &semPPCDoubleDouble);
4574}
4575
4577 : Semantics(&S),
4578 Floats(new APFloat[2]{
4579 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4580 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4581 assert(Semantics == &semPPCDoubleDouble);
4582}
4583
4585 APFloat &&Second)
4586 : Semantics(&S),
4587 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4588 assert(Semantics == &semPPCDoubleDouble);
4589 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4590 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4591}
4592
4595 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4596 APFloat(RHS.Floats[1])}
4597 : nullptr) {
4598 assert(Semantics == &semPPCDoubleDouble);
4599}
4600
4602 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4603 RHS.Semantics = &semBogus;
4605}
4606
4608 if (Semantics == RHS.Semantics && RHS.Floats) {
4609 Floats[0] = RHS.Floats[0];
4610 Floats[1] = RHS.Floats[1];
4611 } else if (this != &RHS) {
4612 this->~DoubleAPFloat();
4613 new (this) DoubleAPFloat(RHS);
4614 }
4615 return *this;
4616}
4617
4618// Implement addition, subtraction, multiplication and division based on:
4619// "Software for Doubled-Precision Floating-Point Computations",
4620// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
/// Adds the pair (a, aa) to the pair (c, cc) and stores the result in
/// Floats[0] and Floats[1].
///
/// \param a, aa the two components of the first operand.
/// \param c, cc the two components of the second operand.
/// \param RM rounding mode passed to every component operation.
/// \returns the OR of the statuses of the component operations, except on the
/// paths noted below that reset or replace the accumulated status.
4621APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4622 const APFloat &c, const APFloat &cc,
4623 roundingMode RM) {
4624 int Status = opOK;
 // z = a + c
4625 APFloat z = a;
4626 Status |= z.add(c, RM);
4627 if (!z.isFinite()) {
 // Non-finite but not infinity: store z as the result with a zero second
 // component.
4628 if (!z.isInfinity()) {
4629 Floats[0] = std::move(z);
4630 Floats[1].makeZero(/* Neg = */ false);
4631 return (opStatus)Status;
4632 }
 // a + c produced infinity. Discard the status gathered so far and redo the
 // sum starting from cc + aa.
 // NOTE(review): resetting Status drops the flags of the first addition;
 // confirm this is intended.
4633 Status = opOK;
4634 auto AComparedToC = a.compareAbsoluteValue(c);
4635 z = cc;
4636 Status |= z.add(aa, RM);
 // Add whichever of a and c compares smaller in absolute value first.
4637 if (AComparedToC == APFloat::cmpGreaterThan) {
4638 // z = cc + aa + c + a;
4639 Status |= z.add(c, RM);
4640 Status |= z.add(a, RM);
4641 } else {
4642 // z = cc + aa + a + c;
4643 Status |= z.add(a, RM);
4644 Status |= z.add(c, RM);
4645 }
4646 if (!z.isFinite()) {
4647 Floats[0] = std::move(z);
4648 Floats[1].makeZero(/* Neg = */ false);
4649 return (opStatus)Status;
4650 }
4651 Floats[0] = z;
4652 APFloat zz = aa;
4653 Status |= zz.add(cc, RM);
4654 if (AComparedToC == APFloat::cmpGreaterThan) {
4655 // Floats[1] = a - z + c + zz;
4656 Floats[1] = a;
4657 Status |= Floats[1].subtract(z, RM);
4658 Status |= Floats[1].add(c, RM);
4659 Status |= Floats[1].add(zz, RM);
4660 } else {
4661 // Floats[1] = c - z + a + zz;
4662 Floats[1] = c;
4663 Status |= Floats[1].subtract(z, RM);
4664 Status |= Floats[1].add(a, RM);
4665 Status |= Floats[1].add(zz, RM);
4666 }
4667 } else {
4668 // q = a - z;
4669 APFloat q = a;
4670 Status |= q.subtract(z, RM);
4671
4672 // zz = q + c + (a - (q + z)) + aa + cc;
4673 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4674 auto zz = q;
4675 Status |= zz.add(c, RM);
4676 Status |= q.add(z, RM);
4677 Status |= q.subtract(a, RM);
4678 q.changeSign();
4679 Status |= zz.add(q, RM);
4680 Status |= zz.add(aa, RM);
4681 Status |= zz.add(cc, RM);
 // A correction term of +0 leaves z as the result.
 // NOTE(review): this path returns opOK rather than the accumulated Status;
 // confirm that dropping earlier flags here is intended.
4682 if (zz.isZero() && !zz.isNegative()) {
4683 Floats[0] = std::move(z);
4684 Floats[1].makeZero(/* Neg = */ false);
4685 return opOK;
4686 }
 // Floats[0] = z + zz
4687 Floats[0] = z;
4688 Status |= Floats[0].add(zz, RM);
4689 if (!Floats[0].isFinite()) {
4690 Floats[1].makeZero(/* Neg = */ false);
4691 return (opStatus)Status;
4692 }
 // Floats[1] = z - Floats[0] + zz
4693 Floats[1] = std::move(z);
4694 Status |= Floats[1].subtract(Floats[0], RM);
4695 Status |= Floats[1].add(zz, RM);
4696 }
4697 return (opStatus)Status;
4698}
4699
4700APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4701 const DoubleAPFloat &RHS,
4702 DoubleAPFloat &Out,
4703 roundingMode RM) {
4704 if (LHS.getCategory() == fcNaN) {
4705 Out = LHS;
4706 return opOK;
4707 }
4708 if (RHS.getCategory() == fcNaN) {
4709 Out = RHS;
4710 return opOK;
4711 }
4712 if (LHS.getCategory() == fcZero) {
4713 Out = RHS;
4714 return opOK;
4715 }
4716 if (RHS.getCategory() == fcZero) {
4717 Out = LHS;
4718 return opOK;
4719 }
4720 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4721 LHS.isNegative() != RHS.isNegative()) {
4722 Out.makeNaN(false, Out.isNegative(), nullptr);
4723 return opInvalidOp;
4724 }
4725 if (LHS.getCategory() == fcInfinity) {
4726 Out = LHS;
4727 return opOK;
4728 }
4729 if (RHS.getCategory() == fcInfinity) {
4730 Out = RHS;
4731 return opOK;
4732 }
4733 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4734
4735 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4736 CC(RHS.Floats[1]);
4737 assert(&A.getSemantics() == &semIEEEdouble);
4738 assert(&AA.getSemantics() == &semIEEEdouble);
4739 assert(&C.getSemantics() == &semIEEEdouble);
4740 assert(&CC.getSemantics() == &semIEEEdouble);
4741 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4742 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4743 return Out.addImpl(A, AA, C, CC, RM);
4744}
4745
4747 roundingMode RM) {
4748 return addWithSpecial(*this, RHS, *this, RM);
4749}
4750
4752 roundingMode RM) {
4753 changeSign();
4754 auto Ret = add(RHS, RM);
4755 changeSign();
4756 return Ret;
4757}
4758
4761 const auto &LHS = *this;
4762 auto &Out = *this;
4763 /* Interesting observation: For special categories, finding the lowest
4764 common ancestor of the following layered graph gives the correct
4765 return category:
4766
4767 NaN
4768 / \
4769 Zero Inf
4770 \ /
4771 Normal
4772
4773 e.g. NaN * NaN = NaN
4774 Zero * Inf = NaN
4775 Normal * Zero = Zero
4776 Normal * Inf = Inf
4777 */
4778 if (LHS.getCategory() == fcNaN) {
4779 Out = LHS;
4780 return opOK;
4781 }
4782 if (RHS.getCategory() == fcNaN) {
4783 Out = RHS;
4784 return opOK;
4785 }
4786 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4787 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4788 Out.makeNaN(false, false, nullptr);
4789 return opOK;
4790 }
4791 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4792 Out = LHS;
4793 return opOK;
4794 }
4795 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4796 Out = RHS;
4797 return opOK;
4798 }
4799 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4800 "Special cases not handled exhaustively");
4801
4802 int Status = opOK;
4803 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4804 // t = a * c
4805 APFloat T = A;
4806 Status |= T.multiply(C, RM);
4807 if (!T.isFiniteNonZero()) {
4808 Floats[0] = T;
4809 Floats[1].makeZero(/* Neg = */ false);
4810 return (opStatus)Status;
4811 }
4812
4813 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4814 APFloat Tau = A;
4815 T.changeSign();
4816 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4817 T.changeSign();
4818 {
4819 // v = a * d
4820 APFloat V = A;
4821 Status |= V.multiply(D, RM);
4822 // w = b * c
4823 APFloat W = B;
4824 Status |= W.multiply(C, RM);
4825 Status |= V.add(W, RM);
4826 // tau += v + w
4827 Status |= Tau.add(V, RM);
4828 }
4829 // u = t + tau
4830 APFloat U = T;
4831 Status |= U.add(Tau, RM);
4832
4833 Floats[0] = U;
4834 if (!U.isFinite()) {
4835 Floats[1].makeZero(/* Neg = */ false);
4836 } else {
4837 // Floats[1] = (t - u) + tau
4838 Status |= T.subtract(U, RM);
4839 Status |= T.add(Tau, RM);
4840 Floats[1] = T;
4841 }
4842 return (opStatus)Status;
4843}
4844
4847 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4849 auto Ret =
4850 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4852 return Ret;
4853}
4854
4856 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4858 auto Ret =
4859 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4861 return Ret;
4862}
4863
4865 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4867 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4869 return Ret;
4870}
4871
4874 const DoubleAPFloat &Addend,
4876 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4878 auto Ret = Tmp.fusedMultiplyAdd(
4882 return Ret;
4883}
4884
4886 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4888 auto Ret = Tmp.roundToIntegral(RM);
4890 return Ret;
4891}
4892
4894 Floats[0].changeSign();
4895 Floats[1].changeSign();
4896}
4897
4900 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4901 if (Result != cmpEqual)
4902 return Result;
4903 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4904 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4905 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4906 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4907 if (Against && !RHSAgainst)
4908 return cmpLessThan;
4909 if (!Against && RHSAgainst)
4910 return cmpGreaterThan;
4911 if (!Against && !RHSAgainst)
4912 return Result;
4913 if (Against && RHSAgainst)
4914 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4915 }
4916 return Result;
4917}
4918
4920 return Floats[0].getCategory();
4921}
4922
4923bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4924
4926 Floats[0].makeInf(Neg);
4927 Floats[1].makeZero(/* Neg = */ false);
4928}
4929
4931 Floats[0].makeZero(Neg);
4932 Floats[1].makeZero(/* Neg = */ false);
4933}
4934
4936 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4937 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4938 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4939 if (Neg)
4940 changeSign();
4941}
4942
4944 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4945 Floats[0].makeSmallest(Neg);
4946 Floats[1].makeZero(/* Neg = */ false);
4947}
4948
4950 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4951 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4952 if (Neg)
4953 Floats[0].changeSign();
4954 Floats[1].makeZero(/* Neg = */ false);
4955}
4956
4957void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4958 Floats[0].makeNaN(SNaN, Neg, fill);
4959 Floats[1].makeZero(/* Neg = */ false);
4960}
4961
4963 auto Result = Floats[0].compare(RHS.Floats[0]);
4964 // |Float[0]| > |Float[1]|
4965 if (Result == APFloat::cmpEqual)
4966 return Floats[1].compare(RHS.Floats[1]);
4967 return Result;
4968}
4969
4971 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4972 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4973}
4974
4976 if (Arg.Floats)
4977 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4978 return hash_combine(Arg.Semantics);
4979}
4980
4982 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4983 uint64_t Data[] = {
4984 Floats[0].bitcastToAPInt().getRawData()[0],
4985 Floats[1].bitcastToAPInt().getRawData()[0],
4986 };
4987 return APInt(128, 2, Data);
4988}
4989
4991 roundingMode RM) {
4992 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4994 auto Ret = Tmp.convertFromString(S, RM);
4996 return Ret;
4997}
4998
5000 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5002 auto Ret = Tmp.next(nextDown);
5004 return Ret;
5005}
5006
5009 unsigned int Width, bool IsSigned,
5010 roundingMode RM, bool *IsExact) const {
5011 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5013 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5014}
5015
5017 bool IsSigned,
5018 roundingMode RM) {
5019 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5021 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5023 return Ret;
5024}
5025
5028 unsigned int InputSize,
5029 bool IsSigned, roundingMode RM) {
5030 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5032 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5034 return Ret;
5035}
5036
5039 unsigned int InputSize,
5040 bool IsSigned, roundingMode RM) {
5041 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5043 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5045 return Ret;
5046}
5047
5049 unsigned int HexDigits,
5050 bool UpperCase,
5051 roundingMode RM) const {
5052 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5054 .convertToHexString(DST, HexDigits, UpperCase, RM);
5055}
5056
5058 return getCategory() == fcNormal &&
5059 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5060 // (double)(Hi + Lo) == Hi defines a normal number.
5061 Floats[0] != Floats[0] + Floats[1]);
5062}
5063
5065 if (getCategory() != fcNormal)
5066 return false;
5067 DoubleAPFloat Tmp(*this);
5068 Tmp.makeSmallest(this->isNegative());
5069 return Tmp.compare(*this) == cmpEqual;
5070}
5071
5073 if (getCategory() != fcNormal)
5074 return false;
5075
5076 DoubleAPFloat Tmp(*this);
5078 return Tmp.compare(*this) == cmpEqual;
5079}
5080
5082 if (getCategory() != fcNormal)
5083 return false;
5084 DoubleAPFloat Tmp(*this);
5085 Tmp.makeLargest(this->isNegative());
5086 return Tmp.compare(*this) == cmpEqual;
5087}
5088
5090 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5091 return Floats[0].isInteger() && Floats[1].isInteger();
5092}
5093
5095 unsigned FormatPrecision,
5096 unsigned FormatMaxPadding,
5097 bool TruncateZero) const {
5098 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5100 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5101}
5102
5104 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5106 if (!inv)
5107 return Tmp.getExactInverse(nullptr);
5109 auto Ret = Tmp.getExactInverse(&Inv);
5111 return Ret;
5112}
5113
5115 // TODO: Implement me
5116 return INT_MIN;
5117}
5118
5120 // TODO: Implement me
5121 return INT_MIN;
5122}
5123
5126 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5127<