LLVM 20.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Error.h"
28#include <cstring>
29#include <limits.h>
30
/// Forward METHOD_CALL to the representation that matches the current
/// semantics and return its result from the enclosing function.
///
/// The expansion expects `usesLayout<>`, `getSemantics()` and a union `U`
/// with members `IEEE` and `Double` to be in scope at the point of use.
/// It returns `U.IEEE.METHOD_CALL` when the semantics use the IEEEFloat
/// layout, `U.Double.METHOD_CALL` when they use the DoubleAPFloat layout,
/// and hits llvm_unreachable for anything else. The do/while(false) wrapper
/// lets the macro be used as a single statement followed by a semicolon.
31#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
32 do { \
33 if (usesLayout<IEEEFloat>(getSemantics())) \
34 return U.IEEE.METHOD_CALL; \
35 if (usesLayout<DoubleAPFloat>(getSemantics())) \
36 return U.Double.METHOD_CALL; \
37 llvm_unreachable("Unexpected semantics"); \
38 } while (false)
39
40using namespace llvm;
41
42/// A macro used to combine two fcCategory enums into one key which can be used
43/// in a switch statement to classify how the interaction of two APFloat's
44/// categories affects an operation.
45///
/// The key is computed as `_lhs * 4 + _rhs`, so distinct pairs map to distinct
/// keys only while `_rhs` stays in the range [0, 4).
/// NOTE(review): this assumes fcCategory has at most four enumerators with
/// values 0..3 -- confirm against the enum's declaration.
///
46/// TODO: If clang source code is ever allowed to use constexpr in its own
47/// codebase, change this into a static inline function.
48#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
50/* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54namespace llvm {
55
56// How the nonfinite values Inf and NaN are represented.
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
61 IEEE754,
62
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
70 NanOnly,
71
72 // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
73 // Float4E2M1FN types, which do not support Inf or NaN values.
75};
76
77// How NaN values are represented. This is currently only used in combination
78// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
79// while having IEEE non-finite behavior is liable to lead to unexpected
80// results.
81enum class fltNanEncoding {
82 // Represents the standard IEEE behavior where a value is NaN if its
83 // exponent is all 1s and the significand is non-zero.
84 IEEE,
85
86 // Represents the behavior in the Float8E4M3FN floating point type where NaN
87 // is represented by having the exponent and mantissa set to all 1s.
88 // This behavior matches the FP8 E4M3 type described in
89 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
90 // as non-signalling, although the paper does not state whether the NaN
91 // values are signalling or not.
92 AllOnes,
93
94 // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
95 // where NaN is represented by a sign bit of 1 and all 0s in the exponent
96 // and mantissa (i.e. the negative zero encoding in a IEEE float). Since
97 // there is only one NaN value, it is treated as quiet NaN. This matches the
98 // behavior described in https://arxiv.org/abs/2206.02915 .
100};
101
102/* Represents floating point arithmetic semantics. */
104 /* The largest E such that 2^E is representable; this matches the
105 definition of IEEE 754. */
107
108 /* The smallest E such that 2^E is a normalized number; this
109 matches the definition of IEEE 754. */
111
112 /* Number of bits in the significand. This includes the integer
113 bit. */
114 unsigned int precision;
115
116 /* Number of bits actually used in the semantics. */
117 unsigned int sizeInBits;
118
120
122 // Returns true if any number described by this semantics can be precisely
123 // represented by the specified semantics. Does not take into account
124 // the value of fltNonfiniteBehavior.
125 bool isRepresentableBy(const fltSemantics &S) const {
126 return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
127 precision <= S.precision;
128 }
129};
130
131static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
132static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
133static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
134static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
135static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
136static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
137static constexpr fltSemantics semFloat8E5M2FNUZ = {
139static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
140static constexpr fltSemantics semFloat8E4M3FN = {
142static constexpr fltSemantics semFloat8E4M3FNUZ = {
146static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
147static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
148static constexpr fltSemantics semFloat6E3M2FN = {
150static constexpr fltSemantics semFloat6E2M3FN = {
152static constexpr fltSemantics semFloat4E2M1FN = {
154static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
155static constexpr fltSemantics semBogus = {0, 0, 0, 0};
156
157/* The IBM double-double semantics. Such a number consists of a pair of IEEE
158 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
159 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
160 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
161 to each other, and two 11-bit exponents.
162
163 Note: we need to make the value different from semBogus as otherwise
164 an unsafe optimization may collapse both values to a single address,
165 and we heavily rely on them having distinct addresses. */
166static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
167
168/* These are legacy semantics for the fallback, inaccurate implementation of
169 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
170 operation. It's equivalent to having an IEEE number with consecutive 106
171 bits of mantissa and 11 bits of exponent.
172
173 It's not equivalent to IBM double-double. For example, a legit IBM
174 double-double, 1 + epsilon:
175
176 1 + epsilon = 1 + (1 >> 1076)
177
178 is not representable by a consecutive 106 bits of mantissa.
179
180 Currently, these semantics are used in the following way:
181
182 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
183 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
184 semPPCDoubleDoubleLegacy -> IEEE operations
185
186 We use bitcastToAPInt() to get the bit representation (in APInt) of the
187 underlying IEEEdouble, then use the APInt constructor to construct the
188 legacy IEEE float.
189
190 TODO: Implement all operations in semPPCDoubleDouble, and delete these
191 semantics. */
192static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
193 53 + 53, 128};
194
196 switch (S) {
197 case S_IEEEhalf:
198 return IEEEhalf();
199 case S_BFloat:
200 return BFloat();
201 case S_IEEEsingle:
202 return IEEEsingle();
203 case S_IEEEdouble:
204 return IEEEdouble();
205 case S_IEEEquad:
206 return IEEEquad();
208 return PPCDoubleDouble();
209 case S_Float8E5M2:
210 return Float8E5M2();
211 case S_Float8E5M2FNUZ:
212 return Float8E5M2FNUZ();
213 case S_Float8E4M3:
214 return Float8E4M3();
215 case S_Float8E4M3FN:
216 return Float8E4M3FN();
217 case S_Float8E4M3FNUZ:
218 return Float8E4M3FNUZ();
220 return Float8E4M3B11FNUZ();
221 case S_Float8E3M4:
222 return Float8E3M4();
223 case S_FloatTF32:
224 return FloatTF32();
225 case S_Float6E3M2FN:
226 return Float6E3M2FN();
227 case S_Float6E2M3FN:
228 return Float6E2M3FN();
229 case S_Float4E2M1FN:
230 return Float4E2M1FN();
232 return x87DoubleExtended();
233 }
234 llvm_unreachable("Unrecognised floating semantics");
235}
236
239 if (&Sem == &llvm::APFloat::IEEEhalf())
240 return S_IEEEhalf;
241 else if (&Sem == &llvm::APFloat::BFloat())
242 return S_BFloat;
243 else if (&Sem == &llvm::APFloat::IEEEsingle())
244 return S_IEEEsingle;
245 else if (&Sem == &llvm::APFloat::IEEEdouble())
246 return S_IEEEdouble;
247 else if (&Sem == &llvm::APFloat::IEEEquad())
248 return S_IEEEquad;
249 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
250 return S_PPCDoubleDouble;
251 else if (&Sem == &llvm::APFloat::Float8E5M2())
252 return S_Float8E5M2;
253 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
254 return S_Float8E5M2FNUZ;
255 else if (&Sem == &llvm::APFloat::Float8E4M3())
256 return S_Float8E4M3;
257 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
258 return S_Float8E4M3FN;
259 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
260 return S_Float8E4M3FNUZ;
261 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
262 return S_Float8E4M3B11FNUZ;
263 else if (&Sem == &llvm::APFloat::Float8E3M4())
264 return S_Float8E3M4;
265 else if (&Sem == &llvm::APFloat::FloatTF32())
266 return S_FloatTF32;
267 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
268 return S_Float6E3M2FN;
269 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
270 return S_Float6E2M3FN;
271 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
272 return S_Float4E2M1FN;
273 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
274 return S_x87DoubleExtended;
275 else
276 llvm_unreachable("Unknown floating semantics");
277}
278
285 return semPPCDoubleDouble;
286}
294}
302}
304
310
311/* A tight upper bound on number of parts required to hold the value
312 pow(5, power) is
313
314 power * 815 / (351 * integerPartWidth) + 1
315
316 However, whilst the result may require only this many parts,
317 because we are multiplying two values to get it, the
318 multiplication may require an extra part with the excess part
319 being zero (consider the trivial case of 1 * 1, tcFullMultiply
320 requires two parts to hold the single-part result). So we add an
321 extra one to guarantee enough space whilst multiplying. */
322const unsigned int maxExponent = 16383;
323const unsigned int maxPrecision = 113;
325const unsigned int maxPowerOfFiveParts =
326 2 +
328
329unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
330 return semantics.precision;
331}
334 return semantics.maxExponent;
335}
338 return semantics.minExponent;
339}
340unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
341 return semantics.sizeInBits;
342}
344 bool isSigned) {
345 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
346 // at least one more bit than the MaxExponent to hold the max FP value.
347 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
348 // Extra sign bit needed.
349 if (isSigned)
350 ++MinBitWidth;
351 return MinBitWidth;
352}
353
355 const fltSemantics &Dst) {
356 // Exponent range must be larger.
357 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
358 return false;
359
360 // If the mantissa is long enough, the result value could still be denormal
361 // with a larger exponent range.
362 //
363 // FIXME: This condition is probably not accurate but also shouldn't be a
364 // practical concern with existing types.
365 return Dst.precision >= Src.precision;
366}
367
369 return Sem.sizeInBits;
370}
371
372static constexpr APFloatBase::ExponentType
373exponentZero(const fltSemantics &semantics) {
374 return semantics.minExponent - 1;
375}
376
377static constexpr APFloatBase::ExponentType
378exponentInf(const fltSemantics &semantics) {
379 return semantics.maxExponent + 1;
380}
381
382static constexpr APFloatBase::ExponentType
383exponentNaN(const fltSemantics &semantics) {
386 return exponentZero(semantics);
387 return semantics.maxExponent;
388 }
389 return semantics.maxExponent + 1;
390}
391
392/* A bunch of private, handy routines. */
393
394static inline Error createError(const Twine &Err) {
395 return make_error<StringError>(Err, inconvertibleErrorCode());
396}
397
398static constexpr inline unsigned int partCountForBits(unsigned int bits) {
400}
401
/* Map the character code C to its decimal digit value.  The characters
   '0'..'9' give 0U-9U; every other input gives a value >= 10U (the unsigned
   subtraction wraps around for codes below '0'), which callers use as the
   "not a digit" signal. */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
408
409/* Return the value of a decimal exponent of the form
410 [+-]ddddddd.
411
412 If the exponent overflows, returns a large exponent with the
413 appropriate sign. */
416 bool isNegative;
417 unsigned int absExponent;
418 const unsigned int overlargeExponent = 24000; /* FIXME. */
419 StringRef::iterator p = begin;
420
421 // Treat no exponent as 0 to match binutils
422 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
423 return 0;
424 }
425
426 isNegative = (*p == '-');
427 if (*p == '-' || *p == '+') {
428 p++;
429 if (p == end)
430 return createError("Exponent has no digits");
431 }
432
433 absExponent = decDigitValue(*p++);
434 if (absExponent >= 10U)
435 return createError("Invalid character in exponent");
436
437 for (; p != end; ++p) {
438 unsigned int value;
439
440 value = decDigitValue(*p);
441 if (value >= 10U)
442 return createError("Invalid character in exponent");
443
444 absExponent = absExponent * 10U + value;
445 if (absExponent >= overlargeExponent) {
446 absExponent = overlargeExponent;
447 break;
448 }
449 }
450
451 if (isNegative)
452 return -(int) absExponent;
453 else
454 return (int) absExponent;
455}
456
457/* This is ugly and needs cleaning up, but I don't immediately see
458 how whilst remaining safe. */
461 int exponentAdjustment) {
462 int unsignedExponent;
463 bool negative, overflow;
464 int exponent = 0;
465
466 if (p == end)
467 return createError("Exponent has no digits");
468
469 negative = *p == '-';
470 if (*p == '-' || *p == '+') {
471 p++;
472 if (p == end)
473 return createError("Exponent has no digits");
474 }
475
476 unsignedExponent = 0;
477 overflow = false;
478 for (; p != end; ++p) {
479 unsigned int value;
480
481 value = decDigitValue(*p);
482 if (value >= 10U)
483 return createError("Invalid character in exponent");
484
485 unsignedExponent = unsignedExponent * 10 + value;
486 if (unsignedExponent > 32767) {
487 overflow = true;
488 break;
489 }
490 }
491
492 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
493 overflow = true;
494
495 if (!overflow) {
496 exponent = unsignedExponent;
497 if (negative)
498 exponent = -exponent;
499 exponent += exponentAdjustment;
500 if (exponent > 32767 || exponent < -32768)
501 overflow = true;
502 }
503
504 if (overflow)
505 exponent = negative ? -32768: 32767;
506
507 return exponent;
508}
509
512 StringRef::iterator *dot) {
513 StringRef::iterator p = begin;
514 *dot = end;
515 while (p != end && *p == '0')
516 p++;
517
518 if (p != end && *p == '.') {
519 *dot = p++;
520
521 if (end - begin == 1)
522 return createError("Significand has no digits");
523
524 while (p != end && *p == '0')
525 p++;
526 }
527
528 return p;
529}
530
531/* Given a normal decimal floating point number of the form
532
533 dddd.dddd[eE][+-]ddd
534
535 where the decimal point and exponent are optional, fill out the
536 structure D. Exponent is appropriate if the significand is
537 treated as an integer, and normalizedExponent if the significand
538 is taken to have the decimal point after a single leading
539 non-zero digit.
540
541 If the value is zero, V->firstSigDigit points to a non-digit, and
542 the return exponent is zero.
543*/
545 const char *firstSigDigit;
546 const char *lastSigDigit;
549};
550
553 StringRef::iterator dot = end;
554
555 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
556 if (!PtrOrErr)
557 return PtrOrErr.takeError();
558 StringRef::iterator p = *PtrOrErr;
559
560 D->firstSigDigit = p;
561 D->exponent = 0;
562 D->normalizedExponent = 0;
563
564 for (; p != end; ++p) {
565 if (*p == '.') {
566 if (dot != end)
567 return createError("String contains multiple dots");
568 dot = p++;
569 if (p == end)
570 break;
571 }
572 if (decDigitValue(*p) >= 10U)
573 break;
574 }
575
576 if (p != end) {
577 if (*p != 'e' && *p != 'E')
578 return createError("Invalid character in significand");
579 if (p == begin)
580 return createError("Significand has no digits");
581 if (dot != end && p - begin == 1)
582 return createError("Significand has no digits");
583
584 /* p points to the first non-digit in the string */
585 auto ExpOrErr = readExponent(p + 1, end);
586 if (!ExpOrErr)
587 return ExpOrErr.takeError();
588 D->exponent = *ExpOrErr;
589
590 /* Implied decimal point? */
591 if (dot == end)
592 dot = p;
593 }
594
595 /* If number is all zeroes accept any exponent. */
596 if (p != D->firstSigDigit) {
597 /* Drop insignificant trailing zeroes. */
598 if (p != begin) {
599 do
600 do
601 p--;
602 while (p != begin && *p == '0');
603 while (p != begin && *p == '.');
604 }
605
606 /* Adjust the exponents for any decimal point. */
607 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
608 D->normalizedExponent = (D->exponent +
609 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
610 - (dot > D->firstSigDigit && dot < p)));
611 }
612
613 D->lastSigDigit = p;
614 return Error::success();
615}
616
617/* Return the trailing fraction of a hexadecimal number.
618 DIGITVALUE is the first hex digit of the fraction, P points to
619 the next digit. */
622 unsigned int digitValue) {
623 unsigned int hexDigit;
624
625 /* If the first trailing digit isn't 0 or 8 we can work out the
626 fraction immediately. */
627 if (digitValue > 8)
628 return lfMoreThanHalf;
629 else if (digitValue < 8 && digitValue > 0)
630 return lfLessThanHalf;
631
632 // Otherwise we need to find the first non-zero digit.
633 while (p != end && (*p == '0' || *p == '.'))
634 p++;
635
636 if (p == end)
637 return createError("Invalid trailing hexadecimal fraction!");
638
639 hexDigit = hexDigitValue(*p);
640
641 /* If we ran off the end it is exactly zero or one-half, otherwise
642 a little more. */
643 if (hexDigit == UINT_MAX)
644 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
645 else
646 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
647}
648
649/* Return the fraction lost were a bignum truncated losing the least
650 significant BITS bits. */
651static lostFraction
653 unsigned int partCount,
654 unsigned int bits)
655{
656 unsigned int lsb;
657
658 lsb = APInt::tcLSB(parts, partCount);
659
660 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
661 if (bits <= lsb)
662 return lfExactlyZero;
663 if (bits == lsb + 1)
664 return lfExactlyHalf;
665 if (bits <= partCount * APFloatBase::integerPartWidth &&
666 APInt::tcExtractBit(parts, bits - 1))
667 return lfMoreThanHalf;
668
669 return lfLessThanHalf;
670}
671
672/* Shift DST right BITS bits noting lost fraction. */
673static lostFraction
674shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
675{
676 lostFraction lost_fraction;
677
678 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
679
680 APInt::tcShiftRight(dst, parts, bits);
681
682 return lost_fraction;
683}
684
685/* Combine the effect of two lost fractions. */
686static lostFraction
688 lostFraction lessSignificant)
689{
690 if (lessSignificant != lfExactlyZero) {
691 if (moreSignificant == lfExactlyZero)
692 moreSignificant = lfLessThanHalf;
693 else if (moreSignificant == lfExactlyHalf)
694 moreSignificant = lfMoreThanHalf;
695 }
696
697 return moreSignificant;
698}
699
/* Upper bound, in half-ulps, on the error of a product of two floating
   point numbers.  The operands differ from the values they approximate by
   at most HUERR1 and HUERR2 half-ulps respectively, and INEXACTMULTIPLY
   says whether the multiplication itself was inexact.  The true error is
   strictly less than the value returned.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int operandError = HUerr1 + HUerr2;
  const unsigned int roundingError = inexactMultiply ? 1U : 0U;

  if (operandError != 0)
    return roundingError + 2 * operandError;

  /* Exact operands: <= inexactMultiply half-ulps. */
  return roundingError * 2;
}
717
718/* The number of ulps from the boundary (zero, or half if ISNEAREST)
719 when the least significant BITS are truncated. BITS cannot be
720 zero. */
722ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
723 bool isNearest) {
724 unsigned int count, partBits;
725 APFloatBase::integerPart part, boundary;
726
727 assert(bits != 0);
728
729 bits--;
731 partBits = bits % APFloatBase::integerPartWidth + 1;
732
733 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
734
735 if (isNearest)
736 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
737 else
738 boundary = 0;
739
740 if (count == 0) {
741 if (part - boundary <= boundary - part)
742 return part - boundary;
743 else
744 return boundary - part;
745 }
746
747 if (part == boundary) {
748 while (--count)
749 if (parts[count])
750 return ~(APFloatBase::integerPart) 0; /* A lot. */
751
752 return parts[0];
753 } else if (part == boundary - 1) {
754 while (--count)
755 if (~parts[count])
756 return ~(APFloatBase::integerPart) 0; /* A lot. */
757
758 return -parts[0];
759 }
760
761 return ~(APFloatBase::integerPart) 0; /* A lot. */
762}
763
764/* Place pow(5, power) in DST, and return the number of parts used.
765 DST must be at least one part larger than size of the answer. */
766static unsigned int
767powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
768 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
770 pow5s[0] = 78125 * 5;
771
772 unsigned int partsCount = 1;
773 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
774 unsigned int result;
775 assert(power <= maxExponent);
776
777 p1 = dst;
778 p2 = scratch;
779
780 *p1 = firstEightPowers[power & 7];
781 power >>= 3;
782
783 result = 1;
784 pow5 = pow5s;
785
786 for (unsigned int n = 0; power; power >>= 1, n++) {
787 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
788 if (n != 0) {
789 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
790 partsCount, partsCount);
791 partsCount *= 2;
792 if (pow5[partsCount - 1] == 0)
793 partsCount--;
794 }
795
796 if (power & 1) {
798
799 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
800 result += partsCount;
801 if (p2[result - 1] == 0)
802 result--;
803
804 /* Now result is in p1 with partsCount parts and p2 is scratch
805 space. */
806 tmp = p1;
807 p1 = p2;
808 p2 = tmp;
809 }
810
811 pow5 += partsCount;
812 }
813
814 if (p1 != dst)
815 APInt::tcAssign(dst, p1, result);
816
817 return result;
818}
819
820/* Zero at the end to avoid modular arithmetic when adding one; used
821 when rounding up during hexadecimal output. */
/* Hex digit lookup tables.  Index 16 repeats '0' so that "digit + 1" can be
   looked up directly without wrapping the index. */
822static const char hexDigitsLower[] = "0123456789abcdef0";
823static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of infinity and NaN.
   NOTE(review): presumably paired with the matching hex digit table when
   formatting special values -- the users are outside this view; confirm. */
824static const char infinityL[] = "infinity";
825static const char infinityU[] = "INFINITY";
826static const char NaNL[] = "nan";
827static const char NaNU[] = "NAN";
828
829/* Write out an integerPart in hexadecimal, starting with the most
830 significant nibble. Write out exactly COUNT hexdigits, return
831 COUNT. */
832static unsigned int
833partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
834 const char *hexDigitChars)
835{
836 unsigned int result = count;
837
839
840 part >>= (APFloatBase::integerPartWidth - 4 * count);
841 while (count--) {
842 dst[count] = hexDigitChars[part & 0xf];
843 part >>= 4;
844 }
845
846 return result;
847}
848
/* Write N to DST as an unsigned decimal number, without a terminating NUL.
   Returns a pointer just past the last character written; at least one
   digit is always produced (N == 0 writes "0"). */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int length = 0;

  /* Peel the digits off least-significant first. */
  do {
    digits[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Emit them in the opposite order so the most significant digit leads. */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
866
867/* Write out a signed decimal integer. */
868static char *
869writeSignedDecimal (char *dst, int value)
870{
871 if (value < 0) {
872 *dst++ = '-';
873 dst = writeUnsignedDecimal(dst, -(unsigned) value);
874 } else
875 dst = writeUnsignedDecimal(dst, value);
876
877 return dst;
878}
879
880namespace detail {
881/* Constructors. */
882void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
883 unsigned int count;
884
885 semantics = ourSemantics;
886 count = partCount();
887 if (count > 1)
888 significand.parts = new integerPart[count];
889}
890
891void IEEEFloat::freeSignificand() {
892 if (needsCleanup())
893 delete [] significand.parts;
894}
895
896void IEEEFloat::assign(const IEEEFloat &rhs) {
897 assert(semantics == rhs.semantics);
898
899 sign = rhs.sign;
900 category = rhs.category;
901 exponent = rhs.exponent;
902 if (isFiniteNonZero() || category == fcNaN)
903 copySignificand(rhs);
904}
905
906void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
907 assert(isFiniteNonZero() || category == fcNaN);
908 assert(rhs.partCount() >= partCount());
909
910 APInt::tcAssign(significandParts(), rhs.significandParts(),
911 partCount());
912}
913
914/* Make this number a NaN, with an arbitrary but deterministic value
915 for the significand. If double or longer, this is a signalling NaN,
916 which may not be ideal. If float, this is QNaN(0). */
917void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
919 llvm_unreachable("This floating point format does not support NaN");
920
921 category = fcNaN;
922 sign = Negative;
923 exponent = exponentNaN();
924
925 integerPart *significand = significandParts();
926 unsigned numParts = partCount();
927
928 APInt fill_storage;
930 // Finite-only types do not distinguish signalling and quiet NaN, so
931 // make them all quiet.
932 SNaN = false;
933 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
934 sign = true;
935 fill_storage = APInt::getZero(semantics->precision - 1);
936 } else {
937 fill_storage = APInt::getAllOnes(semantics->precision - 1);
938 }
939 fill = &fill_storage;
940 }
941
942 // Set the significand bits to the fill.
943 if (!fill || fill->getNumWords() < numParts)
944 APInt::tcSet(significand, 0, numParts);
945 if (fill) {
946 APInt::tcAssign(significand, fill->getRawData(),
947 std::min(fill->getNumWords(), numParts));
948
949 // Zero out the excess bits of the significand.
950 unsigned bitsToPreserve = semantics->precision - 1;
951 unsigned part = bitsToPreserve / 64;
952 bitsToPreserve %= 64;
953 significand[part] &= ((1ULL << bitsToPreserve) - 1);
954 for (part++; part != numParts; ++part)
955 significand[part] = 0;
956 }
957
958 unsigned QNaNBit = semantics->precision - 2;
959
960 if (SNaN) {
961 // We always have to clear the QNaN bit to make it an SNaN.
962 APInt::tcClearBit(significand, QNaNBit);
963
964 // If there are no bits set in the payload, we have to set
965 // *something* to make it a NaN instead of an infinity;
966 // conventionally, this is the next bit down from the QNaN bit.
967 if (APInt::tcIsZero(significand, numParts))
968 APInt::tcSetBit(significand, QNaNBit - 1);
969 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
970 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
971 // Do nothing.
972 } else {
973 // We always have to set the QNaN bit to make it a QNaN.
974 APInt::tcSetBit(significand, QNaNBit);
975 }
976
977 // For x87 extended precision, we want to make a NaN, not a
978 // pseudo-NaN. Maybe we should expose the ability to make
979 // pseudo-NaNs?
980 if (semantics == &semX87DoubleExtended)
981 APInt::tcSetBit(significand, QNaNBit + 1);
982}
983
985 if (this != &rhs) {
986 if (semantics != rhs.semantics) {
987 freeSignificand();
988 initialize(rhs.semantics);
989 }
990 assign(rhs);
991 }
992
993 return *this;
994}
995
997 freeSignificand();
998
999 semantics = rhs.semantics;
1000 significand = rhs.significand;
1001 exponent = rhs.exponent;
1002 category = rhs.category;
1003 sign = rhs.sign;
1004
1005 rhs.semantics = &semBogus;
1006 return *this;
1007}
1008
1010 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1011 (APInt::tcExtractBit(significandParts(),
1012 semantics->precision - 1) == 0);
1013}
1014
1016 // The smallest number by magnitude in our format will be the smallest
1017 // denormal, i.e. the floating point number with exponent being minimum
1018 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1019 return isFiniteNonZero() && exponent == semantics->minExponent &&
1020 significandMSB() == 0;
1021}
1022
1024 return getCategory() == fcNormal && exponent == semantics->minExponent &&
1025 isSignificandAllZerosExceptMSB();
1026}
1027
1028bool IEEEFloat::isSignificandAllOnes() const {
1029 // Test if the significand excluding the integral bit is all ones. This allows
1030 // us to test for binade boundaries.
1031 const integerPart *Parts = significandParts();
1032 const unsigned PartCount = partCountForBits(semantics->precision);
1033 for (unsigned i = 0; i < PartCount - 1; i++)
1034 if (~Parts[i])
1035 return false;
1036
1037 // Set the unused high bits to all ones when we compare.
1038 const unsigned NumHighBits =
1039 PartCount*integerPartWidth - semantics->precision + 1;
1040 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1041 "Can not have more high bits to fill than integerPartWidth");
1042 const integerPart HighBitFill =
1043 ~integerPart(0) << (integerPartWidth - NumHighBits);
1044 if (~(Parts[PartCount - 1] | HighBitFill))
1045 return false;
1046
1047 return true;
1048}
1049
1050bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1051 // Test if the significand excluding the integral bit is all ones except for
1052 // the least significant bit.
1053 const integerPart *Parts = significandParts();
1054
1055 if (Parts[0] & 1)
1056 return false;
1057
1058 const unsigned PartCount = partCountForBits(semantics->precision);
1059 for (unsigned i = 0; i < PartCount - 1; i++) {
1060 if (~Parts[i] & ~unsigned{!i})
1061 return false;
1062 }
1063
1064 // Set the unused high bits to all ones when we compare.
1065 const unsigned NumHighBits =
1066 PartCount * integerPartWidth - semantics->precision + 1;
1067 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1068 "Can not have more high bits to fill than integerPartWidth");
1069 const integerPart HighBitFill = ~integerPart(0)
1070 << (integerPartWidth - NumHighBits);
1071 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1072 return false;
1073
1074 return true;
1075}
1076
1077bool IEEEFloat::isSignificandAllZeros() const {
1078 // Test if the significand excluding the integral bit is all zeros. This
1079 // allows us to test for binade boundaries.
1080 const integerPart *Parts = significandParts();
1081 const unsigned PartCount = partCountForBits(semantics->precision);
1082
1083 for (unsigned i = 0; i < PartCount - 1; i++)
1084 if (Parts[i])
1085 return false;
1086
1087 // Compute how many bits are used in the final word.
1088 const unsigned NumHighBits =
1089 PartCount*integerPartWidth - semantics->precision + 1;
1090 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1091 "clear than integerPartWidth");
1092 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1093
1094 if (Parts[PartCount - 1] & HighBitMask)
1095 return false;
1096
1097 return true;
1098}
1099
1100bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1101 const integerPart *Parts = significandParts();
1102 const unsigned PartCount = partCountForBits(semantics->precision);
1103
1104 for (unsigned i = 0; i < PartCount - 1; i++) {
1105 if (Parts[i])
1106 return false;
1107 }
1108
1109 const unsigned NumHighBits =
1110 PartCount * integerPartWidth - semantics->precision + 1;
1111 return Parts[PartCount - 1] == integerPart(1)
1112 << (integerPartWidth - NumHighBits);
1113}
1114
1117 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1118 // The largest number by magnitude in our format will be the floating point
1119 // number with maximum exponent and with significand that is all ones except
1120 // the LSB.
1121 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1122 isSignificandAllOnesExceptLSB();
1123 } else {
1124 // The largest number by magnitude in our format will be the floating point
1125 // number with maximum exponent and with significand that is all ones.
1126 return isFiniteNonZero() && exponent == semantics->maxExponent &&
1127 isSignificandAllOnes();
1128 }
1129}
1130
1132 // This could be made more efficient; I'm going for obviously correct.
1133 if (!isFinite()) return false;
1134 IEEEFloat truncated = *this;
1135 truncated.roundToIntegral(rmTowardZero);
1136 return compare(truncated) == cmpEqual;
1137}
1138
1139bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1140 if (this == &rhs)
1141 return true;
1142 if (semantics != rhs.semantics ||
1143 category != rhs.category ||
1144 sign != rhs.sign)
1145 return false;
1146 if (category==fcZero || category==fcInfinity)
1147 return true;
1148
1149 if (isFiniteNonZero() && exponent != rhs.exponent)
1150 return false;
1151
1152 return std::equal(significandParts(), significandParts() + partCount(),
1153 rhs.significandParts());
1154}
1155
1157 initialize(&ourSemantics);
1158 sign = 0;
1159 category = fcNormal;
1160 zeroSignificand();
1161 exponent = ourSemantics.precision - 1;
1162 significandParts()[0] = value;
1164}
1165
1167 initialize(&ourSemantics);
1168 makeZero(false);
1169}
1170
1171// Delegate to the previous constructor, because a later copy constructor may
1172// actually inspect category, which can't be garbage.
1174 : IEEEFloat(ourSemantics) {}
1175
1177 initialize(rhs.semantics);
1178 assign(rhs);
1179}
1180
1182 *this = std::move(rhs);
1183}
1184
1185IEEEFloat::~IEEEFloat() { freeSignificand(); }
1186
1187unsigned int IEEEFloat::partCount() const {
1188 return partCountForBits(semantics->precision + 1);
1189}
1190
1191const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1192 return const_cast<IEEEFloat *>(this)->significandParts();
1193}
1194
1195IEEEFloat::integerPart *IEEEFloat::significandParts() {
1196 if (partCount() > 1)
1197 return significand.parts;
1198 else
1199 return &significand.part;
1200}
1201
1202void IEEEFloat::zeroSignificand() {
1203 APInt::tcSet(significandParts(), 0, partCount());
1204}
1205
1206/* Increment an fcNormal floating point number's significand. */
1207void IEEEFloat::incrementSignificand() {
1208 integerPart carry;
1209
1210 carry = APInt::tcIncrement(significandParts(), partCount());
1211
1212 /* Our callers should never cause us to overflow. */
1213 assert(carry == 0);
1214 (void)carry;
1215}
1216
1217/* Add the significand of the RHS. Returns the carry flag. */
1218IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1219 integerPart *parts;
1220
1221 parts = significandParts();
1222
1223 assert(semantics == rhs.semantics);
1224 assert(exponent == rhs.exponent);
1225
1226 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1227}
1228
1229/* Subtract the significand of the RHS with a borrow flag. Returns
1230 the borrow flag. */
1231IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1232 integerPart borrow) {
1233 integerPart *parts;
1234
1235 parts = significandParts();
1236
1237 assert(semantics == rhs.semantics);
1238 assert(exponent == rhs.exponent);
1239
1240 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1241 partCount());
1242}
1243
1244/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1245 on to the full-precision result of the multiplication. Returns the
1246 lost fraction. */
1247lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1248 IEEEFloat addend) {
1249 unsigned int omsb; // One, not zero, based MSB.
1250 unsigned int partsCount, newPartsCount, precision;
1251 integerPart *lhsSignificand;
1252 integerPart scratch[4];
1253 integerPart *fullSignificand;
1254 lostFraction lost_fraction;
1255 bool ignored;
1256
1257 assert(semantics == rhs.semantics);
1258
1259 precision = semantics->precision;
1260
1261 // Allocate space for twice as many bits as the original significand, plus one
1262 // extra bit for the addition to overflow into.
1263 newPartsCount = partCountForBits(precision * 2 + 1);
1264
1265 if (newPartsCount > 4)
1266 fullSignificand = new integerPart[newPartsCount];
1267 else
1268 fullSignificand = scratch;
1269
1270 lhsSignificand = significandParts();
1271 partsCount = partCount();
1272
1273 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1274 rhs.significandParts(), partsCount, partsCount);
1275
1276 lost_fraction = lfExactlyZero;
1277 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1278 exponent += rhs.exponent;
1279
1280 // Assume the operands involved in the multiplication are single-precision
1281 // FP, and the two multiplicants are:
1282 // *this = a23 . a22 ... a0 * 2^e1
1283 // rhs = b23 . b22 ... b0 * 2^e2
1284 // the result of multiplication is:
1285 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1286 // Note that there are three significant bits at the left-hand side of the
1287 // radix point: two for the multiplication, and an overflow bit for the
1288 // addition (that will always be zero at this point). Move the radix point
1289 // toward left by two bits, and adjust exponent accordingly.
1290 exponent += 2;
1291
1292 if (addend.isNonZero()) {
1293 // The intermediate result of the multiplication has "2 * precision"
1294 // signicant bit; adjust the addend to be consistent with mul result.
1295 //
1296 Significand savedSignificand = significand;
1297 const fltSemantics *savedSemantics = semantics;
1298 fltSemantics extendedSemantics;
1300 unsigned int extendedPrecision;
1301
1302 // Normalize our MSB to one below the top bit to allow for overflow.
1303 extendedPrecision = 2 * precision + 1;
1304 if (omsb != extendedPrecision - 1) {
1305 assert(extendedPrecision > omsb);
1306 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1307 (extendedPrecision - 1) - omsb);
1308 exponent -= (extendedPrecision - 1) - omsb;
1309 }
1310
1311 /* Create new semantics. */
1312 extendedSemantics = *semantics;
1313 extendedSemantics.precision = extendedPrecision;
1314
1315 if (newPartsCount == 1)
1316 significand.part = fullSignificand[0];
1317 else
1318 significand.parts = fullSignificand;
1319 semantics = &extendedSemantics;
1320
1321 // Make a copy so we can convert it to the extended semantics.
1322 // Note that we cannot convert the addend directly, as the extendedSemantics
1323 // is a local variable (which we take a reference to).
1324 IEEEFloat extendedAddend(addend);
1325 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1326 assert(status == opOK);
1327 (void)status;
1328
1329 // Shift the significand of the addend right by one bit. This guarantees
1330 // that the high bit of the significand is zero (same as fullSignificand),
1331 // so the addition will overflow (if it does overflow at all) into the top bit.
1332 lost_fraction = extendedAddend.shiftSignificandRight(1);
1333 assert(lost_fraction == lfExactlyZero &&
1334 "Lost precision while shifting addend for fused-multiply-add.");
1335
1336 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1337
1338 /* Restore our state. */
1339 if (newPartsCount == 1)
1340 fullSignificand[0] = significand.part;
1341 significand = savedSignificand;
1342 semantics = savedSemantics;
1343
1344 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1345 }
1346
1347 // Convert the result having "2 * precision" significant-bits back to the one
1348 // having "precision" significant-bits. First, move the radix point from
1349 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1350 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1351 exponent -= precision + 1;
1352
1353 // In case MSB resides at the left-hand side of radix point, shift the
1354 // mantissa right by some amount to make sure the MSB reside right before
1355 // the radix point (i.e. "MSB . rest-significant-bits").
1356 //
1357 // Note that the result is not normalized when "omsb < precision". So, the
1358 // caller needs to call IEEEFloat::normalize() if normalized value is
1359 // expected.
1360 if (omsb > precision) {
1361 unsigned int bits, significantParts;
1362 lostFraction lf;
1363
1364 bits = omsb - precision;
1365 significantParts = partCountForBits(omsb);
1366 lf = shiftRight(fullSignificand, significantParts, bits);
1367 lost_fraction = combineLostFractions(lf, lost_fraction);
1368 exponent += bits;
1369 }
1370
1371 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1372
1373 if (newPartsCount > 4)
1374 delete [] fullSignificand;
1375
1376 return lost_fraction;
1377}
1378
1379lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1380 return multiplySignificand(rhs, IEEEFloat(*semantics));
1381}
1382
1383/* Multiply the significands of LHS and RHS to DST. */
1384lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1385 unsigned int bit, i, partsCount;
1386 const integerPart *rhsSignificand;
1387 integerPart *lhsSignificand, *dividend, *divisor;
1388 integerPart scratch[4];
1389 lostFraction lost_fraction;
1390
1391 assert(semantics == rhs.semantics);
1392
1393 lhsSignificand = significandParts();
1394 rhsSignificand = rhs.significandParts();
1395 partsCount = partCount();
1396
1397 if (partsCount > 2)
1398 dividend = new integerPart[partsCount * 2];
1399 else
1400 dividend = scratch;
1401
1402 divisor = dividend + partsCount;
1403
1404 /* Copy the dividend and divisor as they will be modified in-place. */
1405 for (i = 0; i < partsCount; i++) {
1406 dividend[i] = lhsSignificand[i];
1407 divisor[i] = rhsSignificand[i];
1408 lhsSignificand[i] = 0;
1409 }
1410
1411 exponent -= rhs.exponent;
1412
1413 unsigned int precision = semantics->precision;
1414
1415 /* Normalize the divisor. */
1416 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1417 if (bit) {
1418 exponent += bit;
1419 APInt::tcShiftLeft(divisor, partsCount, bit);
1420 }
1421
1422 /* Normalize the dividend. */
1423 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1424 if (bit) {
1425 exponent -= bit;
1426 APInt::tcShiftLeft(dividend, partsCount, bit);
1427 }
1428
1429 /* Ensure the dividend >= divisor initially for the loop below.
1430 Incidentally, this means that the division loop below is
1431 guaranteed to set the integer bit to one. */
1432 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1433 exponent--;
1434 APInt::tcShiftLeft(dividend, partsCount, 1);
1435 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1436 }
1437
1438 /* Long division. */
1439 for (bit = precision; bit; bit -= 1) {
1440 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1441 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1442 APInt::tcSetBit(lhsSignificand, bit - 1);
1443 }
1444
1445 APInt::tcShiftLeft(dividend, partsCount, 1);
1446 }
1447
1448 /* Figure out the lost fraction. */
1449 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1450
1451 if (cmp > 0)
1452 lost_fraction = lfMoreThanHalf;
1453 else if (cmp == 0)
1454 lost_fraction = lfExactlyHalf;
1455 else if (APInt::tcIsZero(dividend, partsCount))
1456 lost_fraction = lfExactlyZero;
1457 else
1458 lost_fraction = lfLessThanHalf;
1459
1460 if (partsCount > 2)
1461 delete [] dividend;
1462
1463 return lost_fraction;
1464}
1465
1466unsigned int IEEEFloat::significandMSB() const {
1467 return APInt::tcMSB(significandParts(), partCount());
1468}
1469
1470unsigned int IEEEFloat::significandLSB() const {
1471 return APInt::tcLSB(significandParts(), partCount());
1472}
1473
1474/* Note that a zero result is NOT normalized to fcZero. */
1475lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1476 /* Our exponent should not overflow. */
1477 assert((ExponentType) (exponent + bits) >= exponent);
1478
1479 exponent += bits;
1480
1481 return shiftRight(significandParts(), partCount(), bits);
1482}
1483
1484/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1485void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1486 assert(bits < semantics->precision);
1487
1488 if (bits) {
1489 unsigned int partsCount = partCount();
1490
1491 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1492 exponent -= bits;
1493
1494 assert(!APInt::tcIsZero(significandParts(), partsCount));
1495 }
1496}
1497
1500 int compare;
1501
1502 assert(semantics == rhs.semantics);
1504 assert(rhs.isFiniteNonZero());
1505
1506 compare = exponent - rhs.exponent;
1507
1508 /* If exponents are equal, do an unsigned bignum comparison of the
1509 significands. */
1510 if (compare == 0)
1511 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1512 partCount());
1513
1514 if (compare > 0)
1515 return cmpGreaterThan;
1516 else if (compare < 0)
1517 return cmpLessThan;
1518 else
1519 return cmpEqual;
1520}
1521
1522/* Set the least significant BITS bits of a bignum, clear the
1523 rest. */
1524static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1525 unsigned bits) {
1526 unsigned i = 0;
1527 while (bits > APInt::APINT_BITS_PER_WORD) {
1528 dst[i++] = ~(APInt::WordType)0;
1530 }
1531
1532 if (bits)
1533 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1534
1535 while (i < parts)
1536 dst[i++] = 0;
1537}
1538
1539/* Handle overflow. Sign is preserved. We either become infinity or
1540 the largest finite number. */
1541IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1543 /* Infinity? */
1544 if (rounding_mode == rmNearestTiesToEven ||
1545 rounding_mode == rmNearestTiesToAway ||
1546 (rounding_mode == rmTowardPositive && !sign) ||
1547 (rounding_mode == rmTowardNegative && sign)) {
1549 makeNaN(false, sign);
1550 else
1551 category = fcInfinity;
1552 return static_cast<opStatus>(opOverflow | opInexact);
1553 }
1554 }
1555
1556 /* Otherwise we become the largest finite number. */
1557 category = fcNormal;
1558 exponent = semantics->maxExponent;
1559 tcSetLeastSignificantBits(significandParts(), partCount(),
1560 semantics->precision);
1563 APInt::tcClearBit(significandParts(), 0);
1564
1565 return opInexact;
1566}
1567
1568/* Returns TRUE if, when truncating the current number, with BIT the
1569 new LSB, with the given lost fraction and rounding mode, the result
1570 would need to be rounded away from zero (i.e., by increasing the
1571 signficand). This routine must work for fcZero of both signs, and
1572 fcNormal numbers. */
1573bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1574 lostFraction lost_fraction,
1575 unsigned int bit) const {
1576 /* NaNs and infinities should not have lost fractions. */
1577 assert(isFiniteNonZero() || category == fcZero);
1578
1579 /* Current callers never pass this so we don't handle it. */
1580 assert(lost_fraction != lfExactlyZero);
1581
1582 switch (rounding_mode) {
1584 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1585
1587 if (lost_fraction == lfMoreThanHalf)
1588 return true;
1589
1590 /* Our zeroes don't have a significand to test. */
1591 if (lost_fraction == lfExactlyHalf && category != fcZero)
1592 return APInt::tcExtractBit(significandParts(), bit);
1593
1594 return false;
1595
1596 case rmTowardZero:
1597 return false;
1598
1599 case rmTowardPositive:
1600 return !sign;
1601
1602 case rmTowardNegative:
1603 return sign;
1604
1605 default:
1606 break;
1607 }
1608 llvm_unreachable("Invalid rounding mode found");
1609}
1610
1611IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1612 lostFraction lost_fraction) {
1613 unsigned int omsb; /* One, not zero, based MSB. */
1614 int exponentChange;
1615
1616 if (!isFiniteNonZero())
1617 return opOK;
1618
1619 /* Before rounding normalize the exponent of fcNormal numbers. */
1620 omsb = significandMSB() + 1;
1621
1622 if (omsb) {
1623 /* OMSB is numbered from 1. We want to place it in the integer
1624 bit numbered PRECISION if possible, with a compensating change in
1625 the exponent. */
1626 exponentChange = omsb - semantics->precision;
1627
1628 /* If the resulting exponent is too high, overflow according to
1629 the rounding mode. */
1630 if (exponent + exponentChange > semantics->maxExponent)
1631 return handleOverflow(rounding_mode);
1632
1633 /* Subnormal numbers have exponent minExponent, and their MSB
1634 is forced based on that. */
1635 if (exponent + exponentChange < semantics->minExponent)
1636 exponentChange = semantics->minExponent - exponent;
1637
1638 /* Shifting left is easy as we don't lose precision. */
1639 if (exponentChange < 0) {
1640 assert(lost_fraction == lfExactlyZero);
1641
1642 shiftSignificandLeft(-exponentChange);
1643
1644 return opOK;
1645 }
1646
1647 if (exponentChange > 0) {
1648 lostFraction lf;
1649
1650 /* Shift right and capture any new lost fraction. */
1651 lf = shiftSignificandRight(exponentChange);
1652
1653 lost_fraction = combineLostFractions(lf, lost_fraction);
1654
1655 /* Keep OMSB up-to-date. */
1656 if (omsb > (unsigned) exponentChange)
1657 omsb -= exponentChange;
1658 else
1659 omsb = 0;
1660 }
1661 }
1662
1663 // The all-ones values is an overflow if NaN is all ones. If NaN is
1664 // represented by negative zero, then it is a valid finite value.
1666 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1667 exponent == semantics->maxExponent && isSignificandAllOnes())
1668 return handleOverflow(rounding_mode);
1669
1670 /* Now round the number according to rounding_mode given the lost
1671 fraction. */
1672
1673 /* As specified in IEEE 754, since we do not trap we do not report
1674 underflow for exact results. */
1675 if (lost_fraction == lfExactlyZero) {
1676 /* Canonicalize zeroes. */
1677 if (omsb == 0) {
1678 category = fcZero;
1679 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1680 sign = false;
1681 }
1682
1683 return opOK;
1684 }
1685
1686 /* Increment the significand if we're rounding away from zero. */
1687 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1688 if (omsb == 0)
1689 exponent = semantics->minExponent;
1690
1691 incrementSignificand();
1692 omsb = significandMSB() + 1;
1693
1694 /* Did the significand increment overflow? */
1695 if (omsb == (unsigned) semantics->precision + 1) {
1696 /* Renormalize by incrementing the exponent and shifting our
1697 significand right one. However if we already have the
1698 maximum exponent we overflow to infinity. */
1699 if (exponent == semantics->maxExponent)
1700 // Invoke overflow handling with a rounding mode that will guarantee
1701 // that the result gets turned into the correct infinity representation.
1702 // This is needed instead of just setting the category to infinity to
1703 // account for 8-bit floating point types that have no inf, only NaN.
1704 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1705
1706 shiftSignificandRight(1);
1707
1708 return opInexact;
1709 }
1710
1711 // The all-ones values is an overflow if NaN is all ones. If NaN is
1712 // represented by negative zero, then it is a valid finite value.
1714 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1715 exponent == semantics->maxExponent && isSignificandAllOnes())
1716 return handleOverflow(rounding_mode);
1717 }
1718
1719 /* The normal case - we were and are not denormal, and any
1720 significand increment above didn't overflow. */
1721 if (omsb == semantics->precision)
1722 return opInexact;
1723
1724 /* We have a non-zero denormal. */
1725 assert(omsb < semantics->precision);
1726
1727 /* Canonicalize zeroes. */
1728 if (omsb == 0) {
1729 category = fcZero;
1730 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1731 sign = false;
1732 }
1733
1734 /* The fcZero case is a denormal that underflowed to zero. */
1735 return (opStatus) (opUnderflow | opInexact);
1736}
1737
1738IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1739 bool subtract) {
1740 switch (PackCategoriesIntoKey(category, rhs.category)) {
1741 default:
1742 llvm_unreachable(nullptr);
1743
1747 assign(rhs);
1748 [[fallthrough]];
1753 if (isSignaling()) {
1754 makeQuiet();
1755 return opInvalidOp;
1756 }
1757 return rhs.isSignaling() ? opInvalidOp : opOK;
1758
1762 return opOK;
1763
1766 category = fcInfinity;
1767 sign = rhs.sign ^ subtract;
1768 return opOK;
1769
1771 assign(rhs);
1772 sign = rhs.sign ^ subtract;
1773 return opOK;
1774
1776 /* Sign depends on rounding mode; handled by caller. */
1777 return opOK;
1778
1780 /* Differently signed infinities can only be validly
1781 subtracted. */
1782 if (((sign ^ rhs.sign)!=0) != subtract) {
1783 makeNaN();
1784 return opInvalidOp;
1785 }
1786
1787 return opOK;
1788
1790 return opDivByZero;
1791 }
1792}
1793
1794/* Add or subtract two normal numbers. */
1795lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1796 bool subtract) {
1797 integerPart carry;
1798 lostFraction lost_fraction;
1799 int bits;
1800
1801 /* Determine if the operation on the absolute values is effectively
1802 an addition or subtraction. */
1803 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1804
1805 /* Are we bigger exponent-wise than the RHS? */
1806 bits = exponent - rhs.exponent;
1807
1808 /* Subtraction is more subtle than one might naively expect. */
1809 if (subtract) {
1810 IEEEFloat temp_rhs(rhs);
1811
1812 if (bits == 0)
1813 lost_fraction = lfExactlyZero;
1814 else if (bits > 0) {
1815 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1816 shiftSignificandLeft(1);
1817 } else {
1818 lost_fraction = shiftSignificandRight(-bits - 1);
1819 temp_rhs.shiftSignificandLeft(1);
1820 }
1821
1822 // Should we reverse the subtraction.
1823 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1824 carry = temp_rhs.subtractSignificand
1825 (*this, lost_fraction != lfExactlyZero);
1826 copySignificand(temp_rhs);
1827 sign = !sign;
1828 } else {
1829 carry = subtractSignificand
1830 (temp_rhs, lost_fraction != lfExactlyZero);
1831 }
1832
1833 /* Invert the lost fraction - it was on the RHS and
1834 subtracted. */
1835 if (lost_fraction == lfLessThanHalf)
1836 lost_fraction = lfMoreThanHalf;
1837 else if (lost_fraction == lfMoreThanHalf)
1838 lost_fraction = lfLessThanHalf;
1839
1840 /* The code above is intended to ensure that no borrow is
1841 necessary. */
1842 assert(!carry);
1843 (void)carry;
1844 } else {
1845 if (bits > 0) {
1846 IEEEFloat temp_rhs(rhs);
1847
1848 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1849 carry = addSignificand(temp_rhs);
1850 } else {
1851 lost_fraction = shiftSignificandRight(-bits);
1852 carry = addSignificand(rhs);
1853 }
1854
1855 /* We have a guard bit; generating a carry cannot happen. */
1856 assert(!carry);
1857 (void)carry;
1858 }
1859
1860 return lost_fraction;
1861}
1862
1863IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1864 switch (PackCategoriesIntoKey(category, rhs.category)) {
1865 default:
1866 llvm_unreachable(nullptr);
1867
1871 assign(rhs);
1872 sign = false;
1873 [[fallthrough]];
1878 sign ^= rhs.sign; // restore the original sign
1879 if (isSignaling()) {
1880 makeQuiet();
1881 return opInvalidOp;
1882 }
1883 return rhs.isSignaling() ? opInvalidOp : opOK;
1884
1888 category = fcInfinity;
1889 return opOK;
1890
1894 category = fcZero;
1895 return opOK;
1896
1899 makeNaN();
1900 return opInvalidOp;
1901
1903 return opOK;
1904 }
1905}
1906
1907IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1908 switch (PackCategoriesIntoKey(category, rhs.category)) {
1909 default:
1910 llvm_unreachable(nullptr);
1911
1915 assign(rhs);
1916 sign = false;
1917 [[fallthrough]];
1922 sign ^= rhs.sign; // restore the original sign
1923 if (isSignaling()) {
1924 makeQuiet();
1925 return opInvalidOp;
1926 }
1927 return rhs.isSignaling() ? opInvalidOp : opOK;
1928
1933 return opOK;
1934
1936 category = fcZero;
1937 return opOK;
1938
1940 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1941 makeNaN(false, sign);
1942 else
1943 category = fcInfinity;
1944 return opDivByZero;
1945
1948 makeNaN();
1949 return opInvalidOp;
1950
1952 return opOK;
1953 }
1954}
1955
1956IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1957 switch (PackCategoriesIntoKey(category, rhs.category)) {
1958 default:
1959 llvm_unreachable(nullptr);
1960
1964 assign(rhs);
1965 [[fallthrough]];
1970 if (isSignaling()) {
1971 makeQuiet();
1972 return opInvalidOp;
1973 }
1974 return rhs.isSignaling() ? opInvalidOp : opOK;
1975
1979 return opOK;
1980
1986 makeNaN();
1987 return opInvalidOp;
1988
1990 return opOK;
1991 }
1992}
1993
1994IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1995 switch (PackCategoriesIntoKey(category, rhs.category)) {
1996 default:
1997 llvm_unreachable(nullptr);
1998
2002 assign(rhs);
2003 [[fallthrough]];
2008 if (isSignaling()) {
2009 makeQuiet();
2010 return opInvalidOp;
2011 }
2012 return rhs.isSignaling() ? opInvalidOp : opOK;
2013
2017 return opOK;
2018
2024 makeNaN();
2025 return opInvalidOp;
2026
2028 return opDivByZero; // fake status, indicating this is not a special case
2029 }
2030}
2031
2032/* Change sign. */
2034 // With NaN-as-negative-zero, neither NaN or negative zero can change
2035 // their signs.
2036 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2037 (isZero() || isNaN()))
2038 return;
2039 /* Look mummy, this one's easy. */
2040 sign = !sign;
2041}
2042
2043/* Normalized addition or subtraction. */
2044IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2045 roundingMode rounding_mode,
2046 bool subtract) {
2047 opStatus fs;
2048
2049 fs = addOrSubtractSpecials(rhs, subtract);
2050
2051 /* This return code means it was not a simple case. */
2052 if (fs == opDivByZero) {
2053 lostFraction lost_fraction;
2054
2055 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2056 fs = normalize(rounding_mode, lost_fraction);
2057
2058 /* Can only be zero if we lost no fraction. */
2059 assert(category != fcZero || lost_fraction == lfExactlyZero);
2060 }
2061
2062 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2063 positive zero unless rounding to minus infinity, except that
2064 adding two like-signed zeroes gives that zero. */
2065 if (category == fcZero) {
2066 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2067 sign = (rounding_mode == rmTowardNegative);
2068 // NaN-in-negative-zero means zeros need to be normalized to +0.
2069 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2070 sign = false;
2071 }
2072
2073 return fs;
2074}
2075
2076/* Normalized addition. */
2078 roundingMode rounding_mode) {
2079 return addOrSubtract(rhs, rounding_mode, false);
2080}
2081
2082/* Normalized subtraction. */
2084 roundingMode rounding_mode) {
2085 return addOrSubtract(rhs, rounding_mode, true);
2086}
2087
2088/* Normalized multiply. */
2090 roundingMode rounding_mode) {
2091 opStatus fs;
2092
2093 sign ^= rhs.sign;
2094 fs = multiplySpecials(rhs);
2095
2096 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2097 sign = false;
2098 if (isFiniteNonZero()) {
2099 lostFraction lost_fraction = multiplySignificand(rhs);
2100 fs = normalize(rounding_mode, lost_fraction);
2101 if (lost_fraction != lfExactlyZero)
2102 fs = (opStatus) (fs | opInexact);
2103 }
2104
2105 return fs;
2106}
2107
2108/* Normalized divide. */
2110 roundingMode rounding_mode) {
2111 opStatus fs;
2112
2113 sign ^= rhs.sign;
2114 fs = divideSpecials(rhs);
2115
2116 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2117 sign = false;
2118 if (isFiniteNonZero()) {
2119 lostFraction lost_fraction = divideSignificand(rhs);
2120 fs = normalize(rounding_mode, lost_fraction);
2121 if (lost_fraction != lfExactlyZero)
2122 fs = (opStatus) (fs | opInexact);
2123 }
2124
2125 return fs;
2126}
2127
2128/* Normalized remainder. */
2130 opStatus fs;
2131 unsigned int origSign = sign;
2132
2133 // First handle the special cases.
2134 fs = remainderSpecials(rhs);
2135 if (fs != opDivByZero)
2136 return fs;
2137
2138 fs = opOK;
2139
2140 // Make sure the current value is less than twice the denom. If the addition
2141 // did not succeed (an overflow has happened), then the finite value we
2142 // currently possess must already be less than twice the denom (as we are
2143 // using the same semantics).
2144 IEEEFloat P2 = rhs;
2145 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2146 fs = mod(P2);
2147 assert(fs == opOK);
2148 }
2149
2150 // Lets work with absolute numbers.
2151 IEEEFloat P = rhs;
2152 P.sign = false;
2153 sign = false;
2154
2155 //
2156 // To calculate the remainder we use the following scheme.
2157 //
2158 // The remainder is defined as follows:
2159 //
2160 // remainder = numer - rquot * denom = x - r * p
2161 //
2162 // Where r is the result of: x/p, rounded toward the nearest integral value
2163 // (with halfway cases rounded toward the even number).
2164 //
2165 // Currently, (after x mod 2p):
2166 // r is the number of 2p's present inside x, which is inherently, an even
2167 // number of p's.
2168 //
2169 // We may split the remaining calculation into 4 options:
2170 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2171 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2172 // are done as well.
2173 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2174 // to subtract 1p at least once.
2175 // - if x >= p then we must subtract p at least once, as x must be a
2176 // remainder.
2177 //
2178 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2179 //
2180 // We can now split the remaining calculation to the following 3 options:
2181 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2182 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2183 // must round up to the next even number. so we must subtract p once more.
2184 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2185 // integral, and subtract p once more.
2186 //
2187
2188 // Extend the semantics to prevent an overflow/underflow or inexact result.
2189 bool losesInfo;
2190 fltSemantics extendedSemantics = *semantics;
2191 extendedSemantics.maxExponent++;
2192 extendedSemantics.minExponent--;
2193 extendedSemantics.precision += 2;
2194
2195 IEEEFloat VEx = *this;
2196 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2197 assert(fs == opOK && !losesInfo);
2198 IEEEFloat PEx = P;
2199 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2200 assert(fs == opOK && !losesInfo);
2201
2202 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2203 // any fraction.
2204 fs = VEx.add(VEx, rmNearestTiesToEven);
2205 assert(fs == opOK);
2206
2207 if (VEx.compare(PEx) == cmpGreaterThan) {
2209 assert(fs == opOK);
2210
2211 // Make VEx = this.add(this), but because we have different semantics, we do
2212 // not want to `convert` again, so we just subtract PEx twice (which equals
2213 // to the desired value).
2214 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2215 assert(fs == opOK);
2216 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2217 assert(fs == opOK);
2218
2219 cmpResult result = VEx.compare(PEx);
2220 if (result == cmpGreaterThan || result == cmpEqual) {
2222 assert(fs == opOK);
2223 }
2224 }
2225
2226 if (isZero()) {
2227 sign = origSign; // IEEE754 requires this
2228 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2229 // But some 8-bit floats only have positive 0.
2230 sign = false;
2231 }
2232
2233 else
2234 sign ^= origSign;
2235 return fs;
2236}
2237
2238/* Normalized llvm frem (C fmod). */
// NOTE(review): listing lines 2239, 2245 and 2254 are absent from this copy
// (the numbering skips them), so the function header, the end of the loop
// condition and one statement inside the loop are not visible here.
2240 opStatus fs;
// Special-category operands are dealt with first; the loop below only runs
// while both operands are finite and non-zero.
2241 fs = modSpecials(rhs);
// Remember the sign on entry so that a zero result can be given it below.
2242 unsigned int origSign = sign;
2243
2244 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
// Scale rhs by the difference of the two binary exponents.
2246 int Exp = ilogb(*this) - ilogb(rhs);
2247 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2248 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2249 // check for it.
// If the scaled value is NaN, or *this is smaller in magnitude than it,
// fall back to one power of two less.
2250 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2251 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
// V takes the sign of *this.
2252 V.sign = sign;
2253
2255 assert(fs==opOK);
2256 }
2257 if (isZero()) {
2258 sign = origSign; // fmod requires this
// Formats whose NaN encoding is the negative-zero bit pattern cannot hold -0.
2259 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2260 sign = false;
2261 }
2262 return fs;
2263}
2264
2265/* Normalized fused-multiply-add. */
// NOTE(review): listing line 2266 (the first line of the function header) is
// absent from this copy; only the trailing parameters are visible below.
// The result replaces *this; the returned opStatus carries the exception
// flags accumulated along the chosen path.
2267 const IEEEFloat &addend,
2268 roundingMode rounding_mode) {
2269 opStatus fs;
2270
2271 /* Post-multiplication sign, before addition. */
2272 sign ^= multiplicand.sign;
2273
2274 /* If and only if all arguments are normal do we need to do an
2275 extended-precision calculation. */
2276 if (isFiniteNonZero() &&
2277 multiplicand.isFiniteNonZero() &&
2278 addend.isFinite()) {
2279 lostFraction lost_fraction;
2280
// The addend is handed to multiplySignificand together with the multiplicand
// (presumably so it is folded in before rounding - confirm against that
// helper); normalize() below is the only rounding step in this branch.
2281 lost_fraction = multiplySignificand(multiplicand, addend);
2282 fs = normalize(rounding_mode, lost_fraction);
// Any discarded fraction makes the result inexact, whatever normalize reported.
2283 if (lost_fraction != lfExactlyZero)
2284 fs = (opStatus) (fs | opInexact);
2285
2286 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2287 positive zero unless rounding to minus infinity, except that
2288 adding two like-signed zeroes gives that zero. */
2289 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2290 sign = (rounding_mode == rmTowardNegative);
// Formats whose NaN encoding is the negative-zero bit pattern cannot hold -0.
2291 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2292 sign = false;
2293 }
2294 } else {
2295 fs = multiplySpecials(multiplicand);
2296
2297 /* FS can only be opOK or opInvalidOp. There is no more work
2298 to do in the latter case. The IEEE-754R standard says it is
2299 implementation-defined in this case whether, if ADDEND is a
2300 quiet NaN, we raise invalid op; this implementation does so.
2301
2302 If we need to do the addition we can do so with normal
2303 precision. */
2304 if (fs == opOK)
2305 fs = addOrSubtract(addend, rounding_mode, false);
2306 }
2307
2308 return fs;
2309}
2310
2311/* Rounding-mode correct round to integral value. */
// NOTE(review): listing lines 2312 (the function header) and 2376 (the end of
// the convertFromAPInt call) are absent from this copy.
// Infinities, NaNs, zeros and values whose exponent already implies an
// integer return early; everything else goes through the add/subtract trick
// described further down.
2313 opStatus fs;
2314
2315 if (isInfinity())
2316 // [IEEE Std 754-2008 6.1]:
2317 // The behavior of infinity in floating-point arithmetic is derived from the
2318 // limiting cases of real arithmetic with operands of arbitrarily
2319 // large magnitude, when such a limit exists.
2320 // ...
2321 // Operations on infinite operands are usually exact and therefore signal no
2322 // exceptions ...
2323 return opOK;
2324
2325 if (isNaN()) {
2326 if (isSignaling()) {
2327 // [IEEE Std 754-2008 6.2]:
2328 // Under default exception handling, any operation signaling an invalid
2329 // operation exception and for which a floating-point result is to be
2330 // delivered shall deliver a quiet NaN.
2331 makeQuiet();
2332 // [IEEE Std 754-2008 6.2]:
2333 // Signaling NaNs shall be reserved operands that, under default exception
2334 // handling, signal the invalid operation exception(see 7.2) for every
2335 // general-computational and signaling-computational operation except for
2336 // the conversions described in 5.12.
2337 return opInvalidOp;
2338 } else {
2339 // [IEEE Std 754-2008 6.2]:
2340 // For an operation with quiet NaN inputs, other than maximum and minimum
2341 // operations, if a floating-point result is to be delivered the result
2342 // shall be a quiet NaN which should be one of the input NaNs.
2343 // ...
2344 // Every general-computational and quiet-computational operation involving
2345 // one or more input NaNs, none of them signaling, shall signal no
2346 // exception, except fusedMultiplyAdd might signal the invalid operation
2347 // exception(see 7.2).
2348 return opOK;
2349 }
2350 }
2351
2352 if (isZero()) {
2353 // [IEEE Std 754-2008 6.3]:
2354 // ... the sign of the result of conversions, the quantize operation, the
2355 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2356 // the sign of the first or only operand.
2357 return opOK;
2358 }
2359
2360 // If the exponent is large enough, we know that this value is already
2361 // integral, and the arithmetic below would potentially cause it to saturate
2362 // to +/-Inf. Bail out early instead.
2363 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2364 return opOK;
2365
2366 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2367 // precision of our format, and then subtract it back off again. The choice
2368 // of rounding modes for the addition/subtraction determines the rounding mode
2369 // for our integral rounding as well.
2370 // NOTE: When the input value is negative, we do subtraction followed by
2371 // addition instead.
// Build the integer 1 << (p - 1) in an APInt that is wide enough to hold it.
2372 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2373 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2374 IEEEFloat MagicConstant(*semantics);
2375 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2377 assert(fs == opOK);
// Giving the constant the sign of *this is what makes the negative case
// behave as "subtract, then add" in magnitude terms.
2378 MagicConstant.sign = sign;
2379
2380 // Preserve the input sign so that we can handle the case of zero result
2381 // correctly.
2382 bool inputSign = isNegative();
2383
2384 fs = add(MagicConstant, rounding_mode);
2385
2386 // Current value and 'MagicConstant' are both integers, so the result of the
2387 // subtraction is always exact according to Sterbenz' lemma.
// The status of this subtract is not stored; fs keeps the status of the add.
2388 subtract(MagicConstant, rounding_mode);
2389
2390 // Restore the input sign.
2391 if (inputSign != isNegative())
2392 changeSign();
2393
2394 return fs;
2395}
2396
2397
2398/* Comparison requires normalized numbers. */
// NOTE(review): listing line 2399 (the function header) and every `case`
// label of the switch below (2408-2414, 2417-2419, 2425-2427, 2433, 2441,
// 2444) are absent from this copy, so which category pairs map to which arm
// cannot be read off from here.
2400 cmpResult result;
2401
// Both operands must share the same semantics object.
2402 assert(semantics == rhs.semantics);
2403
// Dispatch on the pair (category of *this, category of rhs).
2404 switch (PackCategoriesIntoKey(category, rhs.category)) {
2405 default:
2406 llvm_unreachable(nullptr);
2407
2415 return cmpUnordered;
2416
// Arm decided solely by the sign of *this.
2420 if (sign)
2421 return cmpLessThan;
2422 else
2423 return cmpGreaterThan;
2424
// Arm decided solely by the sign of rhs.
2428 if (rhs.sign)
2429 return cmpGreaterThan;
2430 else
2431 return cmpLessThan;
2432
// Arm where equal signs mean equal values; otherwise the negative one is less.
2434 if (sign == rhs.sign)
2435 return cmpEqual;
2436 else if (sign)
2437 return cmpLessThan;
2438 else
2439 return cmpGreaterThan;
2440
2442 return cmpEqual;
2443
// The remaining arm leaves the switch and falls into the magnitude comparison.
2445 break;
2446 }
2447
2448 /* Two normal numbers. Do they have the same sign? */
2449 if (sign != rhs.sign) {
2450 if (sign)
2451 result = cmpLessThan;
2452 else
2453 result = cmpGreaterThan;
2454 } else {
2455 /* Compare absolute values; invert result if negative. */
2456 result = compareAbsoluteValue(rhs);
2457
2458 if (sign) {
2459 if (result == cmpLessThan)
2460 result = cmpGreaterThan;
2461 else if (result == cmpGreaterThan)
2462 result = cmpLessThan;
2463 }
2464 }
2465
2466 return result;
2467}
2468
2469/// IEEEFloat::convert - convert a value of one floating point type to another.
2470/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2471/// records whether the transformation lost information, i.e. whether
2472/// converting the result back to the original type will produce the
2473/// original value (this is almost the same as return value==opOK, but there
2474/// are edge cases where this is not so).
2475
// NOTE(review): listing lines 2476, 2479, 2486, 2528, 2562, 2564, 2572, 2592
// and 2597 are absent from this copy. That covers the first header line, the
// declaration and initialisation of `lostFraction`, and the tails of several
// conditions below, so those conditions are only partly visible.
2477 roundingMode rounding_mode,
2478 bool *losesInfo) {
2480 unsigned int newPartCount, oldPartCount;
2481 opStatus fs;
// shift > 0 means the target has more precision (extension), shift < 0 less
// (truncation).
2482 int shift;
2483 const fltSemantics &fromSemantics = *semantics;
// Sampled before the significand is touched; used for the NaN paths below.
2484 bool is_signaling = isSignaling();
2485
2487 newPartCount = partCountForBits(toSemantics.precision + 1);
2488 oldPartCount = partCount();
2489 shift = toSemantics.precision - fromSemantics.precision;
2490
2491 bool X86SpecialNan = false;
2492 if (&fromSemantics == &semX87DoubleExtended &&
2493 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2494 (!(*significandParts() & 0x8000000000000000ULL) ||
2495 !(*significandParts() & 0x4000000000000000ULL))) {
2496 // x86 has some unusual NaNs which cannot be represented in any other
2497 // format; note them here.
2498 X86SpecialNan = true;
2499 }
2500
2501 // If this is a truncation of a denormal number, and the target semantics
2502 // has larger exponent range than the source semantics (this can happen
2503 // when truncating from PowerPC double-double to double format), the
2504 // right shift could lose result mantissa bits. Adjust exponent instead
2505 // of performing excessive shift.
2506 // Also do a similar trick in case shifting denormal would produce zero
2507 // significand as this case isn't handled correctly by normalize.
2508 if (shift < 0 && isFiniteNonZero()) {
2509 int omsb = significandMSB() + 1;
2510 int exponentChange = omsb - fromSemantics.precision;
2511 if (exponent + exponentChange < toSemantics.minExponent)
2512 exponentChange = toSemantics.minExponent - exponent;
2513 if (exponentChange < shift)
2514 exponentChange = shift;
2515 if (exponentChange < 0) {
2516 shift -= exponentChange;
2517 exponent += exponentChange;
2518 } else if (omsb <= -shift) {
2519 exponentChange = omsb + shift - 1; // leave at least one bit set
2520 shift -= exponentChange;
2521 exponent += exponentChange;
2522 }
2523 }
2524
2525 // If this is a truncation, perform the shift before we narrow the storage.
2526 if (shift < 0 && (isFiniteNonZero() ||
2527 (category == fcNaN && semantics->nonFiniteBehavior !=
2529 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2530
2531 // Fix the storage so it can hold the new value.
2532 if (newPartCount > oldPartCount) {
2533 // The new type requires more storage; make it available.
2534 integerPart *newParts;
2535 newParts = new integerPart[newPartCount];
2536 APInt::tcSet(newParts, 0, newPartCount);
// Only finite non-zero values and NaNs have significand bits to carry over.
2537 if (isFiniteNonZero() || category==fcNaN)
2538 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2539 freeSignificand();
2540 significand.parts = newParts;
2541 } else if (newPartCount == 1 && oldPartCount != 1) {
2542 // Switch to built-in storage for a single part.
2543 integerPart newPart = 0;
2544 if (isFiniteNonZero() || category==fcNaN)
2545 newPart = significandParts()[0];
2546 freeSignificand();
2547 significand.part = newPart;
2548 }
2549
2550 // Now that we have the right storage, switch the semantics.
2551 semantics = &toSemantics;
2552
2553 // If this is an extension, perform the shift now that the storage is
2554 // available.
2555 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2556 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2557
2558 if (isFiniteNonZero()) {
// Finite values: rounding and range handling are left to normalize().
2559 fs = normalize(rounding_mode, lostFraction);
2560 *losesInfo = (fs != opOK);
2561 } else if (category == fcNaN) {
2563 *losesInfo =
2565 makeNaN(false, sign);
2566 return is_signaling ? opInvalidOp : opOK;
2567 }
2568
2569 // If NaN is negative zero, we need to create a new NaN to avoid converting
2570 // NaN to -Inf.
2571 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2573 makeNaN(false, false);
2574
2575 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2576
2577 // For x87 extended precision, we want to make a NaN, not a special NaN if
2578 // the input wasn't special either.
2579 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2580 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2581
2582 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2583 // This also guarantees that a sNaN does not become Inf on a truncation
2584 // that loses all payload bits.
2585 if (is_signaling) {
2586 makeQuiet();
2587 fs = opInvalidOp;
2588 } else {
2589 fs = opOK;
2590 }
2591 } else if (category == fcInfinity &&
// Infinity arm (rest of the condition is not visible): the value becomes a
// NaN of the same sign and the conversion is reported as inexact and lossy.
2593 makeNaN(false, sign);
2594 *losesInfo = true;
2595 fs = opInexact;
2596 } else if (category == fcZero &&
2598 // Negative zero loses info, but positive zero doesn't.
2599 *losesInfo =
2600 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2601 fs = *losesInfo ? opInexact : opOK;
2602 // NaN is negative zero means -0 -> +0, which can lose information
2603 sign = false;
2604 } else {
2605 *losesInfo = false;
2606 fs = opOK;
2607 }
2608
2609 return fs;
2610}
2611
2612/* Convert a floating point number to an integer according to the
2613 rounding mode. If the rounded integer value is out of range this
2614 returns an invalid operation exception and the contents of the
2615 destination parts are unspecified. If the rounded value is in
2616 range but the floating point number is not the exact integer, the C
2617 standard doesn't require an inexact exception to be raised. IEEE
2618 854 does require it so we do that.
2619
2620 Note that for conversions to integer type the C standard requires
2621 round-to-zero to always be used. */
// parts        - destination words; must hold at least partCountForBits(width).
// width        - bit width of the destination integer.
// isSigned     - whether the destination is a two's complement signed integer.
// *isExact     - set to true only when the value converts without any loss.
// Returns opOK, opInexact or opInvalidOp.
2622IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2623 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2624 roundingMode rounding_mode, bool *isExact) const {
2625 lostFraction lost_fraction;
2626 const integerPart *src;
2627 unsigned int dstPartsCount, truncatedBits;
2628
// Pessimistic default; only the exact paths below flip it to true.
2629 *isExact = false;
2630
2631 /* Handle the three special cases first. */
2632 if (category == fcInfinity || category == fcNaN)
2633 return opInvalidOp;
2634
2635 dstPartsCount = partCountForBits(width);
2636 assert(dstPartsCount <= parts.size() && "Integer too big");
2637
2638 if (category == fcZero) {
2639 APInt::tcSet(parts.data(), 0, dstPartsCount);
2640 // Negative zero can't be represented as an int.
2641 *isExact = !sign;
2642 return opOK;
2643 }
2644
2645 src = significandParts();
2646
2647 /* Step 1: place our absolute value, with any fraction truncated, in
2648 the destination. */
2649 if (exponent < 0) {
2650 /* Our absolute value is less than one; truncate everything. */
2651 APInt::tcSet(parts.data(), 0, dstPartsCount);
2652 /* For exponent -1 the integer bit represents .5, look at that.
2653 For smaller exponents leftmost truncated bit is 0. */
2654 truncatedBits = semantics->precision -1U - exponent;
2655 } else {
2656 /* We want the most significant (exponent + 1) bits; the rest are
2657 truncated. */
2658 unsigned int bits = exponent + 1U;
2659
2660 /* Hopelessly large in magnitude? */
2661 if (bits > width)
2662 return opInvalidOp;
2663
2664 if (bits < semantics->precision) {
2665 /* We truncate (semantics->precision - bits) bits. */
2666 truncatedBits = semantics->precision - bits;
2667 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2668 } else {
2669 /* We want at least as many bits as are available. */
2670 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2671 0);
2672 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2673 bits - semantics->precision);
2674 truncatedBits = 0;
2675 }
2676 }
2677
2678 /* Step 2: work out any lost fraction, and increment the absolute
2679 value if we would round away from zero. */
2680 if (truncatedBits) {
2681 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2682 truncatedBits);
2683 if (lost_fraction != lfExactlyZero &&
2684 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2685 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2686 return opInvalidOp; /* Overflow. */
2687 }
2688 } else {
2689 lost_fraction = lfExactlyZero;
2690 }
2691
2692 /* Step 3: check if we fit in the destination. */
// omsb is the number of bits needed for the rounded magnitude (0 for zero).
2693 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2694
2695 if (sign) {
2696 if (!isSigned) {
2697 /* Negative numbers cannot be represented as unsigned. */
2698 if (omsb != 0)
2699 return opInvalidOp;
2700 } else {
2701 /* It takes omsb bits to represent the unsigned integer value.
2702 We lose a bit for the sign, but care is needed as the
2703 maximally negative integer is a special case. */
2704 if (omsb == width &&
2705 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2706 return opInvalidOp;
2707
2708 /* This case can happen because of rounding. */
2709 if (omsb > width)
2710 return opInvalidOp;
2711 }
2712
2713 APInt::tcNegate (parts.data(), dstPartsCount);
2714 } else {
// A non-negative value may use all `width` bits when the destination is
// unsigned, but only width - 1 bits when it is signed.
2715 if (omsb >= width + !isSigned)
2716 return opInvalidOp;
2717 }
2718
2719 if (lost_fraction == lfExactlyZero) {
2720 *isExact = true;
2721 return opOK;
2722 } else
2723 return opInexact;
2724}
2725
2726/* Same as convertToSignExtendedInteger, except we provide
2727 deterministic values in case of an invalid operation exception,
2728 namely zero for NaNs and the minimal or maximal value respectively
2729 for underflow or overflow.
2730 The *isExact output tells whether the result is exact, in the sense
2731 that converting it back to the original floating point type produces
2732 the original value. This is almost equivalent to result==opOK,
2733 except for negative zeroes.
2734*/
// NOTE(review): listing lines 2735-2736 (the start of the function header)
// are absent from this copy; only the trailing parameters are visible.
2737 unsigned int width, bool isSigned,
2738 roundingMode rounding_mode, bool *isExact) const {
2739 opStatus fs;
2740
2741 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2742 isExact);
2743
// Only the invalid-operation outcome needs its destination overwritten with a
// well-defined value.
2744 if (fs == opInvalidOp) {
2745 unsigned int bits, dstPartsCount;
2746
2747 dstPartsCount = partCountForBits(width);
2748 assert(dstPartsCount <= parts.size() && "Integer too big");
2749
// `bits` is how many low bits get set: none for NaN (giving zero), one or
// zero for a negative value depending on isSigned, and width or width - 1
// for a positive value (the largest representable magnitude).
2750 if (category == fcNaN)
2751 bits = 0;
2752 else if (sign)
2753 bits = isSigned;
2754 else
2755 bits = width - isSigned;
2756
2757 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
// For a negative value in a signed destination, move the single set bit to
// the top position, which yields the most negative integer.
2758 if (sign && isSigned)
2759 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2760 }
2761
2762 return fs;
2763}
2764
2765/* Convert an unsigned integer SRC to a floating point number,
2766 rounding according to ROUNDING_MODE. The sign of the floating
2767 point number is not modified. */
2768IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2769 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2770 unsigned int omsb, precision, dstCount;
2771 integerPart *dst;
2772 lostFraction lost_fraction;
2773
2774 category = fcNormal;
2775 omsb = APInt::tcMSB(src, srcCount) + 1;
2776 dst = significandParts();
2777 dstCount = partCount();
2778 precision = semantics->precision;
2779
2780 /* We want the most significant PRECISION bits of SRC. There may not
2781 be that many; extract what we can. */
2782 if (precision <= omsb) {
2783 exponent = omsb - 1;
2784 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2785 omsb - precision);
2786 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2787 } else {
2788 exponent = precision - 1;
2789 lost_fraction = lfExactlyZero;
2790 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2791 }
2792
2793 return normalize(rounding_mode, lost_fraction);
2794}
2795
// NOTE(review): listing line 2796 (the start of the function header) is
// absent from this copy; `Val` and `isSigned` used below are presumably its
// leading parameters - confirm against the declaration.
// Sets this value from the integer Val: the sign is taken from Val when it is
// treated as signed and negative, and the magnitude is converted with
// convertFromUnsignedParts.
2797 roundingMode rounding_mode) {
2798 unsigned int partCount = Val.getNumWords();
// Work on a copy so the magnitude can be negated in place.
2799 APInt api = Val;
2800
2801 sign = false;
2802 if (isSigned && api.isNegative()) {
2803 sign = true;
2804 api = -api;
2805 }
2806
2807 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2808}
2809
2810/* Convert a two's complement integer SRC to a floating point number,
2811 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2812 integer is signed, in which case it must be sign-extended. */
// NOTE(review): listing lines 2813-2814 (the start of the function header)
// and 2821 (presumably the declaration of `copy`) are absent from this copy.
2815 unsigned int srcCount, bool isSigned,
2816 roundingMode rounding_mode) {
2817 opStatus status;
2818
// The topmost bit of the last part is the sign bit of the sign-extended input.
2819 if (isSigned &&
2820 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2822
2823 /* If we're signed and negative negate a copy. */
2824 sign = true;
// Temporary buffer for the magnitude; released right after the conversion.
2825 copy = new integerPart[srcCount];
2826 APInt::tcAssign(copy, src, srcCount);
2827 APInt::tcNegate(copy, srcCount);
2828 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2829 delete [] copy;
2830 } else {
2831 sign = false;
2832 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2833 }
2834
2835 return status;
2836}
2837
2838/* FIXME: should this just take a const APInt reference? */
// NOTE(review): listing lines 2839-2840 (the start of the function header)
// are absent from this copy; `parts` used below is presumably the leading
// parameter - confirm against the declaration.
// Sets this value from the `width`-bit integer stored in `parts`. When
// isSigned is set and bit width - 1 is set, the value is treated as negative
// and its magnitude is converted.
2841 unsigned int width, bool isSigned,
2842 roundingMode rounding_mode) {
2843 unsigned int partCount = partCountForBits(width);
// Wrap the raw words in an APInt so the value can be negated at exactly
// `width` bits.
2844 APInt api = APInt(width, ArrayRef(parts, partCount));
2845
2846 sign = false;
2847 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2848 sign = true;
2849 api = -api;
2850 }
2851
2852 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2853}
2854
// Parse the hexadecimal significand and binary exponent in `s` into this
// value and round it with `rounding_mode`. Malformed text is reported through
// the error paths below (createError or a propagated helper error).
// NOTE(review): listing lines 2855 (the return type) and 2872 (presumably the
// declaration of `dot`) are absent from this copy.
2856IEEEFloat::convertFromHexadecimalString(StringRef s,
2857 roundingMode rounding_mode) {
2858 lostFraction lost_fraction = lfExactlyZero;
2859
2860 category = fcNormal;
2861 zeroSignificand();
2862 exponent = 0;
2863
2864 integerPart *significand = significandParts();
2865 unsigned partsCount = partCount();
// Digits are written from the top of the significand storage downwards, four
// bits at a time; bitPos is the position just above the next free nibble.
2866 unsigned bitPos = partsCount * integerPartWidth;
2867 bool computedTrailingFraction = false;
2868
2869 // Skip leading zeroes and any (hexa)decimal point.
2870 StringRef::iterator begin = s.begin();
2871 StringRef::iterator end = s.end();
2873 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2874 if (!PtrOrErr)
2875 return PtrOrErr.takeError();
2876 StringRef::iterator p = *PtrOrErr;
2877 StringRef::iterator firstSignificantDigit = p;
2878
2879 while (p != end) {
2880 integerPart hex_value;
2881
2882 if (*p == '.') {
// `dot == end` is used as the "no dot seen yet" marker.
2883 if (dot != end)
2884 return createError("String contains multiple dots");
2885 dot = p++;
2886 continue;
2887 }
2888
2889 hex_value = hexDigitValue(*p);
// Anything that is not a hex digit ends the significand.
2890 if (hex_value == UINT_MAX)
2891 break;
2892
2893 p++;
2894
2895 // Store the number while we have space.
2896 if (bitPos) {
2897 bitPos -= 4;
2898 hex_value <<= bitPos % integerPartWidth;
2899 significand[bitPos / integerPartWidth] |= hex_value;
2900 } else if (!computedTrailingFraction) {
// Storage is full: classify the remaining digits once as a lost fraction.
2901 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2902 if (!FractOrErr)
2903 return FractOrErr.takeError();
2904 lost_fraction = *FractOrErr;
2905 computedTrailingFraction = true;
2906 }
2907 }
2908
2909 /* Hex floats require an exponent but not a hexadecimal point. */
2910 if (p == end)
2911 return createError("Hex strings require an exponent");
2912 if (*p != 'p' && *p != 'P')
2913 return createError("Invalid character in significand");
2914 if (p == begin)
2915 return createError("Significand has no digits");
2916 if (dot != end && p - begin == 1)
2917 return createError("Significand has no digits");
2918
2919 /* Ignore the exponent if we are zero. */
2920 if (p != firstSignificantDigit) {
2921 int expAdjustment;
2922
2923 /* Implicit hexadecimal point? */
2924 if (dot == end)
2925 dot = p;
2926
2927 /* Calculate the exponent adjustment implicit in the number of
2928 significant digits. */
2929 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
// A negative distance also counts the dot character itself; take it out.
2930 if (expAdjustment < 0)
2931 expAdjustment++;
2932 expAdjustment = expAdjustment * 4 - 1;
2933
2934 /* Adjust for writing the significand starting at the most
2935 significant nibble. */
2936 expAdjustment += semantics->precision;
2937 expAdjustment -= partsCount * integerPartWidth;
2938
2939 /* Adjust for the given exponent. */
2940 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2941 if (!ExpOrErr)
2942 return ExpOrErr.takeError();
2943 exponent = *ExpOrErr;
2944 }
2945
2946 return normalize(rounding_mode, lost_fraction);
2947}
2948
// Set this value to decSigParts * 10^exp, rounded with `rounding_mode`.
// The product (or quotient, for negative exp) with 5^|exp| is computed in a
// wider working format, and the working precision is doubled until the error
// bound shows that truncating to the target precision rounds correctly.
// NOTE(review): listing lines 2949 (the return type), 2955 (presumably the
// declaration of `pow5Parts`), 2979, 2981 (the tails of the two
// convertFromUnsignedParts calls) and 3008 (the start of the assertion ending
// at 3009) are absent from this copy.
2950IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2951 unsigned sigPartCount, int exp,
2952 roundingMode rounding_mode) {
2953 unsigned int parts, pow5PartCount;
// Scratch semantics for the working format; its precision is filled in on
// every pass of the loop below.
2954 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2956 bool isNearest;
2957
2958 isNearest = (rounding_mode == rmNearestTiesToEven ||
2959 rounding_mode == rmNearestTiesToAway);
2960
2961 parts = partCountForBits(semantics->precision + 11);
2962
2963 /* Calculate pow(5, abs(exp)). */
2964 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2965
// No exit condition here: the loop ends through the return at the bottom,
// and every failed pass doubles the working width.
2966 for (;; parts *= 2) {
2967 opStatus sigStatus, powStatus;
2968 unsigned int excessPrecision, truncatedBits;
2969
2970 calcSemantics.precision = parts * integerPartWidth - 1;
2971 excessPrecision = calcSemantics.precision - semantics->precision;
2972 truncatedBits = excessPrecision;
2973
2974 IEEEFloat decSig(calcSemantics, uninitialized);
2975 decSig.makeZero(sign);
2976 IEEEFloat pow5(calcSemantics);
2977
2978 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2980 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2982 /* Add exp, as 10^n = 5^n * 2^n. */
2983 decSig.exponent += exp;
2984
2985 lostFraction calcLostFraction;
2986 integerPart HUerr, HUdistance;
2987 unsigned int powHUerr;
2988
2989 if (exp >= 0) {
2990 /* multiplySignificand leaves the precision-th bit set to 1. */
2991 calcLostFraction = decSig.multiplySignificand(pow5);
2992 powHUerr = powStatus != opOK;
2993 } else {
2994 calcLostFraction = decSig.divideSignificand(pow5);
2995 /* Denormal numbers have less precision. */
2996 if (decSig.exponent < semantics->minExponent) {
2997 excessPrecision += (semantics->minExponent - decSig.exponent);
2998 truncatedBits = excessPrecision;
// excessPrecision is capped at the working precision; truncatedBits keeps
// the uncapped count.
2999 if (excessPrecision > calcSemantics.precision)
3000 excessPrecision = calcSemantics.precision;
3001 }
3002 /* Extra half-ulp lost in reciprocal of exponent. */
3003 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
3004 }
3005
3006 /* Both multiplySignificand and divideSignificand return the
3007 result with the integer bit set. */
3009 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3010
// Compare the accumulated error bound with twice the ulp distance to the
// nearest rounding boundary.
3011 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
3012 powHUerr);
3013 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
3014 excessPrecision, isNearest);
3015
3016 /* Are we guaranteed to round correctly if we truncate? */
3017 if (HUdistance >= HUerr) {
3018 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
3019 calcSemantics.precision - excessPrecision,
3020 excessPrecision);
3021 /* Take the exponent of decSig. If we tcExtract-ed less bits
3022 above we must adjust our exponent to compensate for the
3023 implicit right shift. */
3024 exponent = (decSig.exponent + semantics->precision
3025 - (calcSemantics.precision - excessPrecision));
3026 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
3027 decSig.partCount(),
3028 truncatedBits);
3029 return normalize(rounding_mode, calcLostFraction);
3030 }
3031 }
3032}
3033
// Parse the decimal floating-point text `str` into this value, rounding with
// `rounding_mode`. Zero and clearly out-of-range exponents are settled by
// cheap integer tests; everything else is turned into a big integer and
// passed to roundSignificandWithExponent.
// NOTE(review): listing line 3034 (the return type) is absent from this copy.
3035IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3036 decimalInfo D;
3037 opStatus fs;
3038
3039 /* Scan the text. */
3040 StringRef::iterator p = str.begin();
3041 if (Error Err = interpretDecimal(p, str.end(), &D))
3042 return std::move(Err);
3043
3044 /* Handle the quick cases. First the case of no significant digits,
3045 i.e. zero, and then exponents that are obviously too large or too
3046 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3047 definitely overflows if
3048
3049 (exp - 1) * L >= maxExponent
3050
3051 and definitely underflows to zero where
3052
3053 (exp + 1) * L <= minExponent - precision
3054
3055 With integer arithmetic the tightest bounds for L are
3056
3057 93/28 < L < 196/59 [ numerator <= 256 ]
3058 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3059 */
3060
3061 // Test if we have a zero number allowing for strings with no null terminators
3062 // and zero decimals with non-zero exponents.
3063 //
3064 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3065 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3066 // be at most one dot. On the other hand, if we have a zero with a non-zero
3067 // exponent, then we know that D.firstSigDigit will be non-numeric.
3068 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3069 category = fcZero;
3070 fs = opOK;
// Formats whose NaN encoding is the negative-zero bit pattern cannot hold -0.
3071 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3072 sign = false;
3073
3074 /* Check whether the normalized exponent is high enough to overflow
3075 max during the log-rebasing in the max-exponent check below. */
3076 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3077 fs = handleOverflow(rounding_mode);
3078
3079 /* If it wasn't, then it also wasn't high enough to overflow max
3080 during the log-rebasing in the min-exponent check. Check that it
3081 won't overflow min in either check, then perform the min-exponent
3082 check. */
3083 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3084 (D.normalizedExponent + 1) * 28738 <=
3085 8651 * (semantics->minExponent - (int) semantics->precision)) {
3086 /* Underflow to zero and round. */
3087 category = fcNormal;
3088 zeroSignificand();
// A zero significand with a "less than half" lost fraction lets normalize()
// apply the rounding mode to the tiny value.
3089 fs = normalize(rounding_mode, lfLessThanHalf);
3090
3091 /* We can finally safely perform the max-exponent check. */
3092 } else if ((D.normalizedExponent - 1) * 42039
3093 >= 12655 * semantics->maxExponent) {
3094 /* Overflow and round. */
3095 fs = handleOverflow(rounding_mode);
3096 } else {
3097 integerPart *decSignificand;
3098 unsigned int partCount;
3099
3100 /* A tight upper bound on number of bits required to hold an
3101 N-digit decimal integer is N * 196 / 59. Allocate enough space
3102 to hold the full significand, and an extra part required by
3103 tcMultiplyPart. */
3104 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3105 partCount = partCountForBits(1 + 196 * partCount / 59);
3106 decSignificand = new integerPart[partCount + 1];
// From here on partCount counts the parts of decSignificand actually in use.
3107 partCount = 0;
3108
3109 /* Convert to binary efficiently - we do almost all multiplication
3110 in an integerPart. When this would overflow do we do a single
3111 bignum multiplication, and then revert again to multiplication
3112 in an integerPart. */
3113 do {
3114 integerPart decValue, val, multiplier;
3115
3116 val = 0;
3117 multiplier = 1;
3118
3119 do {
// Step over the decimal point; stop if it was the last character.
3120 if (*p == '.') {
3121 p++;
3122 if (p == str.end()) {
3123 break;
3124 }
3125 }
3126 decValue = decDigitValue(*p++);
3127 if (decValue >= 10U) {
// Release the scratch buffer before reporting the error.
3128 delete[] decSignificand;
3129 return createError("Invalid character in significand");
3130 }
3131 multiplier *= 10;
3132 val = val * 10 + decValue;
3133 /* The maximum number that can be multiplied by ten with any
3134 digit added without overflowing an integerPart. */
3135 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3136
3137 /* Multiply out the current part. */
3138 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3139 partCount, partCount + 1, false);
3140
3141 /* If we used another part (likely but not guaranteed), increase
3142 the count. */
3143 if (decSignificand[partCount])
3144 partCount++;
3145 } while (p <= D.lastSigDigit);
3146
3147 category = fcNormal;
3148 fs = roundSignificandWithExponent(decSignificand, partCount,
3149 D.exponent, rounding_mode);
3150
3151 delete [] decSignificand;
3152 }
3153
3154 return fs;
3155}
3156
3157bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3158 const size_t MIN_NAME_SIZE = 3;
3159
3160 if (str.size() < MIN_NAME_SIZE)
3161 return false;
3162
3163 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3164 makeInf(false);
3165 return true;
3166 }
3167
3168 bool IsNegative = str.front() == '-';
3169 if (IsNegative) {
3170 str = str.drop_front();
3171 if (str.size() < MIN_NAME_SIZE)
3172 return false;
3173
3174 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3175 makeInf(true);
3176 return true;
3177 }
3178 }
3179
3180 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3181 bool IsSignaling = str.front() == 's' || str.front() == 'S';
3182 if (IsSignaling) {
3183 str = str.drop_front();
3184 if (str.size() < MIN_NAME_SIZE)
3185 return false;
3186 }
3187
3188 if (str.starts_with("nan") || str.starts_with("NaN")) {
3189 str = str.drop_front(3);
3190
3191 // A NaN without payload.
3192 if (str.empty()) {
3193 makeNaN(IsSignaling, IsNegative);
3194 return true;
3195 }
3196
3197 // Allow the payload to be inside parentheses.
3198 if (str.front() == '(') {
3199 // Parentheses should be balanced (and not empty).
3200 if (str.size() <= 2 || str.back() != ')')
3201 return false;
3202
3203 str = str.slice(1, str.size() - 1);
3204 }
3205
3206 // Determine the payload number's radix.
3207 unsigned Radix = 10;
3208 if (str[0] == '0') {
3209 if (str.size() > 1 && tolower(str[1]) == 'x') {
3210 str = str.drop_front(2);
3211 Radix = 16;
3212 } else
3213 Radix = 8;
3214 }
3215
3216 // Parse the payload and make the NaN.
3217 APInt Payload;
3218 if (!str.getAsInteger(Radix, Payload)) {
3219 makeNaN(IsSignaling, IsNegative, &Payload);
3220 return true;
3221 }
3222 }
3223
3224 return false;
3225}
3226
// NOTE(review): the signature line(s) of this function are not present in
// this listing (the embedded numbering jumps from 3226 to 3229). The body
// below reads a string `str` and a `rounding_mode` and is presumably
// IEEEFloat::convertFromString -- confirm against the original file.
//
// Overall flow: reject the empty string, try the textual specials (inf/NaN),
// strip one optional sign character, then dispatch to the hexadecimal parser
// when a "0x"/"0X" prefix follows, otherwise to the decimal parser.
3229 if (str.empty())
3230 return createError("Invalid string length");
3231
3232 // Handle special cases.
3233 if (convertFromStringSpecials(str))
3234 return opOK;
3235
3236 /* Handle a leading minus sign. */
// A leading '+' is consumed as well; only '-' sets the sign.
3237 StringRef::iterator p = str.begin();
3238 size_t slen = str.size();
3239 sign = *p == '-' ? 1 : 0;
3240 if (*p == '-' || *p == '+') {
3241 p++;
3242 slen--;
3243 if (!slen)
3244 return createError("String has no digits");
3245 }
3246
// Hexadecimal input: the two prefix characters are skipped, and a bare "0x"
// with nothing after it is rejected.
3247 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3248 if (slen == 2)
3249 return createError("Invalid string");
3250 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3251 rounding_mode);
3252 }
3253
// Everything else is handed to the decimal parser (sign already stripped).
3254 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3255}
3256
3257/* Write out a hexadecimal representation of the floating point value
3258 to DST, which must be of sufficient size, in the C99 form
3259 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3260 excluding the terminating NUL.
3261
3262 If UPPERCASE, the output is in upper case, otherwise in lower case.
3263
3264 HEXDIGITS digits appear altogether, rounding the value if
3265 necessary. If HEXDIGITS is 0, the minimal precision to display the
3266 number precisely is used instead. If nothing would appear after
3267 the decimal point it is suppressed.
3268
3269 The decimal exponent is always printed and has at least one digit.
3270 Zero values display an exponent of zero. Infinities and NaNs
3271 appear as "infinity" or "nan" respectively.
3272
3273 The above rules are as specified by C99. There is ambiguity about
3274 what the leading hexadecimal digit should be. This implementation
3275 uses whatever is necessary so that the exponent is displayed as
3276 stored. This implies the exponent will fall within the IEEE format
3277 range, and the leading hexadecimal digit will be 0 (for denormals),
3278 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3279 any other digits zero).
3280*/
3281unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3282 bool upperCase,
3283 roundingMode rounding_mode) const {
3284 char *p;
3285
3286 p = dst;
3287 if (sign)
3288 *dst++ = '-';
3289
3290 switch (category) {
3291 case fcInfinity:
3292 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3293 dst += sizeof infinityL - 1;
3294 break;
3295
3296 case fcNaN:
3297 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3298 dst += sizeof NaNU - 1;
3299 break;
3300
3301 case fcZero:
3302 *dst++ = '0';
3303 *dst++ = upperCase ? 'X': 'x';
3304 *dst++ = '0';
3305 if (hexDigits > 1) {
3306 *dst++ = '.';
3307 memset (dst, '0', hexDigits - 1);
3308 dst += hexDigits - 1;
3309 }
3310 *dst++ = upperCase ? 'P': 'p';
3311 *dst++ = '0';
3312 break;
3313
3314 case fcNormal:
3315 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3316 break;
3317 }
3318
3319 *dst = 0;
3320
3321 return static_cast<unsigned int>(dst - p);
3322}
3323
3324/* Does the hard work of outputting the correctly rounded hexadecimal
3325 form of a normal floating point number with the specified number of
3326 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3327 digits necessary to print the value precisely is output. */
// Writes "0x"/"0X", the hexadecimal digits (with a '.' after the first digit
// when more than one digit is produced), 'p'/'P' and finally the exponent via
// writeSignedDecimal(), whose return value is returned to the caller.
// dst: output position; hexDigits: requested digit count, 0 meaning "as many
// as needed"; upperCase: selects the digit table and the X/P letters;
// rounding_mode: used only when digits have to be dropped.
3328char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3329 bool upperCase,
3330 roundingMode rounding_mode) const {
3331 unsigned int count, valueBits, shift, partsCount, outputDigits;
3332 const char *hexDigitChars;
3333 const integerPart *significand;
3334 char *p;
3335 bool roundUp;
3336
3337 *dst++ = '0';
3338 *dst++ = upperCase ? 'X': 'x';
3339
3340 roundUp = false;
3341 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3342
3343 significand = significandParts();
3344 partsCount = partCount();
3345
3346 /* +3 because the first digit only uses the single integer bit, so
3347 we have 3 virtual zero most-significant-bits. */
3348 valueBits = semantics->precision + 3;
// Left shift that aligns the top of the (virtually widened) value with the
// top of an integerPart.
3349 shift = integerPartWidth - valueBits % integerPartWidth;
3350
3351 /* The natural number of digits required ignoring trailing
3352 insignificant zeroes. */
3353 outputDigits = (valueBits - significandLSB () + 3) / 4;
3354
3355 /* hexDigits of zero means use the required number for the
3356 precision. Otherwise, see if we are truncating. If we are,
3357 find out if we need to round away from zero. */
3358 if (hexDigits) {
3359 if (hexDigits < outputDigits) {
3360 /* We are dropping non-zero bits, so need to check how to round.
3361 "bits" is the number of dropped bits. */
3362 unsigned int bits;
3363 lostFraction fraction;
3364
3365 bits = valueBits - hexDigits * 4;
3366 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3367 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3368 }
3369 outputDigits = hexDigits;
3370 }
3371
3372 /* Write the digits consecutively, and start writing in the location
3373 of the hexadecimal point. We move the most significant digit
3374 left and add the hexadecimal point later. */
// The ++dst leaves one unused character at p[-1]; the leading digit is moved
// there once rounding is done (see the end of the function).
3375 p = ++dst;
3376
// Number of integerPart-sized chunks needed to cover valueBits.
3377 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3378
// Emit digits from the most significant chunk downwards until either the
// requested digits or the chunks run out.
3379 while (outputDigits && count) {
3380 integerPart part;
3381
3382 /* Put the most significant integerPartWidth bits in "part". */
3383 if (--count == partsCount)
3384 part = 0; /* An imaginary higher zero part. */
3385 else
3386 part = significand[count] << shift;
3387
// Pull in the bits that the shift moved out of the next lower part.
3388 if (count && shift)
3389 part |= significand[count - 1] >> (integerPartWidth - shift);
3390
3391 /* Convert as much of "part" to hexdigits as we can. */
3392 unsigned int curDigits = integerPartWidth / 4;
3393
3394 if (curDigits > outputDigits)
3395 curDigits = outputDigits;
3396 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3397 outputDigits -= curDigits;
3398 }
3399
3400 if (roundUp) {
3401 char *q = dst;
3402
// Increment the written digit string from its last digit, continuing to the
// left while the incremented digit wrapped around to '0'.
3403 /* Note that hexDigitChars has a trailing '0'. */
3404 do {
3405 q--;
3406 *q = hexDigitChars[hexDigitValue (*q) + 1];
3407 } while (*q == '0');
3408 assert(q >= p);
3409 } else {
3410 /* Add trailing zeroes. */
3411 memset (dst, '0', outputDigits);
3412 dst += outputDigits;
3413 }
3414
3415 /* Move the most significant digit to before the point, and if there
3416 is something after the decimal point add it. This must come
3417 after rounding above. */
3418 p[-1] = p[0];
// Exactly one digit was written: drop the now-duplicated character instead of
// emitting a point with nothing after it.
3419 if (dst -1 == p)
3420 dst--;
3421 else
3422 p[0] = '.';
3423
3424 /* Finally output the exponent. */
3425 *dst++ = upperCase ? 'P': 'p';
3426
3427 return writeSignedDecimal (dst, exponent);
3428}
3429
// NOTE(review): the signature of this function is missing from the listing
// (embedded numbering jumps from 3429 to 3431), and so is line 3440, which
// presumably opens the range-hash call whose two arguments appear on
// 3441-3442 -- confirm against the original file.
//
// Values that are not finite-non-zero are hashed from category, sign and
// precision only; finite non-zero values additionally mix in the exponent and
// the significand words.
3431 if (!Arg.isFiniteNonZero())
3432 return hash_combine((uint8_t)Arg.category,
3433 // NaN has no sign, fix it at zero.
3434 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3435 Arg.semantics->precision);
3436
3437 // Normal floats need their exponent and significand hashed.
3438 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3439 Arg.semantics->precision, Arg.exponent,
3441 Arg.significandParts(),
3442 Arg.significandParts() + Arg.partCount()));
3443}
3444
3445// Conversion from APFloat to/from host float/double. It may eventually be
3446// possible to eliminate these and have everybody deal with APFloats, but that
3447// will take a while. This approach will not easily extend to long double.
3448// Current implementation requires integerPartWidth==64, which is correct at
3449// the moment but could be made more general.
3450
3451// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3452// the actual IEEE representations. We compensate for that here.
3453
3454APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3455 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3456 assert(partCount()==2);
3457
3458 uint64_t myexponent, mysignificand;
3459
3460 if (isFiniteNonZero()) {
3461 myexponent = exponent+16383; //bias
3462 mysignificand = significandParts()[0];
3463 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3464 myexponent = 0; // denormal
3465 } else if (category==fcZero) {
3466 myexponent = 0;
3467 mysignificand = 0;
3468 } else if (category==fcInfinity) {
3469 myexponent = 0x7fff;
3470 mysignificand = 0x8000000000000000ULL;
3471 } else {
3472 assert(category == fcNaN && "Unknown category");
3473 myexponent = 0x7fff;
3474 mysignificand = significandParts()[0];
3475 }
3476
3477 uint64_t words[2];
3478 words[0] = mysignificand;
3479 words[1] = ((uint64_t)(sign & 1) << 15) |
3480 (myexponent & 0x7fffLL);
3481 return APInt(80, words);
3482}
3483
// Splits this semPPCDoubleDoubleLegacy value into two IEEE doubles and returns
// them as a 128-bit APInt: words[0] is the value converted to double, words[1]
// is the double holding the remainder (or zero when the first conversion lost
// nothing or produced a special value).
3484APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3485 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3486 assert(partCount()==2);
3487
3488 uint64_t words[2];
3489 opStatus fs;
3490 bool losesInfo;
3491
3492 // Convert number to double. To avoid spurious underflows, we re-
3493 // normalize against the "double" minExponent first, and only *then*
3494 // truncate the mantissa. The result of that second conversion
3495 // may be inexact, but should never underflow.
3496 // Declare fltSemantics before APFloat that uses it (and
3497 // saves pointer to it) to ensure correct destruction order.
3498 fltSemantics extendedSemantics = *semantics;
3499 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3500 IEEEFloat extended(*this);
3501 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3502 assert(fs == opOK && !losesInfo);
3503 (void)fs;
3504
// u = the value converted to double; losesInfo from this conversion decides
// below whether a second double is needed.
3505 IEEEFloat u(extended);
3506 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3507 assert(fs == opOK || fs == opInexact);
3508 (void)fs;
3509 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3510
3511 // If conversion was exact or resulted in a special case, we're done;
3512 // just set the second double to zero. Otherwise, re-convert back to
3513 // the extended format and compute the difference. This now should
3514 // convert exactly to double.
3515 if (u.isFiniteNonZero() && losesInfo) {
3516 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3517 assert(fs == opOK && !losesInfo);
3518 (void)fs;
3519
// v = extended - u, i.e. the part the first double could not hold.
3520 IEEEFloat v(extended);
3521 v.subtract(u, rmNearestTiesToEven);
3522 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3523 assert(fs == opOK && !losesInfo);
3524 (void)fs;
3525 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3526 } else {
3527 words[1] = 0;
3528 }
3529
3530 return APInt(128, words);
3531}
3532
3533template <const fltSemantics &S>
3534APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3535 assert(semantics == &S);
3536
3537 constexpr int bias = -(S.minExponent - 1);
3538 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3539 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3540 constexpr integerPart integer_bit =
3541 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3542 constexpr uint64_t significand_mask = integer_bit - 1;
3543 constexpr unsigned int exponent_bits =
3544 S.sizeInBits - 1 - trailing_significand_bits;
3545 static_assert(exponent_bits < 64);
3546 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3547
3548 uint64_t myexponent;
3549 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3550 mysignificand;
3551
3552 if (isFiniteNonZero()) {
3553 myexponent = exponent + bias;
3554 std::copy_n(significandParts(), mysignificand.size(),
3555 mysignificand.begin());
3556 if (myexponent == 1 &&
3557 !(significandParts()[integer_bit_part] & integer_bit))
3558 myexponent = 0; // denormal
3559 } else if (category == fcZero) {
3560 myexponent = ::exponentZero(S) + bias;
3561 mysignificand.fill(0);
3562 } else if (category == fcInfinity) {
3563 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3564 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3565 llvm_unreachable("semantics don't support inf!");
3566 myexponent = ::exponentInf(S) + bias;
3567 mysignificand.fill(0);
3568 } else {
3569 assert(category == fcNaN && "Unknown category!");
3570 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3571 llvm_unreachable("semantics don't support NaN!");
3572 myexponent = ::exponentNaN(S) + bias;
3573 std::copy_n(significandParts(), mysignificand.size(),
3574 mysignificand.begin());
3575 }
3576 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3577 auto words_iter =
3578 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3579 if constexpr (significand_mask != 0) {
3580 // Clear the integer bit.
3581 words[mysignificand.size() - 1] &= significand_mask;
3582 }
3583 std::fill(words_iter, words.end(), uint64_t{0});
3584 constexpr size_t last_word = words.size() - 1;
3585 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3586 << ((S.sizeInBits - 1) % 64);
3587 words[last_word] |= shifted_sign;
3588 uint64_t shifted_exponent = (myexponent & exponent_mask)
3589 << (trailing_significand_bits % 64);
3590 words[last_word] |= shifted_exponent;
3591 if constexpr (last_word == 0) {
3592 return APInt(S.sizeInBits, words[0]);
3593 }
3594 return APInt(S.sizeInBits, words);
3595}
3596
/// Bit image of this value for the semIEEEquad semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a two-part significand.
3597APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3598 assert(partCount() == 2);
3599 return convertIEEEFloatToAPInt<semIEEEquad>();
3600}
3601
/// Bit image of this value for the semIEEEdouble semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3602APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3603 assert(partCount()==1);
3604 return convertIEEEFloatToAPInt<semIEEEdouble>();
3605}
3606
/// Bit image of this value for the semIEEEsingle semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3607APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3608 assert(partCount()==1);
3609 return convertIEEEFloatToAPInt<semIEEEsingle>();
3610}
3611
/// Bit image of this value for the semBFloat semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3612APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3613 assert(partCount() == 1);
3614 return convertIEEEFloatToAPInt<semBFloat>();
3615}
3616
/// Bit image of this value for the semIEEEhalf semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3617APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3618 assert(partCount()==1);
3619 return convertIEEEFloatToAPInt<semIEEEhalf>();
3620}
3621
/// Bit image of this value for the semFloat8E5M2 semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3622APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3623 assert(partCount() == 1);
3624 return convertIEEEFloatToAPInt<semFloat8E5M2>();
3625}
3626
/// Bit image of this value for the semFloat8E5M2FNUZ semantics, produced by
/// the generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3627APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3628 assert(partCount() == 1);
3629 return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3630}
3631
/// Bit image of this value for the semFloat8E4M3 semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3632APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3633 assert(partCount() == 1);
3634 return convertIEEEFloatToAPInt<semFloat8E4M3>();
3635}
3636
/// Bit image of this value for the semFloat8E4M3FN semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3637APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3638 assert(partCount() == 1);
3639 return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3640}
3641
/// Bit image of this value for the semFloat8E4M3FNUZ semantics, produced by
/// the generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3642APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3643 assert(partCount() == 1);
3644 return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3645}
3646
/// Bit image of this value for the semFloat8E4M3B11FNUZ semantics, produced by
/// the generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3647APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3648 assert(partCount() == 1);
3649 return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3650}
3651
/// Bit image of this value for the semFloat8E3M4 semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3652APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3653 assert(partCount() == 1);
3654 return convertIEEEFloatToAPInt<semFloat8E3M4>();
3655}
3656
/// Bit image of this value for the semFloatTF32 semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3657APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3658 assert(partCount() == 1);
3659 return convertIEEEFloatToAPInt<semFloatTF32>();
3660}
3661
/// Bit image of this value for the semFloat6E3M2FN semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3662APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3663 assert(partCount() == 1);
3664 return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3665}
3666
/// Bit image of this value for the semFloat6E2M3FN semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3667APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3668 assert(partCount() == 1);
3669 return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3670}
3671
/// Bit image of this value for the semFloat4E2M1FN semantics, produced by the
/// generic convertIEEEFloatToAPInt helper. Asserts a one-part significand.
3672APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3673 assert(partCount() == 1);
3674 return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
3675}
3676
3677// This function creates an APInt that is just a bit map of the floating
3678// point constant as it would appear in memory. It is not a conversion,
3679// and treating the result as a normal integer is unlikely to be useful.
3680
// NOTE(review): the signature line of this function is missing from the
// listing (embedded numbering jumps from 3680 to 3682); judging by the callers
// below it is presumably IEEEFloat::bitcastToAPInt -- confirm against the
// original file.
//
// Dispatches on the identity of the semantics object to the matching
// per-format conversion routine. The x87 double-extended format is the
// fall-through case, guarded by an assert.
3682 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3683 return convertHalfAPFloatToAPInt();
3684
3685 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3686 return convertBFloatAPFloatToAPInt();
3687
3688 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3689 return convertFloatAPFloatToAPInt();
3690
3691 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3692 return convertDoubleAPFloatToAPInt();
3693
3694 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3695 return convertQuadrupleAPFloatToAPInt();
3696
3697 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3698 return convertPPCDoubleDoubleAPFloatToAPInt();
3699
3700 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3701 return convertFloat8E5M2APFloatToAPInt();
3702
3703 if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3704 return convertFloat8E5M2FNUZAPFloatToAPInt();
3705
3706 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3707 return convertFloat8E4M3APFloatToAPInt();
3708
3709 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3710 return convertFloat8E4M3FNAPFloatToAPInt();
3711
3712 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3713 return convertFloat8E4M3FNUZAPFloatToAPInt();
3714
3715 if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3716 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3717
3718 if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
3719 return convertFloat8E3M4APFloatToAPInt();
3720
3721 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3722 return convertFloatTF32APFloatToAPInt();
3723
3724 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3725 return convertFloat6E3M2FNAPFloatToAPInt();
3726
3727 if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3728 return convertFloat6E2M3FNAPFloatToAPInt();
3729
3730 if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3731 return convertFloat4E2M1FNAPFloatToAPInt();
3732
// Only one known format is left at this point.
3733 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3734 "unknown format!");
3735 return convertF80LongDoubleAPFloatToAPInt();
3736}
3737
// NOTE(review): the signature line is missing from the listing (embedded
// numbering jumps from 3737 to 3739); presumably IEEEFloat::convertToFloat --
// confirm against the original file.
// Requires IEEEsingle semantics; the value is obtained by reinterpreting the
// bit image from bitcastToAPInt() through APInt::bitsToFloat().
3739 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3740 "Float semantics are not IEEEsingle");
3741 APInt api = bitcastToAPInt();
3742 return api.bitsToFloat();
3743}
3744
// NOTE(review): the signature line is missing from the listing (embedded
// numbering jumps from 3744 to 3746); presumably IEEEFloat::convertToDouble --
// confirm against the original file.
// Requires IEEEdouble semantics; the value is obtained by reinterpreting the
// bit image from bitcastToAPInt() through APInt::bitsToDouble().
3746 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3747 "Float semantics are not IEEEdouble");
3748 APInt api = bitcastToAPInt();
3749 return api.bitsToDouble();
3750}
3751
3752#ifdef HAS_IEE754_FLOAT128
3753float128 IEEEFloat::convertToQuad() const {
3754 assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&
3755 "Float semantics are not IEEEquads");
3756 APInt api = bitcastToAPInt();
3757 return api.bitsToQuad();
3758}
3759#endif
3760
3761/// Integer bit is explicit in this format. Intel hardware (387 and later)
3762/// does not support these bit patterns:
3763/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3764/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3765/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3766/// exponent = 0, integer bit 1 ("pseudodenormal")
3767/// At the moment, the first three are treated as NaNs, the last one as Normal.
3768void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3769 uint64_t i1 = api.getRawData()[0];
3770 uint64_t i2 = api.getRawData()[1];
3771 uint64_t myexponent = (i2 & 0x7fff);
3772 uint64_t mysignificand = i1;
3773 uint8_t myintegerbit = mysignificand >> 63;
3774
3775 initialize(&semX87DoubleExtended);
3776 assert(partCount()==2);
3777
3778 sign = static_cast<unsigned int>(i2>>15);
3779 if (myexponent == 0 && mysignificand == 0) {
3780 makeZero(sign);
3781 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3782 makeInf(sign);
3783 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3784 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3785 category = fcNaN;
3786 exponent = exponentNaN();
3787 significandParts()[0] = mysignificand;
3788 significandParts()[1] = 0;
3789 } else {
3790 category = fcNormal;
3791 exponent = myexponent - 16383;
3792 significandParts()[0] = mysignificand;
3793 significandParts()[1] = 0;
3794 if (myexponent==0) // denormal
3795 exponent = -16382;
3796 }
3797}
3798
3799void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3800 uint64_t i1 = api.getRawData()[0];
3801 uint64_t i2 = api.getRawData()[1];
3802 opStatus fs;
3803 bool losesInfo;
3804
3805 // Get the first double and convert to our format.
3806 initFromDoubleAPInt(APInt(64, i1));
3808 assert(fs == opOK && !losesInfo);
3809 (void)fs;
3810
3811 // Unless we have a special case, add in second double.
3812 if (isFiniteNonZero()) {
3813 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3814 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3815 assert(fs == opOK && !losesInfo);
3816 (void)fs;
3817
3819 }
3820}
3821
// Initializes this value, with semantics S, from the bit image in `api`.
// The image is decoded as: trailing significand in the low bits, then the
// exponent field, with the sign as the top bit of the last word. Classifies
// the pattern as infinity, NaN, zero, denormal or normal according to S's
// non-finite behavior and NaN encoding.
3822template <const fltSemantics &S>
3823void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3824 assert(api.getBitWidth() == S.sizeInBits);
// Compile-time field geometry derived from S.
3825 constexpr integerPart integer_bit = integerPart{1}
3826 << ((S.precision - 1) % integerPartWidth);
3827 constexpr uint64_t significand_mask = integer_bit - 1;
3828 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3829 constexpr unsigned int stored_significand_parts =
3830 partCountForBits(trailing_significand_bits);
3831 constexpr unsigned int exponent_bits =
3832 S.sizeInBits - 1 - trailing_significand_bits;
3833 static_assert(exponent_bits < 64);
3834 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3835 constexpr int bias = -(S.minExponent - 1);
3836
3837 // Copy the bits of the significand. We need to clear out the exponent and
3838 // sign bit in the last word.
3839 std::array<integerPart, stored_significand_parts> mysignificand;
3840 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3841 if constexpr (significand_mask != 0) {
3842 mysignificand[mysignificand.size() - 1] &= significand_mask;
3843 }
3844
3845 // We assume the last word holds the sign bit, the exponent, and potentially
3846 // some of the trailing significand field.
3847 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3848 uint64_t myexponent =
3849 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3850
3851 initialize(&S);
3852 assert(partCount() == mysignificand.size());
3853
3854 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3855
3856 bool all_zero_significand =
3857 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3858
3859 bool is_zero = myexponent == 0 && all_zero_significand;
3860
// Infinity exists only for semantics with IEEE754 non-finite behavior.
3861 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3862 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3863 makeInf(sign);
3864 return;
3865 }
3866 }
3867
3868 bool is_nan = false;
3869
// NaN detection depends on the encoding: the NaN exponent with a non-zero
// significand (IEEE), the NaN exponent with an all-ones significand (AllOnes),
// or the negative-zero bit pattern (NegativeZero).
3870 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3871 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3872 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3873 bool all_ones_significand =
3874 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3875 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3876 (!significand_mask ||
3877 mysignificand[mysignificand.size() - 1] == significand_mask);
3878 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3879 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3880 is_nan = is_zero && sign;
3881 }
3882
3883 if (is_nan) {
3884 category = fcNaN;
3885 exponent = ::exponentNaN(S);
3886 std::copy_n(mysignificand.begin(), mysignificand.size(),
3887 significandParts());
3888 return;
3889 }
3890
3891 if (is_zero) {
3892 makeZero(sign);
3893 return;
3894 }
3895
// Finite non-zero value: a zero exponent field means a denormal, which is
// kept at minExponent without the integer bit; otherwise the integer bit is
// set in the stored significand.
3896 category = fcNormal;
3897 exponent = myexponent - bias;
3898 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3899 if (myexponent == 0) // denormal
3900 exponent = S.minExponent;
3901 else
3902 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3903}
3904
/// Initialize from a bit image using the semIEEEquad semantics; forwards to
/// the generic initFromIEEEAPInt helper.
3905void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3906 initFromIEEEAPInt<semIEEEquad>(api);
3907}
3908
/// Initialize from a bit image using the semIEEEdouble semantics; forwards to
/// the generic initFromIEEEAPInt helper.
3909void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3910 initFromIEEEAPInt<semIEEEdouble>(api);
3911}
3912
/// Initialize from a bit image using the semIEEEsingle semantics; forwards to
/// the generic initFromIEEEAPInt helper.
3913void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3914 initFromIEEEAPInt<semIEEEsingle>(api);
3915}
3916
/// Initialize from a bit image using the semBFloat semantics; forwards to the
/// generic initFromIEEEAPInt helper.
3917void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3918 initFromIEEEAPInt<semBFloat>(api);
3919}
3920
/// Initialize from a bit image using the semIEEEhalf semantics; forwards to
/// the generic initFromIEEEAPInt helper.
3921void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3922 initFromIEEEAPInt<semIEEEhalf>(api);
3923}
3924
/// Initialize from a bit image using the semFloat8E5M2 semantics; forwards to
/// the generic initFromIEEEAPInt helper.
3925void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3926 initFromIEEEAPInt<semFloat8E5M2>(api);
3927}
3928
/// Initialize from a bit image using the semFloat8E5M2FNUZ semantics; forwards
/// to the generic initFromIEEEAPInt helper.
3929void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3930 initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3931}
3932
/// Initialize from a bit image using the semFloat8E4M3 semantics; forwards to
/// the generic initFromIEEEAPInt helper.
3933void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3934 initFromIEEEAPInt<semFloat8E4M3>(api);
3935}
3936
/// Initialize from a bit image using the semFloat8E4M3FN semantics; forwards
/// to the generic initFromIEEEAPInt helper.
3937void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3938 initFromIEEEAPInt<semFloat8E4M3FN>(api);
3939}
3940
/// Initialize from a bit image using the semFloat8E4M3FNUZ semantics; forwards
/// to the generic initFromIEEEAPInt helper.
3941void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3942 initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3943}
3944
/// Initialize from a bit image using the semFloat8E4M3B11FNUZ semantics;
/// forwards to the generic initFromIEEEAPInt helper.
3945void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3946 initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3947}
3948
/// Initialize from a bit image using the semFloat8E3M4 semantics; forwards to
/// the generic initFromIEEEAPInt helper.
3949void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
3950 initFromIEEEAPInt<semFloat8E3M4>(api);
3951}
3952
/// Initialize from a bit image using the semFloatTF32 semantics; forwards to
/// the generic initFromIEEEAPInt helper.
3953void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3954 initFromIEEEAPInt<semFloatTF32>(api);
3955}
3956
/// Initialize from a bit image using the semFloat6E3M2FN semantics; forwards
/// to the generic initFromIEEEAPInt helper.
3957void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3958 initFromIEEEAPInt<semFloat6E3M2FN>(api);
3959}
3960
/// Initialize from a bit image using the semFloat6E2M3FN semantics; forwards
/// to the generic initFromIEEEAPInt helper.
3961void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3962 initFromIEEEAPInt<semFloat6E2M3FN>(api);
3963}
3964
/// Initialize from a bit image using the semFloat4E2M1FN semantics; forwards
/// to the generic initFromIEEEAPInt helper.
3965void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3966 initFromIEEEAPInt<semFloat4E2M1FN>(api);
3967}
3968
3969/// Treat api as containing the bits of a floating point number.
3970void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3971 assert(api.getBitWidth() == Sem->sizeInBits);
3972 if (Sem == &semIEEEhalf)
3973 return initFromHalfAPInt(api);
3974 if (Sem == &semBFloat)
3975 return initFromBFloatAPInt(api);
3976 if (Sem == &semIEEEsingle)
3977 return initFromFloatAPInt(api);
3978 if (Sem == &semIEEEdouble)
3979 return initFromDoubleAPInt(api);
3980 if (Sem == &semX87DoubleExtended)
3981 return initFromF80LongDoubleAPInt(api);
3982 if (Sem == &semIEEEquad)
3983 return initFromQuadrupleAPInt(api);
3984 if (Sem == &semPPCDoubleDoubleLegacy)
3985 return initFromPPCDoubleDoubleAPInt(api);
3986 if (Sem == &semFloat8E5M2)
3987 return initFromFloat8E5M2APInt(api);
3988 if (Sem == &semFloat8E5M2FNUZ)
3989 return initFromFloat8E5M2FNUZAPInt(api);
3990 if (Sem == &semFloat8E4M3)
3991 return initFromFloat8E4M3APInt(api);
3992 if (Sem == &semFloat8E4M3FN)
3993 return initFromFloat8E4M3FNAPInt(api);
3994 if (Sem == &semFloat8E4M3FNUZ)
3995 return initFromFloat8E4M3FNUZAPInt(api);
3996 if (Sem == &semFloat8E4M3B11FNUZ)
3997 return initFromFloat8E4M3B11FNUZAPInt(api);
3998 if (Sem == &semFloat8E3M4)
3999 return initFromFloat8E3M4APInt(api);
4000 if (Sem == &semFloatTF32)
4001 return initFromFloatTF32APInt(api);
4002 if (Sem == &semFloat6E3M2FN)
4003 return initFromFloat6E3M2FNAPInt(api);
4004 if (Sem == &semFloat6E2M3FN)
4005 return initFromFloat6E2M3FNAPInt(api);
4006 if (Sem == &semFloat4E2M1FN)
4007 return initFromFloat4E2M1FNAPInt(api);
4008
4009 llvm_unreachable(nullptr);
4010}
4011
4012/// Make this number the largest magnitude normal number in the given
4013/// semantics.
4014void IEEEFloat::makeLargest(bool Negative) {
4015 // We want (in interchange format):
4016 // sign = {Negative}
4017 // exponent = 1..10
4018 // significand = 1..1
4019 category = fcNormal;
4020 sign = Negative;
4021 exponent = semantics->maxExponent;
4022
4023 // Use memset to set all but the highest integerPart to all ones.
4024 integerPart *significand = significandParts();
4025 unsigned PartCount = partCount();
4026 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4027
4028 // Set the high integerPart especially setting all unused top bits for
4029 // internal consistency.
4030 const unsigned NumUnusedHighBits =
4031 PartCount*integerPartWidth - semantics->precision;
4032 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4033 ? (~integerPart(0) >> NumUnusedHighBits)
4034 : 0;
4035
4036 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4037 semantics->nanEncoding == fltNanEncoding::AllOnes)
4038 significand[0] &= ~integerPart(1);
4039}
4040
4041/// Make this number the smallest magnitude denormal number in the given
4042/// semantics.
4043void IEEEFloat::makeSmallest(bool Negative) {
4044 // We want (in interchange format):
4045 // sign = {Negative}
4046 // exponent = 0..0
4047 // significand = 0..01
4048 category = fcNormal;
4049 sign = Negative;
4050 exponent = semantics->minExponent;
// Presumably stores the integer 1 across the whole significand (lowest bit
// set, everything else clear), matching the "0..01" pattern above -- confirm
// against APInt::tcSet.
4051 APInt::tcSet(significandParts(), 1, partCount());
4052}
4053
// Sets this value to the number at the minimum exponent whose only set
// significand bit is the top bit of the precision, with the sign taken from
// Negative.
4054void IEEEFloat::makeSmallestNormalized(bool Negative) {
4055 // We want (in interchange format):
4056 // sign = {Negative}
4057 // exponent = 0..0
4058 // significand = 10..0
4059
4060 category = fcNormal;
4061 zeroSignificand();
4062 sign = Negative;
4063 exponent = semantics->minExponent;
// Bit index precision - 1 is the most significant bit of the significand.
4064 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4065}
4066
/// Construct a value with semantics Sem from the raw bit image API, by
/// delegating to initFromAPInt.
4067IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4068 initFromAPInt(&Sem, API);
4069}
4070
/// Construct an IEEEsingle value from the bits of the host float f.
4071IEEEFloat::IEEEFloat(float f) {
4072 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
4073}
4074
/// Construct an IEEEdouble value from the bits of the host double d.
4075IEEEFloat::IEEEFloat(double d) {
4076 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
4077}
4078
4079namespace {
/// Append the characters of Str to the end of Buffer.
4080 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4081 Buffer.append(Str.begin(), Str.end());
4082 }
4083
4084 /// Removes data from the given significand until it is no more
4085 /// precise than is required for the desired precision.
4086 void AdjustToPrecision(APInt &significand,
4087 int &exp, unsigned FormatPrecision) {
4088 unsigned bits = significand.getActiveBits();
4089
4090 // 196/59 is a very slight overestimate of lg_2(10).
4091 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4092
4093 if (bits <= bitsRequired) return;
4094
4095 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4096 if (!tensRemovable) return;
4097
4098 exp += tensRemovable;
4099
4100 APInt divisor(significand.getBitWidth(), 1);
4101 APInt powten(significand.getBitWidth(), 10);
4102 while (true) {
4103 if (tensRemovable & 1)
4104 divisor *= powten;
4105 tensRemovable >>= 1;
4106 if (!tensRemovable) break;
4107 powten *= powten;
4108 }
4109
4110 significand = significand.udiv(divisor);
4111
4112 // Truncate the significand down to its active bit count.
4113 significand = significand.trunc(significand.getActiveBits());
4114 }
4115
4116
4117 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4118 int &exp, unsigned FormatPrecision) {
4119 unsigned N = buffer.size();
4120 if (N <= FormatPrecision) return;
4121
4122 // The most significant figures are the last ones in the buffer.
4123 unsigned FirstSignificant = N - FormatPrecision;
4124
4125 // Round.
4126 // FIXME: this probably shouldn't use 'round half up'.
4127
4128 // Rounding down is just a truncation, except we also want to drop
4129 // trailing zeros from the new result.
4130 if (buffer[FirstSignificant - 1] < '5') {
4131 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4132 FirstSignificant++;
4133
4134 exp += FirstSignificant;
4135 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4136 return;
4137 }
4138
4139 // Rounding up requires a decimal add-with-carry. If we continue
4140 // the carry, the newly-introduced zeros will just be truncated.
4141 for (unsigned I = FirstSignificant; I != N; ++I) {
4142 if (buffer[I] == '9') {
4143 FirstSignificant++;
4144 } else {
4145 buffer[I]++;
4146 break;
4147 }
4148 }
4149
4150 // If we carried through, we have exactly one digit of precision.
4151 if (FirstSignificant == N) {
4152 exp += FirstSignificant;
4153 buffer.clear();
4154 buffer.push_back('1');
4155 return;
4156 }
4157
4158 exp += FirstSignificant;
4159 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4160 }
4161
4162 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4163 APInt significand, unsigned FormatPrecision,
4164 unsigned FormatMaxPadding, bool TruncateZero) {
4165 const int semanticsPrecision = significand.getBitWidth();
4166
4167 if (isNeg)
4168 Str.push_back('-');
4169
4170 // Set FormatPrecision if zero. We want to do this before we
4171 // truncate trailing zeros, as those are part of the precision.
4172 if (!FormatPrecision) {
4173 // We use enough digits so the number can be round-tripped back to an
4174 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4175 // Accurately" by Steele and White.
4176 // FIXME: Using a formula based purely on the precision is conservative;
4177 // we can print fewer digits depending on the actual value being printed.
4178
4179 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4180 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4181 }
4182
4183 // Ignore trailing binary zeros.
4184 int trailingZeros = significand.countr_zero();
4185 exp += trailingZeros;
4186 significand.lshrInPlace(trailingZeros);
4187
4188 // Change the exponent from 2^e to 10^e.
4189 if (exp == 0) {
4190 // Nothing to do.
4191 } else if (exp > 0) {
4192 // Just shift left.
4193 significand = significand.zext(semanticsPrecision + exp);
4194 significand <<= exp;
4195 exp = 0;
4196 } else { /* exp < 0 */
4197 int texp = -exp;
4198
4199 // We transform this using the identity:
4200 // (N)(2^-e) == (N)(5^e)(10^-e)
4201 // This means we have to multiply N (the significand) by 5^e.
4202 // To avoid overflow, we have to operate on numbers large
4203 // enough to store N * 5^e:
4204 // log2(N * 5^e) == log2(N) + e * log2(5)
4205 // <= semantics->precision + e * 137 / 59
4206 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4207
4208 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4209
4210 // Multiply significand by 5^e.
4211 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4212 significand = significand.zext(precision);
4213 APInt five_to_the_i(precision, 5);
4214 while (true) {
4215 if (texp & 1)
4216 significand *= five_to_the_i;
4217
4218 texp >>= 1;
4219 if (!texp)
4220 break;
4221 five_to_the_i *= five_to_the_i;
4222 }
4223 }
4224
4225 AdjustToPrecision(significand, exp, FormatPrecision);
4226
4228
4229 // Fill the buffer.
4230 unsigned precision = significand.getBitWidth();
4231 if (precision < 4) {
4232 // We need enough precision to store the value 10.
4233 precision = 4;
4234 significand = significand.zext(precision);
4235 }
4236 APInt ten(precision, 10);
4237 APInt digit(precision, 0);
4238
4239 bool inTrail = true;
4240 while (significand != 0) {
4241 // digit <- significand % 10
4242 // significand <- significand / 10
4243 APInt::udivrem(significand, ten, significand, digit);
4244
4245 unsigned d = digit.getZExtValue();
4246
4247 // Drop trailing zeros.
4248 if (inTrail && !d)
4249 exp++;
4250 else {
4251 buffer.push_back((char) ('0' + d));
4252 inTrail = false;
4253 }
4254 }
4255
4256 assert(!buffer.empty() && "no characters in buffer!");
4257
4258 // Drop down to FormatPrecision.
4259 // TODO: don't do more precise calculations above than are required.
4260 AdjustToPrecision(buffer, exp, FormatPrecision);
4261
4262 unsigned NDigits = buffer.size();
4263
4264 // Check whether we should use scientific notation.
4265 bool FormatScientific;
4266 if (!FormatMaxPadding)
4267 FormatScientific = true;
4268 else {
4269 if (exp >= 0) {
4270 // 765e3 --> 765000
4271 // ^^^
4272 // But we shouldn't make the number look more precise than it is.
4273 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4274 NDigits + (unsigned) exp > FormatPrecision);
4275 } else {
4276 // Power of the most significant digit.
4277 int MSD = exp + (int) (NDigits - 1);
4278 if (MSD >= 0) {
4279 // 765e-2 == 7.65
4280 FormatScientific = false;
4281 } else {
4282 // 765e-5 == 0.00765
4283 // ^ ^^
4284 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4285 }
4286 }
4287 }
4288
4289 // Scientific formatting is pretty straightforward.
4290 if (FormatScientific) {
4291 exp += (NDigits - 1);
4292
4293 Str.push_back(buffer[NDigits-1]);
4294 Str.push_back('.');
4295 if (NDigits == 1 && TruncateZero)
4296 Str.push_back('0');
4297 else
4298 for (unsigned I = 1; I != NDigits; ++I)
4299 Str.push_back(buffer[NDigits-1-I]);
4300 // Fill with zeros up to FormatPrecision.
4301 if (!TruncateZero && FormatPrecision > NDigits - 1)
4302 Str.append(FormatPrecision - NDigits + 1, '0');
4303 // For !TruncateZero we use lower 'e'.
4304 Str.push_back(TruncateZero ? 'E' : 'e');
4305
4306 Str.push_back(exp >= 0 ? '+' : '-');
4307 if (exp < 0)
4308 exp = -exp;
4309 SmallVector<char, 6> expbuf;
4310 do {
4311 expbuf.push_back((char) ('0' + (exp % 10)));
4312 exp /= 10;
4313 } while (exp);
4314 // Exponent always at least two digits if we do not truncate zeros.
4315 if (!TruncateZero && expbuf.size() < 2)
4316 expbuf.push_back('0');
4317 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4318 Str.push_back(expbuf[E-1-I]);
4319 return;
4320 }
4321
4322 // Non-scientific, positive exponents.
4323 if (exp >= 0) {
4324 for (unsigned I = 0; I != NDigits; ++I)
4325 Str.push_back(buffer[NDigits-1-I]);
4326 for (unsigned I = 0; I != (unsigned) exp; ++I)
4327 Str.push_back('0');
4328 return;
4329 }
4330
4331 // Non-scientific, negative exponents.
4332
4333 // The number of digits to the left of the decimal point.
4334 int NWholeDigits = exp + (int) NDigits;
4335
4336 unsigned I = 0;
4337 if (NWholeDigits > 0) {
4338 for (; I != (unsigned) NWholeDigits; ++I)
4339 Str.push_back(buffer[NDigits-I-1]);
4340 Str.push_back('.');
4341 } else {
4342 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4343
4344 Str.push_back('0');
4345 Str.push_back('.');
4346 for (unsigned Z = 1; Z != NZeros; ++Z)
4347 Str.push_back('0');
4348 }
4349
4350 for (; I != NDigits; ++I)
4351 Str.push_back(buffer[NDigits-I-1]);
4352
4353 }
4354} // namespace
4355
4356void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4357 unsigned FormatMaxPadding, bool TruncateZero) const {
4358 switch (category) {
4359 case fcInfinity:
4360 if (isNegative())
4361 return append(Str, "-Inf");
4362 else
4363 return append(Str, "+Inf");
4364
4365 case fcNaN: return append(Str, "NaN");
4366
4367 case fcZero:
4368 if (isNegative())
4369 Str.push_back('-');
4370
4371 if (!FormatMaxPadding) {
4372 if (TruncateZero)
4373 append(Str, "0.0E+0");
4374 else {
4375 append(Str, "0.0");
4376 if (FormatPrecision > 1)
4377 Str.append(FormatPrecision - 1, '0');
4378 append(Str, "e+00");
4379 }
4380 } else
4381 Str.push_back('0');
4382 return;
4383
4384 case fcNormal:
4385 break;
4386 }
4387
4388 // Decompose the number into an APInt and an exponent.
4389 int exp = exponent - ((int) semantics->precision - 1);
4390 APInt significand(
4391 semantics->precision,
4392 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4393
4394 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4395 FormatMaxPadding, TruncateZero);
4396
4397}
4398
4399bool IEEEFloat::getExactInverse(APFloat *inv) const {
4400 // Special floats and denormals have no exact inverse.
4401 if (!isFiniteNonZero())
4402 return false;
4403
4404 // Check that the number is a power of two by making sure that only the
4405 // integer bit is set in the significand.
4406 if (significandLSB() != semantics->precision - 1)
4407 return false;
4408
4409 // Get the inverse.
4410 IEEEFloat reciprocal(*semantics, 1ULL);
4411 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4412 return false;
4413
4414 // Avoid multiplication with a denormal, it is not safe on all platforms and
4415 // may be slower than a normal division.
4416 if (reciprocal.isDenormal())
4417 return false;
4418
4419 assert(reciprocal.isFiniteNonZero() &&
4420 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4421
4422 if (inv)
4423 *inv = APFloat(reciprocal, *semantics);
4424
4425 return true;
4426}
4427
4428int IEEEFloat::getExactLog2Abs() const {
4429 if (!isFinite() || isZero())
4430 return INT_MIN;
4431
4432 const integerPart *Parts = significandParts();
4433 const int PartCount = partCountForBits(semantics->precision);
4434
4435 int PopCount = 0;
4436 for (int i = 0; i < PartCount; ++i) {
4437 PopCount += llvm::popcount(Parts[i]);
4438 if (PopCount > 1)
4439 return INT_MIN;
4440 }
4441
4442 if (exponent != semantics->minExponent)
4443 return exponent;
4444
4445 int CountrParts = 0;
4446 for (int i = 0; i < PartCount;
4447 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4448 if (Parts[i] != 0) {
4449 return exponent - semantics->precision + CountrParts +
4450 llvm::countr_zero(Parts[i]) + 1;
4451 }
4452 }
4453
4454 llvm_unreachable("didn't find the set bit");
4455}
4456
4457bool IEEEFloat::isSignaling() const {
4458 if (!isNaN())
4459 return false;
4460 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4461 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4462 return false;
4463
4464 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4465 // first bit of the trailing significand being 0.
4466 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4467}
4468
4469/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4470///
4471/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4472/// appropriate sign switching before/after the computation.
4473IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4474 // If we are performing nextDown, swap sign so we have -x.
4475 if (nextDown)
4476 changeSign();
4477
4478 // Compute nextUp(x)
4479 opStatus result = opOK;
4480
4481 // Handle each float category separately.
4482 switch (category) {
4483 case fcInfinity:
4484 // nextUp(+inf) = +inf
4485 if (!isNegative())
4486 break;
4487 // nextUp(-inf) = -getLargest()
4488 makeLargest(true);
4489 break;
4490 case fcNaN:
4491 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4492 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4493 // change the payload.
4494 if (isSignaling()) {
4495 result = opInvalidOp;
4496 // For consistency, propagate the sign of the sNaN to the qNaN.
4497 makeNaN(false, isNegative(), nullptr);
4498 }
4499 break;
4500 case fcZero:
4501 // nextUp(pm 0) = +getSmallest()
4502 makeSmallest(false);
4503 break;
4504 case fcNormal:
4505 // nextUp(-getSmallest()) = -0
4506 if (isSmallest() && isNegative()) {
4507 APInt::tcSet(significandParts(), 0, partCount());
4508 category = fcZero;
4509 exponent = 0;
4510 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4511 sign = false;
4512 break;
4513 }
4514
4515 if (isLargest() && !isNegative()) {
4516 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4517 // nextUp(getLargest()) == NAN
4518 makeNaN();
4519 break;
4520 } else if (semantics->nonFiniteBehavior ==
4522 // nextUp(getLargest()) == getLargest()
4523 break;
4524 } else {
4525 // nextUp(getLargest()) == INFINITY
4526 APInt::tcSet(significandParts(), 0, partCount());
4527 category = fcInfinity;
4528 exponent = semantics->maxExponent + 1;
4529 break;
4530 }
4531 }
4532
4533 // nextUp(normal) == normal + inc.
4534 if (isNegative()) {
4535 // If we are negative, we need to decrement the significand.
4536
4537 // We only cross a binade boundary that requires adjusting the exponent
4538 // if:
4539 // 1. exponent != semantics->minExponent. This implies we are not in the
4540 // smallest binade or are dealing with denormals.
4541 // 2. Our significand excluding the integral bit is all zeros.
4542 bool WillCrossBinadeBoundary =
4543 exponent != semantics->minExponent && isSignificandAllZeros();
4544
4545 // Decrement the significand.
4546 //
4547 // We always do this since:
4548 // 1. If we are dealing with a non-binade decrement, by definition we
4549 // just decrement the significand.
4550 // 2. If we are dealing with a normal -> normal binade decrement, since
4551 // we have an explicit integral bit the fact that all bits but the
4552 // integral bit are zero implies that subtracting one will yield a
4553 // significand with 0 integral bit and 1 in all other spots. Thus we
4554 // must just adjust the exponent and set the integral bit to 1.
4555 // 3. If we are dealing with a normal -> denormal binade decrement,
4556 // since we set the integral bit to 0 when we represent denormals, we
4557 // just decrement the significand.
4558 integerPart *Parts = significandParts();
4559 APInt::tcDecrement(Parts, partCount());
4560
4561 if (WillCrossBinadeBoundary) {
4562 // Our result is a normal number. Do the following:
4563 // 1. Set the integral bit to 1.
4564 // 2. Decrement the exponent.
4565 APInt::tcSetBit(Parts, semantics->precision - 1);
4566 exponent--;
4567 }
4568 } else {
4569 // If we are positive, we need to increment the significand.
4570
4571 // We only cross a binade boundary that requires adjusting the exponent if
4572 // the input is not a denormal and all of said input's significand bits
4573 // are set. If all of said conditions are true: clear the significand, set
4574 // the integral bit to 1, and increment the exponent. If we have a
4575 // denormal always increment since moving denormals and the numbers in the
4576 // smallest normal binade have the same exponent in our representation.
4577 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4578
4579 if (WillCrossBinadeBoundary) {
4580 integerPart *Parts = significandParts();
4581 APInt::tcSet(Parts, 0, partCount());
4582 APInt::tcSetBit(Parts, semantics->precision - 1);
4583 assert(exponent != semantics->maxExponent &&
4584 "We can not increment an exponent beyond the maxExponent allowed"
4585 " by the given floating point semantics.");
4586 exponent++;
4587 } else {
4588 incrementSignificand();
4589 }
4590 }
4591 break;
4592 }
4593
4594 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4595 if (nextDown)
4596 changeSign();
4597
4598 return result;
4599}
4600
4601APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4602 return ::exponentNaN(*semantics);
4603}
4604
4605APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4606 return ::exponentInf(*semantics);
4607}
4608
4609APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4610 return ::exponentZero(*semantics);
4611}
4612
4613void IEEEFloat::makeInf(bool Negative) {
4614 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4615 llvm_unreachable("This floating point format does not support Inf");
4616
4617 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4618 // There is no Inf, so make NaN instead.
4619 makeNaN(false, Negative);
4620 return;
4621 }
4622 category = fcInfinity;
4623 sign = Negative;
4624 exponent = exponentInf();
4625 APInt::tcSet(significandParts(), 0, partCount());
4626}
4627
4628void IEEEFloat::makeZero(bool Negative) {
4629 category = fcZero;
4630 sign = Negative;
4631 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4632 // Merge negative zero to positive because 0b10000...000 is used for NaN
4633 sign = false;
4634 }
4635 exponent = exponentZero();
4636 APInt::tcSet(significandParts(), 0, partCount());
4637}
4638
4639void IEEEFloat::makeQuiet() {
4640 assert(isNaN());
4641 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4642 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4643}
4644
4645int ilogb(const IEEEFloat &Arg) {
4646 if (Arg.isNaN())
4647 return IEEEFloat::IEK_NaN;
4648 if (Arg.isZero())
4649 return IEEEFloat::IEK_Zero;
4650 if (Arg.isInfinity())
4651 return IEEEFloat::IEK_Inf;
4652 if (!Arg.isDenormal())
4653 return Arg.exponent;
4654
4655 IEEEFloat Normalized(Arg);
4656 int SignificandBits = Arg.getSemantics().precision - 1;
4657
4658 Normalized.exponent += SignificandBits;
4659 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4660 return Normalized.exponent - SignificandBits;
4661}
4662
4664 auto MaxExp = X.getSemantics().maxExponent;
4665 auto MinExp = X.getSemantics().minExponent;
4666
4667 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4668 // overflow; clamp it to a safe range before adding, but ensure that the range
4669 // is large enough that the clamp does not change the result. The range we
4670 // need to support is the difference between the largest possible exponent and
4671 // the normalized exponent of half the smallest denormal.
4672
4673 int SignificandBits = X.getSemantics().precision - 1;
4674 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4675
4676 // Clamp to one past the range ends to let normalize handle overflow.
4677 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4678 X.normalize(RoundingMode, lfExactlyZero);
4679 if (X.isNaN())
4680 X.makeQuiet();
4681 return X;
4682}
4683
4685 Exp = ilogb(Val);
4686
4687 // Quiet signalling nans.
4688 if (Exp == IEEEFloat::IEK_NaN) {
4689 IEEEFloat Quiet(Val);
4690 Quiet.makeQuiet();
4691 return Quiet;
4692 }
4693
4694 if (Exp == IEEEFloat::IEK_Inf)
4695 return Val;
4696
4697 // 1 is added because frexp is defined to return a normalized fraction in
4698 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4699 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4700 return scalbn(Val, -Exp, RM);
4701}
4702
4703DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4704 : Semantics(&S),
4706 assert(Semantics == &semPPCDoubleDouble);
4707}
4708
4710 : Semantics(&S),
4713 assert(Semantics == &semPPCDoubleDouble);
4714}
4715
4717 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4719 assert(Semantics == &semPPCDoubleDouble);
4720}
4721
4723 : Semantics(&S),
4724 Floats(new APFloat[2]{
4725 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4726 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4727 assert(Semantics == &semPPCDoubleDouble);
4728}
4729
4731 APFloat &&Second)
4732 : Semantics(&S),
4733 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4734 assert(Semantics == &semPPCDoubleDouble);
4735 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4736 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4737}
4738
4741 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4742 APFloat(RHS.Floats[1])}
4743 : nullptr) {
4744 assert(Semantics == &semPPCDoubleDouble);
4745}
4746
4748 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4749 RHS.Semantics = &semBogus;
4751}
4752
4754 if (Semantics == RHS.Semantics && RHS.Floats) {
4755 Floats[0] = RHS.Floats[0];
4756 Floats[1] = RHS.Floats[1];
4757 } else if (this != &RHS) {
4758 this->~DoubleAPFloat();
4759 new (this) DoubleAPFloat(RHS);
4760 }
4761 return *this;
4762}
4763
4764// Implement addition, subtraction, multiplication and division based on:
4765// "Software for Doubled-Precision Floating-Point Computations",
4766// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl stores the sum of the pairs (a, aa) and (c, cc) into Floats[0] and
// Floats[1], performing every intermediate operation with rounding mode RM.
// The returned status is the bitwise OR of the statuses of the individual
// operations, except where noted below.
4767APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4768 const APFloat &c, const APFloat &cc,
4769 roundingMode RM) {
4770 int Status = opOK;
// z = a + c
4771 APFloat z = a;
4772 Status |= z.add(c, RM);
4773 if (!z.isFinite()) {
// Non-finite and not an infinity: store z as the result with a +0 second
// component.
4774 if (!z.isInfinity()) {
4775 Floats[0] = std::move(z);
4776 Floats[1].makeZero(/* Neg = */ false);
4777 return (opStatus)Status;
4778 }
// a + c produced an infinity. Discard the status gathered so far and redo
// the sum starting from cc + aa, adding whichever of a and c has the larger
// magnitude last.
4779 Status = opOK;
4780 auto AComparedToC = a.compareAbsoluteValue(c);
4781 z = cc;
4782 Status |= z.add(aa, RM);
4783 if (AComparedToC == APFloat::cmpGreaterThan) {
4784 // z = cc + aa + c + a;
4785 Status |= z.add(c, RM);
4786 Status |= z.add(a, RM);
4787 } else {
4788 // z = cc + aa + a + c;
4789 Status |= z.add(a, RM);
4790 Status |= z.add(c, RM);
4791 }
// Still not finite after reordering: store z with a +0 second component.
4792 if (!z.isFinite()) {
4793 Floats[0] = std::move(z);
4794 Floats[1].makeZero(/* Neg = */ false);
4795 return (opStatus)Status;
4796 }
4797 Floats[0] = z;
// zz = aa + cc
4798 APFloat zz = aa;
4799 Status |= zz.add(cc, RM);
4800 if (AComparedToC == APFloat::cmpGreaterThan) {
4801 // Floats[1] = a - z + c + zz;
4802 Floats[1] = a;
4803 Status |= Floats[1].subtract(z, RM);
4804 Status |= Floats[1].add(c, RM);
4805 Status |= Floats[1].add(zz, RM);
4806 } else {
4807 // Floats[1] = c - z + a + zz;
4808 Floats[1] = c;
4809 Status |= Floats[1].subtract(z, RM);
4810 Status |= Floats[1].add(a, RM);
4811 Status |= Floats[1].add(zz, RM);
4812 }
4813 } else {
4814 // q = a - z;
4815 APFloat q = a;
4816 Status |= q.subtract(z, RM);
4817
4818 // zz = q + c + (a - (q + z)) + aa + cc;
4819 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4820 auto zz = q;
4821 Status |= zz.add(c, RM);
4822 Status |= q.add(z, RM);
4823 Status |= q.subtract(a, RM);
4824 q.changeSign();
4825 Status |= zz.add(q, RM);
4826 Status |= zz.add(aa, RM);
4827 Status |= zz.add(cc, RM);
// A correction term of +0 leaves z as the result. Note that this path
// returns opOK and drops whatever was accumulated in Status.
4828 if (zz.isZero() && !zz.isNegative()) {
4829 Floats[0] = std::move(z);
4830 Floats[1].makeZero(/* Neg = */ false);
4831 return opOK;
4832 }
// Floats[0] = z + zz
4833 Floats[0] = z;
4834 Status |= Floats[0].add(zz, RM);
4835 if (!Floats[0].isFinite()) {
4836 Floats[1].makeZero(/* Neg = */ false);
4837 return (opStatus)Status;
4838 }
// Floats[1] = z - Floats[0] + zz
4839 Floats[1] = std::move(z);
4840 Status |= Floats[1].subtract(Floats[0], RM);
4841 Status |= Floats[1].add(zz, RM);
4842 }
4843 return (opStatus)Status;
4844}
4845
4846APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4847 const DoubleAPFloat &RHS,
4848 DoubleAPFloat &Out,
4849 roundingMode RM) {
4850 if (LHS.getCategory() == fcNaN) {
4851 Out = LHS;
4852 return opOK;
4853 }
4854 if (RHS.getCategory() == fcNaN) {
4855 Out = RHS;
4856 return opOK;
4857 }
4858 if (LHS.getCategory() == fcZero) {
4859 Out = RHS;
4860 return opOK;
4861 }
4862 if (RHS.getCategory() == fcZero) {
4863 Out = LHS;
4864 return opOK;
4865 }
4866 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4867 LHS.isNegative() != RHS.isNegative()) {
4868 Out.makeNaN(false, Out.isNegative(), nullptr);
4869 return opInvalidOp;
4870 }
4871 if (LHS.getCategory() == fcInfinity) {
4872 Out = LHS;
4873 return opOK;
4874 }
4875 if (RHS.getCategory() == fcInfinity) {
4876 Out = RHS;
4877 return opOK;
4878 }
4879 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4880
4881 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4882 CC(RHS.Floats[1]);
4883 assert(&A.getSemantics() == &semIEEEdouble);
4884 assert(&AA.getSemantics() == &semIEEEdouble);
4885 assert(&C.getSemantics() == &semIEEEdouble);
4886 assert(&CC.getSemantics() == &semIEEEdouble);
4887 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4888 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4889 return Out.addImpl(A, AA, C, CC, RM);
4890}
4891
4893 roundingMode RM) {
4894 return addWithSpecial(*this, RHS, *this, RM);
4895}
4896
4898 roundingMode RM) {
4899 changeSign();
4900 auto Ret = add(RHS, RM);
4901 changeSign();
4902 return Ret;
4903}
4904
4907 const auto &LHS = *this;
4908 auto &Out = *this;
4909 /* Interesting observation: For special categories, finding the lowest
4910 common ancestor of the following layered graph gives the correct
4911 return category:
4912
4913 NaN
4914 / \
4915 Zero Inf
4916 \ /
4917 Normal
4918
4919 e.g. NaN * NaN = NaN
4920 Zero * Inf = NaN
4921 Normal * Zero = Zero
4922 Normal * Inf = Inf
4923 */
4924 if (LHS.getCategory() == fcNaN) {
4925 Out = LHS;
4926 return opOK;
4927 }
4928 if (RHS.getCategory() == fcNaN) {
4929 Out = RHS;
4930 return opOK;
4931 }
4932 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4933 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4934 Out.makeNaN(false, false, nullptr);
4935 return opOK;
4936 }
4937 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4938 Out = LHS;
4939 return opOK;
4940 }
4941 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4942 Out = RHS;
4943 return opOK;
4944 }
4945 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4946 "Special cases not handled exhaustively");
4947
4948 int Status = opOK;
4949 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4950 // t = a * c
4951 APFloat T = A;
4952 Status |= T.multiply(C, RM);
4953 if (!T.isFiniteNonZero()) {
4954 Floats[0] = T;
4955 Floats[1].makeZero(/* Neg = */ false);
4956 return (opStatus)Status;
4957 }
4958
4959 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4960 APFloat Tau = A;
4961 T.changeSign();
4962 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4963 T.changeSign();
4964 {
4965 // v = a * d
4966 APFloat V = A;
4967 Status |= V.multiply(D, RM);
4968 // w = b * c
4969 APFloat W = B;
4970 Status |= W.multiply(C, RM);
4971 Status |= V.add(W, RM);
4972 // tau += v + w
4973 Status |= Tau.add(V, RM);
4974 }
4975 // u = t + tau
4976 APFloat U = T;
4977 Status |= U.add(Tau, RM);
4978
4979 Floats[0] = U;
4980 if (!U.isFinite()) {
4981 Floats[1].makeZero(/* Neg = */ false);
4982 } else {
4983 // Floats[1] = (t - u) + tau
4984 Status |= T.subtract(U, RM);
4985 Status |= T.add(Tau, RM);
4986 Floats[1] = T;
4987 }
4988 return (opStatus)Status;
4989}
4990
4993 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4995 auto Ret =
4996 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4998 return Ret;
4999}
5000
5002 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5004 auto Ret =
5005 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5007 return Ret;
5008}
5009
5011 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5013 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5015 return Ret;
5016}
5017
5020 const DoubleAPFloat &Addend,
5022 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5024 auto Ret = Tmp.fusedMultiplyAdd(
5028 return Ret;
5029}
5030
5032 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5034 auto Ret = Tmp.roundToIntegral(RM);
5036 return Ret;
5037}
5038
5040 Floats[0].changeSign();
5041 Floats[1].changeSign();
5042}
5043
5046 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5047 if (Result != cmpEqual)
5048 return Result;
5049 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5050 if (Result == cmpLessThan || Result == cmpGreaterThan) {
5051 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
5052 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
5053 if (Against && !RHSAgainst)
5054 return cmpLessThan;
5055 if (!Against && RHSAgainst)
5056 return cmpGreaterThan;
5057 if (!Against && !RHSAgainst)
5058 return Result;
5059 if (Against && RHSAgainst)
5060 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
5061 }
5062 return Result;
5063}
5064
5066 return Floats[0].getCategory();
5067}
5068
5069bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5070
5072 Floats[0].makeInf(Neg);
5073 Floats[1].makeZero(/* Neg = */ false);
5074}
5075
5077 Floats[0].makeZero(Neg);
5078 Floats[1].makeZero(/* Neg = */ false);
5079}
5080
5082 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5083 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5084 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5085 if (Neg)
5086 changeSign();
5087}
5088
5090 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5091 Floats[0].makeSmallest(Neg);
5092 Floats[1].makeZero(/* Neg = */ false);
5093}
5094
5096 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5097 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5098 if (Neg)
5099 Floats[0].changeSign();
5100 Floats[1].makeZero(/* Neg = */ false);
5101}
5102
5103void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5104 Floats[0].makeNaN(SNaN, Neg, fill);
5105 Floats[1].makeZero(/* Neg = */ false);
5106}
5107
5109 auto Result = Floats[0].compare(RHS.Floats[0]);
5110 // |Float[0]| > |Float[1]|
5111 if (Result == APFloat::cmpEqual)
5112 return Floats[1].compare(RHS.Floats[1]);
5113 return Result;
5114}
5115
5117 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5118 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5119}
5120
5122 if (Arg.Floats)
5123 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5124 return hash_combine(Arg.Semantics);
5125}
5126
5128 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5129 uint64_t Data[] = {
5130 Floats[0].bitcastToAPInt().getRawData()[0],
5131 Floats[1].bitcastToAPInt().getRawData()[0],
5132 };
5133 return APInt(128, 2, Data);
5134}
5135
5137 roundingMode RM) {
5138 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5140 auto Ret = Tmp.convertFromString(S, RM);
5142 return Ret;
5143}
5144
5146 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");