LLVM 22.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Error.h"
28#include <cstring>
29#include <limits.h>
30
/// Expands to a statement that returns METHOD_CALL invoked on `U.IEEE` when
/// the current semantics use the IEEEFloat layout, or on `U.Double` when they
/// use the DoubleAPFloat layout. Any other layout is treated as unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics())) {                               \
      return U.IEEE.METHOD_CALL;                                               \
    } else if (usesLayout<DoubleAPFloat>(getSemantics())) {                    \
      return U.Double.METHOD_CALL;                                             \
    } else {                                                                   \
      llvm_unreachable("Unexpected semantics");                                \
    }                                                                          \
  } while (false)
39
40using namespace llvm;
41
/// Folds a pair of fcCategory values into a single integer key (lhs scaled by
/// four, plus rhs) so that a switch statement can dispatch on how the two
/// operand categories of an operation interact.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) (4 * (_lhs) + (_rhs))
49
50/* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54namespace llvm {
55
56// How the nonfinite values Inf and NaN are represented.
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
62
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
71
72 // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
73 // Float4E2M1FN types, which do not support Inf or NaN values.
75};
76
77// How NaN values are represented. This is currently only used in combination
78// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
79// while having IEEE non-finite behavior is liable to lead to unexpected
80// results.
81enum class fltNanEncoding {
82 // Represents the standard IEEE behavior where a value is NaN if its
83 // exponent is all 1s and the significand is non-zero.
85
86 // Represents the behavior in the Float8E4M3FN floating point type where NaN
87 // is represented by having the exponent and mantissa set to all 1s.
88 // This behavior matches the FP8 E4M3 type described in
89 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
90 // as non-signalling, although the paper does not state whether the NaN
91 // values are signalling or not.
93
94 // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
95 // where NaN is represented by a sign bit of 1 and all 0s in the exponent
96 // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
97 // there is only one NaN value, it is treated as quiet NaN. This matches the
98 // behavior described in https://arxiv.org/abs/2206.02915 .
100};
101
102/* Represents floating point arithmetic semantics. */
104 /* The largest E such that 2^E is representable; this matches the
105 definition of IEEE 754. */
107
108 /* The smallest E such that 2^E is a normalized number; this
109 matches the definition of IEEE 754. */
111
112 /* Number of bits in the significand. This includes the integer
113 bit. */
114 unsigned int precision;
115
116 /* Number of bits actually used in the semantics. */
117 unsigned int sizeInBits;
118
120
122
123 /* Whether this semantics has an encoding for Zero */
124 bool hasZero = true;
125
126 /* Whether this semantics can represent signed values */
127 bool hasSignedRepr = true;
128
129 /* Whether the sign bit of this semantics is the most significant bit */
130 bool hasSignBitInMSB = true;
131};
132
133constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
134constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
135constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
136constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
137constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
138constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
139constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
141constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
142constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
144constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
146constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
148constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
149constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
150constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
151 127,
152 -127,
153 1,
154 8,
157 false,
158 false,
159 false};
160
161constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
163constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
165constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
167constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
168 80};
169constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
170constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
171constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
172 1023, -1022 + 53, 53 + 53, 128};
173
175 switch (S) {
176 case S_IEEEhalf:
177 return IEEEhalf();
178 case S_BFloat:
179 return BFloat();
180 case S_IEEEsingle:
181 return IEEEsingle();
182 case S_IEEEdouble:
183 return IEEEdouble();
184 case S_IEEEquad:
185 return IEEEquad();
187 return PPCDoubleDouble();
189 return PPCDoubleDoubleLegacy();
190 case S_Float8E5M2:
191 return Float8E5M2();
192 case S_Float8E5M2FNUZ:
193 return Float8E5M2FNUZ();
194 case S_Float8E4M3:
195 return Float8E4M3();
196 case S_Float8E4M3FN:
197 return Float8E4M3FN();
198 case S_Float8E4M3FNUZ:
199 return Float8E4M3FNUZ();
201 return Float8E4M3B11FNUZ();
202 case S_Float8E3M4:
203 return Float8E3M4();
204 case S_FloatTF32:
205 return FloatTF32();
206 case S_Float8E8M0FNU:
207 return Float8E8M0FNU();
208 case S_Float6E3M2FN:
209 return Float6E3M2FN();
210 case S_Float6E2M3FN:
211 return Float6E2M3FN();
212 case S_Float4E2M1FN:
213 return Float4E2M1FN();
215 return x87DoubleExtended();
216 }
217 llvm_unreachable("Unrecognised floating semantics");
218}
219
222 if (&Sem == &llvm::APFloat::IEEEhalf())
223 return S_IEEEhalf;
224 else if (&Sem == &llvm::APFloat::BFloat())
225 return S_BFloat;
226 else if (&Sem == &llvm::APFloat::IEEEsingle())
227 return S_IEEEsingle;
228 else if (&Sem == &llvm::APFloat::IEEEdouble())
229 return S_IEEEdouble;
230 else if (&Sem == &llvm::APFloat::IEEEquad())
231 return S_IEEEquad;
232 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
233 return S_PPCDoubleDouble;
234 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
236 else if (&Sem == &llvm::APFloat::Float8E5M2())
237 return S_Float8E5M2;
238 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
239 return S_Float8E5M2FNUZ;
240 else if (&Sem == &llvm::APFloat::Float8E4M3())
241 return S_Float8E4M3;
242 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
243 return S_Float8E4M3FN;
244 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
245 return S_Float8E4M3FNUZ;
246 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
247 return S_Float8E4M3B11FNUZ;
248 else if (&Sem == &llvm::APFloat::Float8E3M4())
249 return S_Float8E3M4;
250 else if (&Sem == &llvm::APFloat::FloatTF32())
251 return S_FloatTF32;
252 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
253 return S_Float8E8M0FNU;
254 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
255 return S_Float6E3M2FN;
256 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
257 return S_Float6E2M3FN;
258 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
259 return S_Float4E2M1FN;
260 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
261 return S_x87DoubleExtended;
262 else
263 llvm_unreachable("Unknown floating semantics");
264}
265
267 const fltSemantics &B) {
268 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
269 A.precision <= B.precision;
270}
271
277
278/* A tight upper bound on number of parts required to hold the value
279 pow(5, power) is
280
281 power * 815 / (351 * integerPartWidth) + 1
282
283 However, whilst the result may require only this many parts,
284 because we are multiplying two values to get it, the
285 multiplication may require an extra part with the excess part
286 being zero (consider the trivial case of 1 * 1, tcFullMultiply
287 requires two parts to hold the single-part result). So we add an
288 extra one to guarantee enough space whilst multiplying. */
289const unsigned int maxExponent = 16383;
290const unsigned int maxPrecision = 113;
292const unsigned int maxPowerOfFiveParts =
293 2 +
295
296unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
297 return semantics.precision;
298}
301 return semantics.maxExponent;
302}
305 return semantics.minExponent;
306}
307unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
308 return semantics.sizeInBits;
309}
311 bool isSigned) {
312 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
313 // at least one more bit than the MaxExponent to hold the max FP value.
314 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
315 // Extra sign bit needed.
316 if (isSigned)
317 ++MinBitWidth;
318 return MinBitWidth;
319}
320
322 return semantics.hasZero;
323}
324
326 return semantics.hasSignedRepr;
327}
328
332
336
338 // Keep in sync with Type::isIEEELikeFPTy
339 return SemanticsToEnum(semantics) <= S_IEEEquad;
340}
341
343 return semantics.hasSignBitInMSB;
344}
345
347 const fltSemantics &Dst) {
348 // Exponent range must be larger.
349 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
350 return false;
351
352 // If the mantissa is long enough, the result value could still be denormal
353 // with a larger exponent range.
354 //
355 // FIXME: This condition is probably not accurate but also shouldn't be a
356 // practical concern with existing types.
357 return Dst.precision >= Src.precision;
358}
359
361 return Sem.sizeInBits;
362}
363
364static constexpr APFloatBase::ExponentType
365exponentZero(const fltSemantics &semantics) {
366 return semantics.minExponent - 1;
367}
368
369static constexpr APFloatBase::ExponentType
370exponentInf(const fltSemantics &semantics) {
371 return semantics.maxExponent + 1;
372}
373
374static constexpr APFloatBase::ExponentType
375exponentNaN(const fltSemantics &semantics) {
378 return exponentZero(semantics);
379 if (semantics.hasSignedRepr)
380 return semantics.maxExponent;
381 }
382 return semantics.maxExponent + 1;
383}
384
385/* A bunch of private, handy routines. */
386
387static inline Error createError(const Twine &Err) {
389}
390
391static constexpr inline unsigned int partCountForBits(unsigned int bits) {
392 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
394}
395
/* Map a character code to its decimal digit value, 0U-9U.  The subtraction
   is unsigned, so any result >= 10U means C was not a decimal digit. */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zeroCode = '0';

  return c - zeroCode;
}
402
403/* Return the value of a decimal exponent of the form
404 [+-]ddddddd.
405
406 If the exponent overflows, returns a large exponent with the
407 appropriate sign. */
410 bool isNegative;
411 unsigned int absExponent;
412 const unsigned int overlargeExponent = 24000; /* FIXME. */
413 StringRef::iterator p = begin;
414
415 // Treat no exponent as 0 to match binutils
416 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
417 return 0;
418 }
419
420 isNegative = (*p == '-');
421 if (*p == '-' || *p == '+') {
422 p++;
423 if (p == end)
424 return createError("Exponent has no digits");
425 }
426
427 absExponent = decDigitValue(*p++);
428 if (absExponent >= 10U)
429 return createError("Invalid character in exponent");
430
431 for (; p != end; ++p) {
432 unsigned int value;
433
434 value = decDigitValue(*p);
435 if (value >= 10U)
436 return createError("Invalid character in exponent");
437
438 absExponent = absExponent * 10U + value;
439 if (absExponent >= overlargeExponent) {
440 absExponent = overlargeExponent;
441 break;
442 }
443 }
444
445 if (isNegative)
446 return -(int) absExponent;
447 else
448 return (int) absExponent;
449}
450
451/* This is ugly and needs cleaning up, but I don't immediately see
452 how whilst remaining safe. */
455 int exponentAdjustment) {
456 int unsignedExponent;
457 bool negative, overflow;
458 int exponent = 0;
459
460 if (p == end)
461 return createError("Exponent has no digits");
462
463 negative = *p == '-';
464 if (*p == '-' || *p == '+') {
465 p++;
466 if (p == end)
467 return createError("Exponent has no digits");
468 }
469
470 unsignedExponent = 0;
471 overflow = false;
472 for (; p != end; ++p) {
473 unsigned int value;
474
475 value = decDigitValue(*p);
476 if (value >= 10U)
477 return createError("Invalid character in exponent");
478
479 unsignedExponent = unsignedExponent * 10 + value;
480 if (unsignedExponent > 32767) {
481 overflow = true;
482 break;
483 }
484 }
485
486 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
487 overflow = true;
488
489 if (!overflow) {
490 exponent = unsignedExponent;
491 if (negative)
492 exponent = -exponent;
493 exponent += exponentAdjustment;
494 if (exponent > 32767 || exponent < -32768)
495 overflow = true;
496 }
497
498 if (overflow)
499 exponent = negative ? -32768: 32767;
500
501 return exponent;
502}
503
506 StringRef::iterator *dot) {
507 StringRef::iterator p = begin;
508 *dot = end;
509 while (p != end && *p == '0')
510 p++;
511
512 if (p != end && *p == '.') {
513 *dot = p++;
514
515 if (end - begin == 1)
516 return createError("Significand has no digits");
517
518 while (p != end && *p == '0')
519 p++;
520 }
521
522 return p;
523}
524
525/* Given a normal decimal floating point number of the form
526
527 dddd.dddd[eE][+-]ddd
528
529 where the decimal point and exponent are optional, fill out the
530 structure D. Exponent is appropriate if the significand is
531 treated as an integer, and normalizedExponent if the significand
532 is taken to have the decimal point after a single leading
533 non-zero digit.
534
535 If the value is zero, V->firstSigDigit points to a non-digit, and
536 the return exponent is zero.
537*/
539 const char *firstSigDigit;
540 const char *lastSigDigit;
543};
544
547 StringRef::iterator dot = end;
548
549 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
550 if (!PtrOrErr)
551 return PtrOrErr.takeError();
552 StringRef::iterator p = *PtrOrErr;
553
554 D->firstSigDigit = p;
555 D->exponent = 0;
556 D->normalizedExponent = 0;
557
558 for (; p != end; ++p) {
559 if (*p == '.') {
560 if (dot != end)
561 return createError("String contains multiple dots");
562 dot = p++;
563 if (p == end)
564 break;
565 }
566 if (decDigitValue(*p) >= 10U)
567 break;
568 }
569
570 if (p != end) {
571 if (*p != 'e' && *p != 'E')
572 return createError("Invalid character in significand");
573 if (p == begin)
574 return createError("Significand has no digits");
575 if (dot != end && p - begin == 1)
576 return createError("Significand has no digits");
577
578 /* p points to the first non-digit in the string */
579 auto ExpOrErr = readExponent(p + 1, end);
580 if (!ExpOrErr)
581 return ExpOrErr.takeError();
582 D->exponent = *ExpOrErr;
583
584 /* Implied decimal point? */
585 if (dot == end)
586 dot = p;
587 }
588
589 /* If number is all zeroes accept any exponent. */
590 if (p != D->firstSigDigit) {
591 /* Drop insignificant trailing zeroes. */
592 if (p != begin) {
593 do
594 do
595 p--;
596 while (p != begin && *p == '0');
597 while (p != begin && *p == '.');
598 }
599
600 /* Adjust the exponents for any decimal point. */
601 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
602 D->normalizedExponent = (D->exponent +
603 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
604 - (dot > D->firstSigDigit && dot < p)));
605 }
606
607 D->lastSigDigit = p;
608 return Error::success();
609}
610
611/* Return the trailing fraction of a hexadecimal number.
612 DIGITVALUE is the first hex digit of the fraction, P points to
613 the next digit. */
616 unsigned int digitValue) {
617 unsigned int hexDigit;
618
619 /* If the first trailing digit isn't 0 or 8 we can work out the
620 fraction immediately. */
621 if (digitValue > 8)
622 return lfMoreThanHalf;
623 else if (digitValue < 8 && digitValue > 0)
624 return lfLessThanHalf;
625
626 // Otherwise we need to find the first non-zero digit.
627 while (p != end && (*p == '0' || *p == '.'))
628 p++;
629
630 if (p == end)
631 return createError("Invalid trailing hexadecimal fraction!");
632
633 hexDigit = hexDigitValue(*p);
634
635 /* If we ran off the end it is exactly zero or one-half, otherwise
636 a little more. */
637 if (hexDigit == UINT_MAX)
638 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
639 else
640 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
641}
642
643/* Return the fraction lost were a bignum truncated losing the least
644 significant BITS bits. */
645static lostFraction
647 unsigned int partCount,
648 unsigned int bits)
649{
650 unsigned int lsb;
651
652 lsb = APInt::tcLSB(parts, partCount);
653
654 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
655 if (bits <= lsb)
656 return lfExactlyZero;
657 if (bits == lsb + 1)
658 return lfExactlyHalf;
659 if (bits <= partCount * APFloatBase::integerPartWidth &&
660 APInt::tcExtractBit(parts, bits - 1))
661 return lfMoreThanHalf;
662
663 return lfLessThanHalf;
664}
665
666/* Shift DST right BITS bits noting lost fraction. */
667static lostFraction
668shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
669{
670 lostFraction lost_fraction;
671
672 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
673
674 APInt::tcShiftRight(dst, parts, bits);
675
676 return lost_fraction;
677}
678
679/* Combine the effect of two lost fractions. */
680static lostFraction
682 lostFraction lessSignificant)
683{
684 if (lessSignificant != lfExactlyZero) {
685 if (moreSignificant == lfExactlyZero)
686 moreSignificant = lfLessThanHalf;
687 else if (moreSignificant == lfExactlyHalf)
688 moreSignificant = lfMoreThanHalf;
689 }
690
691 return moreSignificant;
692}
693
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers.  The operands differ from the values they approximate by at
   most HUERR1 and HUERR2 half-ulps respectively; the true error is
   strictly less than the value returned.  INEXACTMULTIPLY says whether
   the multiplication itself lost bits.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int inexact = inexactMultiply ? 1U : 0U;

  /* With inexact inputs the input error is doubled and the rounding of the
     product adds at most one more half-ulp. */
  if (inputError != 0)
    return inexact + 2 * inputError;

  /* Exact inputs: only the multiplication can contribute. */
  return inexact * 2;
}
711
712/* The number of ulps from the boundary (zero, or half if ISNEAREST)
713 when the least significant BITS are truncated. BITS cannot be
714 zero. */
716ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
717 bool isNearest) {
718 unsigned int count, partBits;
719 APFloatBase::integerPart part, boundary;
720
721 assert(bits != 0);
722
723 bits--;
725 partBits = bits % APFloatBase::integerPartWidth + 1;
726
727 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
728
729 if (isNearest)
730 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
731 else
732 boundary = 0;
733
734 if (count == 0) {
735 if (part - boundary <= boundary - part)
736 return part - boundary;
737 else
738 return boundary - part;
739 }
740
741 if (part == boundary) {
742 while (--count)
743 if (parts[count])
744 return ~(APFloatBase::integerPart) 0; /* A lot. */
745
746 return parts[0];
747 } else if (part == boundary - 1) {
748 while (--count)
749 if (~parts[count])
750 return ~(APFloatBase::integerPart) 0; /* A lot. */
751
752 return -parts[0];
753 }
754
755 return ~(APFloatBase::integerPart) 0; /* A lot. */
756}
757
758/* Place pow(5, power) in DST, and return the number of parts used.
759 DST must be at least one part larger than size of the answer. */
760static unsigned int
761powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
762 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
764 pow5s[0] = 78125 * 5;
765
766 unsigned int partsCount = 1;
767 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
768 unsigned int result;
769 assert(power <= maxExponent);
770
771 p1 = dst;
772 p2 = scratch;
773
774 *p1 = firstEightPowers[power & 7];
775 power >>= 3;
776
777 result = 1;
778 pow5 = pow5s;
779
780 for (unsigned int n = 0; power; power >>= 1, n++) {
781 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
782 if (n != 0) {
783 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
784 partsCount, partsCount);
785 partsCount *= 2;
786 if (pow5[partsCount - 1] == 0)
787 partsCount--;
788 }
789
790 if (power & 1) {
792
793 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
794 result += partsCount;
795 if (p2[result - 1] == 0)
796 result--;
797
798 /* Now result is in p1 with partsCount parts and p2 is scratch
799 space. */
800 tmp = p1;
801 p1 = p2;
802 p2 = tmp;
803 }
804
805 pow5 += partsCount;
806 }
807
808 if (p1 != dst)
809 APInt::tcAssign(dst, p1, result);
810
811 return result;
812}
813
814/* Zero at the end to avoid modular arithmetic when adding one; used
815 when rounding up during hexadecimal output. */
816static const char hexDigitsLower[] = "0123456789abcdef0";
817static const char hexDigitsUpper[] = "0123456789ABCDEF0";
818static const char infinityL[] = "infinity";
819static const char infinityU[] = "INFINITY";
820static const char NaNL[] = "nan";
821static const char NaNU[] = "NAN";
822
823/* Write out an integerPart in hexadecimal, starting with the most
824 significant nibble. Write out exactly COUNT hexdigits, return
825 COUNT. */
826static unsigned int
827partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
828 const char *hexDigitChars)
829{
830 unsigned int result = count;
831
833
834 part >>= (APFloatBase::integerPartWidth - 4 * count);
835 while (count--) {
836 dst[count] = hexDigitChars[part & 0xf];
837 part >>= 4;
838 }
839
840 return result;
841}
842
/* Write N in decimal at DST, most significant digit first, and return a
   pointer just past the last character written.  No terminator is
   appended. */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int length = 0;

  /* Produce the digits least significant first; zero still yields "0". */
  do {
    digits[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Emit them in reverse so the output reads most significant first. */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
860
861/* Write out a signed decimal integer. */
862static char *
863writeSignedDecimal (char *dst, int value)
864{
865 if (value < 0) {
866 *dst++ = '-';
867 dst = writeUnsignedDecimal(dst, -(unsigned) value);
868 } else {
869 dst = writeUnsignedDecimal(dst, value);
870 }
871
872 return dst;
873}
874
875// Compute the ULP of the input using a definition from:
876// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
877// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
878static APFloat harrisonUlp(const APFloat &X) {
879 const fltSemantics &Sem = X.getSemantics();
880 switch (X.getCategory()) {
881 case APFloat::fcNaN:
882 return APFloat::getQNaN(Sem);
884 return APFloat::getInf(Sem);
885 case APFloat::fcZero:
886 return APFloat::getSmallest(Sem);
888 break;
889 }
890 if (X.isDenormal() || X.isSmallestNormalized())
891 return APFloat::getSmallest(Sem);
892 int Exp = ilogb(X);
893 if (X.getExactLog2() != INT_MIN)
894 Exp -= 1;
895 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
897}
898
899namespace detail {
900/* Constructors. */
901void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
902 unsigned int count;
903
904 semantics = ourSemantics;
905 count = partCount();
906 if (count > 1)
907 significand.parts = new integerPart[count];
908}
909
910void IEEEFloat::freeSignificand() {
911 if (needsCleanup())
912 delete [] significand.parts;
913}
914
915void IEEEFloat::assign(const IEEEFloat &rhs) {
916 assert(semantics == rhs.semantics);
917
918 sign = rhs.sign;
919 category = rhs.category;
920 exponent = rhs.exponent;
921 if (isFiniteNonZero() || category == fcNaN)
922 copySignificand(rhs);
923}
924
925void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
926 assert(isFiniteNonZero() || category == fcNaN);
927 assert(rhs.partCount() >= partCount());
928
929 APInt::tcAssign(significandParts(), rhs.significandParts(),
930 partCount());
931}
932
933/* Make this number a NaN, with an arbitrary but deterministic value
934 for the significand. If double or longer, this is a signalling NaN,
935 which may not be ideal. If float, this is QNaN(0). */
936void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
937 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
938 llvm_unreachable("This floating point format does not support NaN");
939
940 if (Negative && !semantics->hasSignedRepr)
942 "This floating point format does not support signed values");
943
944 category = fcNaN;
945 sign = Negative;
946 exponent = exponentNaN();
947
948 integerPart *significand = significandParts();
949 unsigned numParts = partCount();
950
951 APInt fill_storage;
952 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
953 // NaN-only types do not distinguish signalling and quiet NaN, so
954 // make them all quiet.
955 SNaN = false;
956 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
957 sign = true;
958 fill_storage = APInt::getZero(semantics->precision - 1);
959 } else {
960 fill_storage = APInt::getAllOnes(semantics->precision - 1);
961 }
962 fill = &fill_storage;
963 }
964
965 // Set the significand bits to the fill.
966 if (!fill || fill->getNumWords() < numParts)
967 APInt::tcSet(significand, 0, numParts);
968 if (fill) {
969 APInt::tcAssign(significand, fill->getRawData(),
970 std::min(fill->getNumWords(), numParts));
971
972 // Zero out the excess bits of the significand.
973 unsigned bitsToPreserve = semantics->precision - 1;
974 unsigned part = bitsToPreserve / 64;
975 bitsToPreserve %= 64;
976 significand[part] &= ((1ULL << bitsToPreserve) - 1);
977 for (part++; part != numParts; ++part)
978 significand[part] = 0;
979 }
980
981 unsigned QNaNBit =
982 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
983
984 if (SNaN) {
985 // We always have to clear the QNaN bit to make it an SNaN.
986 APInt::tcClearBit(significand, QNaNBit);
987
988 // If there are no bits set in the payload, we have to set
989 // *something* to make it a NaN instead of an infinity;
990 // conventionally, this is the next bit down from the QNaN bit.
991 if (APInt::tcIsZero(significand, numParts))
992 APInt::tcSetBit(significand, QNaNBit - 1);
993 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
994 // The only NaN is a quiet NaN, and it has no bits sets in the significand.
995 // Do nothing.
996 } else {
997 // We always have to set the QNaN bit to make it a QNaN.
998 APInt::tcSetBit(significand, QNaNBit);
999 }
1000
1001 // For x87 extended precision, we want to make a NaN, not a
1002 // pseudo-NaN. Maybe we should expose the ability to make
1003 // pseudo-NaNs?
1004 if (semantics == &APFloatBase::semX87DoubleExtended)
1005 APInt::tcSetBit(significand, QNaNBit + 1);
1006}
1007
1009 if (this != &rhs) {
1010 if (semantics != rhs.semantics) {
1011 freeSignificand();
1012 initialize(rhs.semantics);
1013 }
1014 assign(rhs);
1015 }
1016
1017 return *this;
1018}
1019
1021 freeSignificand();
1022
1023 semantics = rhs.semantics;
1024 significand = rhs.significand;
1025 exponent = rhs.exponent;
1026 category = rhs.category;
1027 sign = rhs.sign;
1028
1029 rhs.semantics = &APFloatBase::semBogus;
1030 return *this;
1031}
1032
1034 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1035 (APInt::tcExtractBit(significandParts(),
1036 semantics->precision - 1) == 0);
1037}
1038
1040 // The smallest number by magnitude in our format will be the smallest
1041 // denormal, i.e. the floating point number with exponent being minimum
1042 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1043 return isFiniteNonZero() && exponent == semantics->minExponent &&
1044 significandMSB() == 0;
1045}
1046
1048 return getCategory() == fcNormal && exponent == semantics->minExponent &&
1049 isSignificandAllZerosExceptMSB();
1050}
1051
1052unsigned int IEEEFloat::getNumHighBits() const {
1053 const unsigned int PartCount = partCountForBits(semantics->precision);
1054 const unsigned int Bits = PartCount * integerPartWidth;
1055
1056 // Compute how many bits are used in the final word.
1057 // When precision is just 1, it represents the 'Pth'
1058 // Precision bit and not the actual significand bit.
1059 const unsigned int NumHighBits = (semantics->precision > 1)
1060 ? (Bits - semantics->precision + 1)
1061 : (Bits - semantics->precision);
1062 return NumHighBits;
1063}
1064
1065bool IEEEFloat::isSignificandAllOnes() const {
1066 // Test if the significand excluding the integral bit is all ones. This allows
1067 // us to test for binade boundaries.
1068 const integerPart *Parts = significandParts();
1069 const unsigned PartCount = partCountForBits(semantics->precision);
1070 for (unsigned i = 0; i < PartCount - 1; i++)
1071 if (~Parts[i])
1072 return false;
1073
1074 // Set the unused high bits to all ones when we compare.
1075 const unsigned NumHighBits = getNumHighBits();
1076 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1077 "Can not have more high bits to fill than integerPartWidth");
1078 const integerPart HighBitFill =
1079 ~integerPart(0) << (integerPartWidth - NumHighBits);
1080 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1081 return false;
1082
1083 return true;
1084}
1085
1086bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1087 // Test if the significand excluding the integral bit is all ones except for
1088 // the least significant bit.
1089 const integerPart *Parts = significandParts();
1090
1091 if (Parts[0] & 1)
1092 return false;
1093
1094 const unsigned PartCount = partCountForBits(semantics->precision);
1095 for (unsigned i = 0; i < PartCount - 1; i++) {
1096 if (~Parts[i] & ~unsigned{!i})
1097 return false;
1098 }
1099
1100 // Set the unused high bits to all ones when we compare.
1101 const unsigned NumHighBits = getNumHighBits();
1102 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1103 "Can not have more high bits to fill than integerPartWidth");
1104 const integerPart HighBitFill = ~integerPart(0)
1105 << (integerPartWidth - NumHighBits);
1106 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1107 return false;
1108
1109 return true;
1110}
1111
1112bool IEEEFloat::isSignificandAllZeros() const {
1113 // Test if the significand excluding the integral bit is all zeros. This
1114 // allows us to test for binade boundaries.
1115 const integerPart *Parts = significandParts();
1116 const unsigned PartCount = partCountForBits(semantics->precision);
1117
1118 for (unsigned i = 0; i < PartCount - 1; i++)
1119 if (Parts[i])
1120 return false;
1121
1122 // Compute how many bits are used in the final word.
1123 const unsigned NumHighBits = getNumHighBits();
1124 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1125 "clear than integerPartWidth");
1126 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1127
1128 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1129 return false;
1130
1131 return true;
1132}
1133
1134bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1135 const integerPart *Parts = significandParts();
1136 const unsigned PartCount = partCountForBits(semantics->precision);
1137
1138 for (unsigned i = 0; i < PartCount - 1; i++) {
1139 if (Parts[i])
1140 return false;
1141 }
1142
1143 const unsigned NumHighBits = getNumHighBits();
1144 const integerPart MSBMask = integerPart(1)
1145 << (integerPartWidth - NumHighBits);
1146 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1147}
1148
1150 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1151 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1152 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1153 // The largest number by magnitude in our format will be the floating point
1154 // number with maximum exponent and with significand that is all ones except
1155 // the LSB.
1156 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1157 ? isSignificandAllOnesExceptLSB()
1158 : IsMaxExp;
1159 } else {
1160 // The largest number by magnitude in our format will be the floating point
1161 // number with maximum exponent and with significand that is all ones.
1162 return IsMaxExp && isSignificandAllOnes();
1163 }
1164}
1165
1167 // This could be made more efficient; I'm going for obviously correct.
1168 if (!isFinite()) return false;
1169 IEEEFloat truncated = *this;
1170 truncated.roundToIntegral(rmTowardZero);
1171 return compare(truncated) == cmpEqual;
1172}
1173
1174bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1175 if (this == &rhs)
1176 return true;
1177 if (semantics != rhs.semantics ||
1178 category != rhs.category ||
1179 sign != rhs.sign)
1180 return false;
1181 if (category==fcZero || category==fcInfinity)
1182 return true;
1183
1184 if (isFiniteNonZero() && exponent != rhs.exponent)
1185 return false;
1186
1187 return std::equal(significandParts(), significandParts() + partCount(),
1188 rhs.significandParts());
1189}
1190
1192 initialize(&ourSemantics);
1193 sign = 0;
1194 category = fcNormal;
1195 zeroSignificand();
1196 exponent = ourSemantics.precision - 1;
1197 significandParts()[0] = value;
1199}
1200
1202 initialize(&ourSemantics);
1203 // The Float8E8MOFNU format does not have a representation
1204 // for zero. So, use the closest representation instead.
1205 // Moreover, the all-zero encoding represents a valid
1206 // normal value (which is the smallestNormalized here).
1207 // Hence, we call makeSmallestNormalized (where category is
1208 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1209 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1210}
1211
1212// Delegate to the previous constructor, because the copy constructor may
1213// later inspect category, which must not be garbage.
1215 : IEEEFloat(ourSemantics) {}
1216
1218 initialize(rhs.semantics);
1219 assign(rhs);
1220}
1221
1222IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1223 *this = std::move(rhs);
1224}
1225
1226IEEEFloat::~IEEEFloat() { freeSignificand(); }
1227
1228unsigned int IEEEFloat::partCount() const {
1229 return partCountForBits(semantics->precision + 1);
1230}
1231
1232const APFloat::integerPart *IEEEFloat::significandParts() const {
1233 return const_cast<IEEEFloat *>(this)->significandParts();
1234}
1235
1236APFloat::integerPart *IEEEFloat::significandParts() {
1237 if (partCount() > 1)
1238 return significand.parts;
1239 else
1240 return &significand.part;
1241}
1242
1243void IEEEFloat::zeroSignificand() {
1244 APInt::tcSet(significandParts(), 0, partCount());
1245}
1246
1247/* Increment an fcNormal floating point number's significand. */
1248void IEEEFloat::incrementSignificand() {
1249 integerPart carry;
1250
1251 carry = APInt::tcIncrement(significandParts(), partCount());
1252
1253 /* Our callers should never cause us to overflow. */
1254 assert(carry == 0);
1255 (void)carry;
1256}
1257
1258/* Add the significand of the RHS. Returns the carry flag. */
1259APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1260 integerPart *parts;
1261
1262 parts = significandParts();
1263
1264 assert(semantics == rhs.semantics);
1265 assert(exponent == rhs.exponent);
1266
1267 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1268}
1269
1270/* Subtract the significand of the RHS with a borrow flag. Returns
1271 the borrow flag. */
1272APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1273 integerPart borrow) {
1274 integerPart *parts;
1275
1276 parts = significandParts();
1277
1278 assert(semantics == rhs.semantics);
1279 assert(exponent == rhs.exponent);
1280
1281 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1282 partCount());
1283}
1284
1285/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1286 on to the full-precision result of the multiplication. Returns the
1287 lost fraction. */
1288lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1289 IEEEFloat addend,
1290 bool ignoreAddend) {
1291 unsigned int omsb; // One, not zero, based MSB.
1292 unsigned int partsCount, newPartsCount, precision;
1293 integerPart *lhsSignificand;
1294 integerPart scratch[4];
1295 integerPart *fullSignificand;
1296 lostFraction lost_fraction;
1297 bool ignored;
1298
1299 assert(semantics == rhs.semantics);
1300
1301 precision = semantics->precision;
1302
1303 // Allocate space for twice as many bits as the original significand, plus one
1304 // extra bit for the addition to overflow into.
1305 newPartsCount = partCountForBits(precision * 2 + 1);
1306
1307 if (newPartsCount > 4)
1308 fullSignificand = new integerPart[newPartsCount];
1309 else
1310 fullSignificand = scratch;
1311
1312 lhsSignificand = significandParts();
1313 partsCount = partCount();
1314
1315 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1316 rhs.significandParts(), partsCount, partsCount);
1317
1318 lost_fraction = lfExactlyZero;
1319 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1320 exponent += rhs.exponent;
1321
1322 // Assume the operands involved in the multiplication are single-precision
1323 // FP, and the two multiplicants are:
1324 // *this = a23 . a22 ... a0 * 2^e1
1325 // rhs = b23 . b22 ... b0 * 2^e2
1326 // the result of multiplication is:
1327 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1328 // Note that there are three significant bits at the left-hand side of the
1329 // radix point: two for the multiplication, and an overflow bit for the
1330 // addition (that will always be zero at this point). Move the radix point
1331 // toward left by two bits, and adjust exponent accordingly.
1332 exponent += 2;
1333
1334 if (!ignoreAddend && addend.isNonZero()) {
1335 // The intermediate result of the multiplication has "2 * precision"
1336 // signicant bit; adjust the addend to be consistent with mul result.
1337 //
1338 Significand savedSignificand = significand;
1339 const fltSemantics *savedSemantics = semantics;
1340 fltSemantics extendedSemantics;
1342 unsigned int extendedPrecision;
1343
1344 // Normalize our MSB to one below the top bit to allow for overflow.
1345 extendedPrecision = 2 * precision + 1;
1346 if (omsb != extendedPrecision - 1) {
1347 assert(extendedPrecision > omsb);
1348 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1349 (extendedPrecision - 1) - omsb);
1350 exponent -= (extendedPrecision - 1) - omsb;
1351 }
1352
1353 /* Create new semantics. */
1354 extendedSemantics = *semantics;
1355 extendedSemantics.precision = extendedPrecision;
1356
1357 if (newPartsCount == 1)
1358 significand.part = fullSignificand[0];
1359 else
1360 significand.parts = fullSignificand;
1361 semantics = &extendedSemantics;
1362
1363 // Make a copy so we can convert it to the extended semantics.
1364 // Note that we cannot convert the addend directly, as the extendedSemantics
1365 // is a local variable (which we take a reference to).
1366 IEEEFloat extendedAddend(addend);
1367 status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
1368 &ignored);
1369 assert(status == APFloat::opOK);
1370 (void)status;
1371
1372 // Shift the significand of the addend right by one bit. This guarantees
1373 // that the high bit of the significand is zero (same as fullSignificand),
1374 // so the addition will overflow (if it does overflow at all) into the top bit.
1375 lost_fraction = extendedAddend.shiftSignificandRight(1);
1376 assert(lost_fraction == lfExactlyZero &&
1377 "Lost precision while shifting addend for fused-multiply-add.");
1378
1379 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1380
1381 /* Restore our state. */
1382 if (newPartsCount == 1)
1383 fullSignificand[0] = significand.part;
1384 significand = savedSignificand;
1385 semantics = savedSemantics;
1386
1387 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1388 }
1389
1390 // Convert the result having "2 * precision" significant-bits back to the one
1391 // having "precision" significant-bits. First, move the radix point from
1392 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1393 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1394 exponent -= precision + 1;
1395
1396 // In case MSB resides at the left-hand side of radix point, shift the
1397 // mantissa right by some amount to make sure the MSB reside right before
1398 // the radix point (i.e. "MSB . rest-significant-bits").
1399 //
1400 // Note that the result is not normalized when "omsb < precision". So, the
1401 // caller needs to call IEEEFloat::normalize() if normalized value is
1402 // expected.
1403 if (omsb > precision) {
1404 unsigned int bits, significantParts;
1405 lostFraction lf;
1406
1407 bits = omsb - precision;
1408 significantParts = partCountForBits(omsb);
1409 lf = shiftRight(fullSignificand, significantParts, bits);
1410 lost_fraction = combineLostFractions(lf, lost_fraction);
1411 exponent += bits;
1412 }
1413
1414 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1415
1416 if (newPartsCount > 4)
1417 delete [] fullSignificand;
1418
1419 return lost_fraction;
1420}
1421
1422lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1423 // When the given semantics has zero, the addend here is a zero.
1424 // i.e . it belongs to the 'fcZero' category.
1425 // But when the semantics does not support zero, we need to
1426 // explicitly convey that this addend should be ignored
1427 // for multiplication.
1428 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1429}
1430
1431/* Divide our significand by the significand of RHS using bit-at-a-time
   long division. The quotient bits are written into our significand and
   the exponent is adjusted to match. Returns the lost fraction, obtained
   by comparing the shifted final remainder with the divisor. */
1432lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1433 unsigned int bit, i, partsCount;
1434 const integerPart *rhsSignificand;
1435 integerPart *lhsSignificand, *dividend, *divisor;
1436 integerPart scratch[4];
1437 lostFraction lost_fraction;
1438
1439 assert(semantics == rhs.semantics);
1440
1441 lhsSignificand = significandParts();
1442 rhsSignificand = rhs.significandParts();
1443 partsCount = partCount();
1444
// One buffer holds both working copies (dividend first, divisor after it).
// The 4-word scratch array suffices when partsCount <= 2; otherwise the
// buffer is heap-allocated and released before returning.
1445 if (partsCount > 2)
1446 dividend = new integerPart[partsCount * 2];
1447 else
1448 dividend = scratch;
1449
1450 divisor = dividend + partsCount;
1451
1452 /* Copy the dividend and divisor as they will be modified in-place.
   Our own significand is cleared so quotient bits can be set into it. */
1453 for (i = 0; i < partsCount; i++) {
1454 dividend[i] = lhsSignificand[i];
1455 divisor[i] = rhsSignificand[i];
1456 lhsSignificand[i] = 0;
1457 }
1458
1459 exponent -= rhs.exponent;
1460
1461 unsigned int precision = semantics->precision;
1462
1463 /* Normalize the divisor: shift its MSB up to bit precision - 1 and
   compensate in the exponent. */
1464 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1465 if (bit) {
1466 exponent += bit;
1467 APInt::tcShiftLeft(divisor, partsCount, bit);
1468 }
1469
1470 /* Normalize the dividend in the same way (opposite exponent change). */
1471 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1472 if (bit) {
1473 exponent -= bit;
1474 APInt::tcShiftLeft(dividend, partsCount, bit);
1475 }
1476
1477 /* Ensure the dividend >= divisor initially for the loop below.
1478 Incidentally, this means that the division loop below is
1479 guaranteed to set the integer bit to one. */
1480 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1481 exponent--;
1482 APInt::tcShiftLeft(dividend, partsCount, 1);
1483 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1484 }
1485
1486 /* Long division: produce one quotient bit per iteration, from bit
   precision - 1 down to bit 0. */
1487 for (bit = precision; bit; bit -= 1) {
1488 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1489 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1490 APInt::tcSetBit(lhsSignificand, bit - 1);
1491 }
1492
1493 APInt::tcShiftLeft(dividend, partsCount, 1);
1494 }
1495
1496 /* Figure out the lost fraction. At this point `dividend` holds the
   remainder shifted left by one, so comparing it with the divisor
   compares the remainder with half the divisor. */
1497 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1498
1499 if (cmp > 0)
1500 lost_fraction = lfMoreThanHalf;
1501 else if (cmp == 0)
1502 lost_fraction = lfExactlyHalf;
1503 else if (APInt::tcIsZero(dividend, partsCount))
1504 lost_fraction = lfExactlyZero;
1505 else
1506 lost_fraction = lfLessThanHalf;
1507
1508 if (partsCount > 2)
1509 delete [] dividend;
1510
1511 return lost_fraction;
1512}
1513
1514unsigned int IEEEFloat::significandMSB() const {
1515 return APInt::tcMSB(significandParts(), partCount());
1516}
1517
1518unsigned int IEEEFloat::significandLSB() const {
1519 return APInt::tcLSB(significandParts(), partCount());
1520}
1521
1522/* Note that a zero result is NOT normalized to fcZero. */
1523lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1524 /* Our exponent should not overflow. */
1525 assert((ExponentType) (exponent + bits) >= exponent);
1526
1527 exponent += bits;
1528
1529 return shiftRight(significandParts(), partCount(), bits);
1530}
1531
1532/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1533void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1534 assert(bits < semantics->precision ||
1535 (semantics->precision == 1 && bits <= 1));
1536
1537 if (bits) {
1538 unsigned int partsCount = partCount();
1539
1540 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1541 exponent -= bits;
1542
1543 assert(!APInt::tcIsZero(significandParts(), partsCount));
1544 }
1545}
1546
1548 int compare;
1549
1550 assert(semantics == rhs.semantics);
1552 assert(rhs.isFiniteNonZero());
1553
1554 compare = exponent - rhs.exponent;
1555
1556 /* If exponents are equal, do an unsigned bignum comparison of the
1557 significands. */
1558 if (compare == 0)
1559 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1560 partCount());
1561
1562 if (compare > 0)
1563 return cmpGreaterThan;
1564 else if (compare < 0)
1565 return cmpLessThan;
1566 else
1567 return cmpEqual;
1568}
1569
1570/* Set the least significant BITS bits of a bignum, clear the
1571 rest. */
1572static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1573 unsigned bits) {
1574 unsigned i = 0;
1575 while (bits > APInt::APINT_BITS_PER_WORD) {
1576 dst[i++] = ~(APInt::WordType)0;
1578 }
1579
1580 if (bits)
1581 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1582
1583 while (i < parts)
1584 dst[i++] = 0;
1585}
1586
1587/* Handle overflow. Sign is preserved. We either become infinity or
1588 the largest finite number. */
// NOTE(review): two lines appear to be missing from this listing (the
// embedded numbering skips 1590 and 1596). As shown, the `else` below has no
// matching `if` and there is one more `}` than `{` before the "largest finite
// number" section. divideSpecials contains the same makeNaN/fcInfinity pair
// guarded by `semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly`,
// so the inner lost line is presumably that condition; the outer lost line
// presumably opens an enclosing `if`. Restore both from upstream before
// building.
1589APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1591 /* Infinity? The two round-to-nearest modes, and the directed modes that
   point away from zero for the current sign, take this branch. */
1592 if (rounding_mode == rmNearestTiesToEven ||
1593 rounding_mode == rmNearestTiesToAway ||
1594 (rounding_mode == rmTowardPositive && !sign) ||
1595 (rounding_mode == rmTowardNegative && sign)) {
1597 makeNaN(false, sign);
1598 else
1599 category = fcInfinity;
1600 return static_cast<opStatus>(opOverflow | opInexact);
1601 }
1602 }
1603
1604 /* Otherwise we become the largest finite number: maximum exponent with
   the low `precision` significand bits set. */
1605 category = fcNormal;
1606 exponent = semantics->maxExponent;
1607 tcSetLeastSignificantBits(significandParts(), partCount(),
1608 semantics->precision);
// For NanOnly semantics whose NaN encoding is all ones, bit 0 is cleared so
// the value differs from the all-ones pattern.
1609 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1610 semantics->nanEncoding == fltNanEncoding::AllOnes)
1611 APInt::tcClearBit(significandParts(), 0);
1612
1613 return opInexact;
1614}
1615
1616/* Returns TRUE if, when truncating the current number, with BIT the
1617 new LSB, with the given lost fraction and rounding mode, the result
1618 would need to be rounded away from zero (i.e., by increasing the
1619 signficand). This routine must work for fcZero of both signs, and
1620 fcNormal numbers. */
1621bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1622 lostFraction lost_fraction,
1623 unsigned int bit) const {
1624 /* NaNs and infinities should not have lost fractions. */
1625 assert(isFiniteNonZero() || category == fcZero);
1626
1627 /* Current callers never pass this so we don't handle it. */
1628 assert(lost_fraction != lfExactlyZero);
1629
1630 switch (rounding_mode) {
1632 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1633
1635 if (lost_fraction == lfMoreThanHalf)
1636 return true;
1637
1638 /* Our zeroes don't have a significand to test. */
1639 if (lost_fraction == lfExactlyHalf && category != fcZero)
1640 return APInt::tcExtractBit(significandParts(), bit);
1641
1642 return false;
1643
1644 case rmTowardZero:
1645 return false;
1646
1647 case rmTowardPositive:
1648 return !sign;
1649
1650 case rmTowardNegative:
1651 return sign;
1652
1653 default:
1654 break;
1655 }
1656 llvm_unreachable("Invalid rounding mode found");
1657}
1658
1659APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1660 lostFraction lost_fraction) {
1661 unsigned int omsb; /* One, not zero, based MSB. */
1662 int exponentChange;
1663
1664 if (!isFiniteNonZero())
1665 return opOK;
1666
1667 /* Before rounding normalize the exponent of fcNormal numbers. */
1668 omsb = significandMSB() + 1;
1669
1670 // Only skip this `if` if the value is exactly zero.
1671 if (omsb || lost_fraction != lfExactlyZero) {
1672 /* OMSB is numbered from 1. We want to place it in the integer
1673 bit numbered PRECISION if possible, with a compensating change in
1674 the exponent. */
1675 exponentChange = omsb - semantics->precision;
1676
1677 /* If the resulting exponent is too high, overflow according to
1678 the rounding mode. */
1679 if (exponent + exponentChange > semantics->maxExponent)
1680 return handleOverflow(rounding_mode);
1681
1682 /* Subnormal numbers have exponent minExponent, and their MSB
1683 is forced based on that. */
1684 if (exponent + exponentChange < semantics->minExponent)
1685 exponentChange = semantics->minExponent - exponent;
1686
1687 /* Shifting left is easy as we don't lose precision. */
1688 if (exponentChange < 0) {
1689 assert(lost_fraction == lfExactlyZero);
1690
1691 shiftSignificandLeft(-exponentChange);
1692
1693 return opOK;
1694 }
1695
1696 if (exponentChange > 0) {
1697 lostFraction lf;
1698
1699 /* Shift right and capture any new lost fraction. */
1700 lf = shiftSignificandRight(exponentChange);
1701
1702 lost_fraction = combineLostFractions(lf, lost_fraction);
1703
1704 /* Keep OMSB up-to-date. */
1705 if (omsb > (unsigned) exponentChange)
1706 omsb -= exponentChange;
1707 else
1708 omsb = 0;
1709 }
1710 }
1711
1712 // The all-ones values is an overflow if NaN is all ones. If NaN is
1713 // represented by negative zero, then it is a valid finite value.
1714 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1715 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1716 exponent == semantics->maxExponent && isSignificandAllOnes())
1717 return handleOverflow(rounding_mode);
1718
1719 /* Now round the number according to rounding_mode given the lost
1720 fraction. */
1721
1722 /* As specified in IEEE 754, since we do not trap we do not report
1723 underflow for exact results. */
1724 if (lost_fraction == lfExactlyZero) {
1725 /* Canonicalize zeroes. */
1726 if (omsb == 0) {
1727 category = fcZero;
1728 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1729 sign = false;
1730 if (!semantics->hasZero)
1732 }
1733
1734 return opOK;
1735 }
1736
1737 /* Increment the significand if we're rounding away from zero. */
1738 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1739 if (omsb == 0)
1740 exponent = semantics->minExponent;
1741
1742 incrementSignificand();
1743 omsb = significandMSB() + 1;
1744
1745 /* Did the significand increment overflow? */
1746 if (omsb == (unsigned) semantics->precision + 1) {
1747 /* Renormalize by incrementing the exponent and shifting our
1748 significand right one. However if we already have the
1749 maximum exponent we overflow to infinity. */
1750 if (exponent == semantics->maxExponent)
1751 // Invoke overflow handling with a rounding mode that will guarantee
1752 // that the result gets turned into the correct infinity representation.
1753 // This is needed instead of just setting the category to infinity to
1754 // account for 8-bit floating point types that have no inf, only NaN.
1755 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1756
1757 shiftSignificandRight(1);
1758
1759 return opInexact;
1760 }
1761
1762 // The all-ones values is an overflow if NaN is all ones. If NaN is
1763 // represented by negative zero, then it is a valid finite value.
1764 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1765 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1766 exponent == semantics->maxExponent && isSignificandAllOnes())
1767 return handleOverflow(rounding_mode);
1768 }
1769
1770 /* The normal case - we were and are not denormal, and any
1771 significand increment above didn't overflow. */
1772 if (omsb == semantics->precision)
1773 return opInexact;
1774
1775 /* We have a non-zero denormal. */
1776 assert(omsb < semantics->precision);
1777
1778 /* Canonicalize zeroes. */
1779 if (omsb == 0) {
1780 category = fcZero;
1781 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1782 sign = false;
1783 // This condition handles the case where the semantics
1784 // does not have zero but uses the all-zero encoding
1785 // to represent the smallest normal value.
1786 if (!semantics->hasZero)
1788 }
1789
1790 /* The fcZero case is a denormal that underflowed to zero. */
1791 return (opStatus) (opUnderflow | opInexact);
1792}
1793
// Handle the category combinations of an addition/subtraction that need no
// significand arithmetic. The switch key packs this value's category with
// rhs's category. A return of opDivByZero is used as a marker: the caller
// (addOrSubtract) treats it as "not a simple case" and performs the real
// arithmetic.
//
// NOTE(review): every `case PackCategoriesIntoKey(...)` label appears to be
// missing from this listing (the embedded numbering has gaps, e.g. 1800-1802,
// 1805-1808, 1815-1817); only `default:` survives, so as shown the statement
// groups below are unreachable. Restore the labels from upstream before
// building.
1794APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1795 bool subtract) {
1796 switch (PackCategoriesIntoKey(category, rhs.category)) {
1797 default:
1798 llvm_unreachable(nullptr);
1799
// Group: take rhs's value, then fall through to the signaling check.
1803 assign(rhs);
1804 [[fallthrough]];
// Group: a signaling value in *this is quieted and reported as invalid;
// otherwise the status depends on whether rhs is signaling.
1809 if (isSignaling()) {
1810 makeQuiet();
1811 return opInvalidOp;
1812 }
1813 return rhs.isSignaling() ? opInvalidOp : opOK;
1814
// Group: *this is already the result.
1818 return opOK;
1819
// Group: result is an infinity whose sign is rhs's sign, flipped when
// subtracting.
1822 category = fcInfinity;
1823 sign = rhs.sign ^ subtract;
1824 return opOK;
1825
// Group: result is rhs, with its sign flipped when subtracting.
1827 assign(rhs);
1828 sign = rhs.sign ^ subtract;
1829 return opOK;
1830
1832 /* Sign depends on rounding mode; handled by caller. */
1833 return opOK;
1834
1836 /* Differently signed infinities can only be validly
1837 subtracted. */
1838 if (((sign ^ rhs.sign)!=0) != subtract) {
1839 makeNaN();
1840 return opInvalidOp;
1841 }
1842
1843 return opOK;
1844
// Group: not a special case; signal the caller with the marker status.
1846 return opDivByZero;
1847 }
1848}
1849
1850/* Add or subtract two normal numbers. Operates on the significands and
   exponent of *this, may flip the sign of *this when the subtraction is
   reversed, and returns the lost fraction. */
1851lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1852 bool subtract) {
1853 integerPart carry = 0;
1854 lostFraction lost_fraction;
1855 int bits;
1856
1857 /* Determine if the operation on the absolute values is effectively
1858 an addition or subtraction. */
1859 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1860
1861 /* Are we bigger exponent-wise than the RHS? */
1862 bits = exponent - rhs.exponent;
1863
1864 /* Subtraction is more subtle than one might naively expect. */
1865 if (subtract) {
// NOTE(review): the line between the condition below and the string literal
// (embedded number 1867) is missing from this listing; presumably it is the
// call that takes the message as its argument. Restore from upstream.
1866 if ((bits < 0) && !semantics->hasSignedRepr)
1868 "This floating point format does not support signed values");
1869
1870 IEEEFloat temp_rhs(rhs);
1871 bool lost_fraction_is_from_rhs = false;
1872
// Align the operands with one extra low-order bit of headroom: the operand
// with the smaller exponent is shifted right by |bits| - 1 and the other is
// shifted left by 1, so both end up with the same exponent.
1873 if (bits == 0)
1874 lost_fraction = lfExactlyZero;
1875 else if (bits > 0) {
1876 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1877 lost_fraction_is_from_rhs = true;
1878 shiftSignificandLeft(1);
1879 } else {
1880 lost_fraction = shiftSignificandRight(-bits - 1);
1881 temp_rhs.shiftSignificandLeft(1);
1882 }
1883
1884 // Should we reverse the subtraction.
1885 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1886 if (cmp_result == cmpLessThan) {
// |*this| < |rhs|: compute rhs - *this and flip our sign. A borrow is needed
// only when the lost fraction belongs to *this (the subtrahend here).
1887 bool borrow =
1888 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1889 if (borrow) {
1890 // The lost fraction is being subtracted, borrow from the significand
1891 // and invert `lost_fraction`.
1892 if (lost_fraction == lfLessThanHalf)
1893 lost_fraction = lfMoreThanHalf;
1894 else if (lost_fraction == lfMoreThanHalf)
1895 lost_fraction = lfLessThanHalf;
1896 }
1897 carry = temp_rhs.subtractSignificand(*this, borrow);
1898 copySignificand(temp_rhs);
1899 sign = !sign;
1900 } else if (cmp_result == cmpGreaterThan) {
// |*this| > |rhs|: compute *this - rhs. A borrow is needed only when the
// lost fraction belongs to rhs (the subtrahend here).
1901 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1902 if (borrow) {
1903 // The lost fraction is being subtracted, borrow from the significand
1904 // and invert `lost_fraction`.
1905 if (lost_fraction == lfLessThanHalf)
1906 lost_fraction = lfMoreThanHalf;
1907 else if (lost_fraction == lfMoreThanHalf)
1908 lost_fraction = lfLessThanHalf;
1909 }
1910 carry = subtractSignificand(temp_rhs, borrow);
1911 } else { // cmpEqual
1912 zeroSignificand();
1913 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1914 // rhs is slightly larger due to the lost fraction, flip the sign.
1915 sign = !sign;
1916 }
1917 }
1918
1919 /* The code above is intended to ensure that no borrow is
1920 necessary. */
1921 assert(!carry);
1922 (void)carry;
1923 } else {
// Effective addition: shift the operand with the smaller exponent right to
// line the two up, then add.
1924 if (bits > 0) {
1925 IEEEFloat temp_rhs(rhs);
1926
1927 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1928 carry = addSignificand(temp_rhs);
1929 } else {
1930 lost_fraction = shiftSignificandRight(-bits);
1931 carry = addSignificand(rhs);
1932 }
1933
1934 /* We have a guard bit; generating a carry cannot happen. */
1935 assert(!carry);
1936 (void)carry;
1937 }
1938
1939 return lost_fraction;
1940}
1941
// Handle the category combinations of a multiplication that need no
// significand arithmetic. The switch key packs this value's category with
// rhs's category.
//
// NOTE(review): every `case PackCategoriesIntoKey(...)` label appears to be
// missing from this listing (gaps in the embedded numbering, e.g. 1947-1949,
// 1953-1956); only `default:` survives, so as shown the statement groups
// below are unreachable. Restore the labels from upstream before building.
1942APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1943 switch (PackCategoriesIntoKey(category, rhs.category)) {
1944 default:
1945 llvm_unreachable(nullptr);
1946
// Group: take rhs's value and clear the sign so that the XOR in the next
// group yields rhs's own sign.
1950 assign(rhs);
1951 sign = false;
1952 [[fallthrough]];
// Group: undo the sign XOR applied before this call (see the "restore"
// remark), then quiet a signaling value and report invalid as needed.
1957 sign ^= rhs.sign; // restore the original sign
1958 if (isSignaling()) {
1959 makeQuiet();
1960 return opInvalidOp;
1961 }
1962 return rhs.isSignaling() ? opInvalidOp : opOK;
1963
// Group: result is infinity; sign is left as it is.
1967 category = fcInfinity;
1968 return opOK;
1969
// Group: result is zero; sign is left as it is.
1973 category = fcZero;
1974 return opOK;
1975
// Group: invalid combination; result is NaN.
1978 makeNaN();
1979 return opInvalidOp;
1980
// Group: nothing special to do here.
1982 return opOK;
1983 }
1984}
1985
// Handle the category combinations of a division that need no significand
// arithmetic. The switch key packs this value's category with rhs's
// category.
//
// NOTE(review): every `case PackCategoriesIntoKey(...)` label appears to be
// missing from this listing (gaps in the embedded numbering, e.g. 1991-1993,
// 1997-2000, 2008-2011); only `default:` survives, so as shown the statement
// groups below are unreachable. Restore the labels from upstream before
// building.
1986APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1987 switch (PackCategoriesIntoKey(category, rhs.category)) {
1988 default:
1989 llvm_unreachable(nullptr);
1990
// Group: take rhs's value and clear the sign so that the XOR in the next
// group yields rhs's own sign.
1994 assign(rhs);
1995 sign = false;
1996 [[fallthrough]];
// Group: undo the sign XOR applied before this call (see the "restore"
// remark), then quiet a signaling value and report invalid as needed.
2001 sign ^= rhs.sign; // restore the original sign
2002 if (isSignaling()) {
2003 makeQuiet();
2004 return opInvalidOp;
2005 }
2006 return rhs.isSignaling() ? opInvalidOp : opOK;
2007
// Group: *this is already the result.
2012 return opOK;
2013
// Group: result is zero; sign is left as it is.
2015 category = fcZero;
2016 return opOK;
2017
// Group: reported as a division by zero. NanOnly semantics produce a NaN
// carrying the current sign; all others produce infinity.
2019 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
2020 makeNaN(false, sign);
2021 else
2022 category = fcInfinity;
2023 return opDivByZero;
2024
// Group: invalid combination; result is NaN.
2027 makeNaN();
2028 return opInvalidOp;
2029
// Group: nothing special to do here.
2031 return opOK;
2032 }
2033}
2034
// Handle the category combinations of a mod operation that need no
// significand arithmetic. The switch key packs this value's category with
// rhs's category.
//
// NOTE(review): every `case PackCategoriesIntoKey(...)` label appears to be
// missing from this listing (gaps in the embedded numbering, e.g. 2040-2042,
// 2045-2048, 2060-2064); only `default:` survives, so as shown the statement
// groups below are unreachable. Restore the labels from upstream before
// building.
2035APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
2036 switch (PackCategoriesIntoKey(category, rhs.category)) {
2037 default:
2038 llvm_unreachable(nullptr);
2039
// Group: take rhs's value, then fall through to the signaling check.
2043 assign(rhs);
2044 [[fallthrough]];
// Group: a signaling value in *this is quieted and reported as invalid;
// otherwise the status depends on whether rhs is signaling.
2049 if (isSignaling()) {
2050 makeQuiet();
2051 return opInvalidOp;
2052 }
2053 return rhs.isSignaling() ? opInvalidOp : opOK;
2054
// Group: *this is already the result.
2058 return opOK;
2059
// Group: invalid combination; result is NaN.
2065 makeNaN();
2066 return opInvalidOp;
2067
// Group: nothing special to do here.
2069 return opOK;
2070 }
2071}
2072
// Handle the category combinations of a remainder operation that need no
// significand arithmetic. The switch key packs this value's category with
// rhs's category. A return of opDivByZero is a marker meaning "not a
// special case"; remainder() checks for it before doing the real work.
//
// NOTE(review): every `case PackCategoriesIntoKey(...)` label appears to be
// missing from this listing (gaps in the embedded numbering, e.g. 2078-2080,
// 2083-2086, 2098-2102); only `default:` survives, so as shown the statement
// groups below are unreachable. Restore the labels from upstream before
// building.
2073APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
2074 switch (PackCategoriesIntoKey(category, rhs.category)) {
2075 default:
2076 llvm_unreachable(nullptr);
2077
// Group: take rhs's value, then fall through to the signaling check.
2081 assign(rhs);
2082 [[fallthrough]];
// Group: a signaling value in *this is quieted and reported as invalid;
// otherwise the status depends on whether rhs is signaling.
2087 if (isSignaling()) {
2088 makeQuiet();
2089 return opInvalidOp;
2090 }
2091 return rhs.isSignaling() ? opInvalidOp : opOK;
2092
// Group: *this is already the result.
2096 return opOK;
2097
// Group: invalid combination; result is NaN.
2103 makeNaN();
2104 return opInvalidOp;
2105
2107 return opDivByZero; // fake status, indicating this is not a special case
2108 }
2109}
2110
2111/* Change sign. */
2113 // With NaN-as-negative-zero, neither NaN or negative zero can change
2114 // their signs.
2115 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2116 (isZero() || isNaN()))
2117 return;
2118 /* Look mummy, this one's easy. */
2119 sign = !sign;
2120}
2121
2122/* Normalized addition or subtraction. */
2123APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2124 roundingMode rounding_mode,
2125 bool subtract) {
2126 opStatus fs;
2127
2128 fs = addOrSubtractSpecials(rhs, subtract);
2129
2130 /* This return code means it was not a simple case. */
2131 if (fs == opDivByZero) {
2132 lostFraction lost_fraction;
2133
2134 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2135 fs = normalize(rounding_mode, lost_fraction);
2136
2137 /* Can only be zero if we lost no fraction. */
2138 assert(category != fcZero || lost_fraction == lfExactlyZero);
2139 }
2140
2141 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2142 positive zero unless rounding to minus infinity, except that
2143 adding two like-signed zeroes gives that zero. */
2144 if (category == fcZero) {
2145 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2146 sign = (rounding_mode == rmTowardNegative);
2147 // NaN-in-negative-zero means zeros need to be normalized to +0.
2148 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2149 sign = false;
2150 }
2151
2152 return fs;
2153}
2154
2155/* Normalized addition. */
2157 roundingMode rounding_mode) {
2158 return addOrSubtract(rhs, rounding_mode, false);
2159}
2160
2161/* Normalized subtraction. */
2163 roundingMode rounding_mode) {
2164 return addOrSubtract(rhs, rounding_mode, true);
2165}
2166
2167/* Normalized multiply. */
2169 roundingMode rounding_mode) {
2170 opStatus fs;
2171
2172 sign ^= rhs.sign;
2173 fs = multiplySpecials(rhs);
2174
2175 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2176 sign = false;
2177 if (isFiniteNonZero()) {
2178 lostFraction lost_fraction = multiplySignificand(rhs);
2179 fs = normalize(rounding_mode, lost_fraction);
2180 if (lost_fraction != lfExactlyZero)
2181 fs = (opStatus) (fs | opInexact);
2182 }
2183
2184 return fs;
2185}
2186
2187/* Normalized divide. */
2189 roundingMode rounding_mode) {
2190 opStatus fs;
2191
2192 sign ^= rhs.sign;
2193 fs = divideSpecials(rhs);
2194
2195 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2196 sign = false;
2197 if (isFiniteNonZero()) {
2198 lostFraction lost_fraction = divideSignificand(rhs);
2199 fs = normalize(rounding_mode, lost_fraction);
2200 if (lost_fraction != lfExactlyZero)
2201 fs = (opStatus) (fs | opInexact);
2202 }
2203
2204 return fs;
2205}
2206
2207/* Normalized remainder. */
2209 opStatus fs;
2210 unsigned int origSign = sign;
2211
2212 // First handle the special cases.
2213 fs = remainderSpecials(rhs);
2214 if (fs != opDivByZero)
2215 return fs;
2216
2217 fs = opOK;
2218
2219 // Make sure the current value is less than twice the denom. If the addition
2220 // did not succeed (an overflow has happened), which means that the finite
2221 // value we currently posses must be less than twice the denom (as we are
2222 // using the same semantics).
2223 IEEEFloat P2 = rhs;
2224 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2225 fs = mod(P2);
2226 assert(fs == opOK);
2227 }
2228
2229 // Lets work with absolute numbers.
2230 IEEEFloat P = rhs;
2231 P.sign = false;
2232 sign = false;
2233
2234 //
2235 // To calculate the remainder we use the following scheme.
2236 //
2237 // The remainder is defained as follows:
2238 //
2239 // remainder = numer - rquot * denom = x - r * p
2240 //
2241 // Where r is the result of: x/p, rounded toward the nearest integral value
2242 // (with halfway cases rounded toward the even number).
2243 //
2244 // Currently, (after x mod 2p):
2245 // r is the number of 2p's present inside x, which is inherently, an even
2246 // number of p's.
2247 //
2248 // We may split the remaining calculation into 4 options:
2249 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2250 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2251 // are done as well.
2252 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2253 // to subtract 1p at least once.
2254 // - if x >= p then we must subtract p at least once, as x must be a
2255 // remainder.
2256 //
2257 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2258 //
2259 // We can now split the remaining calculation to the following 3 options:
2260 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2261 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2262 // must round up to the next even number. so we must subtract p once more.
2263 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2264 // integral, and subtract p once more.
2265 //
2266
2267 // Extend the semantics to prevent an overflow/underflow or inexact result.
2268 bool losesInfo;
2269 fltSemantics extendedSemantics = *semantics;
2270 extendedSemantics.maxExponent++;
2271 extendedSemantics.minExponent--;
2272 extendedSemantics.precision += 2;
2273
2274 IEEEFloat VEx = *this;
2275 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2276 assert(fs == opOK && !losesInfo);
2277 IEEEFloat PEx = P;
2278 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2279 assert(fs == opOK && !losesInfo);
2280
2281 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2282 // any fraction.
2283 fs = VEx.add(VEx, rmNearestTiesToEven);
2284 assert(fs == opOK);
2285
2286 if (VEx.compare(PEx) == cmpGreaterThan) {
2288 assert(fs == opOK);
2289
2290 // Make VEx = this.add(this), but because we have different semantics, we do
2291 // not want to `convert` again, so we just subtract PEx twice (which equals
2292 // to the desired value).
2293 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2294 assert(fs == opOK);
2295 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2296 assert(fs == opOK);
2297
2298 cmpResult result = VEx.compare(PEx);
2299 if (result == cmpGreaterThan || result == cmpEqual) {
2301 assert(fs == opOK);
2302 }
2303 }
2304
2305 if (isZero()) {
2306 sign = origSign; // IEEE754 requires this
2307 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2308 // But some 8-bit floats only have positive 0.
2309 sign = false;
2310 }
2311
2312 else
2313 sign ^= origSign;
2314 return fs;
2315}
2316
2317/* Normalized llvm frem (C fmod). */
2319 opStatus fs;
2320 fs = modSpecials(rhs);
2321 unsigned int origSign = sign;
2322
2323 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2325 int Exp = ilogb(*this) - ilogb(rhs);
2326 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2327 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2328 // check for it.
2329 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2330 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2331 V.sign = sign;
2332
2334
2335 // When the semantics supports zero, this loop's
2336 // exit-condition is handled by the 'isFiniteNonZero'
2337 // category check above. However, when the semantics
2338 // does not have 'fcZero' and we have reached the
2339 // minimum possible value, (and any further subtract
2340 // will underflow to the same value) explicitly
2341 // provide an exit-path here.
2342 if (!semantics->hasZero && this->isSmallest())
2343 break;
2344
2345 assert(fs==opOK);
2346 }
2347 if (isZero()) {
2348 sign = origSign; // fmod requires this
2349 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2350 sign = false;
2351 }
2352 return fs;
2353}
2354
2355/* Normalized fused-multiply-add. */
2357 const IEEEFloat &addend,
2358 roundingMode rounding_mode) {
2359 opStatus fs;
2360
2361 /* Post-multiplication sign, before addition. */
2362 sign ^= multiplicand.sign;
2363
2364 /* If and only if all arguments are normal do we need to do an
2365 extended-precision calculation. */
2366 if (isFiniteNonZero() &&
2367 multiplicand.isFiniteNonZero() &&
2368 addend.isFinite()) {
2369 lostFraction lost_fraction;
2370
2371 lost_fraction = multiplySignificand(multiplicand, addend);
2372 fs = normalize(rounding_mode, lost_fraction);
2373 if (lost_fraction != lfExactlyZero)
2374 fs = (opStatus) (fs | opInexact);
2375
2376 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2377 positive zero unless rounding to minus infinity, except that
2378 adding two like-signed zeroes gives that zero. */
2379 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2380 sign = (rounding_mode == rmTowardNegative);
2381 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2382 sign = false;
2383 }
2384 } else {
2385 fs = multiplySpecials(multiplicand);
2386
2387 /* FS can only be opOK or opInvalidOp. There is no more work
2388 to do in the latter case. The IEEE-754R standard says it is
2389 implementation-defined in this case whether, if ADDEND is a
2390 quiet NaN, we raise invalid op; this implementation does so.
2391
2392 If we need to do the addition we can do so with normal
2393 precision. */
2394 if (fs == opOK)
2395 fs = addOrSubtract(addend, rounding_mode, false);
2396 }
2397
2398 return fs;
2399}
2400
2401/* Rounding-mode correct round to integral value. */
2403 opStatus fs;
2404
2405 if (isInfinity())
2406 // [IEEE Std 754-2008 6.1]:
2407 // The behavior of infinity in floating-point arithmetic is derived from the
2408 // limiting cases of real arithmetic with operands of arbitrarily
2409 // large magnitude, when such a limit exists.
2410 // ...
2411 // Operations on infinite operands are usually exact and therefore signal no
2412 // exceptions ...
2413 return opOK;
2414
2415 if (isNaN()) {
2416 if (isSignaling()) {
2417 // [IEEE Std 754-2008 6.2]:
2418 // Under default exception handling, any operation signaling an invalid
2419 // operation exception and for which a floating-point result is to be
2420 // delivered shall deliver a quiet NaN.
2421 makeQuiet();
2422 // [IEEE Std 754-2008 6.2]:
2423 // Signaling NaNs shall be reserved operands that, under default exception
2424 // handling, signal the invalid operation exception(see 7.2) for every
2425 // general-computational and signaling-computational operation except for
2426 // the conversions described in 5.12.
2427 return opInvalidOp;
2428 } else {
2429 // [IEEE Std 754-2008 6.2]:
2430 // For an operation with quiet NaN inputs, other than maximum and minimum
2431 // operations, if a floating-point result is to be delivered the result
2432 // shall be a quiet NaN which should be one of the input NaNs.
2433 // ...
2434 // Every general-computational and quiet-computational operation involving
2435 // one or more input NaNs, none of them signaling, shall signal no
2436 // exception, except fusedMultiplyAdd might signal the invalid operation
2437 // exception(see 7.2).
2438 return opOK;
2439 }
2440 }
2441
2442 if (isZero()) {
2443 // [IEEE Std 754-2008 6.3]:
2444 // ... the sign of the result of conversions, the quantize operation, the
2445 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2446 // the sign of the first or only operand.
2447 return opOK;
2448 }
2449
2450 // If the exponent is large enough, we know that this value is already
2451 // integral, and the arithmetic below would potentially cause it to saturate
2452 // to +/-Inf. Bail out early instead.
2453 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2454 return opOK;
2455
2456 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2457 // precision of our format, and then subtract it back off again. The choice
2458 // of rounding modes for the addition/subtraction determines the rounding mode
2459 // for our integral rounding as well.
2460 // NOTE: When the input value is negative, we do subtraction followed by
2461 // addition instead.
2462 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2463 1);
2464 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2465 IEEEFloat MagicConstant(*semantics);
2466 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2468 assert(fs == opOK);
2469 MagicConstant.sign = sign;
2470
2471 // Preserve the input sign so that we can handle the case of zero result
2472 // correctly.
2473 bool inputSign = isNegative();
2474
2475 fs = add(MagicConstant, rounding_mode);
2476
2477 // Current value and 'MagicConstant' are both integers, so the result of the
2478 // subtraction is always exact according to Sterbenz' lemma.
2479 subtract(MagicConstant, rounding_mode);
2480
2481 // Restore the input sign.
2482 if (inputSign != isNegative())
2483 changeSign();
2484
2485 return fs;
2486}
2487
2488/* Comparison requires normalized numbers. */
2490 cmpResult result;
2491
2492 assert(semantics == rhs.semantics);
2493
2494 switch (PackCategoriesIntoKey(category, rhs.category)) {
2495 default:
2496 llvm_unreachable(nullptr);
2497
2505 return cmpUnordered;
2506
2510 if (sign)
2511 return cmpLessThan;
2512 else
2513 return cmpGreaterThan;
2514
2518 if (rhs.sign)
2519 return cmpGreaterThan;
2520 else
2521 return cmpLessThan;
2522
2524 if (sign == rhs.sign)
2525 return cmpEqual;
2526 else if (sign)
2527 return cmpLessThan;
2528 else
2529 return cmpGreaterThan;
2530
2532 return cmpEqual;
2533
2535 break;
2536 }
2537
2538 /* Two normal numbers. Do they have the same sign? */
2539 if (sign != rhs.sign) {
2540 if (sign)
2541 result = cmpLessThan;
2542 else
2543 result = cmpGreaterThan;
2544 } else {
2545 /* Compare absolute values; invert result if negative. */
2546 result = compareAbsoluteValue(rhs);
2547
2548 if (sign) {
2549 if (result == cmpLessThan)
2550 result = cmpGreaterThan;
2551 else if (result == cmpGreaterThan)
2552 result = cmpLessThan;
2553 }
2554 }
2555
2556 return result;
2557}
2558
2559/// IEEEFloat::convert - convert a value of one floating point type to another.
2560/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2561/// records whether the transformation lost information, i.e. whether
2562/// converting the result back to the original type will produce the
2563/// original value (this is almost the same as return value==fsOK, but there
2564/// are edge cases where this is not so).
2565
2567 roundingMode rounding_mode,
2568 bool *losesInfo) {
2570 unsigned int newPartCount, oldPartCount;
2571 opStatus fs;
2572 int shift;
2573 const fltSemantics &fromSemantics = *semantics;
2574 bool is_signaling = isSignaling();
2575
2577 newPartCount = partCountForBits(toSemantics.precision + 1);
2578 oldPartCount = partCount();
2579 shift = toSemantics.precision - fromSemantics.precision;
2580
2581 bool X86SpecialNan = false;
2582 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2583 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2584 (!(*significandParts() & 0x8000000000000000ULL) ||
2585 !(*significandParts() & 0x4000000000000000ULL))) {
2586 // x86 has some unusual NaNs which cannot be represented in any other
2587 // format; note them here.
2588 X86SpecialNan = true;
2589 }
2590
2591 // If this is a truncation of a denormal number, and the target semantics
2592 // has larger exponent range than the source semantics (this can happen
2593 // when truncating from PowerPC double-double to double format), the
2594 // right shift could lose result mantissa bits. Adjust exponent instead
2595 // of performing excessive shift.
2596 // Also do a similar trick in case shifting denormal would produce zero
2597 // significand as this case isn't handled correctly by normalize.
2598 if (shift < 0 && isFiniteNonZero()) {
2599 int omsb = significandMSB() + 1;
2600 int exponentChange = omsb - fromSemantics.precision;
2601 if (exponent + exponentChange < toSemantics.minExponent)
2602 exponentChange = toSemantics.minExponent - exponent;
2603 exponentChange = std::max(exponentChange, shift);
2604 if (exponentChange < 0) {
2605 shift -= exponentChange;
2606 exponent += exponentChange;
2607 } else if (omsb <= -shift) {
2608 exponentChange = omsb + shift - 1; // leave at least one bit set
2609 shift -= exponentChange;
2610 exponent += exponentChange;
2611 }
2612 }
2613
2614 // If this is a truncation, perform the shift before we narrow the storage.
2615 if (shift < 0 && (isFiniteNonZero() ||
2616 (category == fcNaN && semantics->nonFiniteBehavior !=
2618 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2619
2620 // Fix the storage so it can hold to new value.
2621 if (newPartCount > oldPartCount) {
2622 // The new type requires more storage; make it available.
2623 integerPart *newParts;
2624 newParts = new integerPart[newPartCount];
2625 APInt::tcSet(newParts, 0, newPartCount);
2626 if (isFiniteNonZero() || category==fcNaN)
2627 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2628 freeSignificand();
2629 significand.parts = newParts;
2630 } else if (newPartCount == 1 && oldPartCount != 1) {
2631 // Switch to built-in storage for a single part.
2632 integerPart newPart = 0;
2633 if (isFiniteNonZero() || category==fcNaN)
2634 newPart = significandParts()[0];
2635 freeSignificand();
2636 significand.part = newPart;
2637 }
2638
2639 // Now that we have the right storage, switch the semantics.
2640 semantics = &toSemantics;
2641
2642 // If this is an extension, perform the shift now that the storage is
2643 // available.
2644 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2645 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2646
2647 if (isFiniteNonZero()) {
2648 fs = normalize(rounding_mode, lostFraction);
2649 *losesInfo = (fs != opOK);
2650 } else if (category == fcNaN) {
2651 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2652 *losesInfo =
2654 makeNaN(false, sign);
2655 return is_signaling ? opInvalidOp : opOK;
2656 }
2657
2658 // If NaN is negative zero, we need to create a new NaN to avoid converting
2659 // NaN to -Inf.
2660 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2661 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2662 makeNaN(false, false);
2663
2664 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2665
2666 // For x87 extended precision, we want to make a NaN, not a special NaN if
2667 // the input wasn't special either.
2668 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2669 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2670
2671 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2672 // This also guarantees that a sNaN does not become Inf on a truncation
2673 // that loses all payload bits.
2674 if (is_signaling) {
2675 makeQuiet();
2676 fs = opInvalidOp;
2677 } else {
2678 fs = opOK;
2679 }
2680 } else if (category == fcInfinity &&
2681 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2682 makeNaN(false, sign);
2683 *losesInfo = true;
2684 fs = opInexact;
2685 } else if (category == fcZero &&
2686 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2687 // Negative zero loses info, but positive zero doesn't.
2688 *losesInfo =
2689 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2690 fs = *losesInfo ? opInexact : opOK;
2691 // NaN is negative zero means -0 -> +0, which can lose information
2692 sign = false;
2693 } else {
2694 *losesInfo = false;
2695 fs = opOK;
2696 }
2697
2698 if (category == fcZero && !semantics->hasZero)
2700 return fs;
2701}
2702
2703/* Convert a floating point number to an integer according to the
2704 rounding mode. If the rounded integer value is out of range this
2705 returns an invalid operation exception and the contents of the
2706 destination parts are unspecified. If the rounded value is in
2707 range but the floating point number is not the exact integer, the C
2708 standard doesn't require an inexact exception to be raised. IEEE
2709 854 does require it so we do that.
2710
2711 Note that for conversions to integer type the C standard requires
2712 round-to-zero to always be used. */
2713APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2714 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2715 roundingMode rounding_mode, bool *isExact) const {
2716 lostFraction lost_fraction;
2717 const integerPart *src;
2718 unsigned int dstPartsCount, truncatedBits;
2719
2720 *isExact = false;
2721
2722 /* Handle the three special cases first. */
2723 if (category == fcInfinity || category == fcNaN)
2724 return opInvalidOp;
2725
2726 dstPartsCount = partCountForBits(width);
2727 assert(dstPartsCount <= parts.size() && "Integer too big");
2728
2729 if (category == fcZero) {
2730 APInt::tcSet(parts.data(), 0, dstPartsCount);
2731 // Negative zero can't be represented as an int.
2732 *isExact = !sign;
2733 return opOK;
2734 }
2735
2736 src = significandParts();
2737
2738 /* Step 1: place our absolute value, with any fraction truncated, in
2739 the destination. */
2740 if (exponent < 0) {
2741 /* Our absolute value is less than one; truncate everything. */
2742 APInt::tcSet(parts.data(), 0, dstPartsCount);
2743 /* For exponent -1 the integer bit represents .5, look at that.
2744 For smaller exponents leftmost truncated bit is 0. */
2745 truncatedBits = semantics->precision -1U - exponent;
2746 } else {
2747 /* We want the most significant (exponent + 1) bits; the rest are
2748 truncated. */
2749 unsigned int bits = exponent + 1U;
2750
2751 /* Hopelessly large in magnitude? */
2752 if (bits > width)
2753 return opInvalidOp;
2754
2755 if (bits < semantics->precision) {
2756 /* We truncate (semantics->precision - bits) bits. */
2757 truncatedBits = semantics->precision - bits;
2758 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2759 } else {
2760 /* We want at least as many bits as are available. */
2761 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2762 0);
2763 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2764 bits - semantics->precision);
2765 truncatedBits = 0;
2766 }
2767 }
2768
2769 /* Step 2: work out any lost fraction, and increment the absolute
2770 value if we would round away from zero. */
2771 if (truncatedBits) {
2772 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2773 truncatedBits);
2774 if (lost_fraction != lfExactlyZero &&
2775 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2776 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2777 return opInvalidOp; /* Overflow. */
2778 }
2779 } else {
2780 lost_fraction = lfExactlyZero;
2781 }
2782
2783 /* Step 3: check if we fit in the destination. */
2784 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2785
2786 if (sign) {
2787 if (!isSigned) {
2788 /* Negative numbers cannot be represented as unsigned. */
2789 if (omsb != 0)
2790 return opInvalidOp;
2791 } else {
2792 /* It takes omsb bits to represent the unsigned integer value.
2793 We lose a bit for the sign, but care is needed as the
2794 maximally negative integer is a special case. */
2795 if (omsb == width &&
2796 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2797 return opInvalidOp;
2798
2799 /* This case can happen because of rounding. */
2800 if (omsb > width)
2801 return opInvalidOp;
2802 }
2803
2804 APInt::tcNegate (parts.data(), dstPartsCount);
2805 } else {
2806 if (omsb >= width + !isSigned)
2807 return opInvalidOp;
2808 }
2809
2810 if (lost_fraction == lfExactlyZero) {
2811 *isExact = true;
2812 return opOK;
2813 }
2814 return opInexact;
2815}
2816
2817/* Same as convertToSignExtendedInteger, except we provide
2818 deterministic values in case of an invalid operation exception,
2819 namely zero for NaNs and the minimal or maximal value respectively
2820 for underflow or overflow.
2821 The *isExact output tells whether the result is exact, in the sense
2822 that converting it back to the original floating point type produces
2823 the original value. This is almost equivalent to result==opOK,
2824 except for negative zeroes.
2825*/
2828 unsigned int width, bool isSigned,
2829 roundingMode rounding_mode, bool *isExact) const {
2830 opStatus fs;
2831
2832 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2833 isExact);
2834
2835 if (fs == opInvalidOp) {
2836 unsigned int bits, dstPartsCount;
2837
2838 dstPartsCount = partCountForBits(width);
2839 assert(dstPartsCount <= parts.size() && "Integer too big");
2840
2841 if (category == fcNaN)
2842 bits = 0;
2843 else if (sign)
2844 bits = isSigned;
2845 else
2846 bits = width - isSigned;
2847
2848 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2849 if (sign && isSigned)
2850 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2851 }
2852
2853 return fs;
2854}
2855
2856/* Convert an unsigned integer SRC to a floating point number,
2857 rounding according to ROUNDING_MODE. The sign of the floating
2858 point number is not modified. */
2859APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2860 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2861 unsigned int omsb, precision, dstCount;
2862 integerPart *dst;
2863 lostFraction lost_fraction;
2864
2865 category = fcNormal;
2866 omsb = APInt::tcMSB(src, srcCount) + 1;
2867 dst = significandParts();
2868 dstCount = partCount();
2869 precision = semantics->precision;
2870
2871 /* We want the most significant PRECISION bits of SRC. There may not
2872 be that many; extract what we can. */
2873 if (precision <= omsb) {
2874 exponent = omsb - 1;
2875 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2876 omsb - precision);
2877 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2878 } else {
2879 exponent = precision - 1;
2880 lost_fraction = lfExactlyZero;
2881 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2882 }
2883
2884 return normalize(rounding_mode, lost_fraction);
2885}
2886
2888 roundingMode rounding_mode) {
2889 unsigned int partCount = Val.getNumWords();
2890 APInt api = Val;
2891
2892 sign = false;
2893 if (isSigned && api.isNegative()) {
2894 sign = true;
2895 api = -api;
2896 }
2897
2898 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2899}
2900
2902IEEEFloat::convertFromHexadecimalString(StringRef s,
2903 roundingMode rounding_mode) {
2904 lostFraction lost_fraction = lfExactlyZero;
2905
2906 category = fcNormal;
2907 zeroSignificand();
2908 exponent = 0;
2909
2910 integerPart *significand = significandParts();
2911 unsigned partsCount = partCount();
2912 unsigned bitPos = partsCount * integerPartWidth;
2913 bool computedTrailingFraction = false;
2914
2915 // Skip leading zeroes and any (hexa)decimal point.
2916 StringRef::iterator begin = s.begin();
2917 StringRef::iterator end = s.end();
2919 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2920 if (!PtrOrErr)
2921 return PtrOrErr.takeError();
2922 StringRef::iterator p = *PtrOrErr;
2923 StringRef::iterator firstSignificantDigit = p;
2924
2925 while (p != end) {
2926 integerPart hex_value;
2927
2928 if (*p == '.') {
2929 if (dot != end)
2930 return createError("String contains multiple dots");
2931 dot = p++;
2932 continue;
2933 }
2934
2935 hex_value = hexDigitValue(*p);
2936 if (hex_value == UINT_MAX)
2937 break;
2938
2939 p++;
2940
2941 // Store the number while we have space.
2942 if (bitPos) {
2943 bitPos -= 4;
2944 hex_value <<= bitPos % integerPartWidth;
2945 significand[bitPos / integerPartWidth] |= hex_value;
2946 } else if (!computedTrailingFraction) {
2947 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2948 if (!FractOrErr)
2949 return FractOrErr.takeError();
2950 lost_fraction = *FractOrErr;
2951 computedTrailingFraction = true;
2952 }
2953 }
2954
2955 /* Hex floats require an exponent but not a hexadecimal point. */
2956 if (p == end)
2957 return createError("Hex strings require an exponent");
2958 if (*p != 'p' && *p != 'P')
2959 return createError("Invalid character in significand");
2960 if (p == begin)
2961 return createError("Significand has no digits");
2962 if (dot != end && p - begin == 1)
2963 return createError("Significand has no digits");
2964
2965 /* Ignore the exponent if we are zero. */
2966 if (p != firstSignificantDigit) {
2967 int expAdjustment;
2968
2969 /* Implicit hexadecimal point? */
2970 if (dot == end)
2971 dot = p;
2972
2973 /* Calculate the exponent adjustment implicit in the number of
2974 significant digits. */
2975 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2976 if (expAdjustment < 0)
2977 expAdjustment++;
2978 expAdjustment = expAdjustment * 4 - 1;
2979
2980 /* Adjust for writing the significand starting at the most
2981 significant nibble. */
2982 expAdjustment += semantics->precision;
2983 expAdjustment -= partsCount * integerPartWidth;
2984
2985 /* Adjust for the given exponent. */
2986 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2987 if (!ExpOrErr)
2988 return ExpOrErr.takeError();
2989 exponent = *ExpOrErr;
2990 }
2991
2992 return normalize(rounding_mode, lost_fraction);
2993}
2994
2996IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2997 unsigned sigPartCount, int exp,
2998 roundingMode rounding_mode) {
2999 unsigned int parts, pow5PartCount;
3000 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
3002 bool isNearest;
3003
3004 isNearest = (rounding_mode == rmNearestTiesToEven ||
3005 rounding_mode == rmNearestTiesToAway);
3006
3007 parts = partCountForBits(semantics->precision + 11);
3008
3009 /* Calculate pow(5, abs(exp)). */
3010 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
3011
3012 for (;; parts *= 2) {
3013 opStatus sigStatus, powStatus;
3014 unsigned int excessPrecision, truncatedBits;
3015
3016 calcSemantics.precision = parts * integerPartWidth - 1;
3017 excessPrecision = calcSemantics.precision - semantics->precision;
3018 truncatedBits = excessPrecision;
3019
3020 IEEEFloat decSig(calcSemantics, uninitialized);
3021 decSig.makeZero(sign);
3022 IEEEFloat pow5(calcSemantics);
3023
3024 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
3026 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
3028 /* Add exp, as 10^n = 5^n * 2^n. */
3029 decSig.exponent += exp;
3030
3031 lostFraction calcLostFraction;
3032 integerPart HUerr, HUdistance;
3033 unsigned int powHUerr;
3034
3035 if (exp >= 0) {
3036 /* multiplySignificand leaves the precision-th bit set to 1. */
3037 calcLostFraction = decSig.multiplySignificand(pow5);
3038 powHUerr = powStatus != opOK;
3039 } else {
3040 calcLostFraction = decSig.divideSignificand(pow5);
3041 /* Denormal numbers have less precision. */
3042 if (decSig.exponent < semantics->minExponent) {
3043 excessPrecision += (semantics->minExponent - decSig.exponent);
3044 truncatedBits = excessPrecision;
3045 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
3046 }
3047 /* Extra half-ulp lost in reciprocal of exponent. */
3048 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
3049 }
3050
3051 /* Both multiplySignificand and divideSignificand return the
3052 result with the integer bit set. */
3054 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3055
3056 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
3057 powHUerr);
3058 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
3059 excessPrecision, isNearest);
3060
3061 /* Are we guaranteed to round correctly if we truncate? */
3062 if (HUdistance >= HUerr) {
3063 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
3064 calcSemantics.precision - excessPrecision,
3065 excessPrecision);
3066 /* Take the exponent of decSig. If we tcExtract-ed less bits
3067 above we must adjust our exponent to compensate for the
3068 implicit right shift. */
3069 exponent = (decSig.exponent + semantics->precision
3070 - (calcSemantics.precision - excessPrecision));
3071 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
3072 decSig.partCount(),
3073 truncatedBits);
3074 return normalize(rounding_mode, calcLostFraction);
3075 }
3076 }
3077}
3078
3079Expected<APFloat::opStatus>
3080IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3081 decimalInfo D;
3082 opStatus fs;
3083
3084 /* Scan the text. */
3085 StringRef::iterator p = str.begin();
3086 if (Error Err = interpretDecimal(p, str.end(), &D))
3087 return std::move(Err);
3088
3089 /* Handle the quick cases. First the case of no significant digits,
3090 i.e. zero, and then exponents that are obviously too large or too
3091 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3092 definitely overflows if
3093
3094 (exp - 1) * L >= maxExponent
3095
3096 and definitely underflows to zero where
3097
3098 (exp + 1) * L <= minExponent - precision
3099
3100 With integer arithmetic the tightest bounds for L are
3101
3102 93/28 < L < 196/59 [ numerator <= 256 ]
3103 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3104 */
3105
3106 // Test if we have a zero number allowing for strings with no null terminators
3107 // and zero decimals with non-zero exponents.
3108 //
3109 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3110 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3111 // be at most one dot. On the other hand, if we have a zero with a non-zero
3112 // exponent, then we know that D.firstSigDigit will be non-numeric.
3113 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3114 category = fcZero;
3115 fs = opOK;
3116 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3117 sign = false;
3118 if (!semantics->hasZero)
3120
3121 /* Check whether the normalized exponent is high enough to overflow
3122 max during the log-rebasing in the max-exponent check below. */
3123 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3124 fs = handleOverflow(rounding_mode);
3125
3126 /* If it wasn't, then it also wasn't high enough to overflow max
3127 during the log-rebasing in the min-exponent check. Check that it
3128 won't overflow min in either check, then perform the min-exponent
3129 check. */
3130 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3131 (D.normalizedExponent + 1) * 28738 <=
3132 8651 * (semantics->minExponent - (int) semantics->precision)) {
3133 /* Underflow to zero and round. */
3134 category = fcNormal;
3135 zeroSignificand();
3136 fs = normalize(rounding_mode, lfLessThanHalf);
3137
3138 /* We can finally safely perform the max-exponent check. */
3139 } else if ((D.normalizedExponent - 1) * 42039
3140 >= 12655 * semantics->maxExponent) {
3141 /* Overflow and round. */
3142 fs = handleOverflow(rounding_mode);
3143 } else {
3144 integerPart *decSignificand;
3145 unsigned int partCount;
3146
3147 /* A tight upper bound on number of bits required to hold an
3148 N-digit decimal integer is N * 196 / 59. Allocate enough space
3149 to hold the full significand, and an extra part required by
3150 tcMultiplyPart. */
3151 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3152 partCount = partCountForBits(1 + 196 * partCount / 59);
3153 decSignificand = new integerPart[partCount + 1];
3154 partCount = 0;
3155
3156 /* Convert to binary efficiently - we do almost all multiplication
3157 in an integerPart. When this would overflow do we do a single
3158 bignum multiplication, and then revert again to multiplication
3159 in an integerPart. */
3160 do {
3161 integerPart decValue, val, multiplier;
3162
3163 val = 0;
3164 multiplier = 1;
3165
3166 do {
3167 if (*p == '.') {
3168 p++;
3169 if (p == str.end()) {
3170 break;
3171 }
3172 }
3173 decValue = decDigitValue(*p++);
3174 if (decValue >= 10U) {
3175 delete[] decSignificand;
3176 return createError("Invalid character in significand");
3177 }
3178 multiplier *= 10;
3179 val = val * 10 + decValue;
3180 /* The maximum number that can be multiplied by ten with any
3181 digit added without overflowing an integerPart. */
3182 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3183
3184 /* Multiply out the current part. */
3185 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3186 partCount, partCount + 1, false);
3187
3188 /* If we used another part (likely but not guaranteed), increase
3189 the count. */
3190 if (decSignificand[partCount])
3191 partCount++;
3192 } while (p <= D.lastSigDigit);
3193
3194 category = fcNormal;
3195 fs = roundSignificandWithExponent(decSignificand, partCount,
3196 D.exponent, rounding_mode);
3197
3198 delete [] decSignificand;
3199 }
3200
3201 return fs;
3202}
3203
3204bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3205 const size_t MIN_NAME_SIZE = 3;
3206
3207 if (str.size() < MIN_NAME_SIZE)
3208 return false;
3209
3210 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3211 makeInf(false);
3212 return true;
3213 }
3214
3215 bool IsNegative = str.consume_front("-");
3216 if (IsNegative) {
3217 if (str.size() < MIN_NAME_SIZE)
3218 return false;
3219
3220 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3221 makeInf(true);
3222 return true;
3223 }
3224 }
3225
3226 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3227 bool IsSignaling = str.consume_front_insensitive("s");
3228 if (IsSignaling) {
3229 if (str.size() < MIN_NAME_SIZE)
3230 return false;
3231 }
3232
3233 if (str.consume_front("nan") || str.consume_front("NaN")) {
3234 // A NaN without payload.
3235 if (str.empty()) {
3236 makeNaN(IsSignaling, IsNegative);
3237 return true;
3238 }
3239
3240 // Allow the payload to be inside parentheses.
3241 if (str.front() == '(') {
3242 // Parentheses should be balanced (and not empty).
3243 if (str.size() <= 2 || str.back() != ')')
3244 return false;
3245
3246 str = str.slice(1, str.size() - 1);
3247 }
3248
3249 // Determine the payload number's radix.
3250 unsigned Radix = 10;
3251 if (str[0] == '0') {
3252 if (str.size() > 1 && tolower(str[1]) == 'x') {
3253 str = str.drop_front(2);
3254 Radix = 16;
3255 } else {
3256 Radix = 8;
3257 }
3258 }
3259
3260 // Parse the payload and make the NaN.
3261 APInt Payload;
3262 if (!str.getAsInteger(Radix, Payload)) {
3263 makeNaN(IsSignaling, IsNegative, &Payload);
3264 return true;
3265 }
3266 }
3267
3268 return false;
3269}
3270
// Parses STR into this value. An empty string is rejected; special spellings
// are tried first via convertFromStringSpecials(); otherwise an optional
// leading '+' or '-' is consumed and the remainder is handed to the
// hexadecimal parser when it starts with "0x"/"0X", or to the decimal parser
// in every other case.
// NOTE(review): this listing jumps from 3271 to 3273 and from 3284 to 3286,
// so the declarator that follows the return type and the statement that the
// string literal on 3286 belongs to are not visible here -- restore both from
// the upstream file before relying on this text.
3271Expected<APFloat::opStatus>
3273 if (str.empty())
3274 return createError("Invalid string length");
3275
3276 // Handle special cases.
3277 if (convertFromStringSpecials(str))
3278 return opOK;
3279
3280 /* Handle a leading minus sign. */
3281 StringRef::iterator p = str.begin();
3282 size_t slen = str.size();
3283 sign = *p == '-' ? 1 : 0;
3284 if (sign && !semantics->hasSignedRepr)
3286 "This floating point format does not support signed values");
3287
// Skip the sign character; a string consisting of the sign alone is an error.
3288 if (*p == '-' || *p == '+') {
3289 p++;
3290 slen--;
3291 if (!slen)
3292 return createError("String has no digits");
3293 }
3294
// A "0x"/"0X" prefix selects the hexadecimal parser; the prefix itself is
// stripped, and a prefix with nothing after it is rejected.
3295 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3296 if (slen == 2)
3297 return createError("Invalid string");
3298 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3299 rounding_mode);
3300 }
3301
3302 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3303}
3304
3305/* Write out a hexadecimal representation of the floating point value
3306 to DST, which must be of sufficient size, in the C99 form
3307 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3308 excluding the terminating NUL.
3309
3310 If UPPERCASE, the output is in upper case, otherwise in lower case.
3311
3312 HEXDIGITS digits appear altogether, rounding the value if
3313 necessary. If HEXDIGITS is 0, the minimal precision to display the
3314 number precisely is used instead. If nothing would appear after
3315 the decimal point it is suppressed.
3316
3317 The decimal exponent is always printed and has at least one digit.
3318 Zero values display an exponent of zero. Infinities and NaNs
3319 appear as "infinity" or "nan" respectively.
3320
3321 The above rules are as specified by C99. There is ambiguity about
3322 what the leading hexadecimal digit should be. This implementation
3323 uses whatever is necessary so that the exponent is displayed as
3324 stored. This implies the exponent will fall within the IEEE format
3325 range, and the leading hexadecimal digit will be 0 (for denormals),
3326 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3327 any other digits zero).
3328*/
3329unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3330 bool upperCase,
3331 roundingMode rounding_mode) const {
3332 char *p;
3333
3334 p = dst;
3335 if (sign)
3336 *dst++ = '-';
3337
3338 switch (category) {
3339 case fcInfinity:
3340 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3341 dst += sizeof infinityL - 1;
3342 break;
3343
3344 case fcNaN:
3345 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3346 dst += sizeof NaNU - 1;
3347 break;
3348
3349 case fcZero:
3350 *dst++ = '0';
3351 *dst++ = upperCase ? 'X': 'x';
3352 *dst++ = '0';
3353 if (hexDigits > 1) {
3354 *dst++ = '.';
3355 memset (dst, '0', hexDigits - 1);
3356 dst += hexDigits - 1;
3357 }
3358 *dst++ = upperCase ? 'P': 'p';
3359 *dst++ = '0';
3360 break;
3361
3362 case fcNormal:
3363 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3364 break;
3365 }
3366
3367 *dst = 0;
3368
3369 return static_cast<unsigned int>(dst - p);
3370}
3371
3372/* Does the hard work of outputting the correctly rounded hexadecimal
3373 form of a normal floating point number with the specified number of
3374 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3375 digits necessary to print the value precisely is output. */
/* Output starts at DST with "0x" or "0X" (per UPPERCASE), followed by the
   digits, a 'p' or 'P', and the exponent written by writeSignedDecimal.
   The return value is whatever writeSignedDecimal returns. */
3376char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3377 bool upperCase,
3378 roundingMode rounding_mode) const {
3379 unsigned int count, valueBits, shift, partsCount, outputDigits;
3380 const char *hexDigitChars;
3381 const integerPart *significand;
3382 char *p;
3383 bool roundUp;
3384
3385 *dst++ = '0';
3386 *dst++ = upperCase ? 'X': 'x';
3387
3388 roundUp = false;
3389 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3390
3391 significand = significandParts();
3392 partsCount = partCount();
3393
3394 /* +3 because the first digit only uses the single integer bit, so
3395 we have 3 virtual zero most-significant-bits. */
3396 valueBits = semantics->precision + 3;
3397 shift = integerPartWidth - valueBits % integerPartWidth;
3398
3399 /* The natural number of digits required ignoring trailing
3400 insignificant zeroes. */
3401 outputDigits = (valueBits - significandLSB () + 3) / 4;
3402
3403 /* hexDigits of zero means use the required number for the
3404 precision. Otherwise, see if we are truncating. If we are,
3405 find out if we need to round away from zero. */
3406 if (hexDigits) {
3407 if (hexDigits < outputDigits) {
3408 /* We are dropping non-zero bits, so need to check how to round.
3409 "bits" is the number of dropped bits. */
3410 unsigned int bits;
3411 lostFraction fraction;
3412
3413 bits = valueBits - hexDigits * 4;
3414 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3415 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3416 }
3417 outputDigits = hexDigits;
3418 }
3419
3420 /* Write the digits consecutively, and start writing in the location
3421 of the hexadecimal point. We move the most significant digit
3422 left and add the hexadecimal point later. */
/* The pre-increment leaves one unused slot at p[-1]; it is filled by
   the "p[-1] = p[0]" fix-up near the end of the function. */
3423 p = ++dst;
3424
/* Number of integerPart-sized pieces needed to cover valueBits. */
3425 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3426
3427 while (outputDigits && count) {
3428 integerPart part;
3429
3430 /* Put the most significant integerPartWidth bits in "part". */
3431 if (--count == partsCount)
3432 part = 0; /* An imaginary higher zero part. */
3433 else
3434 part = significand[count] << shift;
3435
3436 if (count && shift)
3437 part |= significand[count - 1] >> (integerPartWidth - shift);
3438
3439 /* Convert as much of "part" to hexdigits as we can. */
3440 unsigned int curDigits = integerPartWidth / 4;
3441
3442 curDigits = std::min(curDigits, outputDigits);
3443 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3444 outputDigits -= curDigits;
3445 }
3446
3447 if (roundUp) {
3448 char *q = dst;
3449
/* Increment the digit string from its last character backwards,
   continuing for as long as a digit wraps around to '0'. */
3450 /* Note that hexDigitChars has a trailing '0'. */
3451 do {
3452 q--;
3453 *q = hexDigitChars[hexDigitValue (*q) + 1];
3454 } while (*q == '0');
3455 assert(q >= p);
3456 } else {
3457 /* Add trailing zeroes. */
3458 memset (dst, '0', outputDigits);
3459 dst += outputDigits;
3460 }
3461
3462 /* Move the most significant digit to before the point, and if there
3463 is something after the decimal point add it. This must come
3464 after rounding above. */
3465 p[-1] = p[0];
3466 if (dst -1 == p)
3467 dst--;
3468 else
3469 p[0] = '.';
3470
3471 /* Finally output the exponent. */
3472 *dst++ = upperCase ? 'P': 'p';
3473
3474 return writeSignedDecimal (dst, exponent);
3475}
3476
// Hashes Arg. Values that are not finite non-zero contribute only their
// category, sign (forced to zero for NaN) and precision; finite non-zero
// values additionally contribute the exponent and the significand parts.
// NOTE(review): this listing jumps from 3476 to 3478 and from 3486 to 3488,
// so the function's signature line and the start of the final hash_combine
// argument (the call that takes the significandParts() range below) are not
// visible here -- restore them from the upstream file.
3478 if (!Arg.isFiniteNonZero())
3479 return hash_combine((uint8_t)Arg.category,
3480 // NaN has no sign, fix it at zero.
3481 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3482 Arg.semantics->precision);
3483
3484 // Normal floats need their exponent and significand hashed.
3485 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3486 Arg.semantics->precision, Arg.exponent,
3488 Arg.significandParts(),
3489 Arg.significandParts() + Arg.partCount()));
3490}
3491
3492// Conversion from APFloat to/from host float/double. It may eventually be
3493// possible to eliminate these and have everybody deal with APFloats, but that
3494// will take a while. This approach will not easily extend to long double.
3495// Current implementation requires integerPartWidth==64, which is correct at
3496// the moment but could be made more general.
3497
3498// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3499// the actual IEEE representations. We compensate for that here.
3500
3501APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3502 assert(semantics ==
3503 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3504 assert(partCount()==2);
3505
3506 uint64_t myexponent, mysignificand;
3507
3508 if (isFiniteNonZero()) {
3509 myexponent = exponent+16383; //bias
3510 mysignificand = significandParts()[0];
3511 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3512 myexponent = 0; // denormal
3513 } else if (category==fcZero) {
3514 myexponent = 0;
3515 mysignificand = 0;
3516 } else if (category==fcInfinity) {
3517 myexponent = 0x7fff;
3518 mysignificand = 0x8000000000000000ULL;
3519 } else {
3520 assert(category == fcNaN && "Unknown category");
3521 myexponent = 0x7fff;
3522 mysignificand = significandParts()[0];
3523 }
3524
3525 uint64_t words[2];
3526 words[0] = mysignificand;
3527 words[1] = ((uint64_t)(sign & 1) << 15) |
3528 (myexponent & 0x7fffLL);
3529 return APInt(80, words);
3530}
3531
3532APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3533 assert(semantics ==
3534 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3535 assert(partCount()==2);
3536
3537 uint64_t words[2];
3538 opStatus fs;
3539 bool losesInfo;
3540
3541 // Convert number to double. To avoid spurious underflows, we re-
3542 // normalize against the "double" minExponent first, and only *then*
3543 // truncate the mantissa. The result of that second conversion
3544 // may be inexact, but should never underflow.
3545 // Declare fltSemantics before APFloat that uses it (and
3546 // saves pointer to it) to ensure correct destruction order.
3547 fltSemantics extendedSemantics = *semantics;
3548 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3549 IEEEFloat extended(*this);
3550 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3551 assert(fs == opOK && !losesInfo);
3552 (void)fs;
3553
3554 IEEEFloat u(extended);
3555 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3556 assert(fs == opOK || fs == opInexact);
3557 (void)fs;
3558 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3559
3560 // If conversion was exact or resulted in a special case, we're done;
3561 // just set the second double to zero. Otherwise, re-convert back to
3562 // the extended format and compute the difference. This now should
3563 // convert exactly to double.
3564 if (u.isFiniteNonZero() && losesInfo) {
3565 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3566 assert(fs == opOK && !losesInfo);
3567 (void)fs;
3568
3569 IEEEFloat v(extended);
3570 v.subtract(u, rmNearestTiesToEven);
3571 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3572 assert(fs == opOK && !losesInfo);
3573 (void)fs;
3574 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3575 } else {
3576 words[1] = 0;
3577 }
3578
3579 return APInt(128, words);
3580}
3581
// Packs this value into an APInt of S.sizeInBits bits: the stored significand
// parts come first, and the sign and the masked, biased exponent are OR'd
// into the last 64-bit word. The caller's semantics must be S (asserted).
3582template <const fltSemantics &S>
3583APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3584 assert(semantics == &S);
// semFloat8E8M0FNU uses -minExponent as its bias; every other format uses
// -(minExponent - 1).
3585 const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
3586 ? -S.minExponent
3587 : -(S.minExponent - 1);
3588 constexpr unsigned int trailing_significand_bits = S.precision - 1;
// Index of the significand part holding the integer bit, and that bit's mask
// within the part.
3589 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3590 constexpr integerPart integer_bit =
3591 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3592 constexpr uint64_t significand_mask = integer_bit - 1;
// With no trailing significand bits, all S.sizeInBits bits are treated as
// exponent; otherwise one bit is set aside for the sign.
3593 constexpr unsigned int exponent_bits =
3594 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3595 : S.sizeInBits;
3596 static_assert(exponent_bits < 64);
3597 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3598
3599 uint64_t myexponent;
3600 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3601 mysignificand;
3602
3603 if (isFiniteNonZero()) {
3604 myexponent = exponent + bias;
3605 std::copy_n(significandParts(), mysignificand.size(),
3606 mysignificand.begin());
// A biased exponent of 1 with the integer bit clear is stored with exponent
// field 0.
3607 if (myexponent == 1 &&
3608 !(significandParts()[integer_bit_part] & integer_bit))
3609 myexponent = 0; // denormal
3610 } else if (category == fcZero) {
3611 if (!S.hasZero)
3612 llvm_unreachable("semantics does not support zero!");
3613 myexponent = ::exponentZero(S) + bias;
3614 mysignificand.fill(0);
3615 } else if (category == fcInfinity) {
3616 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3617 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3618 llvm_unreachable("semantics don't support inf!");
3619 myexponent = ::exponentInf(S) + bias;
3620 mysignificand.fill(0);
3621 } else {
3622 assert(category == fcNaN && "Unknown category!");
3623 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3624 llvm_unreachable("semantics don't support NaN!");
3625 myexponent = ::exponentNaN(S) + bias;
3626 std::copy_n(significandParts(), mysignificand.size(),
3627 mysignificand.begin());
3628 }
// Assemble the result: significand words first, remaining words zero-filled.
3629 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3630 auto words_iter =
3631 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3632 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3633 // Clear the integer bit.
3634 words[mysignificand.size() - 1] &= significand_mask;
3635 }
3636 std::fill(words_iter, words.end(), uint64_t{0});
3637 constexpr size_t last_word = words.size() - 1;
// Sign and exponent positions are taken modulo 64 because they are placed in
// the last word.
3638 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3639 << ((S.sizeInBits - 1) % 64);
3640 words[last_word] |= shifted_sign;
3641 uint64_t shifted_exponent = (myexponent & exponent_mask)
3642 << (trailing_significand_bits % 64);
3643 words[last_word] |= shifted_exponent;
// Formats that fit in a single word use the APInt(bits, value) constructor.
3644 if constexpr (last_word == 0) {
3645 return APInt(S.sizeInBits, words[0]);
3646 }
3647 return APInt(S.sizeInBits, words);
3648}
3649
// Per-format wrappers: each asserts the significand part count expected for
// its format (2 for quad, 1 for the others here) and forwards to
// convertIEEEFloatToAPInt instantiated with the matching semantics object.
3650APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3651 assert(partCount() == 2);
3652 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3653}
3654
3655APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3656 assert(partCount()==1);
3657 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3658}
3659
3660APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3661 assert(partCount()==1);
3662 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3663}
3664
3665APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3666 assert(partCount() == 1);
3667 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3668}
3669
3670APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3671 assert(partCount()==1);
3672 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3673}
3674
// Per-format wrappers for the small floating point formats: each asserts a
// single significand part and forwards to convertIEEEFloatToAPInt
// instantiated with the matching semantics object.
3675APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3676 assert(partCount() == 1);
3677 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3678}
3679
3680APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3681 assert(partCount() == 1);
3682 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3683}
3684
3685APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3686 assert(partCount() == 1);
3687 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3688}
3689
3690APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3691 assert(partCount() == 1);
3692 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3693}
3694
3695APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3696 assert(partCount() == 1);
3697 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3698}
3699
3700APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3701 assert(partCount() == 1);
3702 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3703}
3704
3705APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3706 assert(partCount() == 1);
3707 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3708}
3709
3710APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3711 assert(partCount() == 1);
3712 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3713}
3714
3715APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3716 assert(partCount() == 1);
3717 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3718}
3719
3720APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3721 assert(partCount() == 1);
3722 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3723}
3724
3725APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3726 assert(partCount() == 1);
3727 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3728}
3729
3730APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3731 assert(partCount() == 1);
3732 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3733}
3734
3735// This function creates an APInt that is just a bit map of the floating
3736// point constant as it would appear in memory. It is not a conversion,
3737// and treating the result as a normal integer is unlikely to be useful.
3738
// The body compares `semantics` against each known format in turn and calls
// the matching convert*APFloatToAPInt helper; anything that matches none of
// them is asserted to be semX87DoubleExtended.
// NOTE(review): this listing jumps from 3738 to 3740, so the function's
// signature line is not visible here -- restore it from the upstream file.
3740 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3741 return convertHalfAPFloatToAPInt();
3742
3743 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3744 return convertBFloatAPFloatToAPInt();
3745
3746 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3747 return convertFloatAPFloatToAPInt();
3748
3749 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3750 return convertDoubleAPFloatToAPInt();
3751
3752 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3753 return convertQuadrupleAPFloatToAPInt();
3754
3755 if (semantics ==
3756 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3757 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3758
3759 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3760 return convertFloat8E5M2APFloatToAPInt();
3761
3762 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3763 return convertFloat8E5M2FNUZAPFloatToAPInt();
3764
3765 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3766 return convertFloat8E4M3APFloatToAPInt();
3767
3768 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3769 return convertFloat8E4M3FNAPFloatToAPInt();
3770
3771 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3772 return convertFloat8E4M3FNUZAPFloatToAPInt();
3773
3774 if (semantics ==
3775 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3776 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3777
3778 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3779 return convertFloat8E3M4APFloatToAPInt();
3780
3781 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3782 return convertFloatTF32APFloatToAPInt();
3783
3784 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3785 return convertFloat8E8M0FNUAPFloatToAPInt();
3786
3787 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3788 return convertFloat6E3M2FNAPFloatToAPInt();
3789
3790 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3791 return convertFloat6E2M3FNAPFloatToAPInt();
3792
3793 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3794 return convertFloat4E2M1FNAPFloatToAPInt();
3795
3796 assert(semantics ==
3797 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3798 "unknown format!");
3799 return convertF80LongDoubleAPFloatToAPInt();
3800}
3801
// Requires semIEEEsingle semantics (asserted); returns the result of
// bitsToFloat() on this value's bitcastToAPInt() bit pattern.
// NOTE(review): this listing jumps from 3801 to 3803, so the function's
// signature line is not visible here -- restore it from the upstream file.
3803 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3804 "Float semantics are not IEEEsingle");
3805 APInt api = bitcastToAPInt();
3806 return api.bitsToFloat();
3807}
3808
// Requires semIEEEdouble semantics (asserted); returns the result of
// bitsToDouble() on this value's bitcastToAPInt() bit pattern.
// NOTE(review): this listing jumps from 3808 to 3810, so the function's
// signature line is not visible here -- restore it from the upstream file.
3810 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3811 "Float semantics are not IEEEdouble");
3812 APInt api = bitcastToAPInt();
3813 return api.bitsToDouble();
3814}
3815
3816#ifdef HAS_IEE754_FLOAT128
3817float128 IEEEFloat::convertToQuad() const {
3818 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
3819 "Float semantics are not IEEEquads");
3820 APInt api = bitcastToAPInt();
3821 return api.bitsToQuad();
3822}
3823#endif
3824
3825/// Integer bit is explicit in this format. Intel hardware (387 and later)
3826/// does not support these bit patterns:
3827/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3828/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3829/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3830/// exponent = 0, integer bit 1 ("pseudodenormal")
3831/// At the moment, the first three are treated as NaNs, the last one as Normal.
3832void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3833 uint64_t i1 = api.getRawData()[0];
3834 uint64_t i2 = api.getRawData()[1];
3835 uint64_t myexponent = (i2 & 0x7fff);
3836 uint64_t mysignificand = i1;
3837 uint8_t myintegerbit = mysignificand >> 63;
3838
3839 initialize(&APFloatBase::semX87DoubleExtended);
3840 assert(partCount()==2);
3841
3842 sign = static_cast<unsigned int>(i2>>15);
3843 if (myexponent == 0 && mysignificand == 0) {
3844 makeZero(sign);
3845 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3846 makeInf(sign);
3847 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3848 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3849 category = fcNaN;
3850 exponent = exponentNaN();
3851 significandParts()[0] = mysignificand;
3852 significandParts()[1] = 0;
3853 } else {
3854 category = fcNormal;
3855 exponent = myexponent - 16383;
3856 significandParts()[0] = mysignificand;
3857 significandParts()[1] = 0;
3858 if (myexponent==0) // denormal
3859 exponent = -16382;
3860 }
3861}
3862
// Initializes this value from a 128-bit pattern made of two 64-bit doubles:
// word 0 is loaded as a double and converted to semPPCDoubleDoubleLegacy, and
// for finite non-zero results word 1 is converted the same way.
// NOTE(review): this listing jumps from 3883 to 3885, so the statement that
// combines `v` with this value is not visible here -- restore it from the
// upstream file.
3863void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3864 uint64_t i1 = api.getRawData()[0];
3865 uint64_t i2 = api.getRawData()[1];
3866 opStatus fs;
3867 bool losesInfo;
3868
3869 // Get the first double and convert to our format.
3870 initFromDoubleAPInt(APInt(64, i1));
3871 fs = convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3872 &losesInfo);
3873 assert(fs == opOK && !losesInfo);
// fs is only read by the assert above; the cast marks it as used when the
// assert is compiled out.
3874 (void)fs;
3875
3876 // Unless we have a special case, add in second double.
3877 if (isFiniteNonZero()) {
3878 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3879 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3880 &losesInfo);
3881 assert(fs == opOK && !losesInfo);
3882 (void)fs;
3883
3885 }
3886}
3887
3888// The E8M0 format has the following characteristics:
3889// It is an 8-bit unsigned format with only exponents (no actual significand).
3890// No encodings for {zero, infinities or denorms}.
3891// NaN is represented by all 1's.
3892// Bias is 127.
3893void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3894 const uint64_t exponent_mask = 0xff;
3895 uint64_t val = api.getRawData()[0];
3896 uint64_t myexponent = (val & exponent_mask);
3897
3898 initialize(&APFloatBase::semFloat8E8M0FNU);
3899 assert(partCount() == 1);
3900
3901 // This format has unsigned representation only
3902 sign = 0;
3903
3904 // Set the significand
3905 // This format does not have any significand but the 'Pth' precision bit is
3906 // always set to 1 for consistency in APFloat's internal representation.
3907 uint64_t mysignificand = 1;
3908 significandParts()[0] = mysignificand;
3909
3910 // This format can either have a NaN or fcNormal
3911 // All 1's i.e. 255 is a NaN
3912 if (val == exponent_mask) {
3913 category = fcNaN;
3914 exponent = exponentNaN();
3915 return;
3916 }
3917 // Handle fcNormal...
3918 category = fcNormal;
3919 exponent = myexponent - 127; // 127 is bias
3920}
// Decodes `api` (whose width must equal S.sizeInBits) into this value using
// the layout described by S. After extracting sign, exponent and trailing
// significand, the pattern is classified in this order: infinity (only for
// IEEE754 non-finite behavior), NaN (per S.nanEncoding), zero, and finally a
// normal or denormal number.
3921template <const fltSemantics &S>
3922void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3923 assert(api.getBitWidth() == S.sizeInBits);
3924 constexpr integerPart integer_bit = integerPart{1}
3925 << ((S.precision - 1) % integerPartWidth);
3926 constexpr uint64_t significand_mask = integer_bit - 1;
3927 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3928 constexpr unsigned int stored_significand_parts =
3929 partCountForBits(trailing_significand_bits);
3930 constexpr unsigned int exponent_bits =
3931 S.sizeInBits - 1 - trailing_significand_bits;
3932 static_assert(exponent_bits < 64);
3933 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3934 constexpr int bias = -(S.minExponent - 1);
3935
3936 // Copy the bits of the significand. We need to clear out the exponent and
3937 // sign bit in the last word.
3938 std::array<integerPart, stored_significand_parts> mysignificand;
3939 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3940 if constexpr (significand_mask != 0) {
3941 mysignificand[mysignificand.size() - 1] &= significand_mask;
3942 }
3943
3944 // We assume the last word holds the sign bit, the exponent, and potentially
3945 // some of the trailing significand field.
3946 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3947 uint64_t myexponent =
3948 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3949
3950 initialize(&S);
3951 assert(partCount() == mysignificand.size());
3952
3953 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3954
3955 bool all_zero_significand =
3956 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3957
3958 bool is_zero = myexponent == 0 && all_zero_significand;
3959
3960 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3961 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3962 makeInf(sign);
3963 return;
3964 }
3965 }
3966
3967 bool is_nan = false;
3968
// How a NaN is recognized depends on the format's NaN encoding: the NaN
// exponent with a non-zero significand (IEEE), the NaN exponent with an
// all-ones significand (AllOnes), or the negative-zero pattern (NegativeZero).
3969 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3970 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3971 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3972 bool all_ones_significand =
3973 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3974 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3975 (!significand_mask ||
3976 mysignificand[mysignificand.size() - 1] == significand_mask);
3977 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3978 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3979 is_nan = is_zero && sign;
3980 }
3981
3982 if (is_nan) {
3983 category = fcNaN;
3984 exponent = ::exponentNaN(S);
3985 std::copy_n(mysignificand.begin(), mysignificand.size(),
3986 significandParts());
3987 return;
3988 }
3989
3990 if (is_zero) {
3991 makeZero(sign);
3992 return;
3993 }
3994
// Everything else is a finite non-zero value. A zero exponent field means a
// denormal, stored at S.minExponent without the integer bit; otherwise the
// integer bit is set explicitly.
3995 category = fcNormal;
3996 exponent = myexponent - bias;
3997 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3998 if (myexponent == 0) // denormal
3999 exponent = S.minExponent;
4000 else
4001 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
4002}
4003
// Per-format wrappers: each forwards `api` to initFromIEEEAPInt instantiated
// with the semantics object of the named format.
4004void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
4005 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
4006}
4007
4008void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
4009 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
4010}
4011
4012void IEEEFloat::initFromFloatAPInt(const APInt &api) {
4013 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
4014}
4015
4016void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
4017 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
4018}
4019
4020void IEEEFloat::initFromHalfAPInt(const APInt &api) {
4021 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
4022}
4023
4024void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
4025 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
4026}
4027
4028void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
4029 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
4030}
4031
4032void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
4033 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
4034}
4035
4036void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
4037 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
4038}
4039
4040void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
4041 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
4042}
4043
4044void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
4045 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
4046}
4047
4048void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
4049 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
4050}
4051
4052void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
4053 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
4054}
4055
4056void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
4057 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
4058}
4059
4060void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
4061 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
4062}
4063
4064void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
4065 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
4066}
4067
4068/// Treat api as containing the bits of a floating point number.
4069void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
4070 assert(api.getBitWidth() == Sem->sizeInBits);
4071 if (Sem == &APFloatBase::semIEEEhalf)
4072 return initFromHalfAPInt(api);
4073 if (Sem == &APFloatBase::semBFloat)
4074 return initFromBFloatAPInt(api);
4075 if (Sem == &APFloatBase::semIEEEsingle)
4076 return initFromFloatAPInt(api);
4077 if (Sem == &APFloatBase::semIEEEdouble)
4078 return initFromDoubleAPInt(api);
4079 if (Sem == &APFloatBase::semX87DoubleExtended)
4080 return initFromF80LongDoubleAPInt(api);
4081 if (Sem == &APFloatBase::semIEEEquad)
4082 return initFromQuadrupleAPInt(api);
4083 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
4084 return initFromPPCDoubleDoubleLegacyAPInt(api);
4085 if (Sem == &APFloatBase::semFloat8E5M2)
4086 return initFromFloat8E5M2APInt(api);
4087 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
4088 return initFromFloat8E5M2FNUZAPInt(api);
4089 if (Sem == &APFloatBase::semFloat8E4M3)
4090 return initFromFloat8E4M3APInt(api);
4091 if (Sem == &APFloatBase::semFloat8E4M3FN)
4092 return initFromFloat8E4M3FNAPInt(api);
4093 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
4094 return initFromFloat8E4M3FNUZAPInt(api);
4095 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
4096 return initFromFloat8E4M3B11FNUZAPInt(api);
4097 if (Sem == &APFloatBase::semFloat8E3M4)
4098 return initFromFloat8E3M4APInt(api);
4099 if (Sem == &APFloatBase::semFloatTF32)
4100 return initFromFloatTF32APInt(api);
4101 if (Sem == &APFloatBase::semFloat8E8M0FNU)
4102 return initFromFloat8E8M0FNUAPInt(api);
4103 if (Sem == &APFloatBase::semFloat6E3M2FN)
4104 return initFromFloat6E3M2FNAPInt(api);
4105 if (Sem == &APFloatBase::semFloat6E2M3FN)
4106 return initFromFloat6E2M3FNAPInt(api);
4107 if (Sem == &APFloatBase::semFloat4E2M1FN)
4108 return initFromFloat4E2M1FNAPInt(api);
4109
4110 llvm_unreachable("unsupported semantics");
4111}
4112
4113/// Make this number the largest magnitude normal number in the given
4114/// semantics.
4115void IEEEFloat::makeLargest(bool Negative) {
4116 if (Negative && !semantics->hasSignedRepr)
4118 "This floating point format does not support signed values");
4119 // We want (in interchange format):
4120 // sign = {Negative}
4121 // exponent = 1..10
4122 // significand = 1..1
4123 category = fcNormal;
4124 sign = Negative;
4125 exponent = semantics->maxExponent;
4126
4127 // Use memset to set all but the highest integerPart to all ones.
4128 integerPart *significand = significandParts();
4129 unsigned PartCount = partCount();
4130 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4131
4132 // Set the high integerPart especially setting all unused top bits for
4133 // internal consistency.
4134 const unsigned NumUnusedHighBits =
4135 PartCount*integerPartWidth - semantics->precision;
4136 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4137 ? (~integerPart(0) >> NumUnusedHighBits)
4138 : 0;
4139 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4140 semantics->nanEncoding == fltNanEncoding::AllOnes &&
4141 (semantics->precision > 1))
4142 significand[0] &= ~integerPart(1);
4143}
4144
4145/// Make this number the smallest magnitude denormal number in the given
4146/// semantics.
4147void IEEEFloat::makeSmallest(bool Negative) {
4148 if (Negative && !semantics->hasSignedRepr)
4150 "This floating point format does not support signed values");
4151 // We want (in interchange format):
4152 // sign = {Negative}
4153 // exponent = 0..0
4154 // significand = 0..01
4155 category = fcNormal;
4156 sign = Negative;
4157 exponent = semantics->minExponent;
4158 APInt::tcSet(significandParts(), 1, partCount());
4159}
4160
4162 if (Negative && !semantics->hasSignedRepr)
4164 "This floating point format does not support signed values");
4165 // We want (in interchange format):
4166 // sign = {Negative}
4167 // exponent = 0..0
4168 // significand = 10..0
4169
4170 category = fcNormal;
4171 zeroSignificand();
4172 sign = Negative;
4173 exponent = semantics->minExponent;
4174 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4175}
4176
4177IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4178 initFromAPInt(&Sem, API);
4179}
4180
4182 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4183}
4184
4186 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4187}
4188
4189namespace {
4190 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4191 Buffer.append(Str.begin(), Str.end());
4192 }
4193
4194 /// Removes data from the given significand until it is no more
4195 /// precise than is required for the desired precision.
4196 void AdjustToPrecision(APInt &significand,
4197 int &exp, unsigned FormatPrecision) {
4198 unsigned bits = significand.getActiveBits();
4199
4200 // 196/59 is a very slight overestimate of lg_2(10).
4201 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4202
4203 if (bits <= bitsRequired) return;
4204
4205 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4206 if (!tensRemovable) return;
4207
4208 exp += tensRemovable;
4209
4210 APInt divisor(significand.getBitWidth(), 1);
4211 APInt powten(significand.getBitWidth(), 10);
4212 while (true) {
4213 if (tensRemovable & 1)
4214 divisor *= powten;
4215 tensRemovable >>= 1;
4216 if (!tensRemovable) break;
4217 powten *= powten;
4218 }
4219
4220 significand = significand.udiv(divisor);
4221
4222 // Truncate the significand down to its active bit count.
4223 significand = significand.trunc(significand.getActiveBits());
4224 }
4225
4226
4227 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4228 int &exp, unsigned FormatPrecision) {
4229 unsigned N = buffer.size();
4230 if (N <= FormatPrecision) return;
4231
4232 // The most significant figures are the last ones in the buffer.
4233 unsigned FirstSignificant = N - FormatPrecision;
4234
4235 // Round.
4236 // FIXME: this probably shouldn't use 'round half up'.
4237
4238 // Rounding down is just a truncation, except we also want to drop
4239 // trailing zeros from the new result.
4240 if (buffer[FirstSignificant - 1] < '5') {
4241 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4242 FirstSignificant++;
4243
4244 exp += FirstSignificant;
4245 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4246 return;
4247 }
4248
4249 // Rounding up requires a decimal add-with-carry. If we continue
4250 // the carry, the newly-introduced zeros will just be truncated.
4251 for (unsigned I = FirstSignificant; I != N; ++I) {
4252 if (buffer[I] == '9') {
4253 FirstSignificant++;
4254 } else {
4255 buffer[I]++;
4256 break;
4257 }
4258 }
4259
4260 // If we carried through, we have exactly one digit of precision.
4261 if (FirstSignificant == N) {
4262 exp += FirstSignificant;
4263 buffer.clear();
4264 buffer.push_back('1');
4265 return;
4266 }
4267
4268 exp += FirstSignificant;
4269 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4270 }
4271
4272 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4273 APInt significand, unsigned FormatPrecision,
4274 unsigned FormatMaxPadding, bool TruncateZero) {
4275 const int semanticsPrecision = significand.getBitWidth();
4276
4277 if (isNeg)
4278 Str.push_back('-');
4279
4280 // Set FormatPrecision if zero. We want to do this before we
4281 // truncate trailing zeros, as those are part of the precision.
4282 if (!FormatPrecision) {
4283 // We use enough digits so the number can be round-tripped back to an
4284 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4285 // Accurately" by Steele and White.
4286 // FIXME: Using a formula based purely on the precision is conservative;
4287 // we can print fewer digits depending on the actual value being printed.
4288
4289 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4290 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4291 }
4292
4293 // Ignore trailing binary zeros.
4294 int trailingZeros = significand.countr_zero();
4295 exp += trailingZeros;
4296 significand.lshrInPlace(trailingZeros);
4297
4298 // Change the exponent from 2^e to 10^e.
4299 if (exp == 0) {
4300 // Nothing to do.
4301 } else if (exp > 0) {
4302 // Just shift left.
4303 significand = significand.zext(semanticsPrecision + exp);
4304 significand <<= exp;
4305 exp = 0;
4306 } else { /* exp < 0 */
4307 int texp = -exp;
4308
4309 // We transform this using the identity:
4310 // (N)(2^-e) == (N)(5^e)(10^-e)
4311 // This means we have to multiply N (the significand) by 5^e.
4312 // To avoid overflow, we have to operate on numbers large
4313 // enough to store N * 5^e:
4314 // log2(N * 5^e) == log2(N) + e * log2(5)
4315 // <= semantics->precision + e * 137 / 59
4316 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4317
4318 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4319
4320 // Multiply significand by 5^e.
4321 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4322 significand = significand.zext(precision);
4323 APInt five_to_the_i(precision, 5);
4324 while (true) {
4325 if (texp & 1)
4326 significand *= five_to_the_i;
4327
4328 texp >>= 1;
4329 if (!texp)
4330 break;
4331 five_to_the_i *= five_to_the_i;
4332 }
4333 }
4334
4335 AdjustToPrecision(significand, exp, FormatPrecision);
4336
4338
4339 // Fill the buffer.
4340 unsigned precision = significand.getBitWidth();
4341 if (precision < 4) {
4342 // We need enough precision to store the value 10.
4343 precision = 4;
4344 significand = significand.zext(precision);
4345 }
4346 APInt ten(precision, 10);
4347 APInt digit(precision, 0);
4348
4349 bool inTrail = true;
4350 while (significand != 0) {
4351 // digit <- significand % 10
4352 // significand <- significand / 10
4353 APInt::udivrem(significand, ten, significand, digit);
4354
4355 unsigned d = digit.getZExtValue();
4356
4357 // Drop trailing zeros.
4358 if (inTrail && !d)
4359 exp++;
4360 else {
4361 buffer.push_back((char) ('0' + d));
4362 inTrail = false;
4363 }
4364 }
4365
4366 assert(!buffer.empty() && "no characters in buffer!");
4367
4368 // Drop down to FormatPrecision.
4369 // TODO: don't do more precise calculations above than are required.
4370 AdjustToPrecision(buffer, exp, FormatPrecision);
4371
4372 unsigned NDigits = buffer.size();
4373
4374 // Check whether we should use scientific notation.
4375 bool FormatScientific;
4376 if (!FormatMaxPadding)
4377 FormatScientific = true;
4378 else {
4379 if (exp >= 0) {
4380 // 765e3 --> 765000
4381 // ^^^
4382 // But we shouldn't make the number look more precise than it is.
4383 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4384 NDigits + (unsigned) exp > FormatPrecision);
4385 } else {
4386 // Power of the most significant digit.
4387 int MSD = exp + (int) (NDigits - 1);
4388 if (MSD >= 0) {
4389 // 765e-2 == 7.65
4390 FormatScientific = false;
4391 } else {
4392 // 765e-5 == 0.00765
4393 // ^ ^^
4394 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4395 }
4396 }
4397 }
4398
4399 // Scientific formatting is pretty straightforward.
4400 if (FormatScientific) {
4401 exp += (NDigits - 1);
4402
4403 Str.push_back(buffer[NDigits-1]);
4404 Str.push_back('.');
4405 if (NDigits == 1 && TruncateZero)
4406 Str.push_back('0');
4407 else
4408 for (unsigned I = 1; I != NDigits; ++I)
4409 Str.push_back(buffer[NDigits-1-I]);
4410 // Fill with zeros up to FormatPrecision.
4411 if (!TruncateZero && FormatPrecision > NDigits - 1)
4412 Str.append(FormatPrecision - NDigits + 1, '0');
4413 // For !TruncateZero we use lower 'e'.
4414 Str.push_back(TruncateZero ? 'E' : 'e');
4415
4416 Str.push_back(exp >= 0 ? '+' : '-');
4417 if (exp < 0)
4418 exp = -exp;
4419 SmallVector<char, 6> expbuf;
4420 do {
4421 expbuf.push_back((char) ('0' + (exp % 10)));
4422 exp /= 10;
4423 } while (exp);
4424 // Exponent always at least two digits if we do not truncate zeros.
4425 if (!TruncateZero && expbuf.size() < 2)
4426 expbuf.push_back('0');
4427 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4428 Str.push_back(expbuf[E-1-I]);
4429 return;
4430 }
4431
4432 // Non-scientific, positive exponents.
4433 if (exp >= 0) {
4434 for (unsigned I = 0; I != NDigits; ++I)
4435 Str.push_back(buffer[NDigits-1-I]);
4436 for (unsigned I = 0; I != (unsigned) exp; ++I)
4437 Str.push_back('0');
4438 return;
4439 }
4440
4441 // Non-scientific, negative exponents.
4442
4443 // The number of digits to the left of the decimal point.
4444 int NWholeDigits = exp + (int) NDigits;
4445
4446 unsigned I = 0;
4447 if (NWholeDigits > 0) {
4448 for (; I != (unsigned) NWholeDigits; ++I)
4449 Str.push_back(buffer[NDigits-I-1]);
4450 Str.push_back('.');
4451 } else {
4452 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4453
4454 Str.push_back('0');
4455 Str.push_back('.');
4456 for (unsigned Z = 1; Z != NZeros; ++Z)
4457 Str.push_back('0');
4458 }
4459
4460 for (; I != NDigits; ++I)
4461 Str.push_back(buffer[NDigits-I-1]);
4462
4463 }
4464} // namespace
4465
4466void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4467 unsigned FormatMaxPadding, bool TruncateZero) const {
4468 switch (category) {
4469 case fcInfinity:
4470 if (isNegative())
4471 return append(Str, "-Inf");
4472 else
4473 return append(Str, "+Inf");
4474
4475 case fcNaN: return append(Str, "NaN");
4476
4477 case fcZero:
4478 if (isNegative())
4479 Str.push_back('-');
4480
4481 if (!FormatMaxPadding) {
4482 if (TruncateZero)
4483 append(Str, "0.0E+0");
4484 else {
4485 append(Str, "0.0");
4486 if (FormatPrecision > 1)
4487 Str.append(FormatPrecision - 1, '0');
4488 append(Str, "e+00");
4489 }
4490 } else {
4491 Str.push_back('0');
4492 }
4493 return;
4494
4495 case fcNormal:
4496 break;
4497 }
4498
4499 // Decompose the number into an APInt and an exponent.
4500 int exp = exponent - ((int) semantics->precision - 1);
4501 APInt significand(
4502 semantics->precision,
4503 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4504
4505 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4506 FormatMaxPadding, TruncateZero);
4507
4508}
4509
4511 if (!isFinite() || isZero())
4512 return INT_MIN;
4513
4514 const integerPart *Parts = significandParts();
4515 const int PartCount = partCountForBits(semantics->precision);
4516
4517 int PopCount = 0;
4518 for (int i = 0; i < PartCount; ++i) {
4519 PopCount += llvm::popcount(Parts[i]);
4520 if (PopCount > 1)
4521 return INT_MIN;
4522 }
4523
4524 if (exponent != semantics->minExponent)
4525 return exponent;
4526
4527 int CountrParts = 0;
4528 for (int i = 0; i < PartCount;
4529 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4530 if (Parts[i] != 0) {
4531 return exponent - semantics->precision + CountrParts +
4532 llvm::countr_zero(Parts[i]) + 1;
4533 }
4534 }
4535
4536 llvm_unreachable("didn't find the set bit");
4537}
4538
4540 if (!isNaN())
4541 return false;
4542 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4543 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4544 return false;
4545
4546 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4547 // first bit of the trailing significand being 0.
4548 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4549}
4550
4551/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4552///
4553/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4554/// appropriate sign switching before/after the computation.
4556 // If we are performing nextDown, swap sign so we have -x.
4557 if (nextDown)
4558 changeSign();
4559
4560 // Compute nextUp(x)
4561 opStatus result = opOK;
4562
4563 // Handle each float category separately.
4564 switch (category) {
4565 case fcInfinity:
4566 // nextUp(+inf) = +inf
4567 if (!isNegative())
4568 break;
4569 // nextUp(-inf) = -getLargest()
4570 makeLargest(true);
4571 break;
4572 case fcNaN:
4573 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4574 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4575 // change the payload.
4576 if (isSignaling()) {
4577 result = opInvalidOp;
4578 // For consistency, propagate the sign of the sNaN to the qNaN.
4579 makeNaN(false, isNegative(), nullptr);
4580 }
4581 break;
4582 case fcZero:
4583 // nextUp(pm 0) = +getSmallest()
4584 makeSmallest(false);
4585 break;
4586 case fcNormal:
4587 // nextUp(-getSmallest()) = -0
4588 if (isSmallest() && isNegative()) {
4589 APInt::tcSet(significandParts(), 0, partCount());
4590 category = fcZero;
4591 exponent = 0;
4592 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4593 sign = false;
4594 if (!semantics->hasZero)
4596 break;
4597 }
4598
4599 if (isLargest() && !isNegative()) {
4600 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4601 // nextUp(getLargest()) == NAN
4602 makeNaN();
4603 break;
4604 } else if (semantics->nonFiniteBehavior ==
4606 // nextUp(getLargest()) == getLargest()
4607 break;
4608 } else {
4609 // nextUp(getLargest()) == INFINITY
4610 APInt::tcSet(significandParts(), 0, partCount());
4611 category = fcInfinity;
4612 exponent = semantics->maxExponent + 1;
4613 break;
4614 }
4615 }
4616
4617 // nextUp(normal) == normal + inc.
4618 if (isNegative()) {
4619 // If we are negative, we need to decrement the significand.
4620
4621 // We only cross a binade boundary that requires adjusting the exponent
4622 // if:
4623 // 1. exponent != semantics->minExponent. This implies we are not in the
4624 // smallest binade or are dealing with denormals.
4625 // 2. Our significand excluding the integral bit is all zeros.
4626 bool WillCrossBinadeBoundary =
4627 exponent != semantics->minExponent && isSignificandAllZeros();
4628
4629 // Decrement the significand.
4630 //
4631 // We always do this since:
4632 // 1. If we are dealing with a non-binade decrement, by definition we
4633 // just decrement the significand.
4634 // 2. If we are dealing with a normal -> normal binade decrement, since
4635 // we have an explicit integral bit the fact that all bits but the
4636 // integral bit are zero implies that subtracting one will yield a
4637 // significand with 0 integral bit and 1 in all other spots. Thus we
4638 // must just adjust the exponent and set the integral bit to 1.
4639 // 3. If we are dealing with a normal -> denormal binade decrement,
4640 // since we set the integral bit to 0 when we represent denormals, we
4641 // just decrement the significand.
4642 integerPart *Parts = significandParts();
4643 APInt::tcDecrement(Parts, partCount());
4644
4645 if (WillCrossBinadeBoundary) {
4646 // Our result is a normal number. Do the following:
4647 // 1. Set the integral bit to 1.
4648 // 2. Decrement the exponent.
4649 APInt::tcSetBit(Parts, semantics->precision - 1);
4650 exponent--;
4651 }
4652 } else {
4653 // If we are positive, we need to increment the significand.
4654
4655 // We only cross a binade boundary that requires adjusting the exponent if
4656 // the input is not a denormal and all of said input's significand bits
4657 // are set. If all of said conditions are true: clear the significand, set
4658 // the integral bit to 1, and increment the exponent. If we have a
4659 // denormal always increment since moving denormals and the numbers in the
4660 // smallest normal binade have the same exponent in our representation.
4661 // If there are only exponents, any increment always crosses the
4662 // BinadeBoundary.
4663 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4664 (!isDenormal() && isSignificandAllOnes());
4665
4666 if (WillCrossBinadeBoundary) {
4667 integerPart *Parts = significandParts();
4668 APInt::tcSet(Parts, 0, partCount());
4669 APInt::tcSetBit(Parts, semantics->precision - 1);
4670 assert(exponent != semantics->maxExponent &&
4671 "We can not increment an exponent beyond the maxExponent allowed"
4672 " by the given floating point semantics.");
4673 exponent++;
4674 } else {
4675 incrementSignificand();
4676 }
4677 }
4678 break;
4679 }
4680
4681 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4682 if (nextDown)
4683 changeSign();
4684
4685 return result;
4686}
4687
4688APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4689 return ::exponentNaN(*semantics);
4690}
4691
4692APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4693 return ::exponentInf(*semantics);
4694}
4695
4696APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4697 return ::exponentZero(*semantics);
4698}
4699
4700void IEEEFloat::makeInf(bool Negative) {
4701 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4702 llvm_unreachable("This floating point format does not support Inf");
4703
4704 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4705 // There is no Inf, so make NaN instead.
4706 makeNaN(false, Negative);
4707 return;
4708 }
4709 category = fcInfinity;
4710 sign = Negative;
4711 exponent = exponentInf();
4712 APInt::tcSet(significandParts(), 0, partCount());
4713}
4714
4715void IEEEFloat::makeZero(bool Negative) {
4716 if (!semantics->hasZero)
4717 llvm_unreachable("This floating point format does not support Zero");
4718
4719 category = fcZero;
4720 sign = Negative;
4721 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4722 // Merge negative zero to positive because 0b10000...000 is used for NaN
4723 sign = false;
4724 }
4725 exponent = exponentZero();
4726 APInt::tcSet(significandParts(), 0, partCount());
4727}
4728
4730 assert(isNaN());
4731 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4732 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4733}
4734
4735int ilogb(const IEEEFloat &Arg) {
4736 if (Arg.isNaN())
4737 return APFloat::IEK_NaN;
4738 if (Arg.isZero())
4739 return APFloat::IEK_Zero;
4740 if (Arg.isInfinity())
4741 return APFloat::IEK_Inf;
4742 if (!Arg.isDenormal())
4743 return Arg.exponent;
4744
4745 IEEEFloat Normalized(Arg);
4746 int SignificandBits = Arg.getSemantics().precision - 1;
4747
4748 Normalized.exponent += SignificandBits;
4749 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4750 return Normalized.exponent - SignificandBits;
4751}
4752
4754 auto MaxExp = X.getSemantics().maxExponent;
4755 auto MinExp = X.getSemantics().minExponent;
4756
4757 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4758 // overflow; clamp it to a safe range before adding, but ensure that the range
4759 // is large enough that the clamp does not change the result. The range we
4760 // need to support is the difference between the largest possible exponent and
4761 // the normalized exponent of half the smallest denormal.
4762
4763 int SignificandBits = X.getSemantics().precision - 1;
4764 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4765
4766 // Clamp to one past the range ends to let normalize handle overlflow.
4767 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4768 X.normalize(RoundingMode, lfExactlyZero);
4769 if (X.isNaN())
4770 X.makeQuiet();
4771 return X;
4772}
4773
4774IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4775 Exp = ilogb(Val);
4776
4777 // Quiet signalling nans.
4778 if (Exp == APFloat::IEK_NaN) {
4779 IEEEFloat Quiet(Val);
4780 Quiet.makeQuiet();
4781 return Quiet;
4782 }
4783
4784 if (Exp == APFloat::IEK_Inf)
4785 return Val;
4786
4787 // 1 is added because frexp is defined to return a normalized fraction in
4788 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4789 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4790 return scalbn(Val, -Exp, RM);
4791}
4792
4794 : Semantics(&S),
4795 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4796 APFloat(APFloatBase::semIEEEdouble)}) {
4797 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4798}
4799
4801 : Semantics(&S), Floats(new APFloat[2]{
4802 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4803 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4804 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4805}
4806
4808 : Semantics(&S),
4809 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4810 APFloat(APFloatBase::semIEEEdouble)}) {
4811 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4812}
4813
4815 : Semantics(&S),
4816 Floats(new APFloat[2]{
4817 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4818 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4819 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4820}
4821
4823 APFloat &&Second)
4824 : Semantics(&S),
4825 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4826 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4827 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4828 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4829}
4830
4832 : Semantics(RHS.Semantics),
4833 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4834 APFloat(RHS.Floats[1])}
4835 : nullptr) {
4836 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4837}
4838
4840 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4841 RHS.Semantics = &APFloatBase::semBogus;
4842 RHS.Floats = nullptr;
4843 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4844}
4845
4847 if (Semantics == RHS.Semantics && RHS.Floats) {
4848 Floats[0] = RHS.Floats[0];
4849 Floats[1] = RHS.Floats[1];
4850 } else if (this != &RHS) {
4851 this->~DoubleAPFloat();
4852 new (this) DoubleAPFloat(RHS);
4853 }
4854 return *this;
4855}
4856
4857// Returns a result such that:
4858// 1. abs(Lo) <= ulp(Hi)/2
4859// 2. Hi == RTNE(Hi + Lo)
4860// 3. Hi + Lo == X + Y
4861//
4862// Requires that log2(X) >= log2(Y).
4863static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4864 if (!X.isFinite())
4865 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4866 APFloat Hi = X + Y;
4867 APFloat Delta = Hi - X;
4868 APFloat Lo = Y - Delta;
4869 return {Hi, Lo};
4870}
4871
4872// Implement addition, subtraction, multiplication and division based on:
4873// "Software for Doubled-Precision Floating-Point Computations",
4874// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4875APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4876 const APFloat &c, const APFloat &cc,
4877 roundingMode RM) {
4878 int Status = opOK;
4879 APFloat z = a;
4880 Status |= z.add(c, RM);
4881 if (!z.isFinite()) {
4882 if (!z.isInfinity()) {
4883 Floats[0] = std::move(z);
4884 Floats[1].makeZero(/* Neg = */ false);
4885 return (opStatus)Status;
4886 }
4887 Status = opOK;
4888 auto AComparedToC = a.compareAbsoluteValue(c);
4889 z = cc;
4890 Status |= z.add(aa, RM);
4891 if (AComparedToC == APFloat::cmpGreaterThan) {
4892 // z = cc + aa + c + a;
4893 Status |= z.add(c, RM);
4894 Status |= z.add(a, RM);
4895 } else {
4896 // z = cc + aa + a + c;
4897 Status |= z.add(a, RM);
4898 Status |= z.add(c, RM);
4899 }
4900 if (!z.isFinite()) {
4901 Floats[0] = std::move(z);
4902 Floats[1].makeZero(/* Neg = */ false);
4903 return (opStatus)Status;
4904 }
4905 Floats[0] = z;
4906 APFloat zz = aa;
4907 Status |= zz.add(cc, RM);
4908 if (AComparedToC == APFloat::cmpGreaterThan) {
4909 // Floats[1] = a - z + c + zz;
4910 Floats[1] = a;
4911 Status |= Floats[1].subtract(z, RM);
4912 Status |= Floats[1].add(c, RM);
4913 Status |= Floats[1].add(zz, RM);
4914 } else {
4915 // Floats[1] = c - z + a + zz;
4916 Floats[1] = c;
4917 Status |= Floats[1].subtract(z, RM);
4918 Status |= Floats[1].add(a, RM);
4919 Status |= Floats[1].add(zz, RM);
4920 }
4921 } else {
4922 // q = a - z;
4923 APFloat q = a;
4924 Status |= q.subtract(z, RM);
4925
4926 // zz = q + c + (a - (q + z)) + aa + cc;
4927 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4928 auto zz = q;
4929 Status |= zz.add(c, RM);
4930 Status |= q.add(z, RM);
4931 Status |= q.subtract(a, RM);
4932 q.changeSign();
4933 Status |= zz.add(q, RM);
4934 Status |= zz.add(aa, RM);
4935 Status |= zz.add(cc, RM);
4936 if (zz.isZero() && !zz.isNegative()) {
4937 Floats[0] = std::move(z);
4938 Floats[1].makeZero(/* Neg = */ false);
4939 return opOK;
4940 }
4941 Floats[0] = z;
4942 Status |= Floats[0].add(zz, RM);
4943 if (!Floats[0].isFinite()) {
4944 Floats[1].makeZero(/* Neg = */ false);
4945 return (opStatus)Status;
4946 }
4947 Floats[1] = std::move(z);
4948 Status |= Floats[1].subtract(Floats[0], RM);
4949 Status |= Floats[1].add(zz, RM);
4950 }
4951 return (opStatus)Status;
4952}
4953
4954APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4955 const DoubleAPFloat &RHS,
4956 DoubleAPFloat &Out,
4957 roundingMode RM) {
4958 if (LHS.getCategory() == fcNaN) {
4959 Out = LHS;
4960 return opOK;
4961 }
4962 if (RHS.getCategory() == fcNaN) {
4963 Out = RHS;
4964 return opOK;
4965 }
4966 if (LHS.getCategory() == fcZero) {
4967 Out = RHS;
4968 return opOK;
4969 }
4970 if (RHS.getCategory() == fcZero) {
4971 Out = LHS;
4972 return opOK;
4973 }
4974 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4975 LHS.isNegative() != RHS.isNegative()) {
4976 Out.makeNaN(false, Out.isNegative(), nullptr);
4977 return opInvalidOp;
4978 }
4979 if (LHS.getCategory() == fcInfinity) {
4980 Out = LHS;
4981 return opOK;
4982 }
4983 if (RHS.getCategory() == fcInfinity) {
4984 Out = RHS;
4985 return opOK;
4986 }
4987 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4988
4989 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4990 CC(RHS.Floats[1]);
4991 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4992 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4993 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4994 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4995 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4996 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4997 return Out.addImpl(A, AA, C, CC, RM);
4998}
4999
5001 roundingMode RM) {
5002 return addWithSpecial(*this, RHS, *this, RM);
5003}
5004
5006 roundingMode RM) {
5007 changeSign();
5008 auto Ret = add(RHS, RM);
5009 changeSign();
5010 return Ret;
5011}
5012
5015 const auto &LHS = *this;
5016 auto &Out = *this;
5017 /* Interesting observation: For special categories, finding the lowest
5018 common ancestor of the following layered graph gives the correct
5019 return category:
5020
5021 NaN
5022 / \
5023 Zero Inf
5024 \ /
5025 Normal
5026
5027 e.g. NaN * NaN = NaN
5028 Zero * Inf = NaN
5029 Normal * Zero = Zero
5030 Normal * Inf = Inf
5031 */
5032 if (LHS.getCategory() == fcNaN) {
5033 Out = LHS;
5034 return opOK;
5035 }
5036 if (RHS.getCategory() == fcNaN) {
5037 Out = RHS;
5038 return opOK;
5039 }
5040 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
5041 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
5042 Out.makeNaN(false, false, nullptr);
5043 return opOK;
5044 }
5045 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
5046 Out = LHS;
5047 return opOK;
5048 }
5049 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
5050 Out = RHS;
5051 return opOK;
5052 }
5053 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
5054 "Special cases not handled exhaustively");
5055
5056 int Status = opOK;
5057 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
5058 // t = a * c
5059 APFloat T = A;
5060 Status |= T.multiply(C, RM);
5061 if (!T.isFiniteNonZero()) {
5062 Floats[0] = T;
5063 Floats[1].makeZero(/* Neg = */ false);
5064 return (opStatus)Status;
5065 }
5066
5067 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5068 APFloat Tau = A;
5069 T.changeSign();
5070 Status |= Tau.fusedMultiplyAdd(C, T, RM);
5071 T.changeSign();
5072 {
5073 // v = a * d
5074 APFloat V = A;
5075 Status |= V.multiply(D, RM);
5076 // w = b * c
5077 APFloat W = B;
5078 Status |= W.multiply(C, RM);
5079 Status |= V.add(W, RM);
5080 // tau += v + w
5081 Status |= Tau.add(V, RM);
5082 }
5083 // u = t + tau
5084 APFloat U = T;
5085 Status |= U.add(Tau, RM);
5086
5087 Floats[0] = U;
5088 if (!U.isFinite()) {
5089 Floats[1].makeZero(/* Neg = */ false);
5090 } else {
5091 // Floats[1] = (t - u) + tau
5092 Status |= T.subtract(U, RM);
5093 Status |= T.add(Tau, RM);
5094 Floats[1] = T;
5095 }
5096 return (opStatus)Status;
5097}
5098
5101 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5102 "Unexpected Semantics");
5103 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5104 auto Ret = Tmp.divide(
5105 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5106 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5107 return Ret;
5108}
5109
5111 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5112 "Unexpected Semantics");
5113 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5114 auto Ret = Tmp.remainder(
5115 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5116 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5117 return Ret;
5118}
5119
5121 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5122 "Unexpected Semantics");
5123 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5124 auto Ret = Tmp.mod(
5125 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5126 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5127 return Ret;
5128}
5129
5132 const DoubleAPFloat &Addend,
5134 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5135 "Unexpected Semantics");
5136 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5137 auto Ret = Tmp.fusedMultiplyAdd(
5138 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
5139 Multiplicand.bitcastToAPInt()),
5140 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
5141 RM);
5142 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5143 return Ret;
5144}
5145
5147 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5148 "Unexpected Semantics");
5149 const APFloat &Hi = getFirst();
5150 const APFloat &Lo = getSecond();
5151
5152 APFloat RoundedHi = Hi;
5153 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
5154
5155 // We can reduce the problem to just the high part if the input:
5156 // 1. Represents a non-finite value.
5157 // 2. Has a component which is zero.
5158 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5159 Floats[0] = std::move(RoundedHi);
5160 Floats[1].makeZero(/*Neg=*/false);
5161 return HiStatus;
5162 }
5163
5164 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5165 // halfway point.
5166 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5167 APFloat TieBreaker) {
5168 // RoundingError tells us which direction we rounded:
5169 // - RoundingError > 0: we rounded up.
5170 // - RoundingError < 0: we rounded down.
5171 // Sterbenz' lemma ensures that RoundingError is exact.
5172 const APFloat RoundingError = Rounded - ToRound;
5173 if (TieBreaker.isNonZero() &&
5174 TieBreaker.isNegative() != RoundingError.isNegative() &&
5175 abs(RoundingError).isExactlyValue(0.5))
5176 Rounded.add(
5177 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5179 return Rounded;
5180 };
5181
5182 // Case 1: Hi is not an integer.
5183 // Special cases are for rounding modes that are sensitive to ties.
5184 if (RoundedHi != Hi) {
5185 // We need to consider the case where Hi was between two integers and the
5186 // rounding mode broke the tie when, in fact, Lo may have had a different
5187 // sign than Hi.
5188 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5189 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5190
5191 Floats[0] = std::move(RoundedHi);
5192 Floats[1].makeZero(/*Neg=*/false);
5193 return HiStatus;
5194 }
5195
5196 // Case 2: Hi is an integer.
5197 // Special cases are for rounding modes which are rounding towards or away from zero.
5198 RoundingMode LoRoundingMode;
5199 if (RM == rmTowardZero)
5200 // When our input is positive, we want the Lo component rounded toward
5201 // negative infinity to get the smallest result magnitude. Likewise,
5202 // negative inputs want the Lo component rounded toward positive infinity.
5203 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5204 else
5205 LoRoundingMode = RM;
5206
5207 APFloat RoundedLo = Lo;
5208 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5209 if (LoRoundingMode == rmNearestTiesToAway)
5210 // We need to consider the case where Lo was between two integers and the
5211 // rounding mode broke the tie when, in fact, Hi may have had a different
5212 // sign than Lo.
5213 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5214
5215 // We must ensure that the final result has no overlap between the two APFloat values.
5216 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5217
5218 Floats[0] = std::move(RoundedHi);
5219 Floats[1] = std::move(RoundedLo);
5220 return LoStatus;
5221}
5222
5224 Floats[0].changeSign();
5225 Floats[1].changeSign();
5226}
5227
5230 // Compare absolute values of the high parts.
5231 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5232 if (HiPartCmp != cmpEqual)
5233 return HiPartCmp;
5234
5235 // Zero, regardless of sign, is equal.
5236 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5237 return cmpEqual;
5238
5239 // At this point, |this->Hi| == |RHS.Hi|.
5240 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5241 // same, and Hi-|Lo| if signs are different.
5242 const bool ThisIsSubtractive =
5243 Floats[0].isNegative() != Floats[1].isNegative();
5244 const bool RHSIsSubtractive =
5245 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5246
5247 // Case 1: The low part of 'this' is zero.
5248 if (Floats[1].isZero())
5249 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5250 // If RHS is subtractive, its magnitude is smaller.
5251 // If RHS is additive, its magnitude is larger.
5252 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5253
5254 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5255 if (RHS.Floats[1].isZero())
5256 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5257 // If 'this' is subtractive, its magnitude is smaller.
5258 // If 'this' is additive, its magnitude is larger.
5259 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5260
5261 // If their natures differ, the additive one is larger.
5262 if (ThisIsSubtractive != RHSIsSubtractive)
5263 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5264
5265 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5266 // The comparison now depends on the magnitude of the low parts.
5267 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5268
5269 if (ThisIsSubtractive) {
5270 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5271 if (LoPartCmp == cmpLessThan)
5272 return cmpGreaterThan;
5273 if (LoPartCmp == cmpGreaterThan)
5274 return cmpLessThan;
5275 }
5276
5277 // If additive, the comparison of |Lo| is direct.
5278 // If equal, they are equal.
5279 return LoPartCmp;
5280}
5281
5283 return Floats[0].getCategory();
5284}
5285
5286bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5287
5289 Floats[0].makeInf(Neg);
5290 Floats[1].makeZero(/* Neg = */ false);
5291}
5292
5294 Floats[0].makeZero(Neg);
5295 Floats[1].makeZero(/* Neg = */ false);
5296}
5297
5299 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5300 "Unexpected Semantics");
5301 Floats[0] =
5302 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5303 Floats[1] =
5304 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5305 if (Neg)
5306 changeSign();
5307}
5308
5310 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5311 "Unexpected Semantics");
5312 Floats[0].makeSmallest(Neg);
5313 Floats[1].makeZero(/* Neg = */ false);
5314}
5315
5317 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5318 "Unexpected Semantics");
5319 Floats[0] =
5320 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5321 if (Neg)
5322 Floats[0].changeSign();
5323 Floats[1].makeZero(/* Neg = */ false);
5324}
5325
5326void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5327 Floats[0].makeNaN(SNaN, Neg, fill);
5328 Floats[1].makeZero(/* Neg = */ false);
5329}
5330
5332 auto Result = Floats[0].compare(RHS.Floats[0]);
5333 // |Float[0]| > |Float[1]|
5334 if (Result == APFloat::cmpEqual)
5335 return Floats[1].compare(RHS.Floats[1]);
5336 return Result;
5337}
5338
5340 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5341 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5342}
5343
5345 if (Arg.Floats)
5346 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5347 return hash_combine(Arg.Semantics);
5348}
5349
5351 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5352 "Unexpected Semantics");
5353 uint64_t Data[] = {
5354 Floats[0].bitcastToAPInt().getRawData()[0],
5355 Floats[1].bitcastToAPInt().getRawData()[0],
5356 };
5357 return APInt(128, Data);
5358}
5359
5361 roundingMode RM) {
5362 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5363 "Unexpected Semantics");
5364 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5365 auto Ret = Tmp.convertFromString(S, RM);
5366 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5367 return Ret;
5368}
5369
5370// The double-double lattice of values corresponds to numbers which obey:
5371// - abs(lo) <= 1/2 * ulp(hi)
5372// - roundTiesToEven(hi + lo) == hi
5373//
5374// nextUp must choose the smallest output > input that follows these rules.
5375// nextDown must choose the largest output < input that follows these rules.
5377 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5378 "Unexpected Semantics");
5379 // nextDown(x) = -nextUp(-x)
5380 if (nextDown) {
5381 changeSign();
5382 APFloat::opStatus Result = next(/*nextDown=*/false);
5383 changeSign();
5384 return Result;
5385 }
5386 switch (getCategory()) {
5387 case fcInfinity:
5388 // nextUp(+inf) = +inf
5389 // nextUp(-inf) = -getLargest()
5390 if (isNegative())
5391 makeLargest(true);
5392 return opOK;
5393
5394 case fcNaN:
5395 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5396 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5397 // change the payload.
5398 if (getFirst().isSignaling()) {
5399 // For consistency, propagate the sign of the sNaN to the qNaN.
5400 makeNaN(false, isNegative(), nullptr);
5401 return opInvalidOp;
5402 }
5403 return opOK;
5404
5405 case fcZero:
5406 // nextUp(pm 0) = +getSmallest()
5407 makeSmallest(false);
5408 return opOK;
5409
5410 case fcNormal:
5411 break;
5412 }
5413
5414 const APFloat &HiOld = getFirst();
5415 const APFloat &LoOld = getSecond();
5416
5417 APFloat NextLo = LoOld;
5418 NextLo.next(/*nextDown=*/false);
5419
5420 // We want to admit values where:
5421 // 1. abs(Lo) <= ulp(Hi)/2
5422 // 2. Hi == RTNE(Hi + lo)
5423 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5424 return Hi + Lo == Hi;
5425 };
5426
5427 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5428 if (InLattice(HiOld, NextLo)) {
5429 // Yes, the result is (HiOld, nextUp(LoOld)).
5430 Floats[1] = std::move(NextLo);
5431
5432 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5433 // value is defined to have exactly 106 bits of precision. This limitation
5434 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5435 // value.
5436 DoubleAPFloat Largest{*Semantics, uninitialized};
5437 Largest.makeLargest(/*Neg=*/false);
5438 if (compare(Largest) == cmpGreaterThan)
5439 makeInf(/*Neg=*/false);
5440
5441 return opOK;
5442 }
5443
5444 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5445 // correct result. We know the new hi component will be nextUp(HiOld) but our
5446 // lattice rules make it a little ambiguous what the correct NextLo must be.
5447 APFloat NextHi = HiOld;
5448 NextHi.next(/*nextDown=*/false);
5449
5450 // nextUp(getLargest()) == INFINITY
5451 if (NextHi.isInfinity()) {
5452 makeInf(/*Neg=*/false);
5453 return opOK;
5454 }
5455
5456 // IEEE 754-2019 5.3.1:
5457 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5458 // -0."
5459 if (NextHi.isZero()) {
5460 makeZero(/*Neg=*/true);
5461 return opOK;
5462 }
5463
5464 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5465 // negative infinity as possible.
5466 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5467 if (!InLattice(NextHi, NextLo))
5468 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5469 NextLo.next(/*nextDown=*/false);
5470
5471 Floats[0] = std::move(NextHi);
5472 Floats[1] = std::move(NextLo);
5473
5474 return opOK;
5475}
5476
5477APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5478 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5479 roundingMode RM, bool *IsExact) const {
5480 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5481 "Unexpected Semantics");
5482
5483 // If Hi is not finite, or Lo is zero, the value is entirely represented
5484 // by Hi. Delegate to the simpler single-APFloat conversion.
5485 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5486 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5487
5488 // First, round the full double-double value to an integral value. This
5489 // simplifies the rest of the function, as we no longer need to consider
5490 // fractional parts.
5491 *IsExact = false;
5492 DoubleAPFloat Integral = *this;
5493 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5494 if (RoundStatus == opInvalidOp)
5495 return opInvalidOp;
5496 const APFloat &IntegralHi = Integral.getFirst();
5497 const APFloat &IntegralLo = Integral.getSecond();
5498
5499 // If rounding results in either component being zero, the sum is trivial.
5500 // Delegate to the simpler single-APFloat conversion.
5501 bool HiIsExact;
5502 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5503 const opStatus HiStatus =
5504 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5505 // The conversion from an integer-valued float to an APInt may fail if the
5506 // result would be out of range. Regardless, taking this path is only
5507 // possible if rounding occurred during the initial `roundToIntegral`.
5508 return HiStatus == opOK ? opInexact : HiStatus;
5509 }
5510
5511 // A negative number cannot be represented by an unsigned integer.
5512 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5513 if (!IsSigned && IntegralHi.isNegative())
5514 return opInvalidOp;
5515
5516 // Handle the special boundary case where |Hi| is exactly the power of two
5517 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5518 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5519 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5520 // signed, N for unsigned).
5521 bool LoIsExact;
5522 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5523 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5524 if (HiExactLog2 >= 0 &&
5525 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5526 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5527 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5528 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5529 return opInvalidOp;
5530
5531 // If the signs differ, the sum will fit. We can compute the result using
5532 // properties of two's complement arithmetic without a wide intermediate
5533 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5534 const opStatus LoStatus = IntegralLo.convertToInteger(
5535 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5536 if (LoStatus == opInvalidOp)
5537 return opInvalidOp;
5538
5539 // Adjust the bit pattern of Lo to account for Hi's value:
5540 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5541 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5542 // already produced the correct final bit pattern.
5543 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5544 // can be computed by taking the two's complement pattern for `Lo` and
5545 // clearing the sign bit.
5546 if (IsSigned && !IntegralHi.isNegative())
5547 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5548 *IsExact = RoundStatus == opOK;
5549 return RoundStatus;
5550 }
5551
5552 // Convert Hi into an integer. This may not fit but that is OK: we know that
5553 // Hi + Lo would not fit either in this situation.
5554 const opStatus HiStatus = IntegralHi.convertToInteger(
5555 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5556 if (HiStatus == opInvalidOp)
5557 return HiStatus;
5558
5559 // Convert Lo into a temporary integer of the same width.
5560 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5561 const opStatus LoStatus =
5562 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5563 if (LoStatus == opInvalidOp)
5564 return LoStatus;
5565
5566 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5567 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5568 // where the sum could cross the integer type's boundary is when Hi is a
5569 // power of two, which is handled by the special case block above.
5570 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5571
5572 *IsExact = RoundStatus == opOK;
5573 return RoundStatus;
5574}
5575
5578 unsigned int Width, bool IsSigned,
5579 roundingMode RM, bool *IsExact) const {
5580 opStatus FS =
5581 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5582
5583 if (FS == opInvalidOp) {
5584 const unsigned DstPartsCount = partCountForBits(Width);
5585 assert(DstPartsCount <= Input.size() && "Integer too big");
5586
5587 unsigned Bits;
5588 if (getCategory() == fcNaN)
5589 Bits = 0;
5590 else if (isNegative())
5591 Bits = IsSigned;
5592 else
5593 Bits = Width - IsSigned;
5594
5595 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5596 if (isNegative() && IsSigned)
5597 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5598 }
5599
5600 return FS;
5601}
5602
5603APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5604 switch (RM) {
5606 makeLargest(/*Neg=*/isNegative());
5607 break;
5609 if (isNegative())
5610 makeInf(/*Neg=*/true);
5611 else
5612 makeLargest(/*Neg=*/false);
5613 break;
5615 if (isNegative())
5616 makeLargest(/*Neg=*/true);
5617 else
5618 makeInf(/*Neg=*/false);
5619 break;
5622 makeInf(/*Neg=*/isNegative());
5623 break;
5624 default:
5625 llvm_unreachable("Invalid rounding mode found");
5626 }
5627 opStatus S = opInexact;
5628 if (!getFirst().isFinite())
5629 S = static_cast<opStatus>(S | opOverflow);
5630 return S;
5631}
5632
5633APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5634 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5635 // Find the most significant bit of the source integer. APInt::tcMSB returns
5636 // UINT_MAX for a zero value.
5637 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5638 if (SrcMSB == UINT_MAX) {
5639 // The source integer is 0.
5640 makeZero(/*Neg=*/false);
5641 return opOK;
5642 }
5643
5644 // Create a minimally-sized APInt to represent the source value.
5645 const unsigned SrcBitWidth = SrcMSB + 1;
5646 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5647 /*isUnsigned=*/true};
5648
5649 // Stage 1: Initial Approximation.
5650 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5651 // We use round-to-nearest because it minimizes the initial error, which is
5652 // crucial for the subsequent steps.
5654 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5655
5656 // If the first approximation already overflows, the number is too large.
5657 // NOTE: The underlying semantics are *more* conservative when choosing to
5658 // overflow because their notion of ULP is much larger. As such, it is always
5659 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5660 if (!Hi.isFinite())
5661 return handleOverflow(RM);
5662
5663 // Stage 2: Exact Error Calculation.
5664 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5665 // This is done by converting Hi back to an integer and subtracting it from
5666 // the original source.
5667 bool HiAsIntIsExact;
5668 // Create an integer representation of Hi. Its width is determined by the
5669 // exponent of Hi, ensuring it's just large enough. This width can exceed
5670 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5671 // accurately when converted back to an integer.
5672 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5673 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5674 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5675
5676 // Stage 3: Error Approximation and Rounding.
5677 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5678 // captures the remainder of the original number. The rounding mode for this
5679 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5680 // ensure the final sum (Hi + Lo) rounds correctly.
5681 roundingMode LoRM = RM;
5682 // Adjustments are only necessary when the initial approximation Hi was an
5683 // overestimate, making the Error negative.
5684 if (Error.isNegative()) {
5685 if (RM == rmNearestTiesToAway) {
5686 // For rmNearestTiesToAway, a tie should round away from zero. Since
5687 // SrcInt is positive, this means rounding toward +infinity.
5688 // A standard conversion of a negative Error would round ties toward
5689 // -infinity, causing the final sum Hi + Lo to be smaller. To
5690 // counteract this, we detect the tie case and override the rounding
5691 // mode for Lo to rmTowardPositive.
5692 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5693 const unsigned LoPrecision = getSecond().getSemantics().precision;
5694 if (ErrorActiveBits > LoPrecision) {
5695 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5696 // A tie occurs when the bits to be truncated are of the form 100...0.
5697 // This is detected by checking if the number of trailing zeros is
5698 // exactly one less than the number of bits being truncated.
5699 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5700 LoRM = rmTowardPositive;
5701 }
5702 } else if (RM == rmTowardZero) {
5703 // For rmTowardZero, the final positive result must be truncated (rounded
5704 // down). When Hi is an overestimate, Error is negative. A standard
5705 // rmTowardZero conversion of Error would make it *less* negative,
5706 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5707 // rounds down correctly, we force Lo to round toward -infinity.
5708 LoRM = rmTowardNegative;
5709 }
5710 }
5711
5713 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5714
5715 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5716 // components do not overlap. fastTwoSum performs this operation.
5717 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5718 Floats[0] = std::move(Hi);
5719 Floats[1] = std::move(Lo);
5720
5721 // A final check for overflow is needed because fastTwoSum can cause a
5722 // carry-out from Lo that pushes Hi to infinity.
5723 if (!getFirst().isFinite())
5724 return handleOverflow(RM);
5725
5726 // The largest DoubleAPFloat must be canonical. Values which are larger are
5727 // not canonical and are equivalent to overflow.
5728 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5729 DoubleAPFloat Largest{*Semantics};
5730 Largest.makeLargest(/*Neg=*/false);
5731 if (compare(Largest) == APFloat::cmpGreaterThan)
5732 return handleOverflow(RM);
5733 }
5734
5735 // The final status of the operation is determined by the conversion of the
5736 // error term. If Lo could represent Error exactly, the entire conversion
5737 // is exact. Otherwise, it's inexact.
5738 return Status;
5739}
5740
5742 bool IsSigned,
5743 roundingMode RM) {
5744 const bool NegateInput = IsSigned && Input.isNegative();
5745 APInt API = Input;
5746 if (NegateInput)
5747 API.negate();
5748
5750 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5751 if (NegateInput)
5752 changeSign();
5753 return Status;
5754}
5755
5757 unsigned int HexDigits,
5758 bool UpperCase,
5759 roundingMode RM) const {
5760 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5761 "Unexpected Semantics");
5762 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5763 .convertToHexString(DST, HexDigits, UpperCase, RM);
5764}
5765
5767 return getCategory() == fcNormal &&
5768 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5769 // (double)(Hi + Lo) == Hi defines a normal number.
5770 Floats[0] != Floats[0] + Floats[1]);
5771}
5772
5774 if (getCategory() != fcNormal)
5775 return false;
5776 DoubleAPFloat Tmp(*this);
5777 Tmp.makeSmallest(this->isNegative());
5778 return Tmp.compare(*this) == cmpEqual;
5779}
5780
5782 if (getCategory() != fcNormal)
5783 return false;
5784
5785 DoubleAPFloat Tmp(*this);
5787 return Tmp.compare(*this) == cmpEqual;
5788}
5789
5791 if (getCategory() != fcNormal)
5792 return false;
5793 DoubleAPFloat Tmp(*this);
5794 Tmp.makeLargest(this->isNegative());
5795 return Tmp.compare(*this) == cmpEqual;
5796}
5797
5799 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5800 "Unexpected Semantics");
5801 return Floats[0].isInteger() && Floats[1].isInteger();
5802}
5803
5805 unsigned FormatPrecision,
5806 unsigned FormatMaxPadding,
5807 bool TruncateZero) const {
5808 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5809 "Unexpected Semantics");
5810 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5811 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5812}
5813
5815 // In order for Hi + Lo to be a power of two, the following must be true:
5816 // 1. Hi must be a power of two.
5817 // 2. Lo must be zero.
5818 if (getSecond().isNonZero())
5819 return INT_MIN;
5820 return getFirst().getExactLog2Abs();
5821}
5822
5823int ilogb(const DoubleAPFloat &Arg) {
5824 const APFloat &Hi = Arg.getFirst();
5825 const APFloat &Lo = Arg.getSecond();
5826 int IlogbResult = ilogb(Hi);
5827 // Zero and non-finite values can delegate to ilogb(Hi).
5828 if (Arg.getCategory() != fcNormal)
5829 return IlogbResult;
5830 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5831 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5832 return IlogbResult;
5833 if (Hi.getExactLog2Abs() == INT_MIN)
5834 return IlogbResult;
5835 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5836 // get nudged out of the binade by the low component.
5837 return IlogbResult - 1;
5838}
5839
5842 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5843 "Unexpected Semantics");
5845 scalbn(Arg.Floats[0], Exp, RM),
5846 scalbn(Arg.Floats[1], Exp, RM));
5847}
5848
5849DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5851 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5852 "Unexpected Semantics");
5853
5854 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5855 // [1.0, 2.0).
5856 Exp = ilogb(Arg);
5857
5858 // For NaNs, quiet any signaling NaN and return the result, as per standard
5859 // practice.
5860 if (Exp == APFloat::IEK_NaN) {
5861 DoubleAPFloat Quiet{Arg};
5862 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5863 return Quiet;
5864 }
5865
5866 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5867 if (Exp == APFloat::IEK_Inf)
5868 return Arg;
5869
5870 // For zero, the fraction is zero and the standard requires the exponent be 0.
5871 if (Exp == APFloat::IEK_Zero) {
5872 Exp = 0;
5873 return Arg;
5874 }
5875
5876 const APFloat &Hi = Arg.getFirst();
5877 const APFloat &Lo = Arg.getSecond();
5878
5879 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5880 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5881 // Increment the exponent to ensure the fraction is in the correct range.
5882 ++Exp;
5883
5884 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5885 APFloat Second = Lo;
5886 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5887 roundingMode LoRoundingMode;
5888 // The interpretation of rmTowardZero depends on the sign of the combined
5889 // Arg rather than the sign of the component.
5890 if (RM == rmTowardZero)
5891 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5892 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5893 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5894 // "away from zero" based on its own sign would move the value in the
5895 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5896 // direction-agnostic. We only need to bother with this if Lo is scaled
5897 // down.
5898 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5899 LoRoundingMode = rmNearestTiesToEven;
5900 else
5901 LoRoundingMode = RM;
5902 Second = scalbn(Lo, -Exp, LoRoundingMode);
5903 // The rmNearestTiesToEven proxy is correct most of the time, but it
5904 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5905 // exact midpoint.
5906 // NOTE: This is morally equivalent to roundTiesTowardZero.
5907 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5908 // Re-scale the result back to check if rounding occurred.
5909 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5910 if (RecomposedLo != Lo) {
5911 // RoundingError tells us which direction we rounded:
5912 // - RoundingError > 0: we rounded up.
5913 // - RoundingError < 0: we down up.
5914 const APFloat RoundingError = RecomposedLo - Lo;
5915 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5916 // We do this by checking if the absolute rounding error is exactly
5917 // half a ULP of the result.
5918 const APFloat UlpOfSecond = harrisonUlp(Second);
5919 const APFloat ScaledUlpOfSecond =
5920 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5921 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5922 const bool RoundedLoAway =
5923 Second.isNegative() == RoundingError.isNegative();
5924 // The sign of Hi and Lo disagree and we rounded Lo away: we must
5925 // decrease the magnitude of Second to increase the magnitude
5926 // First+Second.
5927 if (IsMidpoint && RoundedLoAway)
5928 Second.next(/*nextDown=*/!Second.isNegative());
5929 }
5930 }
5931 // Handle a tricky edge case where Arg is slightly less than a power of two
5932 // (e.g., Arg = 2^k - epsilon). In this situation:
5933 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5934 // 2. ilogb(Arg) correctly returns k-1.
5935 // 3. Our initial Exp becomes (k-1) + 1 = k.
5936 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5937 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5938 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5939 // We detect this specific case by checking if Hi is a power of two and if
5940 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5941 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5942 // valid fraction.
5943 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5944 ++Exp;
5945 }
5946
5947 APFloat First = scalbn(Hi, -Exp, RM);
5949 std::move(Second));
5950}
5951
5952} // namespace detail
5953
5954APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5955 if (usesLayout<IEEEFloat>(Semantics)) {
5956 new (&IEEE) IEEEFloat(std::move(F));
5957 return;
5958 }
5959 if (usesLayout<DoubleAPFloat>(Semantics)) {
5960 const fltSemantics& S = F.getSemantics();
5961 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5963 return;
5964 }
5965 llvm_unreachable("Unexpected semantics");
5966}
5967
5972
5973hash_code hash_value(const APFloat &Arg) {
5974 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5975 return hash_value(Arg.U.IEEE);
5976 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5977 return hash_value(Arg.U.Double);
5978 llvm_unreachable("Unexpected semantics");
5979}
5980
5982 : APFloat(Semantics) {
5983 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5984 assert(StatusOrErr && "Invalid floating point representation");
5985 consumeError(StatusOrErr.takeError());
5986}
5987
5989 if (isZero())
5990 return isNegative() ? fcNegZero : fcPosZero;
5991 if (isNormal())
5992 return isNegative() ? fcNegNormal : fcPosNormal;
5993 if (isDenormal())
5995 if (isInfinity())
5996 return isNegative() ? fcNegInf : fcPosInf;
5997 assert(isNaN() && "Other class of FP constant");
5998 return isSignaling() ? fcSNan : fcQNan;
5999}
6000
6001bool APFloat::getExactInverse(APFloat *Inv) const {
6002 // Only finite, non-zero numbers can have a useful, representable inverse.
6003 // This check filters out +/- zero, +/- infinity, and NaN.
6004 if (!isFiniteNonZero())
6005 return false;
6006
6007 // Historically, this function rejects subnormal inputs. One reason why this
6008 // might be important is that subnormals may behave differently under FTZ/DAZ
6009 // runtime behavior.
6010 if (isDenormal())
6011 return false;
6012
6013 // A number has an exact, representable inverse if and only if it is a power
6014 // of two.
6015 //
6016 // Mathematical Rationale:
6017 // 1. A binary floating-point number x is a dyadic rational, meaning it can
6018 // be written as x = M / 2^k for integers M (the significand) and k.
6019 // 2. The inverse is 1/x = 2^k / M.
6020 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
6021 // in binary), its denominator M must also be a power of two.
6022 // Let's say M = 2^m.
6023 // 4. Substituting this back into the formula for x, we get
6024 // x = (2^m) / (2^k) = 2^(m-k).
6025 //
6026 // This proves that x must be a power of two.
6027
6028 // getExactLog2Abs() returns the integer exponent if the number is a power of
6029 // two or INT_MIN if it is not.
6030 const int Exp = getExactLog2Abs();
6031 if (Exp == INT_MIN)
6032 return false;
6033
6034 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
6035 // scaling 1.0 by the negated exponent.
6036 APFloat Reciprocal =
6037 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
6038 rmTowardZero);
6039
6040 // scalbn might round if the resulting exponent -Exp is outside the
6041 // representable range, causing overflow (to infinity) or underflow. We
6042 // must verify that the result is still the exact power of two we expect.
6043 if (Reciprocal.getExactLog2Abs() != -Exp)
6044 return false;
6045
6046 // Avoid multiplication with a subnormal, it is not safe on all platforms and
6047 // may be slower than a normal division.
6048 if (Reciprocal.isDenormal())
6049 return false;
6050
6051 assert(Reciprocal.isFiniteNonZero());
6052
6053 if (Inv)
6054 *Inv = std::move(Reciprocal);
6055
6056 return true;
6057}
6058
6060 roundingMode RM, bool *losesInfo) {
6061 if (&getSemantics() == &ToSemantics) {
6062 *losesInfo = false;
6063 return opOK;
6064 }
6065 if (usesLayout<IEEEFloat>(getSemantics()) &&
6066 usesLayout<IEEEFloat>(ToSemantics))
6067 return U.IEEE.convert(ToSemantics, RM, losesInfo);
6068 if (usesLayout<IEEEFloat>(getSemantics()) &&
6069 usesLayout<DoubleAPFloat>(ToSemantics)) {
6070 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
6071 auto Ret =
6072 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
6073 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
6074 return Ret;
6075 }
6076 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
6077 usesLayout<IEEEFloat>(ToSemantics)) {
6078 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
6079 *this = APFloat(std::move(getIEEE()), ToSemantics);
6080 return Ret;
6081 }
6082 llvm_unreachable("Unexpected semantics");
6083}
6084
6088
6090 SmallVector<char, 16> Buffer;
6091 toString(Buffer);
6092 OS << Buffer;
6093}
6094
6095#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6097 print(dbgs());
6098 dbgs() << '\n';
6099}
6100#endif
6101
6103 NID.Add(bitcastToAPInt());
6104}
6105
6107 roundingMode rounding_mode,
6108 bool *isExact) const {
6109 unsigned bitWidth = result.getBitWidth();
6110 SmallVector<uint64_t, 4> parts(result.getNumWords());
6111 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
6112 rounding_mode, isExact);
6113 // Keeps the original signed-ness.
6114 result = APInt(bitWidth, parts);
6115 return status;
6116}
6117
6119 if (&getSemantics() ==
6120 (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
6121 return getIEEE().convertToDouble();
6122 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
6123 "Float semantics is not representable by IEEEdouble");
6124 APFloat Temp = *this;
6125 bool LosesInfo;
6126 opStatus St =
6127 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
6128 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6129 (void)St;
6130 return Temp.getIEEE().convertToDouble();
6131}
6132
6133#ifdef HAS_IEE754_FLOAT128
6134float128 APFloat::convertToQuad() const {
6135 if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
6136 return getIEEE().convertToQuad();
6137 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
6138 "Float semantics is not representable by IEEEquad");
6139 APFloat Temp = *this;
6140 bool LosesInfo;
6141 opStatus St =
6142 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
6143 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6144 (void)St;
6145 return Temp.getIEEE().convertToQuad();
6146}
6147#endif
6148
6150 if (&getSemantics() ==
6151 (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
6152 return getIEEE().convertToFloat();
6153 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
6154 "Float semantics is not representable by IEEEsingle");
6155 APFloat Temp = *this;
6156 bool LosesInfo;
6157 opStatus St =
6158 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
6159 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6160 (void)St;
6161 return Temp.getIEEE().convertToFloat();
6162}
6163
6164APFloat::Storage::~Storage() {
6165 if (usesLayout<IEEEFloat>(*semantics)) {
6166 IEEE.~IEEEFloat();
6167 return;
6168 }
6169 if (usesLayout<DoubleAPFloat>(*semantics)) {
6170 Double.~DoubleAPFloat();
6171 return;
6172 }
6173 llvm_unreachable("Unexpected semantics");
6174}
6175
6176APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6177 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6178 new (this) IEEEFloat(RHS.IEEE);
6179 return;
6180 }
6181 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6182 new (this) DoubleAPFloat(RHS.Double);
6183 return;
6184 }
6185 llvm_unreachable("Unexpected semantics");
6186}
6187
6188APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6189 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6190 new (this) IEEEFloat(std::move(RHS.IEEE));
6191 return;
6192 }
6193 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6194 new (this) DoubleAPFloat(std::move(RHS.Double));
6195 return;
6196 }
6197 llvm_unreachable("Unexpected semantics");
6198}
6199
6200APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6201 if (usesLayout<IEEEFloat>(*semantics) &&
6202 usesLayout<IEEEFloat>(*RHS.semantics)) {
6203 IEEE = RHS.IEEE;
6204 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6205 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6206 Double = RHS.Double;
6207 } else if (this != &RHS) {
6208 this->~Storage();
6209 new (this) Storage(RHS);
6210 }
6211 return *this;
6212}
6213
6214APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6215 if (usesLayout<IEEEFloat>(*semantics) &&
6216 usesLayout<IEEEFloat>(*RHS.semantics)) {
6217 IEEE = std::move(RHS.IEEE);
6218 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6219 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6220 Double = std::move(RHS.Double);
6221 } else if (this != &RHS) {
6222 this->~Storage();
6223 new (this) Storage(std::move(RHS));
6224 }
6225 return *this;
6226}
6227
6228} // namespace llvm
6229
6230#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:48
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:26
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
static bool isSigned(unsigned int Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:306
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:174
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:329
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:334
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:304
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static const fltSemantics & BFloat()
Definition APFloat.h:295
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEquad()
Definition APFloat.h:298
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:307
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:313
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:325
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:360
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:317
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:342
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:300
friend class APFloat
Definition APFloat.h:291
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:296
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:333
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:221
static constexpr unsigned integerPartWidth
Definition APFloat.h:152
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:300
APInt::WordType integerPart
Definition APFloat.h:151
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:321
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:346
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:304
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:307
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:316
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:315
static const fltSemantics & Float8E4M3()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:308
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:266
static const fltSemantics & Float8E3M4()
Definition APFloat.h:311
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:337
static const fltSemantics & Float8E5M2()
Definition APFloat.h:303
fltCategory
Category of internally-represented number.
Definition APFloat.h:370
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:314
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static const fltSemantics & FloatTF32()
Definition APFloat.h:312
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:155
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:310
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1102
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:6102
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1190
bool isFiniteNonZero() const
Definition APFloat.h:1441
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6059
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1479
bool isNegative() const
Definition APFloat.h:1431
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:6001
friend DoubleAPFloat
Definition APFloat.h:1495
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:6118
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1460
bool isNormal() const
Definition APFloat.h:1435
bool isDenormal() const
Definition APFloat.h:1432
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1163
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:6085
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5973
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
bool isFinite() const
Definition APFloat.h:1436
bool isNaN() const
Definition APFloat.h:1429
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1070
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1421
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6149
bool isSignaling() const
Definition APFloat.h:1433
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1217
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1199
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
opStatus next(bool nextDown)
Definition APFloat.h:1236
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1130
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5988
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1208
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5968
friend IEEEFloat
Definition APFloat.h:1494
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:6096
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:6089
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1230
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1155
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1061
bool isInfinity() const
Definition APFloat.h:1428
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2368
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2340
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2363
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2442
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2412
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2752
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1752
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2348
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2726
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2632
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1495
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2373
void negate()
Negate this APInt in place.
Definition APInt.h:1468
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1918
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2379
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2699
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2354
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2392
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1736
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2530
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2477
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2516
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1744
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1913
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1722
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:569
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:330
void Add(const T &x)
Definition FoldingSet.h:370
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
const char * iterator
Definition StringRef.h:59
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
iterator begin() const
Definition StringRef.h:112
char back() const
back - Get the last character in the string.
Definition StringRef.h:155
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:686
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
char front() const
front - Get the first character in the string.
Definition StringRef.h:149
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
iterator end() const
Definition StringRef.h:114
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:647
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5316
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4846
LLVM_ABI void changeSign()
Definition APFloat.cpp:5223
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5790
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:5110
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5013
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5282
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5339
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5814
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5741
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5350
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5360
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5773
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5005
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5344
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5229
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5766
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5577
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5309
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5823
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5376
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5288
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5798
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5293
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5099
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5781
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:5120
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4793
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5804
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5298
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5331
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:5146
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:5131
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5756
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5286
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5000
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5326
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3329
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1547
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2318
fltCategory getCategory() const
Definition APFloat.h:573
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2887
bool isFiniteNonZero() const
Definition APFloat.h:576
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:463
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:4115
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4510
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3739
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4753
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2489
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:538
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2188
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:563
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2208
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3809
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3802
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2162
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4466
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:4147
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4700
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:1047
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4729
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current semantics.
Definition APFloat.cpp:1149
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2156
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:550
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3272
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:936
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2168
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2402
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:1008
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1174
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Make this number the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:4161
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1166
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1201
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2356
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4735
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4555
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:560
const fltSemantics & getSemantics() const
Definition APFloat.h:574
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:553
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4539
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4715
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2566
LLVM_ABI void changeSign()
Definition APFloat.cpp:2112
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:1033
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2827
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition APFloat.cpp:1039
An opaque object representing a hash code.
Definition Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:439
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of friend declarations above to make it discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:441
static constexpr opStatus opDivByZero
Definition APFloat.h:436
static constexpr opStatus opOverflow
Definition APFloat.h:437
static constexpr cmpResult cmpLessThan
Definition APFloat.h:431
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1572
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:427
static constexpr uninitializedTag uninitialized
Definition APFloat.h:421
static constexpr fltCategory fcZero
Definition APFloat.h:443
static constexpr opStatus opOK
Definition APFloat.h:434
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:432
static constexpr unsigned integerPartWidth
Definition APFloat.h:429
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3477
APFloatBase::ExponentType ExponentType
Definition APFloat.h:420
static constexpr fltCategory fcNormal
Definition APFloat.h:442
static constexpr opStatus opInvalidOp
Definition APFloat.h:435
APFloatBase::opStatus opStatus
Definition APFloat.h:417
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4774
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:415
static constexpr cmpResult cmpUnordered
Definition APFloat.h:433
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:426
APFloatBase::roundingMode roundingMode
Definition APFloat.h:416
APFloatBase::cmpResult cmpResult
Definition APFloat.h:418
static constexpr fltCategory fcInfinity
Definition APFloat.h:440
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:424
static constexpr roundingMode rmTowardZero
Definition APFloat.h:428
static constexpr opStatus opUnderflow
Definition APFloat.h:438
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:422
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4735
static constexpr cmpResult cmpEqual
Definition APFloat.h:430
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4753
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4863
APFloatBase::integerPart integerPart
Definition APFloat.h:414
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:827
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
static const char infinityL[]
Definition APFloat.cpp:818
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:391
static const char NaNU[]
Definition APFloat.cpp:821
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:702
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:761
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:878
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:365
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:453
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:98
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:291
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1516
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:845
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
const unsigned int maxPrecision
Definition APFloat.cpp:290
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1537
static const char NaNL[]
Definition APFloat.cpp:820
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
static const char infinityU[]
Definition APFloat.cpp:819
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:50
@ lfMoreThanHalf
Definition APFloat.h:54
@ lfLessThanHalf
Definition APFloat.h:52
@ lfExactlyHalf
Definition APFloat.h:53
@ lfExactlyZero
Definition APFloat.h:51
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:545
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:292
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1525
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:375
static Error createError(const Twine &Err)
Definition APFloat.cpp:387
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:668
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
static const char hexDigitsUpper[]
Definition APFloat.cpp:817
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
const unsigned int maxExponent
Definition APFloat.cpp:289
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:398
fltNonfiniteBehavior
Definition APFloat.cpp:57
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:1954
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:681
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:505
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:370
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:646
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1551
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:716
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:863
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:615
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1083
fltNanEncoding
Definition APFloat.cpp:81
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:408
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:816
#define N
const char * lastSigDigit
Definition APFloat.cpp:540
const char * firstSigDigit
Definition APFloat.cpp:539
APFloatBase::ExponentType maxExponent
Definition APFloat.cpp:106
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.cpp:119
APFloatBase::ExponentType minExponent
Definition APFloat.cpp:110
unsigned int sizeInBits
Definition APFloat.cpp:117
unsigned int precision
Definition APFloat.cpp:114
fltNanEncoding nanEncoding
Definition APFloat.cpp:121