LLVM 22.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Error.h"
28#include <cstring>
29#include <limits.h>
30
// Expands to a statement that forwards METHOD_CALL to U.IEEE or U.Double,
// depending on which layout getSemantics() selects, and returns the result
// from the enclosing function. Any other layout hits llvm_unreachable.
// Because the expansion contains `return`, it must be used inside a member
// function whose return type matches METHOD_CALL's result.
31#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
32 do { \
33 if (usesLayout<IEEEFloat>(getSemantics())) \
34 return U.IEEE.METHOD_CALL; \
35 if (usesLayout<DoubleAPFloat>(getSemantics())) \
36 return U.Double.METHOD_CALL; \
37 llvm_unreachable("Unexpected semantics"); \
38 } while (false)
39
40using namespace llvm;
41
42/// A macro used to combine two fcCategory enums into one key which can be used
43/// in a switch statement to classify how the interaction of two APFloat's
44/// categories affects an operation.
45///
46/// TODO: constexpr is already used elsewhere in this file, so this macro can
47/// be changed into a static constexpr inline function.
48#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
50/* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54namespace llvm {
55
56// How the nonfinite values Inf and NaN are represented.
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
62
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
71
72 // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
73 // Float4E2M1FN types, which do not support Inf or NaN values.
75};
76
77// How NaN values are represented. This is currently only used in combination
78// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
79// while having IEEE non-finite behavior is liable to lead to unexpected
80// results.
81enum class fltNanEncoding {
82 // Represents the standard IEEE behavior where a value is NaN if its
83 // exponent is all 1s and the significand is non-zero.
85
86 // Represents the behavior in the Float8E4M3FN floating point type where NaN
87 // is represented by having the exponent and mantissa set to all 1s.
88 // This behavior matches the FP8 E4M3 type described in
89 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
90 // as non-signalling, although the paper does not state whether the NaN
91 // values are signalling or not.
93
94 // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
95 // where NaN is represented by a sign bit of 1 and all 0s in the exponent
96 // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
97 // there is only one NaN value, it is treated as quiet NaN. This matches the
98 // behavior described in https://arxiv.org/abs/2206.02915 .
100};
101
102/* Represents floating point arithmetic semantics. */
104 /* The largest E such that 2^E is representable; this matches the
105 definition of IEEE 754. */
107
108 /* The smallest E such that 2^E is a normalized number; this
109 matches the definition of IEEE 754. */
111
112 /* Number of bits in the significand. This includes the integer
113 bit. */
114 unsigned int precision;
115
116 /* Number of bits actually used in the semantics. */
117 unsigned int sizeInBits;
118
120
122
123 /* Whether this semantics has an encoding for Zero */
124 bool hasZero = true;
125
126 /* Whether this semantics can represent signed values */
127 bool hasSignedRepr = true;
128
129 /* Whether the sign bit of this semantics is the most significant bit */
130 bool hasSignBitInMSB = true;
131};
132
133constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
134constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
135constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
136constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
137constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
138constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
139constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
141constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
142constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
144constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
146constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
148constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
149constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
150constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
151 127,
152 -127,
153 1,
154 8,
157 false,
158 false,
159 false};
160
161constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
163constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
165constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
167constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
168 80};
169constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
170constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
171constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
172 1023, -1022 + 53, 53 + 53, 128};
173
175 switch (S) {
176 case S_IEEEhalf:
177 return IEEEhalf();
178 case S_BFloat:
179 return BFloat();
180 case S_IEEEsingle:
181 return IEEEsingle();
182 case S_IEEEdouble:
183 return IEEEdouble();
184 case S_IEEEquad:
185 return IEEEquad();
187 return PPCDoubleDouble();
189 return PPCDoubleDoubleLegacy();
190 case S_Float8E5M2:
191 return Float8E5M2();
192 case S_Float8E5M2FNUZ:
193 return Float8E5M2FNUZ();
194 case S_Float8E4M3:
195 return Float8E4M3();
196 case S_Float8E4M3FN:
197 return Float8E4M3FN();
198 case S_Float8E4M3FNUZ:
199 return Float8E4M3FNUZ();
201 return Float8E4M3B11FNUZ();
202 case S_Float8E3M4:
203 return Float8E3M4();
204 case S_FloatTF32:
205 return FloatTF32();
206 case S_Float8E8M0FNU:
207 return Float8E8M0FNU();
208 case S_Float6E3M2FN:
209 return Float6E3M2FN();
210 case S_Float6E2M3FN:
211 return Float6E2M3FN();
212 case S_Float4E2M1FN:
213 return Float4E2M1FN();
215 return x87DoubleExtended();
216 }
217 llvm_unreachable("Unrecognised floating semantics");
218}
219
222 if (&Sem == &llvm::APFloat::IEEEhalf())
223 return S_IEEEhalf;
224 else if (&Sem == &llvm::APFloat::BFloat())
225 return S_BFloat;
226 else if (&Sem == &llvm::APFloat::IEEEsingle())
227 return S_IEEEsingle;
228 else if (&Sem == &llvm::APFloat::IEEEdouble())
229 return S_IEEEdouble;
230 else if (&Sem == &llvm::APFloat::IEEEquad())
231 return S_IEEEquad;
232 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
233 return S_PPCDoubleDouble;
234 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
236 else if (&Sem == &llvm::APFloat::Float8E5M2())
237 return S_Float8E5M2;
238 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
239 return S_Float8E5M2FNUZ;
240 else if (&Sem == &llvm::APFloat::Float8E4M3())
241 return S_Float8E4M3;
242 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
243 return S_Float8E4M3FN;
244 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
245 return S_Float8E4M3FNUZ;
246 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
247 return S_Float8E4M3B11FNUZ;
248 else if (&Sem == &llvm::APFloat::Float8E3M4())
249 return S_Float8E3M4;
250 else if (&Sem == &llvm::APFloat::FloatTF32())
251 return S_FloatTF32;
252 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
253 return S_Float8E8M0FNU;
254 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
255 return S_Float6E3M2FN;
256 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
257 return S_Float6E2M3FN;
258 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
259 return S_Float4E2M1FN;
260 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
261 return S_x87DoubleExtended;
262 else
263 llvm_unreachable("Unknown floating semantics");
264}
265
267 const fltSemantics &B) {
268 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
269 A.precision <= B.precision;
270}
271
272/* A tight upper bound on number of parts required to hold the value
273 pow(5, power) is
274
275 power * 815 / (351 * integerPartWidth) + 1
276
277 However, whilst the result may require only this many parts,
278 because we are multiplying two values to get it, the
279 multiplication may require an extra part with the excess part
280 being zero (consider the trivial case of 1 * 1, tcFullMultiply
281 requires two parts to hold the single-part result). So we add an
282 extra one to guarantee enough space whilst multiplying. */
283const unsigned int maxExponent = 16383;
284const unsigned int maxPrecision = 113;
286const unsigned int maxPowerOfFiveParts =
287 2 +
289
290unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
291 return semantics.precision;
292}
295 return semantics.maxExponent;
296}
299 return semantics.minExponent;
300}
301unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
302 return semantics.sizeInBits;
303}
305 bool isSigned) {
306 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
307 // at least one more bit than the MaxExponent to hold the max FP value.
308 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
309 // Extra sign bit needed.
310 if (isSigned)
311 ++MinBitWidth;
312 return MinBitWidth;
313}
314
316 return semantics.hasZero;
317}
318
320 return semantics.hasSignedRepr;
321}
322
326
330
332 // Keep in sync with Type::isIEEELikeFPTy
333 return SemanticsToEnum(semantics) <= S_IEEEquad;
334}
335
337 return semantics.hasSignBitInMSB;
338}
339
341 const fltSemantics &Dst) {
342 // Exponent range must be larger.
343 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
344 return false;
345
346 // If the mantissa is long enough, the result value could still be denormal
347 // with a larger exponent range.
348 //
349 // FIXME: This condition is probably not accurate but also shouldn't be a
350 // practical concern with existing types.
351 return Dst.precision >= Src.precision;
352}
353
355 return Sem.sizeInBits;
356}
357
358static constexpr APFloatBase::ExponentType
359exponentZero(const fltSemantics &semantics) {
360 return semantics.minExponent - 1;
361}
362
363static constexpr APFloatBase::ExponentType
364exponentInf(const fltSemantics &semantics) {
365 return semantics.maxExponent + 1;
366}
367
368static constexpr APFloatBase::ExponentType
369exponentNaN(const fltSemantics &semantics) {
372 return exponentZero(semantics);
373 if (semantics.hasSignedRepr)
374 return semantics.maxExponent;
375 }
376 return semantics.maxExponent + 1;
377}
378
379/* A bunch of private, handy routines. */
380
381static inline Error createError(const Twine &Err) {
383}
384
385static constexpr inline unsigned int partCountForBits(unsigned int bits) {
386 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
388}
389
/* Maps the character code C to its decimal digit value by subtracting the
   code of '0'.  Results 0U-9U are digits; because the subtraction is
   unsigned, every non-digit input yields a value >= 10U. */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zeroCode = '0';
  return c - zeroCode;
}
396
397/* Return the value of a decimal exponent of the form
398 [+-]ddddddd.
399
400 If the exponent overflows, returns a large exponent with the
401 appropriate sign. */
404 bool isNegative;
405 unsigned int absExponent;
406 const unsigned int overlargeExponent = 24000; /* FIXME. */
407 StringRef::iterator p = begin;
408
409 // Treat no exponent as 0 to match binutils
410 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
411 return 0;
412 }
413
414 isNegative = (*p == '-');
415 if (*p == '-' || *p == '+') {
416 p++;
417 if (p == end)
418 return createError("Exponent has no digits");
419 }
420
421 absExponent = decDigitValue(*p++);
422 if (absExponent >= 10U)
423 return createError("Invalid character in exponent");
424
425 for (; p != end; ++p) {
426 unsigned int value;
427
428 value = decDigitValue(*p);
429 if (value >= 10U)
430 return createError("Invalid character in exponent");
431
432 absExponent = absExponent * 10U + value;
433 if (absExponent >= overlargeExponent) {
434 absExponent = overlargeExponent;
435 break;
436 }
437 }
438
439 if (isNegative)
440 return -(int) absExponent;
441 else
442 return (int) absExponent;
443}
444
445/* This is ugly and needs cleaning up, but I don't immediately see
446 how whilst remaining safe. */
449 int exponentAdjustment) {
450 int unsignedExponent;
451 bool negative, overflow;
452 int exponent = 0;
453
454 if (p == end)
455 return createError("Exponent has no digits");
456
457 negative = *p == '-';
458 if (*p == '-' || *p == '+') {
459 p++;
460 if (p == end)
461 return createError("Exponent has no digits");
462 }
463
464 unsignedExponent = 0;
465 overflow = false;
466 for (; p != end; ++p) {
467 unsigned int value;
468
469 value = decDigitValue(*p);
470 if (value >= 10U)
471 return createError("Invalid character in exponent");
472
473 unsignedExponent = unsignedExponent * 10 + value;
474 if (unsignedExponent > 32767) {
475 overflow = true;
476 break;
477 }
478 }
479
480 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
481 overflow = true;
482
483 if (!overflow) {
484 exponent = unsignedExponent;
485 if (negative)
486 exponent = -exponent;
487 exponent += exponentAdjustment;
488 if (exponent > 32767 || exponent < -32768)
489 overflow = true;
490 }
491
492 if (overflow)
493 exponent = negative ? -32768: 32767;
494
495 return exponent;
496}
497
500 StringRef::iterator *dot) {
501 StringRef::iterator p = begin;
502 *dot = end;
503 while (p != end && *p == '0')
504 p++;
505
506 if (p != end && *p == '.') {
507 *dot = p++;
508
509 if (end - begin == 1)
510 return createError("Significand has no digits");
511
512 while (p != end && *p == '0')
513 p++;
514 }
515
516 return p;
517}
518
519/* Given a normal decimal floating point number of the form
520
521 dddd.dddd[eE][+-]ddd
522
523 where the decimal point and exponent are optional, fill out the
524 structure D. Exponent is appropriate if the significand is
525 treated as an integer, and normalizedExponent if the significand
526 is taken to have the decimal point after a single leading
527 non-zero digit.
528
529 If the value is zero, V->firstSigDigit points to a non-digit, and
530 the return exponent is zero.
531*/
533 const char *firstSigDigit;
534 const char *lastSigDigit;
537};
538
541 StringRef::iterator dot = end;
542
543 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
544 if (!PtrOrErr)
545 return PtrOrErr.takeError();
546 StringRef::iterator p = *PtrOrErr;
547
548 D->firstSigDigit = p;
549 D->exponent = 0;
550 D->normalizedExponent = 0;
551
552 for (; p != end; ++p) {
553 if (*p == '.') {
554 if (dot != end)
555 return createError("String contains multiple dots");
556 dot = p++;
557 if (p == end)
558 break;
559 }
560 if (decDigitValue(*p) >= 10U)
561 break;
562 }
563
564 if (p != end) {
565 if (*p != 'e' && *p != 'E')
566 return createError("Invalid character in significand");
567 if (p == begin)
568 return createError("Significand has no digits");
569 if (dot != end && p - begin == 1)
570 return createError("Significand has no digits");
571
572 /* p points to the first non-digit in the string */
573 auto ExpOrErr = readExponent(p + 1, end);
574 if (!ExpOrErr)
575 return ExpOrErr.takeError();
576 D->exponent = *ExpOrErr;
577
578 /* Implied decimal point? */
579 if (dot == end)
580 dot = p;
581 }
582
583 /* If number is all zeroes accept any exponent. */
584 if (p != D->firstSigDigit) {
585 /* Drop insignificant trailing zeroes. */
586 if (p != begin) {
587 do
588 do
589 p--;
590 while (p != begin && *p == '0');
591 while (p != begin && *p == '.');
592 }
593
594 /* Adjust the exponents for any decimal point. */
595 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
596 D->normalizedExponent = (D->exponent +
597 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
598 - (dot > D->firstSigDigit && dot < p)));
599 }
600
601 D->lastSigDigit = p;
602 return Error::success();
603}
604
605/* Return the trailing fraction of a hexadecimal number.
606 DIGITVALUE is the first hex digit of the fraction, P points to
607 the next digit. */
610 unsigned int digitValue) {
611 unsigned int hexDigit;
612
613 /* If the first trailing digit isn't 0 or 8 we can work out the
614 fraction immediately. */
615 if (digitValue > 8)
616 return lfMoreThanHalf;
617 else if (digitValue < 8 && digitValue > 0)
618 return lfLessThanHalf;
619
620 // Otherwise we need to find the first non-zero digit.
621 while (p != end && (*p == '0' || *p == '.'))
622 p++;
623
624 if (p == end)
625 return createError("Invalid trailing hexadecimal fraction!");
626
627 hexDigit = hexDigitValue(*p);
628
629 /* If we ran off the end it is exactly zero or one-half, otherwise
630 a little more. */
631 if (hexDigit == UINT_MAX)
632 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
633 else
634 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
635}
636
637/* Return the fraction lost were a bignum truncated losing the least
638 significant BITS bits. */
639static lostFraction
641 unsigned int partCount,
642 unsigned int bits)
643{
644 unsigned int lsb;
645
646 lsb = APInt::tcLSB(parts, partCount);
647
648 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
649 if (bits <= lsb)
650 return lfExactlyZero;
651 if (bits == lsb + 1)
652 return lfExactlyHalf;
653 if (bits <= partCount * APFloatBase::integerPartWidth &&
654 APInt::tcExtractBit(parts, bits - 1))
655 return lfMoreThanHalf;
656
657 return lfLessThanHalf;
658}
659
660/* Shift DST right BITS bits noting lost fraction. */
661static lostFraction
662shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
663{
664 lostFraction lost_fraction;
665
666 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
667
668 APInt::tcShiftRight(dst, parts, bits);
669
670 return lost_fraction;
671}
672
673/* Combine the effect of two lost fractions. */
674static lostFraction
676 lostFraction lessSignificant)
677{
678 if (lessSignificant != lfExactlyZero) {
679 if (moreSignificant == lfExactlyZero)
680 moreSignificant = lfLessThanHalf;
681 else if (moreSignificant == lfExactlyHalf)
682 moreSignificant = lfMoreThanHalf;
683 }
684
685 return moreSignificant;
686}
687
/* Bound, in half-ulps, on the error of a product of two floating point
   numbers whose own errors are at most HUERR1 and HUERR2 half-ulps.  The
   true error is strictly less than the returned value.  INEXACTMULTIPLY
   says whether the multiplication itself had to round.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1,
                               unsigned int HUerr2) {
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inputError = HUerr1 + HUerr2;
  const unsigned int roundingError = inexactMultiply ? 1U : 0U;

  // Exact operands: only the rounding of the product itself contributes.
  if (inputError == 0)
    return 2 * roundingError;

  return roundingError + 2 * inputError;
}
705
706/* The number of ulps from the boundary (zero, or half if ISNEAREST)
707 when the least significant BITS are truncated. BITS cannot be
708 zero. */
710ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
711 bool isNearest) {
712 unsigned int count, partBits;
713 APFloatBase::integerPart part, boundary;
714
715 assert(bits != 0);
716
717 bits--;
719 partBits = bits % APFloatBase::integerPartWidth + 1;
720
721 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
722
723 if (isNearest)
724 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
725 else
726 boundary = 0;
727
728 if (count == 0) {
729 if (part - boundary <= boundary - part)
730 return part - boundary;
731 else
732 return boundary - part;
733 }
734
735 if (part == boundary) {
736 while (--count)
737 if (parts[count])
738 return ~(APFloatBase::integerPart) 0; /* A lot. */
739
740 return parts[0];
741 } else if (part == boundary - 1) {
742 while (--count)
743 if (~parts[count])
744 return ~(APFloatBase::integerPart) 0; /* A lot. */
745
746 return -parts[0];
747 }
748
749 return ~(APFloatBase::integerPart) 0; /* A lot. */
750}
751
752/* Place pow(5, power) in DST, and return the number of parts used.
753 DST must be at least one part larger than size of the answer. */
754static unsigned int
755powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
756 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
758 pow5s[0] = 78125 * 5;
759
760 unsigned int partsCount = 1;
761 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
762 unsigned int result;
763 assert(power <= maxExponent);
764
765 p1 = dst;
766 p2 = scratch;
767
768 *p1 = firstEightPowers[power & 7];
769 power >>= 3;
770
771 result = 1;
772 pow5 = pow5s;
773
774 for (unsigned int n = 0; power; power >>= 1, n++) {
775 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
776 if (n != 0) {
777 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
778 partsCount, partsCount);
779 partsCount *= 2;
780 if (pow5[partsCount - 1] == 0)
781 partsCount--;
782 }
783
784 if (power & 1) {
786
787 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
788 result += partsCount;
789 if (p2[result - 1] == 0)
790 result--;
791
792 /* Now result is in p1 with partsCount parts and p2 is scratch
793 space. */
794 tmp = p1;
795 p1 = p2;
796 p2 = tmp;
797 }
798
799 pow5 += partsCount;
800 }
801
802 if (p1 != dst)
803 APInt::tcAssign(dst, p1, result);
804
805 return result;
806}
807
808/* Zero at the end to avoid modular arithmetic when adding one; used
809 when rounding up during hexadecimal output. */
810static const char hexDigitsLower[] = "0123456789abcdef0";
811static const char hexDigitsUpper[] = "0123456789ABCDEF0";
812static const char infinityL[] = "infinity";
813static const char infinityU[] = "INFINITY";
814static const char NaNL[] = "nan";
815static const char NaNU[] = "NAN";
816
817/* Write out an integerPart in hexadecimal, starting with the most
818 significant nibble. Write out exactly COUNT hexdigits, return
819 COUNT. */
820static unsigned int
821partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
822 const char *hexDigitChars)
823{
824 unsigned int result = count;
825
827
828 part >>= (APFloatBase::integerPartWidth - 4 * count);
829 while (count--) {
830 dst[count] = hexDigitChars[part & 0xf];
831 part >>= 4;
832 }
833
834 return result;
835}
836
/* Write the decimal representation of N to DST, most significant digit
   first and without a terminator.  Returns a pointer just past the last
   character written. */
static char *writeUnsignedDecimal(char *dst, unsigned int n) {
  char digits[40];
  unsigned int length = 0;

  // Generate digits least significant first; zero still yields one digit.
  do {
    digits[length++] = static_cast<char>('0' + n % 10);
    n /= 10;
  } while (n != 0);

  // Emit them in reverse so the output reads most significant first.
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
854
855/* Write out a signed decimal integer. */
856static char *
857writeSignedDecimal (char *dst, int value)
858{
859 if (value < 0) {
860 *dst++ = '-';
861 dst = writeUnsignedDecimal(dst, -(unsigned) value);
862 } else {
863 dst = writeUnsignedDecimal(dst, value);
864 }
865
866 return dst;
867}
868
869// Compute the ULP of the input using a definition from:
870// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
871// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
872static APFloat harrisonUlp(const APFloat &X) {
873 const fltSemantics &Sem = X.getSemantics();
874 switch (X.getCategory()) {
875 case APFloat::fcNaN:
876 return APFloat::getQNaN(Sem);
878 return APFloat::getInf(Sem);
879 case APFloat::fcZero:
880 return APFloat::getSmallest(Sem);
882 break;
883 }
884 if (X.isDenormal() || X.isSmallestNormalized())
885 return APFloat::getSmallest(Sem);
886 int Exp = ilogb(X);
887 if (X.getExactLog2() != INT_MIN)
888 Exp -= 1;
889 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
891}
892
893namespace detail {
894/* Constructors. */
895void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
896 unsigned int count;
897
898 semantics = ourSemantics;
899 count = partCount();
900 if (count > 1)
901 significand.parts = new integerPart[count];
902}
903
904void IEEEFloat::freeSignificand() {
905 if (needsCleanup())
906 delete [] significand.parts;
907}
908
909void IEEEFloat::assign(const IEEEFloat &rhs) {
910 assert(semantics == rhs.semantics);
911
912 sign = rhs.sign;
913 category = rhs.category;
914 exponent = rhs.exponent;
915 if (isFiniteNonZero() || category == fcNaN)
916 copySignificand(rhs);
917}
918
919void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
920 assert(isFiniteNonZero() || category == fcNaN);
921 assert(rhs.partCount() >= partCount());
922
923 APInt::tcAssign(significandParts(), rhs.significandParts(),
924 partCount());
925}
926
927/* Make this number a NaN. SNaN requests a signalling rather than a quiet
928 NaN, Negative sets the sign, and FILL, when non-null, supplies the
929 payload bits for the significand. NanOnly formats always get a quiet NaN. */
930void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
931 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
932 llvm_unreachable("This floating point format does not support NaN");
933
934 if (Negative && !semantics->hasSignedRepr)
936 "This floating point format does not support signed values");
937
938 category = fcNaN;
939 sign = Negative;
940 exponent = exponentNaN();
941
942 integerPart *significand = significandParts();
943 unsigned numParts = partCount();
944
945 APInt fill_storage;
946 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
947 // NaN-only types do not distinguish signalling and quiet NaN, so
948 // make them all quiet.
949 SNaN = false;
950 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
951 sign = true;
952 fill_storage = APInt::getZero(semantics->precision - 1);
953 } else {
954 fill_storage = APInt::getAllOnes(semantics->precision - 1);
955 }
956 fill = &fill_storage;
957 }
958
959 // Set the significand bits to the fill.
960 if (!fill || fill->getNumWords() < numParts)
961 APInt::tcSet(significand, 0, numParts);
962 if (fill) {
963 APInt::tcAssign(significand, fill->getRawData(),
964 std::min(fill->getNumWords(), numParts));
965
966 // Zero out the excess bits of the significand.
967 unsigned bitsToPreserve = semantics->precision - 1;
968 unsigned part = bitsToPreserve / 64;
969 bitsToPreserve %= 64;
970 significand[part] &= ((1ULL << bitsToPreserve) - 1);
971 for (part++; part != numParts; ++part)
972 significand[part] = 0;
973 }
974
975 unsigned QNaNBit =
976 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
977
978 if (SNaN) {
979 // We always have to clear the QNaN bit to make it an SNaN.
980 APInt::tcClearBit(significand, QNaNBit);
981
982 // If there are no bits set in the payload, we have to set
983 // *something* to make it a NaN instead of an infinity;
984 // conventionally, this is the next bit down from the QNaN bit.
985 if (APInt::tcIsZero(significand, numParts))
986 APInt::tcSetBit(significand, QNaNBit - 1);
987 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
988 // The only NaN is a quiet NaN, and it has no bits set in the significand.
989 // Do nothing.
990 } else {
991 // We always have to set the QNaN bit to make it a QNaN.
992 APInt::tcSetBit(significand, QNaNBit);
993 }
994
995 // For x87 extended precision, we want to make a NaN, not a
996 // pseudo-NaN. Maybe we should expose the ability to make
997 // pseudo-NaNs?
998 if (semantics == &APFloatBase::semX87DoubleExtended)
999 APInt::tcSetBit(significand, QNaNBit + 1);
1000}
1001
1003 if (this != &rhs) {
1004 if (semantics != rhs.semantics) {
1005 freeSignificand();
1006 initialize(rhs.semantics);
1007 }
1008 assign(rhs);
1009 }
1010
1011 return *this;
1012}
1013
1015 freeSignificand();
1016
1017 semantics = rhs.semantics;
1018 significand = rhs.significand;
1019 exponent = rhs.exponent;
1020 category = rhs.category;
1021 sign = rhs.sign;
1022
1023 rhs.semantics = &APFloatBase::semBogus;
1024 return *this;
1025}
1026
1028 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1029 (APInt::tcExtractBit(significandParts(),
1030 semantics->precision - 1) == 0);
1031}
1032
1034 // The smallest number by magnitude in our format will be the smallest
1035 // denormal, i.e. the floating point number with exponent being minimum
1036 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1037 return isFiniteNonZero() && exponent == semantics->minExponent &&
1038 significandMSB() == 0;
1039}
1040
1042 return getCategory() == fcNormal && exponent == semantics->minExponent &&
1043 isSignificandAllZerosExceptMSB();
1044}
1045
1046unsigned int IEEEFloat::getNumHighBits() const {
1047 const unsigned int PartCount = partCountForBits(semantics->precision);
1048 const unsigned int Bits = PartCount * integerPartWidth;
1049
1050 // Compute how many bits are used in the final word.
1051 // When precision is just 1, it represents the 'Pth'
1052 // Precision bit and not the actual significand bit.
1053 const unsigned int NumHighBits = (semantics->precision > 1)
1054 ? (Bits - semantics->precision + 1)
1055 : (Bits - semantics->precision);
1056 return NumHighBits;
1057}
1058
1059bool IEEEFloat::isSignificandAllOnes() const {
1060 // Test if the significand excluding the integral bit is all ones. This allows
1061 // us to test for binade boundaries.
1062 const integerPart *Parts = significandParts();
1063 const unsigned PartCount = partCountForBits(semantics->precision);
1064 for (unsigned i = 0; i < PartCount - 1; i++)
1065 if (~Parts[i])
1066 return false;
1067
1068 // Set the unused high bits to all ones when we compare.
1069 const unsigned NumHighBits = getNumHighBits();
1070 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1071 "Can not have more high bits to fill than integerPartWidth");
1072 const integerPart HighBitFill =
1073 ~integerPart(0) << (integerPartWidth - NumHighBits);
1074 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1075 return false;
1076
1077 return true;
1078}
1079
1080bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1081 // Test if the significand excluding the integral bit is all ones except for
1082 // the least significant bit.
1083 const integerPart *Parts = significandParts();
1084
1085 if (Parts[0] & 1)
1086 return false;
1087
1088 const unsigned PartCount = partCountForBits(semantics->precision);
1089 for (unsigned i = 0; i < PartCount - 1; i++) {
1090 if (~Parts[i] & ~unsigned{!i})
1091 return false;
1092 }
1093
1094 // Set the unused high bits to all ones when we compare.
1095 const unsigned NumHighBits = getNumHighBits();
1096 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1097 "Can not have more high bits to fill than integerPartWidth");
1098 const integerPart HighBitFill = ~integerPart(0)
1099 << (integerPartWidth - NumHighBits);
1100 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1101 return false;
1102
1103 return true;
1104}
1105
1106bool IEEEFloat::isSignificandAllZeros() const {
1107 // Test if the significand excluding the integral bit is all zeros. This
1108 // allows us to test for binade boundaries.
1109 const integerPart *Parts = significandParts();
1110 const unsigned PartCount = partCountForBits(semantics->precision);
1111
1112 for (unsigned i = 0; i < PartCount - 1; i++)
1113 if (Parts[i])
1114 return false;
1115
1116 // Compute how many bits are used in the final word.
1117 const unsigned NumHighBits = getNumHighBits();
1118 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1119 "clear than integerPartWidth");
1120 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1121
1122 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1123 return false;
1124
1125 return true;
1126}
1127
1128bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1129 const integerPart *Parts = significandParts();
1130 const unsigned PartCount = partCountForBits(semantics->precision);
1131
1132 for (unsigned i = 0; i < PartCount - 1; i++) {
1133 if (Parts[i])
1134 return false;
1135 }
1136
1137 const unsigned NumHighBits = getNumHighBits();
1138 const integerPart MSBMask = integerPart(1)
1139 << (integerPartWidth - NumHighBits);
1140 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1141}
1142
1144 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1145 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1146 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1147 // The largest number by magnitude in our format will be the floating point
1148 // number with maximum exponent and with significand that is all ones except
1149 // the LSB.
1150 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1151 ? isSignificandAllOnesExceptLSB()
1152 : IsMaxExp;
1153 } else {
1154 // The largest number by magnitude in our format will be the floating point
1155 // number with maximum exponent and with significand that is all ones.
1156 return IsMaxExp && isSignificandAllOnes();
1157 }
1158}
1159
1161 // This could be made more efficient; I'm going for obviously correct.
1162 if (!isFinite()) return false;
1163 IEEEFloat truncated = *this;
1164 truncated.roundToIntegral(rmTowardZero);
1165 return compare(truncated) == cmpEqual;
1166}
1167
1168bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1169 if (this == &rhs)
1170 return true;
1171 if (semantics != rhs.semantics ||
1172 category != rhs.category ||
1173 sign != rhs.sign)
1174 return false;
1175 if (category==fcZero || category==fcInfinity)
1176 return true;
1177
1178 if (isFiniteNonZero() && exponent != rhs.exponent)
1179 return false;
1180
1181 return std::equal(significandParts(), significandParts() + partCount(),
1182 rhs.significandParts());
1183}
1184
1186 initialize(&ourSemantics);
1187 sign = 0;
1188 category = fcNormal;
1189 zeroSignificand();
1190 exponent = ourSemantics.precision - 1;
1191 significandParts()[0] = value;
1193}
1194
1196 initialize(&ourSemantics);
1197 // The Float8E8MOFNU format does not have a representation
1198 // for zero. So, use the closest representation instead.
1199 // Moreover, the all-zero encoding represents a valid
1200 // normal value (which is the smallestNormalized here).
1201 // Hence, we call makeSmallestNormalized (where category is
1202 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1203 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1204}
1205
1206// Delegate to the previous constructor, because later copy constructor may
1207// actually inspects category, which can't be garbage.
1209 : IEEEFloat(ourSemantics) {}
1210
1212 initialize(rhs.semantics);
1213 assign(rhs);
1214}
1215
1216IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1217 *this = std::move(rhs);
1218}
1219
1220IEEEFloat::~IEEEFloat() { freeSignificand(); }
1221
1222unsigned int IEEEFloat::partCount() const {
1223 return partCountForBits(semantics->precision + 1);
1224}
1225
1226const APFloat::integerPart *IEEEFloat::significandParts() const {
1227 return const_cast<IEEEFloat *>(this)->significandParts();
1228}
1229
1230APFloat::integerPart *IEEEFloat::significandParts() {
1231 if (partCount() > 1)
1232 return significand.parts;
1233 else
1234 return &significand.part;
1235}
1236
1237void IEEEFloat::zeroSignificand() {
1238 APInt::tcSet(significandParts(), 0, partCount());
1239}
1240
1241/* Increment an fcNormal floating point number's significand. */
1242void IEEEFloat::incrementSignificand() {
1243 integerPart carry;
1244
1245 carry = APInt::tcIncrement(significandParts(), partCount());
1246
1247 /* Our callers should never cause us to overflow. */
1248 assert(carry == 0);
1249 (void)carry;
1250}
1251
1252/* Add the significand of the RHS. Returns the carry flag. */
1253APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1254 integerPart *parts;
1255
1256 parts = significandParts();
1257
1258 assert(semantics == rhs.semantics);
1259 assert(exponent == rhs.exponent);
1260
1261 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1262}
1263
1264/* Subtract the significand of the RHS with a borrow flag. Returns
1265 the borrow flag. */
1266APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1267 integerPart borrow) {
1268 integerPart *parts;
1269
1270 parts = significandParts();
1271
1272 assert(semantics == rhs.semantics);
1273 assert(exponent == rhs.exponent);
1274
1275 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1276 partCount());
1277}
1278
1279/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1280 on to the full-precision result of the multiplication. Returns the
1281 lost fraction. */
1282lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1283 IEEEFloat addend,
1284 bool ignoreAddend) {
1285 unsigned int omsb; // One, not zero, based MSB.
1286 unsigned int partsCount, newPartsCount, precision;
1287 integerPart *lhsSignificand;
1288 integerPart scratch[4];
1289 integerPart *fullSignificand;
1290 lostFraction lost_fraction;
1291 bool ignored;
1292
1293 assert(semantics == rhs.semantics);
1294
1295 precision = semantics->precision;
1296
1297 // Allocate space for twice as many bits as the original significand, plus one
1298 // extra bit for the addition to overflow into.
1299 newPartsCount = partCountForBits(precision * 2 + 1);
1300
1301 if (newPartsCount > 4)
1302 fullSignificand = new integerPart[newPartsCount];
1303 else
1304 fullSignificand = scratch;
1305
1306 lhsSignificand = significandParts();
1307 partsCount = partCount();
1308
1309 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1310 rhs.significandParts(), partsCount, partsCount);
1311
1312 lost_fraction = lfExactlyZero;
1313 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1314 exponent += rhs.exponent;
1315
1316 // Assume the operands involved in the multiplication are single-precision
1317 // FP, and the two multiplicants are:
1318 // *this = a23 . a22 ... a0 * 2^e1
1319 // rhs = b23 . b22 ... b0 * 2^e2
1320 // the result of multiplication is:
1321 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1322 // Note that there are three significant bits at the left-hand side of the
1323 // radix point: two for the multiplication, and an overflow bit for the
1324 // addition (that will always be zero at this point). Move the radix point
1325 // toward left by two bits, and adjust exponent accordingly.
1326 exponent += 2;
1327
1328 if (!ignoreAddend && addend.isNonZero()) {
1329 // The intermediate result of the multiplication has "2 * precision"
1330 // signicant bit; adjust the addend to be consistent with mul result.
1331 //
1332 Significand savedSignificand = significand;
1333 const fltSemantics *savedSemantics = semantics;
1334 fltSemantics extendedSemantics;
1336 unsigned int extendedPrecision;
1337
1338 // Normalize our MSB to one below the top bit to allow for overflow.
1339 extendedPrecision = 2 * precision + 1;
1340 if (omsb != extendedPrecision - 1) {
1341 assert(extendedPrecision > omsb);
1342 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1343 (extendedPrecision - 1) - omsb);
1344 exponent -= (extendedPrecision - 1) - omsb;
1345 }
1346
1347 /* Create new semantics. */
1348 extendedSemantics = *semantics;
1349 extendedSemantics.precision = extendedPrecision;
1350
1351 if (newPartsCount == 1)
1352 significand.part = fullSignificand[0];
1353 else
1354 significand.parts = fullSignificand;
1355 semantics = &extendedSemantics;
1356
1357 // Make a copy so we can convert it to the extended semantics.
1358 // Note that we cannot convert the addend directly, as the extendedSemantics
1359 // is a local variable (which we take a reference to).
1360 IEEEFloat extendedAddend(addend);
1361 status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
1362 &ignored);
1363 assert(status == APFloat::opOK);
1364 (void)status;
1365
1366 // Shift the significand of the addend right by one bit. This guarantees
1367 // that the high bit of the significand is zero (same as fullSignificand),
1368 // so the addition will overflow (if it does overflow at all) into the top bit.
1369 lost_fraction = extendedAddend.shiftSignificandRight(1);
1370 assert(lost_fraction == lfExactlyZero &&
1371 "Lost precision while shifting addend for fused-multiply-add.");
1372
1373 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1374
1375 /* Restore our state. */
1376 if (newPartsCount == 1)
1377 fullSignificand[0] = significand.part;
1378 significand = savedSignificand;
1379 semantics = savedSemantics;
1380
1381 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1382 }
1383
1384 // Convert the result having "2 * precision" significant-bits back to the one
1385 // having "precision" significant-bits. First, move the radix point from
1386 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1387 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1388 exponent -= precision + 1;
1389
1390 // In case MSB resides at the left-hand side of radix point, shift the
1391 // mantissa right by some amount to make sure the MSB reside right before
1392 // the radix point (i.e. "MSB . rest-significant-bits").
1393 //
1394 // Note that the result is not normalized when "omsb < precision". So, the
1395 // caller needs to call IEEEFloat::normalize() if normalized value is
1396 // expected.
1397 if (omsb > precision) {
1398 unsigned int bits, significantParts;
1399 lostFraction lf;
1400
1401 bits = omsb - precision;
1402 significantParts = partCountForBits(omsb);
1403 lf = shiftRight(fullSignificand, significantParts, bits);
1404 lost_fraction = combineLostFractions(lf, lost_fraction);
1405 exponent += bits;
1406 }
1407
1408 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1409
1410 if (newPartsCount > 4)
1411 delete [] fullSignificand;
1412
1413 return lost_fraction;
1414}
1415
1416lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1417 // When the given semantics has zero, the addend here is a zero.
1418 // i.e . it belongs to the 'fcZero' category.
1419 // But when the semantics does not support zero, we need to
1420 // explicitly convey that this addend should be ignored
1421 // for multiplication.
1422 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1423}
1424
1425/* Multiply the significands of LHS and RHS to DST. */
1426lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1427 unsigned int bit, i, partsCount;
1428 const integerPart *rhsSignificand;
1429 integerPart *lhsSignificand, *dividend, *divisor;
1430 integerPart scratch[4];
1431 lostFraction lost_fraction;
1432
1433 assert(semantics == rhs.semantics);
1434
1435 lhsSignificand = significandParts();
1436 rhsSignificand = rhs.significandParts();
1437 partsCount = partCount();
1438
1439 if (partsCount > 2)
1440 dividend = new integerPart[partsCount * 2];
1441 else
1442 dividend = scratch;
1443
1444 divisor = dividend + partsCount;
1445
1446 /* Copy the dividend and divisor as they will be modified in-place. */
1447 for (i = 0; i < partsCount; i++) {
1448 dividend[i] = lhsSignificand[i];
1449 divisor[i] = rhsSignificand[i];
1450 lhsSignificand[i] = 0;
1451 }
1452
1453 exponent -= rhs.exponent;
1454
1455 unsigned int precision = semantics->precision;
1456
1457 /* Normalize the divisor. */
1458 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1459 if (bit) {
1460 exponent += bit;
1461 APInt::tcShiftLeft(divisor, partsCount, bit);
1462 }
1463
1464 /* Normalize the dividend. */
1465 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1466 if (bit) {
1467 exponent -= bit;
1468 APInt::tcShiftLeft(dividend, partsCount, bit);
1469 }
1470
1471 /* Ensure the dividend >= divisor initially for the loop below.
1472 Incidentally, this means that the division loop below is
1473 guaranteed to set the integer bit to one. */
1474 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1475 exponent--;
1476 APInt::tcShiftLeft(dividend, partsCount, 1);
1477 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1478 }
1479
1480 /* Long division. */
1481 for (bit = precision; bit; bit -= 1) {
1482 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1483 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1484 APInt::tcSetBit(lhsSignificand, bit - 1);
1485 }
1486
1487 APInt::tcShiftLeft(dividend, partsCount, 1);
1488 }
1489
1490 /* Figure out the lost fraction. */
1491 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1492
1493 if (cmp > 0)
1494 lost_fraction = lfMoreThanHalf;
1495 else if (cmp == 0)
1496 lost_fraction = lfExactlyHalf;
1497 else if (APInt::tcIsZero(dividend, partsCount))
1498 lost_fraction = lfExactlyZero;
1499 else
1500 lost_fraction = lfLessThanHalf;
1501
1502 if (partsCount > 2)
1503 delete [] dividend;
1504
1505 return lost_fraction;
1506}
1507
1508unsigned int IEEEFloat::significandMSB() const {
1509 return APInt::tcMSB(significandParts(), partCount());
1510}
1511
1512unsigned int IEEEFloat::significandLSB() const {
1513 return APInt::tcLSB(significandParts(), partCount());
1514}
1515
1516/* Note that a zero result is NOT normalized to fcZero. */
1517lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1518 /* Our exponent should not overflow. */
1519 assert((ExponentType) (exponent + bits) >= exponent);
1520
1521 exponent += bits;
1522
1523 return shiftRight(significandParts(), partCount(), bits);
1524}
1525
1526/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1527void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1528 assert(bits < semantics->precision ||
1529 (semantics->precision == 1 && bits <= 1));
1530
1531 if (bits) {
1532 unsigned int partsCount = partCount();
1533
1534 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1535 exponent -= bits;
1536
1537 assert(!APInt::tcIsZero(significandParts(), partsCount));
1538 }
1539}
1540
1542 int compare;
1543
1544 assert(semantics == rhs.semantics);
1546 assert(rhs.isFiniteNonZero());
1547
1548 compare = exponent - rhs.exponent;
1549
1550 /* If exponents are equal, do an unsigned bignum comparison of the
1551 significands. */
1552 if (compare == 0)
1553 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1554 partCount());
1555
1556 if (compare > 0)
1557 return cmpGreaterThan;
1558 else if (compare < 0)
1559 return cmpLessThan;
1560 else
1561 return cmpEqual;
1562}
1563
1564/* Set the least significant BITS bits of a bignum, clear the
1565 rest. */
1566static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1567 unsigned bits) {
1568 unsigned i = 0;
1569 while (bits > APInt::APINT_BITS_PER_WORD) {
1570 dst[i++] = ~(APInt::WordType)0;
1572 }
1573
1574 if (bits)
1575 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1576
1577 while (i < parts)
1578 dst[i++] = 0;
1579}
1580
1581/* Handle overflow. Sign is preserved. We either become infinity or
1582 the largest finite number. */
1583APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1585 /* Infinity? */
1586 if (rounding_mode == rmNearestTiesToEven ||
1587 rounding_mode == rmNearestTiesToAway ||
1588 (rounding_mode == rmTowardPositive && !sign) ||
1589 (rounding_mode == rmTowardNegative && sign)) {
1591 makeNaN(false, sign);
1592 else
1593 category = fcInfinity;
1594 return static_cast<opStatus>(opOverflow | opInexact);
1595 }
1596 }
1597
1598 /* Otherwise we become the largest finite number. */
1599 category = fcNormal;
1600 exponent = semantics->maxExponent;
1601 tcSetLeastSignificantBits(significandParts(), partCount(),
1602 semantics->precision);
1603 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1604 semantics->nanEncoding == fltNanEncoding::AllOnes)
1605 APInt::tcClearBit(significandParts(), 0);
1606
1607 return opInexact;
1608}
1609
1610/* Returns TRUE if, when truncating the current number, with BIT the
1611 new LSB, with the given lost fraction and rounding mode, the result
1612 would need to be rounded away from zero (i.e., by increasing the
1613 signficand). This routine must work for fcZero of both signs, and
1614 fcNormal numbers. */
1615bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1616 lostFraction lost_fraction,
1617 unsigned int bit) const {
1618 /* NaNs and infinities should not have lost fractions. */
1619 assert(isFiniteNonZero() || category == fcZero);
1620
1621 /* Current callers never pass this so we don't handle it. */
1622 assert(lost_fraction != lfExactlyZero);
1623
1624 switch (rounding_mode) {
1626 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1627
1629 if (lost_fraction == lfMoreThanHalf)
1630 return true;
1631
1632 /* Our zeroes don't have a significand to test. */
1633 if (lost_fraction == lfExactlyHalf && category != fcZero)
1634 return APInt::tcExtractBit(significandParts(), bit);
1635
1636 return false;
1637
1638 case rmTowardZero:
1639 return false;
1640
1641 case rmTowardPositive:
1642 return !sign;
1643
1644 case rmTowardNegative:
1645 return sign;
1646
1647 default:
1648 break;
1649 }
1650 llvm_unreachable("Invalid rounding mode found");
1651}
1652
1653APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1654 lostFraction lost_fraction) {
1655 unsigned int omsb; /* One, not zero, based MSB. */
1656 int exponentChange;
1657
1658 if (!isFiniteNonZero())
1659 return opOK;
1660
1661 /* Before rounding normalize the exponent of fcNormal numbers. */
1662 omsb = significandMSB() + 1;
1663
1664 // Only skip this `if` if the value is exactly zero.
1665 if (omsb || lost_fraction != lfExactlyZero) {
1666 /* OMSB is numbered from 1. We want to place it in the integer
1667 bit numbered PRECISION if possible, with a compensating change in
1668 the exponent. */
1669 exponentChange = omsb - semantics->precision;
1670
1671 /* If the resulting exponent is too high, overflow according to
1672 the rounding mode. */
1673 if (exponent + exponentChange > semantics->maxExponent)
1674 return handleOverflow(rounding_mode);
1675
1676 /* Subnormal numbers have exponent minExponent, and their MSB
1677 is forced based on that. */
1678 if (exponent + exponentChange < semantics->minExponent)
1679 exponentChange = semantics->minExponent - exponent;
1680
1681 /* Shifting left is easy as we don't lose precision. */
1682 if (exponentChange < 0) {
1683 assert(lost_fraction == lfExactlyZero);
1684
1685 shiftSignificandLeft(-exponentChange);
1686
1687 return opOK;
1688 }
1689
1690 if (exponentChange > 0) {
1691 lostFraction lf;
1692
1693 /* Shift right and capture any new lost fraction. */
1694 lf = shiftSignificandRight(exponentChange);
1695
1696 lost_fraction = combineLostFractions(lf, lost_fraction);
1697
1698 /* Keep OMSB up-to-date. */
1699 if (omsb > (unsigned) exponentChange)
1700 omsb -= exponentChange;
1701 else
1702 omsb = 0;
1703 }
1704 }
1705
1706 // The all-ones values is an overflow if NaN is all ones. If NaN is
1707 // represented by negative zero, then it is a valid finite value.
1708 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1709 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1710 exponent == semantics->maxExponent && isSignificandAllOnes())
1711 return handleOverflow(rounding_mode);
1712
1713 /* Now round the number according to rounding_mode given the lost
1714 fraction. */
1715
1716 /* As specified in IEEE 754, since we do not trap we do not report
1717 underflow for exact results. */
1718 if (lost_fraction == lfExactlyZero) {
1719 /* Canonicalize zeroes. */
1720 if (omsb == 0) {
1721 category = fcZero;
1722 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1723 sign = false;
1724 if (!semantics->hasZero)
1726 }
1727
1728 return opOK;
1729 }
1730
1731 /* Increment the significand if we're rounding away from zero. */
1732 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1733 if (omsb == 0)
1734 exponent = semantics->minExponent;
1735
1736 incrementSignificand();
1737 omsb = significandMSB() + 1;
1738
1739 /* Did the significand increment overflow? */
1740 if (omsb == (unsigned) semantics->precision + 1) {
1741 /* Renormalize by incrementing the exponent and shifting our
1742 significand right one. However if we already have the
1743 maximum exponent we overflow to infinity. */
1744 if (exponent == semantics->maxExponent)
1745 // Invoke overflow handling with a rounding mode that will guarantee
1746 // that the result gets turned into the correct infinity representation.
1747 // This is needed instead of just setting the category to infinity to
1748 // account for 8-bit floating point types that have no inf, only NaN.
1749 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1750
1751 shiftSignificandRight(1);
1752
1753 return opInexact;
1754 }
1755
1756 // The all-ones values is an overflow if NaN is all ones. If NaN is
1757 // represented by negative zero, then it is a valid finite value.
1758 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1759 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1760 exponent == semantics->maxExponent && isSignificandAllOnes())
1761 return handleOverflow(rounding_mode);
1762 }
1763
1764 /* The normal case - we were and are not denormal, and any
1765 significand increment above didn't overflow. */
1766 if (omsb == semantics->precision)
1767 return opInexact;
1768
1769 /* We have a non-zero denormal. */
1770 assert(omsb < semantics->precision);
1771
1772 /* Canonicalize zeroes. */
1773 if (omsb == 0) {
1774 category = fcZero;
1775 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1776 sign = false;
1777 // This condition handles the case where the semantics
1778 // does not have zero but uses the all-zero encoding
1779 // to represent the smallest normal value.
1780 if (!semantics->hasZero)
1782 }
1783
1784 /* The fcZero case is a denormal that underflowed to zero. */
1785 return (opStatus) (opUnderflow | opInexact);
1786}
1787
1788APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1789 bool subtract) {
1790 switch (PackCategoriesIntoKey(category, rhs.category)) {
1791 default:
1792 llvm_unreachable(nullptr);
1793
1797 assign(rhs);
1798 [[fallthrough]];
1803 if (isSignaling()) {
1804 makeQuiet();
1805 return opInvalidOp;
1806 }
1807 return rhs.isSignaling() ? opInvalidOp : opOK;
1808
1812 return opOK;
1813
1816 category = fcInfinity;
1817 sign = rhs.sign ^ subtract;
1818 return opOK;
1819
1821 assign(rhs);
1822 sign = rhs.sign ^ subtract;
1823 return opOK;
1824
1826 /* Sign depends on rounding mode; handled by caller. */
1827 return opOK;
1828
1830 /* Differently signed infinities can only be validly
1831 subtracted. */
1832 if (((sign ^ rhs.sign)!=0) != subtract) {
1833 makeNaN();
1834 return opInvalidOp;
1835 }
1836
1837 return opOK;
1838
1840 return opDivByZero;
1841 }
1842}
1843
1844/* Add or subtract two normal numbers. */
1845lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1846 bool subtract) {
1847 integerPart carry = 0;
1848 lostFraction lost_fraction;
1849 int bits;
1850
1851 /* Determine if the operation on the absolute values is effectively
1852 an addition or subtraction. */
1853 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1854
1855 /* Are we bigger exponent-wise than the RHS? */
1856 bits = exponent - rhs.exponent;
1857
1858 /* Subtraction is more subtle than one might naively expect. */
1859 if (subtract) {
1860 if ((bits < 0) && !semantics->hasSignedRepr)
1862 "This floating point format does not support signed values");
1863
1864 IEEEFloat temp_rhs(rhs);
1865 bool lost_fraction_is_from_rhs = false;
1866
1867 if (bits == 0)
1868 lost_fraction = lfExactlyZero;
1869 else if (bits > 0) {
1870 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1871 lost_fraction_is_from_rhs = true;
1872 shiftSignificandLeft(1);
1873 } else {
1874 lost_fraction = shiftSignificandRight(-bits - 1);
1875 temp_rhs.shiftSignificandLeft(1);
1876 }
1877
1878 // Should we reverse the subtraction.
1879 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1880 if (cmp_result == cmpLessThan) {
1881 bool borrow =
1882 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1883 if (borrow) {
1884 // The lost fraction is being subtracted, borrow from the significand
1885 // and invert `lost_fraction`.
1886 if (lost_fraction == lfLessThanHalf)
1887 lost_fraction = lfMoreThanHalf;
1888 else if (lost_fraction == lfMoreThanHalf)
1889 lost_fraction = lfLessThanHalf;
1890 }
1891 carry = temp_rhs.subtractSignificand(*this, borrow);
1892 copySignificand(temp_rhs);
1893 sign = !sign;
1894 } else if (cmp_result == cmpGreaterThan) {
1895 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1896 if (borrow) {
1897 // The lost fraction is being subtracted, borrow from the significand
1898 // and invert `lost_fraction`.
1899 if (lost_fraction == lfLessThanHalf)
1900 lost_fraction = lfMoreThanHalf;
1901 else if (lost_fraction == lfMoreThanHalf)
1902 lost_fraction = lfLessThanHalf;
1903 }
1904 carry = subtractSignificand(temp_rhs, borrow);
1905 } else { // cmpEqual
1906 zeroSignificand();
1907 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1908 // rhs is slightly larger due to the lost fraction, flip the sign.
1909 sign = !sign;
1910 }
1911 }
1912
1913 /* The code above is intended to ensure that no borrow is
1914 necessary. */
1915 assert(!carry);
1916 (void)carry;
1917 } else {
1918 if (bits > 0) {
1919 IEEEFloat temp_rhs(rhs);
1920
1921 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1922 carry = addSignificand(temp_rhs);
1923 } else {
1924 lost_fraction = shiftSignificandRight(-bits);
1925 carry = addSignificand(rhs);
1926 }
1927
1928 /* We have a guard bit; generating a carry cannot happen. */
1929 assert(!carry);
1930 (void)carry;
1931 }
1932
1933 return lost_fraction;
1934}
1935
1936APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1937 switch (PackCategoriesIntoKey(category, rhs.category)) {
1938 default:
1939 llvm_unreachable(nullptr);
1940
1944 assign(rhs);
1945 sign = false;
1946 [[fallthrough]];
1951 sign ^= rhs.sign; // restore the original sign
1952 if (isSignaling()) {
1953 makeQuiet();
1954 return opInvalidOp;
1955 }
1956 return rhs.isSignaling() ? opInvalidOp : opOK;
1957
1961 category = fcInfinity;
1962 return opOK;
1963
1967 category = fcZero;
1968 return opOK;
1969
1972 makeNaN();
1973 return opInvalidOp;
1974
1976 return opOK;
1977 }
1978}
1979
1980APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1981 switch (PackCategoriesIntoKey(category, rhs.category)) {
1982 default:
1983 llvm_unreachable(nullptr);
1984
1988 assign(rhs);
1989 sign = false;
1990 [[fallthrough]];
1995 sign ^= rhs.sign; // restore the original sign
1996 if (isSignaling()) {
1997 makeQuiet();
1998 return opInvalidOp;
1999 }
2000 return rhs.isSignaling() ? opInvalidOp : opOK;
2001
2006 return opOK;
2007
2009 category = fcZero;
2010 return opOK;
2011
2013 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
2014 makeNaN(false, sign);
2015 else
2016 category = fcInfinity;
2017 return opDivByZero;
2018
2021 makeNaN();
2022 return opInvalidOp;
2023
2025 return opOK;
2026 }
2027}
2028
2029APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
2030 switch (PackCategoriesIntoKey(category, rhs.category)) {
2031 default:
2032 llvm_unreachable(nullptr);
2033
2037 assign(rhs);
2038 [[fallthrough]];
2043 if (isSignaling()) {
2044 makeQuiet();
2045 return opInvalidOp;
2046 }
2047 return rhs.isSignaling() ? opInvalidOp : opOK;
2048
2052 return opOK;
2053
2059 makeNaN();
2060 return opInvalidOp;
2061
2063 return opOK;
2064 }
2065}
2066
2067APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
2068 switch (PackCategoriesIntoKey(category, rhs.category)) {
2069 default:
2070 llvm_unreachable(nullptr);
2071
2075 assign(rhs);
2076 [[fallthrough]];
2081 if (isSignaling()) {
2082 makeQuiet();
2083 return opInvalidOp;
2084 }
2085 return rhs.isSignaling() ? opInvalidOp : opOK;
2086
2090 return opOK;
2091
2097 makeNaN();
2098 return opInvalidOp;
2099
2101 return opDivByZero; // fake status, indicating this is not a special case
2102 }
2103}
2104
2105/* Change sign. */
2107 // With NaN-as-negative-zero, neither NaN or negative zero can change
2108 // their signs.
2109 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2110 (isZero() || isNaN()))
2111 return;
2112 /* Look mummy, this one's easy. */
2113 sign = !sign;
2114}
2115
2116/* Normalized addition or subtraction. */
2117APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2118 roundingMode rounding_mode,
2119 bool subtract) {
2120 opStatus fs;
2121
2122 fs = addOrSubtractSpecials(rhs, subtract);
2123
2124 /* This return code means it was not a simple case. */
2125 if (fs == opDivByZero) {
2126 lostFraction lost_fraction;
2127
2128 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2129 fs = normalize(rounding_mode, lost_fraction);
2130
2131 /* Can only be zero if we lost no fraction. */
2132 assert(category != fcZero || lost_fraction == lfExactlyZero);
2133 }
2134
2135 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2136 positive zero unless rounding to minus infinity, except that
2137 adding two like-signed zeroes gives that zero. */
2138 if (category == fcZero) {
2139 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2140 sign = (rounding_mode == rmTowardNegative);
2141 // NaN-in-negative-zero means zeros need to be normalized to +0.
2142 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2143 sign = false;
2144 }
2145
2146 return fs;
2147}
2148
2149/* Normalized addition. */
2151 roundingMode rounding_mode) {
2152 return addOrSubtract(rhs, rounding_mode, false);
2153}
2154
2155/* Normalized subtraction. */
2157 roundingMode rounding_mode) {
2158 return addOrSubtract(rhs, rounding_mode, true);
2159}
2160
2161/* Normalized multiply. */
2163 roundingMode rounding_mode) {
2164 opStatus fs;
2165
2166 sign ^= rhs.sign;
2167 fs = multiplySpecials(rhs);
2168
2169 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2170 sign = false;
2171 if (isFiniteNonZero()) {
2172 lostFraction lost_fraction = multiplySignificand(rhs);
2173 fs = normalize(rounding_mode, lost_fraction);
2174 if (lost_fraction != lfExactlyZero)
2175 fs = (opStatus) (fs | opInexact);
2176 }
2177
2178 return fs;
2179}
2180
2181/* Normalized divide. */
2183 roundingMode rounding_mode) {
2184 opStatus fs;
2185
2186 sign ^= rhs.sign;
2187 fs = divideSpecials(rhs);
2188
2189 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2190 sign = false;
2191 if (isFiniteNonZero()) {
2192 lostFraction lost_fraction = divideSignificand(rhs);
2193 fs = normalize(rounding_mode, lost_fraction);
2194 if (lost_fraction != lfExactlyZero)
2195 fs = (opStatus) (fs | opInexact);
2196 }
2197
2198 return fs;
2199}
2200
2201/* Normalized remainder. */
2203 opStatus fs;
2204 unsigned int origSign = sign;
2205
2206 // First handle the special cases.
2207 fs = remainderSpecials(rhs);
2208 if (fs != opDivByZero)
2209 return fs;
2210
2211 fs = opOK;
2212
2213 // Make sure the current value is less than twice the denom. If the addition
2214 // did not succeed (an overflow has happened), which means that the finite
2215 // value we currently posses must be less than twice the denom (as we are
2216 // using the same semantics).
2217 IEEEFloat P2 = rhs;
2218 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2219 fs = mod(P2);
2220 assert(fs == opOK);
2221 }
2222
2223 // Lets work with absolute numbers.
2224 IEEEFloat P = rhs;
2225 P.sign = false;
2226 sign = false;
2227
2228 //
2229 // To calculate the remainder we use the following scheme.
2230 //
2231 // The remainder is defained as follows:
2232 //
2233 // remainder = numer - rquot * denom = x - r * p
2234 //
2235 // Where r is the result of: x/p, rounded toward the nearest integral value
2236 // (with halfway cases rounded toward the even number).
2237 //
2238 // Currently, (after x mod 2p):
2239 // r is the number of 2p's present inside x, which is inherently, an even
2240 // number of p's.
2241 //
2242 // We may split the remaining calculation into 4 options:
2243 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2244 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2245 // are done as well.
2246 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2247 // to subtract 1p at least once.
2248 // - if x >= p then we must subtract p at least once, as x must be a
2249 // remainder.
2250 //
2251 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2252 //
2253 // We can now split the remaining calculation to the following 3 options:
2254 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2255 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2256 // must round up to the next even number. so we must subtract p once more.
2257 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2258 // integral, and subtract p once more.
2259 //
2260
2261 // Extend the semantics to prevent an overflow/underflow or inexact result.
2262 bool losesInfo;
2263 fltSemantics extendedSemantics = *semantics;
2264 extendedSemantics.maxExponent++;
2265 extendedSemantics.minExponent--;
2266 extendedSemantics.precision += 2;
2267
2268 IEEEFloat VEx = *this;
2269 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2270 assert(fs == opOK && !losesInfo);
2271 IEEEFloat PEx = P;
2272 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2273 assert(fs == opOK && !losesInfo);
2274
2275 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2276 // any fraction.
2277 fs = VEx.add(VEx, rmNearestTiesToEven);
2278 assert(fs == opOK);
2279
2280 if (VEx.compare(PEx) == cmpGreaterThan) {
2282 assert(fs == opOK);
2283
2284 // Make VEx = this.add(this), but because we have different semantics, we do
2285 // not want to `convert` again, so we just subtract PEx twice (which equals
2286 // to the desired value).
2287 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2288 assert(fs == opOK);
2289 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2290 assert(fs == opOK);
2291
2292 cmpResult result = VEx.compare(PEx);
2293 if (result == cmpGreaterThan || result == cmpEqual) {
2295 assert(fs == opOK);
2296 }
2297 }
2298
2299 if (isZero()) {
2300 sign = origSign; // IEEE754 requires this
2301 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2302 // But some 8-bit floats only have positive 0.
2303 sign = false;
2304 }
2305
2306 else
2307 sign ^= origSign;
2308 return fs;
2309}
2310
2311/* Normalized llvm frem (C fmod). */
2313 opStatus fs;
2314 fs = modSpecials(rhs);
2315 unsigned int origSign = sign;
2316
2317 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2319 int Exp = ilogb(*this) - ilogb(rhs);
2320 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2321 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2322 // check for it.
2323 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2324 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2325 V.sign = sign;
2326
2328
2329 // When the semantics supports zero, this loop's
2330 // exit-condition is handled by the 'isFiniteNonZero'
2331 // category check above. However, when the semantics
2332 // does not have 'fcZero' and we have reached the
2333 // minimum possible value, (and any further subtract
2334 // will underflow to the same value) explicitly
2335 // provide an exit-path here.
2336 if (!semantics->hasZero && this->isSmallest())
2337 break;
2338
2339 assert(fs==opOK);
2340 }
2341 if (isZero()) {
2342 sign = origSign; // fmod requires this
2343 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2344 sign = false;
2345 }
2346 return fs;
2347}
2348
2349/* Normalized fused-multiply-add. */
2351 const IEEEFloat &addend,
2352 roundingMode rounding_mode) {
2353 opStatus fs;
2354
2355 /* Post-multiplication sign, before addition. */
2356 sign ^= multiplicand.sign;
2357
2358 /* If and only if all arguments are normal do we need to do an
2359 extended-precision calculation. */
2360 if (isFiniteNonZero() &&
2361 multiplicand.isFiniteNonZero() &&
2362 addend.isFinite()) {
2363 lostFraction lost_fraction;
2364
2365 lost_fraction = multiplySignificand(multiplicand, addend);
2366 fs = normalize(rounding_mode, lost_fraction);
2367 if (lost_fraction != lfExactlyZero)
2368 fs = (opStatus) (fs | opInexact);
2369
2370 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2371 positive zero unless rounding to minus infinity, except that
2372 adding two like-signed zeroes gives that zero. */
2373 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2374 sign = (rounding_mode == rmTowardNegative);
2375 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2376 sign = false;
2377 }
2378 } else {
2379 fs = multiplySpecials(multiplicand);
2380
2381 /* FS can only be opOK or opInvalidOp. There is no more work
2382 to do in the latter case. The IEEE-754R standard says it is
2383 implementation-defined in this case whether, if ADDEND is a
2384 quiet NaN, we raise invalid op; this implementation does so.
2385
2386 If we need to do the addition we can do so with normal
2387 precision. */
2388 if (fs == opOK)
2389 fs = addOrSubtract(addend, rounding_mode, false);
2390 }
2391
2392 return fs;
2393}
2394
2395/* Rounding-mode correct round to integral value. */
2397 opStatus fs;
2398
2399 if (isInfinity())
2400 // [IEEE Std 754-2008 6.1]:
2401 // The behavior of infinity in floating-point arithmetic is derived from the
2402 // limiting cases of real arithmetic with operands of arbitrarily
2403 // large magnitude, when such a limit exists.
2404 // ...
2405 // Operations on infinite operands are usually exact and therefore signal no
2406 // exceptions ...
2407 return opOK;
2408
2409 if (isNaN()) {
2410 if (isSignaling()) {
2411 // [IEEE Std 754-2008 6.2]:
2412 // Under default exception handling, any operation signaling an invalid
2413 // operation exception and for which a floating-point result is to be
2414 // delivered shall deliver a quiet NaN.
2415 makeQuiet();
2416 // [IEEE Std 754-2008 6.2]:
2417 // Signaling NaNs shall be reserved operands that, under default exception
2418 // handling, signal the invalid operation exception(see 7.2) for every
2419 // general-computational and signaling-computational operation except for
2420 // the conversions described in 5.12.
2421 return opInvalidOp;
2422 } else {
2423 // [IEEE Std 754-2008 6.2]:
2424 // For an operation with quiet NaN inputs, other than maximum and minimum
2425 // operations, if a floating-point result is to be delivered the result
2426 // shall be a quiet NaN which should be one of the input NaNs.
2427 // ...
2428 // Every general-computational and quiet-computational operation involving
2429 // one or more input NaNs, none of them signaling, shall signal no
2430 // exception, except fusedMultiplyAdd might signal the invalid operation
2431 // exception(see 7.2).
2432 return opOK;
2433 }
2434 }
2435
2436 if (isZero()) {
2437 // [IEEE Std 754-2008 6.3]:
2438 // ... the sign of the result of conversions, the quantize operation, the
2439 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2440 // the sign of the first or only operand.
2441 return opOK;
2442 }
2443
2444 // If the exponent is large enough, we know that this value is already
2445 // integral, and the arithmetic below would potentially cause it to saturate
2446 // to +/-Inf. Bail out early instead.
2447 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2448 return opOK;
2449
2450 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2451 // precision of our format, and then subtract it back off again. The choice
2452 // of rounding modes for the addition/subtraction determines the rounding mode
2453 // for our integral rounding as well.
2454 // NOTE: When the input value is negative, we do subtraction followed by
2455 // addition instead.
2456 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2457 1);
2458 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2459 IEEEFloat MagicConstant(*semantics);
2460 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2462 assert(fs == opOK);
2463 MagicConstant.sign = sign;
2464
2465 // Preserve the input sign so that we can handle the case of zero result
2466 // correctly.
2467 bool inputSign = isNegative();
2468
2469 fs = add(MagicConstant, rounding_mode);
2470
2471 // Current value and 'MagicConstant' are both integers, so the result of the
2472 // subtraction is always exact according to Sterbenz' lemma.
2473 subtract(MagicConstant, rounding_mode);
2474
2475 // Restore the input sign.
2476 if (inputSign != isNegative())
2477 changeSign();
2478
2479 return fs;
2480}
2481
2482/* Comparison requires normalized numbers. */
2484 cmpResult result;
2485
2486 assert(semantics == rhs.semantics);
2487
2488 switch (PackCategoriesIntoKey(category, rhs.category)) {
2489 default:
2490 llvm_unreachable(nullptr);
2491
2499 return cmpUnordered;
2500
2504 if (sign)
2505 return cmpLessThan;
2506 else
2507 return cmpGreaterThan;
2508
2512 if (rhs.sign)
2513 return cmpGreaterThan;
2514 else
2515 return cmpLessThan;
2516
2518 if (sign == rhs.sign)
2519 return cmpEqual;
2520 else if (sign)
2521 return cmpLessThan;
2522 else
2523 return cmpGreaterThan;
2524
2526 return cmpEqual;
2527
2529 break;
2530 }
2531
2532 /* Two normal numbers. Do they have the same sign? */
2533 if (sign != rhs.sign) {
2534 if (sign)
2535 result = cmpLessThan;
2536 else
2537 result = cmpGreaterThan;
2538 } else {
2539 /* Compare absolute values; invert result if negative. */
2540 result = compareAbsoluteValue(rhs);
2541
2542 if (sign) {
2543 if (result == cmpLessThan)
2544 result = cmpGreaterThan;
2545 else if (result == cmpGreaterThan)
2546 result = cmpLessThan;
2547 }
2548 }
2549
2550 return result;
2551}
2552
2553/// IEEEFloat::convert - convert a value of one floating point type to another.
2554/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2555/// records whether the transformation lost information, i.e. whether
2556/// converting the result back to the original type will produce the
2557/// original value (this is almost the same as return value==fsOK, but there
2558/// are edge cases where this is not so).
2559
2561 roundingMode rounding_mode,
2562 bool *losesInfo) {
2564 unsigned int newPartCount, oldPartCount;
2565 opStatus fs;
2566 int shift;
2567 const fltSemantics &fromSemantics = *semantics;
2568 bool is_signaling = isSignaling();
2569
2571 newPartCount = partCountForBits(toSemantics.precision + 1);
2572 oldPartCount = partCount();
2573 shift = toSemantics.precision - fromSemantics.precision;
2574
2575 bool X86SpecialNan = false;
2576 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2577 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2578 (!(*significandParts() & 0x8000000000000000ULL) ||
2579 !(*significandParts() & 0x4000000000000000ULL))) {
2580 // x86 has some unusual NaNs which cannot be represented in any other
2581 // format; note them here.
2582 X86SpecialNan = true;
2583 }
2584
2585 // If this is a truncation of a denormal number, and the target semantics
2586 // has larger exponent range than the source semantics (this can happen
2587 // when truncating from PowerPC double-double to double format), the
2588 // right shift could lose result mantissa bits. Adjust exponent instead
2589 // of performing excessive shift.
2590 // Also do a similar trick in case shifting denormal would produce zero
2591 // significand as this case isn't handled correctly by normalize.
2592 if (shift < 0 && isFiniteNonZero()) {
2593 int omsb = significandMSB() + 1;
2594 int exponentChange = omsb - fromSemantics.precision;
2595 if (exponent + exponentChange < toSemantics.minExponent)
2596 exponentChange = toSemantics.minExponent - exponent;
2597 exponentChange = std::max(exponentChange, shift);
2598 if (exponentChange < 0) {
2599 shift -= exponentChange;
2600 exponent += exponentChange;
2601 } else if (omsb <= -shift) {
2602 exponentChange = omsb + shift - 1; // leave at least one bit set
2603 shift -= exponentChange;
2604 exponent += exponentChange;
2605 }
2606 }
2607
2608 // If this is a truncation, perform the shift before we narrow the storage.
2609 if (shift < 0 && (isFiniteNonZero() ||
2610 (category == fcNaN && semantics->nonFiniteBehavior !=
2612 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2613
2614 // Fix the storage so it can hold to new value.
2615 if (newPartCount > oldPartCount) {
2616 // The new type requires more storage; make it available.
2617 integerPart *newParts;
2618 newParts = new integerPart[newPartCount];
2619 APInt::tcSet(newParts, 0, newPartCount);
2620 if (isFiniteNonZero() || category==fcNaN)
2621 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2622 freeSignificand();
2623 significand.parts = newParts;
2624 } else if (newPartCount == 1 && oldPartCount != 1) {
2625 // Switch to built-in storage for a single part.
2626 integerPart newPart = 0;
2627 if (isFiniteNonZero() || category==fcNaN)
2628 newPart = significandParts()[0];
2629 freeSignificand();
2630 significand.part = newPart;
2631 }
2632
2633 // Now that we have the right storage, switch the semantics.
2634 semantics = &toSemantics;
2635
2636 // If this is an extension, perform the shift now that the storage is
2637 // available.
2638 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2639 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2640
2641 if (isFiniteNonZero()) {
2642 fs = normalize(rounding_mode, lostFraction);
2643 *losesInfo = (fs != opOK);
2644 } else if (category == fcNaN) {
2645 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2646 *losesInfo =
2648 makeNaN(false, sign);
2649 return is_signaling ? opInvalidOp : opOK;
2650 }
2651
2652 // If NaN is negative zero, we need to create a new NaN to avoid converting
2653 // NaN to -Inf.
2654 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2655 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2656 makeNaN(false, false);
2657
2658 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2659
2660 // For x87 extended precision, we want to make a NaN, not a special NaN if
2661 // the input wasn't special either.
2662 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2663 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2664
2665 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2666 // This also guarantees that a sNaN does not become Inf on a truncation
2667 // that loses all payload bits.
2668 if (is_signaling) {
2669 makeQuiet();
2670 fs = opInvalidOp;
2671 } else {
2672 fs = opOK;
2673 }
2674 } else if (category == fcInfinity &&
2675 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2676 makeNaN(false, sign);
2677 *losesInfo = true;
2678 fs = opInexact;
2679 } else if (category == fcZero &&
2680 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2681 // Negative zero loses info, but positive zero doesn't.
2682 *losesInfo =
2683 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2684 fs = *losesInfo ? opInexact : opOK;
2685 // NaN is negative zero means -0 -> +0, which can lose information
2686 sign = false;
2687 } else {
2688 *losesInfo = false;
2689 fs = opOK;
2690 }
2691
2692 if (category == fcZero && !semantics->hasZero)
2694 return fs;
2695}
2696
2697/* Convert a floating point number to an integer according to the
2698 rounding mode. If the rounded integer value is out of range this
2699 returns an invalid operation exception and the contents of the
2700 destination parts are unspecified. If the rounded value is in
2701 range but the floating point number is not the exact integer, the C
2702 standard doesn't require an inexact exception to be raised. IEEE
2703 854 does require it so we do that.
2704
2705 Note that for conversions to integer type the C standard requires
2706 round-to-zero to always be used. */
2707APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2708 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2709 roundingMode rounding_mode, bool *isExact) const {
2710 lostFraction lost_fraction;
2711 const integerPart *src;
2712 unsigned int dstPartsCount, truncatedBits;
2713
2714 *isExact = false;
2715
2716 /* Handle the three special cases first. */
2717 if (category == fcInfinity || category == fcNaN)
2718 return opInvalidOp;
2719
2720 dstPartsCount = partCountForBits(width);
2721 assert(dstPartsCount <= parts.size() && "Integer too big");
2722
2723 if (category == fcZero) {
2724 APInt::tcSet(parts.data(), 0, dstPartsCount);
2725 // Negative zero can't be represented as an int.
2726 *isExact = !sign;
2727 return opOK;
2728 }
2729
2730 src = significandParts();
2731
2732 /* Step 1: place our absolute value, with any fraction truncated, in
2733 the destination. */
2734 if (exponent < 0) {
2735 /* Our absolute value is less than one; truncate everything. */
2736 APInt::tcSet(parts.data(), 0, dstPartsCount);
2737 /* For exponent -1 the integer bit represents .5, look at that.
2738 For smaller exponents leftmost truncated bit is 0. */
2739 truncatedBits = semantics->precision -1U - exponent;
2740 } else {
2741 /* We want the most significant (exponent + 1) bits; the rest are
2742 truncated. */
2743 unsigned int bits = exponent + 1U;
2744
2745 /* Hopelessly large in magnitude? */
2746 if (bits > width)
2747 return opInvalidOp;
2748
2749 if (bits < semantics->precision) {
2750 /* We truncate (semantics->precision - bits) bits. */
2751 truncatedBits = semantics->precision - bits;
2752 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2753 } else {
2754 /* We want at least as many bits as are available. */
2755 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2756 0);
2757 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2758 bits - semantics->precision);
2759 truncatedBits = 0;
2760 }
2761 }
2762
2763 /* Step 2: work out any lost fraction, and increment the absolute
2764 value if we would round away from zero. */
2765 if (truncatedBits) {
2766 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2767 truncatedBits);
2768 if (lost_fraction != lfExactlyZero &&
2769 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2770 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2771 return opInvalidOp; /* Overflow. */
2772 }
2773 } else {
2774 lost_fraction = lfExactlyZero;
2775 }
2776
2777 /* Step 3: check if we fit in the destination. */
2778 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2779
2780 if (sign) {
2781 if (!isSigned) {
2782 /* Negative numbers cannot be represented as unsigned. */
2783 if (omsb != 0)
2784 return opInvalidOp;
2785 } else {
2786 /* It takes omsb bits to represent the unsigned integer value.
2787 We lose a bit for the sign, but care is needed as the
2788 maximally negative integer is a special case. */
2789 if (omsb == width &&
2790 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2791 return opInvalidOp;
2792
2793 /* This case can happen because of rounding. */
2794 if (omsb > width)
2795 return opInvalidOp;
2796 }
2797
2798 APInt::tcNegate (parts.data(), dstPartsCount);
2799 } else {
2800 if (omsb >= width + !isSigned)
2801 return opInvalidOp;
2802 }
2803
2804 if (lost_fraction == lfExactlyZero) {
2805 *isExact = true;
2806 return opOK;
2807 }
2808 return opInexact;
2809}
2810
2811/* Same as convertToSignExtendedInteger, except we provide
2812 deterministic values in case of an invalid operation exception,
2813 namely zero for NaNs and the minimal or maximal value respectively
2814 for underflow or overflow.
2815 The *isExact output tells whether the result is exact, in the sense
2816 that converting it back to the original floating point type produces
2817 the original value. This is almost equivalent to result==opOK,
2818 except for negative zeroes.
2819*/
2822 unsigned int width, bool isSigned,
2823 roundingMode rounding_mode, bool *isExact) const {
2824 opStatus fs;
2825
2826 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2827 isExact);
2828
2829 if (fs == opInvalidOp) {
2830 unsigned int bits, dstPartsCount;
2831
2832 dstPartsCount = partCountForBits(width);
2833 assert(dstPartsCount <= parts.size() && "Integer too big");
2834
2835 if (category == fcNaN)
2836 bits = 0;
2837 else if (sign)
2838 bits = isSigned;
2839 else
2840 bits = width - isSigned;
2841
2842 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2843 if (sign && isSigned)
2844 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2845 }
2846
2847 return fs;
2848}
2849
2850/* Convert an unsigned integer SRC to a floating point number,
2851 rounding according to ROUNDING_MODE. The sign of the floating
2852 point number is not modified. */
2853APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2854 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2855 unsigned int omsb, precision, dstCount;
2856 integerPart *dst;
2857 lostFraction lost_fraction;
2858
2859 category = fcNormal;
2860 omsb = APInt::tcMSB(src, srcCount) + 1;
2861 dst = significandParts();
2862 dstCount = partCount();
2863 precision = semantics->precision;
2864
2865 /* We want the most significant PRECISION bits of SRC. There may not
2866 be that many; extract what we can. */
2867 if (precision <= omsb) {
2868 exponent = omsb - 1;
2869 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2870 omsb - precision);
2871 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2872 } else {
2873 exponent = precision - 1;
2874 lost_fraction = lfExactlyZero;
2875 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2876 }
2877
2878 return normalize(rounding_mode, lost_fraction);
2879}
2880
2882 roundingMode rounding_mode) {
2883 unsigned int partCount = Val.getNumWords();
2884 APInt api = Val;
2885
2886 sign = false;
2887 if (isSigned && api.isNegative()) {
2888 sign = true;
2889 api = -api;
2890 }
2891
2892 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2893}
2894
2896IEEEFloat::convertFromHexadecimalString(StringRef s,
2897 roundingMode rounding_mode) {
2898 lostFraction lost_fraction = lfExactlyZero;
2899
2900 category = fcNormal;
2901 zeroSignificand();
2902 exponent = 0;
2903
2904 integerPart *significand = significandParts();
2905 unsigned partsCount = partCount();
2906 unsigned bitPos = partsCount * integerPartWidth;
2907 bool computedTrailingFraction = false;
2908
2909 // Skip leading zeroes and any (hexa)decimal point.
2910 StringRef::iterator begin = s.begin();
2911 StringRef::iterator end = s.end();
2913 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2914 if (!PtrOrErr)
2915 return PtrOrErr.takeError();
2916 StringRef::iterator p = *PtrOrErr;
2917 StringRef::iterator firstSignificantDigit = p;
2918
2919 while (p != end) {
2920 integerPart hex_value;
2921
2922 if (*p == '.') {
2923 if (dot != end)
2924 return createError("String contains multiple dots");
2925 dot = p++;
2926 continue;
2927 }
2928
2929 hex_value = hexDigitValue(*p);
2930 if (hex_value == UINT_MAX)
2931 break;
2932
2933 p++;
2934
2935 // Store the number while we have space.
2936 if (bitPos) {
2937 bitPos -= 4;
2938 hex_value <<= bitPos % integerPartWidth;
2939 significand[bitPos / integerPartWidth] |= hex_value;
2940 } else if (!computedTrailingFraction) {
2941 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2942 if (!FractOrErr)
2943 return FractOrErr.takeError();
2944 lost_fraction = *FractOrErr;
2945 computedTrailingFraction = true;
2946 }
2947 }
2948
2949 /* Hex floats require an exponent but not a hexadecimal point. */
2950 if (p == end)
2951 return createError("Hex strings require an exponent");
2952 if (*p != 'p' && *p != 'P')
2953 return createError("Invalid character in significand");
2954 if (p == begin)
2955 return createError("Significand has no digits");
2956 if (dot != end && p - begin == 1)
2957 return createError("Significand has no digits");
2958
2959 /* Ignore the exponent if we are zero. */
2960 if (p != firstSignificantDigit) {
2961 int expAdjustment;
2962
2963 /* Implicit hexadecimal point? */
2964 if (dot == end)
2965 dot = p;
2966
2967 /* Calculate the exponent adjustment implicit in the number of
2968 significant digits. */
2969 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2970 if (expAdjustment < 0)
2971 expAdjustment++;
2972 expAdjustment = expAdjustment * 4 - 1;
2973
2974 /* Adjust for writing the significand starting at the most
2975 significant nibble. */
2976 expAdjustment += semantics->precision;
2977 expAdjustment -= partsCount * integerPartWidth;
2978
2979 /* Adjust for the given exponent. */
2980 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2981 if (!ExpOrErr)
2982 return ExpOrErr.takeError();
2983 exponent = *ExpOrErr;
2984 }
2985
2986 return normalize(rounding_mode, lost_fraction);
2987}
2988
2990IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2991 unsigned sigPartCount, int exp,
2992 roundingMode rounding_mode) {
2993 unsigned int parts, pow5PartCount;
2994 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2996 bool isNearest;
2997
2998 isNearest = (rounding_mode == rmNearestTiesToEven ||
2999 rounding_mode == rmNearestTiesToAway);
3000
3001 parts = partCountForBits(semantics->precision + 11);
3002
3003 /* Calculate pow(5, abs(exp)). */
3004 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
3005
3006 for (;; parts *= 2) {
3007 opStatus sigStatus, powStatus;
3008 unsigned int excessPrecision, truncatedBits;
3009
3010 calcSemantics.precision = parts * integerPartWidth - 1;
3011 excessPrecision = calcSemantics.precision - semantics->precision;
3012 truncatedBits = excessPrecision;
3013
3014 IEEEFloat decSig(calcSemantics, uninitialized);
3015 decSig.makeZero(sign);
3016 IEEEFloat pow5(calcSemantics);
3017
3018 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
3020 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
3022 /* Add exp, as 10^n = 5^n * 2^n. */
3023 decSig.exponent += exp;
3024
3025 lostFraction calcLostFraction;
3026 integerPart HUerr, HUdistance;
3027 unsigned int powHUerr;
3028
3029 if (exp >= 0) {
3030 /* multiplySignificand leaves the precision-th bit set to 1. */
3031 calcLostFraction = decSig.multiplySignificand(pow5);
3032 powHUerr = powStatus != opOK;
3033 } else {
3034 calcLostFraction = decSig.divideSignificand(pow5);
3035 /* Denormal numbers have less precision. */
3036 if (decSig.exponent < semantics->minExponent) {
3037 excessPrecision += (semantics->minExponent - decSig.exponent);
3038 truncatedBits = excessPrecision;
3039 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
3040 }
3041 /* Extra half-ulp lost in reciprocal of exponent. */
3042 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
3043 }
3044
3045 /* Both multiplySignificand and divideSignificand return the
3046 result with the integer bit set. */
3048 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3049
3050 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
3051 powHUerr);
3052 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
3053 excessPrecision, isNearest);
3054
3055 /* Are we guaranteed to round correctly if we truncate? */
3056 if (HUdistance >= HUerr) {
3057 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
3058 calcSemantics.precision - excessPrecision,
3059 excessPrecision);
3060 /* Take the exponent of decSig. If we tcExtract-ed less bits
3061 above we must adjust our exponent to compensate for the
3062 implicit right shift. */
3063 exponent = (decSig.exponent + semantics->precision
3064 - (calcSemantics.precision - excessPrecision));
3065 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
3066 decSig.partCount(),
3067 truncatedBits);
3068 return normalize(rounding_mode, calcLostFraction);
3069 }
3070 }
3071}
3072
3073Expected<APFloat::opStatus>
3074IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3075 decimalInfo D;
3076 opStatus fs;
3077
3078 /* Scan the text. */
3079 StringRef::iterator p = str.begin();
3080 if (Error Err = interpretDecimal(p, str.end(), &D))
3081 return std::move(Err);
3082
3083 /* Handle the quick cases. First the case of no significant digits,
3084 i.e. zero, and then exponents that are obviously too large or too
3085 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3086 definitely overflows if
3087
3088 (exp - 1) * L >= maxExponent
3089
3090 and definitely underflows to zero where
3091
3092 (exp + 1) * L <= minExponent - precision
3093
3094 With integer arithmetic the tightest bounds for L are
3095
3096 93/28 < L < 196/59 [ numerator <= 256 ]
3097 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3098 */
3099
3100 // Test if we have a zero number allowing for strings with no null terminators
3101 // and zero decimals with non-zero exponents.
3102 //
3103 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3104 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3105 // be at most one dot. On the other hand, if we have a zero with a non-zero
3106 // exponent, then we know that D.firstSigDigit will be non-numeric.
3107 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3108 category = fcZero;
3109 fs = opOK;
3110 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3111 sign = false;
3112 if (!semantics->hasZero)
3114
3115 /* Check whether the normalized exponent is high enough to overflow
3116 max during the log-rebasing in the max-exponent check below. */
3117 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3118 fs = handleOverflow(rounding_mode);
3119
3120 /* If it wasn't, then it also wasn't high enough to overflow max
3121 during the log-rebasing in the min-exponent check. Check that it
3122 won't overflow min in either check, then perform the min-exponent
3123 check. */
3124 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3125 (D.normalizedExponent + 1) * 28738 <=
3126 8651 * (semantics->minExponent - (int) semantics->precision)) {
3127 /* Underflow to zero and round. */
3128 category = fcNormal;
3129 zeroSignificand();
3130 fs = normalize(rounding_mode, lfLessThanHalf);
3131
3132 /* We can finally safely perform the max-exponent check. */
3133 } else if ((D.normalizedExponent - 1) * 42039
3134 >= 12655 * semantics->maxExponent) {
3135 /* Overflow and round. */
3136 fs = handleOverflow(rounding_mode);
3137 } else {
3138 integerPart *decSignificand;
3139 unsigned int partCount;
3140
3141 /* A tight upper bound on number of bits required to hold an
3142 N-digit decimal integer is N * 196 / 59. Allocate enough space
3143 to hold the full significand, and an extra part required by
3144 tcMultiplyPart. */
3145 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3146 partCount = partCountForBits(1 + 196 * partCount / 59);
3147 decSignificand = new integerPart[partCount + 1];
3148 partCount = 0;
3149
3150 /* Convert to binary efficiently - we do almost all multiplication
3151 in an integerPart. When this would overflow do we do a single
3152 bignum multiplication, and then revert again to multiplication
3153 in an integerPart. */
3154 do {
3155 integerPart decValue, val, multiplier;
3156
3157 val = 0;
3158 multiplier = 1;
3159
3160 do {
3161 if (*p == '.') {
3162 p++;
3163 if (p == str.end()) {
3164 break;
3165 }
3166 }
3167 decValue = decDigitValue(*p++);
3168 if (decValue >= 10U) {
3169 delete[] decSignificand;
3170 return createError("Invalid character in significand");
3171 }
3172 multiplier *= 10;
3173 val = val * 10 + decValue;
3174 /* The maximum number that can be multiplied by ten with any
3175 digit added without overflowing an integerPart. */
3176 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3177
3178 /* Multiply out the current part. */
3179 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3180 partCount, partCount + 1, false);
3181
3182 /* If we used another part (likely but not guaranteed), increase
3183 the count. */
3184 if (decSignificand[partCount])
3185 partCount++;
3186 } while (p <= D.lastSigDigit);
3187
3188 category = fcNormal;
3189 fs = roundSignificandWithExponent(decSignificand, partCount,
3190 D.exponent, rounding_mode);
3191
3192 delete [] decSignificand;
3193 }
3194
3195 return fs;
3196}
3197
3198bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3199 const size_t MIN_NAME_SIZE = 3;
3200
3201 if (str.size() < MIN_NAME_SIZE)
3202 return false;
3203
3204 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3205 makeInf(false);
3206 return true;
3207 }
3208
3209 bool IsNegative = str.consume_front("-");
3210 if (IsNegative) {
3211 if (str.size() < MIN_NAME_SIZE)
3212 return false;
3213
3214 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3215 makeInf(true);
3216 return true;
3217 }
3218 }
3219
3220 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3221 bool IsSignaling = str.consume_front_insensitive("s");
3222 if (IsSignaling) {
3223 if (str.size() < MIN_NAME_SIZE)
3224 return false;
3225 }
3226
3227 if (str.consume_front("nan") || str.consume_front("NaN")) {
3228 // A NaN without payload.
3229 if (str.empty()) {
3230 makeNaN(IsSignaling, IsNegative);
3231 return true;
3232 }
3233
3234 // Allow the payload to be inside parentheses.
3235 if (str.front() == '(') {
3236 // Parentheses should be balanced (and not empty).
3237 if (str.size() <= 2 || str.back() != ')')
3238 return false;
3239
3240 str = str.slice(1, str.size() - 1);
3241 }
3242
3243 // Determine the payload number's radix.
3244 unsigned Radix = 10;
3245 if (str[0] == '0') {
3246 if (str.size() > 1 && tolower(str[1]) == 'x') {
3247 str = str.drop_front(2);
3248 Radix = 16;
3249 } else {
3250 Radix = 8;
3251 }
3252 }
3253
3254 // Parse the payload and make the NaN.
3255 APInt Payload;
3256 if (!str.getAsInteger(Radix, Payload)) {
3257 makeNaN(IsSignaling, IsNegative, &Payload);
3258 return true;
3259 }
3260 }
3261
3262 return false;
3263}
3264
/* Parses the text in `str` into this value and reports the resulting status
   or an Error.  An empty string is rejected; the special spellings are tried
   first through convertFromStringSpecials; an optional leading '-' or '+' is
   consumed; a "0x"/"0X" prefix sends the remainder to
   convertFromHexadecimalString and everything else goes to
   convertFromDecimalString.
   NOTE(review): the embedded listing numbers jump from 3265 to 3267 and from
   3278 to 3280, so the line carrying the function name/parameters
   (presumably IEEEFloat::convertFromString taking `str` and `rounding_mode`)
   and the line opening the error return are not visible in this copy --
   confirm against the upstream file.  */
3265Expected<APFloat::opStatus>
3267 if (str.empty())
3268 return createError("Invalid string length");
3269
3270 // Handle special cases.
3271 if (convertFromStringSpecials(str))
3272 return opOK;
3273
3274 /* Handle a leading minus sign. */
3275 StringRef::iterator p = str.begin();
3276 size_t slen = str.size();
3277 sign = *p == '-' ? 1 : 0;
// A negative input is an error when the semantics have no signed
// representation (the statement's first line is missing from this listing).
3278 if (sign && !semantics->hasSignedRepr)
3280 "This floating point format does not support signed values");
3281
// Skip the sign character; a sign with nothing after it is an error.
3282 if (*p == '-' || *p == '+') {
3283 p++;
3284 slen--;
3285 if (!slen)
3286 return createError("String has no digits");
3287 }
3288
// Hexadecimal literal: "0x"/"0X" must be followed by at least one character.
3289 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3290 if (slen == 2)
3291 return createError("Invalid string");
3292 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3293 rounding_mode);
3294 }
3295
3296 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3297}
3298
3299/* Write out a hexadecimal representation of the floating point value
3300 to DST, which must be of sufficient size, in the C99 form
3301 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3302 excluding the terminating NUL.
3303
3304 If UPPERCASE, the output is in upper case, otherwise in lower case.
3305
3306 HEXDIGITS digits appear altogether, rounding the value if
3307 necessary. If HEXDIGITS is 0, the minimal precision to display the
3308 number precisely is used instead. If nothing would appear after
3309 the decimal point it is suppressed.
3310
3311 The decimal exponent is always printed and has at least one digit.
3312 Zero values display an exponent of zero. Infinities and NaNs
3313 appear as "infinity" or "nan" respectively.
3314
3315 The above rules are as specified by C99. There is ambiguity about
3316 what the leading hexadecimal digit should be. This implementation
3317 uses whatever is necessary so that the exponent is displayed as
3318 stored. This implies the exponent will fall within the IEEE format
3319 range, and the leading hexadecimal digit will be 0 (for denormals),
3320 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3321 any other digits zero).
3322*/
3323unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3324 bool upperCase,
3325 roundingMode rounding_mode) const {
3326 char *p;
3327
3328 p = dst;
3329 if (sign)
3330 *dst++ = '-';
3331
3332 switch (category) {
3333 case fcInfinity:
3334 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3335 dst += sizeof infinityL - 1;
3336 break;
3337
3338 case fcNaN:
3339 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3340 dst += sizeof NaNU - 1;
3341 break;
3342
3343 case fcZero:
3344 *dst++ = '0';
3345 *dst++ = upperCase ? 'X': 'x';
3346 *dst++ = '0';
3347 if (hexDigits > 1) {
3348 *dst++ = '.';
3349 memset (dst, '0', hexDigits - 1);
3350 dst += hexDigits - 1;
3351 }
3352 *dst++ = upperCase ? 'P': 'p';
3353 *dst++ = '0';
3354 break;
3355
3356 case fcNormal:
3357 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3358 break;
3359 }
3360
3361 *dst = 0;
3362
3363 return static_cast<unsigned int>(dst - p);
3364}
3365
3366/* Does the hard work of outputting the correctly rounded hexadecimal
3367 form of a normal floating point number with the specified number of
3368 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3369 digits necessary to print the value precisely is output. */
/* DST is the output cursor; the value returned is whatever
   writeSignedDecimal returns after the exponent has been written.
   UPPERCASE selects the "0X" / 'P' spelling and the upper-case digit table.
   ROUNDING_MODE is consulted only when HEXDIGITS is non-zero and smaller
   than the natural digit count, i.e. when digits are being dropped.  */
3370char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3371 bool upperCase,
3372 roundingMode rounding_mode) const {
3373 unsigned int count, valueBits, shift, partsCount, outputDigits;
3374 const char *hexDigitChars;
3375 const integerPart *significand;
3376 char *p;
3377 bool roundUp;
3378
// Emit the "0x" / "0X" prefix.
3379 *dst++ = '0';
3380 *dst++ = upperCase ? 'X': 'x';
3381
3382 roundUp = false;
3383 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3384
3385 significand = significandParts();
3386 partsCount = partCount();
3387
3388 /* +3 because the first digit only uses the single integer bit, so
3389 we have 3 virtual zero most-significant-bits. */
3390 valueBits = semantics->precision + 3;
// Left shift applied to each part below so that the top of the
// valueBits-wide value lines up with the top of an integerPart.
3391 shift = integerPartWidth - valueBits % integerPartWidth;
3392
3393 /* The natural number of digits required ignoring trailing
3394 insignificant zeroes. */
3395 outputDigits = (valueBits - significandLSB () + 3) / 4;
3396
3397 /* hexDigits of zero means use the required number for the
3398 precision. Otherwise, see if we are truncating. If we are,
3399 find out if we need to round away from zero. */
3400 if (hexDigits) {
3401 if (hexDigits < outputDigits) {
3402 /* We are dropping non-zero bits, so need to check how to round.
3403 "bits" is the number of dropped bits. */
3404 unsigned int bits;
3405 lostFraction fraction;
3406
3407 bits = valueBits - hexDigits * 4;
3408 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3409 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3410 }
3411 outputDigits = hexDigits;
3412 }
3413
3414 /* Write the digits consecutively, and start writing in the location
3415 of the hexadecimal point. We move the most significant digit
3416 left and add the hexadecimal point later. */
3417 p = ++dst;
3418
// Number of integerParts needed to hold valueBits bits; this can be one more
// than partsCount, which is handled as an all-zero part inside the loop.
3419 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3420
3421 while (outputDigits && count) {
3422 integerPart part;
3423
3424 /* Put the most significant integerPartWidth bits in "part". */
3425 if (--count == partsCount)
3426 part = 0; /* An imaginary higher zero part. */
3427 else
3428 part = significand[count] << shift;
3429
// Fill the low bits of "part" from the top of the next lower part.
3430 if (count && shift)
3431 part |= significand[count - 1] >> (integerPartWidth - shift);
3432
3433 /* Convert as much of "part" to hexdigits as we can. */
3434 unsigned int curDigits = integerPartWidth / 4;
3435
3436 curDigits = std::min(curDigits, outputDigits);
3437 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3438 outputDigits -= curDigits;
3439 }
3440
3441 if (roundUp) {
3442 char *q = dst;
3443
// Increment the last digit written; whenever a digit wraps around to '0'
// the loop continues with the digit before it.
3444 /* Note that hexDigitChars has a trailing '0'. */
3445 do {
3446 q--;
3447 *q = hexDigitChars[hexDigitValue (*q) + 1];
3448 } while (*q == '0');
3449 assert(q >= p);
3450 } else {
3451 /* Add trailing zeroes. */
3452 memset (dst, '0', outputDigits);
3453 dst += outputDigits;
3454 }
3455
3456 /* Move the most significant digit to before the point, and if there
3457 is something after the decimal point add it. This must come
3458 after rounding above. */
3459 p[-1] = p[0];
// Exactly one digit was produced: drop the slot reserved for the point.
3460 if (dst -1 == p)
3461 dst--;
3462 else
3463 p[0] = '.';
3464
3465 /* Finally output the exponent. */
3466 *dst++ = upperCase ? 'P': 'p';
3467
3468 return writeSignedDecimal (dst, exponent);
3469}
3470
// Hashes the floating point value `Arg`.  Values that are not finite non-zero
// contribute their category, sign (forced to zero for NaN) and the precision
// of their semantics; finite non-zero values additionally contribute the
// exponent and the significand words.
// NOTE(review): the embedded listing numbers skip 3471 and 3481, so the line
// with the function signature and the line opening the expression that wraps
// the significand range are not visible in this copy -- confirm against the
// upstream file.
3472 if (!Arg.isFiniteNonZero())
3473 return hash_combine((uint8_t)Arg.category,
3474 // NaN has no sign, fix it at zero.
3475 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3476 Arg.semantics->precision);
3477
3478 // Normal floats need their exponent and significand hashed.
3479 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3480 Arg.semantics->precision, Arg.exponent,
3482 Arg.significandParts(),
3483 Arg.significandParts() + Arg.partCount()));
3484}
3485
3486// Conversion from APFloat to/from host float/double. It may eventually be
3487// possible to eliminate these and have everybody deal with APFloats, but that
3488// will take a while. This approach will not easily extend to long double.
3489// Current implementation requires integerPartWidth==64, which is correct at
3490// the moment but could be made more general.
3491
3492// Denormals have exponent minExponent in APFloat, but minExponent-1 in
3493// the actual IEEE representations. We compensate for that here.
3494
3495APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3496 assert(semantics ==
3497 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3498 assert(partCount()==2);
3499
3500 uint64_t myexponent, mysignificand;
3501
3502 if (isFiniteNonZero()) {
3503 myexponent = exponent+16383; //bias
3504 mysignificand = significandParts()[0];
3505 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3506 myexponent = 0; // denormal
3507 } else if (category==fcZero) {
3508 myexponent = 0;
3509 mysignificand = 0;
3510 } else if (category==fcInfinity) {
3511 myexponent = 0x7fff;
3512 mysignificand = 0x8000000000000000ULL;
3513 } else {
3514 assert(category == fcNaN && "Unknown category");
3515 myexponent = 0x7fff;
3516 mysignificand = significandParts()[0];
3517 }
3518
3519 uint64_t words[2];
3520 words[0] = mysignificand;
3521 words[1] = ((uint64_t)(sign & 1) << 15) |
3522 (myexponent & 0x7fffLL);
3523 return APInt(80, words);
3524}
3525
// Encodes this legacy PPC double-double value as a 128-bit APInt built from
// two IEEE double bit patterns: words[0] is this value converted to double,
// and words[1] is the double holding the difference between this value and
// that first double (or 0 when the first conversion lost nothing or produced
// a value that is not finite non-zero).
3526APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3527 assert(semantics ==
3528 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3529 assert(partCount()==2);
3530
3531 uint64_t words[2];
3532 opStatus fs;
3533 bool losesInfo;
3534
3535 // Convert number to double. To avoid spurious underflows, we re-
3536 // normalize against the "double" minExponent first, and only *then*
3537 // truncate the mantissa. The result of that second conversion
3538 // may be inexact, but should never underflow.
3539 // Declare fltSemantics before APFloat that uses it (and
3540 // saves pointer to it) to ensure correct destruction order.
3541 fltSemantics extendedSemantics = *semantics;
3542 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3543 IEEEFloat extended(*this);
3544 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3545 assert(fs == opOK && !losesInfo);
3546 (void)fs;
3547
// First double: the extended value converted to IEEE double; losesInfo
// records whether that conversion dropped anything.
3548 IEEEFloat u(extended);
3549 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3550 assert(fs == opOK || fs == opInexact);
3551 (void)fs;
3552 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3553
3554 // If conversion was exact or resulted in a special case, we're done;
3555 // just set the second double to zero. Otherwise, re-convert back to
3556 // the extended format and compute the difference. This now should
3557 // convert exactly to double.
3558 if (u.isFiniteNonZero() && losesInfo) {
3559 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3560 assert(fs == opOK && !losesInfo);
3561 (void)fs;
3562
// Second double: extended - u, converted to IEEE double.
3563 IEEEFloat v(extended);
3564 v.subtract(u, rmNearestTiesToEven);
3565 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3566 assert(fs == opOK && !losesInfo);
3567 (void)fs;
3568 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3569 } else {
3570 words[1] = 0;
3571 }
3572
3573 return APInt(128, words);
3574}
3575
// Builds the APInt encoding of this value for the semantics S (which must be
// this object's semantics).  The result is S.sizeInBits wide: the stored
// significand words come first, and the last 64-bit word additionally
// receives the exponent field (shifted by trailing_significand_bits % 64)
// and the sign bit (at bit (S.sizeInBits - 1) % 64).
//
// The exponent field per category is:
//   finite non-zero  exponent + bias, or 0 when the biased exponent is 1 and
//                    the integer bit is clear (denormal);
//   zero             exponentZero(S) + bias, significand all zero;
//   infinity         exponentInf(S) + bias, significand all zero;
//   NaN              exponentNaN(S) + bias, significand copied as stored.
// Categories that S cannot represent hit llvm_unreachable.
3576template <const fltSemantics &S>
3577APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3578 assert(semantics == &S);
// The bias is -(minExponent - 1), except for semFloat8E8M0FNU where it is
// -minExponent.
3579 const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
3580 ? -S.minExponent
3581 : -(S.minExponent - 1);
3582 constexpr unsigned int trailing_significand_bits = S.precision - 1;
// Index of the significand part holding the integer bit, and the bit itself.
3583 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3584 constexpr integerPart integer_bit =
3585 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3586 constexpr uint64_t significand_mask = integer_bit - 1;
// With no trailing significand bits the exponent field spans all
// S.sizeInBits bits; otherwise it is what remains after the sign bit and the
// trailing significand.
3587 constexpr unsigned int exponent_bits =
3588 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3589 : S.sizeInBits;
3590 static_assert(exponent_bits < 64);
3591 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3592
3593 uint64_t myexponent;
3594 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3595 mysignificand;
3596
3597 if (isFiniteNonZero()) {
3598 myexponent = exponent + bias;
3599 std::copy_n(significandParts(), mysignificand.size(),
3600 mysignificand.begin());
3601 if (myexponent == 1 &&
3602 !(significandParts()[integer_bit_part] & integer_bit))
3603 myexponent = 0; // denormal
3604 } else if (category == fcZero) {
3605 if (!S.hasZero)
3606 llvm_unreachable("semantics does not support zero!");
3607 myexponent = ::exponentZero(S) + bias;
3608 mysignificand.fill(0);
3609 } else if (category == fcInfinity) {
3610 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3611 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3612 llvm_unreachable("semantics don't support inf!");
3613 myexponent = ::exponentInf(S) + bias;
3614 mysignificand.fill(0);
3615 } else {
3616 assert(category == fcNaN && "Unknown category!");
3617 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3618 llvm_unreachable("semantics don't support NaN!");
3619 myexponent = ::exponentNaN(S) + bias;
3620 std::copy_n(significandParts(), mysignificand.size(),
3621 mysignificand.begin());
3622 }
// Assemble the output words: significand first, remaining words zeroed.
3623 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3624 auto words_iter =
3625 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3626 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3627 // Clear the integer bit.
3628 words[mysignificand.size() - 1] &= significand_mask;
3629 }
3630 std::fill(words_iter, words.end(), uint64_t{0});
// OR the sign bit and the masked exponent field into the last word.
3631 constexpr size_t last_word = words.size() - 1;
3632 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3633 << ((S.sizeInBits - 1) % 64);
3634 words[last_word] |= shifted_sign;
3635 uint64_t shifted_exponent = (myexponent & exponent_mask)
3636 << (trailing_significand_bits % 64);
3637 words[last_word] |= shifted_exponent;
// Formats that fit in one word are built from that single word.
3638 if constexpr (last_word == 0) {
3639 return APInt(S.sizeInBits, words[0]);
3640 }
3641 return APInt(S.sizeInBits, words);
3642}
3643
3644APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3645 assert(partCount() == 2);
3646 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3647}
3648
3649APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3650 assert(partCount()==1);
3651 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3652}
3653
3654APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3655 assert(partCount()==1);
3656 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3657}
3658
3659APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3660 assert(partCount() == 1);
3661 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3662}
3663
3664APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3665 assert(partCount()==1);
3666 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3667}
3668
3669APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3670 assert(partCount() == 1);
3671 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3672}
3673
3674APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3675 assert(partCount() == 1);
3676 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3677}
3678
3679APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3680 assert(partCount() == 1);
3681 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3682}
3683
3684APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3685 assert(partCount() == 1);
3686 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3687}
3688
3689APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3690 assert(partCount() == 1);
3691 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3692}
3693
3694APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3695 assert(partCount() == 1);
3696 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3697}
3698
3699APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3700 assert(partCount() == 1);
3701 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3702}
3703
3704APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3705 assert(partCount() == 1);
3706 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3707}
3708
3709APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3710 assert(partCount() == 1);
3711 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3712}
3713
3714APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3715 assert(partCount() == 1);
3716 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3717}
3718
3719APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3720 assert(partCount() == 1);
3721 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3722}
3723
3724APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3725 assert(partCount() == 1);
3726 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3727}
3728
3729// This function creates an APInt that is just a bit map of the floating
3730// point constant as it would appear in memory. It is not a conversion,
3731// and treating the result as a normal integer is unlikely to be useful.
3732
// Dispatches on `semantics`: each known format is compared by address and
// handed to its dedicated convert...APFloatToAPInt routine.  The x87
// double-extended format is the fallback and is asserted rather than tested.
// NOTE(review): the embedded listing numbers skip 3733, so the line carrying
// this function's signature (presumably IEEEFloat::bitcastToAPInt) is not
// visible in this copy -- confirm against the upstream file.
3734 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3735 return convertHalfAPFloatToAPInt();
3736
3737 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3738 return convertBFloatAPFloatToAPInt();
3739
3740 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3741 return convertFloatAPFloatToAPInt();
3742
3743 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3744 return convertDoubleAPFloatToAPInt();
3745
3746 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3747 return convertQuadrupleAPFloatToAPInt();
3748
3749 if (semantics ==
3750 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3751 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3752
3753 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3754 return convertFloat8E5M2APFloatToAPInt();
3755
3756 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3757 return convertFloat8E5M2FNUZAPFloatToAPInt();
3758
3759 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3760 return convertFloat8E4M3APFloatToAPInt();
3761
3762 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3763 return convertFloat8E4M3FNAPFloatToAPInt();
3764
3765 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3766 return convertFloat8E4M3FNUZAPFloatToAPInt();
3767
3768 if (semantics ==
3769 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3770 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3771
3772 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3773 return convertFloat8E3M4APFloatToAPInt();
3774
3775 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3776 return convertFloatTF32APFloatToAPInt();
3777
3778 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3779 return convertFloat8E8M0FNUAPFloatToAPInt();
3780
3781 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3782 return convertFloat6E3M2FNAPFloatToAPInt();
3783
3784 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3785 return convertFloat6E2M3FNAPFloatToAPInt();
3786
3787 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3788 return convertFloat4E2M1FNAPFloatToAPInt();
3789
// None of the formats above matched: only x87 double extended remains.
3790 assert(semantics ==
3791 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3792 "unknown format!");
3793 return convertF80LongDoubleAPFloatToAPInt();
3794}
3795
// Returns this value as a host float by taking its bit pattern from
// bitcastToAPInt() and reinterpreting it with APInt::bitsToFloat().  Only
// valid for semIEEEsingle, which is asserted.
// NOTE(review): the embedded listing numbers skip 3796, so the signature line
// (presumably IEEEFloat::convertToFloat) is not visible in this copy.
3797 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3798 "Float semantics are not IEEEsingle");
3799 APInt api = bitcastToAPInt();
3800 return api.bitsToFloat();
3801}
3802
// Returns this value as a host double by taking its bit pattern from
// bitcastToAPInt() and reinterpreting it with APInt::bitsToDouble().  Only
// valid for semIEEEdouble, which is asserted.
// NOTE(review): the embedded listing numbers skip 3803, so the signature line
// (presumably IEEEFloat::convertToDouble) is not visible in this copy.
3804 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3805 "Float semantics are not IEEEdouble");
3806 APInt api = bitcastToAPInt();
3807 return api.bitsToDouble();
3808}
3809
3810#ifdef HAS_IEE754_FLOAT128
3811float128 IEEEFloat::convertToQuad() const {
3812 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
3813 "Float semantics are not IEEEquads");
3814 APInt api = bitcastToAPInt();
3815 return api.bitsToQuad();
3816}
3817#endif
3818
3819/// Integer bit is explicit in this format. Intel hardware (387 and later)
3820/// does not support these bit patterns:
3821/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3822/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3823/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3824/// exponent = 0, integer bit 1 ("pseudodenormal")
3825/// At the moment, the first three are treated as NaNs, the last one as Normal.
3826void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3827 uint64_t i1 = api.getRawData()[0];
3828 uint64_t i2 = api.getRawData()[1];
3829 uint64_t myexponent = (i2 & 0x7fff);
3830 uint64_t mysignificand = i1;
3831 uint8_t myintegerbit = mysignificand >> 63;
3832
3833 initialize(&APFloatBase::semX87DoubleExtended);
3834 assert(partCount()==2);
3835
3836 sign = static_cast<unsigned int>(i2>>15);
3837 if (myexponent == 0 && mysignificand == 0) {
3838 makeZero(sign);
3839 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3840 makeInf(sign);
3841 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3842 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3843 category = fcNaN;
3844 exponent = exponentNaN();
3845 significandParts()[0] = mysignificand;
3846 significandParts()[1] = 0;
3847 } else {
3848 category = fcNormal;
3849 exponent = myexponent - 16383;
3850 significandParts()[0] = mysignificand;
3851 significandParts()[1] = 0;
3852 if (myexponent==0) // denormal
3853 exponent = -16382;
3854 }
3855}
3856
// Initializes this value from a 128-bit pattern whose two 64-bit words are
// IEEE double bit patterns.  The first word is loaded as a double and
// converted to semPPCDoubleDoubleLegacy; if the result is finite and
// non-zero, the second word is converted the same way into `v`.
// NOTE(review): the embedded listing numbers skip 3878, so the statement that
// combines `v` into this value is not visible in this copy -- confirm against
// the upstream file.
3857void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3858 uint64_t i1 = api.getRawData()[0];
3859 uint64_t i2 = api.getRawData()[1];
3860 opStatus fs;
3861 bool losesInfo;
3862
3863 // Get the first double and convert to our format.
3864 initFromDoubleAPInt(APInt(64, i1));
3865 fs = convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3866 &losesInfo);
3867 assert(fs == opOK && !losesInfo);
3868 (void)fs;
3869
3870 // Unless we have a special case, add in second double.
3871 if (isFiniteNonZero()) {
3872 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3873 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3874 &losesInfo);
3875 assert(fs == opOK && !losesInfo);
3876 (void)fs;
3877
3879 }
3880}
3881
3882// The E8M0 format has the following characteristics:
3883// It is an 8-bit unsigned format with only exponents (no actual significand).
3884// No encodings for {zero, infinities or denorms}.
3885// NaN is represented by all 1's.
3886// Bias is 127.
3887void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3888 const uint64_t exponent_mask = 0xff;
3889 uint64_t val = api.getRawData()[0];
3890 uint64_t myexponent = (val & exponent_mask);
3891
3892 initialize(&APFloatBase::semFloat8E8M0FNU);
3893 assert(partCount() == 1);
3894
3895 // This format has unsigned representation only
3896 sign = 0;
3897
3898 // Set the significand
3899 // This format does not have any significand but the 'Pth' precision bit is
3900 // always set to 1 for consistency in APFloat's internal representation.
3901 uint64_t mysignificand = 1;
3902 significandParts()[0] = mysignificand;
3903
3904 // This format can either have a NaN or fcNormal
3905 // All 1's i.e. 255 is a NaN
3906 if (val == exponent_mask) {
3907 category = fcNaN;
3908 exponent = exponentNaN();
3909 return;
3910 }
3911 // Handle fcNormal...
3912 category = fcNormal;
3913 exponent = myexponent - 127; // 127 is bias
3914}
// Initializes this value from `api`, interpreted as the bit pattern of a
// number with semantics S (api must be exactly S.sizeInBits wide).
//
// The trailing significand is taken from the low words of api, while the
// exponent field and the sign bit are read from api's last word.  The value
// is then classified in this order: infinity (only for IEEE754 non-finite
// behaviour), NaN (according to S.nanEncoding), zero, and otherwise a normal
// or denormal number.
3915template <const fltSemantics &S>
3916void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3917 assert(api.getBitWidth() == S.sizeInBits);
3918 constexpr integerPart integer_bit = integerPart{1}
3919 << ((S.precision - 1) % integerPartWidth);
3920 constexpr uint64_t significand_mask = integer_bit - 1;
3921 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3922 constexpr unsigned int stored_significand_parts =
3923 partCountForBits(trailing_significand_bits);
3924 constexpr unsigned int exponent_bits =
3925 S.sizeInBits - 1 - trailing_significand_bits;
3926 static_assert(exponent_bits < 64);
3927 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3928 constexpr int bias = -(S.minExponent - 1);
3929
3930 // Copy the bits of the significand. We need to clear out the exponent and
3931 // sign bit in the last word.
3932 std::array<integerPart, stored_significand_parts> mysignificand;
3933 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3934 if constexpr (significand_mask != 0) {
3935 mysignificand[mysignificand.size() - 1] &= significand_mask;
3936 }
3937
3938 // We assume the last word holds the sign bit, the exponent, and potentially
3939 // some of the trailing significand field.
3940 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3941 uint64_t myexponent =
3942 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3943
3944 initialize(&S);
3945 assert(partCount() == mysignificand.size());
3946
3947 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3948
3949 bool all_zero_significand =
3950 llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3951
3952 bool is_zero = myexponent == 0 && all_zero_significand;
3953
// Infinity exists only for IEEE754-style non-finite behaviour: the exponent
// matches exponentInf(S) and the trailing significand is zero.
3954 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3955 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3956 makeInf(sign);
3957 return;
3958 }
3959 }
3960
3961 bool is_nan = false;
3962
// NaN detection depends on the encoding: IEEE (NaN exponent with a non-zero
// significand), AllOnes (NaN exponent with every trailing significand bit
// set) or NegativeZero (the pattern that would otherwise be negative zero).
3963 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3964 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3965 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3966 bool all_ones_significand =
3967 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3968 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3969 (!significand_mask ||
3970 mysignificand[mysignificand.size() - 1] == significand_mask);
3971 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3972 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3973 is_nan = is_zero && sign;
3974 }
3975
3976 if (is_nan) {
3977 category = fcNaN;
3978 exponent = ::exponentNaN(S);
3979 std::copy_n(mysignificand.begin(), mysignificand.size(),
3980 significandParts());
3981 return;
3982 }
3983
3984 if (is_zero) {
3985 makeZero(sign);
3986 return;
3987 }
3988
// Finite non-zero value: a zero exponent field means a denormal, stored with
// exponent minExponent and no integer bit; otherwise the implicit integer
// bit is made explicit in the last significand part.
3989 category = fcNormal;
3990 exponent = myexponent - bias;
3991 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3992 if (myexponent == 0) // denormal
3993 exponent = S.minExponent;
3994 else
3995 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3996}
3997
3998void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3999 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
4000}
4001
4002void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
4003 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
4004}
4005
4006void IEEEFloat::initFromFloatAPInt(const APInt &api) {
4007 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
4008}
4009
4010void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
4011 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
4012}
4013
4014void IEEEFloat::initFromHalfAPInt(const APInt &api) {
4015 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
4016}
4017
4018void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
4019 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
4020}
4021
4022void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
4023 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
4024}
4025
4026void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
4027 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
4028}
4029
4030void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
4031 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
4032}
4033
4034void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
4035 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
4036}
4037
4038void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
4039 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
4040}
4041
4042void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
4043 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
4044}
4045
4046void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
4047 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
4048}
4049
4050void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
4051 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
4052}
4053
4054void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
4055 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
4056}
4057
4058void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
4059 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
4060}
4061
4062/// Treat api as containing the bits of a floating point number.
4063void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
4064 assert(api.getBitWidth() == Sem->sizeInBits);
4065 if (Sem == &APFloatBase::semIEEEhalf)
4066 return initFromHalfAPInt(api);
4067 if (Sem == &APFloatBase::semBFloat)
4068 return initFromBFloatAPInt(api);
4069 if (Sem == &APFloatBase::semIEEEsingle)
4070 return initFromFloatAPInt(api);
4071 if (Sem == &APFloatBase::semIEEEdouble)
4072 return initFromDoubleAPInt(api);
4073 if (Sem == &APFloatBase::semX87DoubleExtended)
4074 return initFromF80LongDoubleAPInt(api);
4075 if (Sem == &APFloatBase::semIEEEquad)
4076 return initFromQuadrupleAPInt(api);
4077 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
4078 return initFromPPCDoubleDoubleLegacyAPInt(api);
4079 if (Sem == &APFloatBase::semFloat8E5M2)
4080 return initFromFloat8E5M2APInt(api);
4081 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
4082 return initFromFloat8E5M2FNUZAPInt(api);
4083 if (Sem == &APFloatBase::semFloat8E4M3)
4084 return initFromFloat8E4M3APInt(api);
4085 if (Sem == &APFloatBase::semFloat8E4M3FN)
4086 return initFromFloat8E4M3FNAPInt(api);
4087 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
4088 return initFromFloat8E4M3FNUZAPInt(api);
4089 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
4090 return initFromFloat8E4M3B11FNUZAPInt(api);
4091 if (Sem == &APFloatBase::semFloat8E3M4)
4092 return initFromFloat8E3M4APInt(api);
4093 if (Sem == &APFloatBase::semFloatTF32)
4094 return initFromFloatTF32APInt(api);
4095 if (Sem == &APFloatBase::semFloat8E8M0FNU)
4096 return initFromFloat8E8M0FNUAPInt(api);
4097 if (Sem == &APFloatBase::semFloat6E3M2FN)
4098 return initFromFloat6E3M2FNAPInt(api);
4099 if (Sem == &APFloatBase::semFloat6E2M3FN)
4100 return initFromFloat6E2M3FNAPInt(api);
4101 if (Sem == &APFloatBase::semFloat4E2M1FN)
4102 return initFromFloat4E2M1FNAPInt(api);
4103
4104 llvm_unreachable("unsupported semantics");
4105}
4106
4107/// Make this number the largest magnitude normal number in the given
4108/// semantics.
4109void IEEEFloat::makeLargest(bool Negative) {
4110 if (Negative && !semantics->hasSignedRepr)
4112 "This floating point format does not support signed values");
4113 // We want (in interchange format):
4114 // sign = {Negative}
4115 // exponent = 1..10
4116 // significand = 1..1
4117 category = fcNormal;
4118 sign = Negative;
4119 exponent = semantics->maxExponent;
4120
4121 // Use memset to set all but the highest integerPart to all ones.
4122 integerPart *significand = significandParts();
4123 unsigned PartCount = partCount();
4124 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4125
4126 // Set the high integerPart especially setting all unused top bits for
4127 // internal consistency.
4128 const unsigned NumUnusedHighBits =
4129 PartCount*integerPartWidth - semantics->precision;
4130 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4131 ? (~integerPart(0) >> NumUnusedHighBits)
4132 : 0;
4133 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4134 semantics->nanEncoding == fltNanEncoding::AllOnes &&
4135 (semantics->precision > 1))
4136 significand[0] &= ~integerPart(1);
4137}
4138
4139/// Make this number the smallest magnitude denormal number in the given
4140/// semantics.
4141void IEEEFloat::makeSmallest(bool Negative) {
4142 if (Negative && !semantics->hasSignedRepr)
4144 "This floating point format does not support signed values");
4145 // We want (in interchange format):
4146 // sign = {Negative}
4147 // exponent = 0..0
4148 // significand = 0..01
4149 category = fcNormal;
4150 sign = Negative;
4151 exponent = semantics->minExponent;
4152 APInt::tcSet(significandParts(), 1, partCount());
4153}
4154
// NOTE(review): the signature line of this definition is missing from this
// extract, as is the line that opens the call taking the message string
// below. The body uses a `bool`-like `Negative`; confirm against upstream.
//
// Sets this value to the smallest magnitude normalized number: category
// fcNormal, the minimum exponent, and a significand in which only the
// integer bit (bit precision - 1) is set.
4156 if (Negative && !semantics->hasSignedRepr)
4158 "This floating point format does not support signed values");
4159 // We want (in interchange format):
4160 // sign = {Negative}
4161 // exponent = 0..0
4162 // significand = 10..0
4163
4164 category = fcNormal;
4165 zeroSignificand();
4166 sign = Negative;
4167 exponent = semantics->minExponent;
4168 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4169}
4170
4171IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4172 initFromAPInt(&Sem, API);
4173}
4174
// NOTE(review): the signature line is missing from this extract; the body
// reads a value named `f`.
// Initializes this object with semIEEEsingle semantics from the bits that
// APInt::floatToBits produces for `f`.
4176 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4177}
4178
// NOTE(review): the signature line is missing from this extract; the body
// reads a value named `d`.
// Initializes this object with semIEEEdouble semantics from the bits that
// APInt::doubleToBits produces for `d`.
4180 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4181}
4182
4183namespace {
4184 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4185 Buffer.append(Str.begin(), Str.end());
4186 }
4187
4188 /// Removes data from the given significand until it is no more
4189 /// precise than is required for the desired precision.
4190 void AdjustToPrecision(APInt &significand,
4191 int &exp, unsigned FormatPrecision) {
4192 unsigned bits = significand.getActiveBits();
4193
4194 // 196/59 is a very slight overestimate of lg_2(10).
4195 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4196
4197 if (bits <= bitsRequired) return;
4198
4199 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4200 if (!tensRemovable) return;
4201
4202 exp += tensRemovable;
4203
4204 APInt divisor(significand.getBitWidth(), 1);
4205 APInt powten(significand.getBitWidth(), 10);
4206 while (true) {
4207 if (tensRemovable & 1)
4208 divisor *= powten;
4209 tensRemovable >>= 1;
4210 if (!tensRemovable) break;
4211 powten *= powten;
4212 }
4213
4214 significand = significand.udiv(divisor);
4215
4216 // Truncate the significand down to its active bit count.
4217 significand = significand.trunc(significand.getActiveBits());
4218 }
4219
4220
4221 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4222 int &exp, unsigned FormatPrecision) {
4223 unsigned N = buffer.size();
4224 if (N <= FormatPrecision) return;
4225
4226 // The most significant figures are the last ones in the buffer.
4227 unsigned FirstSignificant = N - FormatPrecision;
4228
4229 // Round.
4230 // FIXME: this probably shouldn't use 'round half up'.
4231
4232 // Rounding down is just a truncation, except we also want to drop
4233 // trailing zeros from the new result.
4234 if (buffer[FirstSignificant - 1] < '5') {
4235 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4236 FirstSignificant++;
4237
4238 exp += FirstSignificant;
4239 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4240 return;
4241 }
4242
4243 // Rounding up requires a decimal add-with-carry. If we continue
4244 // the carry, the newly-introduced zeros will just be truncated.
4245 for (unsigned I = FirstSignificant; I != N; ++I) {
4246 if (buffer[I] == '9') {
4247 FirstSignificant++;
4248 } else {
4249 buffer[I]++;
4250 break;
4251 }
4252 }
4253
4254 // If we carried through, we have exactly one digit of precision.
4255 if (FirstSignificant == N) {
4256 exp += FirstSignificant;
4257 buffer.clear();
4258 buffer.push_back('1');
4259 return;
4260 }
4261
4262 exp += FirstSignificant;
4263 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4264 }
4265
4266 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4267 APInt significand, unsigned FormatPrecision,
4268 unsigned FormatMaxPadding, bool TruncateZero) {
4269 const int semanticsPrecision = significand.getBitWidth();
4270
4271 if (isNeg)
4272 Str.push_back('-');
4273
4274 // Set FormatPrecision if zero. We want to do this before we
4275 // truncate trailing zeros, as those are part of the precision.
4276 if (!FormatPrecision) {
4277 // We use enough digits so the number can be round-tripped back to an
4278 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4279 // Accurately" by Steele and White.
4280 // FIXME: Using a formula based purely on the precision is conservative;
4281 // we can print fewer digits depending on the actual value being printed.
4282
4283 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4284 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4285 }
4286
4287 // Ignore trailing binary zeros.
4288 int trailingZeros = significand.countr_zero();
4289 exp += trailingZeros;
4290 significand.lshrInPlace(trailingZeros);
4291
4292 // Change the exponent from 2^e to 10^e.
4293 if (exp == 0) {
4294 // Nothing to do.
4295 } else if (exp > 0) {
4296 // Just shift left.
4297 significand = significand.zext(semanticsPrecision + exp);
4298 significand <<= exp;
4299 exp = 0;
4300 } else { /* exp < 0 */
4301 int texp = -exp;
4302
4303 // We transform this using the identity:
4304 // (N)(2^-e) == (N)(5^e)(10^-e)
4305 // This means we have to multiply N (the significand) by 5^e.
4306 // To avoid overflow, we have to operate on numbers large
4307 // enough to store N * 5^e:
4308 // log2(N * 5^e) == log2(N) + e * log2(5)
4309 // <= semantics->precision + e * 137 / 59
4310 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4311
4312 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4313
4314 // Multiply significand by 5^e.
4315 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4316 significand = significand.zext(precision);
4317 APInt five_to_the_i(precision, 5);
4318 while (true) {
4319 if (texp & 1)
4320 significand *= five_to_the_i;
4321
4322 texp >>= 1;
4323 if (!texp)
4324 break;
4325 five_to_the_i *= five_to_the_i;
4326 }
4327 }
4328
4329 AdjustToPrecision(significand, exp, FormatPrecision);
4330
4332
4333 // Fill the buffer.
4334 unsigned precision = significand.getBitWidth();
4335 if (precision < 4) {
4336 // We need enough precision to store the value 10.
4337 precision = 4;
4338 significand = significand.zext(precision);
4339 }
4340 APInt ten(precision, 10);
4341 APInt digit(precision, 0);
4342
4343 bool inTrail = true;
4344 while (significand != 0) {
4345 // digit <- significand % 10
4346 // significand <- significand / 10
4347 APInt::udivrem(significand, ten, significand, digit);
4348
4349 unsigned d = digit.getZExtValue();
4350
4351 // Drop trailing zeros.
4352 if (inTrail && !d)
4353 exp++;
4354 else {
4355 buffer.push_back((char) ('0' + d));
4356 inTrail = false;
4357 }
4358 }
4359
4360 assert(!buffer.empty() && "no characters in buffer!");
4361
4362 // Drop down to FormatPrecision.
4363 // TODO: don't do more precise calculations above than are required.
4364 AdjustToPrecision(buffer, exp, FormatPrecision);
4365
4366 unsigned NDigits = buffer.size();
4367
4368 // Check whether we should use scientific notation.
4369 bool FormatScientific;
4370 if (!FormatMaxPadding)
4371 FormatScientific = true;
4372 else {
4373 if (exp >= 0) {
4374 // 765e3 --> 765000
4375 // ^^^
4376 // But we shouldn't make the number look more precise than it is.
4377 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4378 NDigits + (unsigned) exp > FormatPrecision);
4379 } else {
4380 // Power of the most significant digit.
4381 int MSD = exp + (int) (NDigits - 1);
4382 if (MSD >= 0) {
4383 // 765e-2 == 7.65
4384 FormatScientific = false;
4385 } else {
4386 // 765e-5 == 0.00765
4387 // ^ ^^
4388 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4389 }
4390 }
4391 }
4392
4393 // Scientific formatting is pretty straightforward.
4394 if (FormatScientific) {
4395 exp += (NDigits - 1);
4396
4397 Str.push_back(buffer[NDigits-1]);
4398 Str.push_back('.');
4399 if (NDigits == 1 && TruncateZero)
4400 Str.push_back('0');
4401 else
4402 for (unsigned I = 1; I != NDigits; ++I)
4403 Str.push_back(buffer[NDigits-1-I]);
4404 // Fill with zeros up to FormatPrecision.
4405 if (!TruncateZero && FormatPrecision > NDigits - 1)
4406 Str.append(FormatPrecision - NDigits + 1, '0');
4407 // For !TruncateZero we use lower 'e'.
4408 Str.push_back(TruncateZero ? 'E' : 'e');
4409
4410 Str.push_back(exp >= 0 ? '+' : '-');
4411 if (exp < 0)
4412 exp = -exp;
4413 SmallVector<char, 6> expbuf;
4414 do {
4415 expbuf.push_back((char) ('0' + (exp % 10)));
4416 exp /= 10;
4417 } while (exp);
4418 // Exponent always at least two digits if we do not truncate zeros.
4419 if (!TruncateZero && expbuf.size() < 2)
4420 expbuf.push_back('0');
4421 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4422 Str.push_back(expbuf[E-1-I]);
4423 return;
4424 }
4425
4426 // Non-scientific, positive exponents.
4427 if (exp >= 0) {
4428 for (unsigned I = 0; I != NDigits; ++I)
4429 Str.push_back(buffer[NDigits-1-I]);
4430 for (unsigned I = 0; I != (unsigned) exp; ++I)
4431 Str.push_back('0');
4432 return;
4433 }
4434
4435 // Non-scientific, negative exponents.
4436
4437 // The number of digits to the left of the decimal point.
4438 int NWholeDigits = exp + (int) NDigits;
4439
4440 unsigned I = 0;
4441 if (NWholeDigits > 0) {
4442 for (; I != (unsigned) NWholeDigits; ++I)
4443 Str.push_back(buffer[NDigits-I-1]);
4444 Str.push_back('.');
4445 } else {
4446 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4447
4448 Str.push_back('0');
4449 Str.push_back('.');
4450 for (unsigned Z = 1; Z != NZeros; ++Z)
4451 Str.push_back('0');
4452 }
4453
4454 for (; I != NDigits; ++I)
4455 Str.push_back(buffer[NDigits-I-1]);
4456
4457 }
4458} // namespace
4459
4460void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4461 unsigned FormatMaxPadding, bool TruncateZero) const {
4462 switch (category) {
4463 case fcInfinity:
4464 if (isNegative())
4465 return append(Str, "-Inf");
4466 else
4467 return append(Str, "+Inf");
4468
4469 case fcNaN: return append(Str, "NaN");
4470
4471 case fcZero:
4472 if (isNegative())
4473 Str.push_back('-');
4474
4475 if (!FormatMaxPadding) {
4476 if (TruncateZero)
4477 append(Str, "0.0E+0");
4478 else {
4479 append(Str, "0.0");
4480 if (FormatPrecision > 1)
4481 Str.append(FormatPrecision - 1, '0');
4482 append(Str, "e+00");
4483 }
4484 } else {
4485 Str.push_back('0');
4486 }
4487 return;
4488
4489 case fcNormal:
4490 break;
4491 }
4492
4493 // Decompose the number into an APInt and an exponent.
4494 int exp = exponent - ((int) semantics->precision - 1);
4495 APInt significand(
4496 semantics->precision,
4497 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4498
4499 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4500 FormatMaxPadding, TruncateZero);
4501
4502}
4503
// NOTE(review): the signature line of this definition is missing from this
// extract.
//
// Returns the base-2 exponent of the magnitude when exactly one significand
// bit is set, and INT_MIN otherwise (non-finite, zero, or more than one bit
// set).
4505 if (!isFinite() || isZero())
4506 return INT_MIN;
4507
4508 const integerPart *Parts = significandParts();
4509 const int PartCount = partCountForBits(semantics->precision);
4510
// Bail out as soon as a second set bit is seen.
4511 int PopCount = 0;
4512 for (int i = 0; i < PartCount; ++i) {
4513 PopCount += llvm::popcount(Parts[i]);
4514 if (PopCount > 1)
4515 return INT_MIN;
4516 }
4517
// Above the minimum exponent the stored exponent is returned unchanged.
4518 if (exponent != semantics->minExponent)
4519 return exponent;
4520
// At the minimum exponent the result depends on where the single set bit
// sits, so locate it part by part.
4521 int CountrParts = 0;
4522 for (int i = 0; i < PartCount;
4523 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4524 if (Parts[i] != 0) {
4525 return exponent - semantics->precision + CountrParts +
4526 llvm::countr_zero(Parts[i]) + 1;
4527 }
4528 }
4529
4530 llvm_unreachable("didn't find the set bit");
4531}
4532
// NOTE(review): the signature line of this definition is missing from this
// extract.
//
// Returns true only for a NaN whose significand bit (precision - 2) is clear.
// Non-NaN values, and any value in a NanOnly or FiniteOnly format, yield
// false.
4534 if (!isNaN())
4535 return false;
4536 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4537 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4538 return false;
4539
4540 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4541 // first bit of the trailing significand being 0.
4542 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4543}
4544
4545/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4546///
4547/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4548/// appropriate sign switching before/after the computation.
// NOTE(review): the signature line of this definition is missing from this
// extract. The body reads a flag named `nextDown` and returns an opStatus.
// The value is modified in place. The status is opOK except for a signaling
// NaN input, which is replaced by a NaN built with makeNaN and reported as
// opInvalidOp.
4550 // If we are performing nextDown, swap sign so we have -x.
4551 if (nextDown)
4552 changeSign();
4553
4554 // Compute nextUp(x)
4555 opStatus result = opOK;
4556
4557 // Handle each float category separately.
4558 switch (category) {
4559 case fcInfinity:
4560 // nextUp(+inf) = +inf
4561 if (!isNegative())
4562 break;
4563 // nextUp(-inf) = -getLargest()
4564 makeLargest(true);
4565 break;
4566 case fcNaN:
4567 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4568 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4569 // change the payload.
4570 if (isSignaling()) {
4571 result = opInvalidOp;
4572 // For consistency, propagate the sign of the sNaN to the qNaN.
4573 makeNaN(false, isNegative(), nullptr);
4574 }
4575 break;
4576 case fcZero:
4577 // nextUp(pm 0) = +getSmallest()
4578 makeSmallest(false);
4579 break;
4580 case fcNormal:
4581 // nextUp(-getSmallest()) = -0
4582 if (isSmallest() && isNegative()) {
4583 APInt::tcSet(significandParts(), 0, partCount());
4584 category = fcZero;
4585 exponent = 0;
// Formats that encode NaN as negative zero get a positive zero here.
4586 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4587 sign = false;
// NOTE(review): the statement guarded by the following `if` appears to be
// missing from this extract; as shown, the `break` would become the guarded
// statement. Restore it from upstream before relying on this text.
4588 if (!semantics->hasZero)
4590 break;
4591 }
4592
4593 if (isLargest() && !isNegative()) {
4594 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4595 // nextUp(getLargest()) == NAN
4596 makeNaN();
4597 break;
// NOTE(review): the rest of the following condition is missing from this
// extract. Judging by the comment below it is the finite-only case; confirm.
4598 } else if (semantics->nonFiniteBehavior ==
4600 // nextUp(getLargest()) == getLargest()
4601 break;
4602 } else {
4603 // nextUp(getLargest()) == INFINITY
4604 APInt::tcSet(significandParts(), 0, partCount());
4605 category = fcInfinity;
4606 exponent = semantics->maxExponent + 1;
4607 break;
4608 }
4609 }
4610
4611 // nextUp(normal) == normal + inc.
4612 if (isNegative()) {
4613 // If we are negative, we need to decrement the significand.
4614
4615 // We only cross a binade boundary that requires adjusting the exponent
4616 // if:
4617 // 1. exponent != semantics->minExponent. This implies we are not in the
4618 // smallest binade or are dealing with denormals.
4619 // 2. Our significand excluding the integral bit is all zeros.
4620 bool WillCrossBinadeBoundary =
4621 exponent != semantics->minExponent && isSignificandAllZeros();
4622
4623 // Decrement the significand.
4624 //
4625 // We always do this since:
4626 // 1. If we are dealing with a non-binade decrement, by definition we
4627 // just decrement the significand.
4628 // 2. If we are dealing with a normal -> normal binade decrement, since
4629 // we have an explicit integral bit the fact that all bits but the
4630 // integral bit are zero implies that subtracting one will yield a
4631 // significand with 0 integral bit and 1 in all other spots. Thus we
4632 // must just adjust the exponent and set the integral bit to 1.
4633 // 3. If we are dealing with a normal -> denormal binade decrement,
4634 // since we set the integral bit to 0 when we represent denormals, we
4635 // just decrement the significand.
4636 integerPart *Parts = significandParts();
4637 APInt::tcDecrement(Parts, partCount());
4638
4639 if (WillCrossBinadeBoundary) {
4640 // Our result is a normal number. Do the following:
4641 // 1. Set the integral bit to 1.
4642 // 2. Decrement the exponent.
4643 APInt::tcSetBit(Parts, semantics->precision - 1);
4644 exponent--;
4645 }
4646 } else {
4647 // If we are positive, we need to increment the significand.
4648
4649 // We only cross a binade boundary that requires adjusting the exponent if
4650 // the input is not a denormal and all of said input's significand bits
4651 // are set. If all of said conditions are true: clear the significand, set
4652 // the integral bit to 1, and increment the exponent. If we have a
4653 // denormal always increment since moving denormals and the numbers in the
4654 // smallest normal binade have the same exponent in our representation.
4655 // If there are only exponents, any increment always crosses the
4656 // BinadeBoundary.
4657 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4658 (!isDenormal() && isSignificandAllOnes());
4659
4660 if (WillCrossBinadeBoundary) {
4661 integerPart *Parts = significandParts();
4662 APInt::tcSet(Parts, 0, partCount());
4663 APInt::tcSetBit(Parts, semantics->precision - 1);
4664 assert(exponent != semantics->maxExponent &&
4665 "We can not increment an exponent beyond the maxExponent allowed"
4666 " by the given floating point semantics.");
4667 exponent++;
4668 } else {
4669 incrementSignificand();
4670 }
4671 }
4672 break;
4673 }
4674
4675 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4676 if (nextDown)
4677 changeSign();
4678
4679 return result;
4680}
4681
4682APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4683 return ::exponentNaN(*semantics);
4684}
4685
4686APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4687 return ::exponentInf(*semantics);
4688}
4689
4690APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4691 return ::exponentZero(*semantics);
4692}
4693
4694void IEEEFloat::makeInf(bool Negative) {
4695 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4696 llvm_unreachable("This floating point format does not support Inf");
4697
4698 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4699 // There is no Inf, so make NaN instead.
4700 makeNaN(false, Negative);
4701 return;
4702 }
4703 category = fcInfinity;
4704 sign = Negative;
4705 exponent = exponentInf();
4706 APInt::tcSet(significandParts(), 0, partCount());
4707}
4708
4709void IEEEFloat::makeZero(bool Negative) {
4710 if (!semantics->hasZero)
4711 llvm_unreachable("This floating point format does not support Zero");
4712
4713 category = fcZero;
4714 sign = Negative;
4715 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4716 // Merge negative zero to positive because 0b10000...000 is used for NaN
4717 sign = false;
4718 }
4719 exponent = exponentZero();
4720 APInt::tcSet(significandParts(), 0, partCount());
4721}
4722
// NOTE(review): the signature line of this definition is missing from this
// extract.
//
// Requires the value to be a NaN. Unless the format is NanOnly, sets
// significand bit (precision - 2), the bit that isSignaling() tests for
// being clear.
4724 assert(isNaN());
4725 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4726 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4727}
4728
4729int ilogb(const IEEEFloat &Arg) {
4730 if (Arg.isNaN())
4731 return APFloat::IEK_NaN;
4732 if (Arg.isZero())
4733 return APFloat::IEK_Zero;
4734 if (Arg.isInfinity())
4735 return APFloat::IEK_Inf;
4736 if (!Arg.isDenormal())
4737 return Arg.exponent;
4738
4739 IEEEFloat Normalized(Arg);
4740 int SignificandBits = Arg.getSemantics().precision - 1;
4741
4742 Normalized.exponent += SignificandBits;
4743 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4744 return Normalized.exponent - SignificandBits;
4745}
4746
// NOTE(review): the signature line of this definition is missing from this
// extract. The body uses a value `X`, an integer `Exp` and a `RoundingMode`,
// and returns `X`.
//
// Adds a clamped `Exp` to X's exponent, renormalizes with the given rounding
// mode, quiets the result if it is a NaN, and returns it.
4748 auto MaxExp = X.getSemantics().maxExponent;
4749 auto MinExp = X.getSemantics().minExponent;
4750
4751 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4752 // overflow; clamp it to a safe range before adding, but ensure that the range
4753 // is large enough that the clamp does not change the result. The range we
4754 // need to support is the difference between the largest possible exponent and
4755 // the normalized exponent of half the smallest denormal.
4756
4757 int SignificandBits = X.getSemantics().precision - 1;
4758 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4759
4760 // Clamp to one past the range ends to let normalize handle overflow.
4761 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4762 X.normalize(RoundingMode, lfExactlyZero);
4763 if (X.isNaN())
4764 X.makeQuiet();
4765 return X;
4766}
4767
4768IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4769 Exp = ilogb(Val);
4770
4771 // Quiet signalling nans.
4772 if (Exp == APFloat::IEK_NaN) {
4773 IEEEFloat Quiet(Val);
4774 Quiet.makeQuiet();
4775 return Quiet;
4776 }
4777
4778 if (Exp == APFloat::IEK_Inf)
4779 return Val;
4780
4781 // 1 is added because frexp is defined to return a normalized fraction in
4782 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4783 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4784 return scalbn(Val, -Exp, RM);
4785}
4786
// NOTE(review): the signature line of this constructor is missing from this
// extract; it takes semantics `S`.
// Allocates the two component APFloats, both constructed from semIEEEdouble
// alone. Only semPPCDoubleDouble is accepted for `S`.
4788 : Semantics(&S),
4789 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4790 APFloat(APFloatBase::semIEEEdouble)}) {
4791 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4792}
4793
// NOTE(review): the signature line of this constructor is missing from this
// extract; it takes semantics `S`.
// Allocates the two component APFloats, both constructed with the
// `uninitialized` tag. Only semPPCDoubleDouble is accepted for `S`.
4795 : Semantics(&S), Floats(new APFloat[2]{
4796 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4797 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4798 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4799}
4800
// NOTE(review): the signature line of this constructor is missing from this
// extract; it takes semantics `S` and a value `I` whose type is not visible.
// The first component is built from `I`; the second is constructed from
// semIEEEdouble alone. Only semPPCDoubleDouble is accepted for `S`.
4802 : Semantics(&S),
4803 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4804 APFloat(APFloatBase::semIEEEdouble)}) {
4805 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4806}
4807
// NOTE(review): the signature line of this constructor is missing from this
// extract; it takes semantics `S` and a value `I` exposing getRawData()
// (presumably an APInt holding both halves; confirm).
// Raw word 0 of `I` becomes the bits of the first component and raw word 1
// the bits of the second. Only semPPCDoubleDouble is accepted for `S`.
4809 : Semantics(&S),
4810 Floats(new APFloat[2]{
4811 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4812 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4813 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4814}
4815
// NOTE(review): the first line of this constructor's signature is missing
// from this extract; only the trailing `Second` parameter is visible.
// Moves `First` and `Second` into the two components. Both must already use
// semIEEEdouble, and `S` must be semPPCDoubleDouble.
4817 APFloat &&Second)
4818 : Semantics(&S),
4819 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4820 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4821 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4822 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4823}
4824
// NOTE(review): the signature line of this constructor is missing from this
// extract; it takes a source object `RHS`.
// Copies RHS: both components are duplicated into a fresh allocation, or
// Floats is left null when RHS.Floats is null.
4826 : Semantics(RHS.Semantics),
4827 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4828 APFloat(RHS.Floats[1])}
4829 : nullptr) {
4830 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4831}
4832
// NOTE(review): the signature line of this constructor is missing from this
// extract; it takes a mutable source object `RHS`.
// Takes over RHS's Floats pointer and semantics, then leaves RHS with
// semBogus semantics and a null Floats pointer.
4834 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4835 RHS.Semantics = &APFloatBase::semBogus;
4836 RHS.Floats = nullptr;
4837 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4838}
4839
// NOTE(review): the signature line of this definition is missing from this
// extract; it takes a source object `RHS` and returns *this.
//
// Same semantics and a non-null RHS.Floats: assign the two components
// element-wise, reusing the existing allocation. Otherwise, unless this is a
// self-assignment, destroy *this and copy-construct from RHS in place.
4841 if (Semantics == RHS.Semantics && RHS.Floats) {
4842 Floats[0] = RHS.Floats[0];
4843 Floats[1] = RHS.Floats[1];
4844 } else if (this != &RHS) {
4845 this->~DoubleAPFloat();
4846 new (this) DoubleAPFloat(RHS);
4847 }
4848 return *this;
4849}
4850
4851// Returns a result such that:
4852// 1. abs(Lo) <= ulp(Hi)/2
4853// 2. Hi == RTNE(Hi + Lo)
4854// 3. Hi + Lo == X + Y
4855//
4856// Requires that log2(X) >= log2(Y).
4857static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4858 if (!X.isFinite())
4859 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4860 APFloat Hi = X + Y;
4861 APFloat Delta = Hi - X;
4862 APFloat Lo = Y - Delta;
4863 return {Hi, Lo};
4864}
4865
4866// Implement addition, subtraction, multiplication and division based on:
4867// "Software for Doubled-Precision Floating-Point Computations",
4868// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl adds the pair (a, aa) to the pair (c, cc) and stores the result in
// Floats[0] / Floats[1]. Every step uses rounding mode RM and ORs its status
// into the returned value, except where noted below.
4869APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4870 const APFloat &c, const APFloat &cc,
4871 roundingMode RM) {
4872 int Status = opOK;
4873 APFloat z = a;
4874 Status |= z.add(c, RM);
4875 if (!z.isFinite()) {
// Not finite and not an infinity: store z with a zero second component.
4876 if (!z.isInfinity()) {
4877 Floats[0] = std::move(z);
4878 Floats[1].makeZero(/* Neg = */ false);
4879 return (opStatus)Status;
4880 }
// a + c came out infinite. Discard the status collected so far and redo the
// sum starting from cc + aa, adding the two leading values in an order chosen
// by their magnitudes.
4881 Status = opOK;
4882 auto AComparedToC = a.compareAbsoluteValue(c);
4883 z = cc;
4884 Status |= z.add(aa, RM);
4885 if (AComparedToC == APFloat::cmpGreaterThan) {
4886 // z = cc + aa + c + a;
4887 Status |= z.add(c, RM);
4888 Status |= z.add(a, RM);
4889 } else {
4890 // z = cc + aa + a + c;
4891 Status |= z.add(a, RM);
4892 Status |= z.add(c, RM);
4893 }
4894 if (!z.isFinite()) {
4895 Floats[0] = std::move(z);
4896 Floats[1].makeZero(/* Neg = */ false);
4897 return (opStatus)Status;
4898 }
4899 Floats[0] = z;
4900 APFloat zz = aa;
4901 Status |= zz.add(cc, RM);
4902 if (AComparedToC == APFloat::cmpGreaterThan) {
4903 // Floats[1] = a - z + c + zz;
4904 Floats[1] = a;
4905 Status |= Floats[1].subtract(z, RM);
4906 Status |= Floats[1].add(c, RM);
4907 Status |= Floats[1].add(zz, RM);
4908 } else {
4909 // Floats[1] = c - z + a + zz;
4910 Floats[1] = c;
4911 Status |= Floats[1].subtract(z, RM);
4912 Status |= Floats[1].add(a, RM);
4913 Status |= Floats[1].add(zz, RM);
4914 }
4915 } else {
4916 // q = a - z;
4917 APFloat q = a;
4918 Status |= q.subtract(z, RM);
4919
4920 // zz = q + c + (a - (q + z)) + aa + cc;
4921 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4922 auto zz = q;
4923 Status |= zz.add(c, RM);
4924 Status |= q.add(z, RM);
4925 Status |= q.subtract(a, RM);
4926 q.changeSign();
4927 Status |= zz.add(q, RM);
4928 Status |= zz.add(aa, RM);
4929 Status |= zz.add(cc, RM);
// A positive-zero correction term: the result is exactly z. This path
// returns opOK and drops any status bits collected above.
4930 if (zz.isZero() && !zz.isNegative()) {
4931 Floats[0] = std::move(z);
4932 Floats[1].makeZero(/* Neg = */ false);
4933 return opOK;
4934 }
4935 Floats[0] = z;
4936 Status |= Floats[0].add(zz, RM);
4937 if (!Floats[0].isFinite()) {
4938 Floats[1].makeZero(/* Neg = */ false);
4939 return (opStatus)Status;
4940 }
// Floats[1] = z - Floats[0] + zz
4941 Floats[1] = std::move(z);
4942 Status |= Floats[1].subtract(Floats[0], RM);
4943 Status |= Floats[1].add(zz, RM);
4944 }
4945 return (opStatus)Status;
4946}
4947
4948APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4949 const DoubleAPFloat &RHS,
4950 DoubleAPFloat &Out,
4951 roundingMode RM) {
4952 if (LHS.getCategory() == fcNaN) {
4953 Out = LHS;
4954 return opOK;
4955 }
4956 if (RHS.getCategory() == fcNaN) {
4957 Out = RHS;
4958 return opOK;
4959 }
4960 if (LHS.getCategory() == fcZero) {
4961 Out = RHS;
4962 return opOK;
4963 }
4964 if (RHS.getCategory() == fcZero) {
4965 Out = LHS;
4966 return opOK;
4967 }
4968 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4969 LHS.isNegative() != RHS.isNegative()) {
4970 Out.makeNaN(false, Out.isNegative(), nullptr);
4971 return opInvalidOp;
4972 }
4973 if (LHS.getCategory() == fcInfinity) {
4974 Out = LHS;
4975 return opOK;
4976 }
4977 if (RHS.getCategory() == fcInfinity) {
4978 Out = RHS;
4979 return opOK;
4980 }
4981 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4982
4983 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4984 CC(RHS.Floats[1]);
4985 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4986 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4987 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4988 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4989 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4990 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4991 return Out.addImpl(A, AA, C, CC, RM);
4992}
4993
// NOTE(review): the first line of this definition's signature is missing from
// this extract; only the trailing `RM` parameter is visible.
// In-place addition: delegates to addWithSpecial with *this as both the left
// operand and the output.
4995 roundingMode RM) {
4996 return addWithSpecial(*this, RHS, *this, RM);
4997}
4998
// NOTE(review): the first line of this definition's signature is missing from
// this extract; only the trailing `RM` parameter is visible.
// Flips the sign of *this, adds RHS, then flips the sign back, returning the
// status of the addition.
5000 roundingMode RM) {
5001 changeSign();
5002 auto Ret = add(RHS, RM);
5003 changeSign();
5004 return Ret;
5005}
5006
// NOTE(review): the signature lines of this definition are missing from this
// extract. The body uses a right-hand operand `RHS` and a rounding mode `RM`,
// and writes the product into *this.
//
// Special categories are resolved first. Two normal operands (A, B) * (C, D)
// are then multiplied component-wise and the result is stored in Floats[0] /
// Floats[1].
5009 const auto &LHS = *this;
5010 auto &Out = *this;
5011 /* Interesting observation: For special categories, finding the lowest
5012 common ancestor of the following layered graph gives the correct
5013 return category:
5014
5015 NaN
5016 / \
5017 Zero Inf
5018 \ /
5019 Normal
5020
5021 e.g. NaN * NaN = NaN
5022 Zero * Inf = NaN
5023 Normal * Zero = Zero
5024 Normal * Inf = Inf
5025 */
5026 if (LHS.getCategory() == fcNaN) {
5027 Out = LHS;
5028 return opOK;
5029 }
5030 if (RHS.getCategory() == fcNaN) {
5031 Out = RHS;
5032 return opOK;
5033 }
// NOTE(review): Zero * Inf produces a NaN but reports opOK, whereas
// addWithSpecial reports opInvalidOp for its invalid case. Confirm this is
// intended.
5034 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
5035 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
5036 Out.makeNaN(false, false, nullptr);
5037 return opOK;
5038 }
// NOTE(review): for a zero or infinite operand the result is a copy of that
// operand, so its sign is kept as is rather than combined with the other
// operand's sign. Confirm against the intended semantics.
5039 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
5040 Out = LHS;
5041 return opOK;
5042 }
5043 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
5044 Out = RHS;
5045 return opOK;
5046 }
5047 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
5048 "Special cases not handled exhaustively");
5049
5050 int Status = opOK;
// The operands are copied first: Floats is overwritten below.
5051 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
5052 // t = a * c
5053 APFloat T = A;
5054 Status |= T.multiply(C, RM);
// A zero or non-finite leading product is the whole result.
5055 if (!T.isFiniteNonZero()) {
5056 Floats[0] = T;
5057 Floats[1].makeZero(/* Neg = */ false);
5058 return (opStatus)Status;
5059 }
5060
5061 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5062 APFloat Tau = A;
// T is negated only for the fused multiply-add, then restored.
5063 T.changeSign();
5064 Status |= Tau.fusedMultiplyAdd(C, T, RM);
5065 T.changeSign();
5066 {
5067 // v = a * d
5068 APFloat V = A;
5069 Status |= V.multiply(D, RM);
5070 // w = b * c
5071 APFloat W = B;
5072 Status |= W.multiply(C, RM);
5073 Status |= V.add(W, RM);
5074 // tau += v + w
5075 Status |= Tau.add(V, RM);
5076 }
5077 // u = t + tau
5078 APFloat U = T;
5079 Status |= U.add(Tau, RM);
5080
5081 Floats[0] = U;
5082 if (!U.isFinite()) {
5083 Floats[1].makeZero(/* Neg = */ false);
5084 } else {
5085 // Floats[1] = (t - u) + tau
5086 Status |= T.subtract(U, RM);
5087 Status |= T.add(Tau, RM);
5088 Floats[1] = T;
5089 }
5090 return (opStatus)Status;
5091}
5092
5095 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5096 "Unexpected Semantics");
5097 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5098 auto Ret = Tmp.divide(
5099 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5100 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5101 return Ret;
5102}
5103
5105 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5106 "Unexpected Semantics");
5107 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5108 auto Ret = Tmp.remainder(
5109 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5110 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5111 return Ret;
5112}
5113
5115 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5116 "Unexpected Semantics");
5117 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5118 auto Ret = Tmp.mod(
5119 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5120 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5121 return Ret;
5122}
5123
5126 const DoubleAPFloat &Addend,
5128 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5129 "Unexpected Semantics");
5130 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5131 auto Ret = Tmp.fusedMultiplyAdd(
5132 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
5133 Multiplicand.bitcastToAPInt()),
5134 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
5135 RM);
5136 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5137 return Ret;
5138}
5139
5141 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5142 "Unexpected Semantics");
5143 const APFloat &Hi = getFirst();
5144 const APFloat &Lo = getSecond();
5145
5146 APFloat RoundedHi = Hi;
5147 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
5148
5149 // We can reduce the problem to just the high part if the input:
5150 // 1. Represents a non-finite value.
5151 // 2. Has a component which is zero.
5152 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5153 Floats[0] = std::move(RoundedHi);
5154 Floats[1].makeZero(/*Neg=*/false);
5155 return HiStatus;
5156 }
5157
5158 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5159 // halfway point.
5160 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5161 APFloat TieBreaker) {
5162 // RoundingError tells us which direction we rounded:
5163 // - RoundingError > 0: we rounded up.
5164 // - RoundingError < 0: we rounded down.
5165 // Sterbenz' lemma ensures that RoundingError is exact.
5166 const APFloat RoundingError = Rounded - ToRound;
5167 if (TieBreaker.isNonZero() &&
5168 TieBreaker.isNegative() != RoundingError.isNegative() &&
5169 abs(RoundingError).isExactlyValue(0.5))
5170 Rounded.add(
5171 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5173 return Rounded;
5174 };
5175
5176 // Case 1: Hi is not an integer.
5177 // Special cases are for rounding modes that are sensitive to ties.
5178 if (RoundedHi != Hi) {
5179 // We need to consider the case where Hi was between two integers and the
5180 // rounding mode broke the tie when, in fact, Lo may have had a different
5181 // sign than Hi.
5182 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5183 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5184
5185 Floats[0] = std::move(RoundedHi);
5186 Floats[1].makeZero(/*Neg=*/false);
5187 return HiStatus;
5188 }
5189
5190 // Case 2: Hi is an integer.
5191 // Special cases are for rounding modes which are rounding towards or away from zero.
5192 RoundingMode LoRoundingMode;
5193 if (RM == rmTowardZero)
5194 // When our input is positive, we want the Lo component rounded toward
5195 // negative infinity to get the smallest result magnitude. Likewise,
5196 // negative inputs want the Lo component rounded toward positive infinity.
5197 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5198 else
5199 LoRoundingMode = RM;
5200
5201 APFloat RoundedLo = Lo;
5202 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5203 if (LoRoundingMode == rmNearestTiesToAway)
5204 // We need to consider the case where Lo was between two integers and the
5205 // rounding mode broke the tie when, in fact, Hi may have had a different
5206 // sign than Lo.
5207 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5208
5209 // We must ensure that the final result has no overlap between the two APFloat values.
5210 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5211
5212 Floats[0] = std::move(RoundedHi);
5213 Floats[1] = std::move(RoundedLo);
5214 return LoStatus;
5215}
5216
5218 Floats[0].changeSign();
5219 Floats[1].changeSign();
5220}
5221
5224 // Compare absolute values of the high parts.
5225 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5226 if (HiPartCmp != cmpEqual)
5227 return HiPartCmp;
5228
5229 // Zero, regardless of sign, is equal.
5230 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5231 return cmpEqual;
5232
5233 // At this point, |this->Hi| == |RHS.Hi|.
5234 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5235 // same, and Hi-|Lo| if signs are different.
5236 const bool ThisIsSubtractive =
5237 Floats[0].isNegative() != Floats[1].isNegative();
5238 const bool RHSIsSubtractive =
5239 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5240
5241 // Case 1: The low part of 'this' is zero.
5242 if (Floats[1].isZero())
5243 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5244 // If RHS is subtractive, its magnitude is smaller.
5245 // If RHS is additive, its magnitude is larger.
5246 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5247
5248 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5249 if (RHS.Floats[1].isZero())
5250 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5251 // If 'this' is subtractive, its magnitude is smaller.
5252 // If 'this' is additive, its magnitude is larger.
5253 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5254
5255 // If their natures differ, the additive one is larger.
5256 if (ThisIsSubtractive != RHSIsSubtractive)
5257 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5258
5259 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5260 // The comparison now depends on the magnitude of the low parts.
5261 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5262
5263 if (ThisIsSubtractive) {
5264 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5265 if (LoPartCmp == cmpLessThan)
5266 return cmpGreaterThan;
5267 if (LoPartCmp == cmpGreaterThan)
5268 return cmpLessThan;
5269 }
5270
5271 // If additive, the comparison of |Lo| is direct.
5272 // If equal, they are equal.
5273 return LoPartCmp;
5274}
5275
5277 return Floats[0].getCategory();
5278}
5279
5280bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5281
5283 Floats[0].makeInf(Neg);
5284 Floats[1].makeZero(/* Neg = */ false);
5285}
5286
5288 Floats[0].makeZero(Neg);
5289 Floats[1].makeZero(/* Neg = */ false);
5290}
5291
5293 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5294 "Unexpected Semantics");
5295 Floats[0] =
5296 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5297 Floats[1] =
5298 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5299 if (Neg)
5300 changeSign();
5301}
5302
5304 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5305 "Unexpected Semantics");
5306 Floats[0].makeSmallest(Neg);
5307 Floats[1].makeZero(/* Neg = */ false);
5308}
5309
5311 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5312 "Unexpected Semantics");
5313 Floats[0] =
5314 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5315 if (Neg)
5316 Floats[0].changeSign();
5317 Floats[1].makeZero(/* Neg = */ false);
5318}
5319
5320void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5321 Floats[0].makeNaN(SNaN, Neg, fill);
5322 Floats[1].makeZero(/* Neg = */ false);
5323}
5324
5326 auto Result = Floats[0].compare(RHS.Floats[0]);
5327 // |Float[0]| > |Float[1]|
5328 if (Result == APFloat::cmpEqual)
5329 return Floats[1].compare(RHS.Floats[1]);
5330 return Result;
5331}
5332
5334 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5335 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5336}
5337
5339 if (Arg.Floats)
5340 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5341 return hash_combine(Arg.Semantics);
5342}
5343
5345 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5346 "Unexpected Semantics");
5347 uint64_t Data[] = {
5348 Floats[0].bitcastToAPInt().getRawData()[0],
5349 Floats[1].bitcastToAPInt().getRawData()[0],
5350 };
5351 return APInt(128, Data);
5352}
5353
5355 roundingMode RM) {
5356 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5357 "Unexpected Semantics");
5358 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5359 auto Ret = Tmp.convertFromString(S, RM);
5360 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5361 return Ret;
5362}
5363
5364// The double-double lattice of values corresponds to numbers which obey:
5365// - abs(lo) <= 1/2 * ulp(hi)
5366// - roundTiesToEven(hi + lo) == hi
5367//
5368// nextUp must choose the smallest output > input that follows these rules.
5369// nextDown must choose the largest output < input that follows these rules.
5371 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5372 "Unexpected Semantics");
5373 // nextDown(x) = -nextUp(-x)
5374 if (nextDown) {
5375 changeSign();
5376 APFloat::opStatus Result = next(/*nextDown=*/false);
5377 changeSign();
5378 return Result;
5379 }
5380 switch (getCategory()) {
5381 case fcInfinity:
5382 // nextUp(+inf) = +inf
5383 // nextUp(-inf) = -getLargest()
5384 if (isNegative())
5385 makeLargest(true);
5386 return opOK;
5387
5388 case fcNaN:
5389 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5390 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5391 // change the payload.
5392 if (getFirst().isSignaling()) {
5393 // For consistency, propagate the sign of the sNaN to the qNaN.
5394 makeNaN(false, isNegative(), nullptr);
5395 return opInvalidOp;
5396 }
5397 return opOK;
5398
5399 case fcZero:
5400 // nextUp(pm 0) = +getSmallest()
5401 makeSmallest(false);
5402 return opOK;
5403
5404 case fcNormal:
5405 break;
5406 }
5407
5408 const APFloat &HiOld = getFirst();
5409 const APFloat &LoOld = getSecond();
5410
5411 APFloat NextLo = LoOld;
5412 NextLo.next(/*nextDown=*/false);
5413
5414 // We want to admit values where:
5415 // 1. abs(Lo) <= ulp(Hi)/2
5416 // 2. Hi == RTNE(Hi + lo)
5417 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5418 return Hi + Lo == Hi;
5419 };
5420
5421 // Check if (HiOld, nextUp(LoOld) is in the lattice.
5422 if (InLattice(HiOld, NextLo)) {
5423 // Yes, the result is (HiOld, nextUp(LoOld)).
5424 Floats[1] = std::move(NextLo);
5425
5426 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5427 // value is defined to have exactly 106 bits of precision. This limitation
5428 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5429 // value.
5430 DoubleAPFloat Largest{*Semantics, uninitialized};
5431 Largest.makeLargest(/*Neg=*/false);
5432 if (compare(Largest) == cmpGreaterThan)
5433 makeInf(/*Neg=*/false);
5434
5435 return opOK;
5436 }
5437
5438 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5439 // correct result. We know the new hi component will be nextUp(HiOld) but our
5440 // lattice rules make it a little ambiguous what the correct NextLo must be.
5441 APFloat NextHi = HiOld;
5442 NextHi.next(/*nextDown=*/false);
5443
5444 // nextUp(getLargest()) == INFINITY
5445 if (NextHi.isInfinity()) {
5446 makeInf(/*Neg=*/false);
5447 return opOK;
5448 }
5449
5450 // IEEE 754-2019 5.3.1:
5451 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5452 // -0."
5453 if (NextHi.isZero()) {
5454 makeZero(/*Neg=*/true);
5455 return opOK;
5456 }
5457
5458 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5459 // negative infinity as possible.
5460 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5461 if (!InLattice(NextHi, NextLo))
5462 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5463 NextLo.next(/*nextDown=*/false);
5464
5465 Floats[0] = std::move(NextHi);
5466 Floats[1] = std::move(NextLo);
5467
5468 return opOK;
5469}
5470
/// Converts this double-double value to an integer of \p Width bits and
/// writes the resulting bit pattern into \p Input.
///
/// When the high part is not finite-nonzero, or the low part is zero, the
/// work is delegated to the high part's own convertToInteger. Otherwise the
/// value is first rounded to an integral double-double with \p RM, and the
/// integer is assembled from the separately converted high and low parts.
///
/// \param Input    destination parts that receive the integer.
/// \param Width    bit width of the destination integer.
/// \param IsSigned whether the destination is treated as signed.
/// \param RM       rounding mode used for the initial rounding to integral.
/// \param IsExact  on the delegated early path it is set by the high part's
///                 conversion; otherwise it is cleared up front and set to
///                 true only on the paths that end with the rounding status
///                 being opOK.
/// \returns opInvalidOp when the value is rejected by one of the checks
///          below; opInexact (or the failing status) when rounding left one
///          component zero; otherwise the status of the rounding step.
5471APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5472 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5473 roundingMode RM, bool *IsExact) const {
5474 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5475 "Unexpected Semantics");
5476
5477 // If Hi is not finite, or Lo is zero, the value is entirely represented
5478 // by Hi. Delegate to the simpler single-APFloat conversion.
5479 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5480 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5481
5482 // First, round the full double-double value to an integral value. This
5483 // simplifies the rest of the function, as we no longer need to consider
5484 // fractional parts.
5485 *IsExact = false;
5486 DoubleAPFloat Integral = *this;
5487 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5488 if (RoundStatus == opInvalidOp)
5489 return opInvalidOp;
5490 const APFloat &IntegralHi = Integral.getFirst();
5491 const APFloat &IntegralLo = Integral.getSecond();
5492
5493 // If rounding results in either component being zero, the sum is trivial.
5494 // Delegate to the simpler single-APFloat conversion.
5495 bool HiIsExact;
5496 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5497 const opStatus HiStatus =
5498 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5499 // The conversion from an integer-valued float to an APInt may fail if the
5500 // result would be out of range. Regardless, taking this path is only
5501 // possible if rounding occurred during the initial `roundToIntegral`.
5502 return HiStatus == opOK ? opInexact : HiStatus;
5503 }
5504
5505 // A negative number cannot be represented by an unsigned integer.
5506 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5507 if (!IsSigned && IntegralHi.isNegative())
5508 return opInvalidOp;
5509
5510 // Handle the special boundary case where |Hi| is exactly the power of two
5511 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5512 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5513 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5514 // signed, N for unsigned).
5515 bool LoIsExact;
5516 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5517 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5518 if (HiExactLog2 >= 0 &&
5519 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5520 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5521 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5522 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5523 return opInvalidOp;
5524
5525 // If the signs differ, the sum will fit. We can compute the result using
5526 // properties of two's complement arithmetic without a wide intermediate
5527 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5528 const opStatus LoStatus = IntegralLo.convertToInteger(
5529 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5530 if (LoStatus == opInvalidOp)
5531 return opInvalidOp;
5532
5533 // Adjust the bit pattern of Lo to account for Hi's value:
5534 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5535 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5536 // already produced the correct final bit pattern.
5537 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5538 // can be computed by taking the two's complement pattern for `Lo` and
5539 // clearing the sign bit.
5540 if (IsSigned && !IntegralHi.isNegative())
5541 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5542 *IsExact = RoundStatus == opOK;
5543 return RoundStatus;
5544 }
5545
5546 // Convert Hi into an integer. This may not fit but that is OK: we know that
5547 // Hi + Lo would not fit either in this situation.
5548 const opStatus HiStatus = IntegralHi.convertToInteger(
5549 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5550 if (HiStatus == opInvalidOp)
5551 return HiStatus;
5552
5553 // Convert Lo into a temporary integer of the same width.
5554 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5555 const opStatus LoStatus =
5556 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5557 if (LoStatus == opInvalidOp)
5558 return LoStatus;
5559
5560 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5561 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5562 // where the sum could cross the integer type's boundary is when Hi is a
5563 // power of two, which is handled by the special case block above.
5564 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5565
5566 // Any inexactness can only have come from the initial rounding step.
5567 *IsExact = RoundStatus == opOK;
5567 return RoundStatus;
5568}
5569
5572 unsigned int Width, bool IsSigned,
5573 roundingMode RM, bool *IsExact) const {
5574 opStatus FS =
5575 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5576
5577 if (FS == opInvalidOp) {
5578 const unsigned DstPartsCount = partCountForBits(Width);
5579 assert(DstPartsCount <= Input.size() && "Integer too big");
5580
5581 unsigned Bits;
5582 if (getCategory() == fcNaN)
5583 Bits = 0;
5584 else if (isNegative())
5585 Bits = IsSigned;
5586 else
5587 Bits = Width - IsSigned;
5588
5589 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5590 if (isNegative() && IsSigned)
5591 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5592 }
5593
5594 return FS;
5595}
5596
/// Replaces this value with the overflow result selected by \p RM: each
/// branch stores either the largest finite value or infinity, with the sign
/// chosen as shown in that branch.
///
/// \returns opInexact, additionally or'ed with opOverflow when the stored
///          first component is no longer finite.
// NOTE(review): this copy has no `case` labels between `switch (RM)` and
// `default:` (the embedded listing numbers skip 5599, 5602, 5608 and
// 5614-5615). Restore them from the upstream file before relying on this
// text; which rounding mode guards which branch cannot be read from here.
5597APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5598 switch (RM) {
5600 makeLargest(/*Neg=*/isNegative());
5601 break;
5603 if (isNegative())
5604 makeInf(/*Neg=*/true);
5605 else
5606 makeLargest(/*Neg=*/false);
5607 break;
5609 if (isNegative())
5610 makeLargest(/*Neg=*/true);
5611 else
5612 makeInf(/*Neg=*/false);
5613 break;
5616 makeInf(/*Neg=*/isNegative());
5617 break;
5618 default:
5619 llvm_unreachable("Invalid rounding mode found");
5620 }
// The result is always inexact; it is also an overflow when the branch above
// produced an infinity.
5621 opStatus S = opInexact;
5622 if (!getFirst().isFinite())
5623 S = static_cast<opStatus>(S | opOverflow);
5624 return S;
5625}
5626
/// Sets this value from the unsigned multi-part integer \p Src.
///
/// The integer is approximated by Hi (round-to-nearest-even), the exact
/// integer error of that approximation is converted to Lo with a possibly
/// adjusted rounding mode, and the pair is renormalized with fastTwoSum.
///
/// \param Src      parts of the source integer.
/// \param SrcCount number of parts in \p Src.
/// \param RM       requested rounding mode for the overall conversion.
/// \returns opOK for a zero input; the result of handleOverflow(RM) when the
///          value is too large; otherwise the status of converting the error
///          term into Lo.
// NOTE(review): the declarations of `Hi` and `Lo` are absent from this copy
// (the embedded listing numbers skip 5647 and 5706). Restore them from the
// upstream file before relying on this text.
5627APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5628 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5629 // Find the most significant bit of the source integer. APInt::tcMSB returns
5630 // UINT_MAX for a zero value.
5631 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5632 if (SrcMSB == UINT_MAX) {
5633 // The source integer is 0.
5634 makeZero(/*Neg=*/false);
5635 return opOK;
5636 }
5637
5638 // Create a minimally-sized APInt to represent the source value.
5639 const unsigned SrcBitWidth = SrcMSB + 1;
5640 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5641 /*isUnsigned=*/true};
5642
5643 // Stage 1: Initial Approximation.
5644 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5645 // We use round-to-nearest because it minimizes the initial error, which is
5646 // crucial for the subsequent steps.
5648 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5649
5650 // If the first approximation already overflows, the number is too large.
5651 // NOTE: The underlying semantics are *more* conservative when choosing to
5652 // overflow because their notion of ULP is much larger. As such, it is always
5653 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5654 if (!Hi.isFinite())
5655 return handleOverflow(RM);
5656
5657 // Stage 2: Exact Error Calculation.
5658 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5659 // This is done by converting Hi back to an integer and subtracting it from
5660 // the original source.
5661 bool HiAsIntIsExact;
5662 // Create an integer representation of Hi. Its width is ilogb(Hi) + 1, which
5663 // is just large enough to hold Hi. This width can exceed SrcBitWidth if the
5664 // conversion to Hi rounded up to a power of two, which is why SrcInt is
5665 // zero-extended to HiAsInt's width before the subtraction below.
5666 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5667 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5668 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5669
5670 // Stage 3: Error Approximation and Rounding.
5671 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5672 // captures the remainder of the original number. The rounding mode for this
5673 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5674 // ensure the final sum (Hi + Lo) rounds correctly.
5675 roundingMode LoRM = RM;
5676 // Adjustments are only necessary when the initial approximation Hi was an
5677 // overestimate, making the Error negative.
5678 if (Error.isNegative()) {
5679 if (RM == rmNearestTiesToAway) {
5680 // For rmNearestTiesToAway, a tie should round away from zero. Since
5681 // SrcInt is positive, this means rounding toward +infinity.
5682 // A standard conversion of a negative Error would round ties toward
5683 // -infinity, causing the final sum Hi + Lo to be smaller. To
5684 // counteract this, we detect the tie case and override the rounding
5685 // mode for Lo to rmTowardPositive.
5686 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5687 const unsigned LoPrecision = getSecond().getSemantics().precision;
5688 if (ErrorActiveBits > LoPrecision) {
5689 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5690 // A tie occurs when the bits to be truncated are of the form 100...0.
5691 // This is detected by checking if the number of trailing zeros is
5692 // exactly one less than the number of bits being truncated.
5693 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5694 LoRM = rmTowardPositive;
5695 }
5696 } else if (RM == rmTowardZero) {
5697 // For rmTowardZero, the final positive result must be truncated (rounded
5698 // down). When Hi is an overestimate, Error is negative. A standard
5699 // rmTowardZero conversion of Error would make it *less* negative,
5700 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5701 // rounds down correctly, we force Lo to round toward -infinity.
5702 LoRM = rmTowardNegative;
5703 }
5704 }
5705
5707 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5708
5709 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5710 // components do not overlap. fastTwoSum performs this operation.
5711 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5712 Floats[0] = std::move(Hi);
5713 Floats[1] = std::move(Lo);
5714
5715 // A final check for overflow is needed because fastTwoSum can cause a
5716 // carry-out from Lo that pushes Hi to infinity.
5717 if (!getFirst().isFinite())
5718 return handleOverflow(RM);
5719
5720 // The largest DoubleAPFloat must be canonical. Values which are larger are
5721 // not canonical and are equivalent to overflow.
5722 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5723 DoubleAPFloat Largest{*Semantics};
5724 Largest.makeLargest(/*Neg=*/false);
5725 if (compare(Largest) == APFloat::cmpGreaterThan)
5726 return handleOverflow(RM);
5727 }
5728
5729 // The final status of the operation is determined by the conversion of the
5730 // error term. If Lo could represent Error exactly, the entire conversion
5731 // is exact. Otherwise, it's inexact.
5732 return Status;
5733}
5734
5736 bool IsSigned,
5737 roundingMode RM) {
5738 const bool NegateInput = IsSigned && Input.isNegative();
5739 APInt API = Input;
5740 if (NegateInput)
5741 API.negate();
5742
5744 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5745 if (NegateInput)
5746 changeSign();
5747 return Status;
5748}
5749
5751 unsigned int HexDigits,
5752 bool UpperCase,
5753 roundingMode RM) const {
5754 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5755 "Unexpected Semantics");
5756 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5757 .convertToHexString(DST, HexDigits, UpperCase, RM);
5758}
5759
5761 return getCategory() == fcNormal &&
5762 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5763 // (double)(Hi + Lo) == Hi defines a normal number.
5764 Floats[0] != Floats[0] + Floats[1]);
5765}
5766
5768 if (getCategory() != fcNormal)
5769 return false;
5770 DoubleAPFloat Tmp(*this);
5771 Tmp.makeSmallest(this->isNegative());
5772 return Tmp.compare(*this) == cmpEqual;
5773}
5774
5776 if (getCategory() != fcNormal)
5777 return false;
5778
5779 DoubleAPFloat Tmp(*this);
5781 return Tmp.compare(*this) == cmpEqual;
5782}
5783
5785 if (getCategory() != fcNormal)
5786 return false;
5787 DoubleAPFloat Tmp(*this);
5788 Tmp.makeLargest(this->isNegative());
5789 return Tmp.compare(*this) == cmpEqual;
5790}
5791
5793 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5794 "Unexpected Semantics");
5795 return Floats[0].isInteger() && Floats[1].isInteger();
5796}
5797
5799 unsigned FormatPrecision,
5800 unsigned FormatMaxPadding,
5801 bool TruncateZero) const {
5802 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5803 "Unexpected Semantics");
5804 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5805 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5806}
5807
5809 // In order for Hi + Lo to be a power of two, the following must be true:
5810 // 1. Hi must be a power of two.
5811 // 2. Lo must be zero.
5812 if (getSecond().isNonZero())
5813 return INT_MIN;
5814 return getFirst().getExactLog2Abs();
5815}
5816
5817int ilogb(const DoubleAPFloat &Arg) {
5818 const APFloat &Hi = Arg.getFirst();
5819 const APFloat &Lo = Arg.getSecond();
5820 int IlogbResult = ilogb(Hi);
5821 // Zero and non-finite values can delegate to ilogb(Hi).
5822 if (Arg.getCategory() != fcNormal)
5823 return IlogbResult;
5824 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5825 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5826 return IlogbResult;
5827 if (Hi.getExactLog2Abs() == INT_MIN)
5828 return IlogbResult;
5829 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5830 // get nudged out of the binade by the low component.
5831 return IlogbResult - 1;
5832}
5833
5836 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5837 "Unexpected Semantics");
5839 scalbn(Arg.Floats[0], Exp, RM),
5840 scalbn(Arg.Floats[1], Exp, RM));
5841}
5842
/// Splits \p Arg into a fraction and a power-of-two exponent stored in
/// \p Exp.
///
/// NaN inputs are returned quieted, infinities are returned unchanged, and
/// zero is returned unchanged with \p Exp set to 0. Otherwise \p Exp is
/// ilogb(Arg) + 1 (bumped once more in the power-of-two edge case below) and
/// both components are scaled by 2^-Exp, with the low component's rounding
/// mode adjusted so that the pair rounds like the combined value.
// NOTE(review): the second line of the signature (which declares `RM` and
// opens the body) and the first line of the final `return` are absent from
// this copy (the embedded listing numbers skip 5844 and 5942). Restore them
// from the upstream file before relying on this text.
5843DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5845 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5846 "Unexpected Semantics");
5847
5848 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5849 // [1.0, 2.0).
5850 Exp = ilogb(Arg);
5851
5852 // For NaNs, quiet any signaling NaN and return the result, as per standard
5853 // practice.
5854 if (Exp == APFloat::IEK_NaN) {
5855 DoubleAPFloat Quiet{Arg};
5856 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5857 return Quiet;
5858 }
5859
5860 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5861 if (Exp == APFloat::IEK_Inf)
5862 return Arg;
5863
5864 // For zero, the fraction is zero and the standard requires the exponent be 0.
5865 if (Exp == APFloat::IEK_Zero) {
5866 Exp = 0;
5867 return Arg;
5868 }
5869
5870 const APFloat &Hi = Arg.getFirst();
5871 const APFloat &Lo = Arg.getSecond();
5872
5873 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5874 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5875 // Increment the exponent to ensure the fraction is in the correct range.
5876 ++Exp;
5877
5878 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5879 APFloat Second = Lo;
5880 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5881 roundingMode LoRoundingMode;
5882 // The interpretation of rmTowardZero depends on the sign of the combined
5883 // Arg rather than the sign of the component.
5884 if (RM == rmTowardZero)
5885 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5886 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5887 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5888 // "away from zero" based on its own sign would move the value in the
5889 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5890 // direction-agnostic. We only need to bother with this if Lo is scaled
5891 // down.
5892 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5893 LoRoundingMode = rmNearestTiesToEven;
5894 else
5895 LoRoundingMode = RM;
5896 Second = scalbn(Lo, -Exp, LoRoundingMode);
5897 // The rmNearestTiesToEven proxy is correct most of the time, but it
5898 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5899 // exact midpoint.
5900 // NOTE: This is morally equivalent to roundTiesTowardZero.
5901 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5902 // Re-scale the result back to check if rounding occurred.
5903 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5904 if (RecomposedLo != Lo) {
5905 // RoundingError tells us which direction we rounded:
5906 // - RoundingError > 0: we rounded up.
5907 // - RoundingError < 0: we rounded down.
5908 const APFloat RoundingError = RecomposedLo - Lo;
5909 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5910 // We do this by checking if the absolute rounding error is exactly
5911 // half a ULP of the result.
5912 const APFloat UlpOfSecond = harrisonUlp(Second);
5913 const APFloat ScaledUlpOfSecond =
5914 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5915 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5916 const bool RoundedLoAway =
5917 Second.isNegative() == RoundingError.isNegative();
5918 // The signs of Hi and Lo disagree and we rounded Lo away: we must
5919 // decrease the magnitude of Second to increase the magnitude of
5920 // First+Second.
5921 if (IsMidpoint && RoundedLoAway)
5922 Second.next(/*nextDown=*/!Second.isNegative());
5923 }
5924 }
5925 // Handle a tricky edge case where Arg is slightly less than a power of two
5926 // (e.g., Arg = 2^k - epsilon). In this situation:
5927 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5928 // 2. ilogb(Arg) correctly returns k-1.
5929 // 3. Our initial Exp becomes (k-1) + 1 = k.
5930 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5931 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5932 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5933 // We detect this specific case by checking if Hi is a power of two and if
5934 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5935 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5936 // valid fraction.
5937 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5938 ++Exp;
5939 }
5940
5941 APFloat First = scalbn(Hi, -Exp, RM);
5943 std::move(Second));
5944}
5945
5946} // namespace detail
5947
5948APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5949 if (usesLayout<IEEEFloat>(Semantics)) {
5950 new (&IEEE) IEEEFloat(std::move(F));
5951 return;
5952 }
5953 if (usesLayout<DoubleAPFloat>(Semantics)) {
5954 const fltSemantics& S = F.getSemantics();
5955 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5957 return;
5958 }
5959 llvm_unreachable("Unexpected semantics");
5960}
5961
5966
5967hash_code hash_value(const APFloat &Arg) {
5968 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5969 return hash_value(Arg.U.IEEE);
5970 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5971 return hash_value(Arg.U.Double);
5972 llvm_unreachable("Unexpected semantics");
5973}
5974
5976 : APFloat(Semantics) {
5977 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5978 assert(StatusOrErr && "Invalid floating point representation");
5979 consumeError(StatusOrErr.takeError());
5980}
5981
5983 if (isZero())
5984 return isNegative() ? fcNegZero : fcPosZero;
5985 if (isNormal())
5986 return isNegative() ? fcNegNormal : fcPosNormal;
5987 if (isDenormal())
5989 if (isInfinity())
5990 return isNegative() ? fcNegInf : fcPosInf;
5991 assert(isNaN() && "Other class of FP constant");
5992 return isSignaling() ? fcSNan : fcQNan;
5993}
5994
5995bool APFloat::getExactInverse(APFloat *Inv) const {
5996 // Only finite, non-zero numbers can have a useful, representable inverse.
5997 // This check filters out +/- zero, +/- infinity, and NaN.
5998 if (!isFiniteNonZero())
5999 return false;
6000
6001 // Historically, this function rejects subnormal inputs. One reason why this
6002 // might be important is that subnormals may behave differently under FTZ/DAZ
6003 // runtime behavior.
6004 if (isDenormal())
6005 return false;
6006
6007 // A number has an exact, representable inverse if and only if it is a power
6008 // of two.
6009 //
6010 // Mathematical Rationale:
6011 // 1. A binary floating-point number x is a dyadic rational, meaning it can
6012 // be written as x = M / 2^k for integers M (the significand) and k.
6013 // 2. The inverse is 1/x = 2^k / M.
6014 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
6015 // in binary), its denominator M must also be a power of two.
6016 // Let's say M = 2^m.
6017 // 4. Substituting this back into the formula for x, we get
6018 // x = (2^m) / (2^k) = 2^(m-k).
6019 //
6020 // This proves that x must be a power of two.
6021
6022 // getExactLog2Abs() returns the integer exponent if the number is a power of
6023 // two or INT_MIN if it is not.
6024 const int Exp = getExactLog2Abs();
6025 if (Exp == INT_MIN)
6026 return false;
6027
6028 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
6029 // scaling 1.0 by the negated exponent.
6030 APFloat Reciprocal =
6031 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
6032 rmTowardZero);
6033
6034 // scalbn might round if the resulting exponent -Exp is outside the
6035 // representable range, causing overflow (to infinity) or underflow. We
6036 // must verify that the result is still the exact power of two we expect.
6037 if (Reciprocal.getExactLog2Abs() != -Exp)
6038 return false;
6039
6040 // Avoid multiplication with a subnormal, it is not safe on all platforms and
6041 // may be slower than a normal division.
6042 if (Reciprocal.isDenormal())
6043 return false;
6044
6045 assert(Reciprocal.isFiniteNonZero());
6046
6047 if (Inv)
6048 *Inv = std::move(Reciprocal);
6049
6050 return true;
6051}
6052
6054 roundingMode RM, bool *losesInfo) {
6055 if (&getSemantics() == &ToSemantics) {
6056 *losesInfo = false;
6057 return opOK;
6058 }
6059 if (usesLayout<IEEEFloat>(getSemantics()) &&
6060 usesLayout<IEEEFloat>(ToSemantics))
6061 return U.IEEE.convert(ToSemantics, RM, losesInfo);
6062 if (usesLayout<IEEEFloat>(getSemantics()) &&
6063 usesLayout<DoubleAPFloat>(ToSemantics)) {
6064 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
6065 auto Ret =
6066 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
6067 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
6068 return Ret;
6069 }
6070 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
6071 usesLayout<IEEEFloat>(ToSemantics)) {
6072 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
6073 *this = APFloat(std::move(getIEEE()), ToSemantics);
6074 return Ret;
6075 }
6076 llvm_unreachable("Unexpected semantics");
6077}
6078
6082
6084 SmallVector<char, 16> Buffer;
6085 toString(Buffer);
6086 OS << Buffer;
6087}
6088
6089#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6091 print(dbgs());
6092 dbgs() << '\n';
6093}
6094#endif
6095
6097 NID.Add(bitcastToAPInt());
6098}
6099
6101 roundingMode rounding_mode,
6102 bool *isExact) const {
6103 unsigned bitWidth = result.getBitWidth();
6104 SmallVector<uint64_t, 4> parts(result.getNumWords());
6105 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
6106 rounding_mode, isExact);
6107 // Keeps the original signed-ness.
6108 result = APInt(bitWidth, parts);
6109 return status;
6110}
6111
6113 if (&getSemantics() ==
6114 (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
6115 return getIEEE().convertToDouble();
6116 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
6117 "Float semantics is not representable by IEEEdouble");
6118 APFloat Temp = *this;
6119 bool LosesInfo;
6120 opStatus St =
6121 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
6122 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6123 (void)St;
6124 return Temp.getIEEE().convertToDouble();
6125}
6126
6127#ifdef HAS_IEE754_FLOAT128
6128float128 APFloat::convertToQuad() const {
6129 if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
6130 return getIEEE().convertToQuad();
6131 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
6132 "Float semantics is not representable by IEEEquad");
6133 APFloat Temp = *this;
6134 bool LosesInfo;
6135 opStatus St =
6136 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
6137 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6138 (void)St;
6139 return Temp.getIEEE().convertToQuad();
6140}
6141#endif
6142
6144 if (&getSemantics() ==
6145 (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
6146 return getIEEE().convertToFloat();
6147 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
6148 "Float semantics is not representable by IEEEsingle");
6149 APFloat Temp = *this;
6150 bool LosesInfo;
6151 opStatus St =
6152 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
6153 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
6154 (void)St;
6155 return Temp.getIEEE().convertToFloat();
6156}
6157
6158APFloat::Storage::~Storage() {
6159 if (usesLayout<IEEEFloat>(*semantics)) {
6160 IEEE.~IEEEFloat();
6161 return;
6162 }
6163 if (usesLayout<DoubleAPFloat>(*semantics)) {
6164 Double.~DoubleAPFloat();
6165 return;
6166 }
6167 llvm_unreachable("Unexpected semantics");
6168}
6169
6170APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6171 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6172 new (this) IEEEFloat(RHS.IEEE);
6173 return;
6174 }
6175 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6176 new (this) DoubleAPFloat(RHS.Double);
6177 return;
6178 }
6179 llvm_unreachable("Unexpected semantics");
6180}
6181
6182APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6183 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6184 new (this) IEEEFloat(std::move(RHS.IEEE));
6185 return;
6186 }
6187 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6188 new (this) DoubleAPFloat(std::move(RHS.Double));
6189 return;
6190 }
6191 llvm_unreachable("Unexpected semantics");
6192}
6193
6194APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6195 if (usesLayout<IEEEFloat>(*semantics) &&
6196 usesLayout<IEEEFloat>(*RHS.semantics)) {
6197 IEEE = RHS.IEEE;
6198 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6199 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6200 Double = RHS.Double;
6201 } else if (this != &RHS) {
6202 this->~Storage();
6203 new (this) Storage(RHS);
6204 }
6205 return *this;
6206}
6207
6208APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6209 if (usesLayout<IEEEFloat>(*semantics) &&
6210 usesLayout<IEEEFloat>(*RHS.semantics)) {
6211 IEEE = std::move(RHS.IEEE);
6212 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6213 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6214 Double = std::move(RHS.Double);
6215 } else if (this != &RHS) {
6216 this->~Storage();
6217 new (this) Storage(std::move(RHS));
6218 }
6219 return *this;
6220}
6221
6222} // namespace llvm
6223
6224#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:48
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:26
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
static bool isSigned(unsigned int Opcode)
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:306
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:174
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:323
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:334
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:298
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEquad()
Definition APFloat.h:298
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:301
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:313
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:319
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:354
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:317
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:336
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:294
friend class APFloat
Definition APFloat.h:291
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:327
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:221
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:155
static constexpr unsigned integerPartWidth
Definition APFloat.h:152
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:300
APInt::WordType integerPart
Definition APFloat.h:151
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:315
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:340
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:304
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:307
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:316
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:315
static const fltSemantics & Float8E4M3()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:308
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:266
static const fltSemantics & Float8E3M4()
Definition APFloat.h:311
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:331
static const fltSemantics & Float8E5M2()
Definition APFloat.h:303
fltCategory
Category of internally-represented number.
Definition APFloat.h:370
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:314
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static const fltSemantics & FloatTF32()
Definition APFloat.h:312
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:304
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1102
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:6096
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1190
bool isFiniteNonZero() const
Definition APFloat.h:1441
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1479
bool isNegative() const
Definition APFloat.h:1431
bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5995
friend DoubleAPFloat
Definition APFloat.h:1495
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:6112
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1460
bool isNormal() const
Definition APFloat.h:1435
bool isDenormal() const
Definition APFloat.h:1432
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1163
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:6079
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5967
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
bool isFinite() const
Definition APFloat.h:1436
bool isNaN() const
Definition APFloat.h:1429
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1070
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1421
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6143
bool isSignaling() const
Definition APFloat.h:1433
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1217
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1199
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
opStatus next(bool nextDown)
Definition APFloat.h:1236
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1130
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5982
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1208
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5962
friend IEEEFloat
Definition APFloat.h:1494
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:6090
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:6083
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1230
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1155
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1061
bool isInfinity() const
Definition APFloat.h:1428
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2368
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2340
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1758
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2363
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2442
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2412
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1512
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2752
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1752
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2348
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2726
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2632
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1495
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2373
void negate()
Negate this APInt in place.
Definition APInt.h:1468
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1918
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2379
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2699
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2354
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2392
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1736
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2530
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2477
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2516
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1744
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1913
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1722
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:569
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:858
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:330
void Add(const T &x)
Definition FoldingSet.h:370
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:299
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
const char * iterator
Definition StringRef.h:59
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
iterator begin() const
Definition StringRef.h:112
char back() const
back - Get the last character in the string.
Definition StringRef.h:155
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:686
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
char front() const
front - Get the first character in the string.
Definition StringRef.h:149
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
iterator end() const
Definition StringRef.h:114
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:647
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5310
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4840
LLVM_ABI void changeSign()
Definition APFloat.cpp:5217
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5784
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:5104
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5007
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5276
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5333
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5808
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5735
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5344
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5354
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5767
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4999
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5338
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5223
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5760
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5571
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5303
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5817
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5370
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5282
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5792
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5287
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5093
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5775
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:5114
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4787
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5798
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5292
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5325
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:5140
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:5125
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5750
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5280
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4994
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5320
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3323
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1541
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2312
fltCategory getCategory() const
Definition APFloat.h:573
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2881
bool isFiniteNonZero() const
Definition APFloat.h:576
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:463
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:4109
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4504
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3733
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4747
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2483
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:538
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2182
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:563
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2202
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3803
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3796
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2156
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4460
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:4141
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4694
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:1041
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4723
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current semantics.
Definition APFloat.cpp:1143
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2150
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:550
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3266
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:930
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2162
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2396
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:1002
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1168
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Make this number the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:4155
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1160
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1195
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2350
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4729
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4549
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:560
const fltSemantics & getSemantics() const
Definition APFloat.h:574
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:553
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4533
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4709
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2560
LLVM_ABI void changeSign()
Definition APFloat.cpp:2106
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:1027
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2821
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition APFloat.cpp:1033
An opaque object representing a hash code.
Definition Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:439
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of the friend declarations above to make them discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:441
static constexpr opStatus opDivByZero
Definition APFloat.h:436
static constexpr opStatus opOverflow
Definition APFloat.h:437
static constexpr cmpResult cmpLessThan
Definition APFloat.h:431
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1566
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:427
static constexpr uninitializedTag uninitialized
Definition APFloat.h:421
static constexpr fltCategory fcZero
Definition APFloat.h:443
static constexpr opStatus opOK
Definition APFloat.h:434
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:432
static constexpr unsigned integerPartWidth
Definition APFloat.h:429
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3471
APFloatBase::ExponentType ExponentType
Definition APFloat.h:420
static constexpr fltCategory fcNormal
Definition APFloat.h:442
static constexpr opStatus opInvalidOp
Definition APFloat.h:435
APFloatBase::opStatus opStatus
Definition APFloat.h:417
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4768
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:415
static constexpr cmpResult cmpUnordered
Definition APFloat.h:433
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:426
APFloatBase::roundingMode roundingMode
Definition APFloat.h:416
APFloatBase::cmpResult cmpResult
Definition APFloat.h:418
static constexpr fltCategory fcInfinity
Definition APFloat.h:440
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:424
static constexpr roundingMode rmTowardZero
Definition APFloat.h:428
static constexpr opStatus opUnderflow
Definition APFloat.h:438
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:422
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4729
static constexpr cmpResult cmpEqual
Definition APFloat.h:430
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4747
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4857
APFloatBase::integerPart integerPart
Definition APFloat.h:414
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:821
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
static const char infinityL[]
Definition APFloat.cpp:812
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:385
static const char NaNU[]
Definition APFloat.cpp:815
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:696
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:755
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:872
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:359
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:447
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:98
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:285
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1516
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:839
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
const unsigned int maxPrecision
Definition APFloat.cpp:284
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1537
static const char NaNL[]
Definition APFloat.cpp:814
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
static const char infinityU[]
Definition APFloat.cpp:813
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:50
@ lfMoreThanHalf
Definition APFloat.h:54
@ lfLessThanHalf
Definition APFloat.h:52
@ lfExactlyHalf
Definition APFloat.h:53
@ lfExactlyZero
Definition APFloat.h:51
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:539
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:286
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1525
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:369
static Error createError(const Twine &Err)
Definition APFloat.cpp:381
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:662
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
static const char hexDigitsUpper[]
Definition APFloat.cpp:811
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
const unsigned int maxExponent
Definition APFloat.cpp:283
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:392
fltNonfiniteBehavior
Definition APFloat.cpp:57
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:1954
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:675
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:499
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:364
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:640
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1551
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:710
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:857
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:609
void consumeError(Error Err)
Consume a Error without doing anything.
Definition Error.h:1083
fltNanEncoding
Definition APFloat.cpp:81
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:402
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:810
#define N
const char * lastSigDigit
Definition APFloat.cpp:534
const char * firstSigDigit
Definition APFloat.cpp:533
APFloatBase::ExponentType maxExponent
Definition APFloat.cpp:106
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.cpp:119
APFloatBase::ExponentType minExponent
Definition APFloat.cpp:110
unsigned int sizeInBits
Definition APFloat.cpp:117
unsigned int precision
Definition APFloat.cpp:114
fltNanEncoding nanEncoding
Definition APFloat.cpp:121