LLVM 23.0.0git
APFloat.cpp
Go to the documentation of this file.
1//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision floating
10// point values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APSInt.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/Hashing.h"
20#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/Error.h"
28#include <cstring>
29#include <limits.h>
30
// Expands to a statement that returns METHOD_CALL invoked on U.IEEE when the
// current semantics use the IEEEFloat layout, or on U.Double when they use the
// DoubleAPFloat layout. Any other layout is reported as unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics())) {                               \
      return U.IEEE.METHOD_CALL;                                               \
    } else if (usesLayout<DoubleAPFloat>(getSemantics())) {                    \
      return U.Double.METHOD_CALL;                                             \
    }                                                                          \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39
40using namespace llvm;
41
/// Combine two fcCategory values into a single integer key (LHS scaled by
/// four, plus RHS) so that a pair of categories can be matched by one switch
/// statement when classifying how two APFloats interact in an operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) (4 * (_lhs) + (_rhs))
49
50/* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54namespace llvm {
55
56// How the nonfinite values Inf and NaN are represented.
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
62
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
71
72 // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
73 // Float4E2M1FN types, which do not support Inf or NaN values.
75};
76
77// How NaN values are represented. This is currently only used in combination
78// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
79// while having IEEE non-finite behavior is liable to lead to unexpected
80// results.
81enum class fltNanEncoding {
82 // Represents the standard IEEE behavior where a value is NaN if its
83 // exponent is all 1s and the significand is non-zero.
85
86 // Represents the behavior in the Float8E4M3FN floating point type where NaN
87 // is represented by having the exponent and mantissa set to all 1s.
88 // This behavior matches the FP8 E4M3 type described in
89 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
90 // as non-signalling, although the paper does not state whether the NaN
91 // values are signalling or not.
93
94 // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
95 // where NaN is represented by a sign bit of 1 and all 0s in the exponent
96 // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
97 // there is only one NaN value, it is treated as quiet NaN. This matches the
98 // behavior described in https://arxiv.org/abs/2206.02915 .
100};
101
102/* Represents floating point arithmetic semantics. */
104 /* The largest E such that 2^E is representable; this matches the
105 definition of IEEE 754. */
107
108 /* The smallest E such that 2^E is a normalized number; this
109 matches the definition of IEEE 754. */
111
112 /* Number of bits in the significand. This includes the integer
113 bit. */
114 unsigned int precision;
115
116 /* Number of bits actually used in the semantics. */
117 unsigned int sizeInBits;
118
120
122
123 /* Whether this semantics has an encoding for Zero */
124 bool hasZero = true;
125
126 /* Whether this semantics can represent signed values */
127 bool hasSignedRepr = true;
128
129 /* Whether the sign bit of this semantics is the most significant bit */
130 bool hasSignBitInMSB = true;
131};
132
133constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
134constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
135constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
136constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
137constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
138constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
139constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
141constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
142constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
144constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
146constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
148constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
149constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
150constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
151 127,
152 -127,
153 1,
154 8,
157 false,
158 false,
159 false};
160
161constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
163constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
165constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
167constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
168 80};
169constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
170constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
171constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
172 1023, -1022 + 53, 53 + 53, 128};
173
175 switch (S) {
176 case S_IEEEhalf:
177 return IEEEhalf();
178 case S_BFloat:
179 return BFloat();
180 case S_IEEEsingle:
181 return IEEEsingle();
182 case S_IEEEdouble:
183 return IEEEdouble();
184 case S_IEEEquad:
185 return IEEEquad();
187 return PPCDoubleDouble();
189 return PPCDoubleDoubleLegacy();
190 case S_Float8E5M2:
191 return Float8E5M2();
192 case S_Float8E5M2FNUZ:
193 return Float8E5M2FNUZ();
194 case S_Float8E4M3:
195 return Float8E4M3();
196 case S_Float8E4M3FN:
197 return Float8E4M3FN();
198 case S_Float8E4M3FNUZ:
199 return Float8E4M3FNUZ();
201 return Float8E4M3B11FNUZ();
202 case S_Float8E3M4:
203 return Float8E3M4();
204 case S_FloatTF32:
205 return FloatTF32();
206 case S_Float8E8M0FNU:
207 return Float8E8M0FNU();
208 case S_Float6E3M2FN:
209 return Float6E3M2FN();
210 case S_Float6E2M3FN:
211 return Float6E2M3FN();
212 case S_Float4E2M1FN:
213 return Float4E2M1FN();
215 return x87DoubleExtended();
216 }
217 llvm_unreachable("Unrecognised floating semantics");
218}
219
222 if (&Sem == &llvm::APFloat::IEEEhalf())
223 return S_IEEEhalf;
224 else if (&Sem == &llvm::APFloat::BFloat())
225 return S_BFloat;
226 else if (&Sem == &llvm::APFloat::IEEEsingle())
227 return S_IEEEsingle;
228 else if (&Sem == &llvm::APFloat::IEEEdouble())
229 return S_IEEEdouble;
230 else if (&Sem == &llvm::APFloat::IEEEquad())
231 return S_IEEEquad;
232 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
233 return S_PPCDoubleDouble;
234 else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
236 else if (&Sem == &llvm::APFloat::Float8E5M2())
237 return S_Float8E5M2;
238 else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
239 return S_Float8E5M2FNUZ;
240 else if (&Sem == &llvm::APFloat::Float8E4M3())
241 return S_Float8E4M3;
242 else if (&Sem == &llvm::APFloat::Float8E4M3FN())
243 return S_Float8E4M3FN;
244 else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
245 return S_Float8E4M3FNUZ;
246 else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
247 return S_Float8E4M3B11FNUZ;
248 else if (&Sem == &llvm::APFloat::Float8E3M4())
249 return S_Float8E3M4;
250 else if (&Sem == &llvm::APFloat::FloatTF32())
251 return S_FloatTF32;
252 else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
253 return S_Float8E8M0FNU;
254 else if (&Sem == &llvm::APFloat::Float6E3M2FN())
255 return S_Float6E3M2FN;
256 else if (&Sem == &llvm::APFloat::Float6E2M3FN())
257 return S_Float6E2M3FN;
258 else if (&Sem == &llvm::APFloat::Float4E2M1FN())
259 return S_Float4E2M1FN;
260 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
261 return S_x87DoubleExtended;
262 else
263 llvm_unreachable("Unknown floating semantics");
264}
265
267 const fltSemantics &B) {
268 return A.maxExponent <= B.maxExponent && A.minExponent >= B.minExponent &&
269 A.precision <= B.precision;
270}
271
272/* A tight upper bound on number of parts required to hold the value
273 pow(5, power) is
274
275 power * 815 / (351 * integerPartWidth) + 1
276
277 However, whilst the result may require only this many parts,
278 because we are multiplying two values to get it, the
279 multiplication may require an extra part with the excess part
280 being zero (consider the trivial case of 1 * 1, tcFullMultiply
281 requires two parts to hold the single-part result). So we add an
282 extra one to guarantee enough space whilst multiplying. */
283const unsigned int maxExponent = 16383;
284const unsigned int maxPrecision = 113;
286const unsigned int maxPowerOfFiveParts =
287 2 +
289
290unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
291 return semantics.precision;
292}
295 return semantics.maxExponent;
296}
299 return semantics.minExponent;
300}
301unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
302 return semantics.sizeInBits;
303}
305 bool isSigned) {
306 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
307 // at least one more bit than the MaxExponent to hold the max FP value.
308 unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
309 // Extra sign bit needed.
310 if (isSigned)
311 ++MinBitWidth;
312 return MinBitWidth;
313}
314
316 return semantics.hasZero;
317}
318
320 return semantics.hasSignedRepr;
321}
322
326
330
332 // Keep in sync with Type::isIEEELikeFPTy
333 return SemanticsToEnum(semantics) <= S_IEEEquad;
334}
335
337 return semantics.hasSignBitInMSB;
338}
339
341 const fltSemantics &Dst) {
342 // Exponent range must be larger.
343 if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
344 return false;
345
346 // If the mantissa is long enough, the result value could still be denormal
347 // with a larger exponent range.
348 //
349 // FIXME: This condition is probably not accurate but also shouldn't be a
350 // practical concern with existing types.
351 return Dst.precision >= Src.precision;
352}
353
355 return Sem.sizeInBits;
356}
357
358static constexpr APFloatBase::ExponentType
359exponentZero(const fltSemantics &semantics) {
360 return semantics.minExponent - 1;
361}
362
363static constexpr APFloatBase::ExponentType
364exponentInf(const fltSemantics &semantics) {
365 return semantics.maxExponent + 1;
366}
367
368static constexpr APFloatBase::ExponentType
369exponentNaN(const fltSemantics &semantics) {
372 return exponentZero(semantics);
373 if (semantics.hasSignedRepr)
374 return semantics.maxExponent;
375 }
376 return semantics.maxExponent + 1;
377}
378
379/* A bunch of private, handy routines. */
380
381static inline Error createError(const Twine &Err) {
383}
384
385static constexpr inline unsigned int partCountForBits(unsigned int bits) {
386 return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
388}
389
/* Map the character code C to its decimal digit value.  The codes
   '0'..'9' yield 0U-9U; every other input yields a value >= 10U
   (the subtraction is unsigned, so codes below '0' wrap around to
   large values). */
static inline unsigned int
decDigitValue(unsigned int c)
{
  const unsigned int zeroCode = '0';

  return c - zeroCode;
}
396
397/* Return the value of a decimal exponent of the form
398 [+-]ddddddd.
399
400 If the exponent overflows, returns a large exponent with the
401 appropriate sign. */
404 bool isNegative;
405 unsigned int absExponent;
406 const unsigned int overlargeExponent = 24000; /* FIXME. */
407 StringRef::iterator p = begin;
408
409 // Treat no exponent as 0 to match binutils
410 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
411 return 0;
412 }
413
414 isNegative = (*p == '-');
415 if (*p == '-' || *p == '+') {
416 p++;
417 if (p == end)
418 return createError("Exponent has no digits");
419 }
420
421 absExponent = decDigitValue(*p++);
422 if (absExponent >= 10U)
423 return createError("Invalid character in exponent");
424
425 for (; p != end; ++p) {
426 unsigned int value;
427
428 value = decDigitValue(*p);
429 if (value >= 10U)
430 return createError("Invalid character in exponent");
431
432 absExponent = absExponent * 10U + value;
433 if (absExponent >= overlargeExponent) {
434 absExponent = overlargeExponent;
435 break;
436 }
437 }
438
439 if (isNegative)
440 return -(int) absExponent;
441 else
442 return (int) absExponent;
443}
444
445/* This is ugly and needs cleaning up, but I don't immediately see
446 how whilst remaining safe. */
449 int exponentAdjustment) {
450 int unsignedExponent;
451 bool negative, overflow;
452 int exponent = 0;
453
454 if (p == end)
455 return createError("Exponent has no digits");
456
457 negative = *p == '-';
458 if (*p == '-' || *p == '+') {
459 p++;
460 if (p == end)
461 return createError("Exponent has no digits");
462 }
463
464 unsignedExponent = 0;
465 overflow = false;
466 for (; p != end; ++p) {
467 unsigned int value;
468
469 value = decDigitValue(*p);
470 if (value >= 10U)
471 return createError("Invalid character in exponent");
472
473 unsignedExponent = unsignedExponent * 10 + value;
474 if (unsignedExponent > 32767) {
475 overflow = true;
476 break;
477 }
478 }
479
480 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
481 overflow = true;
482
483 if (!overflow) {
484 exponent = unsignedExponent;
485 if (negative)
486 exponent = -exponent;
487 exponent += exponentAdjustment;
488 if (exponent > 32767 || exponent < -32768)
489 overflow = true;
490 }
491
492 if (overflow)
493 exponent = negative ? -32768: 32767;
494
495 return exponent;
496}
497
500 StringRef::iterator *dot) {
501 StringRef::iterator p = begin;
502 *dot = end;
503 while (p != end && *p == '0')
504 p++;
505
506 if (p != end && *p == '.') {
507 *dot = p++;
508
509 if (end - begin == 1)
510 return createError("Significand has no digits");
511
512 while (p != end && *p == '0')
513 p++;
514 }
515
516 return p;
517}
518
519/* Given a normal decimal floating point number of the form
520
521 dddd.dddd[eE][+-]ddd
522
523 where the decimal point and exponent are optional, fill out the
524 structure D. Exponent is appropriate if the significand is
525 treated as an integer, and normalizedExponent if the significand
526 is taken to have the decimal point after a single leading
527 non-zero digit.
528
529 If the value is zero, V->firstSigDigit points to a non-digit, and
530 the return exponent is zero.
531*/
533 const char *firstSigDigit;
534 const char *lastSigDigit;
537};
538
541 StringRef::iterator dot = end;
542
543 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
544 if (!PtrOrErr)
545 return PtrOrErr.takeError();
546 StringRef::iterator p = *PtrOrErr;
547
548 D->firstSigDigit = p;
549 D->exponent = 0;
550 D->normalizedExponent = 0;
551
552 for (; p != end; ++p) {
553 if (*p == '.') {
554 if (dot != end)
555 return createError("String contains multiple dots");
556 dot = p++;
557 if (p == end)
558 break;
559 }
560 if (decDigitValue(*p) >= 10U)
561 break;
562 }
563
564 if (p != end) {
565 if (*p != 'e' && *p != 'E')
566 return createError("Invalid character in significand");
567 if (p == begin)
568 return createError("Significand has no digits");
569 if (dot != end && p - begin == 1)
570 return createError("Significand has no digits");
571
572 /* p points to the first non-digit in the string */
573 auto ExpOrErr = readExponent(p + 1, end);
574 if (!ExpOrErr)
575 return ExpOrErr.takeError();
576 D->exponent = *ExpOrErr;
577
578 /* Implied decimal point? */
579 if (dot == end)
580 dot = p;
581 }
582
583 /* If number is all zeroes accept any exponent. */
584 if (p != D->firstSigDigit) {
585 /* Drop insignificant trailing zeroes. */
586 if (p != begin) {
587 do
588 do
589 p--;
590 while (p != begin && *p == '0');
591 while (p != begin && *p == '.');
592 }
593
594 /* Adjust the exponents for any decimal point. */
595 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
596 D->normalizedExponent = (D->exponent +
597 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
598 - (dot > D->firstSigDigit && dot < p)));
599 }
600
601 D->lastSigDigit = p;
602 return Error::success();
603}
604
605/* Return the trailing fraction of a hexadecimal number.
606 DIGITVALUE is the first hex digit of the fraction, P points to
607 the next digit. */
610 unsigned int digitValue) {
611 unsigned int hexDigit;
612
613 /* If the first trailing digit isn't 0 or 8 we can work out the
614 fraction immediately. */
615 if (digitValue > 8)
616 return lfMoreThanHalf;
617 else if (digitValue < 8 && digitValue > 0)
618 return lfLessThanHalf;
619
620 // Otherwise we need to find the first non-zero digit.
621 while (p != end && (*p == '0' || *p == '.'))
622 p++;
623
624 if (p == end)
625 return createError("Invalid trailing hexadecimal fraction!");
626
627 hexDigit = hexDigitValue(*p);
628
629 /* If we ran off the end it is exactly zero or one-half, otherwise
630 a little more. */
631 if (hexDigit == UINT_MAX)
632 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
633 else
634 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
635}
636
637/* Return the fraction lost were a bignum truncated losing the least
638 significant BITS bits. */
639static lostFraction
641 unsigned int partCount,
642 unsigned int bits)
643{
644 unsigned int lsb;
645
646 lsb = APInt::tcLSB(parts, partCount);
647
648 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
649 if (bits <= lsb)
650 return lfExactlyZero;
651 if (bits == lsb + 1)
652 return lfExactlyHalf;
653 if (bits <= partCount * APFloatBase::integerPartWidth &&
654 APInt::tcExtractBit(parts, bits - 1))
655 return lfMoreThanHalf;
656
657 return lfLessThanHalf;
658}
659
660/* Shift DST right BITS bits noting lost fraction. */
661static lostFraction
662shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
663{
664 lostFraction lost_fraction;
665
666 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
667
668 APInt::tcShiftRight(dst, parts, bits);
669
670 return lost_fraction;
671}
672
673/* Combine the effect of two lost fractions. */
674static lostFraction
676 lostFraction lessSignificant)
677{
678 if (lessSignificant != lfExactlyZero) {
679 if (moreSignificant == lfExactlyZero)
680 moreSignificant = lfLessThanHalf;
681 else if (moreSignificant == lfExactlyHalf)
682 moreSignificant = lfMoreThanHalf;
683 }
684
685 return moreSignificant;
686}
687
/* Bound, in half-ulps, on the error of a product of two floating
   point numbers.  The operands differ from the values they
   approximate by at most HUERR1 and HUERR2 half-ulps respectively;
   INEXACTMULTIPLY says whether the multiplication itself was inexact.
   The true error is strictly less than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger. */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int inexact = inexactMultiply ? 1U : 0U;
  const unsigned int operandError = HUerr1 + HUerr2;

  /* With exact operands only the multiply itself can contribute:
     <= inexactMultiply half-ulps. */
  return operandError == 0 ? inexact * 2 : inexact + 2 * operandError;
}
705
706/* The number of ulps from the boundary (zero, or half if ISNEAREST)
707 when the least significant BITS are truncated. BITS cannot be
708 zero. */
710ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
711 bool isNearest) {
712 unsigned int count, partBits;
713 APFloatBase::integerPart part, boundary;
714
715 assert(bits != 0);
716
717 bits--;
719 partBits = bits % APFloatBase::integerPartWidth + 1;
720
721 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
722
723 if (isNearest)
724 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
725 else
726 boundary = 0;
727
728 if (count == 0) {
729 if (part - boundary <= boundary - part)
730 return part - boundary;
731 else
732 return boundary - part;
733 }
734
735 if (part == boundary) {
736 while (--count)
737 if (parts[count])
738 return ~(APFloatBase::integerPart) 0; /* A lot. */
739
740 return parts[0];
741 } else if (part == boundary - 1) {
742 while (--count)
743 if (~parts[count])
744 return ~(APFloatBase::integerPart) 0; /* A lot. */
745
746 return -parts[0];
747 }
748
749 return ~(APFloatBase::integerPart) 0; /* A lot. */
750}
751
752/* Place pow(5, power) in DST, and return the number of parts used.
753 DST must be at least one part larger than size of the answer. */
754static unsigned int
755powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
756 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
758 pow5s[0] = 78125 * 5;
759
760 unsigned int partsCount = 1;
761 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
762 unsigned int result;
763 assert(power <= maxExponent);
764
765 p1 = dst;
766 p2 = scratch;
767
768 *p1 = firstEightPowers[power & 7];
769 power >>= 3;
770
771 result = 1;
772 pow5 = pow5s;
773
774 for (unsigned int n = 0; power; power >>= 1, n++) {
775 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
776 if (n != 0) {
777 APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
778 partsCount, partsCount);
779 partsCount *= 2;
780 if (pow5[partsCount - 1] == 0)
781 partsCount--;
782 }
783
784 if (power & 1) {
786
787 APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
788 result += partsCount;
789 if (p2[result - 1] == 0)
790 result--;
791
792 /* Now result is in p1 with partsCount parts and p2 is scratch
793 space. */
794 tmp = p1;
795 p1 = p2;
796 p2 = tmp;
797 }
798
799 pow5 += partsCount;
800 }
801
802 if (p1 != dst)
803 APInt::tcAssign(dst, p1, result);
804
805 return result;
806}
807
/* Hexadecimal digit tables.  Each carries an extra '0' after 'f'/'F'
   so that index 16 is valid: this avoids modular arithmetic when
   adding one to a digit while rounding up during hexadecimal
   output. */
static constexpr char hexDigitsLower[] = "0123456789abcdef0";
static constexpr char hexDigitsUpper[] = "0123456789ABCDEF0";

/* Spellings of the non-finite values, in lower and upper case. */
static constexpr char infinityL[] = "infinity";
static constexpr char infinityU[] = "INFINITY";
static constexpr char NaNL[] = "nan";
static constexpr char NaNU[] = "NAN";
816
817/* Write out an integerPart in hexadecimal, starting with the most
818 significant nibble. Write out exactly COUNT hexdigits, return
819 COUNT. */
820static unsigned int
821partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
822 const char *hexDigitChars)
823{
824 unsigned int result = count;
825
827
828 part >>= (APFloatBase::integerPartWidth - 4 * count);
829 while (count--) {
830 dst[count] = hexDigitChars[part & 0xf];
831 part >>= 4;
832 }
833
834 return result;
835}
836
/* Write N to DST as an unsigned decimal integer, most significant
   digit first.  No terminating NUL is written.  Returns a pointer
   to the character just past the last digit written. */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  unsigned int length = 0;

  /* Collect the digits least-significant first; zero still yields
     one digit. */
  do {
    digits[length++] = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  /* Emit them in reverse to get the conventional order. */
  while (length != 0)
    *dst++ = digits[--length];

  return dst;
}
854
855/* Write out a signed decimal integer. */
856static char *
857writeSignedDecimal (char *dst, int value)
858{
859 if (value < 0) {
860 *dst++ = '-';
861 dst = writeUnsignedDecimal(dst, -(unsigned) value);
862 } else {
863 dst = writeUnsignedDecimal(dst, value);
864 }
865
866 return dst;
867}
868
869// Compute the ULP of the input using a definition from:
870// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
871// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
872static APFloat harrisonUlp(const APFloat &X) {
873 const fltSemantics &Sem = X.getSemantics();
874 switch (X.getCategory()) {
875 case APFloat::fcNaN:
876 return APFloat::getQNaN(Sem);
878 return APFloat::getInf(Sem);
879 case APFloat::fcZero:
880 return APFloat::getSmallest(Sem);
882 break;
883 }
884 if (X.isDenormal() || X.isSmallestNormalized())
885 return APFloat::getSmallest(Sem);
886 int Exp = ilogb(X);
887 if (X.getExactLog2() != INT_MIN)
888 Exp -= 1;
889 return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
891}
892
893namespace detail {
894/* Constructors. */
895void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
896 unsigned int count;
897
898 semantics = ourSemantics;
899 count = partCount();
900 if (count > 1)
901 significand.parts = new integerPart[count];
902}
903
904void IEEEFloat::freeSignificand() {
905 if (needsCleanup())
906 delete [] significand.parts;
907}
908
909void IEEEFloat::assign(const IEEEFloat &rhs) {
910 assert(semantics == rhs.semantics);
911
912 sign = rhs.sign;
913 category = rhs.category;
914 exponent = rhs.exponent;
915 if (isFiniteNonZero() || category == fcNaN)
916 copySignificand(rhs);
917}
918
919void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
920 assert(isFiniteNonZero() || category == fcNaN);
921 assert(rhs.partCount() >= partCount());
922
923 APInt::tcAssign(significandParts(), rhs.significandParts(),
924 partCount());
925}
926
927/* Make this number a NaN, with an arbitrary but deterministic value
928 for the significand. If double or longer, this is a signalling NaN,
929 which may not be ideal. If float, this is QNaN(0). */
930void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
931 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
932 llvm_unreachable("This floating point format does not support NaN");
933
934 if (Negative && !semantics->hasSignedRepr)
936 "This floating point format does not support signed values");
937
938 category = fcNaN;
939 sign = Negative;
940 exponent = exponentNaN();
941
942 integerPart *significand = significandParts();
943 unsigned numParts = partCount();
944
945 APInt fill_storage;
946 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
947 // NanOnly types do not distinguish signalling and quiet NaN, so
948 // treat them all as quiet.
949 SNaN = false;
950 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
951 sign = true;
952 fill_storage = APInt::getZero(semantics->precision - 1);
953 } else {
954 fill_storage = APInt::getAllOnes(semantics->precision - 1);
955 }
956 fill = &fill_storage;
957 }
958
959 // Set the significand bits to the fill.
960 if (!fill || fill->getNumWords() < numParts)
961 APInt::tcSet(significand, 0, numParts);
962 if (fill) {
963 APInt::tcAssign(significand, fill->getRawData(),
964 std::min(fill->getNumWords(), numParts));
965
966 // Zero out the excess bits of the significand.
967 unsigned bitsToPreserve = semantics->precision - 1;
968 unsigned part = bitsToPreserve / 64;
969 bitsToPreserve %= 64;
970 significand[part] &= ((1ULL << bitsToPreserve) - 1);
971 for (part++; part != numParts; ++part)
972 significand[part] = 0;
973 }
974
975 unsigned QNaNBit =
976 (semantics->precision >= 2) ? (semantics->precision - 2) : 0;
977
978 if (SNaN) {
979 // We always have to clear the QNaN bit to make it an SNaN.
980 APInt::tcClearBit(significand, QNaNBit);
981
982 // If there are no bits set in the payload, we have to set
983 // *something* to make it a NaN instead of an infinity;
984 // conventionally, this is the next bit down from the QNaN bit.
985 if (APInt::tcIsZero(significand, numParts))
986 APInt::tcSetBit(significand, QNaNBit - 1);
987 } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
988 // The only NaN is a quiet NaN, and it has no bits set in the significand.
989 // Do nothing.
990 } else {
991 // We always have to set the QNaN bit to make it a QNaN.
992 APInt::tcSetBit(significand, QNaNBit);
993 }
994
995 // For x87 extended precision, we want to make a NaN, not a
996 // pseudo-NaN. Maybe we should expose the ability to make
997 // pseudo-NaNs?
998 if (semantics == &APFloatBase::semX87DoubleExtended)
999 APInt::tcSetBit(significand, QNaNBit + 1);
1000}
1001
1003 if (this != &rhs) {
1004 if (semantics != rhs.semantics) {
1005 freeSignificand();
1006 initialize(rhs.semantics);
1007 }
1008 assign(rhs);
1009 }
1010
1011 return *this;
1012}
1013
1015 freeSignificand();
1016
1017 semantics = rhs.semantics;
1018 significand = rhs.significand;
1019 exponent = rhs.exponent;
1020 category = rhs.category;
1021 sign = rhs.sign;
1022
1023 rhs.semantics = &APFloatBase::semBogus;
1024 return *this;
1025}
1026
1028 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1029 (APInt::tcExtractBit(significandParts(),
1030 semantics->precision - 1) == 0);
1031}
1032
1034 // The smallest number by magnitude in our format will be the smallest
1035 // denormal, i.e. the floating point number with exponent being minimum
1036 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1037 return isFiniteNonZero() && exponent == semantics->minExponent &&
1038 significandMSB() == 0;
1039}
1040
1042 return getCategory() == fcNormal && exponent == semantics->minExponent &&
1043 isSignificandAllZerosExceptMSB();
1044}
1045
1046unsigned int IEEEFloat::getNumHighBits() const {
1047 const unsigned int PartCount = partCountForBits(semantics->precision);
1048 const unsigned int Bits = PartCount * integerPartWidth;
1049
1050 // Compute how many bits are used in the final word.
1051 // When precision is just 1, it represents the 'Pth'
1052 // Precision bit and not the actual significand bit.
1053 const unsigned int NumHighBits = (semantics->precision > 1)
1054 ? (Bits - semantics->precision + 1)
1055 : (Bits - semantics->precision);
1056 return NumHighBits;
1057}
1058
1059bool IEEEFloat::isSignificandAllOnes() const {
1060 // Test if the significand excluding the integral bit is all ones. This allows
1061 // us to test for binade boundaries.
1062 const integerPart *Parts = significandParts();
1063 const unsigned PartCount = partCountForBits(semantics->precision);
1064 for (unsigned i = 0; i < PartCount - 1; i++)
1065 if (~Parts[i])
1066 return false;
1067
1068 // Set the unused high bits to all ones when we compare.
1069 const unsigned NumHighBits = getNumHighBits();
1070 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1071 "Can not have more high bits to fill than integerPartWidth");
1072 const integerPart HighBitFill =
1073 ~integerPart(0) << (integerPartWidth - NumHighBits);
1074 if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1075 return false;
1076
1077 return true;
1078}
1079
1080bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1081 // Test if the significand excluding the integral bit is all ones except for
1082 // the least significant bit.
1083 const integerPart *Parts = significandParts();
1084
1085 if (Parts[0] & 1)
1086 return false;
1087
1088 const unsigned PartCount = partCountForBits(semantics->precision);
1089 for (unsigned i = 0; i < PartCount - 1; i++) {
1090 if (~Parts[i] & ~unsigned{!i})
1091 return false;
1092 }
1093
1094 // Set the unused high bits to all ones when we compare.
1095 const unsigned NumHighBits = getNumHighBits();
1096 assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1097 "Can not have more high bits to fill than integerPartWidth");
1098 const integerPart HighBitFill = ~integerPart(0)
1099 << (integerPartWidth - NumHighBits);
1100 if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1101 return false;
1102
1103 return true;
1104}
1105
1106bool IEEEFloat::isSignificandAllZeros() const {
1107 // Test if the significand excluding the integral bit is all zeros. This
1108 // allows us to test for binade boundaries.
1109 const integerPart *Parts = significandParts();
1110 const unsigned PartCount = partCountForBits(semantics->precision);
1111
1112 for (unsigned i = 0; i < PartCount - 1; i++)
1113 if (Parts[i])
1114 return false;
1115
1116 // Compute how many bits are used in the final word.
1117 const unsigned NumHighBits = getNumHighBits();
1118 assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1119 "clear than integerPartWidth");
1120 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1121
1122 if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1123 return false;
1124
1125 return true;
1126}
1127
1128bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1129 const integerPart *Parts = significandParts();
1130 const unsigned PartCount = partCountForBits(semantics->precision);
1131
1132 for (unsigned i = 0; i < PartCount - 1; i++) {
1133 if (Parts[i])
1134 return false;
1135 }
1136
1137 const unsigned NumHighBits = getNumHighBits();
1138 const integerPart MSBMask = integerPart(1)
1139 << (integerPartWidth - NumHighBits);
1140 return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1141}
1142
// Body of a predicate that reports whether this value has the largest finite
// magnitude of its format: it requires the maximum exponent and then checks the
// significand pattern appropriate for the format's NaN encoding.
// NOTE(review): the line carrying the function signature is absent from this
// listing (the embedded numbering skips it) - restore it from the original.
1144 bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1145 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1146 semantics->nanEncoding == fltNanEncoding::AllOnes) {
1147 // The largest number by magnitude in our format will be the floating point
1148 // number with maximum exponent and with significand that is all ones except
1149 // the LSB.
// When the format has no significand at all, the exponent test alone decides.
1150 return (IsMaxExp && APFloat::hasSignificand(*semantics))
1151 ? isSignificandAllOnesExceptLSB()
1152 : IsMaxExp;
1153 } else {
1154 // The largest number by magnitude in our format will be the floating point
1155 // number with maximum exponent and with significand that is all ones.
1156 return IsMaxExp && isSignificandAllOnes();
1157 }
1158}
1159
// Body of a predicate that returns false for non-finite values and otherwise
// reports whether rounding a copy toward zero leaves the value unchanged.
// NOTE(review): the line carrying the function signature is absent from this
// listing - restore it from the original.
1161 // This could be made more efficient; I'm going for obviously correct.
1162 if (!isFinite()) return false;
1163 IEEEFloat truncated = *this;
1164 truncated.roundToIntegral(rmTowardZero);
1165 return compare(truncated) == cmpEqual;
1166}
1167
1168bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1169 if (this == &rhs)
1170 return true;
1171 if (semantics != rhs.semantics ||
1172 category != rhs.category ||
1173 sign != rhs.sign)
1174 return false;
1175 if (category==fcZero || category==fcInfinity)
1176 return true;
1177
1178 if (isFiniteNonZero() && exponent != rhs.exponent)
1179 return false;
1180
1181 return std::equal(significandParts(), significandParts() + partCount(),
1182 rhs.significandParts());
1183}
1184
// Body of a constructor that builds a positive fcNormal value whose lowest
// significand word is `value` and whose exponent is precision - 1.
// NOTE(review): the signature line is absent from this listing, and the
// embedded numbering also skips one line just before the closing brace -
// restore both from the original.
1186 initialize(&ourSemantics);
1187 sign = 0;
1188 category = fcNormal;
1189 zeroSignificand();
1190 exponent = ourSemantics.precision - 1;
1191 significandParts()[0] = value;
1193}
1194
// Body of a constructor that initializes the object for `ourSemantics` and
// sets it to positive zero, or to the smallest normalized value when the format
// has no zero.
// NOTE(review): the line carrying the constructor signature is absent from
// this listing - restore it from the original.
1196 initialize(&ourSemantics);
1197 // The Float8E8MOFNU format does not have a representation
1198 // for zero. So, use the closest representation instead.
1199 // Moreover, the all-zero encoding represents a valid
1200 // normal value (which is the smallestNormalized here).
1201 // Hence, we call makeSmallestNormalized (where category is
1202 // 'fcNormal') instead of makeZero (where category is 'fcZero').
1203 ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1204}
1205
// Delegating constructor: forwards to the IEEEFloat(ourSemantics) constructor
// so that the category is never left uninitialized.
// NOTE(review): the line carrying the constructor signature is absent from
// this listing - restore it from the original.
1206// Delegate to the previous constructor, because later copy constructor may
1207// actually inspects category, which can't be garbage.
1209 : IEEEFloat(ourSemantics) {}
1210
// Body of the copy constructor: set up storage for rhs's semantics, then copy
// rhs's state into this object.
// NOTE(review): the line carrying the constructor signature is absent from
// this listing - restore it from the original.
1212 initialize(rhs.semantics);
1213 assign(rhs);
1214}
1215
1216IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&APFloatBase::semBogus) {
1217 *this = std::move(rhs);
1218}
1219
1220IEEEFloat::~IEEEFloat() { freeSignificand(); }
1221
1222unsigned int IEEEFloat::partCount() const {
1223 return partCountForBits(semantics->precision + 1);
1224}
1225
1226const APFloat::integerPart *IEEEFloat::significandParts() const {
1227 return const_cast<IEEEFloat *>(this)->significandParts();
1228}
1229
1230APFloat::integerPart *IEEEFloat::significandParts() {
1231 if (partCount() > 1)
1232 return significand.parts;
1233 else
1234 return &significand.part;
1235}
1236
1237void IEEEFloat::zeroSignificand() {
1238 APInt::tcSet(significandParts(), 0, partCount());
1239}
1240
1241/* Increment an fcNormal floating point number's significand. */
1242void IEEEFloat::incrementSignificand() {
1243 integerPart carry;
1244
1245 carry = APInt::tcIncrement(significandParts(), partCount());
1246
1247 /* Our callers should never cause us to overflow. */
1248 assert(carry == 0);
1249 (void)carry;
1250}
1251
1252/* Add the significand of the RHS. Returns the carry flag. */
1253APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1254 integerPart *parts;
1255
1256 parts = significandParts();
1257
1258 assert(semantics == rhs.semantics);
1259 assert(exponent == rhs.exponent);
1260
1261 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1262}
1263
1264/* Subtract the significand of the RHS with a borrow flag. Returns
1265 the borrow flag. */
1266APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1267 integerPart borrow) {
1268 integerPart *parts;
1269
1270 parts = significandParts();
1271
1272 assert(semantics == rhs.semantics);
1273 assert(exponent == rhs.exponent);
1274
1275 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1276 partCount());
1277}
1278
1279/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1280 on to the full-precision result of the multiplication. Returns the
1281 lost fraction. */
1282lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1283 IEEEFloat addend,
1284 bool ignoreAddend) {
1285 unsigned int omsb; // One, not zero, based MSB.
1286 unsigned int partsCount, newPartsCount, precision;
1287 integerPart *lhsSignificand;
1288 integerPart scratch[4];
1289 integerPart *fullSignificand;
1290 lostFraction lost_fraction;
1291 bool ignored;
1292
1293 assert(semantics == rhs.semantics);
1294
1295 precision = semantics->precision;
1296
1297 // Allocate space for twice as many bits as the original significand, plus one
1298 // extra bit for the addition to overflow into.
1299 newPartsCount = partCountForBits(precision * 2 + 1);
1300
1301 if (newPartsCount > 4)
1302 fullSignificand = new integerPart[newPartsCount];
1303 else
1304 fullSignificand = scratch;
1305
1306 lhsSignificand = significandParts();
1307 partsCount = partCount();
1308
1309 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1310 rhs.significandParts(), partsCount, partsCount);
1311
1312 lost_fraction = lfExactlyZero;
1313 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1314 exponent += rhs.exponent;
1315
1316 // Assume the operands involved in the multiplication are single-precision
1317 // FP, and the two multiplicants are:
1318 // *this = a23 . a22 ... a0 * 2^e1
1319 // rhs = b23 . b22 ... b0 * 2^e2
1320 // the result of multiplication is:
1321 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1322 // Note that there are three significant bits at the left-hand side of the
1323 // radix point: two for the multiplication, and an overflow bit for the
1324 // addition (that will always be zero at this point). Move the radix point
1325 // toward left by two bits, and adjust exponent accordingly.
1326 exponent += 2;
1327
1328 if (!ignoreAddend && addend.isNonZero()) {
1329 // The intermediate result of the multiplication has "2 * precision"
1330 // signicant bit; adjust the addend to be consistent with mul result.
1331 //
1332 Significand savedSignificand = significand;
1333 const fltSemantics *savedSemantics = semantics;
1334 fltSemantics extendedSemantics;
1336 unsigned int extendedPrecision;
1337
1338 // Normalize our MSB to one below the top bit to allow for overflow.
1339 extendedPrecision = 2 * precision + 1;
1340 if (omsb != extendedPrecision - 1) {
1341 assert(extendedPrecision > omsb);
1342 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1343 (extendedPrecision - 1) - omsb);
1344 exponent -= (extendedPrecision - 1) - omsb;
1345 }
1346
1347 /* Create new semantics. */
1348 extendedSemantics = *semantics;
1349 extendedSemantics.precision = extendedPrecision;
1350
1351 if (newPartsCount == 1)
1352 significand.part = fullSignificand[0];
1353 else
1354 significand.parts = fullSignificand;
1355 semantics = &extendedSemantics;
1356
1357 // Make a copy so we can convert it to the extended semantics.
1358 // Note that we cannot convert the addend directly, as the extendedSemantics
1359 // is a local variable (which we take a reference to).
1360 IEEEFloat extendedAddend(addend);
1361 status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
1362 &ignored);
1363 assert(status == APFloat::opOK);
1364 (void)status;
1365
1366 // Shift the significand of the addend right by one bit. This guarantees
1367 // that the high bit of the significand is zero (same as fullSignificand),
1368 // so the addition will overflow (if it does overflow at all) into the top bit.
1369 lost_fraction = extendedAddend.shiftSignificandRight(1);
1370 assert(lost_fraction == lfExactlyZero &&
1371 "Lost precision while shifting addend for fused-multiply-add.");
1372
1373 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1374
1375 /* Restore our state. */
1376 if (newPartsCount == 1)
1377 fullSignificand[0] = significand.part;
1378 significand = savedSignificand;
1379 semantics = savedSemantics;
1380
1381 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1382 }
1383
1384 // Convert the result having "2 * precision" significant-bits back to the one
1385 // having "precision" significant-bits. First, move the radix point from
1386 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1387 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1388 exponent -= precision + 1;
1389
1390 // In case MSB resides at the left-hand side of radix point, shift the
1391 // mantissa right by some amount to make sure the MSB reside right before
1392 // the radix point (i.e. "MSB . rest-significant-bits").
1393 //
1394 // Note that the result is not normalized when "omsb < precision". So, the
1395 // caller needs to call IEEEFloat::normalize() if normalized value is
1396 // expected.
1397 if (omsb > precision) {
1398 unsigned int bits, significantParts;
1399 lostFraction lf;
1400
1401 bits = omsb - precision;
1402 significantParts = partCountForBits(omsb);
1403 lf = shiftRight(fullSignificand, significantParts, bits);
1404 lost_fraction = combineLostFractions(lf, lost_fraction);
1405 exponent += bits;
1406 }
1407
1408 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1409
1410 if (newPartsCount > 4)
1411 delete [] fullSignificand;
1412
1413 return lost_fraction;
1414}
1415
1416lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1417 // When the given semantics has zero, the addend here is a zero.
1418 // i.e . it belongs to the 'fcZero' category.
1419 // But when the semantics does not support zero, we need to
1420 // explicitly convey that this addend should be ignored
1421 // for multiplication.
1422 return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1423}
1424
1425/* Multiply the significands of LHS and RHS to DST. */
1426lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1427 unsigned int bit, i, partsCount;
1428 const integerPart *rhsSignificand;
1429 integerPart *lhsSignificand, *dividend, *divisor;
1430 integerPart scratch[4];
1431 lostFraction lost_fraction;
1432
1433 assert(semantics == rhs.semantics);
1434
1435 lhsSignificand = significandParts();
1436 rhsSignificand = rhs.significandParts();
1437 partsCount = partCount();
1438
1439 if (partsCount > 2)
1440 dividend = new integerPart[partsCount * 2];
1441 else
1442 dividend = scratch;
1443
1444 divisor = dividend + partsCount;
1445
1446 /* Copy the dividend and divisor as they will be modified in-place. */
1447 for (i = 0; i < partsCount; i++) {
1448 dividend[i] = lhsSignificand[i];
1449 divisor[i] = rhsSignificand[i];
1450 lhsSignificand[i] = 0;
1451 }
1452
1453 exponent -= rhs.exponent;
1454
1455 unsigned int precision = semantics->precision;
1456
1457 /* Normalize the divisor. */
1458 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1459 if (bit) {
1460 exponent += bit;
1461 APInt::tcShiftLeft(divisor, partsCount, bit);
1462 }
1463
1464 /* Normalize the dividend. */
1465 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1466 if (bit) {
1467 exponent -= bit;
1468 APInt::tcShiftLeft(dividend, partsCount, bit);
1469 }
1470
1471 /* Ensure the dividend >= divisor initially for the loop below.
1472 Incidentally, this means that the division loop below is
1473 guaranteed to set the integer bit to one. */
1474 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1475 exponent--;
1476 APInt::tcShiftLeft(dividend, partsCount, 1);
1477 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1478 }
1479
1480 /* Long division. */
1481 for (bit = precision; bit; bit -= 1) {
1482 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1483 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1484 APInt::tcSetBit(lhsSignificand, bit - 1);
1485 }
1486
1487 APInt::tcShiftLeft(dividend, partsCount, 1);
1488 }
1489
1490 /* Figure out the lost fraction. */
1491 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1492
1493 if (cmp > 0)
1494 lost_fraction = lfMoreThanHalf;
1495 else if (cmp == 0)
1496 lost_fraction = lfExactlyHalf;
1497 else if (APInt::tcIsZero(dividend, partsCount))
1498 lost_fraction = lfExactlyZero;
1499 else
1500 lost_fraction = lfLessThanHalf;
1501
1502 if (partsCount > 2)
1503 delete [] dividend;
1504
1505 return lost_fraction;
1506}
1507
1508unsigned int IEEEFloat::significandMSB() const {
1509 return APInt::tcMSB(significandParts(), partCount());
1510}
1511
1512unsigned int IEEEFloat::significandLSB() const {
1513 return APInt::tcLSB(significandParts(), partCount());
1514}
1515
1516/* Note that a zero result is NOT normalized to fcZero. */
1517lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1518 /* Our exponent should not overflow. */
1519 assert((ExponentType) (exponent + bits) >= exponent);
1520
1521 exponent += bits;
1522
1523 return shiftRight(significandParts(), partCount(), bits);
1524}
1525
1526/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1527void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1528 assert(bits < semantics->precision ||
1529 (semantics->precision == 1 && bits <= 1));
1530
1531 if (bits) {
1532 unsigned int partsCount = partCount();
1533
1534 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1535 exponent -= bits;
1536
1537 assert(!APInt::tcIsZero(significandParts(), partsCount));
1538 }
1539}
1540
// Body of a magnitude comparison between this value and rhs: exponents are
// compared first, and the significands only break ties. The sign is not
// consulted. Returns cmpGreaterThan, cmpLessThan or cmpEqual.
// NOTE(review): the signature line is absent from this listing, and the
// embedded numbering also skips one line between the two visible asserts -
// restore both from the original.
1542 int compare;
1543
1544 assert(semantics == rhs.semantics);
1546 assert(rhs.isFiniteNonZero());
1547
1548 compare = exponent - rhs.exponent;
1549
1550 /* If exponents are equal, do an unsigned bignum comparison of the
1551 significands. */
1552 if (compare == 0)
1553 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1554 partCount());
1555
1556 if (compare > 0)
1557 return cmpGreaterThan;
1558 else if (compare < 0)
1559 return cmpLessThan;
1560 else
1561 return cmpEqual;
1562}
1563
1564/* Set the least significant BITS bits of a bignum, clear the
1565 rest. */
1566static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1567 unsigned bits) {
1568 unsigned i = 0;
1569 while (bits > APInt::APINT_BITS_PER_WORD) {
1570 dst[i++] = ~(APInt::WordType)0;
1572 }
1573
1574 if (bits)
1575 dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1576
1577 while (i < parts)
1578 dst[i++] = 0;
1579}
1580
// Overflow handling. For the rounding modes listed in the visible condition
// the value becomes a NaN or an infinity and opOverflow | opInexact is
// returned. Otherwise it becomes the largest finite number (with the LSB
// cleared for NanOnly/AllOnes formats) and opInexact is returned.
// NOTE(review): the embedded numbering skips two lines here - one right after
// the signature and one right before the makeNaN call. As shown, the `else`
// has no matching `if` and the braces do not balance; presumably both missing
// lines are `if` conditions on semantics->nonFiniteBehavior. Restore them from
// the original before building.
1581/* Handle overflow. Sign is preserved. We either become infinity or
1582 the largest finite number. */
1583APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1585 /* Infinity? */
1586 if (rounding_mode == rmNearestTiesToEven ||
1587 rounding_mode == rmNearestTiesToAway ||
1588 (rounding_mode == rmTowardPositive && !sign) ||
1589 (rounding_mode == rmTowardNegative && sign)) {
1591 makeNaN(false, sign);
1592 else
1593 category = fcInfinity;
1594 return static_cast<opStatus>(opOverflow | opInexact);
1595 }
1596 }
1597
1598 /* Otherwise we become the largest finite number. */
1599 category = fcNormal;
1600 exponent = semantics->maxExponent;
1601 tcSetLeastSignificantBits(significandParts(), partCount(),
1602 semantics->precision);
1603 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1604 semantics->nanEncoding == fltNanEncoding::AllOnes)
1605 APInt::tcClearBit(significandParts(), 0);
1606
1607 return opInexact;
1608}
1609
1610/* Returns TRUE if, when truncating the current number, with BIT the
1611 new LSB, with the given lost fraction and rounding mode, the result
1612 would need to be rounded away from zero (i.e., by increasing the
1613 signficand). This routine must work for fcZero of both signs, and
1614 fcNormal numbers. */
1615bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1616 lostFraction lost_fraction,
1617 unsigned int bit) const {
1618 /* NaNs and infinities should not have lost fractions. */
1619 assert(isFiniteNonZero() || category == fcZero);
1620
1621 /* Current callers never pass this so we don't handle it. */
1622 assert(lost_fraction != lfExactlyZero);
1623
1624 switch (rounding_mode) {
1626 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1627
1629 if (lost_fraction == lfMoreThanHalf)
1630 return true;
1631
1632 /* Our zeroes don't have a significand to test. */
1633 if (lost_fraction == lfExactlyHalf && category != fcZero)
1634 return APInt::tcExtractBit(significandParts(), bit);
1635
1636 return false;
1637
1638 case rmTowardZero:
1639 return false;
1640
1641 case rmTowardPositive:
1642 return !sign;
1643
1644 case rmTowardNegative:
1645 return sign;
1646
1647 default:
1648 break;
1649 }
1650 llvm_unreachable("Invalid rounding mode found");
1651}
1652
1653APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1654 lostFraction lost_fraction) {
1655 unsigned int omsb; /* One, not zero, based MSB. */
1656 int exponentChange;
1657
1658 if (!isFiniteNonZero())
1659 return opOK;
1660
1661 /* Before rounding normalize the exponent of fcNormal numbers. */
1662 omsb = significandMSB() + 1;
1663
1664 // Only skip this `if` if the value is exactly zero.
1665 if (omsb || lost_fraction != lfExactlyZero) {
1666 /* OMSB is numbered from 1. We want to place it in the integer
1667 bit numbered PRECISION if possible, with a compensating change in
1668 the exponent. */
1669 exponentChange = omsb - semantics->precision;
1670
1671 /* If the resulting exponent is too high, overflow according to
1672 the rounding mode. */
1673 if (exponent + exponentChange > semantics->maxExponent)
1674 return handleOverflow(rounding_mode);
1675
1676 /* Subnormal numbers have exponent minExponent, and their MSB
1677 is forced based on that. */
1678 if (exponent + exponentChange < semantics->minExponent)
1679 exponentChange = semantics->minExponent - exponent;
1680
1681 /* Shifting left is easy as we don't lose precision. */
1682 if (exponentChange < 0) {
1683 assert(lost_fraction == lfExactlyZero);
1684
1685 shiftSignificandLeft(-exponentChange);
1686
1687 return opOK;
1688 }
1689
1690 if (exponentChange > 0) {
1691 lostFraction lf;
1692
1693 /* Shift right and capture any new lost fraction. */
1694 lf = shiftSignificandRight(exponentChange);
1695
1696 lost_fraction = combineLostFractions(lf, lost_fraction);
1697
1698 /* Keep OMSB up-to-date. */
1699 if (omsb > (unsigned) exponentChange)
1700 omsb -= exponentChange;
1701 else
1702 omsb = 0;
1703 }
1704 }
1705
1706 // The all-ones values is an overflow if NaN is all ones. If NaN is
1707 // represented by negative zero, then it is a valid finite value.
1708 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1709 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1710 exponent == semantics->maxExponent && isSignificandAllOnes())
1711 return handleOverflow(rounding_mode);
1712
1713 /* Now round the number according to rounding_mode given the lost
1714 fraction. */
1715
1716 /* As specified in IEEE 754, since we do not trap we do not report
1717 underflow for exact results. */
1718 if (lost_fraction == lfExactlyZero) {
1719 /* Canonicalize zeroes. */
1720 if (omsb == 0) {
1721 category = fcZero;
1722 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1723 sign = false;
1724 if (!semantics->hasZero)
1726 }
1727
1728 return opOK;
1729 }
1730
1731 /* Increment the significand if we're rounding away from zero. */
1732 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1733 if (omsb == 0)
1734 exponent = semantics->minExponent;
1735
1736 incrementSignificand();
1737 omsb = significandMSB() + 1;
1738
1739 /* Did the significand increment overflow? */
1740 if (omsb == (unsigned) semantics->precision + 1) {
1741 /* Renormalize by incrementing the exponent and shifting our
1742 significand right one. However if we already have the
1743 maximum exponent we overflow to infinity. */
1744 if (exponent == semantics->maxExponent)
1745 // Invoke overflow handling with a rounding mode that will guarantee
1746 // that the result gets turned into the correct infinity representation.
1747 // This is needed instead of just setting the category to infinity to
1748 // account for 8-bit floating point types that have no inf, only NaN.
1749 return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1750
1751 shiftSignificandRight(1);
1752
1753 return opInexact;
1754 }
1755
1756 // The all-ones values is an overflow if NaN is all ones. If NaN is
1757 // represented by negative zero, then it is a valid finite value.
1758 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1759 semantics->nanEncoding == fltNanEncoding::AllOnes &&
1760 exponent == semantics->maxExponent && isSignificandAllOnes())
1761 return handleOverflow(rounding_mode);
1762 }
1763
1764 /* The normal case - we were and are not denormal, and any
1765 significand increment above didn't overflow. */
1766 if (omsb == semantics->precision)
1767 return opInexact;
1768
1769 /* We have a non-zero denormal. */
1770 assert(omsb < semantics->precision);
1771
1772 /* Canonicalize zeroes. */
1773 if (omsb == 0) {
1774 category = fcZero;
1775 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1776 sign = false;
1777 // This condition handles the case where the semantics
1778 // does not have zero but uses the all-zero encoding
1779 // to represent the smallest normal value.
1780 if (!semantics->hasZero)
1782 }
1783
1784 /* The fcZero case is a denormal that underflowed to zero. */
1785 return (opStatus) (opUnderflow | opInexact);
1786}
1787
// Handles the operand-category combinations of an addition or subtraction that
// need no significand arithmetic, dispatching on the packed
// (category, rhs.category) key. The final opDivByZero return is not a real
// error: the caller treats it as "not a simple case" and does the arithmetic.
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the embedded numbering skips them), so the grouping of
// the statements below cannot be confirmed here - restore the labels from the
// original before building.
1788APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1789 bool subtract) {
1790 switch (PackCategoriesIntoKey(category, rhs.category)) {
1791 default:
1792 llvm_unreachable(nullptr);
1793
1797 assign(rhs);
1798 [[fallthrough]];
1803 if (isSignaling()) {
1804 makeQuiet();
1805 return opInvalidOp;
1806 }
1807 return rhs.isSignaling() ? opInvalidOp : opOK;
1808
1812 return opOK;
1813
1816 category = fcInfinity;
1817 sign = rhs.sign ^ subtract;
1818 return opOK;
1819
1821 assign(rhs);
1822 sign = rhs.sign ^ subtract;
1823 return opOK;
1824
1826 /* Sign depends on rounding mode; handled by caller. */
1827 return opOK;
1828
1830 /* Differently signed infinities can only be validly
1831 subtracted. */
1832 if (((sign ^ rhs.sign)!=0) != subtract) {
1833 makeNaN();
1834 return opInvalidOp;
1835 }
1836
1837 return opOK;
1838
1840 return opDivByZero;
1841 }
1842}
1843
// Adds or subtracts the significands of two normal numbers after aligning
// their exponents, and returns the fraction lost by the alignment shift. The
// effective operation on the magnitudes is a subtraction when exactly one of
// `subtract` and "the signs differ" holds. In the subtraction path the smaller
// magnitude is always subtracted from the larger one, flipping `sign` when the
// operands had to be swapped.
// NOTE(review): the embedded numbering skips one line right after the
// `!semantics->hasSignedRepr` check; as shown, the string literal that follows
// is not part of any call. Restore the missing line from the original.
1844/* Add or subtract two normal numbers. */
1845lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1846 bool subtract) {
1847 integerPart carry = 0;
1848 lostFraction lost_fraction;
1849 int bits;
1850
1851 /* Determine if the operation on the absolute values is effectively
1852 an addition or subtraction. */
1853 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1854
1855 /* Are we bigger exponent-wise than the RHS? */
1856 bits = exponent - rhs.exponent;
1857
1858 /* Subtraction is more subtle than one might naively expect. */
1859 if (subtract) {
1860 if ((bits < 0) && !semantics->hasSignedRepr)
1862 "This floating point format does not support signed values");
1863
1864 IEEEFloat temp_rhs(rhs);
1865 bool lost_fraction_is_from_rhs = false;
1866
// The operand with the smaller exponent is shifted right by one bit less than
// the exponent difference while the other is shifted left by one, which keeps
// one extra bit of the smaller operand in play.
1867 if (bits == 0)
1868 lost_fraction = lfExactlyZero;
1869 else if (bits > 0) {
1870 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1871 lost_fraction_is_from_rhs = true;
1872 shiftSignificandLeft(1);
1873 } else {
1874 lost_fraction = shiftSignificandRight(-bits - 1);
1875 temp_rhs.shiftSignificandLeft(1);
1876 }
1877
1878 // Should we reverse the subtraction.
1879 cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
1880 if (cmp_result == cmpLessThan) {
1881 bool borrow =
1882 lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs;
1883 if (borrow) {
1884 // The lost fraction is being subtracted, borrow from the significand
1885 // and invert `lost_fraction`.
1886 if (lost_fraction == lfLessThanHalf)
1887 lost_fraction = lfMoreThanHalf;
1888 else if (lost_fraction == lfMoreThanHalf)
1889 lost_fraction = lfLessThanHalf;
1890 }
1891 carry = temp_rhs.subtractSignificand(*this, borrow);
1892 copySignificand(temp_rhs);
1893 sign = !sign;
1894 } else if (cmp_result == cmpGreaterThan) {
1895 bool borrow = lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs;
1896 if (borrow) {
1897 // The lost fraction is being subtracted, borrow from the significand
1898 // and invert `lost_fraction`.
1899 if (lost_fraction == lfLessThanHalf)
1900 lost_fraction = lfMoreThanHalf;
1901 else if (lost_fraction == lfMoreThanHalf)
1902 lost_fraction = lfLessThanHalf;
1903 }
1904 carry = subtractSignificand(temp_rhs, borrow);
1905 } else { // cmpEqual
1906 zeroSignificand();
1907 if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
1908 // rhs is slightly larger due to the lost fraction, flip the sign.
1909 sign = !sign;
1910 }
1911 }
1912
1913 /* The code above is intended to ensure that no borrow is
1914 necessary. */
1915 assert(!carry);
1916 (void)carry;
1917 } else {
// Effective addition: shift whichever operand has the smaller exponent right
// by the full exponent difference, then add.
1918 if (bits > 0) {
1919 IEEEFloat temp_rhs(rhs);
1920
1921 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1922 carry = addSignificand(temp_rhs);
1923 } else {
1924 lost_fraction = shiftSignificandRight(-bits);
1925 carry = addSignificand(rhs);
1926 }
1927
1928 /* We have a guard bit; generating a carry cannot happen. */
1929 assert(!carry);
1930 (void)carry;
1931 }
1932
1933 return lost_fraction;
1934}
1935
// Handles the operand-category combinations of a multiplication that need no
// significand arithmetic, dispatching on the packed (category, rhs.category)
// key and returning the status for that combination.
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the embedded numbering skips them), so the grouping of
// the statements below cannot be confirmed here - restore the labels from the
// original before building.
1936APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1937 switch (PackCategoriesIntoKey(category, rhs.category)) {
1938 default:
1939 llvm_unreachable(nullptr);
1940
1944 assign(rhs);
1945 sign = false;
1946 [[fallthrough]];
1951 sign ^= rhs.sign; // restore the original sign
1952 if (isSignaling()) {
1953 makeQuiet();
1954 return opInvalidOp;
1955 }
1956 return rhs.isSignaling() ? opInvalidOp : opOK;
1957
1961 category = fcInfinity;
1962 return opOK;
1963
1967 category = fcZero;
1968 return opOK;
1969
1972 makeNaN();
1973 return opInvalidOp;
1974
1976 return opOK;
1977 }
1978}
1979
// Handles the operand-category combinations of a division that need no
// significand arithmetic, dispatching on the packed (category, rhs.category)
// key and returning the status for that combination. The opDivByZero branch
// produces a NaN for NanOnly formats and an infinity otherwise.
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the embedded numbering skips them), so the grouping of
// the statements below cannot be confirmed here - restore the labels from the
// original before building.
1980APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1981 switch (PackCategoriesIntoKey(category, rhs.category)) {
1982 default:
1983 llvm_unreachable(nullptr);
1984
1988 assign(rhs);
1989 sign = false;
1990 [[fallthrough]];
1995 sign ^= rhs.sign; // restore the original sign
1996 if (isSignaling()) {
1997 makeQuiet();
1998 return opInvalidOp;
1999 }
2000 return rhs.isSignaling() ? opInvalidOp : opOK;
2001
2006 return opOK;
2007
2009 category = fcZero;
2010 return opOK;
2011
2013 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
2014 makeNaN(false, sign);
2015 else
2016 category = fcInfinity;
2017 return opDivByZero;
2018
2021 makeNaN();
2022 return opInvalidOp;
2023
2025 return opOK;
2026 }
2027}
2028
// Handles the operand-category combinations of a mod operation that need no
// significand arithmetic, dispatching on the packed (category, rhs.category)
// key and returning the status for that combination.
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the embedded numbering skips them), so the grouping of
// the statements below cannot be confirmed here - restore the labels from the
// original before building.
2029APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
2030 switch (PackCategoriesIntoKey(category, rhs.category)) {
2031 default:
2032 llvm_unreachable(nullptr);
2033
2037 assign(rhs);
2038 [[fallthrough]];
2043 if (isSignaling()) {
2044 makeQuiet();
2045 return opInvalidOp;
2046 }
2047 return rhs.isSignaling() ? opInvalidOp : opOK;
2048
2052 return opOK;
2053
2059 makeNaN();
2060 return opInvalidOp;
2061
2063 return opOK;
2064 }
2065}
2066
// Handles the operand-category combinations of a remainder operation that need
// no significand arithmetic, dispatching on the packed
// (category, rhs.category) key. The final opDivByZero return is a placeholder
// status telling the caller that this was not a special case.
// NOTE(review): the `case PackCategoriesIntoKey(...)` label lines are absent
// from this listing (the embedded numbering skips them), so the grouping of
// the statements below cannot be confirmed here - restore the labels from the
// original before building.
2067APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
2068 switch (PackCategoriesIntoKey(category, rhs.category)) {
2069 default:
2070 llvm_unreachable(nullptr);
2071
2075 assign(rhs);
2076 [[fallthrough]];
2081 if (isSignaling()) {
2082 makeQuiet();
2083 return opInvalidOp;
2084 }
2085 return rhs.isSignaling() ? opInvalidOp : opOK;
2086
2090 return opOK;
2091
2097 makeNaN();
2098 return opInvalidOp;
2099
2101 return opDivByZero; // fake status, indicating this is not a special case
2102 }
2103}
2104
// Flips the sign bit, except that zeros and NaNs are left untouched in formats
// whose NaN is encoded as negative zero.
// NOTE(review): the line carrying the function signature is absent from this
// listing - restore it from the original.
2105/* Change sign. */
2107 // With NaN-as-negative-zero, neither NaN or negative zero can change
2108 // their signs.
2109 if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2110 (isZero() || isNaN()))
2111 return;
2112 /* Look mummy, this one's easy. */
2113 sign = !sign;
2114}
2115
2116/* Normalized addition or subtraction. */
2117APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2118 roundingMode rounding_mode,
2119 bool subtract) {
2120 opStatus fs;
2121
2122 fs = addOrSubtractSpecials(rhs, subtract);
2123
2124 /* This return code means it was not a simple case. */
2125 if (fs == opDivByZero) {
2126 lostFraction lost_fraction;
2127
2128 lost_fraction = addOrSubtractSignificand(rhs, subtract);
2129 fs = normalize(rounding_mode, lost_fraction);
2130
2131 /* Can only be zero if we lost no fraction. */
2132 assert(category != fcZero || lost_fraction == lfExactlyZero);
2133 }
2134
2135 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2136 positive zero unless rounding to minus infinity, except that
2137 adding two like-signed zeroes gives that zero. */
2138 if (category == fcZero) {
2139 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2140 sign = (rounding_mode == rmTowardNegative);
2141 // NaN-in-negative-zero means zeros need to be normalized to +0.
2142 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2143 sign = false;
2144 }
2145
2146 return fs;
2147}
2148
// Adds rhs to this value by forwarding to addOrSubtract with subtract = false.
// NOTE(review): the first line of the function signature is absent from this
// listing - restore it from the original.
2149/* Normalized addition. */
2151 roundingMode rounding_mode) {
2152 return addOrSubtract(rhs, rounding_mode, false);
2153}
2154
// Subtracts rhs from this value by forwarding to addOrSubtract with
// subtract = true.
// NOTE(review): the first line of the function signature is absent from this
// listing - restore it from the original.
2155/* Normalized subtraction. */
2157 roundingMode rounding_mode) {
2158 return addOrSubtract(rhs, rounding_mode, true);
2159}
2160
// Multiplies this value by rhs. The sign is combined first, special category
// combinations are resolved by multiplySpecials, and only a result that is
// still finite and non-zero goes through significand multiplication and
// normalization. opInexact is added whenever a fraction was lost.
// NOTE(review): the first line of the function signature is absent from this
// listing - restore it from the original.
2161/* Normalized multiply. */
2163 roundingMode rounding_mode) {
2164 opStatus fs;
2165
2166 sign ^= rhs.sign;
2167 fs = multiplySpecials(rhs);
2168
// Formats that encode NaN as negative zero cannot represent a negative zero.
2169 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2170 sign = false;
2171 if (isFiniteNonZero()) {
2172 lostFraction lost_fraction = multiplySignificand(rhs);
2173 fs = normalize(rounding_mode, lost_fraction);
2174 if (lost_fraction != lfExactlyZero)
2175 fs = (opStatus) (fs | opInexact);
2176 }
2177
2178 return fs;
2179}
2180
// Divides this value by rhs. The sign is combined first, special category
// combinations are resolved by divideSpecials, and only a result that is still
// finite and non-zero goes through significand division and normalization.
// opInexact is added whenever a fraction was lost.
// NOTE(review): the first line of the function signature is absent from this
// listing - restore it from the original.
2181/* Normalized divide. */
2183 roundingMode rounding_mode) {
2184 opStatus fs;
2185
2186 sign ^= rhs.sign;
2187 fs = divideSpecials(rhs);
2188
// Formats that encode NaN as negative zero cannot represent a negative zero.
2189 if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2190 sign = false;
2191 if (isFiniteNonZero()) {
2192 lostFraction lost_fraction = divideSignificand(rhs);
2193 fs = normalize(rounding_mode, lost_fraction);
2194 if (lost_fraction != lfExactlyZero)
2195 fs = (opStatus) (fs | opInexact);
2196 }
2197
2198 return fs;
2199}
2200
2201/* Normalized remainder: replaces this value with x - r * p, where p is RHS
   and r is x/p rounded to the nearest integer with ties to even (see the
   scheme described in the body). Returns the status of the last operation
   performed.
   NOTE(review): the line that opens this definition is missing from this
   listing, as are listing lines 2281 and 2294 inside the body (flagged
   below). */
2203 opStatus fs;
2204 unsigned int origSign = sign;
2205
2206 // First handle the special cases. Any status other than opDivByZero is
 // final and returned as-is; opDivByZero falls through to the general
 // computation below.
2207 fs = remainderSpecials(rhs);
2208 if (fs != opDivByZero)
2209 return fs;
2210
2211 fs = opOK;
2212
2213 // Make sure the current value is less than twice the denom. If the addition
2214 // did not succeed (an overflow has happened), it means that the finite
2215 // value we currently possess must be less than twice the denom (as we are
2216 // using the same semantics).
2217 IEEEFloat P2 = rhs;
2218 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2219 fs = mod(P2);
2220 assert(fs == opOK);
2221 }
2222
2223 // Let's work with absolute numbers.
2224 IEEEFloat P = rhs;
2225 P.sign = false;
2226 sign = false;
2227
2228 //
2229 // To calculate the remainder we use the following scheme.
2230 //
2231 // The remainder is defined as follows:
2232 //
2233 // remainder = numer - rquot * denom = x - r * p
2234 //
2235 // Where r is the result of: x/p, rounded toward the nearest integral value
2236 // (with halfway cases rounded toward the even number).
2237 //
2238 // Currently, (after x mod 2p):
2239 // r is the number of 2p's present inside x, which is inherently, an even
2240 // number of p's.
2241 //
2242 // We may split the remaining calculation into 4 options:
2243 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2244 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2245 // are done as well.
2246 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2247 // to subtract 1p at least once.
2248 // - if x >= p then we must subtract p at least once, as x must be a
2249 // remainder.
2250 //
2251 // By now, we were done, or we added 1 to r, which in turn, is now an odd
 // number.
2252 //
2253 // We can now split the remaining calculation to the following 3 options:
2254 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2255 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2256 // must round up to the next even number. so we must subtract p once more.
2257 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2258 // integral, and subtract p once more.
2259 //
2260
2261 // Extend the semantics to prevent an overflow/underflow or inexact result.
 // The copy gets one extra exponent step at each end and two extra bits of
 // precision.
2262 bool losesInfo;
2263 fltSemantics extendedSemantics = *semantics;
2264 extendedSemantics.maxExponent++;
2265 extendedSemantics.minExponent--;
2266 extendedSemantics.precision += 2;
2267
2268 IEEEFloat VEx = *this;
2269 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2270 assert(fs == opOK && !losesInfo);
2271 IEEEFloat PEx = P;
2272 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2273 assert(fs == opOK && !losesInfo);
2274
2275 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2276 // any fraction.
2277 fs = VEx.add(VEx, rmNearestTiesToEven);
2278 assert(fs == opOK);
2279
2280 if (VEx.compare(PEx) == cmpGreaterThan) {
 // NOTE(review): listing line 2281 is missing here; per the scheme above it
 // presumably subtracts P from this value -- confirm against the real file.
2282 assert(fs == opOK);
2283
2284 // Make VEx = this.add(this), but because we have different semantics, we do
2285 // not want to `convert` again, so we just subtract PEx twice (which equals
2286 // to the desired value).
2287 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2288 assert(fs == opOK);
2289 fs = VEx.subtract(PEx, rmNearestTiesToEven);
2290 assert(fs == opOK);
2291
2292 cmpResult result = VEx.compare(PEx);
2293 if (result == cmpGreaterThan || result == cmpEqual) {
 // NOTE(review): listing line 2294 is missing here; presumably a second
 // subtraction of P -- confirm against the real file.
2295 assert(fs == opOK);
2296 }
2297 }
2298
 // A zero result takes the sign of the original dividend, except in formats
 // that have no negative zero; a non-zero result has its sign XOR-ed with the
 // original sign (the computation above ran on absolute values).
2299 if (isZero()) {
2300 sign = origSign; // IEEE754 requires this
2301 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2302 // But some 8-bit floats only have positive 0.
2303 sign = false;
2304 }
2305
2306 else
2307 sign ^= origSign;
2308 return fs;
2309}
2310
2311/* Normalized llvm frem (C fmod): repeatedly subtracts a scaled copy of RHS
   from this value, then fixes up the sign of a zero result.
   NOTE(review): the line that opens this definition is missing from this
   listing, as are listing line 2318 (the rest of the loop condition) and
   listing line 2327 (a statement inside the loop). */
2313 opStatus fs;
2314 fs = modSpecials(rhs);
2315 unsigned int origSign = sign;
2316
2317 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
 // NOTE(review): the final clause of the loop condition (listing line 2318)
 // is not visible in this listing.
2319 int Exp = ilogb(*this) - ilogb(rhs);
 // V is rhs rescaled by the exponent difference; if that is NaN or larger in
 // magnitude than this value, scale by one step less instead.
2320 IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2321 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2322 // check for it.
2323 if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2324 V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2325 V.sign = sign;
2326
 // NOTE(review): listing line 2327 is missing here; presumably the
 // subtraction of V that assigns fs -- confirm against the real file.
2328
2329 // When the semantics supports zero, this loop's
2330 // exit-condition is handled by the 'isFiniteNonZero'
2331 // category check above. However, when the semantics
2332 // does not have 'fcZero' and we have reached the
2333 // minimum possible value, (and any further subtract
2334 // will underflow to the same value) explicitly
2335 // provide an exit-path here.
2336 if (!semantics->hasZero && this->isSmallest())
2337 break;
2338
2339 assert(fs==opOK);
2340 }
 // A zero result takes the sign of the original dividend, except in formats
 // that have no negative zero.
2341 if (isZero()) {
2342 sign = origSign; // fmod requires this
2343 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2344 sign = false;
2345 }
2346 return fs;
2347}
2348
2349/* Normalized fused-multiply-add: computes this * MULTIPLICAND + ADDEND in
   place with a single rounding in ROUNDING_MODE when all operands are
   finite, and returns the resulting status flags.
   NOTE(review): the line that opens this definition is missing from this
   listing; only the last two parameters of the signature are visible. */
2351 const IEEEFloat &addend,
2352 roundingMode rounding_mode) {
2353 opStatus fs;
2354
2355 /* Post-multiplication sign, before addition. */
2356 sign ^= multiplicand.sign;
2357
2358 /* If and only if all arguments are normal do we need to do an
2359 extended-precision calculation. */
2360 if (isFiniteNonZero() &&
2361 multiplicand.isFiniteNonZero() &&
2362 addend.isFinite()) {
2363 lostFraction lost_fraction;
2364
 // The addend is handed to multiplySignificand() together with the
 // multiplicand, so only one normalize() (one rounding) follows.
2365 lost_fraction = multiplySignificand(multiplicand, addend);
2366 fs = normalize(rounding_mode, lost_fraction);
2367 if (lost_fraction != lfExactlyZero)
2368 fs = (opStatus) (fs | opInexact);
2369
2370 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2371 positive zero unless rounding to minus infinity, except that
2372 adding two like-signed zeroes gives that zero. */
2373 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2374 sign = (rounding_mode == rmTowardNegative);
 // Formats whose NaN encoding is "negative zero" have no -0.
2375 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2376 sign = false;
2377 }
2378 } else {
2379 fs = multiplySpecials(multiplicand);
2380
2381 /* FS can only be opOK or opInvalidOp. There is no more work
2382 to do in the latter case. The IEEE-754R standard says it is
2383 implementation-defined in this case whether, if ADDEND is a
2384 quiet NaN, we raise invalid op; this implementation does so.
2385
2386 If we need to do the addition we can do so with normal
2387 precision. */
2388 if (fs == opOK)
2389 fs = addOrSubtract(addend, rounding_mode, false);
2390 }
2391
2392 return fs;
2393}
2394
2395/* Rounding-mode correct round to integral value. Infinities, quiet NaNs,
   zeroes and values whose exponent shows they are already integral are
   returned unchanged with opOK; a signaling NaN is quieted and reported as
   opInvalidOp. Otherwise the value is rounded by adding and then
   subtracting a magic constant, and the status of the addition is returned.
   NOTE(review): the line that opens this definition is missing from this
   listing, as is listing line 2461 (the end of the convertFromAPInt call). */
2397 opStatus fs;
2398
2399 if (isInfinity())
2400 // [IEEE Std 754-2008 6.1]:
2401 // The behavior of infinity in floating-point arithmetic is derived from the
2402 // limiting cases of real arithmetic with operands of arbitrarily
2403 // large magnitude, when such a limit exists.
2404 // ...
2405 // Operations on infinite operands are usually exact and therefore signal no
2406 // exceptions ...
2407 return opOK;
2408
2409 if (isNaN()) {
2410 if (isSignaling()) {
2411 // [IEEE Std 754-2008 6.2]:
2412 // Under default exception handling, any operation signaling an invalid
2413 // operation exception and for which a floating-point result is to be
2414 // delivered shall deliver a quiet NaN.
2415 makeQuiet();
2416 // [IEEE Std 754-2008 6.2]:
2417 // Signaling NaNs shall be reserved operands that, under default exception
2418 // handling, signal the invalid operation exception(see 7.2) for every
2419 // general-computational and signaling-computational operation except for
2420 // the conversions described in 5.12.
2421 return opInvalidOp;
2422 } else {
2423 // [IEEE Std 754-2008 6.2]:
2424 // For an operation with quiet NaN inputs, other than maximum and minimum
2425 // operations, if a floating-point result is to be delivered the result
2426 // shall be a quiet NaN which should be one of the input NaNs.
2427 // ...
2428 // Every general-computational and quiet-computational operation involving
2429 // one or more input NaNs, none of them signaling, shall signal no
2430 // exception, except fusedMultiplyAdd might signal the invalid operation
2431 // exception(see 7.2).
2432 return opOK;
2433 }
2434 }
2435
2436 if (isZero()) {
2437 // [IEEE Std 754-2008 6.3]:
2438 // ... the sign of the result of conversions, the quantize operation, the
2439 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2440 // the sign of the first or only operand.
2441 return opOK;
2442 }
2443
2444 // If the exponent is large enough, we know that this value is already
2445 // integral, and the arithmetic below would potentially cause it to saturate
2446 // to +/-Inf. Bail out early instead.
2447 if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2448 return opOK;
2449
2450 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2451 // precision of our format, and then subtract it back off again. The choice
2452 // of rounding modes for the addition/subtraction determines the rounding mode
2453 // for our integral rounding as well.
2454 // NOTE: When the input value is negative, we do subtraction followed by
2455 // addition instead.
2456 APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2457 1);
2458 IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2459 IEEEFloat MagicConstant(*semantics);
2460 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
 // NOTE(review): listing line 2461 (the remaining argument and the close of
 // this call) is not visible in this listing.
2462 assert(fs == opOK);
 // Giving the constant the input's sign is what turns "add then subtract"
 // into "subtract then add" for negative inputs.
2463 MagicConstant.sign = sign;
2464
2465 // Preserve the input sign so that we can handle the case of zero result
2466 // correctly.
2467 bool inputSign = isNegative();
2468
2469 fs = add(MagicConstant, rounding_mode);
2470
2471 // Current value and 'MagicConstant' are both integers, so the result of the
2472 // subtraction is always exact according to Sterbenz' lemma.
 // Its status is therefore deliberately not stored in fs.
2473 subtract(MagicConstant, rounding_mode);
2474
2475 // Restore the input sign.
2476 if (inputSign != isNegative())
2477 changeSign();
2478
2479 return fs;
2480}
2481
2481/* Comparison requires normalized numbers. Compares this value with RHS
   (which must share this value's semantics) and returns the ordering.
   NOTE(review): the line that opens this definition and every `case` label
   of the switch below (listing lines 2492-2498, 2501-2503, 2509-2511, 2517,
   2525 and 2528) are missing from this listing, so the category pairs that
   select each branch cannot be read from here. */
2483 cmpResult result;
2484
2485 assert(semantics == rhs.semantics);
2486
2487 switch (PackCategoriesIntoKey(category, rhs.category)) {
2488 default:
2489 llvm_unreachable(nullptr);
2490
 // (case labels missing from listing)
2498 return cmpUnordered;
2499
 // (case labels missing from listing) Result depends only on our own sign.
2503 if (sign)
2504 return cmpLessThan;
2505 else
2506 return cmpGreaterThan;
2507
 // (case labels missing from listing) Result depends only on rhs's sign.
2511 if (rhs.sign)
2512 return cmpGreaterThan;
2513 else
2514 return cmpLessThan;
2515
 // (case label missing from listing) Equal when the signs match; otherwise
 // the negative operand is the smaller one.
2517 if (sign == rhs.sign)
2518 return cmpEqual;
2519 else if (sign)
2520 return cmpLessThan;
2521 else
2522 return cmpGreaterThan;
2523
 // (case label missing from listing)
2525 return cmpEqual;
2526
 // (case label missing from listing) Falls out of the switch into the
 // magnitude comparison below.
2528 break;
2529 }
2530
2531 /* Two normal numbers. Do they have the same sign? */
2532 if (sign != rhs.sign) {
2533 if (sign)
2534 result = cmpLessThan;
2535 else
2536 result = cmpGreaterThan;
2537 } else {
2538 /* Compare absolute values; invert result if negative. */
2539 result = compareAbsoluteValue(rhs);
2540
2541 if (sign) {
2542 if (result == cmpLessThan)
2543 result = cmpGreaterThan;
2544 else if (result == cmpGreaterThan)
2545 result = cmpLessThan;
2546 }
2547 }
2548
2549 return result;
2550}
2552
2553/// IEEEFloat::convert - convert a value of one floating point type to another.
2554/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2555/// records whether the transformation lost information, i.e. whether
2556/// converting the result back to the original type would fail to reproduce
2557/// the original value (this is almost the same as return value!=opOK, but
2558/// there are edge cases where this is not so).
/// NOTE(review): several lines of this definition are missing from this
/// listing: the opening line of the signature, the declaration of the local
/// named `lostFraction`, and the continuation lines flagged in the body.
2559
2561 roundingMode rounding_mode,
2562 bool *losesInfo) {
2564 unsigned int newPartCount, oldPartCount;
2565 opStatus fs;
2566 int shift;
2567 const fltSemantics &fromSemantics = *semantics;
2568 bool is_signaling = isSignaling();
2569
2571 newPartCount = partCountForBits(toSemantics.precision + 1);
2572 oldPartCount = partCount();
 // Positive shift: the target has more precision (extension); negative:
 // fewer bits (truncation).
2573 shift = toSemantics.precision - fromSemantics.precision;
2574
2575 bool X86SpecialNan = false;
2576 if (&fromSemantics == &APFloatBase::semX87DoubleExtended &&
2577 &toSemantics != &APFloatBase::semX87DoubleExtended && category == fcNaN &&
2578 (!(*significandParts() & 0x8000000000000000ULL) ||
2579 !(*significandParts() & 0x4000000000000000ULL))) {
2580 // x86 has some unusual NaNs which cannot be represented in any other
2581 // format; note them here.
2582 X86SpecialNan = true;
2583 }
2584
2585 // If this is a truncation of a denormal number, and the target semantics
2586 // has larger exponent range than the source semantics (this can happen
2587 // when truncating from PowerPC double-double to double format), the
2588 // right shift could lose result mantissa bits. Adjust exponent instead
2589 // of performing excessive shift.
2590 // Also do a similar trick in case shifting denormal would produce zero
2591 // significand as this case isn't handled correctly by normalize.
2592 if (shift < 0 && isFiniteNonZero()) {
2593 int omsb = significandMSB() + 1;
2594 int exponentChange = omsb - fromSemantics.precision;
2595 if (exponent + exponentChange < toSemantics.minExponent)
2596 exponentChange = toSemantics.minExponent - exponent;
2597 exponentChange = std::max(exponentChange, shift);
2598 if (exponentChange < 0) {
2599 shift -= exponentChange;
2600 exponent += exponentChange;
2601 } else if (omsb <= -shift) {
2602 exponentChange = omsb + shift - 1; // leave at least one bit set
2603 shift -= exponentChange;
2604 exponent += exponentChange;
2605 }
2606 }
2607
2608 // If this is a truncation, perform the shift before we narrow the storage.
2609 if (shift < 0 && (isFiniteNonZero() ||
2610 (category == fcNaN && semantics->nonFiniteBehavior !=
 // NOTE(review): listing line 2611 (the right-hand side of this comparison
 // and the close of the condition) is not visible in this listing.
2612 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2613
2614 // Fix the storage so it can hold to new value.
2615 if (newPartCount > oldPartCount) {
2616 // The new type requires more storage; make it available.
2617 integerPart *newParts;
2618 newParts = new integerPart[newPartCount];
2619 APInt::tcSet(newParts, 0, newPartCount);
2620 if (isFiniteNonZero() || category==fcNaN)
2621 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2622 freeSignificand();
2623 significand.parts = newParts;
2624 } else if (newPartCount == 1 && oldPartCount != 1) {
2625 // Switch to built-in storage for a single part.
2626 integerPart newPart = 0;
2627 if (isFiniteNonZero() || category==fcNaN)
2628 newPart = significandParts()[0];
2629 freeSignificand();
2630 significand.part = newPart;
2631 }
2632
2633 // Now that we have the right storage, switch the semantics.
2634 semantics = &toSemantics;
2635
2636 // If this is an extension, perform the shift now that the storage is
2637 // available.
2638 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2639 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2640
 // From here on, set *losesInfo and fs according to the value's category.
2641 if (isFiniteNonZero()) {
2642 fs = normalize(rounding_mode, lostFraction);
2643 *losesInfo = (fs != opOK);
2644 } else if (category == fcNaN) {
2645 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2646 *losesInfo =
 // NOTE(review): listing line 2647 (the value assigned to *losesInfo) is
 // not visible in this listing.
2648 makeNaN(false, sign);
2649 return is_signaling ? opInvalidOp : opOK;
2650 }
2651
2652 // If NaN is negative zero, we need to create a new NaN to avoid converting
2653 // NaN to -Inf.
2654 if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2655 semantics->nanEncoding != fltNanEncoding::NegativeZero)
2656 makeNaN(false, false);
2657
2658 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2659
2660 // For x87 extended precision, we want to make a NaN, not a special NaN if
2661 // the input wasn't special either.
2662 if (!X86SpecialNan && semantics == &APFloatBase::semX87DoubleExtended)
2663 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2664
2665 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2666 // This also guarantees that a sNaN does not become Inf on a truncation
2667 // that loses all payload bits.
2668 if (is_signaling) {
2669 makeQuiet();
2670 fs = opInvalidOp;
2671 } else {
2672 fs = opOK;
2673 }
2674 } else if (category == fcInfinity &&
2675 semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
 // The target format has no infinity, so the value becomes a NaN and the
 // conversion is reported as lossy and inexact.
2676 makeNaN(false, sign);
2677 *losesInfo = true;
2678 fs = opInexact;
2679 } else if (category == fcZero &&
2680 semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2681 // Negative zero loses info, but positive zero doesn't.
2682 *losesInfo =
2683 fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2684 fs = *losesInfo ? opInexact : opOK;
2685 // NaN is negative zero means -0 -> +0, which can lose information
2686 sign = false;
2687 } else {
2688 *losesInfo = false;
2689 fs = opOK;
2690 }
2691
2692 if (category == fcZero && !semantics->hasZero)
 // NOTE(review): listing line 2693 (the statement controlled by this `if`)
 // is not visible in this listing.
2694 return fs;
2695}
2696
2697/* Convert a floating point number to an integer according to the
2698 rounding mode. If the rounded integer value is out of range this
2699 returns an invalid operation exception and the contents of the
2700 destination parts are unspecified. If the rounded value is in
2701 range but the floating point number is not the exact integer, the C
2702 standard doesn't require an inexact exception to be raised. IEEE
2703 854 does require it so we do that.
2704
2705 Note that for conversions to integer type the C standard requires
2706 round-to-zero to always be used. */
2707APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2708 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2709 roundingMode rounding_mode, bool *isExact) const {
2710 lostFraction lost_fraction;
2711 const integerPart *src;
2712 unsigned int dstPartsCount, truncatedBits;
2713
2714 *isExact = false;
2715
2716 /* Handle the three special cases first. */
2717 if (category == fcInfinity || category == fcNaN)
2718 return opInvalidOp;
2719
2720 dstPartsCount = partCountForBits(width);
2721 assert(dstPartsCount <= parts.size() && "Integer too big");
2722
2723 if (category == fcZero) {
2724 APInt::tcSet(parts.data(), 0, dstPartsCount);
2725 // Negative zero can't be represented as an int.
2726 *isExact = !sign;
2727 return opOK;
2728 }
2729
2730 src = significandParts();
2731
2732 /* Step 1: place our absolute value, with any fraction truncated, in
2733 the destination. */
2734 if (exponent < 0) {
2735 /* Our absolute value is less than one; truncate everything. */
2736 APInt::tcSet(parts.data(), 0, dstPartsCount);
2737 /* For exponent -1 the integer bit represents .5, look at that.
2738 For smaller exponents leftmost truncated bit is 0. */
2739 truncatedBits = semantics->precision -1U - exponent;
2740 } else {
2741 /* We want the most significant (exponent + 1) bits; the rest are
2742 truncated. */
2743 unsigned int bits = exponent + 1U;
2744
2745 /* Hopelessly large in magnitude? */
2746 if (bits > width)
2747 return opInvalidOp;
2748
2749 if (bits < semantics->precision) {
2750 /* We truncate (semantics->precision - bits) bits. */
2751 truncatedBits = semantics->precision - bits;
2752 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2753 } else {
2754 /* We want at least as many bits as are available. */
2755 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2756 0);
2757 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2758 bits - semantics->precision);
2759 truncatedBits = 0;
2760 }
2761 }
2762
2763 /* Step 2: work out any lost fraction, and increment the absolute
2764 value if we would round away from zero. */
2765 if (truncatedBits) {
2766 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2767 truncatedBits);
2768 if (lost_fraction != lfExactlyZero &&
2769 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2770 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2771 return opInvalidOp; /* Overflow. */
2772 }
2773 } else {
2774 lost_fraction = lfExactlyZero;
2775 }
2776
2777 /* Step 3: check if we fit in the destination. */
2778 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2779
2780 if (sign) {
2781 if (!isSigned) {
2782 /* Negative numbers cannot be represented as unsigned. */
2783 if (omsb != 0)
2784 return opInvalidOp;
2785 } else {
2786 /* It takes omsb bits to represent the unsigned integer value.
2787 We lose a bit for the sign, but care is needed as the
2788 maximally negative integer is a special case. */
2789 if (omsb == width &&
2790 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2791 return opInvalidOp;
2792
2793 /* This case can happen because of rounding. */
2794 if (omsb > width)
2795 return opInvalidOp;
2796 }
2797
2798 APInt::tcNegate (parts.data(), dstPartsCount);
2799 } else {
2800 if (omsb >= width + !isSigned)
2801 return opInvalidOp;
2802 }
2803
2804 if (lost_fraction == lfExactlyZero) {
2805 *isExact = true;
2806 return opOK;
2807 }
2808 return opInexact;
2809}
2810
2811/* Same as convertToSignExtendedInteger, except we provide
2812 deterministic values in case of an invalid operation exception,
2813 namely zero for NaNs and the minimal or maximal value respectively
2814 for underflow or overflow.
2815 The *isExact output tells whether the result is exact, in the sense
2816 that converting it back to the original floating point type produces
2817 the original value. This is almost equivalent to result==opOK,
2818 except for negative zeroes.
   NOTE(review): the first two lines of the signature (listing lines 2820
   and 2821) are missing from this listing; only the trailing parameters
   are visible.
2819*/
2822 unsigned int width, bool isSigned,
2823 roundingMode rounding_mode, bool *isExact) const {
2824 opStatus fs;
2825
2826 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2827 isExact);
2828
 // Only the invalid-operation case needs fixing up; every other status is
 // passed through with the parts untouched.
2829 if (fs == opInvalidOp) {
2830 unsigned int bits, dstPartsCount;
2831
2832 dstPartsCount = partCountForBits(width);
2833 assert(dstPartsCount <= parts.size() && "Integer too big");
2834
 // Number of low bits to set: none for NaN, `isSigned` (0 or 1) for a
 // negative value, and width - isSigned for a positive value.
2835 if (category == fcNaN)
2836 bits = 0;
2837 else if (sign)
2838 bits = isSigned;
2839 else
2840 bits = width - isSigned;
2841
2842 tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
 // For a negative value in a signed destination, move the single set bit
 // up to bit position width - 1.
2843 if (sign && isSigned)
2844 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2845 }
2846
2847 return fs;
2848}
2849
2850/* Convert an unsigned integer SRC to a floating point number,
2851 rounding according to ROUNDING_MODE. The sign of the floating
2852 point number is not modified. */
2853APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2854 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2855 unsigned int omsb, precision, dstCount;
2856 integerPart *dst;
2857 lostFraction lost_fraction;
2858
2859 category = fcNormal;
2860 omsb = APInt::tcMSB(src, srcCount) + 1;
2861 dst = significandParts();
2862 dstCount = partCount();
2863 precision = semantics->precision;
2864
2865 /* We want the most significant PRECISION bits of SRC. There may not
2866 be that many; extract what we can. */
2867 if (precision <= omsb) {
2868 exponent = omsb - 1;
2869 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2870 omsb - precision);
2871 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2872 } else {
2873 exponent = precision - 1;
2874 lost_fraction = lfExactlyZero;
2875 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2876 }
2877
2878 return normalize(rounding_mode, lost_fraction);
2879}
2880
/* Sets this value from the integer Val, rounding with ROUNDING_MODE. When
   isSigned is true and Val is negative, the sign is set and the magnitude is
   converted; otherwise the sign is cleared. Returns the status from
   convertFromUnsignedParts().
   NOTE(review): the line that opens this definition (listing line 2881) is
   missing from this listing; only the last parameter of the signature is
   visible. */
2882 roundingMode rounding_mode) {
2883 unsigned int partCount = Val.getNumWords();
 // Work on a copy so that the caller's value is not negated.
2884 APInt api = Val;
2885
2886 sign = false;
2887 if (isSigned && api.isNegative()) {
2888 sign = true;
2889 api = -api;
2890 }
2891
2892 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2893}
2894
/* Parses the hexadecimal significand and binary exponent in S into this
   value, rounding with ROUNDING_MODE. Returns the status from normalize(),
   or an error for malformed input (multiple dots, missing exponent, invalid
   character, or no significand digits).
   NOTE(review): the return-type line (listing line 2895) and the
   declaration of `dot` (listing line 2912) are missing from this listing. */
2896IEEEFloat::convertFromHexadecimalString(StringRef s,
2897 roundingMode rounding_mode) {
2898 lostFraction lost_fraction = lfExactlyZero;
2899
2900 category = fcNormal;
2901 zeroSignificand();
2902 exponent = 0;
2903
2904 integerPart *significand = significandParts();
2905 unsigned partsCount = partCount();
 // Digits are written from the most significant nibble of the significand
 // storage downwards; bitPos is the position of the next free nibble.
2906 unsigned bitPos = partsCount * integerPartWidth;
2907 bool computedTrailingFraction = false;
2908
2909 // Skip leading zeroes and any (hexa)decimal point.
2910 StringRef::iterator begin = s.begin();
2911 StringRef::iterator end = s.end();
2913 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2914 if (!PtrOrErr)
2915 return PtrOrErr.takeError();
2916 StringRef::iterator p = *PtrOrErr;
2917 StringRef::iterator firstSignificantDigit = p;
2918
2919 while (p != end) {
2920 integerPart hex_value;
2921
2922 if (*p == '.') {
 // `dot == end` is used as the "no dot seen yet" marker.
2923 if (dot != end)
2924 return createError("String contains multiple dots");
2925 dot = p++;
2926 continue;
2927 }
2928
 // The first character that is not a hex digit ends the significand.
2929 hex_value = hexDigitValue(*p);
2930 if (hex_value == UINT_MAX)
2931 break;
2932
2933 p++;
2934
2935 // Store the number while we have space.
2936 if (bitPos) {
2937 bitPos -= 4;
2938 hex_value <<= bitPos % integerPartWidth;
2939 significand[bitPos / integerPartWidth] |= hex_value;
2940 } else if (!computedTrailingFraction) {
 // Storage is full: classify the remaining digits as a lost fraction, once.
2941 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2942 if (!FractOrErr)
2943 return FractOrErr.takeError();
2944 lost_fraction = *FractOrErr;
2945 computedTrailingFraction = true;
2946 }
2947 }
2948
2949 /* Hex floats require an exponent but not a hexadecimal point. */
2950 if (p == end)
2951 return createError("Hex strings require an exponent");
2952 if (*p != 'p' && *p != 'P')
2953 return createError("Invalid character in significand");
2954 if (p == begin)
2955 return createError("Significand has no digits");
2956 if (dot != end && p - begin == 1)
2957 return createError("Significand has no digits");
2958
2959 /* Ignore the exponent if we are zero. */
2960 if (p != firstSignificantDigit) {
2961 int expAdjustment;
2962
2963 /* Implicit hexadecimal point? */
2964 if (dot == end)
2965 dot = p;
2966
2967 /* Calculate the exponent adjustment implicit in the number of
2968 significant digits. */
2969 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2970 if (expAdjustment < 0)
2971 expAdjustment++;
2972 expAdjustment = expAdjustment * 4 - 1;
2973
2974 /* Adjust for writing the significand starting at the most
2975 significant nibble. */
2976 expAdjustment += semantics->precision;
2977 expAdjustment -= partsCount * integerPartWidth;
2978
2979 /* Adjust for the given exponent. */
2980 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2981 if (!ExpOrErr)
2982 return ExpOrErr.takeError();
2983 exponent = *ExpOrErr;
2984 }
2985
2986 return normalize(rounding_mode, lost_fraction);
2987}
2988
/* Sets this value to the decimal significand held in decSigParts times
   10^exp, rounded with ROUNDING_MODE. The product (or quotient) with 5^|exp|
   is computed in a wider working format whose size doubles on each loop
   iteration until the error bound shows that truncation rounds correctly.
   Returns the status from normalize().
   NOTE(review): several lines are missing from this listing: the return
   type (2989), the declaration of `pow5Parts` (2995), the ends of the two
   convertFromUnsignedParts calls (3019, 3021) and the start of an assert
   (3047). */
2990IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2991 unsigned sigPartCount, int exp,
2992 roundingMode rounding_mode) {
2993 unsigned int parts, pow5PartCount;
2994 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2996 bool isNearest;
2997
2998 isNearest = (rounding_mode == rmNearestTiesToEven ||
2999 rounding_mode == rmNearestTiesToAway);
3000
3001 parts = partCountForBits(semantics->precision + 11);
3002
3003 /* Calculate pow(5, abs(exp)). */
3004 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
3005
 // Each pass retries with twice as many parts; the only exit is the return
 // inside the loop.
3006 for (;; parts *= 2) {
3007 opStatus sigStatus, powStatus;
3008 unsigned int excessPrecision, truncatedBits;
3009
3010 calcSemantics.precision = parts * integerPartWidth - 1;
3011 excessPrecision = calcSemantics.precision - semantics->precision;
3012 truncatedBits = excessPrecision;
3013
3014 IEEEFloat decSig(calcSemantics, uninitialized);
3015 decSig.makeZero(sign);
3016 IEEEFloat pow5(calcSemantics);
3017
3018 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
 // NOTE(review): listing line 3019 (end of the call above) is not visible.
3020 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
 // NOTE(review): listing line 3021 (end of the call above) is not visible.
3022 /* Add exp, as 10^n = 5^n * 2^n. */
3023 decSig.exponent += exp;
3024
3025 lostFraction calcLostFraction;
3026 integerPart HUerr, HUdistance;
3027 unsigned int powHUerr;
3028
3029 if (exp >= 0) {
3030 /* multiplySignificand leaves the precision-th bit set to 1. */
3031 calcLostFraction = decSig.multiplySignificand(pow5);
3032 powHUerr = powStatus != opOK;
3033 } else {
3034 calcLostFraction = decSig.divideSignificand(pow5);
3035 /* Denormal numbers have less precision. */
3036 if (decSig.exponent < semantics->minExponent) {
3037 excessPrecision += (semantics->minExponent - decSig.exponent);
3038 truncatedBits = excessPrecision;
3039 excessPrecision = std::min(excessPrecision, calcSemantics.precision);
3040 }
3041 /* Extra half-ulp lost in reciprocal of exponent. */
3042 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
3043 }
3044
3045 /* Both multiplySignificand and divideSignificand return the
3046 result with the integer bit set. */
 // NOTE(review): listing line 3047 (the start of the assert whose argument
 // continues below) is not visible.
3048 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
3049
3050 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
3051 powHUerr);
3052 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
3053 excessPrecision, isNearest);
3054
3055 /* Are we guaranteed to round correctly if we truncate? */
3056 if (HUdistance >= HUerr) {
3057 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
3058 calcSemantics.precision - excessPrecision,
3059 excessPrecision);
3060 /* Take the exponent of decSig. If we tcExtract-ed less bits
3061 above we must adjust our exponent to compensate for the
3062 implicit right shift. */
3063 exponent = (decSig.exponent + semantics->precision
3064 - (calcSemantics.precision - excessPrecision));
3065 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
3066 decSig.partCount(),
3067 truncatedBits);
3068 return normalize(rounding_mode, calcLostFraction);
3069 }
3070 }
3071}
3072
3073Expected<APFloat::opStatus>
3074IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
3075 decimalInfo D;
3076 opStatus fs;
3077
3078 /* Scan the text. */
3079 StringRef::iterator p = str.begin();
3080 if (Error Err = interpretDecimal(p, str.end(), &D))
3081 return std::move(Err);
3082
3083 /* Handle the quick cases. First the case of no significant digits,
3084 i.e. zero, and then exponents that are obviously too large or too
3085 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3086 definitely overflows if
3087
3088 (exp - 1) * L >= maxExponent
3089
3090 and definitely underflows to zero where
3091
3092 (exp + 1) * L <= minExponent - precision
3093
3094 With integer arithmetic the tightest bounds for L are
3095
3096 93/28 < L < 196/59 [ numerator <= 256 ]
3097 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3098 */
3099
3100 // Test if we have a zero number allowing for strings with no null terminators
3101 // and zero decimals with non-zero exponents.
3102 //
3103 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3104 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3105 // be at most one dot. On the other hand, if we have a zero with a non-zero
3106 // exponent, then we know that D.firstSigDigit will be non-numeric.
3107 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3108 category = fcZero;
3109 fs = opOK;
3110 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3111 sign = false;
3112 if (!semantics->hasZero)
3114
3115 /* Check whether the normalized exponent is high enough to overflow
3116 max during the log-rebasing in the max-exponent check below. */
3117 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3118 fs = handleOverflow(rounding_mode);
3119
3120 /* If it wasn't, then it also wasn't high enough to overflow max
3121 during the log-rebasing in the min-exponent check. Check that it
3122 won't overflow min in either check, then perform the min-exponent
3123 check. */
3124 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3125 (D.normalizedExponent + 1) * 28738 <=
3126 8651 * (semantics->minExponent - (int) semantics->precision)) {
3127 /* Underflow to zero and round. */
3128 category = fcNormal;
3129 zeroSignificand();
3130 fs = normalize(rounding_mode, lfLessThanHalf);
3131
3132 /* We can finally safely perform the max-exponent check. */
3133 } else if ((D.normalizedExponent - 1) * 42039
3134 >= 12655 * semantics->maxExponent) {
3135 /* Overflow and round. */
3136 fs = handleOverflow(rounding_mode);
3137 } else {
3138 integerPart *decSignificand;
3139 unsigned int partCount;
3140
3141 /* A tight upper bound on number of bits required to hold an
3142 N-digit decimal integer is N * 196 / 59. Allocate enough space
3143 to hold the full significand, and an extra part required by
3144 tcMultiplyPart. */
3145 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3146 partCount = partCountForBits(1 + 196 * partCount / 59);
3147 decSignificand = new integerPart[partCount + 1];
3148 partCount = 0;
3149
3150 /* Convert to binary efficiently - we do almost all multiplication
3151 in an integerPart. When this would overflow do we do a single
3152 bignum multiplication, and then revert again to multiplication
3153 in an integerPart. */
3154 do {
3155 integerPart decValue, val, multiplier;
3156
3157 val = 0;
3158 multiplier = 1;
3159
3160 do {
3161 if (*p == '.') {
3162 p++;
3163 if (p == str.end()) {
3164 break;
3165 }
3166 }
3167 decValue = decDigitValue(*p++);
3168 if (decValue >= 10U) {
3169 delete[] decSignificand;
3170 return createError("Invalid character in significand");
3171 }
3172 multiplier *= 10;
3173 val = val * 10 + decValue;
3174 /* The maximum number that can be multiplied by ten with any
3175 digit added without overflowing an integerPart. */
3176 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3177
3178 /* Multiply out the current part. */
3179 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3180 partCount, partCount + 1, false);
3181
3182 /* If we used another part (likely but not guaranteed), increase
3183 the count. */
3184 if (decSignificand[partCount])
3185 partCount++;
3186 } while (p <= D.lastSigDigit);
3187
3188 category = fcNormal;
3189 fs = roundSignificandWithExponent(decSignificand, partCount,
3190 D.exponent, rounding_mode);
3191
3192 delete [] decSignificand;
3193 }
3194
3195 return fs;
3196}
3197
3198bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3199 const size_t MIN_NAME_SIZE = 3;
3200
3201 if (str.size() < MIN_NAME_SIZE)
3202 return false;
3203
3204 if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3205 makeInf(false);
3206 return true;
3207 }
3208
3209 bool IsNegative = str.consume_front("-");
3210 if (IsNegative) {
3211 if (str.size() < MIN_NAME_SIZE)
3212 return false;
3213
3214 if (str == "inf" || str == "INFINITY" || str == "Inf") {
3215 makeInf(true);
3216 return true;
3217 }
3218 }
3219
3220 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3221 bool IsSignaling = str.consume_front_insensitive("s");
3222 if (IsSignaling) {
3223 if (str.size() < MIN_NAME_SIZE)
3224 return false;
3225 }
3226
3227 if (str.consume_front("nan") || str.consume_front("NaN")) {
3228 // A NaN without payload.
3229 if (str.empty()) {
3230 makeNaN(IsSignaling, IsNegative);
3231 return true;
3232 }
3233
3234 // Allow the payload to be inside parentheses.
3235 if (str.front() == '(') {
3236 // Parentheses should be balanced (and not empty).
3237 if (str.size() <= 2 || str.back() != ')')
3238 return false;
3239
3240 str = str.slice(1, str.size() - 1);
3241 }
3242
3243 // Determine the payload number's radix.
3244 unsigned Radix = 10;
3245 if (str[0] == '0') {
3246 if (str.size() > 1 && tolower(str[1]) == 'x') {
3247 str = str.drop_front(2);
3248 Radix = 16;
3249 } else {
3250 Radix = 8;
3251 }
3252 }
3253
3254 // Parse the payload and make the NaN.
3255 APInt Payload;
3256 if (!str.getAsInteger(Radix, Payload)) {
3257 makeNaN(IsSignaling, IsNegative, &Payload);
3258 return true;
3259 }
3260 }
3261
3262 return false;
3263}
3264
3265Expected<APFloat::opStatus>
3267 if (str.empty())
3268 return createError("Invalid string length");
3269
3270 // Handle special cases.
3271 if (convertFromStringSpecials(str))
3272 return opOK;
3273
3274 /* Handle a leading minus sign. */
3275 StringRef::iterator p = str.begin();
3276 size_t slen = str.size();
3277 sign = *p == '-' ? 1 : 0;
3278 if (sign && !semantics->hasSignedRepr)
3280 "This floating point format does not support signed values");
3281
3282 if (*p == '-' || *p == '+') {
3283 p++;
3284 slen--;
3285 if (!slen)
3286 return createError("String has no digits");
3287 }
3288
3289 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3290 if (slen == 2)
3291 return createError("Invalid string");
3292 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3293 rounding_mode);
3294 }
3295
3296 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3297}
3298
3299/* Write out a hexadecimal representation of the floating point value
3300 to DST, which must be of sufficient size, in the C99 form
3301 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3302 excluding the terminating NUL.
3303
3304 If UPPERCASE, the output is in upper case, otherwise in lower case.
3305
3306 HEXDIGITS digits appear altogether, rounding the value if
3307 necessary. If HEXDIGITS is 0, the minimal precision to display the
3308 number precisely is used instead. If nothing would appear after
3309 the decimal point it is suppressed.
3310
3311 The decimal exponent is always printed and has at least one digit.
3312 Zero values display an exponent of zero. Infinities and NaNs
3313 appear as "infinity" or "nan" respectively.
3314
3315 The above rules are as specified by C99. There is ambiguity about
3316 what the leading hexadecimal digit should be. This implementation
3317 uses whatever is necessary so that the exponent is displayed as
3318 stored. This implies the exponent will fall within the IEEE format
3319 range, and the leading hexadecimal digit will be 0 (for denormals),
3320 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3321 any other digits zero).
3322*/
3323unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3324 bool upperCase,
3325 roundingMode rounding_mode) const {
3326 char *p;
3327
3328 p = dst;
3329 if (sign)
3330 *dst++ = '-';
3331
3332 switch (category) {
3333 case fcInfinity:
3334 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3335 dst += sizeof infinityL - 1;
3336 break;
3337
3338 case fcNaN:
3339 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3340 dst += sizeof NaNU - 1;
3341 break;
3342
3343 case fcZero:
3344 *dst++ = '0';
3345 *dst++ = upperCase ? 'X': 'x';
3346 *dst++ = '0';
3347 if (hexDigits > 1) {
3348 *dst++ = '.';
3349 memset (dst, '0', hexDigits - 1);
3350 dst += hexDigits - 1;
3351 }
3352 *dst++ = upperCase ? 'P': 'p';
3353 *dst++ = '0';
3354 break;
3355
3356 case fcNormal:
3357 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3358 break;
3359 }
3360
3361 *dst = 0;
3362
3363 return static_cast<unsigned int>(dst - p);
3364}
3365
3366/* Does the hard work of outputting the correctly rounded hexadecimal
3367 form of a normal floating point number with the specified number of
3368 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3369 digits necessary to print the value precisely is output. */
3370char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3371 bool upperCase,
3372 roundingMode rounding_mode) const {
3373 unsigned int count, valueBits, shift, partsCount, outputDigits;
3374 const char *hexDigitChars;
3375 const integerPart *significand;
3376 char *p;
3377 bool roundUp;
3378
3379 *dst++ = '0';
3380 *dst++ = upperCase ? 'X': 'x';
3381
3382 roundUp = false;
3383 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3384
3385 significand = significandParts();
3386 partsCount = partCount();
3387
3388 /* +3 because the first digit only uses the single integer bit, so
3389 we have 3 virtual zero most-significant-bits. */
3390 valueBits = semantics->precision + 3;
3391 shift = integerPartWidth - valueBits % integerPartWidth;
3392
3393 /* The natural number of digits required ignoring trailing
3394 insignificant zeroes. */
3395 outputDigits = (valueBits - significandLSB () + 3) / 4;
3396
3397 /* hexDigits of zero means use the required number for the
3398 precision. Otherwise, see if we are truncating. If we are,
3399 find out if we need to round away from zero. */
3400 if (hexDigits) {
3401 if (hexDigits < outputDigits) {
3402 /* We are dropping non-zero bits, so need to check how to round.
3403 "bits" is the number of dropped bits. */
3404 unsigned int bits;
3405 lostFraction fraction;
3406
3407 bits = valueBits - hexDigits * 4;
3408 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3409 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3410 }
3411 outputDigits = hexDigits;
3412 }
3413
3414 /* Write the digits consecutively, and start writing in the location
3415 of the hexadecimal point. We move the most significant digit
3416 left and add the hexadecimal point later. */
3417 p = ++dst;
3418
3419 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3420
3421 while (outputDigits && count) {
3422 integerPart part;
3423
3424 /* Put the most significant integerPartWidth bits in "part". */
3425 if (--count == partsCount)
3426 part = 0; /* An imaginary higher zero part. */
3427 else
3428 part = significand[count] << shift;
3429
3430 if (count && shift)
3431 part |= significand[count - 1] >> (integerPartWidth - shift);
3432
3433 /* Convert as much of "part" to hexdigits as we can. */
3434 unsigned int curDigits = integerPartWidth / 4;
3435
3436 curDigits = std::min(curDigits, outputDigits);
3437 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3438 outputDigits -= curDigits;
3439 }
3440
3441 if (roundUp) {
3442 char *q = dst;
3443
3444 /* Note that hexDigitChars has a trailing '0'. */
3445 do {
3446 q--;
3447 *q = hexDigitChars[hexDigitValue (*q) + 1];
3448 } while (*q == '0');
3449 assert(q >= p);
3450 } else {
3451 /* Add trailing zeroes. */
3452 memset (dst, '0', outputDigits);
3453 dst += outputDigits;
3454 }
3455
3456 /* Move the most significant digit to before the point, and if there
3457 is something after the decimal point add it. This must come
3458 after rounding above. */
3459 p[-1] = p[0];
3460 if (dst -1 == p)
3461 dst--;
3462 else
3463 p[0] = '.';
3464
3465 /* Finally output the exponent. */
3466 *dst++ = upperCase ? 'P': 'p';
3467
3468 return writeSignedDecimal (dst, exponent);
3469}
3470
3472 if (!Arg.isFiniteNonZero())
3473 return hash_combine((uint8_t)Arg.category,
3474 // NaN has no sign, fix it at zero.
3475 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3476 Arg.semantics->precision);
3477
3478 // Normal floats need their exponent and significand hashed.
3479 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3480 Arg.semantics->precision, Arg.exponent,
3482 Arg.significandParts(),
3483 Arg.significandParts() + Arg.partCount()));
3484}
3485
3486// Conversion from APFloat to/from host float/double. It may eventually be
3487// possible to eliminate these and have everybody deal with APFloats, but that
3488// will take a while. This approach will not easily extend to long double.
3489// Current implementation requires integerPartWidth==64, which is correct at
3490// the moment but could be made more general.
3491
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3494
3495APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3496 assert(semantics ==
3497 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended);
3498 assert(partCount()==2);
3499
3500 uint64_t myexponent, mysignificand;
3501
3502 if (isFiniteNonZero()) {
3503 myexponent = exponent+16383; //bias
3504 mysignificand = significandParts()[0];
3505 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3506 myexponent = 0; // denormal
3507 } else if (category==fcZero) {
3508 myexponent = 0;
3509 mysignificand = 0;
3510 } else if (category==fcInfinity) {
3511 myexponent = 0x7fff;
3512 mysignificand = 0x8000000000000000ULL;
3513 } else {
3514 assert(category == fcNaN && "Unknown category");
3515 myexponent = 0x7fff;
3516 mysignificand = significandParts()[0];
3517 }
3518
3519 uint64_t words[2];
3520 words[0] = mysignificand;
3521 words[1] = ((uint64_t)(sign & 1) << 15) |
3522 (myexponent & 0x7fffLL);
3523 return APInt(80, words);
3524}
3525
3526APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3527 assert(semantics ==
3528 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy);
3529 assert(partCount()==2);
3530
3531 uint64_t words[2];
3532 opStatus fs;
3533 bool losesInfo;
3534
3535 // Convert number to double. To avoid spurious underflows, we re-
3536 // normalize against the "double" minExponent first, and only *then*
3537 // truncate the mantissa. The result of that second conversion
3538 // may be inexact, but should never underflow.
3539 // Declare fltSemantics before APFloat that uses it (and
3540 // saves pointer to it) to ensure correct destruction order.
3541 fltSemantics extendedSemantics = *semantics;
3542 extendedSemantics.minExponent = APFloatBase::semIEEEdouble.minExponent;
3543 IEEEFloat extended(*this);
3544 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3545 assert(fs == opOK && !losesInfo);
3546 (void)fs;
3547
3548 IEEEFloat u(extended);
3549 fs = u.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3550 assert(fs == opOK || fs == opInexact);
3551 (void)fs;
3552 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3553
3554 // If conversion was exact or resulted in a special case, we're done;
3555 // just set the second double to zero. Otherwise, re-convert back to
3556 // the extended format and compute the difference. This now should
3557 // convert exactly to double.
3558 if (u.isFiniteNonZero() && losesInfo) {
3559 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3560 assert(fs == opOK && !losesInfo);
3561 (void)fs;
3562
3563 IEEEFloat v(extended);
3564 v.subtract(u, rmNearestTiesToEven);
3565 fs = v.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3566 assert(fs == opOK && !losesInfo);
3567 (void)fs;
3568 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3569 } else {
3570 words[1] = 0;
3571 }
3572
3573 return APInt(128, words);
3574}
3575
3576template <const fltSemantics &S>
3577APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3578 assert(semantics == &S);
3579 const int bias = (semantics == &APFloatBase::semFloat8E8M0FNU)
3580 ? -S.minExponent
3581 : -(S.minExponent - 1);
3582 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3583 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3584 constexpr integerPart integer_bit =
3585 integerPart{1} << (trailing_significand_bits % integerPartWidth);
3586 constexpr uint64_t significand_mask = integer_bit - 1;
3587 constexpr unsigned int exponent_bits =
3588 trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3589 : S.sizeInBits;
3590 static_assert(exponent_bits < 64);
3591 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3592
3593 uint64_t myexponent;
3594 std::array<integerPart, partCountForBits(trailing_significand_bits)>
3595 mysignificand;
3596
3597 if (isFiniteNonZero()) {
3598 myexponent = exponent + bias;
3599 std::copy_n(significandParts(), mysignificand.size(),
3600 mysignificand.begin());
3601 if (myexponent == 1 &&
3602 !(significandParts()[integer_bit_part] & integer_bit))
3603 myexponent = 0; // denormal
3604 } else if (category == fcZero) {
3605 if (!S.hasZero)
3606 llvm_unreachable("semantics does not support zero!");
3607 myexponent = ::exponentZero(S) + bias;
3608 mysignificand.fill(0);
3609 } else if (category == fcInfinity) {
3610 if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3611 S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3612 llvm_unreachable("semantics don't support inf!");
3613 myexponent = ::exponentInf(S) + bias;
3614 mysignificand.fill(0);
3615 } else {
3616 assert(category == fcNaN && "Unknown category!");
3617 if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3618 llvm_unreachable("semantics don't support NaN!");
3619 myexponent = ::exponentNaN(S) + bias;
3620 std::copy_n(significandParts(), mysignificand.size(),
3621 mysignificand.begin());
3622 }
3623 std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3624 auto words_iter =
3625 std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3626 if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3627 // Clear the integer bit.
3628 words[mysignificand.size() - 1] &= significand_mask;
3629 }
3630 std::fill(words_iter, words.end(), uint64_t{0});
3631 constexpr size_t last_word = words.size() - 1;
3632 uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3633 << ((S.sizeInBits - 1) % 64);
3634 words[last_word] |= shifted_sign;
3635 uint64_t shifted_exponent = (myexponent & exponent_mask)
3636 << (trailing_significand_bits % 64);
3637 words[last_word] |= shifted_exponent;
3638 if constexpr (last_word == 0) {
3639 return APInt(S.sizeInBits, words[0]);
3640 }
3641 return APInt(S.sizeInBits, words);
3642}
3643
3644APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3645 assert(partCount() == 2);
3646 return convertIEEEFloatToAPInt<APFloatBase::semIEEEquad>();
3647}
3648
3649APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3650 assert(partCount()==1);
3651 return convertIEEEFloatToAPInt<APFloatBase::semIEEEdouble>();
3652}
3653
3654APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3655 assert(partCount()==1);
3656 return convertIEEEFloatToAPInt<APFloatBase::semIEEEsingle>();
3657}
3658
3659APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3660 assert(partCount() == 1);
3661 return convertIEEEFloatToAPInt<APFloatBase::semBFloat>();
3662}
3663
3664APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3665 assert(partCount()==1);
3666 return convertIEEEFloatToAPInt<APFloatBase::APFloatBase::semIEEEhalf>();
3667}
3668
3669APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3670 assert(partCount() == 1);
3671 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2>();
3672}
3673
3674APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3675 assert(partCount() == 1);
3676 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E5M2FNUZ>();
3677}
3678
3679APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3680 assert(partCount() == 1);
3681 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3>();
3682}
3683
3684APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3685 assert(partCount() == 1);
3686 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FN>();
3687}
3688
3689APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3690 assert(partCount() == 1);
3691 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3FNUZ>();
3692}
3693
3694APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3695 assert(partCount() == 1);
3696 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E4M3B11FNUZ>();
3697}
3698
3699APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3700 assert(partCount() == 1);
3701 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E3M4>();
3702}
3703
3704APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3705 assert(partCount() == 1);
3706 return convertIEEEFloatToAPInt<APFloatBase::semFloatTF32>();
3707}
3708
3709APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3710 assert(partCount() == 1);
3711 return convertIEEEFloatToAPInt<APFloatBase::semFloat8E8M0FNU>();
3712}
3713
3714APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3715 assert(partCount() == 1);
3716 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E3M2FN>();
3717}
3718
3719APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3720 assert(partCount() == 1);
3721 return convertIEEEFloatToAPInt<APFloatBase::semFloat6E2M3FN>();
3722}
3723
3724APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3725 assert(partCount() == 1);
3726 return convertIEEEFloatToAPInt<APFloatBase::semFloat4E2M1FN>();
3727}
3728
3729// This function creates an APInt that is just a bit map of the floating
3730// point constant as it would appear in memory. It is not a conversion,
3731// and treating the result as a normal integer is unlikely to be useful.
3732
3734 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEhalf)
3735 return convertHalfAPFloatToAPInt();
3736
3737 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semBFloat)
3738 return convertBFloatAPFloatToAPInt();
3739
3740 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
3741 return convertFloatAPFloatToAPInt();
3742
3743 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
3744 return convertDoubleAPFloatToAPInt();
3745
3746 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
3747 return convertQuadrupleAPFloatToAPInt();
3748
3749 if (semantics ==
3750 (const llvm::fltSemantics *)&APFloatBase::semPPCDoubleDoubleLegacy)
3751 return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3752
3753 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2)
3754 return convertFloat8E5M2APFloatToAPInt();
3755
3756 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E5M2FNUZ)
3757 return convertFloat8E5M2FNUZAPFloatToAPInt();
3758
3759 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3)
3760 return convertFloat8E4M3APFloatToAPInt();
3761
3762 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FN)
3763 return convertFloat8E4M3FNAPFloatToAPInt();
3764
3765 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3FNUZ)
3766 return convertFloat8E4M3FNUZAPFloatToAPInt();
3767
3768 if (semantics ==
3769 (const llvm::fltSemantics *)&APFloatBase::semFloat8E4M3B11FNUZ)
3770 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3771
3772 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E3M4)
3773 return convertFloat8E3M4APFloatToAPInt();
3774
3775 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloatTF32)
3776 return convertFloatTF32APFloatToAPInt();
3777
3778 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat8E8M0FNU)
3779 return convertFloat8E8M0FNUAPFloatToAPInt();
3780
3781 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E3M2FN)
3782 return convertFloat6E3M2FNAPFloatToAPInt();
3783
3784 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat6E2M3FN)
3785 return convertFloat6E2M3FNAPFloatToAPInt();
3786
3787 if (semantics == (const llvm::fltSemantics *)&APFloatBase::semFloat4E2M1FN)
3788 return convertFloat4E2M1FNAPFloatToAPInt();
3789
3790 assert(semantics ==
3791 (const llvm::fltSemantics *)&APFloatBase::semX87DoubleExtended &&
3792 "unknown format!");
3793 return convertF80LongDoubleAPFloatToAPInt();
3794}
3795
3797 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle &&
3798 "Float semantics are not IEEEsingle");
3799 APInt api = bitcastToAPInt();
3800 return api.bitsToFloat();
3801}
3802
3804 assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble &&
3805 "Float semantics are not IEEEdouble");
3806 APInt api = bitcastToAPInt();
3807 return api.bitsToDouble();
3808}
3809
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128 by reinterpreting its bit pattern.
/// Only valid when the semantics are IEEEquad.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad &&
         "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3818
3819/// Integer bit is explicit in this format. Intel hardware (387 and later)
3820/// does not support these bit patterns:
3821/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3822/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3823/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3824/// exponent = 0, integer bit 1 ("pseudodenormal")
3825/// At the moment, the first three are treated as NaNs, the last one as Normal.
3826void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3827 uint64_t i1 = api.getRawData()[0];
3828 uint64_t i2 = api.getRawData()[1];
3829 uint64_t myexponent = (i2 & 0x7fff);
3830 uint64_t mysignificand = i1;
3831 uint8_t myintegerbit = mysignificand >> 63;
3832
3833 initialize(&APFloatBase::semX87DoubleExtended);
3834 assert(partCount()==2);
3835
3836 sign = static_cast<unsigned int>(i2>>15);
3837 if (myexponent == 0 && mysignificand == 0) {
3838 makeZero(sign);
3839 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3840 makeInf(sign);
3841 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3842 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3843 category = fcNaN;
3844 exponent = exponentNaN();
3845 significandParts()[0] = mysignificand;
3846 significandParts()[1] = 0;
3847 } else {
3848 category = fcNormal;
3849 exponent = myexponent - 16383;
3850 significandParts()[0] = mysignificand;
3851 significandParts()[1] = 0;
3852 if (myexponent==0) // denormal
3853 exponent = -16382;
3854 }
3855}
3856
3857void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3858 uint64_t i1 = api.getRawData()[0];
3859 uint64_t i2 = api.getRawData()[1];
3860 opStatus fs;
3861 bool losesInfo;
3862
3863 // Get the first double and convert to our format.
3864 initFromDoubleAPInt(APInt(64, i1));
3865 fs = convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3866 &losesInfo);
3867 assert(fs == opOK && !losesInfo);
3868 (void)fs;
3869
3870 // Unless we have a special case, add in second double.
3871 if (isFiniteNonZero()) {
3872 IEEEFloat v(APFloatBase::semIEEEdouble, APInt(64, i2));
3873 fs = v.convert(APFloatBase::semPPCDoubleDoubleLegacy, rmNearestTiesToEven,
3874 &losesInfo);
3875 assert(fs == opOK && !losesInfo);
3876 (void)fs;
3877
3879 }
3880}
3881
3882// The E8M0 format has the following characteristics:
3883// It is an 8-bit unsigned format with only exponents (no actual significand).
3884// No encodings for {zero, infinities or denorms}.
3885// NaN is represented by all 1's.
3886// Bias is 127.
3887void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3888 const uint64_t exponent_mask = 0xff;
3889 uint64_t val = api.getRawData()[0];
3890 uint64_t myexponent = (val & exponent_mask);
3891
3892 initialize(&APFloatBase::semFloat8E8M0FNU);
3893 assert(partCount() == 1);
3894
3895 // This format has unsigned representation only
3896 sign = 0;
3897
3898 // Set the significand
3899 // This format does not have any significand but the 'Pth' precision bit is
3900 // always set to 1 for consistency in APFloat's internal representation.
3901 uint64_t mysignificand = 1;
3902 significandParts()[0] = mysignificand;
3903
3904 // This format can either have a NaN or fcNormal
3905 // All 1's i.e. 255 is a NaN
3906 if (val == exponent_mask) {
3907 category = fcNaN;
3908 exponent = exponentNaN();
3909 return;
3910 }
3911 // Handle fcNormal...
3912 category = fcNormal;
3913 exponent = myexponent - 127; // 127 is bias
3914}
3915template <const fltSemantics &S>
3916void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3917 assert(api.getBitWidth() == S.sizeInBits);
3918 constexpr integerPart integer_bit = integerPart{1}
3919 << ((S.precision - 1) % integerPartWidth);
3920 constexpr uint64_t significand_mask = integer_bit - 1;
3921 constexpr unsigned int trailing_significand_bits = S.precision - 1;
3922 constexpr unsigned int stored_significand_parts =
3923 partCountForBits(trailing_significand_bits);
3924 constexpr unsigned int exponent_bits =
3925 S.sizeInBits - 1 - trailing_significand_bits;
3926 static_assert(exponent_bits < 64);
3927 constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3928 constexpr int bias = -(S.minExponent - 1);
3929
3930 // Copy the bits of the significand. We need to clear out the exponent and
3931 // sign bit in the last word.
3932 std::array<integerPart, stored_significand_parts> mysignificand;
3933 std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3934 if constexpr (significand_mask != 0) {
3935 mysignificand[mysignificand.size() - 1] &= significand_mask;
3936 }
3937
3938 // We assume the last word holds the sign bit, the exponent, and potentially
3939 // some of the trailing significand field.
3940 uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3941 uint64_t myexponent =
3942 (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3943
3944 initialize(&S);
3945 assert(partCount() == mysignificand.size());
3946
3947 sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3948
3949 bool all_zero_significand = llvm::all_of(mysignificand, equal_to(0));
3950
3951 bool is_zero = myexponent == 0 && all_zero_significand;
3952
3953 if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3954 if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3955 makeInf(sign);
3956 return;
3957 }
3958 }
3959
3960 bool is_nan = false;
3961
3962 if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3963 is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3964 } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3965 bool all_ones_significand =
3966 std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3967 [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3968 (!significand_mask ||
3969 mysignificand[mysignificand.size() - 1] == significand_mask);
3970 is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3971 } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3972 is_nan = is_zero && sign;
3973 }
3974
3975 if (is_nan) {
3976 category = fcNaN;
3977 exponent = ::exponentNaN(S);
3978 std::copy_n(mysignificand.begin(), mysignificand.size(),
3979 significandParts());
3980 return;
3981 }
3982
3983 if (is_zero) {
3984 makeZero(sign);
3985 return;
3986 }
3987
3988 category = fcNormal;
3989 exponent = myexponent - bias;
3990 std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3991 if (myexponent == 0) // denormal
3992 exponent = S.minExponent;
3993 else
3994 significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3995}
3996
3997void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3998 initFromIEEEAPInt<APFloatBase::semIEEEquad>(api);
3999}
4000
4001void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
4002 initFromIEEEAPInt<APFloatBase::semIEEEdouble>(api);
4003}
4004
4005void IEEEFloat::initFromFloatAPInt(const APInt &api) {
4006 initFromIEEEAPInt<APFloatBase::semIEEEsingle>(api);
4007}
4008
4009void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
4010 initFromIEEEAPInt<APFloatBase::semBFloat>(api);
4011}
4012
4013void IEEEFloat::initFromHalfAPInt(const APInt &api) {
4014 initFromIEEEAPInt<APFloatBase::semIEEEhalf>(api);
4015}
4016
4017void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
4018 initFromIEEEAPInt<APFloatBase::semFloat8E5M2>(api);
4019}
4020
4021void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
4022 initFromIEEEAPInt<APFloatBase::semFloat8E5M2FNUZ>(api);
4023}
4024
4025void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
4026 initFromIEEEAPInt<APFloatBase::semFloat8E4M3>(api);
4027}
4028
4029void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
4030 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FN>(api);
4031}
4032
4033void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
4034 initFromIEEEAPInt<APFloatBase::semFloat8E4M3FNUZ>(api);
4035}
4036
4037void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
4038 initFromIEEEAPInt<APFloatBase::semFloat8E4M3B11FNUZ>(api);
4039}
4040
4041void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
4042 initFromIEEEAPInt<APFloatBase::semFloat8E3M4>(api);
4043}
4044
4045void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
4046 initFromIEEEAPInt<APFloatBase::semFloatTF32>(api);
4047}
4048
4049void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
4050 initFromIEEEAPInt<APFloatBase::semFloat6E3M2FN>(api);
4051}
4052
4053void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
4054 initFromIEEEAPInt<APFloatBase::semFloat6E2M3FN>(api);
4055}
4056
4057void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
4058 initFromIEEEAPInt<APFloatBase::semFloat4E2M1FN>(api);
4059}
4060
4061/// Treat api as containing the bits of a floating point number.
4062void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
4063 assert(api.getBitWidth() == Sem->sizeInBits);
4064 if (Sem == &APFloatBase::semIEEEhalf)
4065 return initFromHalfAPInt(api);
4066 if (Sem == &APFloatBase::semBFloat)
4067 return initFromBFloatAPInt(api);
4068 if (Sem == &APFloatBase::semIEEEsingle)
4069 return initFromFloatAPInt(api);
4070 if (Sem == &APFloatBase::semIEEEdouble)
4071 return initFromDoubleAPInt(api);
4072 if (Sem == &APFloatBase::semX87DoubleExtended)
4073 return initFromF80LongDoubleAPInt(api);
4074 if (Sem == &APFloatBase::semIEEEquad)
4075 return initFromQuadrupleAPInt(api);
4076 if (Sem == &APFloatBase::semPPCDoubleDoubleLegacy)
4077 return initFromPPCDoubleDoubleLegacyAPInt(api);
4078 if (Sem == &APFloatBase::semFloat8E5M2)
4079 return initFromFloat8E5M2APInt(api);
4080 if (Sem == &APFloatBase::semFloat8E5M2FNUZ)
4081 return initFromFloat8E5M2FNUZAPInt(api);
4082 if (Sem == &APFloatBase::semFloat8E4M3)
4083 return initFromFloat8E4M3APInt(api);
4084 if (Sem == &APFloatBase::semFloat8E4M3FN)
4085 return initFromFloat8E4M3FNAPInt(api);
4086 if (Sem == &APFloatBase::semFloat8E4M3FNUZ)
4087 return initFromFloat8E4M3FNUZAPInt(api);
4088 if (Sem == &APFloatBase::semFloat8E4M3B11FNUZ)
4089 return initFromFloat8E4M3B11FNUZAPInt(api);
4090 if (Sem == &APFloatBase::semFloat8E3M4)
4091 return initFromFloat8E3M4APInt(api);
4092 if (Sem == &APFloatBase::semFloatTF32)
4093 return initFromFloatTF32APInt(api);
4094 if (Sem == &APFloatBase::semFloat8E8M0FNU)
4095 return initFromFloat8E8M0FNUAPInt(api);
4096 if (Sem == &APFloatBase::semFloat6E3M2FN)
4097 return initFromFloat6E3M2FNAPInt(api);
4098 if (Sem == &APFloatBase::semFloat6E2M3FN)
4099 return initFromFloat6E2M3FNAPInt(api);
4100 if (Sem == &APFloatBase::semFloat4E2M1FN)
4101 return initFromFloat4E2M1FNAPInt(api);
4102
4103 llvm_unreachable("unsupported semantics");
4104}
4105
4106/// Make this number the largest magnitude normal number in the given
4107/// semantics.
4108void IEEEFloat::makeLargest(bool Negative) {
4109 if (Negative && !semantics->hasSignedRepr)
4111 "This floating point format does not support signed values");
4112 // We want (in interchange format):
4113 // sign = {Negative}
4114 // exponent = 1..10
4115 // significand = 1..1
4116 category = fcNormal;
4117 sign = Negative;
4118 exponent = semantics->maxExponent;
4119
4120 // Use memset to set all but the highest integerPart to all ones.
4121 integerPart *significand = significandParts();
4122 unsigned PartCount = partCount();
4123 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4124
4125 // Set the high integerPart especially setting all unused top bits for
4126 // internal consistency.
4127 const unsigned NumUnusedHighBits =
4128 PartCount*integerPartWidth - semantics->precision;
4129 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4130 ? (~integerPart(0) >> NumUnusedHighBits)
4131 : 0;
4132 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4133 semantics->nanEncoding == fltNanEncoding::AllOnes &&
4134 (semantics->precision > 1))
4135 significand[0] &= ~integerPart(1);
4136}
4137
4138/// Make this number the smallest magnitude denormal number in the given
4139/// semantics.
4140void IEEEFloat::makeSmallest(bool Negative) {
4141 if (Negative && !semantics->hasSignedRepr)
4143 "This floating point format does not support signed values");
4144 // We want (in interchange format):
4145 // sign = {Negative}
4146 // exponent = 0..0
4147 // significand = 0..01
4148 category = fcNormal;
4149 sign = Negative;
4150 exponent = semantics->minExponent;
4151 APInt::tcSet(significandParts(), 1, partCount());
4152}
4153
4155 if (Negative && !semantics->hasSignedRepr)
4157 "This floating point format does not support signed values");
4158 // We want (in interchange format):
4159 // sign = {Negative}
4160 // exponent = 0..0
4161 // significand = 10..0
4162
4163 category = fcNormal;
4164 zeroSignificand();
4165 sign = Negative;
4166 exponent = semantics->minExponent;
4167 APInt::tcSetBit(significandParts(), semantics->precision - 1);
4168}
4169
4170IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4171 initFromAPInt(&Sem, API);
4172}
4173
4175 initFromAPInt(&APFloatBase::semIEEEsingle, APInt::floatToBits(f));
4176}
4177
4179 initFromAPInt(&APFloatBase::semIEEEdouble, APInt::doubleToBits(d));
4180}
4181
4182namespace {
4183 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4184 Buffer.append(Str.begin(), Str.end());
4185 }
4186
4187 /// Removes data from the given significand until it is no more
4188 /// precise than is required for the desired precision.
4189 void AdjustToPrecision(APInt &significand,
4190 int &exp, unsigned FormatPrecision) {
4191 unsigned bits = significand.getActiveBits();
4192
4193 // 196/59 is a very slight overestimate of lg_2(10).
4194 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4195
4196 if (bits <= bitsRequired) return;
4197
4198 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4199 if (!tensRemovable) return;
4200
4201 exp += tensRemovable;
4202
4203 APInt divisor(significand.getBitWidth(), 1);
4204 APInt powten(significand.getBitWidth(), 10);
4205 while (true) {
4206 if (tensRemovable & 1)
4207 divisor *= powten;
4208 tensRemovable >>= 1;
4209 if (!tensRemovable) break;
4210 powten *= powten;
4211 }
4212
4213 significand = significand.udiv(divisor);
4214
4215 // Truncate the significand down to its active bit count.
4216 significand = significand.trunc(significand.getActiveBits());
4217 }
4218
4219
4220 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4221 int &exp, unsigned FormatPrecision) {
4222 unsigned N = buffer.size();
4223 if (N <= FormatPrecision) return;
4224
4225 // The most significant figures are the last ones in the buffer.
4226 unsigned FirstSignificant = N - FormatPrecision;
4227
4228 // Round.
4229 // FIXME: this probably shouldn't use 'round half up'.
4230
4231 // Rounding down is just a truncation, except we also want to drop
4232 // trailing zeros from the new result.
4233 if (buffer[FirstSignificant - 1] < '5') {
4234 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4235 FirstSignificant++;
4236
4237 exp += FirstSignificant;
4238 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4239 return;
4240 }
4241
4242 // Rounding up requires a decimal add-with-carry. If we continue
4243 // the carry, the newly-introduced zeros will just be truncated.
4244 for (unsigned I = FirstSignificant; I != N; ++I) {
4245 if (buffer[I] == '9') {
4246 FirstSignificant++;
4247 } else {
4248 buffer[I]++;
4249 break;
4250 }
4251 }
4252
4253 // If we carried through, we have exactly one digit of precision.
4254 if (FirstSignificant == N) {
4255 exp += FirstSignificant;
4256 buffer.clear();
4257 buffer.push_back('1');
4258 return;
4259 }
4260
4261 exp += FirstSignificant;
4262 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4263 }
4264
4265 void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4266 APInt significand, unsigned FormatPrecision,
4267 unsigned FormatMaxPadding, bool TruncateZero) {
4268 const int semanticsPrecision = significand.getBitWidth();
4269
4270 if (isNeg)
4271 Str.push_back('-');
4272
4273 // Set FormatPrecision if zero. We want to do this before we
4274 // truncate trailing zeros, as those are part of the precision.
4275 if (!FormatPrecision) {
4276 // We use enough digits so the number can be round-tripped back to an
4277 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4278 // Accurately" by Steele and White.
4279 // FIXME: Using a formula based purely on the precision is conservative;
4280 // we can print fewer digits depending on the actual value being printed.
4281
4282 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4283 FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4284 }
4285
4286 // Ignore trailing binary zeros.
4287 int trailingZeros = significand.countr_zero();
4288 exp += trailingZeros;
4289 significand.lshrInPlace(trailingZeros);
4290
4291 // Change the exponent from 2^e to 10^e.
4292 if (exp == 0) {
4293 // Nothing to do.
4294 } else if (exp > 0) {
4295 // Just shift left.
4296 significand = significand.zext(semanticsPrecision + exp);
4297 significand <<= exp;
4298 exp = 0;
4299 } else { /* exp < 0 */
4300 int texp = -exp;
4301
4302 // We transform this using the identity:
4303 // (N)(2^-e) == (N)(5^e)(10^-e)
4304 // This means we have to multiply N (the significand) by 5^e.
4305 // To avoid overflow, we have to operate on numbers large
4306 // enough to store N * 5^e:
4307 // log2(N * 5^e) == log2(N) + e * log2(5)
4308 // <= semantics->precision + e * 137 / 59
4309 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4310
4311 unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4312
4313 // Multiply significand by 5^e.
4314 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4315 significand = significand.zext(precision);
4316 APInt five_to_the_i(precision, 5);
4317 while (true) {
4318 if (texp & 1)
4319 significand *= five_to_the_i;
4320
4321 texp >>= 1;
4322 if (!texp)
4323 break;
4324 five_to_the_i *= five_to_the_i;
4325 }
4326 }
4327
4328 AdjustToPrecision(significand, exp, FormatPrecision);
4329
4331
4332 // Fill the buffer.
4333 unsigned precision = significand.getBitWidth();
4334 if (precision < 4) {
4335 // We need enough precision to store the value 10.
4336 precision = 4;
4337 significand = significand.zext(precision);
4338 }
4339 APInt ten(precision, 10);
4340 APInt digit(precision, 0);
4341
4342 bool inTrail = true;
4343 while (significand != 0) {
4344 // digit <- significand % 10
4345 // significand <- significand / 10
4346 APInt::udivrem(significand, ten, significand, digit);
4347
4348 unsigned d = digit.getZExtValue();
4349
4350 // Drop trailing zeros.
4351 if (inTrail && !d)
4352 exp++;
4353 else {
4354 buffer.push_back((char) ('0' + d));
4355 inTrail = false;
4356 }
4357 }
4358
4359 assert(!buffer.empty() && "no characters in buffer!");
4360
4361 // Drop down to FormatPrecision.
4362 // TODO: don't do more precise calculations above than are required.
4363 AdjustToPrecision(buffer, exp, FormatPrecision);
4364
4365 unsigned NDigits = buffer.size();
4366
4367 // Check whether we should use scientific notation.
4368 bool FormatScientific;
4369 if (!FormatMaxPadding)
4370 FormatScientific = true;
4371 else {
4372 if (exp >= 0) {
4373 // 765e3 --> 765000
4374 // ^^^
4375 // But we shouldn't make the number look more precise than it is.
4376 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4377 NDigits + (unsigned) exp > FormatPrecision);
4378 } else {
4379 // Power of the most significant digit.
4380 int MSD = exp + (int) (NDigits - 1);
4381 if (MSD >= 0) {
4382 // 765e-2 == 7.65
4383 FormatScientific = false;
4384 } else {
4385 // 765e-5 == 0.00765
4386 // ^ ^^
4387 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4388 }
4389 }
4390 }
4391
4392 // Scientific formatting is pretty straightforward.
4393 if (FormatScientific) {
4394 exp += (NDigits - 1);
4395
4396 Str.push_back(buffer[NDigits-1]);
4397 Str.push_back('.');
4398 if (NDigits == 1 && TruncateZero)
4399 Str.push_back('0');
4400 else
4401 for (unsigned I = 1; I != NDigits; ++I)
4402 Str.push_back(buffer[NDigits-1-I]);
4403 // Fill with zeros up to FormatPrecision.
4404 if (!TruncateZero && FormatPrecision > NDigits - 1)
4405 Str.append(FormatPrecision - NDigits + 1, '0');
4406 // For !TruncateZero we use lower 'e'.
4407 Str.push_back(TruncateZero ? 'E' : 'e');
4408
4409 Str.push_back(exp >= 0 ? '+' : '-');
4410 if (exp < 0)
4411 exp = -exp;
4412 SmallVector<char, 6> expbuf;
4413 do {
4414 expbuf.push_back((char) ('0' + (exp % 10)));
4415 exp /= 10;
4416 } while (exp);
4417 // Exponent always at least two digits if we do not truncate zeros.
4418 if (!TruncateZero && expbuf.size() < 2)
4419 expbuf.push_back('0');
4420 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4421 Str.push_back(expbuf[E-1-I]);
4422 return;
4423 }
4424
4425 // Non-scientific, positive exponents.
4426 if (exp >= 0) {
4427 for (unsigned I = 0; I != NDigits; ++I)
4428 Str.push_back(buffer[NDigits-1-I]);
4429 for (unsigned I = 0; I != (unsigned) exp; ++I)
4430 Str.push_back('0');
4431 return;
4432 }
4433
4434 // Non-scientific, negative exponents.
4435
4436 // The number of digits to the left of the decimal point.
4437 int NWholeDigits = exp + (int) NDigits;
4438
4439 unsigned I = 0;
4440 if (NWholeDigits > 0) {
4441 for (; I != (unsigned) NWholeDigits; ++I)
4442 Str.push_back(buffer[NDigits-I-1]);
4443 Str.push_back('.');
4444 } else {
4445 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4446
4447 Str.push_back('0');
4448 Str.push_back('.');
4449 for (unsigned Z = 1; Z != NZeros; ++Z)
4450 Str.push_back('0');
4451 }
4452
4453 for (; I != NDigits; ++I)
4454 Str.push_back(buffer[NDigits-I-1]);
4455
4456 }
4457} // namespace
4458
4459void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4460 unsigned FormatMaxPadding, bool TruncateZero) const {
4461 switch (category) {
4462 case fcInfinity:
4463 if (isNegative())
4464 return append(Str, "-Inf");
4465 else
4466 return append(Str, "+Inf");
4467
4468 case fcNaN: return append(Str, "NaN");
4469
4470 case fcZero:
4471 if (isNegative())
4472 Str.push_back('-');
4473
4474 if (!FormatMaxPadding) {
4475 if (TruncateZero)
4476 append(Str, "0.0E+0");
4477 else {
4478 append(Str, "0.0");
4479 if (FormatPrecision > 1)
4480 Str.append(FormatPrecision - 1, '0');
4481 append(Str, "e+00");
4482 }
4483 } else {
4484 Str.push_back('0');
4485 }
4486 return;
4487
4488 case fcNormal:
4489 break;
4490 }
4491
4492 // Decompose the number into an APInt and an exponent.
4493 int exp = exponent - ((int) semantics->precision - 1);
4494 APInt significand(
4495 semantics->precision,
4496 ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4497
4498 toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4499 FormatMaxPadding, TruncateZero);
4500
4501}
4502
4504 if (!isFinite() || isZero())
4505 return INT_MIN;
4506
4507 const integerPart *Parts = significandParts();
4508 const int PartCount = partCountForBits(semantics->precision);
4509
4510 int PopCount = 0;
4511 for (int i = 0; i < PartCount; ++i) {
4512 PopCount += llvm::popcount(Parts[i]);
4513 if (PopCount > 1)
4514 return INT_MIN;
4515 }
4516
4517 if (exponent != semantics->minExponent)
4518 return exponent;
4519
4520 int CountrParts = 0;
4521 for (int i = 0; i < PartCount;
4522 ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4523 if (Parts[i] != 0) {
4524 return exponent - semantics->precision + CountrParts +
4525 llvm::countr_zero(Parts[i]) + 1;
4526 }
4527 }
4528
4529 llvm_unreachable("didn't find the set bit");
4530}
4531
4533 if (!isNaN())
4534 return false;
4535 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4536 semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4537 return false;
4538
4539 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4540 // first bit of the trailing significand being 0.
4541 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4542}
4543
4544/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4545///
4546/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4547/// appropriate sign switching before/after the computation.
4549 // If we are performing nextDown, swap sign so we have -x.
4550 if (nextDown)
4551 changeSign();
4552
4553 // Compute nextUp(x)
4554 opStatus result = opOK;
4555
4556 // Handle each float category separately.
4557 switch (category) {
4558 case fcInfinity:
4559 // nextUp(+inf) = +inf
4560 if (!isNegative())
4561 break;
4562 // nextUp(-inf) = -getLargest()
4563 makeLargest(true);
4564 break;
4565 case fcNaN:
4566 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4567 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4568 // change the payload.
4569 if (isSignaling()) {
4570 result = opInvalidOp;
4571 // For consistency, propagate the sign of the sNaN to the qNaN.
4572 makeNaN(false, isNegative(), nullptr);
4573 }
4574 break;
4575 case fcZero:
4576 // nextUp(pm 0) = +getSmallest()
4577 makeSmallest(false);
4578 break;
4579 case fcNormal:
4580 // nextUp(-getSmallest()) = -0
4581 if (isSmallest() && isNegative()) {
4582 APInt::tcSet(significandParts(), 0, partCount());
4583 category = fcZero;
4584 exponent = 0;
4585 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4586 sign = false;
4587 if (!semantics->hasZero)
4589 break;
4590 }
4591
4592 if (isLargest() && !isNegative()) {
4593 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4594 // nextUp(getLargest()) == NAN
4595 makeNaN();
4596 break;
4597 } else if (semantics->nonFiniteBehavior ==
4599 // nextUp(getLargest()) == getLargest()
4600 break;
4601 } else {
4602 // nextUp(getLargest()) == INFINITY
4603 APInt::tcSet(significandParts(), 0, partCount());
4604 category = fcInfinity;
4605 exponent = semantics->maxExponent + 1;
4606 break;
4607 }
4608 }
4609
4610 // nextUp(normal) == normal + inc.
4611 if (isNegative()) {
4612 // If we are negative, we need to decrement the significand.
4613
4614 // We only cross a binade boundary that requires adjusting the exponent
4615 // if:
4616 // 1. exponent != semantics->minExponent. This implies we are not in the
4617 // smallest binade or are dealing with denormals.
4618 // 2. Our significand excluding the integral bit is all zeros.
4619 bool WillCrossBinadeBoundary =
4620 exponent != semantics->minExponent && isSignificandAllZeros();
4621
4622 // Decrement the significand.
4623 //
4624 // We always do this since:
4625 // 1. If we are dealing with a non-binade decrement, by definition we
4626 // just decrement the significand.
4627 // 2. If we are dealing with a normal -> normal binade decrement, since
4628 // we have an explicit integral bit the fact that all bits but the
4629 // integral bit are zero implies that subtracting one will yield a
4630 // significand with 0 integral bit and 1 in all other spots. Thus we
4631 // must just adjust the exponent and set the integral bit to 1.
4632 // 3. If we are dealing with a normal -> denormal binade decrement,
4633 // since we set the integral bit to 0 when we represent denormals, we
4634 // just decrement the significand.
4635 integerPart *Parts = significandParts();
4636 APInt::tcDecrement(Parts, partCount());
4637
4638 if (WillCrossBinadeBoundary) {
4639 // Our result is a normal number. Do the following:
4640 // 1. Set the integral bit to 1.
4641 // 2. Decrement the exponent.
4642 APInt::tcSetBit(Parts, semantics->precision - 1);
4643 exponent--;
4644 }
4645 } else {
4646 // If we are positive, we need to increment the significand.
4647
4648 // We only cross a binade boundary that requires adjusting the exponent if
4649 // the input is not a denormal and all of said input's significand bits
4650 // are set. If all of said conditions are true: clear the significand, set
4651 // the integral bit to 1, and increment the exponent. If we have a
4652 // denormal always increment since moving denormals and the numbers in the
4653 // smallest normal binade have the same exponent in our representation.
4654 // If there are only exponents, any increment always crosses the
4655 // BinadeBoundary.
4656 bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4657 (!isDenormal() && isSignificandAllOnes());
4658
4659 if (WillCrossBinadeBoundary) {
4660 integerPart *Parts = significandParts();
4661 APInt::tcSet(Parts, 0, partCount());
4662 APInt::tcSetBit(Parts, semantics->precision - 1);
4663 assert(exponent != semantics->maxExponent &&
4664 "We can not increment an exponent beyond the maxExponent allowed"
4665 " by the given floating point semantics.");
4666 exponent++;
4667 } else {
4668 incrementSignificand();
4669 }
4670 }
4671 break;
4672 }
4673
4674 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4675 if (nextDown)
4676 changeSign();
4677
4678 return result;
4679}
4680
4681APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4682 return ::exponentNaN(*semantics);
4683}
4684
4685APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4686 return ::exponentInf(*semantics);
4687}
4688
4689APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4690 return ::exponentZero(*semantics);
4691}
4692
4693void IEEEFloat::makeInf(bool Negative) {
4694 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4695 llvm_unreachable("This floating point format does not support Inf");
4696
4697 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4698 // There is no Inf, so make NaN instead.
4699 makeNaN(false, Negative);
4700 return;
4701 }
4702 category = fcInfinity;
4703 sign = Negative;
4704 exponent = exponentInf();
4705 APInt::tcSet(significandParts(), 0, partCount());
4706}
4707
4708void IEEEFloat::makeZero(bool Negative) {
4709 if (!semantics->hasZero)
4710 llvm_unreachable("This floating point format does not support Zero");
4711
4712 category = fcZero;
4713 sign = Negative;
4714 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4715 // Merge negative zero to positive because 0b10000...000 is used for NaN
4716 sign = false;
4717 }
4718 exponent = exponentZero();
4719 APInt::tcSet(significandParts(), 0, partCount());
4720}
4721
4723 assert(isNaN());
4724 if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4725 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4726}
4727
4728int ilogb(const IEEEFloat &Arg) {
4729 if (Arg.isNaN())
4730 return APFloat::IEK_NaN;
4731 if (Arg.isZero())
4732 return APFloat::IEK_Zero;
4733 if (Arg.isInfinity())
4734 return APFloat::IEK_Inf;
4735 if (!Arg.isDenormal())
4736 return Arg.exponent;
4737
4738 IEEEFloat Normalized(Arg);
4739 int SignificandBits = Arg.getSemantics().precision - 1;
4740
4741 Normalized.exponent += SignificandBits;
4742 Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4743 return Normalized.exponent - SignificandBits;
4744}
4745
4747 auto MaxExp = X.getSemantics().maxExponent;
4748 auto MinExp = X.getSemantics().minExponent;
4749
4750 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4751 // overflow; clamp it to a safe range before adding, but ensure that the range
4752 // is large enough that the clamp does not change the result. The range we
4753 // need to support is the difference between the largest possible exponent and
4754 // the normalized exponent of half the smallest denormal.
4755
4756 int SignificandBits = X.getSemantics().precision - 1;
4757 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4758
4759 // Clamp to one past the range ends to let normalize handle overflow.
4760 X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4761 X.normalize(RoundingMode, lfExactlyZero);
4762 if (X.isNaN())
4763 X.makeQuiet();
4764 return X;
4765}
4766
4767IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4768 Exp = ilogb(Val);
4769
4770 // Quiet signalling nans.
4771 if (Exp == APFloat::IEK_NaN) {
4772 IEEEFloat Quiet(Val);
4773 Quiet.makeQuiet();
4774 return Quiet;
4775 }
4776
4777 if (Exp == APFloat::IEK_Inf)
4778 return Val;
4779
4780 // 1 is added because frexp is defined to return a normalized fraction in
4781 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4782 Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4783 return scalbn(Val, -Exp, RM);
4784}
4785
4787 : Semantics(&S),
4788 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble),
4789 APFloat(APFloatBase::semIEEEdouble)}) {
4790 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4791}
4792
4794 : Semantics(&S), Floats(new APFloat[2]{
4795 APFloat(APFloatBase::semIEEEdouble, uninitialized),
4796 APFloat(APFloatBase::semIEEEdouble, uninitialized)}) {
4797 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4798}
4799
4801 : Semantics(&S),
4802 Floats(new APFloat[2]{APFloat(APFloatBase::semIEEEdouble, I),
4803 APFloat(APFloatBase::semIEEEdouble)}) {
4804 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4805}
4806
4808 : Semantics(&S),
4809 Floats(new APFloat[2]{
4810 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[0])),
4811 APFloat(APFloatBase::semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4812 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4813}
4814
4816 APFloat &&Second)
4817 : Semantics(&S),
4818 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4819 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4820 assert(&Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4821 assert(&Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4822}
4823
4825 : Semantics(RHS.Semantics),
4826 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4827 APFloat(RHS.Floats[1])}
4828 : nullptr) {
4829 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4830}
4831
4833 : Semantics(RHS.Semantics), Floats(RHS.Floats) {
4834 RHS.Semantics = &APFloatBase::semBogus;
4835 RHS.Floats = nullptr;
4836 assert(Semantics == &APFloatBase::semPPCDoubleDouble);
4837}
4838
4840 if (Semantics == RHS.Semantics && RHS.Floats) {
4841 Floats[0] = RHS.Floats[0];
4842 Floats[1] = RHS.Floats[1];
4843 } else if (this != &RHS) {
4844 this->~DoubleAPFloat();
4845 new (this) DoubleAPFloat(RHS);
4846 }
4847 return *this;
4848}
4849
4850// Returns a result such that:
4851// 1. abs(Lo) <= ulp(Hi)/2
4852// 2. Hi == RTNE(Hi + Lo)
4853// 3. Hi + Lo == X + Y
4854//
4855// Requires that log2(X) >= log2(Y).
4856static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) {
4857 if (!X.isFinite())
4858 return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)};
4859 APFloat Hi = X + Y;
4860 APFloat Delta = Hi - X;
4861 APFloat Lo = Y - Delta;
4862 return {Hi, Lo};
4863}
4864
4865// Implement addition, subtraction, multiplication and division based on:
4866// "Software for Doubled-Precision Floating-Point Computations",
4867// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4868APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4869 const APFloat &c, const APFloat &cc,
4870 roundingMode RM) {
4871 int Status = opOK;
4872 APFloat z = a;
4873 Status |= z.add(c, RM);
4874 if (!z.isFinite()) {
4875 if (!z.isInfinity()) {
4876 Floats[0] = std::move(z);
4877 Floats[1].makeZero(/* Neg = */ false);
4878 return (opStatus)Status;
4879 }
4880 Status = opOK;
4881 auto AComparedToC = a.compareAbsoluteValue(c);
4882 z = cc;
4883 Status |= z.add(aa, RM);
4884 if (AComparedToC == APFloat::cmpGreaterThan) {
4885 // z = cc + aa + c + a;
4886 Status |= z.add(c, RM);
4887 Status |= z.add(a, RM);
4888 } else {
4889 // z = cc + aa + a + c;
4890 Status |= z.add(a, RM);
4891 Status |= z.add(c, RM);
4892 }
4893 if (!z.isFinite()) {
4894 Floats[0] = std::move(z);
4895 Floats[1].makeZero(/* Neg = */ false);
4896 return (opStatus)Status;
4897 }
4898 Floats[0] = z;
4899 APFloat zz = aa;
4900 Status |= zz.add(cc, RM);
4901 if (AComparedToC == APFloat::cmpGreaterThan) {
4902 // Floats[1] = a - z + c + zz;
4903 Floats[1] = a;
4904 Status |= Floats[1].subtract(z, RM);
4905 Status |= Floats[1].add(c, RM);
4906 Status |= Floats[1].add(zz, RM);
4907 } else {
4908 // Floats[1] = c - z + a + zz;
4909 Floats[1] = c;
4910 Status |= Floats[1].subtract(z, RM);
4911 Status |= Floats[1].add(a, RM);
4912 Status |= Floats[1].add(zz, RM);
4913 }
4914 } else {
4915 // q = a - z;
4916 APFloat q = a;
4917 Status |= q.subtract(z, RM);
4918
4919 // zz = q + c + (a - (q + z)) + aa + cc;
4920 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4921 auto zz = q;
4922 Status |= zz.add(c, RM);
4923 Status |= q.add(z, RM);
4924 Status |= q.subtract(a, RM);
4925 q.changeSign();
4926 Status |= zz.add(q, RM);
4927 Status |= zz.add(aa, RM);
4928 Status |= zz.add(cc, RM);
4929 if (zz.isZero() && !zz.isNegative()) {
4930 Floats[0] = std::move(z);
4931 Floats[1].makeZero(/* Neg = */ false);
4932 return opOK;
4933 }
4934 Floats[0] = z;
4935 Status |= Floats[0].add(zz, RM);
4936 if (!Floats[0].isFinite()) {
4937 Floats[1].makeZero(/* Neg = */ false);
4938 return (opStatus)Status;
4939 }
4940 Floats[1] = std::move(z);
4941 Status |= Floats[1].subtract(Floats[0], RM);
4942 Status |= Floats[1].add(zz, RM);
4943 }
4944 return (opStatus)Status;
4945}
4946
4947APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4948 const DoubleAPFloat &RHS,
4949 DoubleAPFloat &Out,
4950 roundingMode RM) {
4951 if (LHS.getCategory() == fcNaN) {
4952 Out = LHS;
4953 return opOK;
4954 }
4955 if (RHS.getCategory() == fcNaN) {
4956 Out = RHS;
4957 return opOK;
4958 }
4959 if (LHS.getCategory() == fcZero) {
4960 Out = RHS;
4961 return opOK;
4962 }
4963 if (RHS.getCategory() == fcZero) {
4964 Out = LHS;
4965 return opOK;
4966 }
4967 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4968 LHS.isNegative() != RHS.isNegative()) {
4969 Out.makeNaN(false, Out.isNegative(), nullptr);
4970 return opInvalidOp;
4971 }
4972 if (LHS.getCategory() == fcInfinity) {
4973 Out = LHS;
4974 return opOK;
4975 }
4976 if (RHS.getCategory() == fcInfinity) {
4977 Out = RHS;
4978 return opOK;
4979 }
4980 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4981
4982 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4983 CC(RHS.Floats[1]);
4984 assert(&A.getSemantics() == &APFloatBase::semIEEEdouble);
4985 assert(&AA.getSemantics() == &APFloatBase::semIEEEdouble);
4986 assert(&C.getSemantics() == &APFloatBase::semIEEEdouble);
4987 assert(&CC.getSemantics() == &APFloatBase::semIEEEdouble);
4988 assert(&Out.Floats[0].getSemantics() == &APFloatBase::semIEEEdouble);
4989 assert(&Out.Floats[1].getSemantics() == &APFloatBase::semIEEEdouble);
4990 return Out.addImpl(A, AA, C, CC, RM);
4991}
4992
4994 roundingMode RM) {
4995 return addWithSpecial(*this, RHS, *this, RM);
4996}
4997
4999 roundingMode RM) {
5000 changeSign();
5001 auto Ret = add(RHS, RM);
5002 changeSign();
5003 return Ret;
5004}
5005
5008 const auto &LHS = *this;
5009 auto &Out = *this;
5010 /* Interesting observation: For special categories, finding the lowest
5011 common ancestor of the following layered graph gives the correct
5012 return category:
5013
5014 NaN
5015 / \
5016 Zero Inf
5017 \ /
5018 Normal
5019
5020 e.g. NaN * NaN = NaN
5021 Zero * Inf = NaN
5022 Normal * Zero = Zero
5023 Normal * Inf = Inf
5024 */
5025 if (LHS.getCategory() == fcNaN) {
5026 Out = LHS;
5027 return opOK;
5028 }
5029 if (RHS.getCategory() == fcNaN) {
5030 Out = RHS;
5031 return opOK;
5032 }
5033 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
5034 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
5035 Out.makeNaN(false, false, nullptr);
5036 return opOK;
5037 }
5038 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
5039 Out = LHS;
5040 return opOK;
5041 }
5042 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
5043 Out = RHS;
5044 return opOK;
5045 }
5046 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
5047 "Special cases not handled exhaustively");
5048
5049 int Status = opOK;
5050 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
5051 // t = a * c
5052 APFloat T = A;
5053 Status |= T.multiply(C, RM);
5054 if (!T.isFiniteNonZero()) {
5055 Floats[0] = T;
5056 Floats[1].makeZero(/* Neg = */ false);
5057 return (opStatus)Status;
5058 }
5059
5060 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5061 APFloat Tau = A;
5062 T.changeSign();
5063 Status |= Tau.fusedMultiplyAdd(C, T, RM);
5064 T.changeSign();
5065 {
5066 // v = a * d
5067 APFloat V = A;
5068 Status |= V.multiply(D, RM);
5069 // w = b * c
5070 APFloat W = B;
5071 Status |= W.multiply(C, RM);
5072 Status |= V.add(W, RM);
5073 // tau += v + w
5074 Status |= Tau.add(V, RM);
5075 }
5076 // u = t + tau
5077 APFloat U = T;
5078 Status |= U.add(Tau, RM);
5079
5080 Floats[0] = U;
5081 if (!U.isFinite()) {
5082 Floats[1].makeZero(/* Neg = */ false);
5083 } else {
5084 // Floats[1] = (t - u) + tau
5085 Status |= T.subtract(U, RM);
5086 Status |= T.add(Tau, RM);
5087 Floats[1] = T;
5088 }
5089 return (opStatus)Status;
5090}
5091
5094 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5095 "Unexpected Semantics");
5096 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5097 auto Ret = Tmp.divide(
5098 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5099 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5100 return Ret;
5101}
5102
5104 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5105 "Unexpected Semantics");
5106 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5107 auto Ret = Tmp.remainder(
5108 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5109 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5110 return Ret;
5111}
5112
5114 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5115 "Unexpected Semantics");
5116 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5117 auto Ret = Tmp.mod(
5118 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5119 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5120 return Ret;
5121}
5122
5125 const DoubleAPFloat &Addend,
5127 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5128 "Unexpected Semantics");
5129 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt());
5130 auto Ret = Tmp.fusedMultiplyAdd(
5131 APFloat(APFloatBase::semPPCDoubleDoubleLegacy,
5132 Multiplicand.bitcastToAPInt()),
5133 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()),
5134 RM);
5135 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5136 return Ret;
5137}
5138
5140 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5141 "Unexpected Semantics");
5142 const APFloat &Hi = getFirst();
5143 const APFloat &Lo = getSecond();
5144
5145 APFloat RoundedHi = Hi;
5146 const opStatus HiStatus = RoundedHi.roundToIntegral(RM);
5147
5148 // We can reduce the problem to just the high part if the input:
5149 // 1. Represents a non-finite value.
5150 // 2. Has a component which is zero.
5151 if (!Hi.isFiniteNonZero() || Lo.isZero()) {
5152 Floats[0] = std::move(RoundedHi);
5153 Floats[1].makeZero(/*Neg=*/false);
5154 return HiStatus;
5155 }
5156
5157 // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a
5158 // halfway point.
5159 auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded,
5160 APFloat TieBreaker) {
5161 // RoundingError tells us which direction we rounded:
5162 // - RoundingError > 0: we rounded up.
5163 // - RoundingError < 0: we rounded down.
5164 // Sterbenz' lemma ensures that RoundingError is exact.
5165 const APFloat RoundingError = Rounded - ToRound;
5166 if (TieBreaker.isNonZero() &&
5167 TieBreaker.isNegative() != RoundingError.isNegative() &&
5168 abs(RoundingError).isExactlyValue(0.5))
5169 Rounded.add(
5170 APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()),
5172 return Rounded;
5173 };
5174
5175 // Case 1: Hi is not an integer.
5176 // Special cases are for rounding modes that are sensitive to ties.
5177 if (RoundedHi != Hi) {
5178 // We need to consider the case where Hi was between two integers and the
5179 // rounding mode broke the tie when, in fact, Lo may have had a different
5180 // sign than Hi.
5181 if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven)
5182 RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo);
5183
5184 Floats[0] = std::move(RoundedHi);
5185 Floats[1].makeZero(/*Neg=*/false);
5186 return HiStatus;
5187 }
5188
5189 // Case 2: Hi is an integer.
5190 // Special cases are for rounding modes which are rounding towards or away from zero.
5191 RoundingMode LoRoundingMode;
5192 if (RM == rmTowardZero)
5193 // When our input is positive, we want the Lo component rounded toward
5194 // negative infinity to get the smallest result magnitude. Likewise,
5195 // negative inputs want the Lo component rounded toward positive infinity.
5196 LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative;
5197 else
5198 LoRoundingMode = RM;
5199
5200 APFloat RoundedLo = Lo;
5201 const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode);
5202 if (LoRoundingMode == rmNearestTiesToAway)
5203 // We need to consider the case where Lo was between two integers and the
5204 // rounding mode broke the tie when, in fact, Hi may have had a different
5205 // sign than Lo.
5206 RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi);
5207
5208 // We must ensure that the final result has no overlap between the two APFloat values.
5209 std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo);
5210
5211 Floats[0] = std::move(RoundedHi);
5212 Floats[1] = std::move(RoundedLo);
5213 return LoStatus;
5214}
5215
5217 Floats[0].changeSign();
5218 Floats[1].changeSign();
5219}
5220
5223 // Compare absolute values of the high parts.
5224 const cmpResult HiPartCmp = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5225 if (HiPartCmp != cmpEqual)
5226 return HiPartCmp;
5227
5228 // Zero, regardless of sign, is equal.
5229 if (Floats[1].isZero() && RHS.Floats[1].isZero())
5230 return cmpEqual;
5231
5232 // At this point, |this->Hi| == |RHS.Hi|.
5233 // The magnitude is |Hi+Lo| which is Hi+|Lo| if signs of Hi and Lo are the
5234 // same, and Hi-|Lo| if signs are different.
5235 const bool ThisIsSubtractive =
5236 Floats[0].isNegative() != Floats[1].isNegative();
5237 const bool RHSIsSubtractive =
5238 RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();
5239
5240 // Case 1: The low part of 'this' is zero.
5241 if (Floats[1].isZero())
5242 // We are comparing |Hi| vs. |Hi| ± |RHS.Lo|.
5243 // If RHS is subtractive, its magnitude is smaller.
5244 // If RHS is additive, its magnitude is larger.
5245 return RHSIsSubtractive ? cmpGreaterThan : cmpLessThan;
5246
5247 // Case 2: The low part of 'RHS' is zero (and we know 'this' is not).
5248 if (RHS.Floats[1].isZero())
5249 // We are comparing |Hi| ± |This.Lo| vs. |Hi|.
5250 // If 'this' is subtractive, its magnitude is smaller.
5251 // If 'this' is additive, its magnitude is larger.
5252 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5253
5254 // If their natures differ, the additive one is larger.
5255 if (ThisIsSubtractive != RHSIsSubtractive)
5256 return ThisIsSubtractive ? cmpLessThan : cmpGreaterThan;
5257
5258 // Case 3: Both are additive (Hi+|Lo|) or both are subtractive (Hi-|Lo|).
5259 // The comparison now depends on the magnitude of the low parts.
5260 const cmpResult LoPartCmp = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5261
5262 if (ThisIsSubtractive) {
5263 // Both are subtractive (Hi-|Lo|), so the comparison of |Lo| is inverted.
5264 if (LoPartCmp == cmpLessThan)
5265 return cmpGreaterThan;
5266 if (LoPartCmp == cmpGreaterThan)
5267 return cmpLessThan;
5268 }
5269
5270 // If additive, the comparison of |Lo| is direct.
5271 // If equal, they are equal.
5272 return LoPartCmp;
5273}
5274
5276 return Floats[0].getCategory();
5277}
5278
// Reports the sign of the high component (Floats[0]); the low component is not
// consulted.
5279bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5280
5282 Floats[0].makeInf(Neg);
5283 Floats[1].makeZero(/* Neg = */ false);
5284}
5285
5287 Floats[0].makeZero(Neg);
5288 Floats[1].makeZero(/* Neg = */ false);
5289}
5290
5292 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5293 "Unexpected Semantics");
5294 Floats[0] =
5295 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5296 Floats[1] =
5297 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5298 if (Neg)
5299 changeSign();
5300}
5301
5303 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5304 "Unexpected Semantics");
5305 Floats[0].makeSmallest(Neg);
5306 Floats[1].makeZero(/* Neg = */ false);
5307}
5308
5310 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5311 "Unexpected Semantics");
5312 Floats[0] =
5313 APFloat(APFloatBase::semIEEEdouble, APInt(64, 0x0360000000000000ull));
5314 if (Neg)
5315 Floats[0].changeSign();
5316 Floats[1].makeZero(/* Neg = */ false);
5317}
5318
// Turns this value into a NaN: SNaN, Neg and fill are forwarded unchanged to
// the high component's makeNaN, and the low component is reset to +0.
5319void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5320 Floats[0].makeNaN(SNaN, Neg, fill);
5321 Floats[1].makeZero(/* Neg = */ false);
5322}
5323
5325 auto Result = Floats[0].compare(RHS.Floats[0]);
5326 // |Float[0]| > |Float[1]|
5327 if (Result == APFloat::cmpEqual)
5328 return Floats[1].compare(RHS.Floats[1]);
5329 return Result;
5330}
5331
5333 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5334 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5335}
5336
5338 if (Arg.Floats)
5339 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5340 return hash_combine(Arg.Semantics);
5341}
5342
5344 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5345 "Unexpected Semantics");
5346 uint64_t Data[] = {
5347 Floats[0].bitcastToAPInt().getRawData()[0],
5348 Floats[1].bitcastToAPInt().getRawData()[0],
5349 };
5350 return APInt(128, Data);
5351}
5352
5354 roundingMode RM) {
5355 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5356 "Unexpected Semantics");
5357 APFloat Tmp(APFloatBase::semPPCDoubleDoubleLegacy);
5358 auto Ret = Tmp.convertFromString(S, RM);
5359 *this = DoubleAPFloat(APFloatBase::semPPCDoubleDouble, Tmp.bitcastToAPInt());
5360 return Ret;
5361}
5362
5363// The double-double lattice of values corresponds to numbers which obey:
5364// - abs(lo) <= 1/2 * ulp(hi)
5365// - roundTiesToEven(hi + lo) == hi
5366//
5367// nextUp must choose the smallest output > input that follows these rules.
5368// nextDown must choose the largest output < input that follows these rules.
5370 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5371 "Unexpected Semantics");
5372 // nextDown(x) = -nextUp(-x)
5373 if (nextDown) {
5374 changeSign();
5375 APFloat::opStatus Result = next(/*nextDown=*/false);
5376 changeSign();
5377 return Result;
5378 }
5379 switch (getCategory()) {
5380 case fcInfinity:
5381 // nextUp(+inf) = +inf
5382 // nextUp(-inf) = -getLargest()
5383 if (isNegative())
5384 makeLargest(true);
5385 return opOK;
5386
5387 case fcNaN:
5388 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
5389 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
5390 // change the payload.
5391 if (getFirst().isSignaling()) {
5392 // For consistency, propagate the sign of the sNaN to the qNaN.
5393 makeNaN(false, isNegative(), nullptr);
5394 return opInvalidOp;
5395 }
5396 return opOK;
5397
5398 case fcZero:
5399 // nextUp(pm 0) = +getSmallest()
5400 makeSmallest(false);
5401 return opOK;
5402
5403 case fcNormal:
5404 break;
5405 }
5406
5407 const APFloat &HiOld = getFirst();
5408 const APFloat &LoOld = getSecond();
5409
5410 APFloat NextLo = LoOld;
5411 NextLo.next(/*nextDown=*/false);
5412
5413 // We want to admit values where:
5414 // 1. abs(Lo) <= ulp(Hi)/2
5415 // 2. Hi == RTNE(Hi + lo)
5416 auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
5417 return Hi + Lo == Hi;
5418 };
5419
5420 // Check if (HiOld, nextUp(LoOld)) is in the lattice.
5421 if (InLattice(HiOld, NextLo)) {
5422 // Yes, the result is (HiOld, nextUp(LoOld)).
5423 Floats[1] = std::move(NextLo);
5424
5425 // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
5426 // value is defined to have exactly 106 bits of precision. This limitation
5427 // results in semPPCDoubleDouble being unable to reach its maximum canonical
5428 // value.
5429 DoubleAPFloat Largest{*Semantics, uninitialized};
5430 Largest.makeLargest(/*Neg=*/false);
5431 if (compare(Largest) == cmpGreaterThan)
5432 makeInf(/*Neg=*/false);
5433
5434 return opOK;
5435 }
5436
5437 // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
5438 // correct result. We know the new hi component will be nextUp(HiOld) but our
5439 // lattice rules make it a little ambiguous what the correct NextLo must be.
5440 APFloat NextHi = HiOld;
5441 NextHi.next(/*nextDown=*/false);
5442
5443 // nextUp(getLargest()) == INFINITY
5444 if (NextHi.isInfinity()) {
5445 makeInf(/*Neg=*/false);
5446 return opOK;
5447 }
5448
5449 // IEEE 754-2019 5.3.1:
5450 // "If x is the negative number of least magnitude in x's format, nextUp(x) is
5451 // -0."
5452 if (NextHi.isZero()) {
5453 makeZero(/*Neg=*/true);
5454 return opOK;
5455 }
5456
5457 // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
5458 // negative infinity as possible.
5459 NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
5460 if (!InLattice(NextHi, NextLo))
5461 // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo.
5462 NextLo.next(/*nextDown=*/false);
5463
5464 Floats[0] = std::move(NextHi);
5465 Floats[1] = std::move(NextLo);
5466
5467 return opOK;
5468}
5469
// Converts this double-double value (Hi + Lo) to a `Width`-bit integer written
// into `Input`, rounding according to `RM`.
//
// `IsExact` receives the exactness flag: it is forwarded to Hi's own conversion
// on the fast path, and otherwise derived from the status of rounding the whole
// value to an integral value. Returns opInvalidOp when the value is judged not
// to fit; this function performs no fix-up of `Input` in that case.
5470APFloat::opStatus DoubleAPFloat::convertToSignExtendedInteger(
5471 MutableArrayRef<integerPart> Input, unsigned int Width, bool IsSigned,
5472 roundingMode RM, bool *IsExact) const {
5473 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5474 "Unexpected Semantics");
5475
5476 // If Hi is not finite, or Lo is zero, the value is entirely represented
5477 // by Hi. Delegate to the simpler single-APFloat conversion.
5478 if (!getFirst().isFiniteNonZero() || getSecond().isZero())
5479 return getFirst().convertToInteger(Input, Width, IsSigned, RM, IsExact);
5480
5481 // First, round the full double-double value to an integral value. This
5482 // simplifies the rest of the function, as we no longer need to consider
5483 // fractional parts.
5484 *IsExact = false;
5485 DoubleAPFloat Integral = *this;
5486 const opStatus RoundStatus = Integral.roundToIntegral(RM);
5487 if (RoundStatus == opInvalidOp)
5488 return opInvalidOp;
5489 const APFloat &IntegralHi = Integral.getFirst();
5490 const APFloat &IntegralLo = Integral.getSecond();
5491
5492 // If rounding results in either component being zero, the sum is trivial.
5493 // Delegate to the simpler single-APFloat conversion.
     // HiIsExact is only a sink for the callee's flag here; *IsExact stays false
     // on this path.
5494 bool HiIsExact;
5495 if (IntegralHi.isZero() || IntegralLo.isZero()) {
5496 const opStatus HiStatus =
5497 IntegralHi.convertToInteger(Input, Width, IsSigned, RM, &HiIsExact);
5498 // The conversion from an integer-valued float to an APInt may fail if the
5499 // result would be out of range. Regardless, taking this path is only
5500 // possible if rounding occurred during the initial `roundToIntegral`.
5501 return HiStatus == opOK ? opInexact : HiStatus;
5502 }
5503
5504 // A negative number cannot be represented by an unsigned integer.
5505 // Since a double-double is canonical, if Hi is negative, the sum is negative.
5506 if (!IsSigned && IntegralHi.isNegative())
5507 return opInvalidOp;
5508
5509 // Handle the special boundary case where |Hi| is exactly the power of two
5510 // that marks the edge of the integer's range (e.g., 2^63 for int64_t). In
5511 // this situation, Hi itself won't fit, but the sum Hi + Lo might.
5512 // `PositiveOverflowWidth` is the bit number for this boundary (N-1 for
5513 // signed, N for unsigned).
5514 bool LoIsExact;
5515 const int HiExactLog2 = IntegralHi.getExactLog2Abs();
5516 const unsigned PositiveOverflowWidth = IsSigned ? Width - 1 : Width;
5517 if (HiExactLog2 >= 0 &&
5518 static_cast<unsigned>(HiExactLog2) == PositiveOverflowWidth) {
5519 // If Hi and Lo have the same sign, |Hi + Lo| > |Hi|, so the sum is
5520 // guaranteed to overflow. E.g., for uint128_t, (2^128, 1) overflows.
5521 if (IntegralHi.isNegative() == IntegralLo.isNegative())
5522 return opInvalidOp;
5523
5524 // If the signs differ, the sum will fit. We can compute the result using
5525 // properties of two's complement arithmetic without a wide intermediate
5526 // integer. E.g., for uint128_t, (2^128, -1) should be 2^128 - 1.
5527 const opStatus LoStatus = IntegralLo.convertToInteger(
5528 Input, Width, /*IsSigned=*/true, RM, &LoIsExact);
5529 if (LoStatus == opInvalidOp)
5530 return opInvalidOp;
5531
5532 // Adjust the bit pattern of Lo to account for Hi's value:
5533 // - For unsigned (Hi=2^Width): `2^Width + Lo` in `Width`-bit
5534 // arithmetic is equivalent to just `Lo`. The conversion of `Lo` above
5535 // already produced the correct final bit pattern.
5536 // - For signed (Hi=2^(Width-1)): The sum `2^(Width-1) + Lo` (where Lo<0)
5537 // can be computed by taking the two's complement pattern for `Lo` and
5538 // clearing the sign bit.
     // NOTE(review): a signed conversion with Hi == -2^(Width-1) and Lo > 0 also
     // reaches this point (the check above uses |Hi|), and Lo's bit pattern is
     // then returned unmodified. Confirm that this is the intended result for
     // that input, since -2^(Width-1) + Lo has the sign bit set.
5539 if (IsSigned && !IntegralHi.isNegative())
5540 APInt::tcClearBit(Input.data(), PositiveOverflowWidth);
5541 *IsExact = RoundStatus == opOK;
5542 return RoundStatus;
5543 }
5544
5545 // Convert Hi into an integer. This may not fit but that is OK: we know that
5546 // Hi + Lo would not fit either in this situation.
5547 const opStatus HiStatus = IntegralHi.convertToInteger(
5548 Input, Width, IsSigned, rmTowardZero, &HiIsExact);
5549 if (HiStatus == opInvalidOp)
5550 return HiStatus;
5551
5552 // Convert Lo into a temporary integer of the same width.
5553 APSInt LoResult{Width, /*isUnsigned=*/!IsSigned};
5554 const opStatus LoStatus =
5555 IntegralLo.convertToInteger(LoResult, rmTowardZero, &LoIsExact);
5556 if (LoStatus == opInvalidOp)
5557 return LoStatus;
5558
5559 // Add Lo to Hi. This addition is guaranteed not to overflow because of the
5560 // double-double canonicalization rule (`|Lo| <= ulp(Hi)/2`). The only case
5561 // where the sum could cross the integer type's boundary is when Hi is a
5562 // power of two, which is handled by the special case block above.
5563 APInt::tcAdd(Input.data(), LoResult.getRawData(), /*carry=*/0, Input.size());
5564
     // Both components are integral at this point, so exactness of the overall
     // conversion is decided solely by whether roundToIntegral had to round.
5565 *IsExact = RoundStatus == opOK;
5566 return RoundStatus;
5567}
5568
5571 unsigned int Width, bool IsSigned,
5572 roundingMode RM, bool *IsExact) const {
5573 opStatus FS =
5574 convertToSignExtendedInteger(Input, Width, IsSigned, RM, IsExact);
5575
5576 if (FS == opInvalidOp) {
5577 const unsigned DstPartsCount = partCountForBits(Width);
5578 assert(DstPartsCount <= Input.size() && "Integer too big");
5579
5580 unsigned Bits;
5581 if (getCategory() == fcNaN)
5582 Bits = 0;
5583 else if (isNegative())
5584 Bits = IsSigned;
5585 else
5586 Bits = Width - IsSigned;
5587
5588 tcSetLeastSignificantBits(Input.data(), DstPartsCount, Bits);
5589 if (isNegative() && IsSigned)
5590 APInt::tcShiftLeft(Input.data(), DstPartsCount, Width - 1);
5591 }
5592
5593 return FS;
5594}
5595
5596APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
5597 switch (RM) {
5599 makeLargest(/*Neg=*/isNegative());
5600 break;
5602 if (isNegative())
5603 makeInf(/*Neg=*/true);
5604 else
5605 makeLargest(/*Neg=*/false);
5606 break;
5608 if (isNegative())
5609 makeLargest(/*Neg=*/true);
5610 else
5611 makeInf(/*Neg=*/false);
5612 break;
5615 makeInf(/*Neg=*/isNegative());
5616 break;
5617 default:
5618 llvm_unreachable("Invalid rounding mode found");
5619 }
5620 opStatus S = opInexact;
5621 if (!getFirst().isFinite())
5622 S = static_cast<opStatus>(S | opOverflow);
5623 return S;
5624}
5625
5626APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5627 const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5628 // Find the most significant bit of the source integer. APInt::tcMSB returns
5629 // UINT_MAX for a zero value.
5630 const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5631 if (SrcMSB == UINT_MAX) {
5632 // The source integer is 0.
5633 makeZero(/*Neg=*/false);
5634 return opOK;
5635 }
5636
5637 // Create a minimally-sized APInt to represent the source value.
5638 const unsigned SrcBitWidth = SrcMSB + 1;
5639 APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth, ArrayRef(Src, SrcCount)},
5640 /*isUnsigned=*/true};
5641
5642 // Stage 1: Initial Approximation.
5643 // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5644 // We use round-to-nearest because it minimizes the initial error, which is
5645 // crucial for the subsequent steps.
5647 Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5648
5649 // If the first approximation already overflows, the number is too large.
5650 // NOTE: The underlying semantics are *more* conservative when choosing to
5651 // overflow because their notion of ULP is much larger. As such, it is always
5652 // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5653 if (!Hi.isFinite())
5654 return handleOverflow(RM);
5655
5656 // Stage 2: Exact Error Calculation.
5657 // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5658 // This is done by converting Hi back to an integer and subtracting it from
5659 // the original source.
5660 bool HiAsIntIsExact;
5661 // Create an integer representation of Hi. Its width is determined by the
5662 // exponent of Hi, ensuring it's just large enough to represent Hi
5663 // accurately when converted back to an integer. This width can exceed
5664 // SrcBitWidth if the conversion to Hi rounded up to a power of two.
5665 APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5666 Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5667 const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5668
5669 // Stage 3: Error Approximation and Rounding.
5670 // Convert the integer error into the Lo part of the DoubleAPFloat. This step
5671 // captures the remainder of the original number. The rounding mode for this
5672 // conversion (LoRM) may need to be adjusted from the user-requested RM to
5673 // ensure the final sum (Hi + Lo) rounds correctly.
5674 roundingMode LoRM = RM;
5675 // Adjustments are only necessary when the initial approximation Hi was an
5676 // overestimate, making the Error negative.
5677 if (Error.isNegative()) {
5678 if (RM == rmNearestTiesToAway) {
5679 // For rmNearestTiesToAway, a tie should round away from zero. Since
5680 // SrcInt is positive, this means rounding toward +infinity.
5681 // A standard conversion of a negative Error would round ties toward
5682 // -infinity, causing the final sum Hi + Lo to be smaller. To
5683 // counteract this, we detect the tie case and override the rounding
5684 // mode for Lo to rmTowardPositive.
5685 const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5686 const unsigned LoPrecision = getSecond().getSemantics().precision;
5687 if (ErrorActiveBits > LoPrecision) {
5688 const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5689 // A tie occurs when the bits to be truncated are of the form 100...0.
5690 // This is detected by checking if the number of trailing zeros is
5691 // exactly one less than the number of bits being truncated.
5692 if (Error.countTrailingZeros() == RoundingBoundary - 1)
5693 LoRM = rmTowardPositive;
5694 }
5695 } else if (RM == rmTowardZero) {
5696 // For rmTowardZero, the final positive result must be truncated (rounded
5697 // down). When Hi is an overestimate, Error is negative. A standard
5698 // rmTowardZero conversion of Error would make it *less* negative,
5699 // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5700 // rounds down correctly, we force Lo to round toward -infinity.
5701 LoRM = rmTowardNegative;
5702 }
5703 }
5704
5706 opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5707
5708 // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5709 // components do not overlap. fastTwoSum performs this operation.
5710 std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5711 Floats[0] = std::move(Hi);
5712 Floats[1] = std::move(Lo);
5713
5714 // A final check for overflow is needed because fastTwoSum can cause a
5715 // carry-out from Lo that pushes Hi to infinity.
5716 if (!getFirst().isFinite())
5717 return handleOverflow(RM);
5718
5719 // The largest DoubleAPFloat must be canonical. Values which are larger are
5720 // not canonical and are equivalent to overflow.
5721 if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5722 DoubleAPFloat Largest{*Semantics};
5723 Largest.makeLargest(/*Neg=*/false);
5724 if (compare(Largest) == APFloat::cmpGreaterThan)
5725 return handleOverflow(RM);
5726 }
5727
5728 // The final status of the operation is determined by the conversion of the
5729 // error term. If Lo could represent Error exactly, the entire conversion
5730 // is exact. Otherwise, it's inexact.
5731 return Status;
5732}
5733
5735 bool IsSigned,
5736 roundingMode RM) {
5737 const bool NegateInput = IsSigned && Input.isNegative();
5738 APInt API = Input;
5739 if (NegateInput)
5740 API.negate();
5741
5743 convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
5744 if (NegateInput)
5745 changeSign();
5746 return Status;
5747}
5748
5750 unsigned int HexDigits,
5751 bool UpperCase,
5752 roundingMode RM) const {
5753 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5754 "Unexpected Semantics");
5755 return APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5756 .convertToHexString(DST, HexDigits, UpperCase, RM);
5757}
5758
5760 return getCategory() == fcNormal &&
5761 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5762 // (double)(Hi + Lo) == Hi defines a normal number.
5763 Floats[0] != Floats[0] + Floats[1]);
5764}
5765
5767 if (getCategory() != fcNormal)
5768 return false;
5769 DoubleAPFloat Tmp(*this);
5770 Tmp.makeSmallest(this->isNegative());
5771 return Tmp.compare(*this) == cmpEqual;
5772}
5773
5775 if (getCategory() != fcNormal)
5776 return false;
5777
5778 DoubleAPFloat Tmp(*this);
5780 return Tmp.compare(*this) == cmpEqual;
5781}
5782
5784 if (getCategory() != fcNormal)
5785 return false;
5786 DoubleAPFloat Tmp(*this);
5787 Tmp.makeLargest(this->isNegative());
5788 return Tmp.compare(*this) == cmpEqual;
5789}
5790
5792 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5793 "Unexpected Semantics");
5794 return Floats[0].isInteger() && Floats[1].isInteger();
5795}
5796
5798 unsigned FormatPrecision,
5799 unsigned FormatMaxPadding,
5800 bool TruncateZero) const {
5801 assert(Semantics == &APFloatBase::semPPCDoubleDouble &&
5802 "Unexpected Semantics");
5803 APFloat(APFloatBase::semPPCDoubleDoubleLegacy, bitcastToAPInt())
5804 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5805}
5806
5808 // In order for Hi + Lo to be a power of two, the following must be true:
5809 // 1. Hi must be a power of two.
5810 // 2. Lo must be zero.
5811 if (getSecond().isNonZero())
5812 return INT_MIN;
5813 return getFirst().getExactLog2Abs();
5814}
5815
// Returns the binade exponent of the double-double value Hi + Lo.
//
// The result is ilogb(Hi) unless Hi's magnitude is an exact power of two and a
// nonzero Lo of the opposite sign pulls the sum just below it; in that case the
// result is ilogb(Hi) - 1. For categories other than fcNormal, whatever
// ilogb(Hi) reports is returned unchanged.
5816int ilogb(const DoubleAPFloat &Arg) {
5817 const APFloat &Hi = Arg.getFirst();
5818 const APFloat &Lo = Arg.getSecond();
5819 int IlogbResult = ilogb(Hi);
5820 // Zero and non-finite values can delegate to ilogb(Hi).
5821 if (Arg.getCategory() != fcNormal)
5822 return IlogbResult;
5823 // If Lo can't change the binade, we can delegate to ilogb(Hi).
5824 if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
5825 return IlogbResult;
     // INT_MIN signals that |Hi| is not an exact power of two; such a Hi sits
     // strictly inside its binade, so an opposite-signed Lo cannot move it out.
5826 if (Hi.getExactLog2Abs() == INT_MIN)
5827 return IlogbResult;
5828 // Numbers of the form 2^a - 2^b or -2^a + 2^b are almost powers of two but
5829 // get nudged out of the binade by the low component.
5830 return IlogbResult - 1;
5831}
5832
5835 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5836 "Unexpected Semantics");
5838 scalbn(Arg.Floats[0], Exp, RM),
5839 scalbn(Arg.Floats[1], Exp, RM));
5840}
5841
5842DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5844 assert(Arg.Semantics == &APFloatBase::PPCDoubleDouble() &&
5845 "Unexpected Semantics");
5846
5847 // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
5848 // [1.0, 2.0).
5849 Exp = ilogb(Arg);
5850
5851 // For NaNs, quiet any signaling NaN and return the result, as per standard
5852 // practice.
5853 if (Exp == APFloat::IEK_NaN) {
5854 DoubleAPFloat Quiet{Arg};
5855 Quiet.getFirst() = Quiet.getFirst().makeQuiet();
5856 return Quiet;
5857 }
5858
5859 // For infinity, return it unchanged. The exponent remains IEK_Inf.
5860 if (Exp == APFloat::IEK_Inf)
5861 return Arg;
5862
5863 // For zero, the fraction is zero and the standard requires the exponent be 0.
5864 if (Exp == APFloat::IEK_Zero) {
5865 Exp = 0;
5866 return Arg;
5867 }
5868
5869 const APFloat &Hi = Arg.getFirst();
5870 const APFloat &Lo = Arg.getSecond();
5871
5872 // frexp requires the fraction's absolute value to be in [0.5, 1.0).
5873 // ilogb provides an exponent for an absolute value in [1.0, 2.0).
5874 // Increment the exponent to ensure the fraction is in the correct range.
5875 ++Exp;
5876
5877 const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
5878 APFloat Second = Lo;
5879 if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
5880 roundingMode LoRoundingMode;
5881 // The interpretation of rmTowardZero depends on the sign of the combined
5882 // Arg rather than the sign of the component.
5883 if (RM == rmTowardZero)
5884 LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
5885 // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
5886 // Lo is a correction *toward* zero relative to Hi. Rounding Lo
5887 // "away from zero" based on its own sign would move the value in the
5888 // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
5889 // direction-agnostic. We only need to bother with this if Lo is scaled
5890 // down.
5891 else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
5892 LoRoundingMode = rmNearestTiesToEven;
5893 else
5894 LoRoundingMode = RM;
5895 Second = scalbn(Lo, -Exp, LoRoundingMode);
5896 // The rmNearestTiesToEven proxy is correct most of the time, but it
5897 // differs from rmNearestTiesToAway when the scaled value of Lo is an
5898 // exact midpoint.
5899 // NOTE: This is morally equivalent to roundTiesTowardZero.
5900 if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
5901 // Re-scale the result back to check if rounding occurred.
5902 const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
5903 if (RecomposedLo != Lo) {
5904 // RoundingError tells us which direction we rounded:
5905 // - RoundingError > 0: we rounded up.
5906 // - RoundingError < 0: we rounded down.
5907 const APFloat RoundingError = RecomposedLo - Lo;
5908 // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
5909 // We do this by checking if the absolute rounding error is exactly
5910 // half a ULP of the result.
5911 const APFloat UlpOfSecond = harrisonUlp(Second);
5912 const APFloat ScaledUlpOfSecond =
5913 scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
5914 const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
5915 const bool RoundedLoAway =
5916 Second.isNegative() == RoundingError.isNegative();
5917 // The signs of Hi and Lo disagree and we rounded Lo away: we must
5918 // decrease the magnitude of Second to increase the magnitude of
5919 // First+Second.
5920 if (IsMidpoint && RoundedLoAway)
5921 Second.next(/*nextDown=*/!Second.isNegative());
5922 }
5923 }
5924 // Handle a tricky edge case where Arg is slightly less than a power of two
5925 // (e.g., Arg = 2^k - epsilon). In this situation:
5926 // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
5927 // 2. ilogb(Arg) correctly returns k-1.
5928 // 3. Our initial Exp becomes (k-1) + 1 = k.
5929 // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
5930 // scaling Lo by 2^-k would yield zero. This would make the result 1.0
5931 // which is an invalid fraction, as the required interval is [0.5, 1.0).
5932 // We detect this specific case by checking if Hi is a power of two and if
5933 // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
5934 // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
5935 // valid fraction.
5936 if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
5937 ++Exp;
5938 }
5939
5940 APFloat First = scalbn(Hi, -Exp, RM);
5942 std::move(Second));
5943}
5944
5945} // namespace detail
5946
/// Build the storage from an IEEEFloat value, placement-constructing whichever
/// member (IEEE or Double) matches the layout selected by \p Semantics.
/// Any other layout is a programming error (llvm_unreachable).
5947APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5948 if (usesLayout<IEEEFloat>(Semantics)) {
     // Same layout: move F straight into the IEEE member.
5949 new (&IEEE) IEEEFloat(std::move(F));
5950 return;
5951 }
5952 if (usesLayout<DoubleAPFloat>(Semantics)) {
     // Read F's semantics into S first so that F is not queried in the same
     // expression that moves from it.
5953 const fltSemantics& S = F.getSemantics();
     // F becomes the first APFloat component of the DoubleAPFloat.
     // NOTE(review): listing line 5955 (the rest of this constructor call) is
     // absent from this extract.
5954 new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5956 return;
5957 }
5958 llvm_unreachable("Unexpected semantics");
5959}
5960
5965
/// Hash an APFloat by forwarding to the hash_value overload of whichever
/// representation its semantics select: U.IEEE for the IEEEFloat layout,
/// U.Double for the DoubleAPFloat layout. Any other layout is unreachable.
5966hash_code hash_value(const APFloat &Arg) {
5967 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5968 return hash_value(Arg.U.IEEE);
5969 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5970 return hash_value(Arg.U.Double);
5971 llvm_unreachable("Unexpected semantics");
5972}
5973
5975 : APFloat(Semantics) {
     // String constructor (its signature, listing line 5974, is absent from this
     // extract). Delegates to APFloat(Semantics) and then parses S using
     // round-to-nearest, ties-to-even.
5976 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
     // A string that fails to parse is treated as a caller bug: it trips this
     // assert when assertions are enabled.
5977 assert(StatusOrErr && "Invalid floating point representation");
     // Take and consume the error held by the Expected so that it is not left
     // unchecked (this is the only handling when the assert is compiled out).
5978 consumeError(StatusOrErr.takeError());
5979}
5980
// Body of APFloat::classify() (its signature, listing line 5981, is absent
// from this extract). Returns the FPClassTest value describing this number,
// testing in order: zero, normal, denormal, infinity, and finally NaN.
5982 if (isZero())
5983 return isNegative() ? fcNegZero : fcPosZero;
5984 if (isNormal())
5985 return isNegative() ? fcNegNormal : fcPosNormal;
     // NOTE(review): listing line 5987 (the return statement for the denormal
     // case) is absent from this extract.
5986 if (isDenormal())
5988 if (isInfinity())
5989 return isNegative() ? fcNegInf : fcPosInf;
     // Everything else must be a NaN; pick signaling vs. quiet.
5990 assert(isNaN() && "Other class of FP constant");
5991 return isSignaling() ? fcSNan : fcQNan;
5992}
5993
/// Determine whether this value has an exact multiplicative inverse that is
/// itself a normal number in the same semantics.
///
/// \param Inv Optional output. When non-null and the function returns true,
///            receives the reciprocal. It is left untouched on failure.
/// \returns true only if this value is finite, non-zero, not denormal, an exact
///          power of two, and its reciprocal is representable and not denormal.
5994bool APFloat::getExactInverse(APFloat *Inv) const {
5995 // Only finite, non-zero numbers can have a useful, representable inverse.
5996 // This check filters out +/- zero, +/- infinity, and NaN.
5997 if (!isFiniteNonZero())
5998 return false;
5999
6000 // Historically, this function rejects subnormal inputs. One reason why this
6001 // might be important is that subnormals may behave differently under FTZ/DAZ
6002 // runtime behavior.
6003 if (isDenormal())
6004 return false;
6005
6006 // A number has an exact, representable inverse if and only if it is a power
6007 // of two.
6008 //
6009 // Mathematical Rationale:
6010 // 1. A binary floating-point number x is a dyadic rational, meaning it can
6011 // be written as x = M / 2^k for integers M (the significand) and k.
6012 // 2. The inverse is 1/x = 2^k / M.
6013 // 3. For 1/x to also be a dyadic rational (and thus exactly representable
6014 // in binary), its denominator M must also be a power of two.
6015 // Let's say M = 2^m.
6016 // 4. Substituting this back into the formula for x, we get
6017 // x = (2^m) / (2^k) = 2^(m-k).
6018 //
6019 // This proves that x must be a power of two.
6020
6021 // getExactLog2Abs() returns the integer exponent if the number is a power of
6022 // two or INT_MIN if it is not.
6023 const int Exp = getExactLog2Abs();
6024 if (Exp == INT_MIN)
6025 return false;
6026
6027 // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
6028 // scaling 1.0 by the negated exponent.
     // The sign of the input is carried over through getOne's Negative flag.
6029 APFloat Reciprocal =
6030 scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
6031 rmTowardZero);
6032
6033 // scalbn might round if the resulting exponent -Exp is outside the
6034 // representable range, causing overflow (to infinity) or underflow. We
6035 // must verify that the result is still the exact power of two we expect.
6036 if (Reciprocal.getExactLog2Abs() != -Exp)
6037 return false;
6038
6039 // Avoid multiplication with a subnormal, it is not safe on all platforms and
6040 // may be slower than a normal division.
6041 if (Reciprocal.isDenormal())
6042 return false;
6043
6044 assert(Reciprocal.isFiniteNonZero());
6045
     // Inv may be null when the caller only wants the yes/no answer.
6046 if (Inv)
6047 *Inv = std::move(Reciprocal);
6048
6049 return true;
6050}
6051
6053 roundingMode RM, bool *losesInfo) {
     // APFloat::convert (first signature line, listing line 6052, is absent from
     // this extract). Converts *this in place to ToSemantics using rounding mode
     // RM and returns the opStatus of the underlying conversion. losesInfo is
     // written through unconditionally on the fast path below, so it must be
     // non-null.

     // Fast path: identical semantics object (compared by address), nothing to do.
6054 if (&getSemantics() == &ToSemantics) {
6055 *losesInfo = false;
6056 return opOK;
6057 }
     // IEEE layout -> IEEE layout: delegate to IEEEFloat::convert.
6058 if (usesLayout<IEEEFloat>(getSemantics()) &&
6059 usesLayout<IEEEFloat>(ToSemantics))
6060 return U.IEEE.convert(ToSemantics, RM, losesInfo);
     // IEEE layout -> DoubleAPFloat layout: only semPPCDoubleDouble is accepted.
     // The IEEE value is first converted to semPPCDoubleDoubleLegacy, then *this
     // is rebuilt from that value's bit pattern under ToSemantics.
6061 if (usesLayout<IEEEFloat>(getSemantics()) &&
6062 usesLayout<DoubleAPFloat>(ToSemantics)) {
6063 assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
6064 auto Ret =
6065 U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
6066 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
6067 return Ret;
6068 }
     // DoubleAPFloat layout -> IEEE layout: convert the IEEEFloat returned by
     // getIEEE() in place, then rebuild *this around it with ToSemantics.
6069 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
6070 usesLayout<IEEEFloat>(ToSemantics)) {
6071 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
6072 *this = APFloat(std::move(getIEEE()), ToSemantics);
6073 return Ret;
6074 }
6075 llvm_unreachable("Unexpected semantics");
6076}
6077
6081
// Body of APFloat::print(raw_ostream &) (its signature, listing line 6082, is
// absent from this extract). Renders the value with toString() into a local
// character buffer and streams that buffer to OS.
6083 SmallVector<char, 16> Buffer;
6084 toString(Buffer);
6085 OS << Buffer;
6086}
6087
6088#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     // Body of APFloat::dump() (its signature, listing line 6089, is absent from
     // this extract). Only compiled when assertions are enabled or
     // LLVM_ENABLE_DUMP is defined. Prints the value to dbgs() followed by a
     // newline.
6090 print(dbgs());
6091 dbgs() << '\n';
6092}
6093#endif
6094
// Body of APFloat::Profile(FoldingSetNodeID &) (its signature, listing line
// 6095, is absent from this extract). Adds the value returned by
// bitcastToAPInt() to the node ID.
6096 NID.Add(bitcastToAPInt());
6097}
6098
6100 roundingMode rounding_mode,
6101 bool *isExact) const {
     // APSInt overload of convertToInteger (first signature line, listing line
     // 6099, is absent from this extract). Converts into a temporary word buffer
     // sized from `result`, using result's bit width and signedness, then stores
     // the words back into `result`. Returns the status of the conversion.
6102 unsigned bitWidth = result.getBitWidth();
     // One 64-bit word per APInt word of the destination.
6103 SmallVector<uint64_t, 4> parts(result.getNumWords());
6104 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
6105 rounding_mode, isExact);
6106 // Keeps the original signed-ness.
6107 result = APInt(bitWidth, parts);
6108 return status;
6109}
6110
// Body of APFloat::convertToDouble() (its signature, listing line 6111, is
// absent from this extract). Returns the value as a host double.
// Fast path: the semantics object is exactly semIEEEdouble (address compare).
6112 if (&getSemantics() ==
6113 (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
6114 return getIEEE().convertToDouble();
     // Otherwise the source semantics must be representable by IEEEdouble. Work
     // on a copy so *this is not modified, and assert the conversion is exact.
6115 assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
6116 "Float semantics is not representable by IEEEdouble");
6117 APFloat Temp = *this;
6118 bool LosesInfo;
6119 opStatus St =
6120 Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
6121 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
     // St is otherwise only read by the assert above; the cast keeps it "used"
     // when asserts are compiled out.
6122 (void)St;
6123 return Temp.getIEEE().convertToDouble();
6124}
6125
6126#ifdef HAS_IEE754_FLOAT128
/// Return the value as a host float128. Only compiled when
/// HAS_IEE754_FLOAT128 is defined.
///
/// If the semantics object is exactly semIEEEquad the stored IEEEFloat is
/// converted directly. Otherwise the semantics must be representable by
/// IEEEquad (asserted) and a copy is converted first; that conversion is
/// asserted to be exact.
6127float128 APFloat::convertToQuad() const {
6128 if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
6129 return getIEEE().convertToQuad();
6130 assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
6131 "Float semantics is not representable by IEEEquad");
     // Convert a copy so *this is left unchanged.
6132 APFloat Temp = *this;
6133 bool LosesInfo;
6134 opStatus St =
6135 Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
6136 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
     // Keeps St "used" when the assert above is compiled out.
6137 (void)St;
6138 return Temp.getIEEE().convertToQuad();
6139}
6140#endif
6141
// Body of APFloat::convertToFloat() (its signature, listing line 6142, is
// absent from this extract). Returns the value as a host float.
// Fast path: the semantics object is exactly semIEEEsingle (address compare).
6143 if (&getSemantics() ==
6144 (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
6145 return getIEEE().convertToFloat();
     // Otherwise the source semantics must be representable by IEEEsingle. Work
     // on a copy so *this is not modified, and assert the conversion is exact.
6146 assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
6147 "Float semantics is not representable by IEEEsingle");
6148 APFloat Temp = *this;
6149 bool LosesInfo;
6150 opStatus St =
6151 Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
6152 assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
     // Keeps St "used" when the assert above is compiled out.
6153 (void)St;
6154 return Temp.getIEEE().convertToFloat();
6155}
6156
/// Destroy whichever member is selected by *semantics: IEEE for the IEEEFloat
/// layout, Double for the DoubleAPFloat layout. The destructor is invoked
/// explicitly on that member. Any other layout is unreachable.
6157APFloat::Storage::~Storage() {
6158 if (usesLayout<IEEEFloat>(*semantics)) {
6159 IEEE.~IEEEFloat();
6160 return;
6161 }
6162 if (usesLayout<DoubleAPFloat>(*semantics)) {
6163 Double.~DoubleAPFloat();
6164 return;
6165 }
6166 llvm_unreachable("Unexpected semantics");
6167}
6168
/// Copy constructor: copy-constructs the representation selected by
/// *RHS.semantics (IEEEFloat or DoubleAPFloat) from the matching member of
/// \p RHS. The placement-new target is `this`, i.e. the address of the Storage
/// object itself. Any other layout is unreachable.
6169APFloat::Storage::Storage(const APFloat::Storage &RHS) {
6170 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6171 new (this) IEEEFloat(RHS.IEEE);
6172 return;
6173 }
6174 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6175 new (this) DoubleAPFloat(RHS.Double);
6176 return;
6177 }
6178 llvm_unreachable("Unexpected semantics");
6179}
6180
/// Move constructor: move-constructs the representation selected by
/// *RHS.semantics (IEEEFloat or DoubleAPFloat) from the matching member of
/// \p RHS. The placement-new target is `this`, i.e. the address of the Storage
/// object itself. Any other layout is unreachable.
6181APFloat::Storage::Storage(APFloat::Storage &&RHS) {
6182 if (usesLayout<IEEEFloat>(*RHS.semantics)) {
6183 new (this) IEEEFloat(std::move(RHS.IEEE));
6184 return;
6185 }
6186 if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6187 new (this) DoubleAPFloat(std::move(RHS.Double));
6188 return;
6189 }
6190 llvm_unreachable("Unexpected semantics");
6191}
6192
/// Copy assignment.
///
/// When both sides use the same layout, the assignment is delegated to that
/// member's own copy-assignment operator. Otherwise, unless \p RHS is this
/// same object, the current contents are destroyed and the storage is
/// copy-constructed in place from \p RHS.
/// \returns *this.
6193APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
6194 if (usesLayout<IEEEFloat>(*semantics) &&
6195 usesLayout<IEEEFloat>(*RHS.semantics)) {
6196 IEEE = RHS.IEEE;
6197 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6198 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6199 Double = RHS.Double;
6200 } else if (this != &RHS) {
     // Layouts differ: tear down the old representation and rebuild in place.
6201 this->~Storage();
6202 new (this) Storage(RHS);
6203 }
6204 return *this;
6205}
6206
/// Move assignment.
///
/// When both sides use the same layout, the assignment is delegated to that
/// member's own move-assignment operator. Otherwise, unless \p RHS is this
/// same object, the current contents are destroyed and the storage is
/// move-constructed in place from \p RHS.
/// \returns *this.
6207APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
6208 if (usesLayout<IEEEFloat>(*semantics) &&
6209 usesLayout<IEEEFloat>(*RHS.semantics)) {
6210 IEEE = std::move(RHS.IEEE);
6211 } else if (usesLayout<DoubleAPFloat>(*semantics) &&
6212 usesLayout<DoubleAPFloat>(*RHS.semantics)) {
6213 Double = std::move(RHS.Double);
6214 } else if (this != &RHS) {
     // Layouts differ: tear down the old representation and rebuild in place.
6215 this->~Storage();
6216 new (this) Storage(std::move(RHS));
6217 }
6218 return *this;
6219}
6220
6221} // namespace llvm
6222
6223#undef APFLOAT_DISPATCH_ON_SEMANTICS
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define PackCategoriesIntoKey(_lhs, _rhs)
A macro used to combine two fcCategory enums into one key which can be used in a switch statement to ...
Definition APFloat.cpp:48
This file declares a class to represent arbitrary precision floating point values and provide a varie...
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)
Definition APFloat.h:26
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
Function Alias Analysis false
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:646
static bool isNeg(Value *V)
Returns true if the operation is a negation of V, and it works for both integers and floats.
Utilities for dealing with flags related to floating point properties and mode controls.
This file defines a hash set that can be used to remove duplication of nodes in a graph.
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
#define P(N)
if(PassOpts->AAPipeline)
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & Float8E4M3FN()
Definition APFloat.h:306
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:174
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:323
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition APFloat.h:334
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:298
llvm::RoundingMode roundingMode
IEEE-754R 4.3: Rounding-direction attributes.
Definition APFloat.h:342
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEquad()
Definition APFloat.h:298
static LLVM_ABI unsigned int semanticsSizeInBits(const fltSemantics &)
Definition APFloat.cpp:301
static const fltSemantics & Float8E8M0FNU()
Definition APFloat.h:313
static LLVM_ABI bool semanticsHasSignedRepr(const fltSemantics &)
Definition APFloat.cpp:319
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:354
static const fltSemantics & x87DoubleExtended()
Definition APFloat.h:317
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:336
static LLVM_ABI ExponentType semanticsMaxExponent(const fltSemantics &)
Definition APFloat.cpp:294
friend class APFloat
Definition APFloat.h:291
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
static LLVM_ABI bool semanticsHasNaN(const fltSemantics &)
Definition APFloat.cpp:327
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Definition APFloat.cpp:221
int32_t ExponentType
A signed type to represent a floating point numbers unbiased exponent.
Definition APFloat.h:155
static constexpr unsigned integerPartWidth
Definition APFloat.h:152
static const fltSemantics & PPCDoubleDoubleLegacy()
Definition APFloat.h:300
APInt::WordType integerPart
Definition APFloat.h:151
static LLVM_ABI bool semanticsHasZero(const fltSemantics &)
Definition APFloat.cpp:315
static LLVM_ABI bool isRepresentableAsNormalIn(const fltSemantics &Src, const fltSemantics &Dst)
Definition APFloat.cpp:340
static const fltSemantics & Float8E5M2FNUZ()
Definition APFloat.h:304
static const fltSemantics & Float8E4M3FNUZ()
Definition APFloat.h:307
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static const fltSemantics & Float4E2M1FN()
Definition APFloat.h:316
static const fltSemantics & Float6E2M3FN()
Definition APFloat.h:315
static const fltSemantics & Float8E4M3()
Definition APFloat.h:305
static const fltSemantics & Float8E4M3B11FNUZ()
Definition APFloat.h:308
static LLVM_ABI bool isRepresentableBy(const fltSemantics &A, const fltSemantics &B)
Definition APFloat.cpp:266
static const fltSemantics & Float8E3M4()
Definition APFloat.h:311
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:331
static const fltSemantics & Float8E5M2()
Definition APFloat.h:303
fltCategory
Category of internally-represented number.
Definition APFloat.h:370
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
static const fltSemantics & Float6E3M2FN()
Definition APFloat.h:314
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
static const fltSemantics & FloatTF32()
Definition APFloat.h:312
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:304
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
Definition APFloat.h:1102
LLVM_ABI void Profile(FoldingSetNodeID &NID) const
Used to insert APFloat objects, or objects that contain APFloat objects, into FoldingSets.
Definition APFloat.cpp:6095
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1190
bool isFiniteNonZero() const
Definition APFloat.h:1441
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6052
LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.h:1479
bool isNegative() const
Definition APFloat.h:1431
LLVM_ABI bool getExactInverse(APFloat *Inv) const
If this value is normal and has an exact, normal, multiplicative inverse, store it in inv and return ...
Definition APFloat.cpp:5994
friend DoubleAPFloat
Definition APFloat.h:1495
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:6111
void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Definition APFloat.h:1460
bool isNormal() const
Definition APFloat.h:1435
bool isDenormal() const
Definition APFloat.h:1432
opStatus add(const APFloat &RHS, roundingMode RM)
Definition APFloat.h:1163
static LLVM_ABI APFloat getAllOnesValue(const fltSemantics &Semantics)
Returns a float which is bitcasted from an all one value int.
Definition APFloat.cpp:6078
LLVM_ABI friend hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition APFloat.cpp:5966
const fltSemantics & getSemantics() const
Definition APFloat.h:1439
bool isFinite() const
Definition APFloat.h:1436
bool isNaN() const
Definition APFloat.h:1429
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1070
unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.h:1421
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6142
bool isSignaling() const
Definition APFloat.h:1433
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1217
opStatus remainder(const APFloat &RHS)
Definition APFloat.h:1199
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
opStatus next(bool nextDown)
Definition APFloat.h:1236
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM)
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1130
LLVM_ABI FPClassTest classify() const
Return the FPClassTest which will return true for the value.
Definition APFloat.cpp:5981
opStatus mod(const APFloat &RHS)
Definition APFloat.h:1208
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5961
friend IEEEFloat
Definition APFloat.h:1494
LLVM_DUMP_METHOD void dump() const
Definition APFloat.cpp:6089
LLVM_ABI void print(raw_ostream &) const
Definition APFloat.cpp:6082
opStatus roundToIntegral(roundingMode RM)
Definition APFloat.h:1230
static bool hasSignificand(const fltSemantics &Sem)
Returns true if the given semantics has actual significand.
Definition APFloat.h:1155
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Definition APFloat.h:1061
bool isInfinity() const
Definition APFloat.h:1428
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1584
static LLVM_ABI void tcSetBit(WordType *, unsigned bit)
Set the given bit of a bignum. Zero-based.
Definition APInt.cpp:2379
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void tcSet(WordType *, WordType, unsigned)
Sets the least significant part of a bignum to the input value, and zeroes out higher parts.
Definition APInt.cpp:2351
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1769
static LLVM_ABI int tcExtractBit(const WordType *, unsigned bit)
Extract the given bit of a bignum; returns 0 or 1. Zero-based.
Definition APInt.cpp:2374
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static LLVM_ABI WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned)
DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2453
static LLVM_ABI void tcExtract(WordType *, unsigned dstCount, const WordType *, unsigned srcBits, unsigned srcLSB)
Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to DST, of dstCOUNT parts,...
Definition APInt.cpp:2423
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1521
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static LLVM_ABI int tcCompare(const WordType *, const WordType *, unsigned)
Comparison (unsigned) of two bignums.
Definition APInt.cpp:2763
static APInt floatToBits(float V)
Converts a float to APInt bits.
Definition APInt.h:1761
uint64_t WordType
Definition APInt.h:80
static LLVM_ABI void tcAssign(WordType *, const WordType *, unsigned)
Assign one bignum to another.
Definition APInt.cpp:2359
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
static LLVM_ABI void tcShiftRight(WordType *, unsigned Words, unsigned Count)
Shift a bignum right Count bits.
Definition APInt.cpp:2737
static LLVM_ABI void tcFullMultiply(WordType *, const WordType *, const WordType *, unsigned, unsigned)
DST = LHS * RHS, where DST has width the sum of the widths of the operands.
Definition APInt.cpp:2643
unsigned getNumWords() const
Get the number of words.
Definition APInt.h:1504
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI void tcClearBit(WordType *, unsigned bit)
Clear the given bit of a bignum. Zero-based.
Definition APInt.cpp:2384
void negate()
Negate this APInt in place.
Definition APInt.h:1477
static WordType tcDecrement(WordType *dst, unsigned parts)
Decrement a bignum in-place. Return the borrow flag.
Definition APInt.h:1927
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1648
static LLVM_ABI unsigned tcLSB(const WordType *, unsigned n)
Returns the bit number of the least or most significant set bit of a number.
Definition APInt.cpp:2390
static LLVM_ABI void tcShiftLeft(WordType *, unsigned Words, unsigned Count)
Shift a bignum left Count bits.
Definition APInt.cpp:2710
static LLVM_ABI bool tcIsZero(const WordType *, unsigned)
Returns true if a bignum is zero, false otherwise.
Definition APInt.cpp:2365
static LLVM_ABI unsigned tcMSB(const WordType *parts, unsigned n)
Returns the bit number of the most significant set bit of a number.
Definition APInt.cpp:2403
float bitsToFloat() const
Converts APInt bits to a float.
Definition APInt.h:1745
static LLVM_ABI int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, unsigned srcParts, unsigned dstParts, bool add)
DST += SRC * MULTIPLIER + PART if add is true DST = SRC * MULTIPLIER + PART if add is false.
Definition APInt.cpp:2541
static constexpr unsigned APINT_BITS_PER_WORD
Bits in a word.
Definition APInt.h:86
static LLVM_ABI WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned)
DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
Definition APInt.cpp:2488
static LLVM_ABI void tcNegate(WordType *, unsigned)
Negate a bignum in-place.
Definition APInt.cpp:2527
static APInt doubleToBits(double V)
Converts a double to APInt bits.
Definition APInt.h:1753
static WordType tcIncrement(WordType *dst, unsigned parts)
Increment a bignum in-place. Return the carry flag.
Definition APInt.h:1922
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1731
const uint64_t * getRawData() const
This function returns a pointer to the internal storage of the APInt.
Definition APInt.h:570
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
bool isSigned() const
Definition APSInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
FoldingSetNodeID - This class is used to gather all the unique data bits of a node.
Definition FoldingSet.h:209
void Add(const T &x)
Definition FoldingSet.h:249
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
const char * iterator
Definition StringRef.h:59
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
iterator begin() const
Definition StringRef.h:112
char back() const
back - Get the last character in the string.
Definition StringRef.h:155
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:696
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
char front() const
front - Get the first character in the string.
Definition StringRef.h:149
iterator end() const
Definition StringRef.h:114
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
bool consume_front_insensitive(StringRef Prefix)
Returns true if this StringRef has the given prefix, ignoring case, and removes that prefix.
Definition StringRef.h:657
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void makeSmallestNormalized(bool Neg)
Definition APFloat.cpp:5309
LLVM_ABI DoubleAPFloat & operator=(const DoubleAPFloat &RHS)
Definition APFloat.cpp:4839
LLVM_ABI void changeSign()
Definition APFloat.cpp:5216
LLVM_ABI bool isLargest() const
Definition APFloat.cpp:5783
LLVM_ABI opStatus remainder(const DoubleAPFloat &RHS)
Definition APFloat.cpp:5103
LLVM_ABI opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5006
LLVM_ABI fltCategory getCategory() const
Definition APFloat.cpp:5275
LLVM_ABI bool bitwiseIsEqual(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5332
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:5807
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.cpp:5734
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:5343
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:5353
LLVM_ABI bool isSmallest() const
Definition APFloat.cpp:5766
LLVM_ABI opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4998
LLVM_ABI friend hash_code hash_value(const DoubleAPFloat &Arg)
Definition APFloat.cpp:5337
LLVM_ABI cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5222
LLVM_ABI bool isDenormal() const
Definition APFloat.cpp:5759
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.cpp:5570
LLVM_ABI void makeSmallest(bool Neg)
Definition APFloat.cpp:5302
LLVM_ABI friend int ilogb(const DoubleAPFloat &X)
Definition APFloat.cpp:5816
LLVM_ABI opStatus next(bool nextDown)
Definition APFloat.cpp:5369
LLVM_ABI void makeInf(bool Neg)
Definition APFloat.cpp:5281
LLVM_ABI bool isInteger() const
Definition APFloat.cpp:5791
LLVM_ABI void makeZero(bool Neg)
Definition APFloat.cpp:5286
LLVM_ABI opStatus divide(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:5092
LLVM_ABI bool isSmallestNormalized() const
Definition APFloat.cpp:5774
LLVM_ABI opStatus mod(const DoubleAPFloat &RHS)
Definition APFloat.cpp:5113
LLVM_ABI DoubleAPFloat(const fltSemantics &S)
Definition APFloat.cpp:4786
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision, unsigned FormatMaxPadding, bool TruncateZero=true) const
Definition APFloat.cpp:5797
LLVM_ABI void makeLargest(bool Neg)
Definition APFloat.cpp:5291
LLVM_ABI cmpResult compare(const DoubleAPFloat &RHS) const
Definition APFloat.cpp:5324
LLVM_ABI friend DoubleAPFloat scalbn(const DoubleAPFloat &X, int Exp, roundingMode)
LLVM_ABI opStatus roundToIntegral(roundingMode RM)
Definition APFloat.cpp:5139
LLVM_ABI opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, const DoubleAPFloat &Addend, roundingMode RM)
Definition APFloat.cpp:5124
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const
Definition APFloat.cpp:5749
LLVM_ABI bool isNegative() const
Definition APFloat.cpp:5279
LLVM_ABI opStatus add(const DoubleAPFloat &RHS, roundingMode RM)
Definition APFloat.cpp:4993
LLVM_ABI void makeNaN(bool SNaN, bool Neg, const APInt *fill)
Definition APFloat.cpp:5319
LLVM_ABI unsigned int convertToHexString(char *dst, unsigned int hexDigits, bool upperCase, roundingMode) const
Write out a hexadecimal representation of the floating point value to DST, which must be of sufficien...
Definition APFloat.cpp:3323
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const
Definition APFloat.cpp:1541
LLVM_ABI opStatus mod(const IEEEFloat &)
C fmod, or llvm frem.
Definition APFloat.cpp:2312
fltCategory getCategory() const
Definition APFloat.h:573
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode)
Definition APFloat.cpp:2881
bool isFiniteNonZero() const
Definition APFloat.h:576
bool needsCleanup() const
Returns whether this instance allocated memory.
Definition APFloat.h:463
LLVM_ABI void makeLargest(bool Neg=false)
Make this number the largest magnitude normal number in the given semantics.
Definition APFloat.cpp:4108
LLVM_ABI LLVM_READONLY int getExactLog2Abs() const
Definition APFloat.cpp:4503
LLVM_ABI APInt bitcastToAPInt() const
Definition APFloat.cpp:3733
LLVM_ABI friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4746
LLVM_ABI cmpResult compare(const IEEEFloat &) const
IEEE comparison with another floating point number (NaNs compare unordered, 0==-0).
Definition APFloat.cpp:2483
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative.
Definition APFloat.h:538
LLVM_ABI opStatus divide(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2182
bool isNaN() const
Returns true if and only if the float is a quiet or signaling NaN.
Definition APFloat.h:563
LLVM_ABI opStatus remainder(const IEEEFloat &)
IEEE remainder.
Definition APFloat.cpp:2202
LLVM_ABI double convertToDouble() const
Definition APFloat.cpp:3803
LLVM_ABI float convertToFloat() const
Definition APFloat.cpp:3796
LLVM_ABI opStatus subtract(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2156
LLVM_ABI void toString(SmallVectorImpl< char > &Str, unsigned FormatPrecision=0, unsigned FormatMaxPadding=3, bool TruncateZero=true) const
Converts this value into a decimal string.
Definition APFloat.cpp:4459
LLVM_ABI void makeSmallest(bool Neg=false)
Make this number the smallest magnitude denormal number in the given semantics.
Definition APFloat.cpp:4140
LLVM_ABI void makeInf(bool Neg=false)
Definition APFloat.cpp:4693
LLVM_ABI bool isSmallestNormalized() const
Returns true if this is the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:1041
LLVM_ABI void makeQuiet()
Definition APFloat.cpp:4722
LLVM_ABI bool isLargest() const
Returns true if and only if the number has the largest possible finite magnitude in the current semantics.
Definition APFloat.cpp:1143
LLVM_ABI opStatus add(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2150
bool isFinite() const
Returns true if and only if the current value is zero, subnormal, or normal.
Definition APFloat.h:550
LLVM_ABI Expected< opStatus > convertFromString(StringRef, roundingMode)
Definition APFloat.cpp:3266
LLVM_ABI void makeNaN(bool SNaN=false, bool Neg=false, const APInt *fill=nullptr)
Definition APFloat.cpp:930
LLVM_ABI opStatus multiply(const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2162
LLVM_ABI opStatus roundToIntegral(roundingMode)
Definition APFloat.cpp:2396
LLVM_ABI IEEEFloat & operator=(const IEEEFloat &)
Definition APFloat.cpp:1002
LLVM_ABI bool bitwiseIsEqual(const IEEEFloat &) const
Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
Definition APFloat.cpp:1168
LLVM_ABI void makeSmallestNormalized(bool Negative=false)
Make this number the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.cpp:4154
LLVM_ABI bool isInteger() const
Returns true if and only if the number is an exact integer.
Definition APFloat.cpp:1160
LLVM_ABI IEEEFloat(const fltSemantics &)
Definition APFloat.cpp:1195
LLVM_ABI opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode)
Definition APFloat.cpp:2350
LLVM_ABI friend int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4728
LLVM_ABI opStatus next(bool nextDown)
IEEE-754R 5.3.1: nextUp/nextDown.
Definition APFloat.cpp:4548
bool isInfinity() const
IEEE-754R isInfinite(): Returns true if and only if the float is infinity.
Definition APFloat.h:560
const fltSemantics & getSemantics() const
Definition APFloat.h:574
bool isZero() const
Returns true if and only if the float is plus or minus zero.
Definition APFloat.h:553
LLVM_ABI bool isSignaling() const
Returns true if and only if the float is a signaling NaN.
Definition APFloat.cpp:4532
LLVM_ABI void makeZero(bool Neg=false)
Definition APFloat.cpp:4708
LLVM_ABI opStatus convert(const fltSemantics &, roundingMode, bool *)
IEEEFloat::convert - convert a value of one floating point type to another.
Definition APFloat.cpp:2560
LLVM_ABI void changeSign()
Definition APFloat.cpp:2106
LLVM_ABI bool isDenormal() const
IEEE-754R isSubnormal(): Returns true if and only if the float is a denormal.
Definition APFloat.cpp:1027
LLVM_ABI opStatus convertToInteger(MutableArrayRef< integerPart >, unsigned int, bool, roundingMode, bool *) const
Definition APFloat.cpp:2821
LLVM_ABI bool isSmallest() const
Returns true if and only if the number has the smallest possible non-zero magnitude in the current semantics.
Definition APFloat.cpp:1033
An opaque object representing a hash code.
Definition Hashing.h:76
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
static constexpr opStatus opInexact
Definition APFloat.h:439
LLVM_ABI SlowDynamicAPInt abs(const SlowDynamicAPInt &X)
Redeclarations of friend declarations above to make it discoverable by lookups.
static constexpr fltCategory fcNaN
Definition APFloat.h:441
static constexpr opStatus opDivByZero
Definition APFloat.h:436
static constexpr opStatus opOverflow
Definition APFloat.h:437
static constexpr cmpResult cmpLessThan
Definition APFloat.h:431
const char unit< Period >::value[]
Definition Chrono.h:104
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, unsigned bits)
Definition APFloat.cpp:1566
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:427
static constexpr uninitializedTag uninitialized
Definition APFloat.h:421
static constexpr fltCategory fcZero
Definition APFloat.h:443
static constexpr opStatus opOK
Definition APFloat.h:434
static constexpr cmpResult cmpGreaterThan
Definition APFloat.h:432
static constexpr unsigned integerPartWidth
Definition APFloat.h:429
LLVM_ABI hash_code hash_value(const IEEEFloat &Arg)
Definition APFloat.cpp:3471
APFloatBase::ExponentType ExponentType
Definition APFloat.h:420
static constexpr fltCategory fcNormal
Definition APFloat.h:442
static constexpr opStatus opInvalidOp
Definition APFloat.h:435
APFloatBase::opStatus opStatus
Definition APFloat.h:417
LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM)
Definition APFloat.cpp:4767
APFloatBase::uninitializedTag uninitializedTag
Definition APFloat.h:415
static constexpr cmpResult cmpUnordered
Definition APFloat.h:433
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:426
APFloatBase::roundingMode roundingMode
Definition APFloat.h:416
APFloatBase::cmpResult cmpResult
Definition APFloat.h:418
static constexpr fltCategory fcInfinity
Definition APFloat.h:440
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:424
static constexpr roundingMode rmTowardZero
Definition APFloat.h:428
static constexpr opStatus opUnderflow
Definition APFloat.h:438
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:422
LLVM_ABI int ilogb(const IEEEFloat &Arg)
Definition APFloat.cpp:4728
static constexpr cmpResult cmpEqual
Definition APFloat.h:430
LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode)
Definition APFloat.cpp:4746
static std::pair< APFloat, APFloat > fastTwoSum(APFloat X, APFloat Y)
Definition APFloat.cpp:4856
APFloatBase::integerPart integerPart
Definition APFloat.h:414
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
static unsigned int partAsHex(char *dst, APFloatBase::integerPart part, unsigned int count, const char *hexDigitChars)
Definition APFloat.cpp:821
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1757
static const char infinityL[]
Definition APFloat.cpp:812
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
static constexpr unsigned int partCountForBits(unsigned int bits)
Definition APFloat.cpp:385
static const char NaNU[]
Definition APFloat.cpp:815
static unsigned int HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
Definition APFloat.cpp:696
static unsigned int powerOf5(APFloatBase::integerPart *dst, unsigned int power)
Definition APFloat.cpp:755
unsigned hexDigitValue(char C)
Interpret the given character C as a hexadecimal digit and return its value.
static APFloat harrisonUlp(const APFloat &X)
Definition APFloat.cpp:872
static constexpr APFloatBase::ExponentType exponentZero(const fltSemantics &semantics)
Definition APFloat.cpp:359
static Expected< int > totalExponent(StringRef::iterator p, StringRef::iterator end, int exponentAdjustment)
Definition APFloat.cpp:447
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no sensible translation to std::error_code exists.
Definition Error.cpp:98
const unsigned int maxPowerOfFiveExponent
Definition APFloat.cpp:285
int ilogb(const APFloat &Arg)
Returns the exponent of the internal representation of the APFloat.
Definition APFloat.h:1516
static char * writeUnsignedDecimal(char *dst, unsigned int n)
Definition APFloat.cpp:839
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like all_of.
Definition STLExtras.h:2163
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
const unsigned int maxPrecision
Definition APFloat.cpp:284
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1537
static const char NaNL[]
Definition APFloat.cpp:814
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
Definition bit.h:202
static const char infinityU[]
Definition APFloat.cpp:813
lostFraction
Enum that represents what fraction of the LSB truncated bits of an fp number represent.
Definition APFloat.h:50
@ lfMoreThanHalf
Definition APFloat.h:54
@ lfLessThanHalf
Definition APFloat.h:52
@ lfExactlyHalf
Definition APFloat.h:53
@ lfExactlyZero
Definition APFloat.h:51
static Error interpretDecimal(StringRef::iterator begin, StringRef::iterator end, decimalInfo *D)
Definition APFloat.cpp:539
LLVM_ABI bool isFinite(const Loop *L)
Return true if this loop can be assumed to run for a finite number of iterations.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
const unsigned int maxPowerOfFiveParts
Definition APFloat.cpp:286
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1525
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static constexpr APFloatBase::ExponentType exponentNaN(const fltSemantics &semantics)
Definition APFloat.cpp:369
static Error createError(const Twine &Err)
Definition APFloat.cpp:381
static lostFraction shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
Definition APFloat.cpp:662
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
Definition Error.h:340
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
static const char hexDigitsUpper[]
Definition APFloat.cpp:811
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
const unsigned int maxExponent
Definition APFloat.cpp:283
static unsigned int decDigitValue(unsigned int c)
Definition APFloat.cpp:392
fltNonfiniteBehavior
Definition APFloat.cpp:57
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:2002
static lostFraction combineLostFractions(lostFraction moreSignificant, lostFraction lessSignificant)
Definition APFloat.cpp:675
static Expected< StringRef::iterator > skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, StringRef::iterator *dot)
Definition APFloat.cpp:499
RoundingMode
Rounding mode.
ArrayRef(const T &OneElt) -> ArrayRef< T >
static constexpr APFloatBase::ExponentType exponentInf(const fltSemantics &semantics)
Definition APFloat.cpp:364
static lostFraction lostFractionThroughTruncation(const APFloatBase::integerPart *parts, unsigned int partCount, unsigned int bits)
Definition APFloat.cpp:640
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1551
static APFloatBase::integerPart ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits, bool isNearest)
Definition APFloat.cpp:710
static char * writeSignedDecimal(char *dst, int value)
Definition APFloat.cpp:857
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:592
static Expected< lostFraction > trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, unsigned int digitValue)
Definition APFloat.cpp:609
void consumeError(Error Err)
Consume an Error without doing anything.
Definition Error.h:1083
fltNanEncoding
Definition APFloat.cpp:81
static Expected< int > readExponent(StringRef::iterator begin, StringRef::iterator end)
Definition APFloat.cpp:402
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:466
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
static const char hexDigitsLower[]
Definition APFloat.cpp:810
#define N
const char * lastSigDigit
Definition APFloat.cpp:534
const char * firstSigDigit
Definition APFloat.cpp:533
APFloatBase::ExponentType maxExponent
Definition APFloat.cpp:106
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.cpp:119
APFloatBase::ExponentType minExponent
Definition APFloat.cpp:110
unsigned int sizeInBits
Definition APFloat.cpp:117
unsigned int precision
Definition APFloat.cpp:114
fltNanEncoding nanEncoding
Definition APFloat.cpp:121