#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-codegenprepare"
47 "amdgpu-codegenprepare-widen-constant-loads",
48 cl::desc(
"Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"),
53 BreakLargePHIs(
"amdgpu-codegenprepare-break-large-phis",
54 cl::desc(
"Break large PHI nodes for DAGISel"),
58 ForceBreakLargePHIs(
"amdgpu-codegenprepare-force-break-large-phis",
59 cl::desc(
"For testing purposes, always break large "
60 "PHIs even if it isn't profitable."),
64 "amdgpu-codegenprepare-break-large-phis-threshold",
65 cl::desc(
"Minimum type size in bits for breaking large PHI nodes"),
69 "amdgpu-codegenprepare-mul24",
70 cl::desc(
"Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
76 "amdgpu-codegenprepare-expand-div64",
77 cl::desc(
"Expand 64-bit division in AMDGPUCodeGenPrepare"),
84 "amdgpu-codegenprepare-disable-idiv-expansion",
85 cl::desc(
"Prevent expanding integer division in AMDGPUCodeGenPrepare"),
91 "amdgpu-codegenprepare-disable-fdiv-expansion",
92 cl::desc(
"Prevent expanding floating point division in AMDGPUCodeGenPrepare"),
class AMDGPUCodeGenPrepareImpl
    : public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
public:
  // ... (references to F, ST, TM, TLI, AC, DT, UA, DL elided)
  const bool HasFP32DenormalFlush;
  bool FlowChanged = false;
  mutable Function *SqrtF32 = nullptr;
  mutable Function *LdexpF32 = nullptr;

  AMDGPUCodeGenPrepareImpl(Function &F, const AMDGPUTargetMachine &TM,
                           const TargetLibraryInfo *TLI, AssumptionCache *AC,
                           const DominatorTree *DT, const UniformityInfo &UA)
      : /* ... */ DT(DT), UA(UA), DL(F.getDataLayout()),
        HasFP32DenormalFlush(/* FP32 denormals flushed to zero for F */) {}

  Function *getSqrtF32() const {
    if (SqrtF32)
      return SqrtF32;
    LLVMContext &Ctx = F.getContext();
    SqrtF32 = Intrinsic::getOrInsertDeclaration(
        F.getParent(), Intrinsic::amdgcn_sqrt, {Type::getFloatTy(Ctx)});
    return SqrtF32;
  }

  Function *getLdexpF32() const {
    if (LdexpF32)
      return LdexpF32;
    LLVMContext &Ctx = F.getContext();
    LdexpF32 = Intrinsic::getOrInsertDeclaration(
        F.getParent(), Intrinsic::ldexp,
        {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx)});
    return LdexpF32;
  }
  bool canBreakPHINode(const PHINode &I);
  // ...
  bool isLegalFloatingTy(const Type *T) const;
  // ...
  bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
    return HasFP32DenormalFlush ||
           computeKnownFPClass(V, fcSubnormal, CtxI).isKnownNeverSubnormal();
  }

  unsigned numBitsUnsigned(Value *Op) const;
  unsigned numBitsSigned(Value *Op) const;
  // ...
  unsigned getDivNumBits(BinaryOperator &I, Value *Num, Value *Den,
                         unsigned MaxDivBits, bool Signed) const;
  Value *expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I, Value *Num,
                        Value *Den, bool IsDiv, bool IsSigned) const;
  Value *expandDivRem24Impl(IRBuilder<> &Builder, BinaryOperator &I,
                            Value *Num, Value *Den, unsigned DivBits,
                            bool IsDiv, bool IsSigned) const;
  // ...
  bool canWidenScalarExtLoad(LoadInst &I) const;
  // ...
  Value *optimizeWithFDivFast(IRBuilder<> &Builder, Value *Num, Value *Den,
                              float ReqdAccuracy) const;
  Value *visitFDivElement(IRBuilder<> &Builder, Value *Num, Value *Den,
                          FastMathFlags DivFMF, FastMathFlags SqrtFMF,
                          Value *RsqOp, const Instruction *FDivInst,
                          float ReqdAccuracy) const;
  std::pair<Value *, Value *> getFrexpResults(IRBuilder<> &Builder,
                                              Value *Src) const;
  Value *emitRcpIEEE1ULP(IRBuilder<> &Builder, Value *Src,
                         bool IsNegative) const;
  // ...
};
// Legacy FunctionPass wrapper.
class AMDGPUCodeGenPrepare : public FunctionPass {
  // ...
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // ...
    // Division expansion may introduce new control flow, so analyses can only
    // be declared preserved while it stays disabled.
    if (!ExpandDiv64InIR)
      AU.setPreservesAll();
  }

  StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
  // ...
};
bool AMDGPUCodeGenPrepareImpl::run() {
  BreakPhiNodesCache.clear();
  bool MadeChange = false;
  // ...
  while (!DeadVals.empty()) {
    RecursivelyDeleteTriviallyDeadInstructions(DeadVals.pop_back_val(), TLI);
  }

  return MadeChange;
}
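// Deferred deletion: the visitors below push replaced instructions onto
// DeadVals instead of erasing them on the spot, so the instruction iteration
// stays stable; this loop then deletes them, along with any operands that
// become trivially dead in the process.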
bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
  return Ty->isFloatTy() || Ty->isDoubleTy() ||
         (Ty->isHalfTy() && ST.has16BitInsts());
}
bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
  Type *Ty = I.getType();
  int TySize = DL.getTypeSizeInBits(Ty);
  Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);

  return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
}
unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
  return computeKnownBits(Op, DL, AC).countMaxActiveBits();
}

unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
  return ComputeMaxSignificantBits(Op, DL, AC);
}
static void extractValues(IRBuilder<> &Builder,
                          SmallVectorImpl<Value *> &Values, Value *V) {
  auto *VT = dyn_cast<FixedVectorType>(V->getType());
  if (!VT) {
    Values.push_back(V);
    return;
  }

  for (int I = 0, E = VT->getNumElements(); I != E; ++I)
    Values.push_back(Builder.CreateExtractElement(V, I));
}
static Value *insertValues(IRBuilder<> &Builder, Type *Ty,
                           SmallVectorImpl<Value *> &Values) {
  if (!Ty->isVectorTy()) {
    assert(Values.size() == 1);
    return Values[0];
  }

  Value *NewVal = PoisonValue::get(Ty);
  for (int I = 0, E = Values.size(); I != E; ++I)
    NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);

  return NewVal;
}
bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
  if (I.getOpcode() != Instruction::Mul)
    return false;

  Type *Ty = I.getType();
  unsigned Size = Ty->getScalarSizeInBits();
  // ...
  Value *LHS = I.getOperand(0);
  Value *RHS = I.getOperand(1);
  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  unsigned LHSBits = 0, RHSBits = 0;
  bool IsSigned = false;

  if (ST.hasMulU24() && (LHSBits = numBitsUnsigned(LHS)) <= 24 &&
      (RHSBits = numBitsUnsigned(RHS)) <= 24) {
    IsSigned = false;
  } else if (ST.hasMulI24() && (LHSBits = numBitsSigned(LHS)) <= 24 &&
             (RHSBits = numBitsSigned(RHS)) <= 24) {
    IsSigned = true;
  } else
    return false;

  SmallVector<Value *, 4> LHSVals;
  SmallVector<Value *, 4> RHSVals;
  SmallVector<Value *, 4> ResultVals;
  extractValues(Builder, LHSVals, LHS);
  extractValues(Builder, RHSVals, RHS);

  IntegerType *I32Ty = Builder.getInt32Ty();
  IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
  Type *DstTy = LHSVals[0]->getType();

  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
    Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
                          : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
    Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
                          : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
    Intrinsic::ID ID =
        IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
    Value *Result = Builder.CreateIntrinsic(IntrinTy, ID, {LHS, RHS});
    Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
                      : Builder.CreateZExtOrTrunc(Result, DstTy);
    ResultVals.push_back(Result);
  }

  Value *NewVal = insertValues(Builder, Ty, ResultVals);
  NewVal->takeName(&I);
  I.replaceAllUsesWith(NewVal);
  DeadVals.push_back(&I);
  return true;
}
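// Example of the rewrite above (informal sketch): a multiply whose operands
// are proven to fit in 24 bits, e.g.
//   %r = mul i64 %a, %b          ; %a, %b known < 2^24
// becomes roughly
//   %a32 = trunc i64 %a to i32
//   %b32 = trunc i64 %b to i32
//   %r   = call i64 @llvm.amdgcn.mul.u24.i64(i32 %a32, i32 %b32)
// v_mul_u32_u24 / v_mul_i32_i24 read only the low 24 bits of each source, so
// the result is exact whenever both inputs fit in 24 (signed or unsigned)
// bits, and the VALU multiply is much cheaper than a full 64-bit multiply.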
bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
  // ...
  if (!CBO || !CT || !CF)
    return false;
  // ...
  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&BO))
    Builder.setFastMathFlags(FPOp->getFastMathFlags());
  // ...
  DeadVals.push_back(&BO);
  if (CastOp)
    DeadVals.push_back(CastOp);
  DeadVals.push_back(Sel);
  return true;
}
std::pair<Value *, Value *>
AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
                                          Value *Src) const {
  Type *Ty = Src->getType();
  Value *Frexp = Builder.CreateIntrinsic(Intrinsic::frexp,
                                         {Ty, Builder.getInt32Ty()}, Src);
  Value *FrexpMant = Builder.CreateExtractValue(Frexp, {0});
  Value *FrexpExp =
      ST.hasFractBug()
          ? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
                                    {Builder.getInt32Ty(), Ty}, Src)
          : Builder.CreateExtractValue(Frexp, {1});
  return {FrexpMant, FrexpExp};
}

/// Emit an expansion of 1.0 / Src good for 1ulp that supports denormals.
Value *AMDGPUCodeGenPrepareImpl::emitRcpIEEE1ULP(IRBuilder<> &Builder,
                                                 Value *Src,
                                                 bool IsNegative) const {
  auto [FrexpMant, FrexpExp] = getFrexpResults(Builder, Src);
  Value *ScaleFactor = Builder.CreateNeg(FrexpExp);
  Value *Rcp = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, FrexpMant);
  return Builder.CreateCall(getLdexpF32(), {Rcp, ScaleFactor});
}
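// The expansion above relies on the frexp identity (a sketch of the math,
// not additional code): frexp decomposes x into m * 2^e with |m| in [0.5, 1),
// so
//   1/x = (1/m) * 2^-e = ldexp(rcp(m), -e)
// Evaluating rcp only on the pre-scaled mantissa keeps the operand away from
// the denormal range, which is what makes this 1 ulp expansion denormal-safe.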
Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
                                              Value *RHS,
                                              FastMathFlags FMF) const {
  // ...
  auto [FrexpMantRHS, FrexpExpRHS] = getFrexpResults(Builder, RHS);
  // ...
  auto [FrexpMantLHS, FrexpExpLHS] = getFrexpResults(Builder, LHS);
  // ...
}
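// Same scaling trick extended to a full divide (sketch of the underlying
// math): with a = ma * 2^ea and b = mb * 2^eb,
//   a/b = (ma * rcp(mb)) * 2^(ea - eb)
// so the quotient is reconstructed from the mantissa product with a single
// ldexp by the exponent difference, again keeping rcp's input well scaled.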
Value *AMDGPUCodeGenPrepareImpl::emitSqrtIEEE2ULP(IRBuilder<> &Builder,
                                                  Value *Src,
                                                  FastMathFlags FMF) const {
  Type *Ty = Src->getType();
  APFloat SmallestNormal =
      APFloat::getSmallestNormalized(Ty->getFltSemantics());
  Value *NeedScale =
      Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));

  Value *InputScaleFactor =
      // ... (select an up-scale exponent, or 0, based on NeedScale)
  // ...
  Value *OutputScaleFactor =
      // ... (the matching compensation exponent)
  return Builder.CreateCall(getLdexpF32(), {Sqrt, OutputScaleFactor});
}
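// Scaling rationale (informal; the exact constants are elided above): when
// Src is below the smallest normal, evaluate sqrt on Src scaled up by an even
// power of two 2^(2k) and compensate the result by 2^-k, using
//   sqrt(x * 2^(2k)) = sqrt(x) * 2^k
// Power-of-two scaling through ldexp is exact, so only the denormal handling
// itself costs accuracy, giving the 2 ulp bound in the function's name.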
/// Emit an expansion of 1.0 / sqrt(Src) good for 1ulp that supports denormals.
static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
                              bool IsNegative) {
  Type *Ty = Src->getType();
  APFloat SmallestNormal =
      APFloat::getSmallestNormalized(Ty->getFltSemantics());
  Value *NeedScale =
      Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
  Constant *One = ConstantFP::get(Ty, 1.0);
  Constant *InputScale = ConstantFP::get(Ty, 0x1.0p+24);
  Constant *OutputScale =
      ConstantFP::get(Ty, IsNegative ? -0x1.0p+12 : 0x1.0p+12);

  Value *InputScaleFactor = Builder.CreateSelect(NeedScale, InputScale, One);

  Value *ScaledInput = Builder.CreateFMul(Src, InputScaleFactor);
  Value *Rsq = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, ScaledInput);
  Value *OutputScaleFactor = Builder.CreateSelect(
      NeedScale, OutputScale, IsNegative ? ConstantFP::get(Ty, -1.0) : One);

  return Builder.CreateFMul(Rsq, OutputScaleFactor);
}
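// Why 2^24 in and 2^12 out: rsq(x * 2^24) = rsq(x) * 2^-12, so scaling a
// subnormal input up by 0x1.0p+24 and the result by 0x1.0p+12 recovers the
// unscaled value exactly. Folding the sign into OutputScale handles the
// -1.0 / sqrt(x) case with the same final multiply.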
bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
                                                  FastMathFlags DivFMF,
                                                  FastMathFlags SqrtFMF) const {
  // ...
}

Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
    IRBuilder<> &Builder, Value *Num, Value *Den, const FastMathFlags DivFMF,
    const FastMathFlags SqrtFMF, const Instruction *CtxI) const {
  // ...
  bool IsNegative = false;

  // Handle the 1.0 / sqrt(x) and -1.0 / sqrt(x) numerators.
  // ...
    IRBuilder<>::FastMathFlagGuard Guard(Builder);
    // ...
    if (/* ... fast-math conditions ... || */
        canIgnoreDenormalInput(Den, CtxI)) {
      // Plain rsq is accurate enough here; otherwise fall back to the scaled
      // 1ulp expansion below.
      // ...
    }
  // ...
}
Value *AMDGPUCodeGenPrepareImpl::optimizeWithRcp(IRBuilder<> &Builder,
                                                 Value *Num, Value *Den,
                                                 FastMathFlags FMF,
                                                 const Instruction *CtxI) const {
  // ...
  bool IsNegative = false;

  // The 1.0 / x and -1.0 / x cases:
  // ...
    if (HasFP32DenormalFlush || FMF.approxFunc()) {
      // ...
    }

    return emitRcpIEEE1ULP(Builder, Src, IsNegative);
  // ...

  // The arcp case, x / y -> x * (1.0 / y):
  if (HasFP32DenormalFlush || FMF.approxFunc()) {
    // ...
  }

  Value *Recip = emitRcpIEEE1ULP(Builder, Den, false);
  // ...
}
Value *AMDGPUCodeGenPrepareImpl::optimizeWithFDivFast(
    IRBuilder<> &Builder, Value *Num, Value *Den, float ReqdAccuracy) const {
  // fdiv.fast can achieve 2.5 ULP accuracy.
  if (ReqdAccuracy < 2.5f)
    return nullptr;
  // ...
  bool NumIsOne = false;
  if (const ConstantFP *CNum = dyn_cast<ConstantFP>(Num)) {
    if (CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0))
      NumIsOne = true;
  }

  // fdiv.fast does not support denormals, but 1.0 / x is always fine.
  if (!HasFP32DenormalFlush && !NumIsOne)
    return nullptr;

  return Builder.CreateIntrinsic(Intrinsic::amdgcn_fdiv_fast, {Num, Den});
}
Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
    IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
    FastMathFlags SqrtFMF, Value *RsqOp, const Instruction *FDivInst,
    float ReqdDivAccuracy) const {
  if (RsqOp) {
    Value *Rsq =
        optimizeWithRsq(Builder, Num, RsqOp, DivFMF, SqrtFMF, FDivInst);
    if (Rsq)
      return Rsq;
  }

  Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);
  if (Rcp)
    return Rcp;

  Value *FDivFast = optimizeWithFDivFast(Builder, Num, Den, ReqdDivAccuracy);
  if (FDivFast)
    return FDivFast;

  return emitFrexpDiv(Builder, Num, Den, DivFMF);
}
bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
  if (DisableFDivExpand)
    return false;
  // ...
  FastMathFlags SqrtFMF;
  // ...
  Value *RsqOp = nullptr;
  auto *DenII = dyn_cast<IntrinsicInst>(Den);
  if (DenII && DenII->getIntrinsicID() == Intrinsic::sqrt &&
      DenII->hasOneUse()) {
    const auto *SqrtOp = cast<FPMathOperator>(DenII);
    SqrtFMF = SqrtOp->getFastMathFlags();
    if (canOptimizeWithRsq(SqrtOp, DivFMF, SqrtFMF))
      RsqOp = SqrtOp->getOperand(0);
  }
  // ...
  // With afn the backend lowers fdiv to plain rcp anyway, so only the rsq
  // contraction is interesting here.
  const bool AllowInaccurateRcp = DivFMF.approxFunc();
  if (!RsqOp && AllowInaccurateRcp)
    return false;

  // Defer the correctly rounded expansion to codegen.
  if (ReqdAccuracy < 1.0f)
    return false;
  // ...
  for (int I = 0, E = NumVals.size(); I != E; ++I) {
    Value *NumElt = NumVals[I];
    Value *DenElt = DenVals[I];
    Value *RsqDenElt = RsqOp ? RsqDenVals[I] : nullptr;

    Value *NewElt =
        visitFDivElement(Builder, NumElt, DenElt, DivFMF, SqrtFMF, RsqDenElt,
                         &FDiv, ReqdAccuracy);
    // ...
    if (auto *NewEltInst = dyn_cast<Instruction>(NewElt))
      NewEltInst->copyMetadata(FDiv);

    ResultVals[I] = NewElt;
  }
  // ...
  FDiv.replaceAllUsesWith(NewVal);
  NewVal->takeName(&FDiv);
  DeadVals.push_back(&FDiv);
  return true;
}
static std::pair<Value *, Value *> getMul64(IRBuilder<> &Builder, Value *LHS,
                                            Value *RHS) {
  Type *I32Ty = Builder.getInt32Ty();
  Type *I64Ty = Builder.getInt64Ty();

  Value *LHS_EXT64 = Builder.CreateZExt(LHS, I64Ty);
  Value *RHS_EXT64 = Builder.CreateZExt(RHS, I64Ty);
  Value *MUL64 = Builder.CreateMul(LHS_EXT64, RHS_EXT64);
  Value *Lo = Builder.CreateTrunc(MUL64, I32Ty);
  Value *Hi = Builder.CreateLShr(MUL64, Builder.getInt64(32));
  Hi = Builder.CreateTrunc(Hi, I32Ty);
  return std::pair(Lo, Hi);
}
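// A minimal sketch of the companion helper getMulHu (declared among this
// file's static functions): the high half of this widened product is exactly
// the unsigned mulhi that the 32-bit division expansion needs.
//   static Value *getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
//     return getMul64(Builder, LHS, RHS).second;
//   }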
unsigned AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
                                                 Value *Den,
                                                 unsigned MaxDivBits,
                                                 bool IsSigned) const {
  unsigned SSBits = Num->getType()->getScalarSizeInBits();
  if (IsSigned) {
    unsigned RHSSignBits = ComputeNumSignBits(Den, DL, AC, &I);
    // A sign bit needs to be reserved for shrinking.
    unsigned DivBits = SSBits - RHSSignBits + 1;
    if (DivBits > MaxDivBits)
      return SSBits;

    unsigned LHSSignBits = ComputeNumSignBits(Num, DL, AC, &I);

    unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
    DivBits = SSBits - SignBits + 1;
    return DivBits;
  }

  // ...
  unsigned DivBits = SSBits - RHSSignBits;
  if (DivBits > MaxDivBits)
    return SSBits;
  // ...
  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
  DivBits = SSBits - SignBits;
  return DivBits;
}
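// Worked example (informal): for an i32 sdiv where both operands are known
// to be sign-extended from i8, ComputeNumSignBits returns at least 25 for
// each side, so
//   DivBits = 32 - min(25, 25) + 1 = 8
// i.e. the divide only needs 8 effective bits (including the reserved sign
// bit) and qualifies for the 24-bit float expansion below.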
Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
                                                BinaryOperator &I, Value *Num,
                                                Value *Den, bool IsDiv,
                                                bool IsSigned) const {
  unsigned DivBits = getDivNumBits(I, Num, Den, 24, IsSigned);
  if (DivBits > 24)
    return nullptr;
  return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
}
Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
    IRBuilder<> &Builder, BinaryOperator &I, Value *Num, Value *Den,
    unsigned DivBits, bool IsDiv, bool IsSigned) const {
  // ...
  ConstantInt *One = Builder.getInt32(1);
  // ...
  // float fr = mad(fqneg, fb, fa);
  auto FMAD = !ST.hasMadMacF32Insts() ? Intrinsic::fma
                                      : (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz;
  Value *FR = Builder.CreateIntrinsic(FMAD, {FQNeg->getType()},
                                      {FQNeg, FB, FA}, FQ);
  // ...
  if (DivBits != 0 && DivBits < 32) {
    // Extend in register from the number of bits this divide really is.
    if (IsSigned) {
      int InRegBits = 32 - DivBits;
      Res = Builder.CreateShl(Res, InRegBits);
      Res = Builder.CreateAShr(Res, InRegBits);
    } else {
      ConstantInt *TruncMask =
          Builder.getInt32((UINT64_C(1) << DivBits) - 1);
      Res = Builder.CreateAnd(Res, TruncMask);
    }
  }

  return Res;
}
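// The 24-bit expansion follows the classic float-reciprocal recipe (an
// informal sketch of the emitted sequence, for unsigned operands):
//   fa = uitofp(Num); fb = uitofp(Den)
//   fq = trunc(fa * rcp(fb))           ; candidate quotient
//   fr = mad(-fq, fb, fa)              ; remainder estimate
//   q  = fptoui(fq) + (|fr| >= |fb|)   ; correct a possible off-by-one
// A 32-bit float represents every 24-bit integer exactly, which is why the
// trick is restricted to DivBits <= 24.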
bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
                                                         Value *Num,
                                                         Value *Den) const {
  if (Constant *C = dyn_cast<Constant>(Den)) {
    // Arbitrary constants get a better expansion as long as a wider mulhi is
    // legal.
    if (C->getType()->getScalarSizeInBits() <= 32)
      return true;
    // ...
  }
  // ...
  if (BinOpDen->getOpcode() == Instruction::Shl &&
      /* ... the shifted value is a power-of-two constant ... */)
    return true;

  return false;
}
static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout DL) {
  return Builder.CreateAShr(V, Builder.getInt32(31));
}
Value *AMDGPUCodeGenPrepareImpl::expandDivRem32(IRBuilder<> &Builder,
                                                BinaryOperator &I, Value *X,
                                                Value *Y) const {
  Instruction::BinaryOps Opc = I.getOpcode();
  assert(Opc == Instruction::URem || Opc == Instruction::UDiv ||
         Opc == Instruction::SRem || Opc == Instruction::SDiv);
  // ...
  if (divHasSpecialOptimization(I, X, Y))
    return nullptr; // Keep it for later optimization.

  bool IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv;
  bool IsSigned = Opc == Instruction::SRem || Opc == Instruction::SDiv;

  Type *Ty = X->getType();
  // ...
  if (Value *Res = expandDivRem24(Builder, I, X, Y, IsDiv, IsSigned)) {
    // ...
  }
  // ...
  ConstantInt *One = Builder.getInt32(1);

  Value *Sign = nullptr;
  if (IsSigned) {
    Value *SignX = getSign32(X, Builder, DL);
    Value *SignY = getSign32(Y, Builder, DL);
    // Remainder sign is the same as LHS.
    Sign = IsDiv ? Builder.CreateXor(SignX, SignY) : SignX;
    // ...
  }
  // ...
}
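// Sign handling sketch: a signed divide is reduced to an unsigned one on
// |X| and |Y| using the sign masks (getSign32 yields 0 or -1, i.e. x >> 31
// arithmetically). Since (u ^ s) - s negates u exactly when s == -1, both
// the absolute values and the final sign fix-up
//   sign(quotient) = SignX ^ SignY
// are branch free; the remainder instead inherits the dividend's sign
// (SignX alone), matching the C semantics of srem.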
Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
                                                BinaryOperator &I, Value *Num,
                                                Value *Den) const {
  if (!ExpandDiv64InIR && divHasSpecialOptimization(I, Num, Den))
    return nullptr; // Keep it for later optimization.

  Instruction::BinaryOps Opc = I.getOpcode();

  bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
  bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;

  unsigned NumDivBits = getDivNumBits(I, Num, Den, 32, IsSigned);
  if (NumDivBits > 32)
    return nullptr;

  Value *Narrowed = nullptr;
  if (NumDivBits <= 24) {
    Narrowed = expandDivRem24Impl(Builder, I, Num, Den, NumDivBits,
                                  IsDiv, IsSigned);
  } else if (NumDivBits <= 32) {
    Narrowed = expandDivRem32(Builder, I, Num, Den);
  }

  if (Narrowed) {
    // Extend the narrowed result back to the original 64-bit width.
    // ...
  }

  return nullptr;
}
void AMDGPUCodeGenPrepareImpl::expandDivRem64(BinaryOperator &I) const {
  Instruction::BinaryOps Opc = I.getOpcode();
  if (Opc == Instruction::UDiv || Opc == Instruction::SDiv) {
    expandDivisionUpTo64Bits(&I);
    return;
  }

  if (Opc == Instruction::URem || Opc == Instruction::SRem) {
    expandRemainderUpTo64Bits(&I);
    return;
  }

  llvm_unreachable("not a division");
}
bool AMDGPUCodeGenPrepareImpl::tryNarrowMathIfNoOverflow(Instruction *I) {
  unsigned Opc = I->getOpcode();
  Type *OldType = I->getType();

  if (Opc != Instruction::Add && Opc != Instruction::Mul)
    return false;

  unsigned OrigBit = OldType->getScalarSizeInBits();
  // ...
  if (Opc != Instruction::Add && Opc != Instruction::Mul)
    llvm_unreachable("Unexpected opcode, only valid for Instruction::Add and "
                     "Instruction::Mul.");
  // ...
  MaxBitsNeeded = std::max<unsigned>(bit_ceil(MaxBitsNeeded), 8);
  Type *NewType = DL.getSmallestLegalIntType(I->getContext(), MaxBitsNeeded);
  if (!NewType)
    return false;
  unsigned NewBit = NewType->getIntegerBitWidth();
  if (NewBit >= OrigBit)
    return false;
  // ...
  // A mul/add with two constant operands would already have been folded, so
  // count how many non-constant operands need a trunc inserted.
  int NumOfNonConstOps = 2;
  if (isa<Constant>(I->getOperand(0)) || isa<Constant>(I->getOperand(1)))
    NumOfNonConstOps = 1;
  // ...
  if (NewCost >= OldCost)
    return false;

  // Truncate the operands, perform the op in NewType, and extend back.
  // ...
  DeadVals.push_back(I);
  return true;
}
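// Example of the narrowing (informal): for
//   %r = mul i64 %x, %y      ; known bits prove the product fits in 16 bits
// MaxBitsNeeded is rounded up to a power of two of at least 8, and if i32 is
// the smallest legal type the op is rewritten roughly as
//   %xt = trunc i64 %x to i32
//   %yt = trunc i64 %y to i32
//   %mt = mul i32 %xt, %yt
//   %r  = zext i32 %mt to i64
// The TTI cost comparison keeps this from firing where the extra trunc/ext
// instructions would outweigh the cheaper narrow multiply.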
bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
  if (foldBinOpIntoSelect(I))
    return true;

  if (UseMul24Intrin && replaceMulWithMul24(I))
    return true;
  if (tryNarrowMathIfNoOverflow(&I))
    return true;

  bool Changed = false;
  Instruction::BinaryOps Opc = I.getOpcode();
  Type *Ty = I.getType();
  Value *NewDiv = nullptr;
  unsigned ScalarSize = Ty->getScalarSizeInBits();

  if ((Opc == Instruction::URem || Opc == Instruction::UDiv ||
       Opc == Instruction::SRem || Opc == Instruction::SDiv) &&
      ScalarSize <= 64 &&
      !DisableIDivExpand) {
    Value *Num = I.getOperand(0);
    Value *Den = I.getOperand(1);
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
      NewDiv = PoisonValue::get(VT);

      for (unsigned N = 0, E = VT->getNumElements(); N != E; ++N) {
        Value *NumEltN = Builder.CreateExtractElement(Num, N);
        Value *DenEltN = Builder.CreateExtractElement(Den, N);

        Value *NewElt;
        if (ScalarSize <= 32) {
          NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);
          // ...
        } else {
          // See if this 64-bit division can be shrunk to 32/24 bits before
          // queuing it for the generic IR expansion.
          NewElt = shrinkDivRem64(Builder, I, NumEltN, DenEltN);
          // ...
        }
        // ...
        if (auto *NewEltI = dyn_cast<Instruction>(NewElt))
          NewEltI->copyIRFlags(&I);

        NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
      }
    } else {
      if (ScalarSize <= 32)
        NewDiv = expandDivRem32(Builder, I, Num, Den);
      else {
        NewDiv = shrinkDivRem64(Builder, I, Num, Den);
        // ...
      }
    }

    if (NewDiv) {
      I.replaceAllUsesWith(NewDiv);
      DeadVals.push_back(&I);
      Changed = true;
    }
  }

  if (ExpandDiv64InIR) {
    // TODO: We get much worse code in specially handled constant cases.
    for (BinaryOperator *Div : Div64ToExpand) {
      expandDivRem64(*Div);
      // ...
    }
  }

  return Changed;
}
bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
  if (!WidenLoads)
    return false;

  if ((I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, I.getPointerOperand());
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
          mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        // ...
        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(F.getContext(), LowAndHigh));
      }
    }

    int TySize = DL.getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    DeadVals.push_back(&I);
    return true;
  }

  return false;
}
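// Example of the widening (informal): a uniform sub-dword load from constant
// address space,
//   %v = load i16, ptr addrspace(4) %p, align 4
// becomes a full-dword scalar load plus a trunc:
//   %w = load i32, ptr addrspace(4) %p, align 4
//   %v = trunc i32 %w to i16
// SMEM loads always read at least a dword, so this only makes the implicit
// behavior explicit; !range metadata has to be rewritten (or dropped) since
// the high bits of %w are unknown.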
bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
  // ...
  Value *Fract = nullptr;
  if (Pred == FCmpInst::FCMP_UNO && TrueVal == CmpVal && IIFalse &&
      CmpVal == matchFractPat(*IIFalse)) {
    // fract case: select (fcmp uno x, x), x, fract(x)
    Fract = applyFractPat(Builder, CmpVal);
  } else if (Pred == FCmpInst::FCMP_ORD && FalseVal == CmpVal && IITrue &&
             CmpVal == matchFractPat(*IITrue)) {
    // fract case: select (fcmp ord x, x), fract(x), x
    Fract = applyFractPat(Builder, CmpVal);
  } else
    return false;

  Fract->takeName(&I);
  I.replaceAllUsesWith(Fract);
  DeadVals.push_back(&I);
  return true;
}
static bool areInSameBB(const Value *A, const Value *B) {
  const auto *IA = dyn_cast<Instruction>(A);
  const auto *IB = dyn_cast<Instruction>(B);
  return IA && IB && IA->getParent() == IB->getParent();
}
static bool isInterestingPHIIncomingValue(const Value *V) {
  const Value *CurVal = V;
  // ...
  BitVector EltsCovered(FVT->getNumElements());
  while (const auto *IE = dyn_cast<InsertElementInst>(CurVal)) {
    const auto *Idx = dyn_cast<ConstantInt>(IE->getOperand(2));

    // Non-constant or out-of-bounds index -> folding is unlikely.
    if (!Idx || Idx->getZExtValue() >= FVT->getNumElements())
      return false;

    const auto *VecSrc = IE->getOperand(0);
    // ...
    EltsCovered.set(Idx->getZExtValue());

    // All elements covered.
    if (EltsCovered.all())
      return true;

    CurVal = VecSrc;
  }
  // ...
}
static void collectPHINodes(const PHINode &I,
                            SmallPtrSet<const PHINode *, 8> &SeenPHIs) {
  const auto [It, Inserted] = SeenPHIs.insert(&I);
  if (!Inserted)
    return;

  for (const Value *Inc : I.incoming_values()) {
    if (const auto *PhiInc = dyn_cast<PHINode>(Inc))
      collectPHINodes(*PhiInc, SeenPHIs);
  }

  for (const User *U : I.users()) {
    if (const auto *PhiU = dyn_cast<PHINode>(U))
      collectPHINodes(*PhiU, SeenPHIs);
  }
}
bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) {
  // Check in the cache first.
  if (const auto It = BreakPhiNodesCache.find(&I);
      It != BreakPhiNodesCache.end())
    return It->second;

  // Collect the whole PHI chain: the PHI, the PHIs among its incoming values
  // and users, and so on transitively.
  SmallPtrSet<const PHINode *, 8> WorkList;
  collectPHINodes(I, WorkList);

#ifndef NDEBUG
  for (const PHINode *WLP : WorkList) {
    assert(BreakPhiNodesCache.count(WLP) == 0);
  }
#endif

  // To consider the chain profitable to break, at least 2/3rds (rounded up)
  // of the PHIs in it must have an interesting incoming value.
  const auto Threshold = (alignTo(WorkList.size() * 2, 3) / 3);
  unsigned NumBreakablePHIs = 0;
  bool CanBreak = false;
  for (const PHINode *Cur : WorkList) {
    if (any_of(Cur->incoming_values(), isInterestingPHIIncomingValue)) {
      if (++NumBreakablePHIs >= Threshold) {
        CanBreak = true;
        break;
      }
    }
  }

  for (const PHINode *Cur : WorkList)
    BreakPhiNodesCache[Cur] = CanBreak;

  return CanBreak;
}
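// Threshold arithmetic, spelled out: alignTo(N * 2, 3) / 3 is ceil(2N/3),
// so a chain of 5 connected PHIs needs ceil(10/3) = 4 of them to have an
// "interesting" incoming value before the whole chain is marked breakable,
// and the verdict is cached for every PHI in the chain at once.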
  /// Slice \p Inc according to the information contained within this slice.
  Value *getSlicedVal(BasicBlock *BB, Value *Inc, StringRef NewValName) {
    Value *&Res = SlicedVals[{BB, Inc}];
    if (Res)
      return Res;
    // ...
    if (Instruction *IncInst = dyn_cast<Instruction>(Inc))
      B.SetCurrentDebugLocation(IncInst->getDebugLoc());

    if (NumElts > 1)
      Res = B.CreateShuffleVector(Inc, Mask, NewValName);
    else
      Res = B.CreateExtractElement(Inc, Idx, NewValName);
    return Res;
  }
bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
  // Break up fixed-vector PHIs into smaller pieces.
  // ...
  if (!FVT || FVT->getNumElements() == 1 ||
      DL.getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold)
    return false;

  if (!ForceBreakLargePHIs && !canBreakPHINode(I))
    return false;

  std::vector<VectorSlice> Slices;

  Type *EltTy = FVT->getElementType();
  {
    unsigned Idx = 0;
    const unsigned NumElts = FVT->getNumElements();
    const unsigned EltSize = DL.getTypeSizeInBits(EltTy);
    // For 8/16-bit elements, don't scalarize fully; break the vector into as
    // many 32-bit slices as possible and only scalarize the tail.
    if (EltSize == 8 || EltSize == 16) {
      const unsigned SubVecSize = (32 / EltSize);
      Type *SubVecTy = FixedVectorType::get(EltTy, SubVecSize);
      for (unsigned End = alignDown(NumElts, SubVecSize); Idx < End;
           Idx += SubVecSize)
        Slices.emplace_back(SubVecTy, Idx, SubVecSize);
    }

    // Scalarize all remaining elements.
    for (; Idx < NumElts; ++Idx)
      Slices.emplace_back(EltTy, Idx, 1);
  }

  assert(Slices.size() > 1);

  // Create one PHI per slice, with the incoming values extracted from each
  // predecessor's incoming value by getSlicedVal.
  IRBuilder<> B(I.getParent());
  B.SetCurrentDebugLocation(I.getDebugLoc());

  unsigned IncNameSuffix = 0;
  for (VectorSlice &S : Slices) {
    // Reset the insert point each iteration; getSlicedVal may have inserted
    // something into I's block.
    B.SetInsertPoint(I.getParent()->getFirstNonPHIIt());
    S.NewPHI = B.CreatePHI(S.Ty, I.getNumIncomingValues());

    for (const auto &[Idx, BB] : enumerate(I.blocks())) {
      S.NewPHI->addIncoming(S.getSlicedVal(BB, I.getIncomingValue(Idx),
                                           "largephi.extractslice" +
                                               std::to_string(IncNameSuffix++)),
                            BB);
    }
  }

  // Replace the original PHI with a vector rebuilt from the slice PHIs.
  Value *Vec = PoisonValue::get(FVT);
  unsigned NameSuffix = 0;
  for (VectorSlice &S : Slices) {
    const auto ValName = "largephi.insertslice" + std::to_string(NameSuffix++);
    if (S.NumElts > 1)
      Vec = B.CreateInsertVector(FVT, Vec, S.NewPHI, S.Idx, ValName);
    else
      Vec = B.CreateInsertElement(Vec, S.NewPHI, S.Idx, ValName);
  }

  I.replaceAllUsesWith(Vec);
  DeadVals.push_back(&I);
  return true;
}
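// Slicing example (informal): a <16 x i8> PHI is broken into four <4 x i8>
// slices of 32 bits each, while a <5 x half> PHI becomes two <2 x half>
// slices plus one scalar half for the tail. Each slice gets its own PHI,
// and the original vector is rebuilt with insertvector/insertelement above.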
static bool isPtrKnownNeverNull(const Value *V, const DataLayout &DL,
                                const AMDGPUTargetMachine &TM, unsigned AS) {
  // ...
  if (const auto *Load = dyn_cast<LoadInst>(V);
      Load && Load->hasMetadata(LLVMContext::MD_nonnull))
    return true;
  // ...
  KnownBits SrcPtrKB = computeKnownBits(V, DL);
  const auto NullVal = TM.getNullPointerValue(AS);

  assert(SrcPtrKB.getBitWidth() == DL.getPointerSizeInBits(AS));
  assert((NullVal == 0 || NullVal == -1) &&
         "don't know how to check for this null value!");
  return NullVal ? !SrcPtrKB.getMaxValue().isAllOnes() : SrcPtrKB.isNonZero();
}
bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
  // The intrinsic doesn't support vectors, and it seems difficult to prove
  // that a vector cannot have any nulls in it, so it's unclear if it's worth
  // supporting.
  if (I.getType()->isVectorTy())
    return false;

  // Check if this can be lowered to an amdgcn.addrspacecast.nonnull. This is
  // only worthwhile for casts between flat and local/private.
  const unsigned SrcAS = I.getSrcAddressSpace();
  const unsigned DstAS = I.getDestAddressSpace();

  bool CanLower = false;
  if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
    CanLower = (DstAS == AMDGPUAS::LOCAL_ADDRESS ||
                DstAS == AMDGPUAS::PRIVATE_ADDRESS);
  else if (DstAS == AMDGPUAS::FLAT_ADDRESS)
    CanLower = (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
                SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
  if (!CanLower)
    return false;

  // Every underlying object of the source pointer must be known never null.
  // ...
  IRBuilder<> B(&I);
  auto *Intrin = B.CreateIntrinsic(
      I.getType(), Intrinsic::amdgcn_addrspacecast_nonnull, {I.getOperand(0)});
  I.replaceAllUsesWith(Intrin);
  DeadVals.push_back(&I);
  return true;
}
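// Example (informal sketch): for a cast of a pointer proven non-null, such
// as one derived from an alloca or a nonnull-annotated load,
//   %f = addrspacecast ptr addrspace(5) %p to ptr
// becomes roughly
//   %f = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) %p)
// which lets instruction selection skip the "is null" compare/select pair
// that a generic private-to-flat cast otherwise has to emit.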
bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
  switch (I.getIntrinsicID()) {
  case Intrinsic::minnum:
  case Intrinsic::minimumnum:
  case Intrinsic::minimum:
    return visitFMinLike(I);
  case Intrinsic::sqrt:
    return visitSqrt(I);
  default:
    return false;
  }
}
// Match the non-nan fract pattern:
//   minnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
  // ...
  Intrinsic::ID IID = I.getIntrinsicID();

  // The value is only used in contexts where we know the input isn't a nan,
  // so any of the fmin variants are fine.
  if (IID != Intrinsic::minnum && IID != Intrinsic::minimum &&
      IID != Intrinsic::minimumnum)
    return nullptr;

  Type *Ty = I.getType();
  // ...
  Value *Arg0 = I.getArgOperand(0);
  Value *Arg1 = I.getArgOperand(1);

  const APFloat *C;
  if (!match(Arg1, m_APFloat(C)))
    return nullptr;

  // Match the constant against nextafter(1.0, -1.0) in Ty's semantics.
  APFloat One(1.0);
  bool LosesInfo;
  One.convert(C->getSemantics(), APFloat::rmNearestTiesToEven, &LosesInfo);
  // ...
}

Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
                                               Value *FractArg) {
  SmallVector<Value *, 4> FractVals;
  extractValues(Builder, FractVals, FractArg);

  SmallVector<Value *, 4> ResultVals(FractVals.size());
  for (unsigned I = 0, E = FractVals.size(); I != E; ++I) {
    // ... (emit one amdgcn.fract call per extracted element)
  }

  return insertValues(Builder, FractArg->getType(), ResultVals);
}
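// The matched pattern, in IR terms (informal):
//   %floor = call float @llvm.floor.f32(float %x)
//   %sub   = fsub float %x, %floor
//   %min   = call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
// where the constant is nextafter(1.0, -1.0) = 0x1.fffffep-1 for f32. This
// is exactly what v_fract_f32 computes for non-nan inputs, so the sequence
// can collapse to @llvm.amdgcn.fract.f32 once a nan input is ruled out.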
bool AMDGPUCodeGenPrepareImpl::visitFMinLike(IntrinsicInst &I) {
  Value *FractArg = matchFractPat(I);
  if (!FractArg)
    return false;

  // Match the fract pattern in contexts where the nan check has been
  // optimized out (and hope the knowledge that the source can't be a nan
  // wasn't lost).
  // ...
  IRBuilder<> Builder(&I);
  FastMathFlags FMF = I.getFastMathFlags();
  FMF.setNoNaNs();
  Builder.setFastMathFlags(FMF);

  Value *Fract = applyFractPat(Builder, FractArg);
  Fract->takeName(&I);
  I.replaceAllUsesWith(Fract);
  DeadVals.push_back(&I);
  return true;
}
bool AMDGPUCodeGenPrepareImpl::visitSqrt(IntrinsicInst &Sqrt) {
  // ...
  // Defer the correctly rounded expansion to codegen.
  if (ReqdAccuracy < 1.0f)
    return false;
  // ...
  bool CanTreatAsDAZ = canIgnoreDenormalInput(SrcVal, &Sqrt);

  // The raw instruction is 1 ulp, but the correction for denormal handling
  // brings it to 2 ulp.
  if (!CanTreatAsDAZ && ReqdAccuracy < 2.0f)
    return false;

  IRBuilder<> Builder(&Sqrt);
  SmallVector<Value *, 4> SrcVals;
  extractValues(Builder, SrcVals, SrcVal);

  SmallVector<Value *, 4> ResultVals(SrcVals.size());
  for (int I = 0, E = SrcVals.size(); I != E; ++I) {
    if (CanTreatAsDAZ)
      ResultVals[I] = Builder.CreateCall(getSqrtF32(), SrcVals[I]);
    else
      ResultVals[I] = emitSqrtIEEE2ULP(Builder, SrcVals[I], SqrtFMF);
  }

  Value *NewSqrt = insertValues(Builder, Sqrt.getType(), ResultVals);
  NewSqrt->takeName(&Sqrt);
  Sqrt.replaceAllUsesWith(NewSqrt);
  DeadVals.push_back(&Sqrt);
  return true;
}
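// Accuracy ladder, summarized (informal): below 1 ulp the request is left to
// the backend's correctly rounded lowering; when denormal inputs are flushed
// or provably absent, the raw v_sqrt_f32 call at about 1 ulp suffices;
// otherwise the scaled emitSqrtIEEE2ULP expansion is used, which pays for
// the extra compare/selects and is only worthwhile when 2 ulp is acceptable.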
bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
  const TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  AssumptionCache *AC =
      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  const UniformityInfo &UA =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
  return AMDGPUCodeGenPrepareImpl(F, TM, TLI, AC, DT, UA).run();
}
PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
                                                FunctionAnalysisManager &FAM) {
  // ...
  AMDGPUCodeGenPrepareImpl Impl(F, ATM, TLI, AC, DT, UA);
  if (!Impl.run())
    return PreservedAnalyses::all();
  PreservedAnalyses PA = PreservedAnalyses::none();
  if (!Impl.FlowChanged)
    PA.preserveSet<CFGAnalyses>();
  return PA;
}

INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
                      "AMDGPU IR optimizations", false, false)
// ...
char AMDGPUCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPUCodeGenPreparePass() {
  return new AMDGPUCodeGenPrepare();
}