docs/doxygen/ExpandIRInsts_8cpp_source.html

//===--- ExpandIRInsts.cpp - Expand IR instructions -----------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

// This pass expands certain instructions at the IR level.

//

// The following expansions are implemented:

// - Expansion of ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp ..  to’, ‘sitofp

// .. to’ instructions with a bitwidth above a threshold.  This is

// useful for targets like x86_64 that cannot lower fp convertions

// with more than 128 bits.

//

// - Expansion of ‘frem‘ for types MVT::f16, MVT::f32, and MVT::f64 for

// targets which use "Expand" as the legalization action for the

// corresponding type.

//

// - Expansion of ‘udiv‘, ‘sdiv‘, ‘urem‘, and ‘srem‘ instructions with

// a bitwidth above a threshold into a call to auto-generated

// functions.  This is useful for targets like x86_64 that cannot

// lower divisions with more than 128 bits or targets like x86_32 that

// cannot lower divisions with more than 64 bits.

//

// Instructions with vector types are scalarized first if their scalar

// types can be expanded. Scalable vector types are not supported.

//===----------------------------------------------------------------------===//


#include "llvm/CodeGen/ExpandIRInsts.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/GlobalsModRef.h"

#include "llvm/Analysis/SimplifyQuery.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/CodeGen/ISDOpcodes.h"

#include "llvm/CodeGen/Passes.h"

#include "llvm/CodeGen/TargetLowering.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/CodeGen/TargetSubtargetInfo.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/InstIterator.h"

#include "llvm/IR/IntrinsicInst.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/PassManager.h"

#include "llvm/InitializePasses.h"

#include "llvm/Pass.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Target/TargetMachine.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/IntegerDivision.h"

#include <optional>


#define DEBUG_TYPE "expand-ir-insts"


using namespace llvm;


static cl::opt<unsigned>

    ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,

                        cl::init(IntegerType::MAX_INT_BITS),

                        cl::desc("fp convert instructions on integers with "

                                 "more than <N> bits are expanded."));


static cl::opt<unsigned>

    ExpandDivRemBits("expand-div-rem-bits", cl::Hidden,

                     cl::init(IntegerType::MAX_INT_BITS),

                     cl::desc("div and rem instructions on integers with "

                              "more than <N> bits are expanded."));


static bool isConstantPowerOfTwo(Value *V, bool SignedOp) {

  auto *C = dyn_cast<ConstantInt>(V);

  if (!C)

    return false;


  APInt Val = C->getValue();

  if (SignedOp && Val.isNegative())

    Val = -Val;

  return Val.isPowerOf2();

}


static bool isSigned(unsigned Opcode) {

  return Opcode == Instruction::SDiv || Opcode == Instruction::SRem;

}


/// For signed div/rem by a power of 2, compute the bias-adjusted dividend:

///   Sign = ashr X, (BitWidth - 1)          -- 0 or -1

///   Bias = lshr Sign, (BitWidth - ShiftAmt) -- 0 or 2^ShiftAmt - 1

///   Adjusted = add X, Bias

/// The bias adds (2^ShiftAmt - 1) for negative X, correcting rounding towards

/// zero (instead of towards -inf that a plain ashr would give).

/// The lshr form is used instead of 'and' to avoid large immediate constants.


static Value *addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth,

                            unsigned ShiftAmt) {

  assert(ShiftAmt > 0 && ShiftAmt < BitWidth &&

         "ShiftAmt out of range; callers should handle ShiftAmt == 0");

  Value *Sign = Builder.CreateAShr(X, BitWidth - 1, "sign");

  Value *Bias = Builder.CreateLShr(Sign, BitWidth - ShiftAmt, "bias");

  return Builder.CreateAdd(X, Bias, "adjusted");

}


/// Expand division or remainder by a power-of-2 constant.

/// Division (let C = log2(|divisor|)):

///   udiv X, 2^C  ->  lshr X, C

///   sdiv X, 2^C  ->  ashr (add X, Bias), C  (Bias corrects rounding)

///   sdiv exact X, 2^C  ->  ashr exact X, C  (no bias needed)

///   For negative power-of-2 divisors, the division result is negated.

/// Remainder (let C = log2(|divisor|)):

///   urem X, 2^C  ->  and X, (2^C - 1)

///   srem X, 2^C  ->  sub X, (shl (ashr (add X, Bias), C), C)


static void expandPow2DivRem(BinaryOperator *BO) {

  LLVM_DEBUG(dbgs() << "Expanding instruction: " << *BO << '\n');


  unsigned Opcode = BO->getOpcode();

  bool IsDiv = (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv);

  bool IsSigned = isSigned(Opcode);

  // isExact() is only valid for div.

  bool IsExact = IsDiv && BO->isExact();


  assert(isConstantPowerOfTwo(BO->getOperand(1), IsSigned) &&

         "Expected power-of-2 constant divisor");


  Value *X = BO->getOperand(0);

  auto *C = cast<ConstantInt>(BO->getOperand(1));

  Type *Ty = BO->getType();

  unsigned BitWidth = Ty->getIntegerBitWidth();


  APInt DivisorVal = C->getValue();

  bool IsNegativeDivisor = IsSigned && DivisorVal.isNegative();

  // Use countr_zero() to get the shift amount directly from the bit pattern.

  // This works correctly for both positive and negative powers of 2, including

  // INT_MIN, without needing to negate the value first.

  unsigned ShiftAmt = DivisorVal.countr_zero();


  IRBuilder<> Builder(BO);

  Value *Result;


  if (ShiftAmt == 0) {

    // Div by 1/-1: X / 1 = X, X / -1 = -X.

    // Rem by 1/-1: always 0.

    if (IsDiv)

      Result = IsNegativeDivisor ? Builder.CreateNeg(X) : X;

    else

      Result = ConstantInt::get(Ty, 0);

  } else if (IsSigned) {

    // The signed expansion uses X multiple times (bias computation, shift,

    // and sub for remainder). Freeze X to ensure consistent behavior if it is

    // undef/poison. For exact division, no bias is needed and X is used only

    // once, so freeze is unnecessary.

    if (!IsExact && !isGuaranteedNotToBeUndefOrPoison(X))

      X = Builder.CreateFreeze(X, X->getName() + ".fr");

    // For exact division, no bias is needed since there's no rounding.

    Value *Dividend =

        IsExact ? X : addSignedBias(Builder, X, BitWidth, ShiftAmt);

    Value *Quotient = Builder.CreateAShr(

        Dividend, ShiftAmt, IsDiv && IsNegativeDivisor ? "pre.neg" : "shifted",

        IsExact);

    if (IsDiv) {

      Result = IsNegativeDivisor ? Builder.CreateNeg(Quotient) : Quotient;

    } else {

      // Rem = X - (Quotient << ShiftAmt):

      // clear lower ShiftAmt bits via round-trip shift, then subtract.

      Value *Truncated = Builder.CreateShl(Quotient, ShiftAmt, "truncated");

      Result = Builder.CreateSub(X, Truncated);

    }

  } else {

    if (IsDiv) {

      Result = Builder.CreateLShr(X, ShiftAmt, "", IsExact);

    } else {

      APInt Mask = APInt::getLowBitsSet(BitWidth, ShiftAmt);

      Result = Builder.CreateAnd(X, ConstantInt::get(Ty, Mask));

    }

  }


  BO->replaceAllUsesWith(Result);

  if (Result != X)

    if (auto *RI = dyn_cast<Instruction>(Result))

      RI->takeName(BO);

  BO->dropAllReferences();

  BO->eraseFromParent();

}


/// This class implements a precise expansion of the frem instruction.

/// The generated code is based on the fmod implementation in the AMD device

/// libs.

namespace {

class FRemExpander {

  /// The IRBuilder to use for the expansion.

  IRBuilder<> &B;


  /// Floating point type of the return value and the arguments of the FRem

  /// instructions that should be expanded.

  Type *FremTy;


  /// Floating point type to use for the computation.  This may be

  /// wider than the \p FremTy.

  Type *ComputeFpTy;


  /// Integer type used to hold the exponents returned by frexp.

  Type *ExTy;


  /// How many bits of the quotient to compute per iteration of the

  /// algorithm, stored as a value of type \p ExTy.

  Value *Bits;


  /// Constant 1 of type \p ExTy.

  Value *One;


  /// The frem argument/return types that can be expanded by this class.

  // TODO: The expansion could work for other floating point types

  // as well, but this would require additional testing.

  static constexpr std::array<MVT, 3> ExpandableTypes{MVT::f16, MVT::f32,

                                                      MVT::f64};


public:

  static bool canExpandType(Type *Ty) {

    EVT VT = EVT::getEVT(Ty);

    assert(VT.isSimple() && "Can expand only simple types");


    return is_contained(ExpandableTypes, VT.getSimpleVT());

  }


  static bool shouldExpandFremType(const TargetLowering &TLI, EVT VT) {

    assert(!VT.isVector() && "Cannot handle vector type; must scalarize first");

    return TLI.getOperationAction(ISD::FREM, VT) ==

           TargetLowering::LegalizeAction::Expand;

  }


  static bool shouldExpandFremType(const TargetLowering &TLI, Type *Ty) {

    // Consider scalar type for simplicity.  It seems unlikely that a

    // vector type can be legalized without expansion if the scalar

    // type cannot.

    return shouldExpandFremType(TLI, EVT::getEVT(Ty->getScalarType()));

  }


  /// Return true if the pass should expand frem instructions of any type

  /// for the target represented by \p TLI.

  static bool shouldExpandAnyFremType(const TargetLowering &TLI) {

    return any_of(ExpandableTypes,

                  [&](MVT V) { return shouldExpandFremType(TLI, EVT(V)); });

  }


  static FRemExpander create(IRBuilder<> &B, Type *Ty) {

    assert(canExpandType(Ty) && "Expected supported floating point type");


    // The type to use for the computation of the remainder. This may be

    // wider than the input/result type which affects the ...

    Type *ComputeTy = Ty;

    // ... maximum number of iterations of the remainder computation loop

    // to use. This value is for the case in which the computation

    // uses the same input/result type.

    unsigned MaxIter = 2;


    if (Ty->isHalfTy()) {

      // Use the wider type and less iterations.

      ComputeTy = B.getFloatTy();

      MaxIter = 1;

    }


    unsigned Precision = APFloat::semanticsPrecision(Ty->getFltSemantics());

    return FRemExpander{B, Ty, Precision / MaxIter, ComputeTy};

  }


  /// Build the FRem expansion for the numerator \p X and the

  /// denumerator \p Y.  The type of X and Y must match \p FremTy. The

  /// code will be generated at the insertion point of \p B and the

  /// insertion point will be reset at exit.

  Value *buildFRem(Value *X, Value *Y, std::optional<SimplifyQuery> &SQ) const;


  /// Build an approximate FRem expansion for the numerator \p X and

  /// the denumerator \p Y at the insertion point of builder \p B.

  /// The type of X and Y must match \p FremTy.

  Value *buildApproxFRem(Value *X, Value *Y) const;


private:

  FRemExpander(IRBuilder<> &B, Type *FremTy, unsigned Bits, Type *ComputeFpTy)

      : B(B), FremTy(FremTy), ComputeFpTy(ComputeFpTy), ExTy(B.getInt32Ty()),

        Bits(ConstantInt::get(ExTy, Bits)), One(ConstantInt::get(ExTy, 1)) {}


  Value *createRcp(Value *V, const Twine &Name) const {

    // Leave it to later optimizations to turn this into an rcp

    // instruction if available.

    return B.CreateFDiv(ConstantFP::get(ComputeFpTy, 1.0), V, Name);

  }


  // Helper function to build the UPDATE_AX code which is common to the

  // loop body and the "final iteration".

  Value *buildUpdateAx(Value *Ax, Value *Ay, Value *Ayinv) const {

    // Build:

    //   float q = rint(ax * ayinv);

    //   ax = fma(-q, ay, ax);

    //   int clt = ax < 0.0f;

    //   float axp = ax + ay;

    //   ax = clt ? axp : ax;

    Value *Q = B.CreateUnaryIntrinsic(Intrinsic::rint, B.CreateFMul(Ax, Ayinv),

                                      {}, "q");

    Value *AxUpdate = B.CreateFMA(B.CreateFNeg(Q), Ay, Ax, {}, "ax");

    Value *Clt = B.CreateFCmp(CmpInst::FCMP_OLT, AxUpdate,

                              ConstantFP::getZero(ComputeFpTy), "clt");

    Value *Axp = B.CreateFAdd(AxUpdate, Ay, "axp");

    return B.CreateSelect(Clt, Axp, AxUpdate, "ax");

  }


  /// Build code to extract the exponent and mantissa of \p Src.

  /// Return the exponent minus one for use as a loop bound and

  /// the mantissa taken to the given \p NewExp power.

  std::pair<Value *, Value *> buildExpAndPower(Value *Src, Value *NewExp,

                                               const Twine &ExName,

                                               const Twine &PowName) const {

    // Build:

    //   ExName = frexp_exp(Src) - 1;

    //   PowName = fldexp(frexp_mant(ExName), NewExp);

    Type *Ty = Src->getType();

    Type *ExTy = B.getInt32Ty();

    Value *Frexp = B.CreateIntrinsic(Intrinsic::frexp, {Ty, ExTy}, Src);

    Value *Mant = B.CreateExtractValue(Frexp, {0});

    Value *Exp = B.CreateExtractValue(Frexp, {1});


    Exp = B.CreateSub(Exp, One, ExName);

    Value *Pow = B.CreateLdexp(Mant, NewExp, {}, PowName);


    return {Pow, Exp};

  }


  /// Build the main computation of the remainder for the case in which

  /// Ax > Ay, where Ax = |X|, Ay = |Y|, and X is the numerator and Y the

  /// denumerator. Add the incoming edge from the computation result

  /// to \p RetPhi.

  void buildRemainderComputation(Value *AxInitial, Value *AyInitial, Value *X,

                                 PHINode *RetPhi, FastMathFlags FMF) const {

    IRBuilder<>::FastMathFlagGuard Guard(B);

    B.setFastMathFlags(FMF);


    // Build:

    // ex = frexp_exp(ax) - 1;

    // ax = fldexp(frexp_mant(ax), bits);

    // ey = frexp_exp(ay) - 1;

    // ay = fledxp(frexp_mant(ay), 1);

    auto [Ax, Ex] = buildExpAndPower(AxInitial, Bits, "ex", "ax");

    auto [Ay, Ey] = buildExpAndPower(AyInitial, One, "ey", "ay");


    // Build:

    //   int nb = ex - ey;

    //   float ayinv = 1.0/ay;

    Value *Nb = B.CreateSub(Ex, Ey, "nb");

    Value *Ayinv = createRcp(Ay, "ayinv");


    // Build: while (nb > bits)

    BasicBlock *PreheaderBB = B.GetInsertBlock();

    Function *Fun = PreheaderBB->getParent();

    auto *LoopBB = BasicBlock::Create(B.getContext(), "frem.loop_body", Fun);

    auto *ExitBB = BasicBlock::Create(B.getContext(), "frem.loop_exit", Fun);


    B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, Nb, Bits), LoopBB, ExitBB);


    // Build loop body:

    //   UPDATE_AX

    //   ax = fldexp(ax, bits);

    //   nb -= bits;

    // One iteration of the loop is factored out.  The code shared by

    // the loop and this "iteration" is denoted by UPDATE_AX.

    B.SetInsertPoint(LoopBB);

    PHINode *NbIv = B.CreatePHI(Nb->getType(), 2, "nb_iv");

    NbIv->addIncoming(Nb, PreheaderBB);


    auto *AxPhi = B.CreatePHI(ComputeFpTy, 2, "ax_loop_phi");

    AxPhi->addIncoming(Ax, PreheaderBB);


    Value *AxPhiUpdate = buildUpdateAx(AxPhi, Ay, Ayinv);

    AxPhiUpdate = B.CreateLdexp(AxPhiUpdate, Bits, {}, "ax_update");

    AxPhi->addIncoming(AxPhiUpdate, LoopBB);

    NbIv->addIncoming(B.CreateSub(NbIv, Bits, "nb_update"), LoopBB);


    B.CreateCondBr(B.CreateICmp(CmpInst::ICMP_SGT, NbIv, Bits), LoopBB, ExitBB);


    // Build final iteration

    //   ax = fldexp(ax, nb - bits + 1);

    //   UPDATE_AX

    B.SetInsertPoint(ExitBB);


    auto *AxPhiExit = B.CreatePHI(ComputeFpTy, 2, "ax_exit_phi");

    AxPhiExit->addIncoming(Ax, PreheaderBB);

    AxPhiExit->addIncoming(AxPhi, LoopBB);

    auto *NbExitPhi = B.CreatePHI(Nb->getType(), 2, "nb_exit_phi");

    NbExitPhi->addIncoming(NbIv, LoopBB);

    NbExitPhi->addIncoming(Nb, PreheaderBB);


    Value *AxFinal = B.CreateLdexp(

        AxPhiExit, B.CreateAdd(B.CreateSub(NbExitPhi, Bits), One), {}, "ax");

    AxFinal = buildUpdateAx(AxFinal, Ay, Ayinv);


    // Build:

    //    ax = fldexp(ax, ey);

    //    ret = copysign(ax,x);

    AxFinal = B.CreateLdexp(AxFinal, Ey, {}, "ax");

    if (ComputeFpTy != FremTy)

      AxFinal = B.CreateFPTrunc(AxFinal, FremTy);

    Value *Ret = B.CreateCopySign(AxFinal, X);


    RetPhi->addIncoming(Ret, ExitBB);

  }


  /// Build the else-branch of the conditional in the FRem

  /// expansion, i.e. the case in wich Ax <= Ay, where Ax = |X|, Ay

  /// = |Y|, and X is the numerator and Y the denumerator. Add the

  /// incoming edge from the result to \p RetPhi.

  void buildElseBranch(Value *Ax, Value *Ay, Value *X, PHINode *RetPhi) const {

    // Build:

    // ret = ax == ay ? copysign(0.0f, x) : x;

    Value *ZeroWithXSign = B.CreateCopySign(ConstantFP::getZero(FremTy), X);

    Value *Ret = B.CreateSelect(B.CreateFCmpOEQ(Ax, Ay), ZeroWithXSign, X);


    RetPhi->addIncoming(Ret, B.GetInsertBlock());

  }


  /// Return a value that is NaN if one of the corner cases concerning

  /// the inputs \p X and \p Y is detected, and \p Ret otherwise.

  Value *handleInputCornerCases(Value *Ret, Value *X, Value *Y,

                                std::optional<SimplifyQuery> &SQ,

                                bool NoInfs) const {

    // Build:

    //   ret = (y == 0.0f || isnan(y)) ? QNAN : ret;

    //   ret = isfinite(x) ? ret : QNAN;

    Value *Nan = ConstantFP::getQNaN(FremTy);

    Ret = B.CreateSelect(B.CreateFCmpUEQ(Y, ConstantFP::getZero(FremTy)), Nan,

                         Ret);

    Value *XFinite =

        NoInfs || (SQ && isKnownNeverInfinity(X, *SQ))

            ? B.getTrue()

            : B.CreateFCmpULT(B.CreateFAbs(X), ConstantFP::getInfinity(FremTy));

    Ret = B.CreateSelect(XFinite, Ret, Nan);


    return Ret;

  }

};

} // namespace


Value *FRemExpander::buildApproxFRem(Value *X, Value *Y) const {

  IRBuilder<>::FastMathFlagGuard Guard(B);

  // Propagating the approximate functions flag to the

  // division leads to an unacceptable drop in precision

  // on AMDGPU.

  // TODO Find out if any flags might be worth propagating.

  B.clearFastMathFlags();


  Value *Quot = B.CreateFDiv(X, Y);

  Value *Trunc = B.CreateUnaryIntrinsic(Intrinsic::trunc, Quot, {});

  Value *Neg = B.CreateFNeg(Trunc);


  return B.CreateFMA(Neg, Y, X);

}


Value *FRemExpander::buildFRem(Value *X, Value *Y,

                               std::optional<SimplifyQuery> &SQ) const {

  assert(X->getType() == FremTy && Y->getType() == FremTy);


  FastMathFlags FMF = B.getFastMathFlags();


  // This function generates the following code structure:

  //   if (abs(x) > abs(y))

  //   { ret = compute remainder }

  //   else

  //   { ret = x or 0 with sign of x }

  //   Adjust ret to NaN/inf in input

  //   return ret

  Value *Ax = B.CreateFAbs(X, {}, "ax");

  Value *Ay = B.CreateFAbs(Y, {}, "ay");

  if (ComputeFpTy != X->getType()) {

    Ax = B.CreateFPExt(Ax, ComputeFpTy, "ax");

    Ay = B.CreateFPExt(Ay, ComputeFpTy, "ay");

  }

  Value *AxAyCmp = B.CreateFCmpOGT(Ax, Ay);


  PHINode *RetPhi = B.CreatePHI(FremTy, 2, "ret");

  Value *Ret = RetPhi;


  // We would return NaN in all corner cases handled here.

  // Hence, if NaNs are excluded, keep the result as it is.

  if (!FMF.noNaNs())

    Ret = handleInputCornerCases(Ret, X, Y, SQ, FMF.noInfs());


  Function *Fun = B.GetInsertBlock()->getParent();

  auto *ThenBB = BasicBlock::Create(B.getContext(), "frem.compute", Fun);

  auto *ElseBB = BasicBlock::Create(B.getContext(), "frem.else", Fun);

  SplitBlockAndInsertIfThenElse(AxAyCmp, RetPhi, &ThenBB, &ElseBB);


  auto SavedInsertPt = B.GetInsertPoint();


  // Build remainder computation for "then" branch

  //

  // The ordered comparison ensures that ax and ay are not NaNs

  // in the then-branch. Furthermore, y cannot be an infinity and the

  // check at the end of the function ensures that the result will not

  // be used if x is an infinity.

  FastMathFlags ComputeFMF = FMF;

  ComputeFMF.setNoInfs();

  ComputeFMF.setNoNaNs();


  B.SetInsertPoint(ThenBB);

  buildRemainderComputation(Ax, Ay, X, RetPhi, FMF);

  B.CreateBr(RetPhi->getParent());


  // Build "else"-branch

  B.SetInsertPoint(ElseBB);

  buildElseBranch(Ax, Ay, X, RetPhi);

  B.CreateBr(RetPhi->getParent());


  B.SetInsertPoint(SavedInsertPt);


  return Ret;

}


static bool expandFRem(BinaryOperator &I, std::optional<SimplifyQuery> &SQ) {

  LLVM_DEBUG(dbgs() << "Expanding instruction: " << I << '\n');


  Type *Ty = I.getType();

  assert(FRemExpander::canExpandType(Ty) &&

         "Expected supported floating point type");


  FastMathFlags FMF = I.getFastMathFlags();

  // TODO Make use of those flags for optimization?

  FMF.setAllowReciprocal(false);

  FMF.setAllowContract(false);


  IRBuilder<> B(&I);

  B.setFastMathFlags(FMF);

  B.SetCurrentDebugLocation(I.getDebugLoc());


  const FRemExpander Expander = FRemExpander::create(B, Ty);

  Value *Ret = FMF.approxFunc()

                   ? Expander.buildApproxFRem(I.getOperand(0), I.getOperand(1))

                   : Expander.buildFRem(I.getOperand(0), I.getOperand(1), SQ);


  I.replaceAllUsesWith(Ret);

  Ret->takeName(&I);

  I.eraseFromParent();


  return true;

}


// clang-format off: preserve formatting of the following example


/// Generate code to convert a fp number to integer, replacing FPToS(U)I with

/// the generated code. This currently generates code similarly to compiler-rt's

/// implementations.

///

/// An example IR generated from compiler-rt/fixsfdi.c looks like below:

/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {

/// entry:

///   %0 = bitcast float %a to i32

///   %conv.i = zext i32 %0 to i64

///   %tobool.not = icmp sgt i32 %0, -1

///   %conv = select i1 %tobool.not, i64 1, i64 -1

///   %and = lshr i64 %conv.i, 23

///   %shr = and i64 %and, 255

///   %and2 = and i64 %conv.i, 8388607

///   %or = or i64 %and2, 8388608

///   %cmp = icmp ult i64 %shr, 127

///   br i1 %cmp, label %cleanup, label %if.end

///

/// if.end:                                           ; preds = %entry

///   %sub = add nuw nsw i64 %shr, 4294967169

///   %conv5 = and i64 %sub, 4294967232

///   %cmp6.not = icmp eq i64 %conv5, 0

///   br i1 %cmp6.not, label %if.end12, label %if.then8

///

/// if.then8:                                         ; preds = %if.end

///   %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64

///   -9223372036854775808 br label %cleanup

///

/// if.end12:                                         ; preds = %if.end

///   %cmp13 = icmp ult i64 %shr, 150

///   br i1 %cmp13, label %if.then15, label %if.else

///

/// if.then15:                                        ; preds = %if.end12

///   %sub16 = sub nuw nsw i64 150, %shr

///   %shr17 = lshr i64 %or, %sub16

///   %mul = mul nsw i64 %shr17, %conv

///   br label %cleanup

///

/// if.else:                                          ; preds = %if.end12

///   %sub18 = add nsw i64 %shr, -150

///   %shl = shl i64 %or, %sub18

///   %mul19 = mul nsw i64 %shl, %conv

///   br label %cleanup

///

/// cleanup:                                          ; preds = %entry,

/// %if.else, %if.then15, %if.then8

///   %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [

///   %mul19, %if.else ], [ 0, %entry ] ret i64 %retval.0

/// }

///

/// Replace fp to integer with generated code.


static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned) {

  // clang-format on

  IRBuilder<> Builder(FPToI);

  auto *FloatVal = FPToI->getOperand(0);

  IntegerType *IntTy = cast<IntegerType>(FPToI->getType());


  unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();

  unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;


  // FIXME: fp16's range is covered by i32. So `fptoi half` can convert

  // to i32 first following a sext/zext to target integer type.

  Value *A1 = nullptr;

  if (FloatVal->getType()->isHalfTy() && BitWidth >= 32) {

    if (FPToI->getOpcode() == Instruction::FPToUI) {

      Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getInt32Ty());

      A1 = Builder.CreateZExt(A0, IntTy);

    } else { // FPToSI

      Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getInt32Ty());

      A1 = Builder.CreateSExt(A0, IntTy);

    }

    FPToI->replaceAllUsesWith(A1);

    FPToI->dropAllReferences();

    FPToI->eraseFromParent();

    return;

  }


  // fp80 conversion is implemented by fpext to fp128 first then do the

  // conversion.

  FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;

  unsigned FloatWidth =

      PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());

  unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;

  unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;

  IntegerType *FloatIntTy = Builder.getIntNTy(FloatWidth);

  Value *ImplicitBit = ConstantInt::get(

      FloatIntTy, APInt::getOneBitSet(FloatWidth, FPMantissaWidth));

  Value *SignificandMask = ConstantInt::get(

      FloatIntTy, APInt::getLowBitsSet(FloatWidth, FPMantissaWidth));


  BasicBlock *Entry = Builder.GetInsertBlock();

  Function *F = Entry->getParent();

  Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));

  BasicBlock *CheckSaturateBB, *SaturateBB;

  BasicBlock *End =

      Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");

  if (IsSaturating) {

    CheckSaturateBB = BasicBlock::Create(Builder.getContext(),

                                         "fp-to-i-if-check.saturate", F, End);

    SaturateBB =

        BasicBlock::Create(Builder.getContext(), "fp-to-i-if-saturate", F, End);

  }

  BasicBlock *CheckExpSizeBB = BasicBlock::Create(

      Builder.getContext(), "fp-to-i-if-check.exp.size", F, End);

  BasicBlock *ExpSmallBB =

      BasicBlock::Create(Builder.getContext(), "fp-to-i-if-exp.small", F, End);

  BasicBlock *ExpLargeBB =

      BasicBlock::Create(Builder.getContext(), "fp-to-i-if-exp.large", F, End);


  Entry->getTerminator()->eraseFromParent();


  // entry:

  Builder.SetInsertPoint(Entry);

  // We're going to introduce branches on the value, so freeze it.

  if (!isGuaranteedNotToBeUndefOrPoison(FloatVal))

    FloatVal = Builder.CreateFreeze(FloatVal);

  // fp80 conversion is implemented by fpext to fp128 first then do the

  // conversion.

  if (FloatVal->getType()->isX86_FP80Ty())

    FloatVal =

        Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));

  Value *ARep = Builder.CreateBitCast(FloatVal, FloatIntTy);

  Value *PosOrNeg, *Sign;

  if (IsSigned) {

    PosOrNeg =

        Builder.CreateICmpSGT(ARep, ConstantInt::getSigned(FloatIntTy, -1));

    Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),

                                ConstantInt::getSigned(IntTy, -1), "sign");

  }

  Value *And =

      Builder.CreateLShr(ARep, Builder.getIntN(FloatWidth, FPMantissaWidth));

  Value *BiasedExp = Builder.CreateAnd(

      And, Builder.getIntN(FloatWidth, (1 << ExponentWidth) - 1), "biased.exp");

  Value *Abs = Builder.CreateAnd(ARep, SignificandMask);

  Value *Significand = Builder.CreateOr(Abs, ImplicitBit, "significand");

  Value *ZeroResultCond = Builder.CreateICmpULT(

      BiasedExp, Builder.getIntN(FloatWidth, ExponentBias), "exp.is.negative");

  if (IsSaturating) {

    Value *IsNaN = Builder.CreateFCmpUNO(FloatVal, FloatVal, "is.nan");

    ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNaN);

    if (!IsSigned) {

      Value *IsNeg = Builder.CreateIsNeg(ARep);

      ZeroResultCond = Builder.CreateOr(ZeroResultCond, IsNeg);

    }

  }

  Builder.CreateCondBr(ZeroResultCond, End,

                       IsSaturating ? CheckSaturateBB : CheckExpSizeBB);


  Value *Saturated;

  if (IsSaturating) {

    // check.saturate:

    Builder.SetInsertPoint(CheckSaturateBB);

    Value *Cmp3 = Builder.CreateICmpUGE(

        BiasedExp, ConstantInt::getSigned(

                       FloatIntTy, static_cast<int64_t>(ExponentBias +

                                                        BitWidth - IsSigned)));

    Builder.CreateCondBr(Cmp3, SaturateBB, CheckExpSizeBB);


    // saturate:

    Builder.SetInsertPoint(SaturateBB);

    if (IsSigned) {

      Value *SignedMax =

          ConstantInt::get(IntTy, APInt::getSignedMaxValue(BitWidth));

      Value *SignedMin =

          ConstantInt::get(IntTy, APInt::getSignedMinValue(BitWidth));

      Saturated =

          Builder.CreateSelect(PosOrNeg, SignedMax, SignedMin, "saturated");

    } else {

      Saturated = ConstantInt::getAllOnesValue(IntTy);

    }

    Builder.CreateBr(End);

  }


  // if.end9:

  Builder.SetInsertPoint(CheckExpSizeBB);

  Value *ExpSmallerMantissaWidth = Builder.CreateICmpULT(

      BiasedExp, Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth),

      "exp.smaller.mantissa.width");

  Builder.CreateCondBr(ExpSmallerMantissaWidth, ExpSmallBB, ExpLargeBB);


  // exp.small:

  Builder.SetInsertPoint(ExpSmallBB);

  Value *Sub13 = Builder.CreateSub(

      Builder.getIntN(FloatWidth, ExponentBias + FPMantissaWidth), BiasedExp);

  Value *ExpSmallRes =

      Builder.CreateZExtOrTrunc(Builder.CreateLShr(Significand, Sub13), IntTy);

  if (IsSigned)

    ExpSmallRes = Builder.CreateMul(ExpSmallRes, Sign);

  Builder.CreateBr(End);


  // exp.large:

  Builder.SetInsertPoint(ExpLargeBB);

  Value *Sub15 = Builder.CreateAdd(

      BiasedExp,

      ConstantInt::getSigned(

          FloatIntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));

  Value *SignificandCast = Builder.CreateZExtOrTrunc(Significand, IntTy);

  Value *ExpLargeRes = Builder.CreateShl(

      SignificandCast, Builder.CreateZExtOrTrunc(Sub15, IntTy));

  if (IsSigned)

    ExpLargeRes = Builder.CreateMul(ExpLargeRes, Sign);

  Builder.CreateBr(End);


  // cleanup:

  Builder.SetInsertPoint(End, End->begin());

  PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 3 + IsSaturating);


  if (IsSaturating)

    Retval0->addIncoming(Saturated, SaturateBB);

  Retval0->addIncoming(ExpSmallRes, ExpSmallBB);

  Retval0->addIncoming(ExpLargeRes, ExpLargeBB);

  Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);


  FPToI->replaceAllUsesWith(Retval0);

  FPToI->dropAllReferences();

  FPToI->eraseFromParent();

}


// clang-format off: preserve formatting of the following example


/// Generate code to convert a fp number to integer, replacing S(U)IToFP with

/// the generated code. This currently generates code similarly to compiler-rt's

/// implementations. This implementation has an implicit assumption that integer

/// width is larger than fp.

///

/// An example IR generated from compiler-rt/floatdisf.c looks like below:

/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {

/// entry:

///   %cmp = icmp eq i64 %a, 0

///   br i1 %cmp, label %return, label %if.end

///

/// if.end:                                           ; preds = %entry

///   %shr = ashr i64 %a, 63

///   %xor = xor i64 %shr, %a

///   %sub = sub nsw i64 %xor, %shr

///   %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5

///   %cast = trunc i64 %0 to i32

///   %sub1 = sub nuw nsw i32 64, %cast

///   %sub2 = xor i32 %cast, 63

///   %cmp3 = icmp ult i32 %cast, 40

///   br i1 %cmp3, label %if.then4, label %if.else

///

/// if.then4:                                         ; preds = %if.end

///   switch i32 %sub1, label %sw.default [

///     i32 25, label %sw.bb

///     i32 26, label %sw.epilog

///   ]

///

/// sw.bb:                                            ; preds = %if.then4

///   %shl = shl i64 %sub, 1

///   br label %sw.epilog

///

/// sw.default:                                       ; preds = %if.then4

///   %sub5 = sub nsw i64 38, %0

///   %sh_prom = and i64 %sub5, 4294967295

///   %shr6 = lshr i64 %sub, %sh_prom

///   %shr9 = lshr i64 274877906943, %0

///   %and = and i64 %shr9, %sub

///   %cmp10 = icmp ne i64 %and, 0

///   %conv11 = zext i1 %cmp10 to i64

///   %or = or i64 %shr6, %conv11

///   br label %sw.epilog

///

/// sw.epilog:                                        ; preds = %sw.default,

/// %if.then4, %sw.bb

///   %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl,

///   %sw.bb ] %1 = lshr i64 %a.addr.0, 2 %2 = and i64 %1, 1 %or16 = or i64 %2,

///   %a.addr.0 %inc = add nsw i64 %or16, 1 %3 = and i64 %inc, 67108864

///   %tobool.not = icmp eq i64 %3, 0

///   %spec.select.v = select i1 %tobool.not, i64 2, i64 3

///   %spec.select = ashr i64 %inc, %spec.select.v

///   %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1

///   br label %if.end26

///

/// if.else:                                          ; preds = %if.end

///   %sub23 = add nuw nsw i64 %0, 4294967256

///   %sh_prom24 = and i64 %sub23, 4294967295

///   %shl25 = shl i64 %sub, %sh_prom24

///   br label %if.end26

///

/// if.end26:                                         ; preds = %sw.epilog,

/// %if.else

///   %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]

///   %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]

///   %conv27 = trunc i64 %shr to i32

///   %and28 = and i32 %conv27, -2147483648

///   %add = shl nuw nsw i32 %e.0, 23

///   %shl29 = add nuw nsw i32 %add, 1065353216

///   %conv31 = trunc i64 %a.addr.1 to i32

///   %and32 = and i32 %conv31, 8388607

///   %or30 = or i32 %and32, %and28

///   %or33 = or i32 %or30, %shl29

///   %4 = bitcast i32 %or33 to float

///   br label %return

///

/// return:                                           ; preds = %entry,

/// %if.end26

///   %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]

///   ret float %retval.0

/// }

///

/// Replace integer to fp with generated code.


static void expandIToFP(Instruction *IToFP) {

  // clang-format on

  IRBuilder<> Builder(IToFP);

  auto *IntVal = IToFP->getOperand(0);

  IntegerType *IntTy = cast<IntegerType>(IntVal->getType());


  unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();

  unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;

  // fp80 conversion is implemented by conversion tp fp128 first following

  // a fptrunc to fp80.

  FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;

  // FIXME: As there is no related builtins added in compliler-rt,

  // here currently utilized the fp32 <-> fp16 lib calls to implement.

  FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;

  FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;

  unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);

  bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;


  // We're going to introduce branches on the value, so freeze it.

  if (!isGuaranteedNotToBeUndefOrPoison(IntVal))

    IntVal = Builder.CreateFreeze(IntVal);


  // The expansion below assumes that int width >= float width. Zero or sign

  // extend the integer accordingly.

  if (BitWidth < FloatWidth) {

    BitWidth = FloatWidth;

    IntTy = Builder.getIntNTy(BitWidth);

    IntVal = Builder.CreateIntCast(IntVal, IntTy, IsSigned);

  }


  Value *Temp1 =

      Builder.CreateShl(Builder.getIntN(BitWidth, 1),

                        Builder.getIntN(BitWidth, FPMantissaWidth + 3));


  BasicBlock *Entry = Builder.GetInsertBlock();

  Function *F = Entry->getParent();

  Entry->setName(Twine(Entry->getName(), "itofp-entry"));

  BasicBlock *End =

      Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");

  BasicBlock *IfEnd =

      BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);

  BasicBlock *IfThen4 =

      BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);

  BasicBlock *SwBB =

      BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);

  BasicBlock *SwDefault =

      BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);

  BasicBlock *SwEpilog =

      BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);

  BasicBlock *IfThen20 =

      BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);

  BasicBlock *IfElse =

      BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);

  BasicBlock *IfEnd26 =

      BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);


  Entry->getTerminator()->eraseFromParent();


  Function *CTLZ =

      Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);

  ConstantInt *True = Builder.getTrue();


  // entry:

  Builder.SetInsertPoint(Entry);

  Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));

  Builder.CreateCondBr(Cmp, End, IfEnd);


  // if.end:

  Builder.SetInsertPoint(IfEnd);

  Value *Shr =

      Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));

  Value *Xor = Builder.CreateXor(Shr, IntVal);

  Value *Sub = Builder.CreateSub(Xor, Shr);

  Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});

  Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());

  int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;

  Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),

                                  FloatWidth == 128 ? Call : Cast);

  Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),

                                  FloatWidth == 128 ? Call : Cast);

  Value *Cmp3 = Builder.CreateICmpSGT(

      Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));

  Builder.CreateCondBr(Cmp3, IfThen4, IfElse);


  // if.then4:

  Builder.SetInsertPoint(IfThen4);

  SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);

  SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);

  SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);


  // sw.bb:

  Builder.SetInsertPoint(SwBB);

  Value *Shl =

      Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));

  Builder.CreateBr(SwEpilog);


  // sw.default:

  Builder.SetInsertPoint(SwDefault);

  Value *Sub5 = Builder.CreateSub(

      Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),

      FloatWidth == 128 ? Call : Cast);

  Value *ShProm = Builder.CreateZExt(Sub5, IntTy);

  Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,

                                   FloatWidth == 128 ? Sub5 : ShProm);

  Value *Sub8 =

      Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,

                        Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));

  Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);

  Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),

                                   FloatWidth == 128 ? Sub8 : ShProm9);

  Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);

  Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));

  Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);

  Value *Or = Builder.CreateOr(Shr6, Conv11);

  Builder.CreateBr(SwEpilog);


  // sw.epilog:

  Builder.SetInsertPoint(SwEpilog);

  PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);

  AAddr0->addIncoming(Or, SwDefault);

  AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);

  AAddr0->addIncoming(Shl, SwBB);

  Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());

  Value *A1 = Builder.CreateLShr(A0, Builder.getInt32(2));

  Value *A2 = Builder.CreateAnd(A1, Builder.getInt32(1));

  Value *Conv16 = Builder.CreateZExt(A2, IntTy);

  Value *Or17 = Builder.CreateOr(AAddr0, Conv16);

  Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));

  Value *Shr18 = nullptr;

  if (IsSigned)

    Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));

  else

    Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));

  Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");

  Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));

  Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));

  Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));

  Value *ExtractT64 = nullptr;

  if (FloatWidth > 80)

    ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());

  else

    ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());

  Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);


  // if.then20

  Builder.SetInsertPoint(IfThen20);

  Value *Shr21 = nullptr;

  if (IsSigned)

    Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));

  else

    Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));

  Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));

  Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));

  Value *ExtractT62 = nullptr;

  if (FloatWidth > 80)

    ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getInt64Ty());

  else

    ExtractT62 = Builder.CreateTrunc(Extract, Builder.getInt32Ty());

  Builder.CreateBr(IfEnd26);


  // if.else:

  Builder.SetInsertPoint(IfElse);

  Value *Sub24 = Builder.CreateAdd(

      FloatWidth == 128 ? Call : Cast,

      ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),

                             -(int)(BitWidth - FPMantissaWidth - 1)));

  Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);

  Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,

                                   FloatWidth == 128 ? Sub24 : ShProm25);

  Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));

  Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));

  Value *ExtractT66 = nullptr;

  if (FloatWidth > 80)

    ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());

  else

    ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());

  Builder.CreateBr(IfEnd26);


  // if.end26:

  Builder.SetInsertPoint(IfEnd26);

  PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);

  AAddr1Off0->addIncoming(ExtractT, IfThen20);

  AAddr1Off0->addIncoming(ExtractT60, SwEpilog);

  AAddr1Off0->addIncoming(ExtractT61, IfElse);

  PHINode *AAddr1Off32 = nullptr;

  if (FloatWidth > 32) {

    AAddr1Off32 =

        Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);

    AAddr1Off32->addIncoming(ExtractT62, IfThen20);

    AAddr1Off32->addIncoming(ExtractT64, SwEpilog);

    AAddr1Off32->addIncoming(ExtractT66, IfElse);

  }

  PHINode *E0 = nullptr;

  if (FloatWidth <= 80) {

    E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);

    E0->addIncoming(Sub1, IfThen20);

    E0->addIncoming(Sub2, SwEpilog);

    E0->addIncoming(Sub2, IfElse);

  }

  Value *And29 = nullptr;

  if (FloatWidth > 80) {

    Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),

                                     Builder.getIntN(BitWidth, 63));

    And29 = Builder.CreateAnd(Shr, Temp2, "and29");

  } else {

    Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getInt32Ty());

    And29 = Builder.CreateAnd(

        Conv28, ConstantInt::get(Builder.getContext(), APInt::getSignMask(32)));

  }

  unsigned TempMod = FPMantissaWidth % 32;

  Value *And34 = nullptr;

  Value *Shl30 = nullptr;

  if (FloatWidth > 80) {

    TempMod += 32;

    Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getInt64(TempMod));

    Shl30 = Builder.CreateAdd(

        Add, Builder.getInt64(((1ull << (62ull - TempMod)) - 1ull) << TempMod));

    And34 = Builder.CreateZExt(Shl30, Builder.getInt128Ty());

  } else {

    Value *Add = Builder.CreateShl(E0, Builder.getInt32(TempMod));

    Shl30 = Builder.CreateAdd(

        Add, Builder.getInt32(((1 << (30 - TempMod)) - 1) << TempMod));

    And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,

                              Builder.getInt32((1 << TempMod) - 1));

  }

  Value *Or35 = nullptr;

  if (FloatWidth > 80) {

    Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getInt128Ty());

    Value *Or31 = Builder.CreateOr(And29Trunc, And34);

    Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));

    Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),

                                     Builder.getIntN(128, FPMantissaWidth));

    Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));

    Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);

    Or35 = Builder.CreateOr(Or34, A6);

  } else {

    Value *Or31 = Builder.CreateOr(And34, And29);

    Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);

  }

  Value *A4 = nullptr;

  if (IToFP->getType()->isDoubleTy()) {

    Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));

    Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));

    Value *And1 =

        Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));

    Value *Or1 = Builder.CreateOr(Shl1, And1);

    A4 = Builder.CreateBitCast(Or1, IToFP->getType());

  } else if (IToFP->getType()->isX86_FP80Ty()) {

    Value *A40 =

        Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));

    A4 = Builder.CreateFPTrunc(A40, IToFP->getType());

  } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {

    // Deal with "half" situation. This is a workaround since we don't have

    // floattihf.c currently as referring.

    Value *A40 =

        Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));

    A4 = Builder.CreateFPTrunc(A40, IToFP->getType());

  } else // float type

    A4 = Builder.CreateBitCast(Or35, IToFP->getType());

  Builder.CreateBr(End);


  // return:

  Builder.SetInsertPoint(End, End->begin());

  PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);

  Retval0->addIncoming(A4, IfEnd26);

  Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);


  IToFP->replaceAllUsesWith(Retval0);

  IToFP->dropAllReferences();

  IToFP->eraseFromParent();

}


static void scalarize(Instruction *I,

                      SmallVectorImpl<Instruction *> &Worklist) {

  VectorType *VTy = cast<FixedVectorType>(I->getType());


  IRBuilder<> Builder(I);


  unsigned NumElements = VTy->getElementCount().getFixedValue();

  Value *Result = PoisonValue::get(VTy);

  for (unsigned Idx = 0; Idx < NumElements; ++Idx) {

    Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);


    Value *NewOp = nullptr;

    if (auto *BinOp = dyn_cast<BinaryOperator>(I))

      NewOp = Builder.CreateBinOp(

          BinOp->getOpcode(), Ext,

          Builder.CreateExtractElement(I->getOperand(1), Idx));

    else if (auto *CastI = dyn_cast<CastInst>(I))

      NewOp = Builder.CreateCast(CastI->getOpcode(), Ext,

                                 I->getType()->getScalarType());

    else

      llvm_unreachable("Unsupported instruction type");


    Result = Builder.CreateInsertElement(Result, NewOp, Idx);

    if (auto *ScalarizedI = dyn_cast<Instruction>(NewOp)) {

      ScalarizedI->copyIRFlags(I, true);

      Worklist.push_back(ScalarizedI);

    }

  }


  I->replaceAllUsesWith(Result);

  I->dropAllReferences();

  I->eraseFromParent();

}


static void addToWorklist(Instruction &I,

                          SmallVector<Instruction *, 4> &Worklist) {

  if (I.getOperand(0)->getType()->isVectorTy())

    scalarize(&I, Worklist);

  else

    Worklist.push_back(&I);

}


static bool runImpl(Function &F, const TargetLowering &TLI,

                    const LibcallLoweringInfo &Libcalls, AssumptionCache *AC) {

  SmallVector<Instruction *, 4> Worklist;


  unsigned MaxLegalFpConvertBitWidth =

      TLI.getMaxLargeFPConvertBitWidthSupported();

  if (ExpandFpConvertBits != IntegerType::MAX_INT_BITS)

    MaxLegalFpConvertBitWidth = ExpandFpConvertBits;


  unsigned MaxLegalDivRemBitWidth = TLI.getMaxDivRemBitWidthSupported();

  if (ExpandDivRemBits != IntegerType::MAX_INT_BITS)

    MaxLegalDivRemBitWidth = ExpandDivRemBits;


  bool DisableExpandLargeFp =

      MaxLegalFpConvertBitWidth >= IntegerType::MAX_INT_BITS;

  bool DisableExpandLargeDivRem =

      MaxLegalDivRemBitWidth >= IntegerType::MAX_INT_BITS;

  bool DisableFrem = !FRemExpander::shouldExpandAnyFremType(TLI);


  if (DisableExpandLargeFp && DisableFrem && DisableExpandLargeDivRem)

    return false;


  auto ShouldHandleInst = [&](Instruction &I) {

    Type *Ty = I.getType();

    // TODO: This pass doesn't handle scalable vectors.

    if (Ty->isScalableTy())

      return false;


    switch (I.getOpcode()) {

    case Instruction::FRem:

      return !DisableFrem && FRemExpander::shouldExpandFremType(TLI, Ty);

    case Instruction::FPToUI:

    case Instruction::FPToSI:

      return !DisableExpandLargeFp &&

             cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >

                 MaxLegalFpConvertBitWidth;

    case Instruction::UIToFP:

    case Instruction::SIToFP:

      return !DisableExpandLargeFp &&

             cast<IntegerType>(I.getOperand(0)->getType()->getScalarType())

                     ->getIntegerBitWidth() > MaxLegalFpConvertBitWidth;

    case Instruction::UDiv:

    case Instruction::SDiv:

    case Instruction::URem:

    case Instruction::SRem:

      // Power-of-2 divisors are handled inside the expansion (via efficient

      // shift/mask sequences) rather than being excluded here, so that

      // backends that cannot lower wide div/rem even for powers of two

      // (e.g. when DAGCombiner is disabled) still get valid lowered code.

      return !DisableExpandLargeDivRem &&

             cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >

                 MaxLegalDivRemBitWidth;

    case Instruction::Call: {

      auto *II = dyn_cast<IntrinsicInst>(&I);

      if (II && (II->getIntrinsicID() == Intrinsic::fptoui_sat ||

                 II->getIntrinsicID() == Intrinsic::fptosi_sat)) {

        return !DisableExpandLargeFp &&

               cast<IntegerType>(Ty->getScalarType())->getIntegerBitWidth() >

                   MaxLegalFpConvertBitWidth;

      }

      return false;

    }

    }


    return false;

  };


  bool Modified = false;

  for (auto It = inst_begin(&F), End = inst_end(F); It != End;) {

    Instruction &I = *It++;

    if (!ShouldHandleInst(I))

      continue;


    addToWorklist(I, Worklist);

    Modified = true;

  }


  while (!Worklist.empty()) {

    Instruction *I = Worklist.pop_back_val();


    switch (I->getOpcode()) {

    case Instruction::FRem: {

      auto SQ = [&]() -> std::optional<SimplifyQuery> {

        if (AC) {

          auto Res = std::make_optional<SimplifyQuery>(

              I->getModule()->getDataLayout(), I);

          Res->AC = AC;

          return Res;

        }

        return {};

      }();


      expandFRem(cast<BinaryOperator>(*I), SQ);

      break;

    }


    case Instruction::FPToUI:

      expandFPToI(I, /*IsSaturating=*/false, /*IsSigned=*/false);

      break;

    case Instruction::FPToSI:

      expandFPToI(I, /*IsSaturating=*/false, /*IsSigned=*/true);

      break;


    case Instruction::UIToFP:

    case Instruction::SIToFP:

      expandIToFP(I);

      break;


    case Instruction::UDiv:

    case Instruction::SDiv:

    case Instruction::URem:

    case Instruction::SRem: {

      auto *BO = cast<BinaryOperator>(I);

      // TODO: isConstantPowerOfTwo does not handle vector constants, so

      // vector div/rem by a power-of-2 splat goes through the generic path.

      if (isConstantPowerOfTwo(BO->getOperand(1), isSigned(BO->getOpcode()))) {

        expandPow2DivRem(BO);

      } else {

        unsigned Opc = BO->getOpcode();

        if (Opc == Instruction::UDiv || Opc == Instruction::SDiv)

          expandDivision(BO);

        else

          expandRemainder(BO);

      }

      break;

    }

    case Instruction::Call: {

      auto *II = cast<IntrinsicInst>(I);

      assert(II->getIntrinsicID() == Intrinsic::fptoui_sat ||

             II->getIntrinsicID() == Intrinsic::fptosi_sat);

      expandFPToI(I, /*IsSaturating=*/true,

                  /*IsSigned=*/II->getIntrinsicID() == Intrinsic::fptosi_sat);

      break;

    }

    }

  }


  return Modified;

}


namespace {

class ExpandIRInstsLegacyPass : public FunctionPass {

  CodeGenOptLevel OptLevel;


public:

  static char ID;


  ExpandIRInstsLegacyPass(CodeGenOptLevel OptLevel)

      : FunctionPass(ID), OptLevel(OptLevel) {}


  ExpandIRInstsLegacyPass() : ExpandIRInstsLegacyPass(CodeGenOptLevel::None) {}


  bool runOnFunction(Function &F) override {

    auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();

    const TargetSubtargetInfo *Subtarget = TM->getSubtargetImpl(F);

    auto *TLI = Subtarget->getTargetLowering();

    AssumptionCache *AC = nullptr;


    const LibcallLoweringInfo &Libcalls =

        getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(

            *F.getParent(), *Subtarget);


    if (OptLevel != CodeGenOptLevel::None && !F.hasOptNone())

      AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

    return runImpl(F, *TLI, Libcalls, AC);

  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.addRequired<LibcallLoweringInfoWrapper>();

    AU.addRequired<TargetPassConfig>();

    if (OptLevel != CodeGenOptLevel::None)

      AU.addRequired<AssumptionCacheTracker>();

    AU.addPreserved<AAResultsWrapperPass>();

    AU.addPreserved<GlobalsAAWrapperPass>();

    AU.addRequired<LibcallLoweringInfoWrapper>();

  }

};

} // namespace


ExpandIRInstsPass::ExpandIRInstsPass(const TargetMachine &TM,

                                     CodeGenOptLevel OptLevel)

    : TM(&TM), OptLevel(OptLevel) {}


void ExpandIRInstsPass::printPipeline(

    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {

  static_cast<PassInfoMixin<ExpandIRInstsPass> *>(this)->printPipeline(

      OS, MapClassName2PassName);

  OS << '<';

  OS << "O" << (int)OptLevel;

  OS << '>';

}


PreservedAnalyses ExpandIRInstsPass::run(Function &F,

                                         FunctionAnalysisManager &FAM) {

  const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);

  auto &TLI = *STI->getTargetLowering();

  AssumptionCache *AC = nullptr;

  if (OptLevel != CodeGenOptLevel::None)

    AC = &FAM.getResult<AssumptionAnalysis>(F);


  auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);


  const LibcallLoweringModuleAnalysisResult *LibcallLowering =

      MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());


  if (!LibcallLowering) {

    F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +

                             "' analysis required");

    return PreservedAnalyses::all();

  }


  const LibcallLoweringInfo &Libcalls =

      LibcallLowering->getLibcallLowering(*STI);


  return runImpl(F, TLI, Libcalls, AC) ? PreservedAnalyses::none()

                                       : PreservedAnalyses::all();

}


char ExpandIRInstsLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(ExpandIRInstsLegacyPass, "expand-ir-insts",

                      "Expand certain fp instructions", false, false)

INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)

INITIALIZE_PASS_END(ExpandIRInstsLegacyPass, "expand-ir-insts",

                    "Expand IR instructions", false, false)


FunctionPass *llvm::createExpandIRInstsPass(CodeGenOptLevel OptLevel) {

  return new ExpandIRInstsLegacyPass(OptLevel);

}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

PowKind::Pow
@ Pow
Definition AMDGPULibCalls.cpp:47

AssumptionCache.h

BasicBlockUtils.h

X
#define X(NUM, ENUM, NAME)
Definition ELF.h:851

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

Passes.h

CommandLine.h

runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition EntryExitInstrumenter.cpp:109

expandFRem
static bool expandFRem(BinaryOperator &I, std::optional< SimplifyQuery > &SQ)
Definition ExpandIRInsts.cpp:514

expandIToFP
static void expandIToFP(Instruction *IToFP)
Generate code to convert a fp number to integer, replacing S(U)IToFP with the generated code.
Definition ExpandIRInsts.cpp:845

ExpandDivRemBits
static cl::opt< unsigned > ExpandDivRemBits("expand-div-rem-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("div and rem instructions on integers with " "more than <N> bits are expanded."))

runImpl
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
Definition ExpandIRInsts.cpp:1159

expandPow2DivRem
static void expandPow2DivRem(BinaryOperator *BO)
Expand division or remainder by a power-of-2 constant.
Definition ExpandIRInsts.cpp:112

isSigned
static bool isSigned(unsigned Opcode)
Definition ExpandIRInsts.cpp:83

addToWorklist
static void addToWorklist(Instruction &I, SmallVector< Instruction *, 4 > &Worklist)
Definition ExpandIRInsts.cpp:1151

addSignedBias
static Value * addSignedBias(IRBuilder<> &Builder, Value *X, unsigned BitWidth, unsigned ShiftAmt)
For signed div/rem by a power of 2, compute the bias-adjusted dividend: Sign = ashr X,...
Definition ExpandIRInsts.cpp:94

ExpandFpConvertBits
static cl::opt< unsigned > ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden, cl::init(IntegerType::MAX_INT_BITS), cl::desc("fp convert instructions on integers with " "more than <N> bits are expanded."))

expandFPToI
static void expandFPToI(Instruction *FPToI, bool IsSaturating, bool IsSigned)
Generate code to convert a fp number to integer, replacing FPToS(U)I with the generated code.
Definition ExpandIRInsts.cpp:594

isConstantPowerOfTwo
static bool isConstantPowerOfTwo(Value *V, bool SignedOp)
Definition ExpandIRInsts.cpp:72

scalarize
static void scalarize(Instruction *I, SmallVectorImpl< Instruction * > &Worklist)
Definition ExpandIRInsts.cpp:1117

ExpandIRInsts.h

GlobalsModRef.h
This is the interface for a simple mod/ref and alias analysis over globals.

IRBuilder.h

IntrinsicInst.h

Module.h
Module.h This file contains the declarations for the Module class.

PassManager.h
This header defines various interfaces for pass management in LLVM.

ISDOpcodes.h

InitializePasses.h

InstIterator.h

IntegerDivision.h

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

LoopDeletionResult::Modified
@ Modified
Definition LoopDeletion.cpp:47

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

FAM
FunctionAnalysisManager FAM
Definition PassBuilderBindings.cpp:61

Fun
Function * Fun
Definition PassBuilderBindings.cpp:51

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39

Pass.h

Opc
auto Opc
Definition RISCVRedundantCopyElimination.cpp:77

SimplifyQuery.h

SmallVector.h
This file defines the SmallVector class.

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

Y
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

TargetLowering.h
This file describes how to lower LLVM code to machine code.

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

TargetSubtargetInfo.h

ValueTracking.h

VectorType
Definition ItaniumDemangle.h:1189

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::APInt::getSignMask
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230

llvm::APInt::getSignedMaxValue
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210

llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330

llvm::APInt::countr_zero
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662

llvm::APInt::getSignedMinValue
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220

llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441

llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307

llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition PassAnalysisSupport.h:76

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition PassAnalysisSupport.h:99

llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition AssumptionCache.h:180

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition AssumptionCache.h:44

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461

llvm::BasicBlock::splitBasicBlock
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
Definition BasicBlock.cpp:528

llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213

llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206

llvm::BinaryOperator
Definition InstrTypes.h:171

llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition InstrTypes.h:374

llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682

llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703

llvm::ConstantFP::getInfinity
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
Definition Constants.cpp:1233

llvm::ConstantFP::getZero
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
Definition Constants.cpp:1187

llvm::ConstantFP::getQNaN
static LLVM_ABI Constant * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
Definition Constants.cpp:1165

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition Constants.h:87

llvm::ConstantInt::getTrue
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
Definition Constants.cpp:908

llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135

llvm::Constant::getAllOnesValue
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition Constants.cpp:443

llvm::ExpandIRInstsPass::ExpandIRInstsPass
ExpandIRInstsPass(const TargetMachine &TM, CodeGenOptLevel OptLevel)
Definition ExpandIRInsts.cpp:1338

llvm::ExpandIRInstsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition ExpandIRInsts.cpp:1351

llvm::ExpandIRInstsPass::printPipeline
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Definition ExpandIRInsts.cpp:1342

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23

llvm::FastMathFlags::setAllowContract
void setAllowContract(bool B=true)
Definition FMF.h:93

llvm::FastMathFlags::noInfs
bool noInfs() const
Definition FMF.h:69

llvm::FastMathFlags::setAllowReciprocal
void setAllowReciprocal(bool B=true)
Definition FMF.h:90

llvm::FastMathFlags::approxFunc
bool approxFunc() const
Definition FMF.h:73

llvm::FastMathFlags::setNoNaNs
void setNoNaNs(bool B=true)
Definition FMF.h:81

llvm::FastMathFlags::noNaNs
bool noNaNs() const
Definition FMF.h:68

llvm::FastMathFlags::setNoInfs
void setNoInfs(bool B=true)
Definition FMF.h:84

llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314

llvm::Function
Definition Function.h:65

llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition GlobalValue.h:663

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2847

llvm::Instruction
Definition Instruction.h:69

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:112

llvm::Instruction::isExact
LLVM_ABI bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
Definition Instruction.cpp:610

llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition Instruction.h:343

llvm::IntegerType
Class to represent integer types.
Definition DerivedTypes.h:42

llvm::IntegerType::MAX_INT_BITS
@ MAX_INT_BITS
Maximum number of bits that can be specified.
Definition DerivedTypes.h:54

llvm::LibcallLoweringInfoWrapper
Definition LibcallLoweringInfo.h:126

llvm::LibcallLoweringInfo
Tracks which library functions to use for a particular subtarget.
Definition LibcallLoweringInfo.h:23

llvm::LibcallLoweringModuleAnalysisResult
Record a mapping from subtarget to LibcallLoweringInfo.
Definition LibcallLoweringInfo.h:82

llvm::LibcallLoweringModuleAnalysis
Definition LibcallLoweringInfo.h:113

llvm::PHINode
Definition Instructions.h:2651

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition Instructions.h:2785

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition Constants.cpp:2055

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:579

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition SmallVector.h:679

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:422

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:86

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1223

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55

llvm::SwitchInst
Multiway switch.
Definition Instructions.h:3331

llvm::TargetLoweringBase::getMaxDivRemBitWidthSupported
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
Definition TargetLowering.h:2259

llvm::TargetLoweringBase::getMaxLargeFPConvertBitWidthSupported
unsigned getMaxLargeFPConvertBitWidthSupported() const
Returns the size in bits of the maximum fp to/from int conversion the backend supports.
Definition TargetLowering.h:2265

llvm::TargetLoweringBase::getOperationAction
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
Definition TargetLowering.h:1292

llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition TargetLowering.h:4044

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition TargetMachine.h:83

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition TargetSubtargetInfo.h:66

llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition TargetSubtargetInfo.h:104

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Type::getIntegerBitWidth
LLVM_ABI unsigned getIntegerBitWidth() const
Definition DerivedTypes.h:99

llvm::Type::isX86_FP80Ty
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:161

llvm::Type::isBFloatTy
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147

llvm::Type::getFP128Ty
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:295

llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370

llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:144

llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158

llvm::Type::getIntNTy
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317

llvm::Type::getFloatTy
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290

llvm::Type::getFPMantissaWidth
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:241

llvm::Type::getFltSemantics
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110

llvm::User::dropAllReferences
void dropAllReferences()
Drop all references to operands.
Definition User.h:324

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:207

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255

llvm::Value::replaceAllUsesWith
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549

llvm::Value::takeName
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399

llvm::cl::opt
Definition CommandLine.h:1454

llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition STLFunctionalExtras.h:37

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

Call
CallInst * Call
Definition ObjCARCOpts.cpp:2357

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

TargetMachine.h

false
Definition MachinePipeliner.cpp:245

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81

llvm::ISD::FREM
@ FREM
Definition ISDOpcodes.h:421

llvm::Intrinsic::getOrInsertDeclaration
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition Intrinsics.cpp:774

llvm::SI
Definition SIInstrInfo.h:1897

llvm::bitc::NoInfs
@ NoInfs
Definition LLVMBitCodes.h:547

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm::codeview::CompileSym3Flags::Exp
@ Exp
Definition CodeView.h:459

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:408

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::expandDivision
LLVM_ABI bool expandDivision(BinaryOperator *Div)
Generate code to divide two integers, replacing Div with the generated code.
Definition IntegerDivision.cpp:455

llvm::isKnownNeverInfinity
LLVM_ABI bool isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not an infinity or if the floating-point vector val...
Definition ValueTracking.cpp:6202

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::ModuleAnalysisManagerFunctionProxy
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
Definition PassManager.h:824

llvm::inst_begin
inst_iterator inst_begin(Function *F)
Definition InstIterator.h:129

llvm::PowerOf2Ceil
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385

llvm::createExpandIRInstsPass
LLVM_ABI FunctionPass * createExpandIRInstsPass(CodeGenOptLevel)
Definition ExpandIRInsts.cpp:1384

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745

llvm::get
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
Definition PointerIntPair.h:268

llvm::None
@ None
Definition CodeGenData.h:107

llvm::SplitBlockAndInsertIfThenElse
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
Definition BasicBlockUtils.cpp:1586

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::CodeGenOptLevel
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82

llvm::CodeGenOptLevel::None
@ None
-O0
Definition CodeGen.h:83

llvm::inst_end
inst_iterator inst_end(Function *F)
Definition InstIterator.h:130

llvm::IRBuilder
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

llvm::RecurKind::Xor
@ Xor
Bitwise or logical XOR of integers.
Definition IVDescriptors.h:43

llvm::RecurKind::Sub
@ Sub
Subtraction of integers.
Definition IVDescriptors.h:38

llvm::RecurKind::Add
@ Add
Sum of integers.
Definition IVDescriptors.h:37

llvm::isGuaranteedNotToBeUndefOrPoison
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition ValueTracking.cpp:7960

llvm::BitWidth
constexpr unsigned BitWidth
Definition BitmaskEnum.h:219

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946

llvm::expandRemainder
LLVM_ABI bool expandRemainder(BinaryOperator *Rem)
Generate code to calculate the remainder of two integers, replacing Rem with the generated code.
Definition IntegerDivision.cpp:403

llvm::FunctionAnalysisManager
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Definition PassManager.h:563

llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145

llvm::EVT::getEVT
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition ValueTypes.cpp:303

llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324

llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176

llvm::MIPatternMatch::And
Matching combinators.
Definition MIPatternMatch.h:314

llvm::MIPatternMatch::Or
Definition MIPatternMatch.h:333

llvm::PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70

llvm::PassInfoMixin< LibcallLoweringModuleAnalysis >::name
static StringRef name()
Definition PassManager.h:72

llvm::cl::desc
Definition CommandLine.h:410