/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

Bug Summary

File:	llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:	line 1565, column 21 The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/Hexagon -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/Hexagon -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-08-28-193554-24367-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

→

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//

9#include "HexagonLoopIdiomRecognition.h"
10#include "llvm/ADT/APInt.h"
11#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/Triple.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/InstructionSimplify.h"
20#include "llvm/Analysis/LoopAnalysisManager.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopPass.h"
23#include "llvm/Analysis/MemoryLocation.h"
24#include "llvm/Analysis/ScalarEvolution.h"
25#include "llvm/Analysis/ScalarEvolutionExpressions.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/ValueTracking.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/IntrinsicsHexagon.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/PatternMatch.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/User.h"
49#include "llvm/IR/Value.h"
50#include "llvm/InitializePasses.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include "llvm/Transforms/Utils/Local.h"
62#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
63#include <algorithm>
64#include <array>
65#include <cassert>
66#include <cstdint>
67#include <cstdlib>
68#include <deque>
69#include <functional>
70#include <iterator>
71#include <map>
72#include <set>
73#include <utility>
74#include <vector>

// Debug-type tag for this file.
#define DEBUG_TYPE "hexagon-lir"

78using namespace llvm;

// Command-line options of this pass. All of them are cl::Hidden.

// "disable-memcpy-idiom": boolean switch, defaults to false.
80static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
cl::Hidden, cl::init(false),
cl::desc("Disable generation of memcpy in loop idiom recognition"));

// "disable-memmove-idiom": boolean switch, defaults to false.
84static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
cl::Hidden, cl::init(false),
cl::desc("Disable generation of memmove in loop idiom recognition"));

// "runtime-mem-idiom-threshold": byte threshold, defaults to 0.
88static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
"check guarding the memmove."));

// "compile-time-mem-idiom-threshold": byte threshold, defaults to 64.
92static cl::opt<unsigned> CompileTimeMemSizeThreshold(
"compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
cl::desc("Threshold (in bytes) to perform the transformation, if the "
  "runtime loop count (mem transfer size) is known at compile-time."));

// "only-nonnested-memmove-idiom": boolean switch, defaults to true.
97static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
cl::Hidden, cl::init(true),
cl::desc("Only enable generating memmove in non-nested loops"));

// NOTE(review): the option is spelled "disable-..." while its description
// says "Enable ..."; which of the two reflects the actual use of the flag
// cannot be determined from this part of the file -- confirm at the use site.
101static cl::opt<bool> HexagonVolatileMemcpy(
  "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false),
  cl::desc("Enable Hexagon-specific memcpy for volatile destination."));

// "hlir-simplify-limit": defaults to 10000; read by Simplifier::simplify as
// the bound on the number of worklist steps.
105static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));

// Name string associated with the volatile-memcpy option above (presumably the
// runtime routine to call; its use is outside this part of the file).
108static const char *HexagonVolatileMemcpyName
= "hexagon_memcpy_forward_vp4cp4n2";


// Forward declarations in namespace llvm:
//  - initializeHexagonLoopIdiomRecognizeLegacyPassPass is called from the
//    legacy pass constructor in this file;
//  - createHexagonLoopIdiomPass is only declared here (its definition is not
//    visible in this part of the file).
112namespace llvm {

114void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
115Pass *createHexagonLoopIdiomPass();

117} // end namespace llvm

119namespace {

121class HexagonLoopIdiomRecognize {
122public:
explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
                                   LoopInfo *LF, const TargetLibraryInfo *TLI,
                                   ScalarEvolution *SE)
    : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}

bool run(Loop *L);

130private:
int getSCEVStride(const SCEVAddRecExpr *StoreEv);
bool isLegalStore(Loop *CurLoop, StoreInst *SI);
void collectStores(Loop *CurLoop, BasicBlock *BB,
                   SmallVectorImpl<StoreInst *> &Stores);
bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const;
bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
                    SmallVectorImpl<BasicBlock *> &ExitBlocks);
bool runOnCountableLoop(Loop *L);

AliasAnalysis *AA;
const DataLayout *DL;
DominatorTree *DT;
LoopInfo *LF;
const TargetLibraryInfo *TLI;
ScalarEvolution *SE;
bool HasMemcpy, HasMemmove;
148};

// Legacy pass manager wrapper (a LoopPass) for the Hexagon loop idiom
// recognizer.
150class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass {
151public:
// Pass identification; passed to the LoopPass base constructor.
static char ID;

// Registers the pass with the global PassRegistry on construction.
explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
  initializeHexagonLoopIdiomRecognizeLegacyPassPass(
      *PassRegistry::getPassRegistry());
}

// Human-readable pass name.
StringRef getPassName() const override {
  return "Recognize Hexagon-specific loop idioms";
}

// Declares the analyses/passes this pass requires; of those, only
// TargetLibraryInfoWrapperPass is additionally marked as preserved.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addPreserved<TargetLibraryInfoWrapperPass>();
}

// Per-loop entry point; defined outside the class body.
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
175};

177struct Simplifier {
struct Rule {
  using FuncType = std::function<Value *(Instruction *, LLVMContext &)>;
  Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
  StringRef Name; // For debugging.
  FuncType Fn;
};

void addRule(StringRef N, const Rule::FuncType &F) {
  Rules.push_back(Rule(N, F));
}

189private:
struct WorkListType {
  WorkListType() = default;

  void push_back(Value *V) {
    // Do not push back duplicates.
    if (!S.count(V)) {
      Q.push_back(V);
      S.insert(V);
    }
  }

  Value *pop_front_val() {
    Value *V = Q.front();
    Q.pop_front();
    S.erase(V);
    return V;
  }

  bool empty() const { return Q.empty(); }

private:
  std::deque<Value *> Q;
  std::set<Value *> S;
};

using ValueSetType = std::set<Value *>;

std::vector<Rule> Rules;

219public:
struct Context {
  using ValueMapType = DenseMap<Value *, Value *>;

  Value *Root;
  ValueSetType Used;   // The set of all cloned values used by Root.
  ValueSetType Clones; // The set of all cloned values.
  LLVMContext &Ctx;

  Context(Instruction *Exp)
      : Ctx(Exp->getParent()->getParent()->getContext()) {
    initialize(Exp);
  }

  ~Context() { cleanup(); }

  void print(raw_ostream &OS, const Value *V) const;
  Value *materialize(BasicBlock *B, BasicBlock::iterator At);

private:
  friend struct Simplifier;

  void initialize(Instruction *Exp);
  void cleanup();

  template <typename FuncT> void traverse(Value *V, FuncT F);
  void record(Value *V);
  void use(Value *V);
  void unuse(Value *V);

  bool equal(const Instruction *I, const Instruction *J) const;
  Value *find(Value *Tree, Value *Sub) const;
  Value *subst(Value *Tree, Value *OldV, Value *NewV);
  void replace(Value *OldV, Value *NewV);
  void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
};

Value *simplify(Context &C);
257};

struct PE {
  PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}

  const Simplifier::Context &C;
  const Value *V;
};

LLVM_ATTRIBUTE_USED__attribute__((__used__))
raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
  P.C.print(OS, P.V ? P.V : P.C.Root);
  return OS;
}

272} // end anonymous namespace

274char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;

276INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
                    "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
278INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
279INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
280INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
281INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
282INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
283INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
284INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
285INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
 PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
                  "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
 PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }

288template <typename FuncT>
289void Simplifier::Context::traverse(Value *V, FuncT F) {
WorkListType Q;
Q.push_back(V);

while (!Q.empty()) {
  Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
  if (!U || U->getParent())
    continue;
  if (!F(U))
    continue;
  for (Value *Op : U->operands())
    Q.push_back(Op);
}
302}

304void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
const auto *U = dyn_cast<const Instruction>(V);
if (!U) {
  OS << V << '(' << *V << ')';
  return;
}

if (U->getParent()) {
  OS << U << '(';
  U->printAsOperand(OS, true);
  OS << ')';
  return;
}

unsigned N = U->getNumOperands();
if (N != 0)
  OS << U << '(';
OS << U->getOpcodeName();
for (const Value *Op : U->operands()) {
  OS << ' ';
  print(OS, Op);
}
if (N != 0)
  OS << ')';
328}

330void Simplifier::Context::initialize(Instruction *Exp) {
// Perform a deep clone of the expression, set Root to the root
// of the clone, and build a map from the cloned values to the
// original ones.
ValueMapType M;
BasicBlock *Block = Exp->getParent();
WorkListType Q;
Q.push_back(Exp);

while (!Q.empty()) {
  Value *V = Q.pop_front_val();
  if (M.find(V) != M.end())
    continue;
  if (Instruction *U = dyn_cast<Instruction>(V)) {
    if (isa<PHINode>(U) || U->getParent() != Block)
      continue;
    for (Value *Op : U->operands())
      Q.push_back(Op);
    M.insert({U, U->clone()});
  }
}

for (std::pair<Value*,Value*> P : M) {
  Instruction *U = cast<Instruction>(P.second);
  for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
    auto F = M.find(U->getOperand(i));
    if (F != M.end())
      U->setOperand(i, F->second);
  }
}

auto R = M.find(Exp);
assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail
 ("R != M.end()", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 362, __extension__ __PRETTY_FUNCTION__));
Root = R->second;

record(Root);
use(Root);
367}

369void Simplifier::Context::record(Value *V) {
auto Record = [this](Instruction *U) -> bool {
  Clones.insert(U);
  return true;
};
traverse(V, Record);
375}

377void Simplifier::Context::use(Value *V) {
auto Use = [this](Instruction *U) -> bool {
  Used.insert(U);
  return true;
};
traverse(V, Use);
383}

385void Simplifier::Context::unuse(Value *V) {
if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
  return;

auto Unuse = [this](Instruction *U) -> bool {
  if (!U->use_empty())
    return false;
  Used.erase(U);
  return true;
};
traverse(V, Unuse);
396}

398Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
if (Tree == OldV)
  return NewV;
if (OldV == NewV)
  return Tree;

WorkListType Q;
Q.push_back(Tree);
while (!Q.empty()) {
  Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
  // If U is not an instruction, or it's not a clone, skip it.
  if (!U || U->getParent())
    continue;
  for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
    Value *Op = U->getOperand(i);
    if (Op == OldV) {
      U->setOperand(i, NewV);
      unuse(OldV);
    } else {
      Q.push_back(Op);
    }
  }
}
return Tree;
422}

424void Simplifier::Context::replace(Value *OldV, Value *NewV) {
if (Root == OldV) {
  Root = NewV;
  use(Root);
  return;
}

// NewV may be a complex tree that has just been created by one of the
// transformation rules. We need to make sure that it is commoned with
// the existing Root to the maximum extent possible.
// Identify all subtrees of NewV (including NewV itself) that have
// equivalent counterparts in Root, and replace those subtrees with
// these counterparts.
WorkListType Q;
Q.push_back(NewV);
while (!Q.empty()) {
  Value *V = Q.pop_front_val();
  Instruction *U = dyn_cast<Instruction>(V);
  if (!U || U->getParent())
    continue;
  if (Value *DupV = find(Root, V)) {
    if (DupV != V)
      NewV = subst(NewV, V, DupV);
  } else {
    for (Value *Op : U->operands())
      Q.push_back(Op);
  }
}

// Now, simply replace OldV with NewV in Root.
Root = subst(Root, OldV, NewV);
use(Root);
456}

458void Simplifier::Context::cleanup() {
for (Value *V : Clones) {
  Instruction *U = cast<Instruction>(V);
  if (!U->getParent())
    U->dropAllReferences();
}

for (Value *V : Clones) {
  Instruction *U = cast<Instruction>(V);
  if (!U->getParent())
    U->deleteValue();
}
470}

472bool Simplifier::Context::equal(const Instruction *I,
                              const Instruction *J) const {
if (I == J)
  return true;
if (!I->isSameOperationAs(J))
  return false;
if (isa<PHINode>(I))
  return I->isIdenticalTo(J);

for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
  Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
  if (OpI == OpJ)
    continue;
  auto *InI = dyn_cast<const Instruction>(OpI);
  auto *InJ = dyn_cast<const Instruction>(OpJ);
  if (InI && InJ) {
    if (!equal(InI, InJ))
      return false;
  } else if (InI != InJ || !InI)
    return false;
}
return true;
494}

496Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
Instruction *SubI = dyn_cast<Instruction>(Sub);
WorkListType Q;
Q.push_back(Tree);

while (!Q.empty()) {
  Value *V = Q.pop_front_val();
  if (V == Sub)
    return V;
  Instruction *U = dyn_cast<Instruction>(V);
  if (!U || U->getParent())
    continue;
  if (SubI && equal(SubI, U))
    return U;
  assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0)
 : __assert_fail ("!isa<PHINode>(U)", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 510, __extension__ __PRETTY_FUNCTION__));
  for (Value *Op : U->operands())
    Q.push_back(Op);
}
return nullptr;
515}

517void Simplifier::Context::link(Instruction *I, BasicBlock *B,
    BasicBlock::iterator At) {
if (I->getParent())
  return;

for (Value *Op : I->operands()) {
  if (Instruction *OpI = dyn_cast<Instruction>(Op))
    link(OpI, B, At);
}

B->getInstList().insert(At, I);
528}

530Value *Simplifier::Context::materialize(BasicBlock *B,
    BasicBlock::iterator At) {
if (Instruction *RootI = dyn_cast<Instruction>(Root))
  link(RootI, B, At);
return Root;
535}

537Value *Simplifier::simplify(Context &C) {
WorkListType Q;
Q.push_back(C.Root);
unsigned Count = 0;
const unsigned Limit = SimplifyLimit;

while (!Q.empty()) {
  if (Count++ >= Limit)
    break;
  Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
  if (!U || U->getParent() || !C.Used.count(U))
    continue;
  bool Changed = false;
  for (Rule &R : Rules) {
    Value *W = R.Fn(U, C.Ctx);
    if (!W)
      continue;
    Changed = true;
    C.record(W);
    C.replace(U, W);
    Q.push_back(C.Root);
    break;
  }
  if (!Changed) {
    for (Value *Op : U->operands())
      Q.push_back(Op);
  }
}
return Count < Limit ? C.Root : nullptr;
566}

568//===----------------------------------------------------------------------===//
569//
570//          Implementation of PolynomialMultiplyRecognize
571//
572//===----------------------------------------------------------------------===//

574namespace {

// Recognizer for polynomial-multiply (pmpy) loop idioms in a single loop.
// The object keeps the loop and references to the analyses it was built
// with; recognize() is the only public entry point.
class PolynomialMultiplyRecognize {
public:
  explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
      const DominatorTree &dt, const TargetLibraryInfo &tli,
      ScalarEvolution &se)
    : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}

  bool recognize();

private:
  using ValueSeq = SetVector<Value *>;

  // The integer type used for pmpy: a 32-bit integer in the context of the
  // function that contains the loop.
  IntegerType *getPmpyType() const {
    LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
    return IntegerType::get(Ctx, 32);
  }

  bool isPromotableTo(Value *V, IntegerType *Ty);
  void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
  bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);

  // Returns the PHI node in BB that starts at 0 and is incremented by 1 on
  // the back edge, or nullptr.
  Value *getCountIV(BasicBlock *BB);
  bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
  void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
        ValueSeq &Late);
  bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
  bool commutesWithShift(Instruction *I);
  bool highBitsAreZero(Value *V, unsigned IterCount);
  bool keepsHighBitsZero(Value *V, unsigned IterCount);
  bool isOperandShifted(Instruction *I, Value *Op);
  bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
        unsigned IterCount);
  void cleanupLoopBody(BasicBlock *LoopB);

  // Result of matching a select against the pmpy patterns. The roles below
  // are the ones assigned by matchLeftShift, matchRightShift and scanSelect.
  struct ParsedValues {
    ParsedValues() = default;

    Value *M = nullptr;       // Invariant operand when X is "R xor invariant".
    Value *P = nullptr;       // Input polynomial.
    Value *Q = nullptr;       // Value that is xor-ed into R.
    Value *R = nullptr;       // Value the xor is applied to.
    Value *X = nullptr;       // Value whose bit is tested.
    Instruction *Res = nullptr; // The matched select.
    unsigned IterCount = 0;   // Not set by the matchers visible here.
    bool Left = false;        // True for the left-shifting form.
    bool Inv = false;         // True for the inverse pmpy.
  };

  bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
  bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
  bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
        Value *CIV, ParsedValues &PV, bool PreScan);
  unsigned getInverseMxN(unsigned QP);
  Value *generate(BasicBlock::iterator At, ParsedValues &PV);

  void setupPreSimplifier(Simplifier &S);
  void setupPostSimplifier(Simplifier &S);

  Loop *CurLoop;
  const DataLayout &DL;
  const DominatorTree &DT;
  const TargetLibraryInfo &TLI;
  ScalarEvolution &SE;
};

641} // end anonymous namespace

643Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
if (std::distance(PI, PE) != 2)
  return nullptr;
BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;

for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
  auto *PN = cast<PHINode>(I);
  Value *InitV = PN->getIncomingValueForBlock(PB);
  if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
    continue;
  Value *IterV = PN->getIncomingValueForBlock(BB);
  auto *BO = dyn_cast<BinaryOperator>(IterV);
  if (!BO)
    continue;
  if (BO->getOpcode() != Instruction::Add)
    continue;
  Value *IncV = nullptr;
  if (BO->getOperand(0) == PN)
    IncV = BO->getOperand(1);
  else if (BO->getOperand(1) == PN)
    IncV = BO->getOperand(0);
  if (IncV == nullptr)
    continue;

  if (auto *T = dyn_cast<ConstantInt>(IncV))
    if (T->getZExtValue() == 1)
      return PN;
}
return nullptr;
673}

675static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
  Use &TheUse = UI.getUse();
  ++UI;
  if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
    if (BB == II->getParent())
      II->replaceUsesOfWith(I, J);
}
683}

// Match SelI against the left-shifting pmpy step, where CIV is the loop's
// counting induction variable (the "i" in the patterns below).
// On success PV.X, PV.Q and PV.R are filled in, PV.Left is set to true, and
// true is returned. On failure false is returned and PV is not modified.
685bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
    Value *CIV, ParsedValues &PV) {
// Match the following:
//   select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
//   select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
// The condition may also check for equality with the masked value, i.e
//   select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
//   select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);

Value *CondV = SelI->getCondition();
Value *TrueV = SelI->getTrueValue();
Value *FalseV = SelI->getFalseValue();

using namespace PatternMatch;

CmpInst::Predicate P;
Value *A = nullptr, *B = nullptr, *C = nullptr;

// The "and" may be on either side of the comparison.
if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
    !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
  return false;
if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
  return false;
// Matched: select (A & B) == C ? ... : ...
//          select (A & B) != C ? ... : ...

Value *X = nullptr, *Sh1 = nullptr;
// Check (A & B) for (X & (1 << i)):
if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
  Sh1 = A;
  X = B;
} else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
  Sh1 = B;
  X = A;
} else {
  // TODO: Could also check for an induction variable containing single
  // bit shifted left by 1 in each iteration.
  return false;
}

// True when the select's condition holds for a tested bit equal to 0.
bool TrueIfZero;

// Check C against the possible values for comparison: 0 and (1 << i):
if (match(C, m_Zero()))
  TrueIfZero = (P == CmpInst::ICMP_EQ);
else if (C == Sh1)
  TrueIfZero = (P == CmpInst::ICMP_NE);
else
  return false;

// So far, matched:
//   select (X & (1 << i)) ? ... : ...
// including variations of the check against zero/non-zero value.

// ShouldSameV is the arm selected when the bit is 0 (expected: R unchanged),
// ShouldXoredV the arm selected when the bit is 1 (expected: R ^ ...).
Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
if (TrueIfZero) {
  ShouldSameV = TrueV;
  ShouldXoredV = FalseV;
} else {
  ShouldSameV = FalseV;
  ShouldXoredV = TrueV;
}

Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
Value *T = nullptr;
if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
  // Matched: select +++ ? ... : Y ^ Z
  //          select +++ ? Y ^ Z : ...
  // where +++ denotes previously checked matches.
  if (ShouldSameV == Y)
    T = Z;
  else if (ShouldSameV == Z)
    T = Y;
  else
    return false;
  R = ShouldSameV;
  // Matched: select +++ ? R : R ^ T
  //          select +++ ? R ^ T : R
  // depending on TrueIfZero.

} else if (match(ShouldSameV, m_Zero())) {
  // Matched: select +++ ? 0 : ...
  //          select +++ ? ... : 0
  // Here the xor with R is expected in the select's single user.
  if (!SelI->hasOneUse())
    return false;
  T = ShouldXoredV;
  // Matched: select +++ ? 0 : T
  //          select +++ ? T : 0

  Value *U = *SelI->user_begin();
  if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
      !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
    return false;
  // Matched: xor (select +++ ? 0 : T), R
  //          xor (select +++ ? T : 0), R
} else
  return false;

// The xor input value T is isolated into its own match so that it could
// be checked against an induction variable containing a shifted bit
// (todo).
// For now, check against (Q << i).
if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
    !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
  return false;
// Matched: select +++ ? R : R ^ (Q << i)
//          select +++ ? R ^ (Q << i) : R

PV.X = X;
PV.Q = Q;
PV.R = R;
PV.Left = true;
return true;
798}

// Match SelI against the right-shifting pmpy step.
// On success PV.X, PV.Q and PV.R are filled in, PV.Left is set to false, and
// true is returned. On failure false is returned and PV is not modified.
800bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
    ParsedValues &PV) {
// Match the following:
//   select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
//   select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
// The condition may also check for equality with the masked value, i.e
//   select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
//   select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q

Value *CondV = SelI->getCondition();
Value *TrueV = SelI->getTrueValue();
Value *FalseV = SelI->getFalseValue();

using namespace PatternMatch;

Value *C = nullptr;
CmpInst::Predicate P;
// True when the select's condition holds for a tested bit equal to 0.
bool TrueIfZero;

// The constant may be on either side of the comparison.
if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
    match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
  if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
    return false;
  // Matched: select C == 0 ? ... : ...
  //          select C != 0 ? ... : ...
  TrueIfZero = (P == CmpInst::ICMP_EQ);
} else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
           match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
  if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
    return false;
  // Matched: select C == 1 ? ... : ...
  //          select C != 1 ? ... : ...
  TrueIfZero = (P == CmpInst::ICMP_NE);
} else
  return false;

Value *X = nullptr;
if (!match(C, m_And(m_Value(X), m_One())) &&
    !match(C, m_And(m_One(), m_Value(X))))
  return false;
// Matched: select (X & 1) == +++ ? ... : ...
//          select (X & 1) != +++ ? ... : ...

Value *R = nullptr, *Q = nullptr;
if (TrueIfZero) {
  // The select's condition is true if the tested bit is 0.
  // TrueV must be the shift, FalseV must be the xor.
  if (!match(TrueV, m_LShr(m_Value(R), m_One())))
    return false;
  // Matched: select +++ ? (R >> 1) : ...
  if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
      !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
    return false;
  // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
  // with commuting ^.
} else {
  // The select's condition is true if the tested bit is 1.
  // TrueV must be the xor, FalseV must be the shift.
  if (!match(FalseV, m_LShr(m_Value(R), m_One())))
    return false;
  // Matched: select +++ ? ... : (R >> 1)
  if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
      !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
    return false;
  // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
  // with commuting ^.
}

PV.X = X;
PV.Q = Q;
PV.R = R;
PV.Left = false;
return true;
873}

// Check whether SelI is the select of a pmpy idiom in loop block LoopB
// (PrehB is the block supplying the entry values of the PHIs, CIV the
// counting induction variable) and record the parsed values in PV.
//
// With PreScan set, a successful pattern match alone is sufficient. Without
// it, only the left-shifting form can succeed: the select must feed back into
// the PHI for R, and PV.Res, PV.P, PV.Inv (and PV.M for the xor-ed inverse
// form) are filled in. The right-shifting form returns true only in PreScan
// mode.
875bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
    BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
    bool PreScan) {
using namespace PatternMatch;

// The basic pattern for R = P.Q is:
// for i = 0..31
//   R = phi (0, R')
//   if (P & (1 << i))        ; test-bit(P, i)
//     R' = R ^ (Q << i)
//
// Similarly, the basic pattern for R = (P/Q).Q - P
// for i = 0..31
//   R = phi(P, R')
//   if (R & (1 << i))
//     R' = R ^ (Q << i)

// There exist idioms, where instead of Q being shifted left, P is shifted
// right. This produces a result that is shifted right by 32 bits (the
// non-shifted result is 64-bit).
//
// For R = P.Q, this would be:
// for i = 0..31
//   R = phi (0, R')
//   if ((P >> i) & 1)
//     R' = (R >> 1) ^ Q      ; R is cycled through the loop, so it must
//   else                     ; be shifted by 1, not i.
//     R' = R >> 1
//
// And for the inverse:
// for i = 0..31
//   R = phi (P, R')
//   if (R & 1)
//     R' = (R >> 1) ^ Q
//   else
//     R' = R >> 1

// The left-shifting idioms share the same pattern:
//   select (X & (1 << i)) ? R ^ (Q << i) : R
// Similarly for right-shifting idioms:
//   select (X & 1) ? (R >> 1) ^ Q

if (matchLeftShift(SelI, CIV, PV)) {
  // If this is a pre-scan, getting this far is sufficient.
  if (PreScan)
    return true;

  // Need to make sure that the SelI goes back into R.
  auto *RPhi = dyn_cast<PHINode>(PV.R);
  if (!RPhi)
    return false;
  if (SelI != RPhi->getIncomingValueForBlock(LoopB))
    return false;
  PV.Res = SelI;

  // If X is loop invariant, it must be the input polynomial, and the
  // idiom is the basic polynomial multiply.
  if (CurLoop->isLoopInvariant(PV.X)) {
    PV.P = PV.X;
    PV.Inv = false;
  } else {
    // X is not loop invariant. If X == R, this is the inverse pmpy.
    // Otherwise, check for an xor with an invariant value. If the
    // variable argument to the xor is R, then this is still a valid
    // inverse pmpy.
    PV.Inv = true;
    if (PV.X != PV.R) {
      Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
      if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
        return false;
      // An xor operand counts as invariant here when it is not an
      // instruction located in LoopB.
      auto *I1 = dyn_cast<Instruction>(X1);
      auto *I2 = dyn_cast<Instruction>(X2);
      if (!I1 || I1->getParent() != LoopB) {
        Var = X2;
        Inv = X1;
      } else if (!I2 || I2->getParent() != LoopB) {
        Var = X1;
        Inv = X2;
      } else
        return false;
      if (Var != PV.R)
        return false;
      PV.M = Inv;
    }
    // The input polynomial P still needs to be determined. It will be
    // the entry value of R.
    Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
    PV.P = EntryP;
  }

  return true;
}

if (matchRightShift(SelI, PV)) {
  // If this is an inverse pattern, the Q polynomial must be known at
  // compile time.
  // Note: neither matcher writes PV.Inv, so the value tested here is
  // whatever PV.Inv held when this function was called.
  if (PV.Inv && !isa<ConstantInt>(PV.Q))
    return false;
  if (PreScan)
    return true;
  // There is no exact matching of right-shift pmpy.
  return false;
}

return false;
980}

982bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
    IntegerType *DestTy) {
IntegerType *T = dyn_cast<IntegerType>(Val->getType());
if (!T || T->getBitWidth() > DestTy->getBitWidth())
  return false;
if (T->getBitWidth() == DestTy->getBitWidth())
  return true;
// Non-instructions are promotable. The reason why an instruction may not
// be promotable is that it may produce a different result if its operands
// and the result are promoted, for example, it may produce more non-zero
// bits. While it would still be possible to represent the proper result
// in a wider type, it may require adding additional instructions (which
// we don't want to do).
Instruction *In = dyn_cast<Instruction>(Val);
if (!In)
  return true;
// The bitwidth of the source type is smaller than the destination.
// Check if the individual operation can be promoted.
switch (In->getOpcode()) {
  case Instruction::PHI:
  case Instruction::ZExt:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::LShr: // Shift right is ok.
  case Instruction::Select:
  case Instruction::Trunc:
    return true;
  case Instruction::ICmp:
    if (CmpInst *CI = cast<CmpInst>(In))
      return CI->isEquality() || CI->isUnsigned();
    llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
 "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1013);
  case Instruction::Add:
    return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
}
return false;
1018}

1020void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
    IntegerType *DestTy, BasicBlock *LoopB) {
Type *OrigTy = In->getType();
assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast <bool> (!OrigTy->isVoidTy() && "Invalid instruction to promote"
) ? void (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1023, __extension__ __PRETTY_FUNCTION__));

// Leave boolean values alone.
if (!In->getType()->isIntegerTy(1))
  In->mutateType(DestTy);
unsigned DestBW = DestTy->getBitWidth();

// Handle PHIs.
if (PHINode *P = dyn_cast<PHINode>(In)) {
  unsigned N = P->getNumIncomingValues();
  for (unsigned i = 0; i != N; ++i) {
    BasicBlock *InB = P->getIncomingBlock(i);
    if (InB == LoopB)
      continue;
    Value *InV = P->getIncomingValue(i);
    IntegerType *Ty = cast<IntegerType>(InV->getType());
    // Do not promote values in PHI nodes of type i1.
    if (Ty != P->getType()) {
      // If the value type does not match the PHI type, the PHI type
      // must have been promoted.
      assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ?
 void (0) : __assert_fail ("Ty->getBitWidth() < DestBW"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1043, __extension__ __PRETTY_FUNCTION__));
      InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
      P->setIncomingValue(i, InV);
    }
  }
} else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
  Value *Op = Z->getOperand(0);
  if (Op->getType() == Z->getType())
    Z->replaceAllUsesWith(Op);
  Z->eraseFromParent();
  return;
}
if (TruncInst *T = dyn_cast<TruncInst>(In)) {
  IntegerType *TruncTy = cast<IntegerType>(OrigTy);
  Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
  Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
  T->replaceAllUsesWith(And);
  T->eraseFromParent();
  return;
}

// Promote immediates.
for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
    if (CI->getType()->getBitWidth() < DestBW)
      In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
}
1070}

1072bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
    BasicBlock *ExitB) {
assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail (
"LoopB", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1074, __extension__ __PRETTY_FUNCTION__));
// Skip loops where the exit block has more than one predecessor. The values
// coming from the loop block will be promoted to another type, and so the
// values coming into the exit block from other predecessors would also have
// to be promoted.
if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
  return false;
IntegerType *DestTy = getPmpyType();
// Check if the exit values have types that are no wider than the type
// that we want to promote to.
unsigned DestBW = DestTy->getBitWidth();
for (PHINode &P : ExitB->phis()) {
  if (P.getNumIncomingValues() != 1)
    return false;
  assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void
 (0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1088, __extension__ __PRETTY_FUNCTION__));
  IntegerType *T = dyn_cast<IntegerType>(P.getType());
  if (!T || T->getBitWidth() > DestBW)
    return false;
}

// Check all instructions in the loop.
for (Instruction &In : *LoopB)
  if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
    return false;

// Perform the promotion.
std::vector<Instruction*> LoopIns;
std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
               [](Instruction &In) { return &In; });
for (Instruction *In : LoopIns)
  if (!In->isTerminator())
    promoteTo(In, DestTy, LoopB);

// Fix up the PHI nodes in the exit block.
Instruction *EndI = ExitB->getFirstNonPHI();
BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
for (auto I = ExitB->begin(); I != End; ++I) {
  PHINode *P = dyn_cast<PHINode>(I);
  if (!P)
    break;
  Type *Ty0 = P->getIncomingValue(0)->getType();
  Type *PTy = P->getType();
  if (PTy != Ty0) {
    assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail
 ("Ty0 == DestTy", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1117, __extension__ __PRETTY_FUNCTION__));
    // In order to create the trunc, P must have the promoted type.
    P->mutateType(Ty0);
    Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
    // In order for the RAUW to work, the types of P and T must match.
    P->mutateType(PTy);
    P->replaceAllUsesWith(T);
    // Final update of the P's type.
    P->mutateType(Ty0);
    cast<Instruction>(T)->setOperand(0, P);
  }
}

return true;
1131}

1133bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
    ValueSeq &Cycle) {
// Out = ..., In, ...
if (Out == In)
  return true;

auto *BB = cast<Instruction>(Out)->getParent();
bool HadPhi = false;

for (auto U : Out->users()) {
  auto *I = dyn_cast<Instruction>(&*U);
  if (I == nullptr || I->getParent() != BB)
    continue;
  // Make sure that there are no multi-iteration cycles, e.g.
  //   p1 = phi(p2)
  //   p2 = phi(p1)
  // The cycle p1->p2->p1 would span two loop iterations.
  // Check that there is only one phi in the cycle.
  bool IsPhi = isa<PHINode>(I);
  if (IsPhi && HadPhi)
    return false;
  HadPhi |= IsPhi;
  if (Cycle.count(I))
    return false;
  Cycle.insert(I);
  if (findCycle(I, In, Cycle))
    break;
  Cycle.remove(I);
}
return !Cycle.empty();
1163}

1165void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
    ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
// All the values in the cycle that are between the phi node and the
// divider instruction will be classified as "early", all other values
// will be "late".

bool IsE = true;
unsigned I, N = Cycle.size();
for (I = 0; I < N; ++I) {
  Value *V = Cycle[I];
  if (DivI == V)
    IsE = false;
  else if (!isa<PHINode>(V))
    continue;
  // Stop if found either.
  break;
}
// "I" is the index of either DivI or the phi node, whichever was first.
// "E" is "false" or "true" respectively.
ValueSeq &First = !IsE ? Early : Late;
for (unsigned J = 0; J < I; ++J)
  First.insert(Cycle[J]);

ValueSeq &Second = IsE ? Early : Late;
Second.insert(Cycle[I]);
for (++I; I < N; ++I) {
  Value *V = Cycle[I];
  if (DivI == V || isa<PHINode>(V))
    break;
  Second.insert(V);
}

for (; I < N; ++I)
  First.insert(Cycle[I]);
1199}

1201bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
    ValueSeq &Early, ValueSeq &Late) {
// Select is an exception, since the condition value does not have to be
// classified in the same way as the true/false values. The true/false
// values do have to be both early or both late.
if (UseI->getOpcode() == Instruction::Select) {
  Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
  if (Early.count(TV) || Early.count(FV)) {
    if (Late.count(TV) || Late.count(FV))
      return false;
    Early.insert(UseI);
  } else if (Late.count(TV) || Late.count(FV)) {
    if (Early.count(TV) || Early.count(FV))
      return false;
    Late.insert(UseI);
  }
  return true;
}

// Not sure what would be the example of this, but the code below relies
// on having at least one operand.
if (UseI->getNumOperands() == 0)
  return true;

bool AE = true, AL = true;
for (auto &I : UseI->operands()) {
  if (Early.count(&*I))
    AL = false;
  else if (Late.count(&*I))
    AE = false;
}
// If the operands appear "all early" and "all late" at the same time,
// then it means that none of them are actually classified as either.
// This is harmless.
if (AE && AL)
  return true;
// Conversely, if they are neither "all early" nor "all late", then
// we have a mixture of early and late operands that is not a known
// exception.
if (!AE && !AL)
  return false;

// Check that we have covered the two special cases.
assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail
 ("AE != AL", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1244, __extension__ __PRETTY_FUNCTION__));

if (AE)
  Early.insert(UseI);
else
  Late.insert(UseI);
return true;
1251}

1253bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
switch (I->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::Select:
  case Instruction::ICmp:
  case Instruction::PHI:
    break;
  default:
    return false;
}
return true;
1268}

1270bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
    unsigned IterCount) {
auto *T = dyn_cast<IntegerType>(V->getType());
if (!T)
  return false;

KnownBits Known(T->getBitWidth());
computeKnownBits(V, Known, DL);
return Known.countMinLeadingZeros() >= IterCount;
1279}

1281bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
    unsigned IterCount) {
// Assume that all inputs to the value have the high bits zero.
// Check if the value itself preserves the zeros in the high bits.
if (auto *C = dyn_cast<ConstantInt>(V))
  return C->getValue().countLeadingZeros() >= IterCount;

if (auto *I = dyn_cast<Instruction>(V)) {
  switch (I->getOpcode()) {
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::LShr:
    case Instruction::Select:
    case Instruction::ICmp:
    case Instruction::PHI:
    case Instruction::ZExt:
      return true;
  }
}

return false;
1303}

1305bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
unsigned Opc = I->getOpcode();
if (Opc == Instruction::Shl || Opc == Instruction::LShr)
  return Op != I->getOperand(1);
return true;
1310}

// Convert a loop that cycles values through "lshr x, 1" into one that
// shifts the other operands left instead.
//
// LoopB is the loop block, ExitB the exit block, and IterCount the number
// of leading bits that must be (and stay) zero in the affected values; it
// is also the amount by which the exit values are shifted right afterwards.
//
// Returns false, without modifying the IR, if there is no integer-typed
// count induction variable, if an affected value is not an integer or does
// not commute with shifts, if an instruction mixes early and late
// operands, if nothing is affected, or if the high bits are not known to
// remain zero. Returns true once the loop has been rewritten.
1312bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
    BasicBlock *ExitB, unsigned IterCount) {
Value *CIV = getCountIV(LoopB);
if (CIV == nullptr)
  return false;
auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
if (CIVTy == nullptr)
  return false;

// RShifts: the "lshr x, 1" instructions that are part of a cycle.
// Early/Late: classification of values relative to those shifts.
// Cycled: all values that belong to some shift cycle.
ValueSeq RShifts;
ValueSeq Early, Late, Cycled;

// Find all value cycles that contain logical right shifts by 1.
for (Instruction &I : *LoopB) {
  using namespace PatternMatch;

  Value *V = nullptr;
  if (!match(&I, m_LShr(m_Value(V), m_One())))
    continue;
  ValueSeq C;
  if (!findCycle(&I, V, C))
    continue;

  // Found a cycle.
  C.insert(&I);
  classifyCycle(&I, C, Early, Late);
  Cycled.insert(C.begin(), C.end());
  RShifts.insert(&I);
}

// Find the set of all values affected by the shift cycles, i.e. all
// cycled values, and (recursively) all their users.
// Users grows while it is being traversed, so it doubles as a worklist.
ValueSeq Users(Cycled.begin(), Cycled.end());
for (unsigned i = 0; i < Users.size(); ++i) {
  Value *V = Users[i];
  if (!isa<IntegerType>(V->getType()))
    return false;
  auto *R = cast<Instruction>(V);
  // If the instruction does not commute with shifts, the loop cannot
  // be unshifted.
  if (!commutesWithShift(R))
    return false;
  for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
    auto *T = cast<Instruction>(*I);
    // Skip users from outside of the loop. They will be handled later.
    // Also, skip the right-shifts and phi nodes, since they mix early
    // and late values.
    if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
      continue;

    Users.insert(T);
    if (!classifyInst(T, Early, Late))
      return false;
  }
}

if (Users.empty())
  return false;

// Verify that high bits remain zero.
// Internal collects the affected values plus, transitively, their operands
// that are not instructions from another block; operands that are
// instructions from outside LoopB go to Inputs.
ValueSeq Internal(Users.begin(), Users.end());
ValueSeq Inputs;
for (unsigned i = 0; i < Internal.size(); ++i) {
  auto *R = dyn_cast<Instruction>(Internal[i]);
  if (!R)
    continue;
  for (Value *Op : R->operands()) {
    auto *T = dyn_cast<Instruction>(Op);
    if (T && T->getParent() != LoopB)
      Inputs.insert(Op);
    else
      Internal.insert(Op);
  }
}
// Inputs must be known to have the high bits clear; internal values only
// need to preserve that property.
for (Value *V : Inputs)
  if (!highBitsAreZero(V, IterCount))
    return false;
for (Value *V : Internal)
  if (!keepsHighBitsZero(V, IterCount))
    return false;

// Finally, the work can be done. Unshift each user.
IRBuilder<> IRB(LoopB);
// ShiftMap memoizes, per original operand, the left-shifted replacement.
std::map<Value*,Value*> ShiftMap;

using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;

// CastMap memoizes the integer casts created by "upcast" below.
CastMapType CastMap;

// Cast V to Ty as an unsigned value (isSigned = false), reusing a
// previously created cast of the same value to the same type.
auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
      IntegerType *Ty) -> Value* {
  auto H = CM.find(std::make_pair(V, Ty));
  if (H != CM.end())
    return H->second;
  Value *CV = IRB.CreateIntCast(V, Ty, false);
  CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
  return CV;
};

for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
  using namespace PatternMatch;

  if (isa<PHINode>(I) || !Users.count(&*I))
    continue;

  // Match lshr x, 1.
  Value *V = nullptr;
  if (match(&*I, m_LShr(m_Value(V), m_One()))) {
    // The right shift goes away: its uses take the unshifted value.
    // NOTE(review): presumably replaceAllUsesOfWithIn limits the
    // replacement to uses inside LoopB - confirm against its definition.
    replaceAllUsesOfWithIn(&*I, V, LoopB);
    continue;
  }
  // For each non-cycled operand, replace it with the corresponding
  // value shifted left.
  for (auto &J : I->operands()) {
    Value *Op = J.get();
    // The shift amount of a shl/lshr is not itself shifted.
    if (!isOperandShifted(&*I, Op))
      continue;
    if (Users.count(Op))
      continue;
    // Skip shifting zeros.
    if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
      continue;
    // Check if we have already generated a shift for this value.
    auto F = ShiftMap.find(Op);
    Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
    if (W == nullptr) {
      IRB.SetInsertPoint(&*I);
      // First, the shift amount will be CIV or CIV+1, depending on
      // whether the value is early or late. Instead of creating CIV+1,
      // do a single shift of the value.
      Value *ShAmt = CIV, *ShVal = Op;
      auto *VTy = cast<IntegerType>(ShVal->getType());
      auto *ATy = cast<IntegerType>(ShAmt->getType());
      if (Late.count(&*I))
        ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
      // Second, the types of the shifted value and the shift amount
      // must match.
      if (VTy != ATy) {
        // Widen whichever of the two is narrower.
        if (VTy->getBitWidth() < ATy->getBitWidth())
          ShVal = upcast(CastMap, IRB, ShVal, ATy);
        else
          ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
      }
      // Ready to generate the shift and memoize it.
      W = IRB.CreateShl(ShVal, ShAmt);
      ShiftMap.insert(std::make_pair(Op, W));
    }
    I->replaceUsesOfWith(Op, W);
  }
}

// Update the users outside of the loop to account for having left
// shifts. They would normally be shifted right in the loop, so shift
// them right after the loop exit.
// Take advantage of the loop-closed SSA form, which has all the post-
// loop values in phi nodes.
IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
  if (!isa<PHINode>(P))
    break;
  auto *PN = cast<PHINode>(P);
  Value *U = PN->getIncomingValueForBlock(LoopB);
  // Only exit values that come from affected instructions need fixing.
  if (!Users.count(U))
    continue;
  Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
  PN->replaceAllUsesWith(S);
  // The above RAUW will create
  //   S = lshr S, IterCount
  // so we need to fix it back into
  //   S = lshr PN, IterCount
  cast<User>(S)->replaceUsesOfWith(S, PN);
}

return true;
1486}

1488void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
for (auto &I : *LoopB)
  if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
    I.replaceAllUsesWith(SV);

for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
  N = std::next(I);
  RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
}
1497}

1499unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
// Arrays of coefficients of Q and the inverse, C.
// Q[i] = coefficient at x^i.
std::array<char,32> Q, C;

for (unsigned i = 0; i < 32; ++i) {
  Q[i] = QP & 1;
  QP >>= 1;
}
assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail
 ("Q[0] == 1", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1508, __extension__ __PRETTY_FUNCTION__));

// Find C, such that
// (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
//
// For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
// operations * and + are & and ^ respectively.
//
// Find C[i] recursively, by comparing i-th coefficient in the product
// with 0 (or 1 for i=0).
//
// C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
C[0] = 1;
for (unsigned i = 1; i < 32; ++i) {
  // Solve for C[i] in:
  //   C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
  // This is equivalent to
  //   C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
  // which is
  //   C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
  unsigned T = 0;
  for (unsigned j = 0; j < i; ++j)
    T = T ^ (C[j] & Q[i-j]);
  C[i] = T;
}

unsigned QV = 0;
for (unsigned i = 0; i < 32; ++i)
  if (C[i])
    QV |= (1 << i);

return QV;
1540}

1542Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
    ParsedValues &PV) {
IRBuilder<> B(&*At);
Module *M = At->getParent()->getParent()->getParent();
Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);

Value *P = PV.P, *Q = PV.Q, *P0 = P;
unsigned IC = PV.IterCount;

if (PV.M != nullptr)
1
Assuming the condition is false→
2
←
Taking false branch→
  P0 = P = B.CreateXor(P, PV.M);

// Create a bit mask to clear the high bits beyond IterCount.
auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
←
Calling 'APInt::getLowBitsSet'→
15
←
Returning from 'APInt::getLowBitsSet'→

if (PV.IterCount != 32)
16
←
Assuming field 'IterCount' is equal to 32→
17
←
Taking false branch→
  P = B.CreateAnd(P, BMI);

if (PV.Inv) {
18
←
Assuming field 'Inv' is true→
19
←
Taking true branch→
  auto *QI = dyn_cast<ConstantInt>(PV.Q);
20
←
Assuming field 'Q' is a 'ConstantInt'→
  assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth()
 <= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1562, __extension__ __PRETTY_FUNCTION__));
21
←
Assuming the condition is true→
22
←
'?' condition is true→

  // Again, clearing bits beyond IterCount.
  unsigned M = (1 << PV.IterCount) - 1;
23
←
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
  unsigned Tmp = (QI->getZExtValue() | 1) & M;
  unsigned QV = getInverseMxN(Tmp) & M;
  auto *QVI = ConstantInt::get(QI->getType(), QV);
  P = B.CreateCall(PMF, {P, QVI});
  P = B.CreateTrunc(P, QI->getType());
  if (IC != 32)
    P = B.CreateAnd(P, BMI);
}

Value *R = B.CreateCall(PMF, {P, Q});

if (PV.M != nullptr)
  R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));

return R;
1581}

1583static bool hasZeroSignBit(const Value *V) {
if (const auto *CI = dyn_cast<const ConstantInt>(V))
  return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
const Instruction *I = dyn_cast<const Instruction>(V);
if (!I)
  return false;
switch (I->getOpcode()) {
  case Instruction::LShr:
    if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
      return SI->getZExtValue() > 0;
    return false;
  case Instruction::Or:
  case Instruction::Xor:
    return hasZeroSignBit(I->getOperand(0)) &&
           hasZeroSignBit(I->getOperand(1));
  case Instruction::And:
    return hasZeroSignBit(I->getOperand(0)) ||
           hasZeroSignBit(I->getOperand(1));
}
return false;
1603}

1605void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
S.addRule("sink-zext",
  // Sink zext past bitwise operations.
  [](Instruction *I, LLVMContext &Ctx) -> Value* {
    if (I->getOpcode() != Instruction::ZExt)
      return nullptr;
    Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
    if (!T)
      return nullptr;
    switch (T->getOpcode()) {
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        break;
      default:
        return nullptr;
    }
    IRBuilder<> B(Ctx);
    return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
                         B.CreateZExt(T->getOperand(0), I->getType()),
                         B.CreateZExt(T->getOperand(1), I->getType()));
  });
S.addRule("xor/and -> and/xor",
  // (xor (and x a) (and y a)) -> (and (xor x y) a)
  [](Instruction *I, LLVMContext &Ctx) -> Value* {
    if (I->getOpcode() != Instruction::Xor)
      return nullptr;
    Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
    Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
    if (!And0 || !And1)
      return nullptr;
    if (And0->getOpcode() != Instruction::And ||
        And1->getOpcode() != Instruction::And)
      return nullptr;
    if (And0->getOperand(1) != And1->getOperand(1))
      return nullptr;
    IRBuilder<> B(Ctx);
    return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
                       And0->getOperand(1));
  });
S.addRule("sink binop into select",
  // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
  // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
  [](Instruction *I, LLVMContext &Ctx) -> Value* {
    BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
    if (!BO)
      return nullptr;
    Instruction::BinaryOps Op = BO->getOpcode();
    if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
      IRBuilder<> B(Ctx);
      Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
      Value *Z = BO->getOperand(1);
      return B.CreateSelect(Sel->getCondition(),
                            B.CreateBinOp(Op, X, Z),
                            B.CreateBinOp(Op, Y, Z));
    }
    if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
      IRBuilder<> B(Ctx);
      Value *X = BO->getOperand(0);
      Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
      return B.CreateSelect(Sel->getCondition(),
                            B.CreateBinOp(Op, X, Y),
                            B.CreateBinOp(Op, X, Z));
    }
    return nullptr;
  });
S.addRule("fold select-select",
  // (select c (select c x y) z) -> (select c x z)
  // (select c x (select c y z)) -> (select c x z)
  [](Instruction *I, LLVMContext &Ctx) -> Value* {
    SelectInst *Sel = dyn_cast<SelectInst>(I);
    if (!Sel)
      return nullptr;
    IRBuilder<> B(Ctx);
    Value *C = Sel->getCondition();
    if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
      if (Sel0->getCondition() == C)
        return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
    }
    if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
      if (Sel1->getCondition() == C)
        return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
    }
    return nullptr;
  });
S.addRule("or-signbit -> xor-signbit",
  // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
  [](Instruction *I, LLVMContext &Ctx) -> Value* {
    if (I->getOpcode() != Instruction::Or)
      return nullptr;
    ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
    if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
      return nullptr;
    if (!hasZeroSignBit(I->getOperand(0)))
      return nullptr;
    return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
  });
S.addRule("sink lshr into binop",
  // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
  [](Instruction *I, LLVMContext &Ctx) -> Value* {
    if (I->getOpcode() != Instruction::LShr)
      return nullptr;
    BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
    if (!BitOp)
      return nullptr;
    switch (BitOp->getOpcode()) {
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
        break;
      default:
        return nullptr;
    }
    IRBuilder<> B(Ctx);
    Value *S = I->getOperand(1);
    return B.CreateBinOp(BitOp->getOpcode(),
              B.CreateLShr(BitOp->getOperand(0), S),
              B.CreateLShr(BitOp->getOperand(1), S));
  });
S.addRule("expose bitop-const",
  // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
  [](Instruction *I, LLVMContext &Ctx) -> Value* {
    auto IsBitOp = [](unsigned Op) -> bool {
      switch (Op) {
        case Instruction::And:
        case Instruction::Or:
        case Instruction::Xor:
          return true;
      }
      return false;
    };
    BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
    if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
      return nullptr;
    BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
    if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
      return nullptr;
    ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
    ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
    if (!CA || !CB)
      return nullptr;
    IRBuilder<> B(Ctx);
    Value *X = BitOp2->getOperand(0);
    return B.CreateBinOp(BitOp2->getOpcode(), X,
              B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
  });
1751}

1753void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
  [](Instruction *I, LLVMContext &Ctx) -> Value* {
    if (I->getOpcode() != Instruction::And)
      return nullptr;
    Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
    ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
    if (!Xor || !C0)
      return nullptr;
    if (Xor->getOpcode() != Instruction::Xor)
      return nullptr;
    Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
    Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
    // Pick the first non-null and.
    if (!And0 || And0->getOpcode() != Instruction::And)
      std::swap(And0, And1);
    ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
    if (!C1)
      return nullptr;
    uint32_t V0 = C0->getZExtValue();
    uint32_t V1 = C1->getZExtValue();
    if (V0 != (V0 & V1))
      return nullptr;
    IRBuilder<> B(Ctx);
    return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
  });
1779}

1781bool PolynomialMultiplyRecognize::recognize() {
LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
 << *CurLoop << '\n'; } } while (false)
                  << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
 << *CurLoop << '\n'; } } while (false);
// Restrictions:
// - The loop must consist of a single block.
// - The iteration count must be known at compile-time.
// - The loop must have an induction variable starting from 0, and
//   incremented in each iteration of the loop.
BasicBlock *LoopB = CurLoop->getHeader();
LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false);

if (LoopB != CurLoop->getLoopLatch())
  return false;
BasicBlock *ExitB = CurLoop->getExitBlock();
if (ExitB == nullptr)
  return false;
BasicBlock *EntryB = CurLoop->getLoopPreheader();
if (EntryB == nullptr)
  return false;

unsigned IterCount = 0;
const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
if (isa<SCEVCouldNotCompute>(CT))
  return false;
if (auto *CV = dyn_cast<SCEVConstant>(CT))
  IterCount = CV->getValue()->getZExtValue() + 1;

Value *CIV = getCountIV(LoopB);
ParsedValues PV;
Simplifier PreSimp;
PV.IterCount = IterCount;
LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
 "\nIterCount: " << IterCount << '\n'; } } while (
false)
                  << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
 "\nIterCount: " << IterCount << '\n'; } } while (
false);

setupPreSimplifier(PreSimp);

// Perform a preliminary scan of select instructions to see if any of them
// looks like a generator of the polynomial multiply steps. Assume that a
// loop can only contain a single transformable operation, so stop the
// traversal after the first reasonable candidate was found.
// XXX: Currently this approach can modify the loop before being 100% sure
// that the transformation can be carried out.
bool FoundPreScan = false;
auto FeedsPHI = [LoopB](const Value *V) -> bool {
  for (const Value *U : V->users()) {
    if (const auto *P = dyn_cast<const PHINode>(U))
      if (P->getParent() == LoopB)
        return true;
  }
  return false;
};
for (Instruction &In : *LoopB) {
  SelectInst *SI = dyn_cast<SelectInst>(&In);
  if (!SI || !FeedsPHI(SI))
    continue;

  Simplifier::Context C(SI);
  Value *T = PreSimp.simplify(C);
  SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
  LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
 PE(C, SelI) << '\n'; } } while (false);
  if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
    FoundPreScan = true;
    if (SelI != SI) {
      Value *NewSel = C.materialize(LoopB, SI->getIterator());
      SI->replaceAllUsesWith(NewSel);
      RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
    }
    break;
  }
}

if (!FoundPreScan) {
  LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false);
  return false;
}

if (!PV.Left) {
  // The right shift version actually only returns the higher bits of
  // the result (each iteration discards the LSB). If we want to convert it
  // to a left-shifting loop, the working data type must be at least as
  // wide as the target's pmpy instruction.
  if (!promoteTypes(LoopB, ExitB))
    return false;
  // Run post-promotion simplifications.
  Simplifier PostSimp;
  setupPostSimplifier(PostSimp);
  for (Instruction &In : *LoopB) {
    SelectInst *SI = dyn_cast<SelectInst>(&In);
    if (!SI || !FeedsPHI(SI))
      continue;
    Simplifier::Context C(SI);
    Value *T = PostSimp.simplify(C);
    SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
    if (SelI != SI) {
      Value *NewSel = C.materialize(LoopB, SI->getIterator());
      SI->replaceAllUsesWith(NewSel);
      RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
    }
    break;
  }

  if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
    return false;
  cleanupLoopBody(LoopB);
}

// Scan the loop again, find the generating select instruction.
bool FoundScan = false;
for (Instruction &In : *LoopB) {
  SelectInst *SelI = dyn_cast<SelectInst>(&In);
  if (!SelI)
    continue;
  LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
 << '\n'; } } while (false);
  FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
  if (FoundScan)
    break;
}
assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail
 ("FoundScan", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1898, __extension__ __PRETTY_FUNCTION__));

LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
  StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
  if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
    dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
  elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
    dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
           << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
  dbgs() << "  Res:" << *PV.Res << "\n  P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
  if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
    dbgs() << "  M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
  dbgs() << "  Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
  dbgs() << "  Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false)
})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
 (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
 << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
 << PP << "/Q).Q) + " << PP << "\n"; dbgs
() << "  Res:" << *PV.Res << "\n  P:" <<
 *PV.P << "\n"; if (PV.M) dbgs() << "  M:" <<
 *PV.M << "\n"; dbgs() << "  Q:" << *PV.Q <<
 "\n"; dbgs() << "  Iteration count:" << PV.IterCount
 << "\n"; }; } } while (false);

BasicBlock::iterator At(EntryB->getTerminator());
Value *PM = generate(At, PV);
if (PM == nullptr)
  return false;

if (PM->getType() != PV.Res->getType())
  PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);

PV.Res->replaceAllUsesWith(PM);
PV.Res->eraseFromParent();
return true;
1925}

1927int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
  return SC->getAPInt().getSExtValue();
return 0;
1931}

1933bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
// Allow volatile stores if HexagonVolatileMemcpy is enabled.
if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
  return false;

Value *StoredVal = SI->getValueOperand();
Value *StorePtr = SI->getPointerOperand();

// Reject stores that are so large that they overflow an unsigned.
uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
  return false;

// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided store.  If we have something else, it's a
// random store we can't handle.
auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
  return false;

// Check to see if the stride matches the size of the store.  If so, then we
// know that every byte is touched in the loop.
int Stride = getSCEVStride(StoreEv);
if (Stride == 0)
  return false;
unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
if (StoreSize != unsigned(std::abs(Stride)))
  return false;

// The store must be feeding a non-volatile load.
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
if (!LI || !LI->isSimple())
  return false;

// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load.  If we have something else, it's a
// random load we can't handle.
Value *LoadPtr = LI->getPointerOperand();
auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
  return false;

// The store and load must share the same stride.
if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
  return false;

// Success.  This store can be converted into a memcpy.
return true;
1981}

1983/// mayLoopAccessLocation - Return true if the specified loop might access the
1984/// specified pointer location, which is a loop-strided access.  The 'Access'
1985/// argument specifies what the verboten forms of access are (read or write).
1986static bool
1987mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
                    const SCEV *BECount, unsigned StoreSize,
                    AliasAnalysis &AA,
                    SmallPtrSetImpl<Instruction *> &Ignored) {
// Get the location that may be stored across the loop.  Since the access
// is strided positively through memory, we say that the modified location
// starts at the pointer and has infinite size.
LocationSize AccessSize = LocationSize::afterPointer();

// If the loop iterates a fixed number of times, we can refine the access
// size to be exactly the size of the memset, which is (BECount+1)*StoreSize
if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
  AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
                                     StoreSize);

// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store.  Store to &A[i] of 100 will always return may alias
// with store of &A[100], we need to StoreLoc to be "A" with size of 100,
// which will then no-alias a store to &A[100].
MemoryLocation StoreLoc(Ptr, AccessSize);

for (auto *B : L->blocks())
  for (auto &I : *B)
    if (Ignored.count(&I) == 0 &&
        isModOrRefSet(
            intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
      return true;

return false;
2016}

2018void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
    SmallVectorImpl<StoreInst*> &Stores) {
Stores.clear();
for (Instruction &I : *BB)
  if (StoreInst *SI = dyn_cast<StoreInst>(&I))
    if (isLegalStore(CurLoop, SI))
      Stores.push_back(SI);
2025}

2027bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
    StoreInst *SI, const SCEV *BECount) {
assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
 "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2031, __extension__ __PRETTY_FUNCTION__))
       "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
 "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2031, __extension__ __PRETTY_FUNCTION__))
       "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
 "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2031, __extension__ __PRETTY_FUNCTION__));

Value *StorePtr = SI->getPointerOperand();
auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
unsigned Stride = getSCEVStride(StoreEv);
unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
if (Stride != StoreSize)
  return false;

// See if the pointer expression is an AddRec like {base,+,1} on the current
// loop, which indicates a strided load.  If we have something else, it's a
// random load we can't handle.
auto *LI = cast<LoadInst>(SI->getValueOperand());
auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));

// The trip count of the loop and the base pointer of the addrec SCEV is
// guaranteed to be loop invariant, which means that it should dominate the
// header.  This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
Instruction *ExpPt = Preheader->getTerminator();
IRBuilder<> Builder(ExpPt);
SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");

Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());

// Okay, we have a strided store "p[i]" of a loaded value.  We can turn
// this into a memcpy/memmove in the loop preheader now if we want.  However,
// this would be unsafe to do if there is anything else in the loop that may
// read or write the memory region we're storing to.  For memcpy, this
// includes the load that feeds the stores.  Check for an alias by generating
// the base address and checking everything.
Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
    Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
Value *LoadBasePtr = nullptr;

bool Overlap = false;
bool DestVolatile = SI->isVolatile();
Type *BECountTy = BECount->getType();

if (DestVolatile) {
  // The trip count must fit in i32, since it is the type of the "num_words"
  // argument to hexagon_memcpy_forward_vp4cp4n2.
  if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2074CleanupAndExit:
    // If we generated new code for the base pointer, clean up.
    Expander.clear();
    if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
      RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
      StoreBasePtr = nullptr;
    }
    if (LoadBasePtr) {
      RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
      LoadBasePtr = nullptr;
    }
    return false;
  }
}

SmallPtrSet<Instruction*, 2> Ignore1;
Ignore1.insert(SI);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
                          StoreSize, *AA, Ignore1)) {
  // Check if the load is the offending instruction.
  Ignore1.insert(LI);
  if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
                            BECount, StoreSize, *AA, Ignore1)) {
    // Still bad. Nothing we can do.
    goto CleanupAndExit;
  }
  // It worked with the load ignored.
  Overlap = true;
}

if (!Overlap) {
  if (DisableMemcpyIdiom || !HasMemcpy)
    goto CleanupAndExit;
} else {
  // Don't generate memmove if this function will be inlined. This is
  // because the caller will undergo this transformation after inlining.
  Function *Func = CurLoop->getHeader()->getParent();
  if (Func->hasFnAttribute(Attribute::AlwaysInline))
    goto CleanupAndExit;

  // In case of a memmove, the call to memmove will be executed instead
  // of the loop, so we need to make sure that there is nothing else in
  // the loop than the load, store and instructions that these two depend
  // on.
  SmallVector<Instruction*,2> Insts;
  Insts.push_back(SI);
  Insts.push_back(LI);
  if (!coverLoop(CurLoop, Insts))
    goto CleanupAndExit;

  if (DisableMemmoveIdiom || !HasMemmove)
    goto CleanupAndExit;
  bool IsNested = CurLoop->getParentLoop() != nullptr;
  if (IsNested && OnlyNonNestedMemmove)
    goto CleanupAndExit;
}

// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
    Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);

SmallPtrSet<Instruction*, 2> Ignore2;
Ignore2.insert(SI);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
                          StoreSize, *AA, Ignore2))
  goto CleanupAndExit;

// Check the stride.
bool StridePos = getSCEVStride(LoadEv) >= 0;

// Currently, the volatile memcpy only emulates traversing memory forward.
if (!StridePos && DestVolatile)
  goto CleanupAndExit;

bool RuntimeCheck = (Overlap || DestVolatile);

BasicBlock *ExitB;
if (RuntimeCheck) {
  // The runtime check needs a single exit block.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1)
    goto CleanupAndExit;
  ExitB = ExitBlocks[0];
}

// The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
// pointer size if it isn't already.
LLVMContext &Ctx = SI->getContext();
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
DebugLoc DLoc = SI->getDebugLoc();

const SCEV *NumBytesS =
    SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
if (StoreSize != 1)
  NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
                             SCEV::FlagNUW);
Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
if (Instruction *In = dyn_cast<Instruction>(NumBytes))
  if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
    NumBytes = Simp;

CallInst *NewCall;

if (RuntimeCheck) {
  unsigned Threshold = RuntimeMemSizeThreshold;
  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
    uint64_t C = CI->getZExtValue();
    if (Threshold != 0 && C < Threshold)
      goto CleanupAndExit;
    if (C < CompileTimeMemSizeThreshold)
      goto CleanupAndExit;
  }

  BasicBlock *Header = CurLoop->getHeader();
  Function *Func = Header->getParent();
  Loop *ParentL = LF->getLoopFor(Preheader);
  StringRef HeaderName = Header->getName();

  // Create a new (empty) preheader, and update the PHI nodes in the
  // header to use the new preheader.
  BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
                                                Func, Header);
  if (ParentL)
    ParentL->addBasicBlockToLoop(NewPreheader, *LF);
  IRBuilder<>(NewPreheader).CreateBr(Header);
  for (auto &In : *Header) {
    PHINode *PN = dyn_cast<PHINode>(&In);
    if (!PN)
      break;
    int bx = PN->getBasicBlockIndex(Preheader);
    if (bx >= 0)
      PN->setIncomingBlock(bx, NewPreheader);
  }
  DT->addNewBlock(NewPreheader, Preheader);
  DT->changeImmediateDominator(Header, NewPreheader);

  // Check for safe conditions to execute memmove.
  // If stride is positive, copying things from higher to lower addresses
  // is equivalent to memmove.  For negative stride, it's the other way
  // around.  Copying forward in memory with positive stride may not be
  // same as memmove since we may be copying values that we just stored
  // in some previous iteration.
  Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
  Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
  Value *LowA = StridePos ? SA : LA;
  Value *HighA = StridePos ? LA : SA;
  Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
  Value *Cond = CmpA;

  // Check for distance between pointers. Since the case LowA < HighA
  // is checked for above, assume LowA >= HighA.
  Value *Dist = Builder.CreateSub(LowA, HighA);
  Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
  Value *CmpEither = Builder.CreateOr(Cond, CmpD);
  Cond = CmpEither;

  if (Threshold != 0) {
    Type *Ty = NumBytes->getType();
    Value *Thr = ConstantInt::get(Ty, Threshold);
    Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
    Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
    Cond = CmpBoth;
  }
  BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
                                            Func, NewPreheader);
  if (ParentL)
    ParentL->addBasicBlockToLoop(MemmoveB, *LF);
  Instruction *OldT = Preheader->getTerminator();
  Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
  OldT->eraseFromParent();
  Preheader->setName(Preheader->getName()+".old");
  DT->addNewBlock(MemmoveB, Preheader);
  // Find the new immediate dominator of the exit block.
  BasicBlock *ExitD = Preheader;
  for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
    BasicBlock *PB = *PI;
    ExitD = DT->findNearestCommonDominator(ExitD, PB);
    if (!ExitD)
      break;
  }
  // If the prior immediate dominator of ExitB was dominated by the
  // old preheader, then the old preheader becomes the new immediate
  // dominator.  Otherwise don't change anything (because the newly
  // added blocks are dominated by the old preheader).
  if (ExitD && DT->dominates(Preheader, ExitD)) {
    DomTreeNode *BN = DT->getNode(ExitB);
    DomTreeNode *DN = DT->getNode(ExitD);
    BN->setIDom(DN);
  }

  // Add a call to memmove to the conditional block.
  IRBuilder<> CondBuilder(MemmoveB);
  CondBuilder.CreateBr(ExitB);
  CondBuilder.SetInsertPoint(MemmoveB->getTerminator());

  if (DestVolatile) {
    Type *Int32Ty = Type::getInt32Ty(Ctx);
    Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
    Type *VoidTy = Type::getVoidTy(Ctx);
    Module *M = Func->getParent();
    FunctionCallee Fn = M->getOrInsertFunction(
        HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);

    const SCEV *OneS = SE->getConstant(Int32Ty, 1);
    const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
    const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
    Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
                                             MemmoveB->getTerminator());
    if (Instruction *In = dyn_cast<Instruction>(NumWords))
      if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
        NumWords = Simp;

    Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
                    ? StoreBasePtr
                    : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
    Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
                    ? LoadBasePtr
                    : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
    NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
  } else {
    NewCall = CondBuilder.CreateMemMove(
        StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
  }
} else {
  NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
                                 LI->getAlign(), NumBytes);
  // Okay, the memcpy has been formed.  Zap the original store and
  // anything that feeds into it.
  RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
}

NewCall->setDebugLoc(DLoc);

LLVM_DEBUG(dbgs() << "  Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "  Formed " << (Overlap
 ? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
 "    from load ptr=" << *LoadEv << " at: " <<
 *LI << "\n" << "    from store ptr=" << *StoreEv
 << " at: " << *SI << "\n"; } } while (false
)
                  << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "  Formed " << (Overlap
 ? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
 "    from load ptr=" << *LoadEv << " at: " <<
 *LI << "\n" << "    from store ptr=" << *StoreEv
 << " at: " << *SI << "\n"; } } while (false
)
                  << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "  Formed " << (Overlap
 ? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
 "    from load ptr=" << *LoadEv << " at: " <<
 *LI << "\n" << "    from store ptr=" << *StoreEv
 << " at: " << *SI << "\n"; } } while (false
)
                  << "    from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "  Formed " << (Overlap
 ? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
 "    from load ptr=" << *LoadEv << " at: " <<
 *LI << "\n" << "    from store ptr=" << *StoreEv
 << " at: " << *SI << "\n"; } } while (false
)
                  << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "  Formed " << (Overlap
 ? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
 "    from load ptr=" << *LoadEv << " at: " <<
 *LI << "\n" << "    from store ptr=" << *StoreEv
 << " at: " << *SI << "\n"; } } while (false
);

return true;
2316}

2318// Check if the instructions in Insts, together with their dependencies
2319// cover the loop in the sense that the loop could be safely eliminated once
2320// the instructions in Insts are removed.
2321bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
    SmallVectorImpl<Instruction*> &Insts) const {
SmallSet<BasicBlock*,8> LoopBlocks;
for (auto *B : L->blocks())
  LoopBlocks.insert(B);

SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());

// Collect all instructions from the loop that the instructions in Insts
// depend on (plus their dependencies, etc.).  These instructions will
// constitute the expression trees that feed those in Insts, but the trees
// will be limited only to instructions contained in the loop.
for (unsigned i = 0; i < Worklist.size(); ++i) {
  Instruction *In = Worklist[i];
  for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
    Instruction *OpI = dyn_cast<Instruction>(I);
    if (!OpI)
      continue;
    BasicBlock *PB = OpI->getParent();
    if (!LoopBlocks.count(PB))
      continue;
    Worklist.insert(OpI);
  }
}

// Scan all instructions in the loop, if any of them have a user outside
// of the loop, or outside of the expressions collected above, then either
// the loop has a side-effect visible outside of it, or there are
// instructions in it that are not involved in the original set Insts.
for (auto *B : L->blocks()) {
  for (auto &In : *B) {
    if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
      continue;
    if (!Worklist.count(&In) && In.mayHaveSideEffects())
      return false;
    for (auto K : In.users()) {
      Instruction *UseI = dyn_cast<Instruction>(K);
      if (!UseI)
        continue;
      BasicBlock *UseB = UseI->getParent();
      if (LF->getLoopFor(UseB) != L)
        return false;
    }
  }
}

return true;
2368}

2370/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2371/// with the specified backedge count.  This block is known to be in the current
2372/// loop and not in any subloops.
2373bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
    const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
// We can only promote stores in this block if they are unconditionally
// executed in the loop.  For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop.  Verify this now.
auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
  return DT->dominates(BB, EB);
};
if (!all_of(ExitBlocks, DominatedByBB))
  return false;

bool MadeChange = false;
// Look for store instructions, which may be optimized to memset/memcpy.
SmallVector<StoreInst*,8> Stores;
collectStores(CurLoop, BB, Stores);

// Optimize the store into a memcpy, if it feeds an similarly strided load.
for (auto &SI : Stores)
  MadeChange |= processCopyingStore(CurLoop, SI, BECount);

return MadeChange;
2394}

2396bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
if (PMR.recognize())
  return true;

if (!HasMemcpy && !HasMemmove)
  return false;

const SCEV *BECount = SE->getBackedgeTakenCount(L);
assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
 "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2407, __extension__ __PRETTY_FUNCTION__))
       "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
 "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2407, __extension__ __PRETTY_FUNCTION__))
       "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
 "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2407, __extension__ __PRETTY_FUNCTION__));

SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);

bool Changed = false;

// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : L->getBlocks()) {
  // Ignore blocks in subloops.
  if (LF->getLoopFor(BB) != L)
    continue;
  Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
}

return Changed;
2423}

2425bool HexagonLoopIdiomRecognize::run(Loop *L) {
const Module &M = *L->getHeader()->getParent()->getParent();
if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
  return false;

// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
if (!L->getLoopPreheader())
  return false;

// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();
if (Name == "memset" || Name == "memcpy" || Name == "memmove")
  return false;

DL = &L->getHeader()->getModule()->getDataLayout();

HasMemcpy = TLI->has(LibFunc_memcpy);
HasMemmove = TLI->has(LibFunc_memmove);

if (SE->hasLoopInvariantBackedgeTakenCount(L))
  return runOnCountableLoop(L);
return false;
2448}

2450bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
                                                  LPPassManager &LPM) {
if (skipLoop(L))
  return false;

auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
    *L->getHeader()->getParent());
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2462}

2464Pass *llvm::createHexagonLoopIdiomPass() {
return new HexagonLoopIdiomRecognizeLegacyPass();
2466}

2468PreservedAnalyses
2469HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM,
                                   LoopStandardAnalysisResults &AR,
                                   LPMUpdater &U) {
return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE)
               .run(&L)
           ? getLoopPassPreservedAnalyses()
           : PreservedAnalyses::all();
2476}

←

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h

//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a class to represent arbitrary precision
/// integral constant values and operations on them.
///
//===----------------------------------------------------------------------===//

15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H

18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <utility>

25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;

31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34template <typename T> struct DenseMapInfo;

36class APInt;

38inline APInt operator-(APInt);

40//===----------------------------------------------------------------------===//
41//                              APInt Class
42//===----------------------------------------------------------------------===//

/// Class for arbitrary precision integers.
///
/// APInt is a functional replacement for common case unsigned integer type like
/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
/// than 64-bits of precision. APInt provides a variety of arithmetic operators
/// and methods to manipulate integer values of any bit-width. It supports both
/// the typical integer arithmetic and comparison operations as well as bitwise
/// manipulation.
///
/// The class has several invariants worth noting:
///   * All bit, byte, and word positions are zero-based.
///   * Once the bit width is set, it doesn't change except by the Truncate,
///     SignExtend, or ZeroExtend operations.
///   * All binary operators must be on APInt instances of the same bit width.
///     Attempting to use these operators on instances with different bit
///     widths will yield an assertion.
///   * The value is stored canonically as an unsigned value. For operations
///     where it makes a difference, there are both signed and unsigned variants
///     of the operation. For example, sdiv and udiv. However, because the bit
///     widths must be the same, operations such as Mul and Add produce the same
///     results regardless of whether the values are interpreted as signed or
///     not.
///   * In general, the class tries to follow the style of computation that LLVM
///     uses in its IR. This simplifies its use for LLVM.
///
70class LLVM_NODISCARD[[clang::warn_unused_result]] APInt {
71public:
/// Storage unit used for the integer's words.
typedef uint64_t WordType;

/// This enum is used to hold the constants we needed for APInt.
enum : unsigned {
  /// Byte size of a word.
  APINT_WORD_SIZE = sizeof(WordType),
  /// Bits in a word.
  APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT
};

/// Rounding mode selector.
/// NOTE(review): the consumers of this enum are not visible in this part of
/// the header; confirm the exact semantics of each mode against them.
enum class Rounding {
  DOWN,
  TOWARD_ZERO,
  UP,
};

static constexpr WordType WORDTYPE_MAX = ~WordType(0);

90private:
/// This union is used to store the integer value. When the
/// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
union {
  uint64_t VAL;   ///< Used to store the <= 64 bits integer value.
  uint64_t *pVal; ///< Used to store the >64 bits integer value.
} U;

unsigned BitWidth; ///< The number of bits in this APInt.

friend struct DenseMapInfo<APInt>;

friend class APSInt;

/// Internal-only constructor that adopts an existing word array.
///
/// Stores \p val directly as the multi-word pointer and \p bits as the width;
/// nothing is copied or validated. Kept private because it is unsafe for
/// general use and exists purely to build temporaries quickly.
APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; }

/// Map a bit position to the index of the word that contains it.
///
/// \returns \p bitPosition divided by APINT_BITS_PER_WORD.
static unsigned whichWord(unsigned bitPosition) {
  const unsigned WordIndex = bitPosition / APINT_BITS_PER_WORD;
  return WordIndex;
}

/// Map a bit position in the APInt to its offset inside its word.
///
/// \returns \p bitPosition modulo APINT_BITS_PER_WORD.
static unsigned whichBit(unsigned bitPosition) {
  const unsigned BitOffset = bitPosition % APINT_BITS_PER_WORD;
  return BitOffset;
}

/// Build a one-bit word mask.
///
/// \returns a uint64_t in which only the bit at whichBit(\p bitPosition) is
/// set. The result is meant to be applied to the word that holds the bit.
static uint64_t maskBit(unsigned bitPosition) {
  const unsigned BitOffset = whichBit(bitPosition);
  return 1ULL << BitOffset;
}

/// Zero the bits of the most significant word that lie above BitWidth.
///
/// Used internally after the top word has been written, so that the unused
/// high-order bits are guaranteed to be zero.
///
/// \returns *this.
APInt &clearUnusedBits() {
  // Number of bits that are in use within the final word.
  const unsigned UsedBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1;

  // Everything above UsedBits is dropped from the top word.
  const uint64_t KeepMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - UsedBits);
  uint64_t &TopWord = isSingleWord() ? U.VAL : U.pVal[getNumWords() - 1];
  TopWord &= KeepMask;
  return *this;
}

/// Fetch the storage word that holds a given bit.
/// \returns the inline value for single-word integers, otherwise the word at
/// index whichWord(\p bitPosition).
uint64_t getWord(unsigned bitPosition) const {
  if (isSingleWord())
    return U.VAL;
  return U.pVal[whichWord(bitPosition)];
}

/// Utility method to change the bit width of this APInt to new bit width,
/// allocating and/or deallocating as necessary. There is no guarantee on the
/// value of any bits upon return. Caller should populate the bits after.
void reallocate(unsigned NewBitWidth);

/// Convert a char array into an APInt
///
/// \param radix 2, 8, 10, 16, or 36
/// Converts a string into a number.  The string must be non-empty
/// and well-formed as a number of the given base. The bit-width
/// must be sufficient to hold the result.
///
/// This is used by the constructors that take string arguments.
///
/// StringRef::getAsInteger is superficially similar but (1) does
/// not assume that the string is well-formed and (2) grows the
/// result to hold the input.
void fromString(unsigned numBits, StringRef str, uint8_t radix);

/// An internal division function for dividing APInts.
///
/// This is used by the toString method to divide by the radix. It simply
/// provides a more convenient form of divide for internal use since KnuthDiv
/// has specific constraints on its inputs. If those constraints are not met
/// then it provides a simpler form of divide.
static void divide(const WordType *LHS, unsigned lhsWords,
                   const WordType *RHS, unsigned rhsWords, WordType *Quotient,
                   WordType *Remainder);

/// out-of-line slow case for inline constructor
void initSlowCase(uint64_t val, bool isSigned);

/// shared code between two array constructors
void initFromArray(ArrayRef<uint64_t> array);

/// out-of-line slow case for inline copy constructor
void initSlowCase(const APInt &that);

/// out-of-line slow case for shl
void shlSlowCase(unsigned ShiftAmt);

/// out-of-line slow case for lshr.
void lshrSlowCase(unsigned ShiftAmt);

/// out-of-line slow case for ashr.
void ashrSlowCase(unsigned ShiftAmt);

/// out-of-line slow case for operator=
void AssignSlowCase(const APInt &RHS);

/// out-of-line slow case for operator==
bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));

/// out-of-line slow case for countLeadingZeros
unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));

/// out-of-line slow case for countLeadingOnes.
unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));

/// out-of-line slow case for countTrailingZeros.
unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));

/// out-of-line slow case for countTrailingOnes
unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));

/// out-of-line slow case for countPopulation
unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));

/// out-of-line slow case for intersects.
bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));

/// out-of-line slow case for isSubsetOf.
bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));

/// out-of-line slow case for setBits.
void setBitsSlowCase(unsigned loBit, unsigned hiBit);

/// out-of-line slow case for flipAllBits.
void flipAllBitsSlowCase();

/// out-of-line slow case for operator&=.
void AndAssignSlowCase(const APInt& RHS);

/// out-of-line slow case for operator|=.
void OrAssignSlowCase(const APInt& RHS);

/// out-of-line slow case for operator^=.
void XorAssignSlowCase(const APInt& RHS);

/// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
/// to, or greater than RHS.
int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));

/// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
/// to, or greater than RHS.
int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));

259public:
/// \name Constructors
/// @{

/// Create a new APInt of numBits width, initialized as val.
///
/// If isSigned is true then val is treated as if it were a signed value
/// (i.e. as an int64_t) and the appropriate sign extension to the bit width
/// will be done. Otherwise, no sign extension occurs (high order bits beyond
/// the range of val are zero filled).
///
/// A zero \p numBits is rejected by assertion.
///
/// \param numBits the bit width of the constructed APInt
/// \param val the initial value of the APInt
/// \param isSigned how to treat signedness of val
APInt(unsigned numBits, uint64_t val, bool isSigned = false)
    : BitWidth(numBits) {
  assert(BitWidth && "bitwidth too small");
  if (isSingleWord()) {
    // Inline storage: keep the low bits and drop anything above BitWidth.
    U.VAL = val;
    clearUnusedBits();
  } else {
    initSlowCase(val, isSigned);
  }
}

/// Construct an APInt of numBits width, initialized as bigVal[].
///
/// Note that bigVal.size() can be smaller or larger than the corresponding
/// bit width but any extraneous bits will be dropped.
///
/// \param numBits the bit width of the constructed APInt
/// \param bigVal a sequence of words to form the initial value of the APInt
APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);

/// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
/// deprecated because this constructor is prone to ambiguity with the
/// APInt(unsigned, uint64_t, bool) constructor.
///
/// If this overload is ever deleted, care should be taken to prevent calls
/// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
/// constructor.
APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);

/// Construct an APInt from a string representation.
///
/// This constructor interprets the string \p str in the given radix. The
/// interpretation stops when the first character that is not suitable for the
/// radix is encountered, or the end of the string. Acceptable radix values
/// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
/// string to require more bits than numBits.
///
/// \param numBits the bit width of the constructed APInt
/// \param str the string to be interpreted
/// \param radix the radix to use for the conversion
APInt(unsigned numBits, StringRef str, uint8_t radix);

/// Simply makes *this a copy of that.
/// Copy Constructor.
APInt(const APInt &that) : BitWidth(that.BitWidth) {
  if (isSingleWord())
    U.VAL = that.U.VAL;
  else
    initSlowCase(that);
}

/// Move constructor.
///
/// Takes over the storage union of \p that byte-for-byte and then resets the
/// bit width of \p that to 0.
APInt(APInt &&that) : BitWidth(that.BitWidth) {
  memcpy(&U, &that.U, sizeof(U));
  that.BitWidth = 0;
}

/// Destructor: frees the word array when needsCleanup() reports one.
~APInt() {
  if (!needsCleanup())
    return;
  delete[] U.pVal;
}

/// Default constructor producing a 1-bit value equal to zero.
///
/// This is useful for object deserialization (pair this with the static
/// method Read).
explicit APInt() : BitWidth(1) {
  U.VAL = 0;
}

/// \returns true when the value is not stored in a single word, i.e. when
/// this instance uses the pointer-based storage.
bool needsCleanup() const {
  const bool Inline = isSingleWord();
  return !Inline;
}

/// Used to insert APInt objects, or objects that contain APInt objects, into
///  FoldingSets.
void Profile(FoldingSetNodeID &id) const;

/// @}
/// \name Value Tests
/// @{

/// Check whether the value fits in one storage word.
///
/// \returns true if BitWidth <= APINT_BITS_PER_WORD, false otherwise.
bool isSingleWord() const {
  return !(BitWidth > APINT_BITS_PER_WORD);
}

/// Test the sign of this APInt.
///
/// Reads the highest bit (index BitWidth - 1).
///
/// \returns true if that bit is set, i.e. the value is negative.
bool isNegative() const {
  const unsigned TopBit = BitWidth - 1;
  return (*this)[TopBit];
}

/// Determine if this APInt Value is non-negative (>= 0)
///
/// This tests the high bit of the APInt to determine if it is unset.
bool isNonNegative() const { return !isNegative(); }

/// Query the sign bit.
///
/// \returns true if the bit at index BitWidth - 1 is set, false otherwise.
bool isSignBitSet() const {
  const unsigned TopBit = BitWidth - 1;
  return (*this)[TopBit];
}

/// Query whether the sign bit is clear.
///
/// \returns true if isSignBitSet() is false.
bool isSignBitClear() const {
  const bool SignSet = isSignBitSet();
  return !SignSet;
}

/// Check whether this APInt is > 0 when read as signed.
///
/// Zero does not count as positive.
///
/// \returns true if the value is non-negative and not zero.
bool isStrictlyPositive() const {
  if (!isNonNegative())
    return false;
  return !isNullValue();
}

/// Check whether this APInt is <= 0 when read as signed.
///
/// \returns the negation of isStrictlyPositive().
bool isNonPositive() const {
  const bool Positive = isStrictlyPositive();
  return !Positive;
}

/// Check whether every bit of the APInt is set.
///
/// Single-word values are compared against an all-ones word shifted down to
/// BitWidth bits; wider values count their trailing ones.
bool isAllOnesValue() const {
  if (!isSingleWord())
    return countTrailingOnesSlowCase() == BitWidth;
  const uint64_t AllOnes = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
  return U.VAL == AllOnes;
}

/// Check whether every bit of the APInt is clear.
///
/// \returns the result of the logical-negation operator on *this.
bool isNullValue() const {
  return this->operator!();
}

/// Check whether the value of this APInt is exactly one.
///
/// Wider-than-a-word values are one when all bits but the lowest are leading
/// zeros.
bool isOneValue() const {
  if (!isSingleWord())
    return countLeadingZerosSlowCase() == BitWidth - 1;
  return U.VAL == 1;
}

/// Check whether this is the maximum unsigned value for its bit width.
///
/// \returns the result of isAllOnesValue().
bool isMaxValue() const {
  return isAllOnesValue();
}

/// Determine if this is the largest signed value.
///
/// This checks to see if the value of this APInt is the maximum signed
/// value for the APInt's bit width.
bool isMaxSignedValue() const {
  if (isSingleWord())
    return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
  return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
}

/// Check whether this is the minimum unsigned value for its bit width.
///
/// \returns the result of isNullValue().
bool isMinValue() const {
  return isNullValue();
}

/// Determine if this is the smallest signed value.
///
/// This checks to see if the value of this APInt is the minimum signed
/// value for the APInt's bit width.
bool isMinSignedValue() const {
  if (isSingleWord())
    return U.VAL == (WordType(1) << (BitWidth - 1));
  return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
}

/// Check if this APInt has an N-bits unsigned integer value.
///
/// \param N the candidate width; must be non-zero (asserted).
/// \returns true if getActiveBits() does not exceed \p N.
bool isIntN(unsigned N) const {
  assert(N && "N == 0 ???");
  return getActiveBits() <= N;
}

/// Check if this APInt has an N-bits signed integer value.
///
/// \param N the candidate width; must be non-zero (asserted).
/// \returns true if getMinSignedBits() does not exceed \p N.
bool isSignedIntN(unsigned N) const {
  assert(N && "N == 0 ???");
  return getMinSignedBits() <= N;
}

/// Check whether the value is a power of two greater than zero.
///
/// \returns true if exactly one bit is set (multi-word case), or if
/// isPowerOf2_64 accepts the inline word (single-word case).
bool isPowerOf2() const {
  if (!isSingleWord())
    return countPopulationSlowCase() == 1;
  return isPowerOf2_64(U.VAL);
}

/// Check whether this value equals what getSignMask would produce.
///
/// \returns the result of isMinSignedValue().
bool isSignMask() const {
  return isMinSignedValue();
}

/// Convert the APInt to a boolean by testing it against zero.
///
/// \returns true when the value is non-zero.
bool getBoolValue() const {
  const bool IsZero = !*this;
  return !IsZero;
}

/// If this value is smaller than the specified limit, return it, otherwise
/// return the limit value.  This causes the value to saturate to the limit.
///
/// \param Limit the saturation bound; defaults to UINT64_MAX.
/// \returns \p Limit when the value is unsigned-greater than it, otherwise
/// the zero-extended value.
uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const {
  return ugt(Limit) ? Limit : getZExtValue();
}

/// Check if the APInt consists of a repeated bit pattern.
///
/// e.g. 0x01010101 satisfies isSplat(8).
/// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
/// width without remainder.
bool isSplat(unsigned SplatSizeInBits) const;

/// \returns true if this APInt value is a sequence of \p numBits ones
/// starting at the least significant bit with the remainder zero.
///
/// \param numBits the expected run length; must be non-zero and no larger
/// than the bit width (both asserted).
bool isMask(unsigned numBits) const {
  assert(numBits != 0 && "numBits must be non-zero");
  assert(numBits <= BitWidth && "numBits out of range");
  if (isSingleWord())
    return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
  // Multi-word: the trailing ones must be exactly numBits long and everything
  // above them must be zero.
  unsigned Ones = countTrailingOnesSlowCase();
  return (numBits == Ones) &&
         ((Ones + countLeadingZerosSlowCase()) == BitWidth);
}

/// \returns true if this APInt is a non-empty run of ones starting at the
/// least significant bit, with every remaining bit zero.
/// Ex. isMask(0x0000FFFFU) == true.
bool isMask() const {
  if (!isSingleWord()) {
    const unsigned TrailingOnes = countTrailingOnesSlowCase();
    return (TrailingOnes > 0) &&
           ((TrailingOnes + countLeadingZerosSlowCase()) == BitWidth);
  }
  return isMask_64(U.VAL);
}

/// Return true if this APInt value consists of one contiguous run of ones
/// with all remaining bits zero.
bool isShiftedMask() const {
  if (!isSingleWord()) {
    // The set bits are contiguous exactly when ones + leading zeros +
    // trailing zeros account for the full width.
    const unsigned PopCount = countPopulationSlowCase();
    const unsigned LeadingZeros = countLeadingZerosSlowCase();
    return (PopCount + LeadingZeros + countTrailingZeros()) == BitWidth;
  }
  return isShiftedMask_64(U.VAL);
}

/// @}
/// \name Value Generators
/// @{

/// Build the maximum unsigned value for \p numBits, i.e. the all-ones value.
static APInt getMaxValue(unsigned numBits) {
  APInt Max = getAllOnesValue(numBits);
  return Max;
}

/// Build the maximum signed value for \p numBits: all ones with the top bit
/// cleared.
static APInt getSignedMaxValue(unsigned numBits) {
  APInt Result = getAllOnesValue(numBits);
  const unsigned SignBit = numBits - 1;
  Result.clearBit(SignBit);
  return Result;
}

/// Build the minimum unsigned value for \p numBits, i.e. zero.
static APInt getMinValue(unsigned numBits) {
  return APInt(numBits, 0);
}

/// Build the minimum signed value for \p numBits: zero with only the top bit
/// set.
static APInt getSignedMinValue(unsigned numBits) {
  APInt Result(numBits, 0);
  const unsigned SignBit = numBits - 1;
  Result.setBit(SignBit);
  return Result;
}

/// Get the sign mask for a given bit width.
///
/// Thin wrapper around getSignedMinValue(), provided so call sites that want
/// a sign mask read more clearly.
static APInt getSignMask(unsigned BitWidth) {
  APInt Mask = getSignedMinValue(BitWidth);
  return Mask;
}

/// Build the all-ones value.
///
/// Constructs from WORDTYPE_MAX with the signed flag set.
///
/// \returns the all-ones value for an APInt of the specified bit-width.
static APInt getAllOnesValue(unsigned numBits) {
  const bool TreatAsSigned = true;
  return APInt(numBits, WORDTYPE_MAX, TreatAsSigned);
}

/// Build the '0' value.
///
/// \returns the '0' value for an APInt of the specified bit-width.
static APInt getNullValue(unsigned numBits) {
  return APInt(numBits, 0);
}

/// Compute an APInt containing numBits highbits from this APInt.
///
/// Get an APInt with the same BitWidth as this APInt, just zero mask
/// the low bits and right shift to the least significant bit.
///
/// \returns the high "numBits" bits of this APInt.
APInt getHiBits(unsigned numBits) const;

/// Compute an APInt containing numBits lowbits from this APInt.
///
/// Get an APInt with the same BitWidth as this APInt, just zero mask
/// the high bits.
///
/// \returns the low "numBits" bits of this APInt.
APInt getLoBits(unsigned numBits) const;

/// Build a \p numBits wide value in which only bit \p BitNo is set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
  APInt Result(numBits, 0);
  Result.setBit(BitNo);
  return Result;
}

/// Get a value with a block of bits set.
///
/// Constructs an APInt value that has a contiguous range of bits set. The
/// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
/// bits will be zero. For example, with parameters(32, 0, 16) you would get
/// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
/// \p hiBit.
///
/// \param numBits the intended bit width of the result
/// \param loBit the index of the lowest bit set.
/// \param hiBit one past the index of the highest bit set; must not be less
/// than \p loBit (asserted).
///
/// \returns An APInt value with the requested bits set.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
  assert(loBit <= hiBit && "loBit greater than hiBit");
  APInt Res(numBits, 0);
  Res.setBits(loBit, hiBit);
  return Res;
}

/// Wrapping variant of getBitsSet.
/// If \p hiBit is bigger than \p loBit, this is the same as getBitsSet.
/// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
/// with parameters (32, 28, 4), you would get 0xF000000F.
/// If \p hiBit is equal to \p loBit, you would get a result with all bits
/// set.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
                                unsigned hiBit) {
  APInt Result(numBits, 0);
  Result.setBitsWithWrap(loBit, hiBit);
  return Result;
}

/// Get a value whose bits from loBit upward are all set.
///
/// The bits from loBit (inclusive) to numBits (exclusive) are set and all
/// other bits are zero. For example, with parameters (32, 12) you would get
/// 0xFFFFF000.
///
/// \param numBits the intended bit width of the result
/// \param loBit the index of the lowest bit to set.
///
/// \returns An APInt value with the requested bits set.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
  APInt Result(numBits, 0);
  Result.setBitsFrom(loBit);
  return Result;
}

/// Get a value whose top hiBitsSet bits are set.
///
/// \param numBits the bitwidth of the result
/// \param hiBitsSet the number of high-order bits set in the result.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
  APInt Result(numBits, 0);
  Result.setHighBits(hiBitsSet);
  return Result;
}

/// Get a value with low bits set
///
/// Constructs an APInt value that has the bottom loBitsSet bits set.
///
/// \param numBits the bitwidth of the result
/// \param loBitsSet the number of low-order bits set in the result.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
  APInt Res(numBits, 0);
  // Static-analyzer path note 4: calling 'APInt::setLowBits';
  // path note 14: returning from 'APInt::setLowBits'.
  Res.setLowBits(loBitsSet);
  return Res;
}

/// Return a value containing V broadcasted over NewLen bits.
static APInt getSplat(unsigned NewLen, const APInt &V);

/// Compare two APInts by value, zero-extending the narrower one (if the
/// widths differ) so that both sides have the same bit-width.
static bool isSameValue(const APInt &I1, const APInt &I2) {
  const unsigned Width1 = I1.getBitWidth();
  const unsigned Width2 = I2.getBitWidth();

  if (Width1 == Width2)
    return I1 == I2;
  if (Width1 > Width2)
    return I1 == I2.zext(Width1);
  return I1.zext(Width2) == I2;
}

/// Overload to compute a hash_code for an APInt value.
friend hash_code hash_value(const APInt &Arg);

/// Expose the internal storage of the APInt.
///
/// \returns a pointer to the inline word for single-word values, otherwise a
/// pointer to the first word of the array. Useful for writing the APInt out
/// in binary form without any conversions.
const uint64_t *getRawData() const {
  return isSingleWord() ? &U.VAL : &U.pVal[0];
}

/// @}
/// \name Unary Operators
/// @{

/// Postfix increment operator.
///
/// Adds one to *this via the prefix form.
///
/// \returns a copy of *this taken before the increment.
APInt operator++(int) {
  APInt Previous(*this);
  ++(*this);
  return Previous;
}

/// Prefix increment operator.
///
/// \returns *this incremented by one
APInt &operator++();

/// Postfix decrement operator.
///
/// Subtracts one from *this via the prefix form.
///
/// \returns a copy of *this taken before the decrement.
APInt operator--(int) {
  APInt Previous(*this);
  --(*this);
  return Previous;
}

/// Prefix decrement operator.
///
/// \returns *this decremented by one.
APInt &operator--();

/// Logical negation operator.
///
/// Tests *this against zero.
///
/// \returns true if *this is zero, false otherwise.
bool operator!() const {
  if (!isSingleWord())
    return countLeadingZerosSlowCase() == BitWidth;
  return U.VAL == 0;
}

/// @}
/// \name Assignment Operators
/// @{

/// Copy assignment operator.
///
/// \returns *this after assignment of RHS.
APInt &operator=(const APInt &RHS) {
  // Anything involving multi-word storage goes through the out-of-line path.
  if (!isSingleWord() || !RHS.isSingleWord()) {
    AssignSlowCase(RHS);
    return *this;
  }

  // Both sides are stored inline, so no memory management is needed.
  U.VAL = RHS.U.VAL;
  BitWidth = RHS.BitWidth;
  return clearUnusedBits();
}

/// Move assignment operator.
///
/// Releases any word array owned by *this, takes over the storage and bit
/// width of \p that, and resets the bit width of \p that to 0. Self-move is
/// rejected by assertion, except under EXPENSIVE_CHECKS where it is a no-op.
///
/// \returns *this.
APInt &operator=(APInt &&that) {
#ifdef EXPENSIVE_CHECKS
  // Some std::shuffle implementations still do self-assignment.
  if (this == &that)
    return *this;
#endif
  assert(this != &that && "Self-move not supported");
  if (!isSingleWord())
    delete[] U.pVal;

  // Use memcpy so that type based alias analysis sees both VAL and pVal
  // as modified.
  memcpy(&U, &that.U, sizeof(U));

  BitWidth = that.BitWidth;
  that.BitWidth = 0;

  return *this;
}

/// Assignment operator.
///
/// The RHS value is assigned to *this. If the significant bits in RHS exceed
/// the bit width, the excess bits are truncated. If the bit width is larger
/// than 64, the value is zero filled in the unspecified high order bits.
///
/// \returns *this after assignment of RHS value.
APInt &operator=(uint64_t RHS) {
  if (!isSingleWord()) {
    // The low word receives RHS; all higher words are zero-filled.
    U.pVal[0] = RHS;
    memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
    return *this;
  }
  // Single word: store, then drop any bits above BitWidth.
  U.VAL = RHS;
  return clearUnusedBits();
}

/// Bitwise AND assignment operator.
///
/// Performs a bitwise AND operation on this APInt and RHS. The result is
/// assigned to *this.
///
/// \returns *this after ANDing with RHS.
APInt &operator&=(const APInt &RHS) {
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
  // Single-word values are ANDed inline; wider ones use the out-of-line path.
  if (isSingleWord())
    U.VAL &= RHS.U.VAL;
  else
    AndAssignSlowCase(RHS);
  return *this;
}

/// Bitwise AND assignment operator.
///
/// Performs a bitwise AND operation on this APInt and RHS. RHS is
/// logically zero-extended or truncated to match the bit-width of
/// the LHS.
APInt &operator&=(uint64_t RHS) {
  if (!isSingleWord()) {
    // RHS is zero-extended, so every word above the first ANDs to zero.
    U.pVal[0] &= RHS;
    memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
    return *this;
  }
  U.VAL &= RHS;
  return *this;
}

/// Bitwise OR assignment operator.
///
/// Performs a bitwise OR operation on this APInt and RHS. The result is
/// assigned to *this.
///
/// \returns *this after ORing with RHS.
APInt &operator|=(const APInt &RHS) {
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
  // Single-word values are ORed inline; wider ones use the out-of-line path.
  if (isSingleWord())
    U.VAL |= RHS.U.VAL;
  else
    OrAssignSlowCase(RHS);
  return *this;
}

/// Bitwise OR assignment operator.
///
/// Performs a bitwise OR operation on this APInt and RHS. RHS is
/// logically zero-extended or truncated to match the bit-width of
/// the LHS.
APInt &operator|=(uint64_t RHS) {
  if (!isSingleWord()) {
    // Only the low word can be affected by a 64-bit operand.
    U.pVal[0] |= RHS;
    return *this;
  }
  // RHS may carry bits above BitWidth; clear them after the OR.
  U.VAL |= RHS;
  return clearUnusedBits();
}

/// Bitwise XOR assignment operator.
///
/// Performs a bitwise XOR operation on this APInt and RHS. The result is
/// assigned to *this.
///
/// \returns *this after XORing with RHS.
APInt &operator^=(const APInt &RHS) {
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
  // Single-word values are XORed inline; wider ones use the out-of-line path.
  if (isSingleWord())
    U.VAL ^= RHS.U.VAL;
  else
    XorAssignSlowCase(RHS);
  return *this;
}

/// Bitwise XOR assignment operator.
///
/// Performs a bitwise XOR operation on this APInt and RHS. RHS is
/// logically zero-extended or truncated to match the bit-width of
/// the LHS.
APInt &operator^=(uint64_t RHS) {
  if (!isSingleWord()) {
    // Only the low word can be affected by a 64-bit operand.
    U.pVal[0] ^= RHS;
    return *this;
  }
  // RHS may carry bits above BitWidth; clear them after the XOR.
  U.VAL ^= RHS;
  return clearUnusedBits();
}

/// Multiplication assignment operator.
///
/// Multiplies this APInt by RHS and assigns the result to *this.
///
/// \returns *this
APInt &operator*=(const APInt &RHS);
APInt &operator*=(uint64_t RHS);

/// Addition assignment operator.
///
/// Adds RHS to *this and assigns the result to *this.
///
/// \returns *this
APInt &operator+=(const APInt &RHS);
APInt &operator+=(uint64_t RHS);

/// Subtraction assignment operator.
///
/// Subtracts RHS from *this and assigns the result to *this.
///
/// \returns *this
APInt &operator-=(const APInt &RHS);
APInt &operator-=(uint64_t RHS);

/// Left-shift assignment function.
///
/// Shifts *this left by shiftAmt and assigns the result to *this.
///
/// \returns *this after shifting left by ShiftAmt
APInt &operator<<=(unsigned ShiftAmt) {
  assert(ShiftAmt <= BitWidth && "Invalid shift amount");
  if (isSingleWord()) {
    // A shift by the full width is handled explicitly: it yields zero, and
    // for a word-wide value the built-in shift by the word size would be
    // undefined.
    if (ShiftAmt == BitWidth)
      U.VAL = 0;
    else
      U.VAL <<= ShiftAmt;
    // Bits shifted above BitWidth are discarded.
    return clearUnusedBits();
  }
  shlSlowCase(ShiftAmt);
  return *this;
}

/// Left-shift assignment function.
///
/// Shifts *this left by shiftAmt and assigns the result to *this.
///
/// \returns *this after shifting left by ShiftAmt
APInt &operator<<=(const APInt &ShiftAmt);

/// @}
/// \name Binary Operators
/// @{

/// Multiplication operator.
///
/// Multiplies this APInt by RHS and returns the result.
APInt operator*(const APInt &RHS) const;

/// Left logical shift operator.
///
/// Shifts this APInt left by \p Bits and returns the result.
APInt operator<<(unsigned Bits) const {
  // Operator spelling of shl(unsigned).
  return shl(Bits);
}

/// Left logical shift operator.
///
/// Shifts this APInt left by \p Bits and returns the result.
APInt operator<<(const APInt &Bits) const {
  // Operator spelling of shl(const APInt &).
  return shl(Bits);
}

/// Arithmetic right-shift function.
///
/// Arithmetic right-shift this APInt by shiftAmt.
APInt ashr(unsigned ShiftAmt) const {
  // Shift a copy so that *this stays untouched.
  APInt Shifted(*this);
  Shifted.ashrInPlace(ShiftAmt);
  return Shifted;
}

/// Arithmetic right-shift this APInt by ShiftAmt in place.
void ashrInPlace(unsigned ShiftAmt) {
  assert(ShiftAmt <= BitWidth && "Invalid shift amount");
  if (isSingleWord()) {
    // Work on the sign-extended word so the built-in shift replicates the
    // APInt's own sign bit.
    int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
    if (ShiftAmt == BitWidth)
      U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
    else
      U.VAL = SExtVAL >> ShiftAmt;
    // Drop the sign-extension bits above BitWidth again.
    clearUnusedBits();
    return;
  }
  ashrSlowCase(ShiftAmt);
}

/// Logical right-shift function.
///
/// Logical right-shift this APInt by shiftAmt.
APInt lshr(unsigned shiftAmt) const {
  // Shift a copy so that *this stays untouched.
  APInt Shifted(*this);
  Shifted.lshrInPlace(shiftAmt);
  return Shifted;
}

/// Logical right-shift this APInt by ShiftAmt in place.
void lshrInPlace(unsigned ShiftAmt) {
  assert(ShiftAmt <= BitWidth && "Invalid shift amount");
  if (isSingleWord()) {
    // A shift by the full width is handled explicitly: it yields zero, and
    // for a word-wide value the built-in shift by the word size would be
    // undefined.
    if (ShiftAmt == BitWidth)
      U.VAL = 0;
    else
      U.VAL >>= ShiftAmt;
    return;
  }
  lshrSlowCase(ShiftAmt);
}

/// Left-shift function.
///
/// Left-shift this APInt by shiftAmt.
APInt shl(unsigned shiftAmt) const {
  // Shift a copy so that *this stays untouched.
  APInt Shifted(*this);
  Shifted <<= shiftAmt;
  return Shifted;
}

/// Rotate left by rotateAmt.
APInt rotl(unsigned rotateAmt) const;

/// Rotate right by rotateAmt.
APInt rotr(unsigned rotateAmt) const;

/// Arithmetic right-shift function.
///
/// Arithmetic right-shift this APInt by shiftAmt.
APInt ashr(const APInt &ShiftAmt) const {
  // Shift a copy so that *this stays untouched.
  APInt Shifted(*this);
  Shifted.ashrInPlace(ShiftAmt);
  return Shifted;
}

/// Arithmetic right-shift this APInt by shiftAmt in place.
void ashrInPlace(const APInt &shiftAmt);

/// Logical right-shift function.
///
/// Logical right-shift this APInt by shiftAmt.
APInt lshr(const APInt &ShiftAmt) const {
  // Shift a copy so that *this stays untouched.
  APInt Shifted(*this);
  Shifted.lshrInPlace(ShiftAmt);
  return Shifted;
}

/// Logical right-shift this APInt by ShiftAmt in place.
void lshrInPlace(const APInt &ShiftAmt);

/// Left-shift function.
///
/// Left-shift this APInt by shiftAmt.
APInt shl(const APInt &ShiftAmt) const {
  // Shift a copy so that *this stays untouched.
  APInt Shifted(*this);
  Shifted <<= ShiftAmt;
  return Shifted;
}

/// Rotate left by rotateAmt.
APInt rotl(const APInt &rotateAmt) const;

/// Rotate right by rotateAmt.
APInt rotr(const APInt &rotateAmt) const;

/// Unsigned division operation.
///
/// Perform an unsigned divide operation on this APInt by RHS. Both this and
/// RHS are treated as unsigned quantities for purposes of this division.
///
/// \returns a new APInt value containing the division result, rounded towards
/// zero.
APInt udiv(const APInt &RHS) const;
APInt udiv(uint64_t RHS) const;

/// Signed division function for APInt.
///
/// Signed divide this APInt by APInt RHS.
///
/// The result is rounded towards zero.
APInt sdiv(const APInt &RHS) const;
APInt sdiv(int64_t RHS) const;

/// Unsigned remainder operation.
///
/// Perform an unsigned remainder operation on this APInt with RHS being the
/// divisor. Both this and RHS are treated as unsigned quantities for purposes
/// of this operation. Note that this is a true remainder operation and not a
/// modulo operation because the sign follows the sign of the dividend which
/// is *this.
///
/// \returns a new APInt value containing the remainder result
APInt urem(const APInt &RHS) const;
uint64_t urem(uint64_t RHS) const;

/// Function for signed remainder operation.
///
/// Signed remainder operation on APInt.
APInt srem(const APInt &RHS) const;
int64_t srem(int64_t RHS) const;

/// Dual division/remainder interface.
///
/// Sometimes it is convenient to divide two APInt values and obtain both the
/// quotient and remainder. This function does both operations in the same
/// computation making it a little more efficient. The pair of input arguments
/// may overlap with the pair of output arguments. It is safe to call
/// udivrem(X, Y, X, Y), for example.
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
                    APInt &Remainder);
static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
                    uint64_t &Remainder);

static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
                    APInt &Remainder);
static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
                    int64_t &Remainder);

// Operations that return overflow indicators.
APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
APInt usub_ov(const APInt &RHS, bool &Overflow) const;
APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
APInt smul_ov(const APInt &RHS, bool &Overflow) const;
APInt umul_ov(const APInt &RHS, bool &Overflow) const;
APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
APInt ushl_ov(const APInt &Amt, bool &Overflow) const;

// Operations that saturate
APInt sadd_sat(const APInt &RHS) const;
APInt uadd_sat(const APInt &RHS) const;
APInt ssub_sat(const APInt &RHS) const;
APInt usub_sat(const APInt &RHS) const;
APInt smul_sat(const APInt &RHS) const;
APInt umul_sat(const APInt &RHS) const;
APInt sshl_sat(const APInt &RHS) const;
APInt ushl_sat(const APInt &RHS) const;

/// Array-indexing support.
///
/// \returns the bit value at bitPosition
bool operator[](unsigned bitPosition) const {
  assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
  // Test the requested bit by masking it out of the word that contains it.
  return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
}

/// @}
/// \name Comparison Operators
/// @{

/// Equality operator.
///
/// Compares this APInt with RHS for the validity of the equality
/// relationship.
bool operator==(const APInt &RHS) const {
  assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
  // Single-word values compare directly; wider ones use the out-of-line path.
  if (isSingleWord())
    return U.VAL == RHS.U.VAL;
  return EqualSlowCase(RHS);
}

/// Equality operator.
///
/// Compares this APInt with a uint64_t for the validity of the equality
/// relationship.
///
/// \returns true if *this == Val
bool operator==(uint64_t Val) const {
  // A multi-word value with more than 64 active bits cannot equal a uint64_t,
  // and getZExtValue() would assert on it, so reject it up front.
  if (!isSingleWord() && getActiveBits() > 64)
    return false;
  return getZExtValue() == Val;
}

/// Equality comparison.
///
/// Compares this APInt with RHS for the validity of the equality
/// relationship.
///
/// \returns true if *this == RHS
bool eq(const APInt &RHS) const {
  // Named spelling of operator==(const APInt &).
  return *this == RHS;
}

/// Inequality operator.
///
/// Compares this APInt with RHS for the validity of the inequality
/// relationship.
///
/// \returns true if *this != RHS
bool operator!=(const APInt &RHS) const {
  // Defined as the negation of operator==.
  const bool Same = (*this == RHS);
  return !Same;
}

/// Inequality operator.
///
/// Compares this APInt with a uint64_t for the validity of the inequality
/// relationship.
///
/// \returns true if *this != Val
bool operator!=(uint64_t Val) const {
  // Defined as the negation of operator==(uint64_t).
  const bool Same = (*this == Val);
  return !Same;
}

/// Inequality comparison
///
/// Compares this APInt with RHS for the validity of the inequality
/// relationship.
///
/// \returns true if *this != RHS
bool ne(const APInt &RHS) const {
  // Named spelling of operator!=: the negation of operator==.
  const bool Same = (*this == RHS);
  return !Same;
}

/// Unsigned less than comparison
///
/// Regards both *this and RHS as unsigned quantities and compares them for
/// the validity of the less-than relationship.
///
/// \returns true if *this < RHS when both are considered unsigned.
bool ult(const APInt &RHS) const {
  // A negative result from compare() means *this orders below RHS.
  return 0 > compare(RHS);
}

/// Unsigned less than comparison
///
/// Regards *this as an unsigned quantity and compares it with RHS for
/// the validity of the less-than relationship.
///
/// \returns true if *this < RHS when considered unsigned.
bool ult(uint64_t RHS) const {
  // A multi-word value with more than 64 active bits exceeds every uint64_t.
  if (!isSingleWord() && getActiveBits() > 64)
    return false;
  return getZExtValue() < RHS;
}

/// Signed less than comparison
///
/// Regards both *this and RHS as signed quantities and compares them for
/// validity of the less-than relationship.
///
/// \returns true if *this < RHS when both are considered signed.
bool slt(const APInt &RHS) const {
  // A negative result from compareSigned() means *this orders below RHS.
  return 0 > compareSigned(RHS);
}

/// Signed less than comparison
///
/// Regards *this as a signed quantity and compares it with RHS for
/// the validity of the less-than relationship.
///
/// \returns true if *this < RHS when considered signed.
bool slt(int64_t RHS) const {
  // A value needing more than 64 signed bits lies outside the int64_t range,
  // so its sign alone decides the comparison.
  if (!isSingleWord() && getMinSignedBits() > 64)
    return isNegative();
  return getSExtValue() < RHS;
}

/// Unsigned less or equal comparison
///
/// Regards both *this and RHS as unsigned quantities and compares them for
/// validity of the less-or-equal relationship.
///
/// \returns true if *this <= RHS when both are considered unsigned.
bool ule(const APInt &RHS) const {
  // Less-or-equal holds unless compare() reports *this as greater.
  return !(compare(RHS) > 0);
}

/// Unsigned less or equal comparison
///
/// Regards *this as an unsigned quantity and compares it with RHS for
/// the validity of the less-or-equal relationship.
///
/// \returns true if *this <= RHS when considered unsigned.
bool ule(uint64_t RHS) const {
  // Less-or-equal is the complement of greater-than.
  const bool Greater = ugt(RHS);
  return !Greater;
}

/// Signed less or equal comparison
///
/// Regards both *this and RHS as signed quantities and compares them for
/// validity of the less-or-equal relationship.
///
/// \returns true if *this <= RHS when both are considered signed.
bool sle(const APInt &RHS) const {
  // Less-or-equal holds unless compareSigned() reports *this as greater.
  return !(compareSigned(RHS) > 0);
}

/// Signed less or equal comparison
///
/// Regards *this as a signed quantity and compares it with RHS for the
/// validity of the less-or-equal relationship.
///
/// \returns true if *this <= RHS when considered signed.
bool sle(uint64_t RHS) const {
  // NOTE(review): the parameter is unsigned although the comparison is
  // signed; the value is converted to int64_t when forwarded to sgt().
  const bool Greater = sgt(static_cast<int64_t>(RHS));
  return !Greater;
}

/// Unsigned greater than comparison
///
/// Regards both *this and RHS as unsigned quantities and compares them for
/// the validity of the greater-than relationship.
///
/// \returns true if *this > RHS when both are considered unsigned.
bool ugt(const APInt &RHS) const {
  // Greater-than is the complement of less-or-equal.
  const bool NotGreater = ule(RHS);
  return !NotGreater;
}

/// Unsigned greater than comparison
///
/// Regards *this as an unsigned quantity and compares it with RHS for
/// the validity of the greater-than relationship.
///
/// \returns true if *this > RHS when considered unsigned.
bool ugt(uint64_t RHS) const {
  // A multi-word value with more than 64 active bits exceeds every uint64_t.
  if (!isSingleWord() && getActiveBits() > 64)
    return true;
  return getZExtValue() > RHS;
}

/// Signed greater than comparison
///
/// Regards both *this and RHS as signed quantities and compares them for the
/// validity of the greater-than relationship.
///
/// \returns true if *this > RHS when both are considered signed.
bool sgt(const APInt &RHS) const {
  // Greater-than is the complement of less-or-equal.
  const bool NotGreater = sle(RHS);
  return !NotGreater;
}

/// Signed greater than comparison
///
/// Regards *this as a signed quantity and compares it with RHS for
/// the validity of the greater-than relationship.
///
/// \returns true if *this > RHS when considered signed.
bool sgt(int64_t RHS) const {
  // A value needing more than 64 signed bits lies outside the int64_t range,
  // so its sign alone decides the comparison.
  if (!isSingleWord() && getMinSignedBits() > 64)
    return !isNegative();
  return getSExtValue() > RHS;
}

/// Unsigned greater or equal comparison
///
/// Regards both *this and RHS as unsigned quantities and compares them for
/// validity of the greater-or-equal relationship.
///
/// \returns true if *this >= RHS when both are considered unsigned.
bool uge(const APInt &RHS) const {
  // Greater-or-equal is the complement of less-than.
  const bool Less = ult(RHS);
  return !Less;
}

/// Unsigned greater or equal comparison
///
/// Regards *this as an unsigned quantity and compares it with RHS for
/// the validity of the greater-or-equal relationship.
///
/// \returns true if *this >= RHS when considered unsigned.
bool uge(uint64_t RHS) const {
  // Greater-or-equal is the complement of less-than.
  const bool Less = ult(RHS);
  return !Less;
}

/// Signed greater or equal comparison
///
/// Regards both *this and RHS as signed quantities and compares them for
/// validity of the greater-or-equal relationship.
///
/// \returns true if *this >= RHS when both are considered signed.
bool sge(const APInt &RHS) const {
  // Greater-or-equal is the complement of less-than.
  const bool Less = slt(RHS);
  return !Less;
}

/// Signed greater or equal comparison
///
/// Regards *this as a signed quantity and compares it with RHS for
/// the validity of the greater-or-equal relationship.
///
/// \returns true if *this >= RHS when considered signed.
bool sge(int64_t RHS) const {
  // Greater-or-equal is the complement of less-than.
  const bool Less = slt(RHS);
  return !Less;
}

/// This operation tests if there are any pairs of corresponding bits
/// between this APInt and RHS that are both set.
bool intersects(const APInt &RHS) const {
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
  // Any surviving bit after the AND is set in both operands.
  if (isSingleWord())
    return (U.VAL & RHS.U.VAL) != 0;
  return intersectsSlowCase(RHS);
}

/// This operation checks that all bits set in this APInt are also set in RHS.
bool isSubsetOf(const APInt &RHS) const {
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
  // A bit set here but clear in RHS would survive the AND with ~RHS.
  if (isSingleWord())
    return (U.VAL & ~RHS.U.VAL) == 0;
  return isSubsetOfSlowCase(RHS);
}

/// @}
/// \name Resizing Operators
/// @{

/// Truncate to new width.
///
/// Truncate the APInt to a specified width. It is an error to specify a width
/// that is greater than or equal to the current width.
APInt trunc(unsigned width) const;

/// Truncate to new width with unsigned saturation.
///
/// If the APInt, treated as unsigned integer, can be losslessly truncated to
/// the new bitwidth, then return truncated APInt. Else, return max value.
APInt truncUSat(unsigned width) const;

/// Truncate to new width with signed saturation.
///
/// If this APInt, treated as signed integer, can be losslessly truncated to
/// the new bitwidth, then return truncated APInt. Else, return either
/// signed min value if the APInt was negative, or signed max value.
APInt truncSSat(unsigned width) const;

/// Sign extend to a new width.
///
/// This operation sign extends the APInt to a new width. If the high order
/// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
/// It is an error to specify a width that is less than or equal to the
/// current width.
APInt sext(unsigned width) const;

/// Zero extend to a new width.
///
/// This operation zero extends the APInt to a new width. The high order bits
/// are filled with 0 bits.  It is an error to specify a width that is less
/// than or equal to the current width.
APInt zext(unsigned width) const;

/// Sign extend or truncate to width
///
/// Make this APInt have the bit width given by \p width. The value is sign
/// extended, truncated, or left alone to make it that width.
APInt sextOrTrunc(unsigned width) const;

/// Zero extend or truncate to width
///
/// Make this APInt have the bit width given by \p width. The value is zero
/// extended, truncated, or left alone to make it that width.
APInt zextOrTrunc(unsigned width) const;

/// Truncate to width
///
/// Make this APInt have the bit width given by \p width. The value is
/// truncated or left alone to make it that width.
APInt truncOrSelf(unsigned width) const;

/// Sign extend to width, or leave unchanged
///
/// Make this APInt have the bit width given by \p width. The value is sign
/// extended, or left alone to make it that width.
APInt sextOrSelf(unsigned width) const;

/// Zero extend to width, or leave unchanged
///
/// Make this APInt have the bit width given by \p width. The value is zero
/// extended, or left alone to make it that width.
APInt zextOrSelf(unsigned width) const;

/// @}
/// \name Bit Manipulation Operators
/// @{

/// Set every bit to 1.
void setAllBits() {
  if (!isSingleWord()) {
    // Fill every byte of every word with 0xFF.
    memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
  } else {
    U.VAL = WORDTYPE_MAX;
  }
  // The top word may now have bits set above BitWidth; clear them.
  clearUnusedBits();
}

/// Set a given bit to 1.
///
/// Set the given bit to 1 whose position is given as "bitPosition".
void setBit(unsigned BitPosition) {
  assert(BitPosition < BitWidth && "BitPosition out of range");
  // Build a mask for the bit, then OR it into the word that holds it.
  WordType Mask = maskBit(BitPosition);
  if (isSingleWord())
    U.VAL |= Mask;
  else
    U.pVal[whichWord(BitPosition)] |= Mask;
}

/// Set the sign bit to 1.
void setSignBit() {
  // The sign bit is the most significant bit of the value.
  const unsigned TopBit = BitWidth - 1;
  setBit(TopBit);
}

/// Set a given bit to a given value.
void setBitVal(unsigned BitPosition, bool BitValue) {
  // Dispatch to the dedicated clear/set helper for the requested value.
  if (!BitValue) {
    clearBit(BitPosition);
    return;
  }
  setBit(BitPosition);
}

/// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
/// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
/// setBits when \p loBit < \p hiBit.
/// For \p loBit == \p hiBit wrap case, set every bit to 1.
void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
  assert(hiBit <= BitWidth && "hiBit out of range");
  assert(loBit <= BitWidth && "loBit out of range");
  // Ordinary, non-wrapping range.
  if (loBit < hiBit) {
    setBits(loBit, hiBit);
    return;
  }
  // Wrapped range: [0, hiBit) at the bottom plus [loBit, BitWidth) at the
  // top. When loBit == hiBit the two pieces together cover every bit.
  setLowBits(hiBit);
  setHighBits(BitWidth - loBit);
}

/// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
/// This function handles case when \p loBit <= \p hiBit.
void setBits(unsigned loBit, unsigned hiBit) {
  assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h"
, 1482, __extension__ __PRETTY_FUNCTION__));
6
←
Assuming 'hiBit' is <= field 'BitWidth'→
7
←
'?' condition is true→
  assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h"
, 1483, __extension__ __PRETTY_FUNCTION__));
8
←
'?' condition is true→
  assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast <bool> (loBit <= hiBit && "loBit greater than hiBit"
) ? void (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h"
, 1484, __extension__ __PRETTY_FUNCTION__));
9
←
'?' condition is true→
  if (loBit == hiBit)
10
←
Assuming 'loBit' is not equal to 'hiBit'→
    return;
  if (loBit10.1
'loBit' is < APINT_BITS_PER_WORD
1
'loBit' is < APINT_BITS_PER_WORD
 < APINT_BITS_PER_WORD && hiBit10.2
'hiBit' is <= APINT_BITS_PER_WORD
2
'hiBit' is <= APINT_BITS_PER_WORD
 <= APINT_BITS_PER_WORD) {
11
←
Taking true branch→
    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
    mask <<= loBit;
    if (isSingleWord())
12
←
Taking true branch→
      U.VAL |= mask;
    else
      U.pVal[0] |= mask;
  } else {
    setBitsSlowCase(loBit, hiBit);
  }
}

/// Set the top bits starting from loBit.
void setBitsFrom(unsigned loBit) {
  // Covers the half-open range [loBit, BitWidth).
  setBits(loBit, BitWidth);
}

/// Set the bottom loBits bits.
void setLowBits(unsigned loBits) {
  // Covers the half-open range [0, loBits).
  return setBits(0, loBits);
}

/// Set the top hiBits bits.
void setHighBits(unsigned hiBits) {
  // Covers the half-open range [BitWidth - hiBits, BitWidth).
  const unsigned FirstBit = BitWidth - hiBits;
  setBits(FirstBit, BitWidth);
}

/// Set every bit to 0.
void clearAllBits() {
  if (!isSingleWord()) {
    // Zero every word of the multi-word buffer.
    memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
    return;
  }
  U.VAL = 0;
}

/// Set a given bit to 0.
///
/// Set the given bit to 0 whose position is given as "bitPosition".
void clearBit(unsigned BitPosition) {
  assert(BitPosition < BitWidth && "BitPosition out of range");
  // Inverted single-bit mask: ANDing with it clears only the target bit.
  WordType Mask = ~maskBit(BitPosition);
  if (isSingleWord())
    U.VAL &= Mask;
  else
    U.pVal[whichWord(BitPosition)] &= Mask;
}

/// Set bottom loBits bits to 0.
void clearLowBits(unsigned loBits) {
  assert(loBits <= BitWidth && "More bits than bitwidth");
  // Keep only the top (BitWidth - loBits) bits; the AND clears the rest.
  APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
  *this &= Keep;
}

/// Set the sign bit to 0.
void clearSignBit() {
  // The sign bit is the most significant bit of the value.
  const unsigned TopBit = BitWidth - 1;
  clearBit(TopBit);
}

/// Toggle every bit to its opposite value.
void flipAllBits() {
  if (!isSingleWord()) {
    flipAllBitsSlowCase();
    return;
  }
  // XOR with all-ones inverts the word; then drop the bits above BitWidth.
  U.VAL ^= WORDTYPE_MAX;
  clearUnusedBits();
}

/// Toggles a given bit to its opposite value.
///
/// Toggle a given bit to its opposite value whose position is given
/// as "bitPosition".
void flipBit(unsigned bitPosition);

/// Negate this APInt in place.
void negate() {
  // Two's-complement negation: invert every bit, then add one.
  flipAllBits();
  ++(*this);
}

/// Insert the bits from a smaller APInt starting at bitPosition.
void insertBits(const APInt &SubBits, unsigned bitPosition);
void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);

/// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
APInt extractBits(unsigned numBits, unsigned bitPosition) const;
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;

/// @}
/// \name Value Characterization Functions
/// @{

/// Return the number of bits in the APInt.
unsigned getBitWidth() const {
  // Plain accessor for the stored width.
  return BitWidth;
}

/// Get the number of words.
///
/// Here one word's bitwidth equals to that of uint64_t.
///
/// \returns the number of words to hold the integer value of this APInt.
unsigned getNumWords() const {
  // Forward this value's width to the static overload.
  return getNumWords(BitWidth);
}

/// Get the number of words.
///
/// *NOTE* Here one word's bitwidth equals to that of uint64_t.
///
/// \returns the number of words to hold the integer value with a given bit
/// width.
static unsigned getNumWords(unsigned BitWidth) {
  // Ceiling division by the word size. The width is widened to 64 bits first,
  // presumably so the round-up addition cannot wrap for very large widths.
  const uint64_t RoundedUp =
      static_cast<uint64_t>(BitWidth) + APINT_BITS_PER_WORD - 1;
  return RoundedUp / APINT_BITS_PER_WORD;
}

/// Compute the number of active bits in the value
///
/// This function returns the number of active bits which is defined as the
/// bit width minus the number of leading zeros. This is used in several
/// computations to see how "wide" the value is.
unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }

/// Compute the number of active words in the value of this APInt.
///
/// This is used in conjunction with getActiveData to extract the raw value of
/// the APInt.
unsigned getActiveWords() const {
  unsigned numActiveBits = getActiveBits();
  return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
}

/// Get the minimum bit size for this signed APInt
///
/// Computes the minimum bit width for this APInt while considering it to be a
/// signed (and probably negative) value. If the value is not negative, this
/// function returns the same value as getActiveBits()+1. Otherwise, it
/// returns the smallest bit width that will retain the negative value. For
/// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
/// for -1, this function will always return 1.
unsigned getMinSignedBits() const {
  // All sign bits except one are redundant copies and can be dropped.
  const unsigned SignBits = getNumSignBits();
  return BitWidth - SignBits + 1;
}

/// Get zero extended value
///
/// This method attempts to return the value of this APInt as a zero extended
/// uint64_t. The bitwidth must be <= 64 or the value must fit within a
/// uint64_t. Otherwise an assertion will result.
uint64_t getZExtValue() const {
  if (isSingleWord())
    return U.VAL;
  // A multi-word value is only representable if nothing above the low word
  // is set; in that case the low word is the whole value.
  assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
  return U.pVal[0];
}

/// Get sign extended value
///
/// This method attempts to return the value of this APInt as a sign extended
/// int64_t. The bit width must be <= 64 or the value must fit within an
/// int64_t. Otherwise an assertion will result.
int64_t getSExtValue() const {
  // Single-word values are sign-extended from BitWidth bits.
  if (isSingleWord())
    return SignExtend64(U.VAL, BitWidth);
  // Multi-word values must fit in 64 signed bits; then the lowest word
  // already holds the whole value.
  assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
  return int64_t(U.pVal[0]);
}

/// Get bits required for string value.
///
/// This method determines how many bits are required to hold the APInt
/// equivalent of the string given by \p str.
static unsigned getBitsNeeded(StringRef str, uint8_t radix);

/// The APInt version of the countLeadingZeros functions in
///   MathExtras.h.
///
/// It counts the number of zeros from the most significant bit to the first
/// one bit.
///
/// \returns BitWidth if the value is zero, otherwise returns the number of
///   zeros from the most significant bit to the first one bit.
unsigned countLeadingZeros() const {
  if (!isSingleWord())
    return countLeadingZerosSlowCase();
  // The word-level count also covers the bits of the word above BitWidth;
  // subtract them so only bits of the value are counted.
  const unsigned PaddingBits = APINT_BITS_PER_WORD - BitWidth;
  return llvm::countLeadingZeros(U.VAL) - PaddingBits;
}

/// Count the number of leading one bits.
///
/// This function is an APInt version of the countLeadingOnes
/// functions in MathExtras.h. It counts the number of ones from the most
/// significant bit to the first zero bit.
///
/// \returns 0 if the high order bit is not set, otherwise returns the number
/// of consecutive 1 bits counted down from the most significant bit.
unsigned countLeadingOnes() const {
  if (isSingleWord()) {
    // A zero-width value has no bits to count. Without this guard the shift
    // below would be by APINT_BITS_PER_WORD, which is undefined if that is
    // the full width of U.VAL (as the name implies).
    if (BitWidth == 0)
      return 0;
    // Move the value's top bit to the top of the word so the word-level
    // count starts at the right place.
    return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
  }
  return countLeadingOnesSlowCase();
}

/// Computes the number of leading bits of this APInt that are equal to its
/// sign bit.
unsigned getNumSignBits() const {
  // Count the run of leading ones for negative values, leading zeros
  // otherwise.
  if (isNegative())
    return countLeadingOnes();
  return countLeadingZeros();
}

/// Count the number of trailing zero bits.
///
/// This function is an APInt version of the countTrailingZeros
/// functions in MathExtras.h. It counts the number of zeros from the least
/// significant bit to the first set bit.
///
/// \returns BitWidth if the value is zero, otherwise returns the number of
/// zeros from the least significant bit to the first one bit.
unsigned countTrailingZeros() const {
  if (!isSingleWord())
    return countTrailingZerosSlowCase();
  // Never report more zeros than the value has bits.
  const unsigned WordCount = llvm::countTrailingZeros(U.VAL);
  if (WordCount > BitWidth)
    return BitWidth;
  return WordCount;
}

/// Count the number of trailing one bits.
///
/// This function is an APInt version of the countTrailingOnes
/// functions in MathExtras.h. It counts the number of ones from the least
/// significant bit to the first zero bit.
///
/// \returns BitWidth if the value is all ones, otherwise returns the number
/// of ones from the least significant bit to the first zero bit.
unsigned countTrailingOnes() const {
  // Single-word values use the word-level helper directly.
  return isSingleWord() ? llvm::countTrailingOnes(U.VAL)
                        : countTrailingOnesSlowCase();
}

/// Count the number of bits set.
///
/// This function is an APInt version of the countPopulation functions
/// in MathExtras.h. It counts the number of 1 bits in the APInt value.
///
/// \returns 0 if the value is zero, otherwise returns the number of set bits.
unsigned countPopulation() const {
  // Single-word values use the word-level helper directly.
  return isSingleWord() ? llvm::countPopulation(U.VAL)
                        : countPopulationSlowCase();
}

/// @}
/// \name Conversion Functions
/// @{
void print(raw_ostream &OS, bool isSigned) const;

/// Converts an APInt to a string and append it to Str.  Str is commonly a
/// SmallString.
void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
              bool formatAsCLiteral = false) const;

/// Considers the APInt to be unsigned and converts it into a string in the
/// radix given. The radix can be 2, 8, 10, 16, or 36.
void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
  // Forward to the general routine: unsigned, not formatted as a C literal.
  const bool Signed = false;
  const bool AsCLiteral = false;
  toString(Str, Radix, Signed, AsCLiteral);
}

/// Considers the APInt to be signed and converts it into a string in the
/// radix given. The radix can be 2, 8, 10, 16, or 36.
void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
  // Forward to the general routine: signed, not formatted as a C literal.
  const bool Signed = true;
  const bool AsCLiteral = false;
  toString(Str, Radix, Signed, AsCLiteral);
}

/// \returns a byte-swapped representation of this APInt Value.
APInt byteSwap() const;

/// \returns the value with the bit representation reversed of this APInt
/// Value.
APInt reverseBits() const;

/// Converts this APInt to a double value.
double roundToDouble(bool isSigned) const;

/// Converts this unsigned APInt to a double value.
double roundToDouble() const {
  const bool IsSigned = false;
  return roundToDouble(IsSigned);
}

/// Converts this signed APInt to a double value.
double signedRoundToDouble() const {
  const bool IsSigned = true;
  return roundToDouble(IsSigned);
}

/// Converts APInt bits to a double
///
/// The conversion does not do a translation from integer to double, it just
/// re-interprets the bits as a double. Note that it is valid to do this on
/// any bit width. Exactly 64 bits will be translated.
double bitsToDouble() const {
  // Only the word returned by getWord(0) takes part in the reinterpretation.
  const auto LowWord = getWord(0);
  return BitsToDouble(LowWord);
}

/// Converts APInt bits to a float
///
/// The conversion does not do a translation from integer to float, it just
/// re-interprets the bits as a float. Note that it is valid to do this on
/// any bit width. Exactly 32 bits will be translated.
float bitsToFloat() const {
  // Truncate the word returned by getWord(0) to 32 bits before
  // reinterpreting it.
  const auto Low32 = static_cast<uint32_t>(getWord(0));
  return BitsToFloat(Low32);
}

/// Converts a double to APInt bits.
///
/// The conversion does not do a translation from double to integer, it just
/// re-interprets the bits of the double.
static APInt doubleToBits(double V) {
  // The result is exactly as wide as a double, in bits.
  return APInt(sizeof(double) * CHAR_BIT, DoubleToBits(V));
}

/// Converts a float to APInt bits.
///
/// The conversion does not do a translation from float to integer, it just
/// re-interprets the bits of the float.
static APInt floatToBits(float V) {
  // The result is exactly as wide as a float, in bits.
  return APInt(sizeof(float) * CHAR_BIT, FloatToBits(V));
}

/// @}
/// \name Mathematics Operations
/// @{

/// \returns the floor log base 2 of this APInt.
unsigned logBase2() const { return getActiveBits() -  1; }

/// \returns the ceil log base 2 of this APInt.
unsigned ceilLogBase2() const {
  APInt temp(*this);
  --temp;
  return temp.getActiveBits();
}

/// \returns the nearest log base 2 of this APInt. Ties round up.
///
/// NOTE: When we have a BitWidth of 1, we define:
///
///   log2(0) = UINT32_MAX
///   log2(1) = 0
///
/// to get around any mathematical concerns resulting from
/// referencing 2 in a space where 2 does not exist.
unsigned nearestLogBase2() const {
  // Special case when we have a bitwidth of 1. If VAL is 1, then we
  // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
  // UINT32_MAX.
  if (BitWidth == 1)
    return U.VAL - 1;

  // Handle the zero case.
  if (isNullValue())
    return UINT32_MAX;

  // The non-zero case is handled by computing:
  //
  //   nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
  //
  // where x[i] is referring to the value of the ith bit of x.
  unsigned lg = logBase2();

  // A value of exactly 1 has lg == 0 and no bit below the top one. Evaluating
  // x[lg - 1] would index bit UINT_MAX because of unsigned wrap-around, so
  // return log2(1) = 0 directly.
  if (lg == 0)
    return 0;

  return lg + unsigned((*this)[lg - 1]);
}

/// \returns the log base 2 of this APInt if it is an exact power of two, -1
/// otherwise.
int32_t exactLogBase2() const {
  // Only exact powers of two have an exact logarithm.
  if (isPowerOf2())
    return logBase2();
  return -1;
}

/// Compute the square root
APInt sqrt() const;

/// Get the absolute value.
///
/// If *this is < 0 then return -(*this), otherwise *this;
APInt abs() const {
  // Non-negative values are returned unchanged.
  if (!isNegative())
    return *this;
  return -(*this);
}

/// \returns the multiplicative inverse for a given modulo.
APInt multiplicativeInverse(const APInt &modulo) const;

/// @}
/// \name Support for division by constant
/// @{

/// Calculate the magic number for signed division by a constant.
struct ms;
ms magic() const;

/// Calculate the magic number for unsigned division by a constant.
struct mu;
mu magicu(unsigned LeadingZeros = 0) const;

/// @}
/// \name Building-block Operations for APInt and APFloat
/// @{

// These building block operations operate on a representation of arbitrary
// precision, two's-complement, bignum integer values. They should be
// sufficient to implement APInt and APFloat bignum requirements. Inputs are
// generally a pointer to the base of an array of integer parts, representing
// an unsigned bignum, and a count of how many parts there are.

/// Sets the least significant part of a bignum to the input value, and zeroes
/// out higher parts.
static void tcSet(WordType *, WordType, unsigned);

/// Assign one bignum to another.
static void tcAssign(WordType *, const WordType *, unsigned);

/// Returns true if a bignum is zero, false otherwise.
static bool tcIsZero(const WordType *, unsigned);

/// Extract the given bit of a bignum; returns 0 or 1.  Zero-based.
static int tcExtractBit(const WordType *, unsigned bit);

/// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
/// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
/// significant bit of DST.  All high bits above srcBITS in DST are
/// zero-filled.
static void tcExtract(WordType *, unsigned dstCount,
                      const WordType *, unsigned srcBits,
                      unsigned srcLSB);

/// Set the given bit of a bignum.  Zero-based.
static void tcSetBit(WordType *, unsigned bit);

/// Clear the given bit of a bignum.  Zero-based.
static void tcClearBit(WordType *, unsigned bit);

/// Returns the bit number of the least or most significant set bit of a
/// number.  If the input number has no bits set -1U is returned.
static unsigned tcLSB(const WordType *, unsigned n);
static unsigned tcMSB(const WordType *parts, unsigned n);

/// Negate a bignum in-place.
static void tcNegate(WordType *, unsigned);

/// DST += RHS + CARRY where CARRY is zero or one.  Returns the carry flag.
static WordType tcAdd(WordType *, const WordType *,
                      WordType carry, unsigned);
/// DST += RHS.  Returns the carry flag.
static WordType tcAddPart(WordType *, WordType, unsigned);

/// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
static WordType tcSubtract(WordType *, const WordType *,
                           WordType carry, unsigned);
/// DST -= RHS.  Returns the carry flag.
static WordType tcSubtractPart(WordType *, WordType, unsigned);

/// DST += SRC * MULTIPLIER + PART   if add is true
/// DST  = SRC * MULTIPLIER + PART   if add is false
///
/// Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC they must
/// start at the same point, i.e. DST == SRC.
///
/// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
/// Otherwise DST is filled with the least significant DSTPARTS parts of the
/// result, and if all of the omitted higher parts were zero return zero,
/// otherwise overflow occurred and return one.
static int tcMultiplyPart(WordType *dst, const WordType *src,
                          WordType multiplier, WordType carry,
                          unsigned srcParts, unsigned dstParts,
                          bool add);

/// DST = LHS * RHS, where DST has the same width as the operands and is
/// filled with the least significant parts of the result.  Returns one if
/// overflow occurred, otherwise zero.  DST must be disjoint from both
/// operands.
static int tcMultiply(WordType *, const WordType *, const WordType *,
                      unsigned);

/// DST = LHS * RHS, where DST has width the sum of the widths of the
/// operands. No overflow occurs. DST must be disjoint from both operands.
static void tcFullMultiply(WordType *, const WordType *,
                           const WordType *, unsigned, unsigned);

/// If RHS is zero LHS and REMAINDER are left unchanged, return one.
/// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
/// REMAINDER to the remainder, return zero.  i.e.
///
///  OLD_LHS = RHS * LHS + REMAINDER
///
/// SCRATCH is a bignum of the same size as the operands and result for use by
/// the routine; its contents need not be initialized and are destroyed.  LHS,
/// REMAINDER and SCRATCH must be distinct.
static int tcDivide(WordType *lhs, const WordType *rhs,
                    WordType *remainder, WordType *scratch,
                    unsigned parts);

/// Shift a bignum left Count bits. Shifted in bits are zero. There are no
/// restrictions on Count.
static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);

/// Shift a bignum right Count bits.  Shifted in bits are zero.  There are no
/// restrictions on Count.
static void tcShiftRight(WordType *, unsigned Words, unsigned Count);

/// The obvious AND, OR and XOR and complement operations.
static void tcAnd(WordType *, const WordType *, unsigned);
static void tcOr(WordType *, const WordType *, unsigned);
static void tcXor(WordType *, const WordType *, unsigned);
static void tcComplement(WordType *, unsigned);

/// Comparison (unsigned) of two bignums.
static int tcCompare(const WordType *, const WordType *, unsigned);

/// Increment a bignum in-place.  Return the carry flag.
static WordType tcIncrement(WordType *dst, unsigned parts) {
  // An increment is an add of the single-part value 1; the result of
  // tcAddPart is passed through.
  const WordType One = 1;
  return tcAddPart(dst, One, parts);
}

/// Decrement a bignum in-place.  Return the borrow flag.
static WordType tcDecrement(WordType *dst, unsigned parts) {
  // A decrement is a subtract of the single-part value 1; the result of
  // tcSubtractPart is passed through.
  const WordType One = 1;
  return tcSubtractPart(dst, One, parts);
}

/// Set the least significant BITS and clear the rest.
static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);

/// debug method
void dump() const;

/// @}
2015};

2017/// Magic data for optimising signed division by a constant.
2018struct APInt::ms {
APInt m;    ///< magic number
unsigned s; ///< shift amount
2021};

2023/// Magic data for optimising unsigned division by a constant.
2024struct APInt::mu {
APInt m;    ///< magic number
bool a;     ///< add indicator
unsigned s; ///< shift amount
2028};

2030inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }

2032inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }

2034/// Unary bitwise complement operator.
2035///
2036/// \returns an APInt that is the bitwise complement of \p v.
2037inline APInt operator~(APInt v) {
v.flipAllBits();
return v;
2040}

2042inline APInt operator&(APInt a, const APInt &b) {
a &= b;
return a;
2045}

2047inline APInt operator&(const APInt &a, APInt &&b) {
b &= a;
return std::move(b);
2050}

2052inline APInt operator&(APInt a, uint64_t RHS) {
a &= RHS;
return a;
2055}

2057inline APInt operator&(uint64_t LHS, APInt b) {
b &= LHS;
return b;
2060}

2062inline APInt operator|(APInt a, const APInt &b) {
a |= b;
return a;
2065}

2067inline APInt operator|(const APInt &a, APInt &&b) {
b |= a;
return std::move(b);
2070}

2072inline APInt operator|(APInt a, uint64_t RHS) {
a |= RHS;
return a;
2075}

2077inline APInt operator|(uint64_t LHS, APInt b) {
b |= LHS;
return b;
2080}

2082inline APInt operator^(APInt a, const APInt &b) {
a ^= b;
return a;
2085}

2087inline APInt operator^(const APInt &a, APInt &&b) {
b ^= a;
return std::move(b);
2090}

2092inline APInt operator^(APInt a, uint64_t RHS) {
a ^= RHS;
return a;
2095}

2097inline APInt operator^(uint64_t LHS, APInt b) {
b ^= LHS;
return b;
2100}

2102inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
I.print(OS, true);
return OS;
2105}

2107inline APInt operator-(APInt v) {
v.negate();
return v;
2110}

2112inline APInt operator+(APInt a, const APInt &b) {
a += b;
return a;
2115}

2117inline APInt operator+(const APInt &a, APInt &&b) {
b += a;
return std::move(b);
2120}

2122inline APInt operator+(APInt a, uint64_t RHS) {
a += RHS;
return a;
2125}

2127inline APInt operator+(uint64_t LHS, APInt b) {
b += LHS;
return b;
2130}

2132inline APInt operator-(APInt a, const APInt &b) {
a -= b;
return a;
2135}

2137inline APInt operator-(const APInt &a, APInt &&b) {
b.negate();
b += a;
return std::move(b);
2141}

2143inline APInt operator-(APInt a, uint64_t RHS) {
a -= RHS;
return a;
2146}

2148inline APInt operator-(uint64_t LHS, APInt b) {
b.negate();
b += LHS;
return b;
2152}

2154inline APInt operator*(APInt a, uint64_t RHS) {
a *= RHS;
return a;
2157}

2159inline APInt operator*(uint64_t LHS, APInt b) {
b *= LHS;
return b;
2162}


2165namespace APIntOps {

2167/// Determine the smaller of two APInts considered to be signed.
2168inline const APInt &smin(const APInt &A, const APInt &B) {
return A.slt(B) ? A : B;
2170}

2172/// Determine the larger of two APInts considered to be signed.
2173inline const APInt &smax(const APInt &A, const APInt &B) {
return A.sgt(B) ? A : B;
2175}

2177/// Determine the smaller of two APInts considered to be unsigned.
2178inline const APInt &umin(const APInt &A, const APInt &B) {
return A.ult(B) ? A : B;
2180}

2182/// Determine the larger of two APInts considered to be unsigned.
2183inline const APInt &umax(const APInt &A, const APInt &B) {
return A.ugt(B) ? A : B;
2185}

2187/// Compute GCD of two unsigned APInt values.
2188///
2189/// This function returns the greatest common divisor of the two APInt values
2190/// using Stein's algorithm.
2191///
2192/// \returns the greatest common divisor of A and B.
2193APInt GreatestCommonDivisor(APInt A, APInt B);

2195/// Converts the given APInt to a double value.
2196///
2197/// Treats the APInt as an unsigned value for conversion purposes.
2198inline double RoundAPIntToDouble(const APInt &APIVal) {
return APIVal.roundToDouble();
2200}

2202/// Converts the given APInt to a double value.
2203///
2204/// Treats the APInt as a signed value for conversion purposes.
2205inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
return APIVal.signedRoundToDouble();
2207}

2209/// Converts the given APInt to a float value.
2210inline float RoundAPIntToFloat(const APInt &APIVal) {
return float(RoundAPIntToDouble(APIVal));
2212}

2214/// Converts the given APInt to a float value.
2215///
2216/// Treats the APInt as a signed value for conversion purposes.
2217inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
return float(APIVal.signedRoundToDouble());
2219}

2221/// Converts the given double value into a APInt.
2222///
2223/// This function convert a double value to an APInt value.
2224APInt RoundDoubleToAPInt(double Double, unsigned width);

2226/// Converts a float value into a APInt.
2227///
2228/// Converts a float value into an APInt value.
2229inline APInt RoundFloatToAPInt(float Float, unsigned width) {
return RoundDoubleToAPInt(double(Float), width);
2231}

2233/// Return A unsign-divided by B, rounded by the given rounding mode.
2234APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);

2236/// Return A sign-divided by B, rounded by the given rounding mode.
2237APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);

2239/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2240/// (e.g. 32 for i32).
2241/// This function finds the smallest number n, such that
2242/// (a) n >= 0 and q(n) = 0, or
2243/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2244///     integers, belong to two different intervals [Rk, Rk+R),
2245///     where R = 2^BW, and k is an integer.
2246/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2247/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2248/// subtraction (treated as addition of negated numbers) would always
2249/// count as an overflow, but here we want to allow values to decrease
2250/// and increase as long as they are within the same interval.
2251/// Specifically, adding of two negative numbers should not cause an
2252/// overflow (as long as the magnitude does not exceed the bit width).
2253/// On the other hand, given a positive number, adding a negative
2254/// number to it can give a negative result, which would cause the
2255/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2256/// treated as a special case of an overflow.
2257///
2258/// This function returns None if after finding k that minimizes the
2259/// positive solution to q(n) = kR, both solutions are contained between
2260/// two consecutive integers.
2261///
2262/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2263/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2264/// virtue of *signed* overflow. This function will *not* find such an n,
2265/// however it may find a value of n satisfying the inequalities due to
2266/// an *unsigned* overflow (if the values are treated as unsigned).
2267/// To find a solution for a signed overflow, treat it as a problem of
2268/// finding an unsigned overflow with a range with of BW-1.
2269///
2270/// The returned value may have a different bit width from the input
2271/// coefficients.
2272Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
                                         unsigned RangeWidth);

2275/// Compare two values, and if they are different, return the position of the
2276/// most significant bit that is different in the values.
2277Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
                                                const APInt &B);

2280} // End of APIntOps namespace

2282// See friend declaration above. This additional declaration is required in
2283// order to compile LLVM with IBM xlC compiler.
2284hash_code hash_value(const APInt &Arg);

2286/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2287/// with the integer held in IntVal.
2288void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);

2290/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2291/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2292void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);

2294/// Provide DenseMapInfo for APInt.
2295template <> struct DenseMapInfo<APInt> {
static inline APInt getEmptyKey() {
  APInt V(nullptr, 0);
  V.U.VAL = 0;
  return V;
}

static inline APInt getTombstoneKey() {
  APInt V(nullptr, 0);
  V.U.VAL = 1;
  return V;
}

static unsigned getHashValue(const APInt &Key);

static bool isEqual(const APInt &LHS, const APInt &RHS) {
  return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
}
2313};

2315} // namespace llvm

2317#endif