/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/CodeGen/ExecutionDomainFix.h

Bug Summary

File:	llvm/include/llvm/CodeGen/ExecutionDomainFix.h
Warning:	line 79, column 35 The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ExecutionDomainFix.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include 
-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen/ExecutionDomainFix.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen/ExecutionDomainFix.cpp

→

1//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//

9#include "llvm/CodeGen/ExecutionDomainFix.h"
10#include "llvm/CodeGen/MachineRegisterInfo.h"
11#include "llvm/CodeGen/TargetInstrInfo.h"
12#include "llvm/Support/Debug.h"

14using namespace llvm;

16#define DEBUG_TYPE"execution-deps-fix" "execution-deps-fix"

18iterator_range<SmallVectorImpl<int>::const_iterator>
19ExecutionDomainFix::regIndices(unsigned Reg) const {
assert(Reg < AliasMap.size() && "Invalid register")(static_cast<void> (0));
const auto &Entry = AliasMap[Reg];
return make_range(Entry.begin(), Entry.end());
23}

25DomainValue *ExecutionDomainFix::alloc(int domain) {
DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue
                                : Avail.pop_back_val();
if (domain >= 0)
  dv->addDomain(domain);
assert(dv->Refs == 0 && "Reference count wasn't cleared")(static_cast<void> (0));
assert(!dv->Next && "Chained DomainValue shouldn't have been recycled")(static_cast<void> (0));
return dv;
33}

35void ExecutionDomainFix::release(DomainValue *DV) {
while (DV) {
  assert(DV->Refs && "Bad DomainValue")(static_cast<void> (0));
  if (--DV->Refs)
    return;

  // There are no more DV references. Collapse any contained instructions.
  if (DV->AvailableDomains && !DV->isCollapsed())
    collapse(DV, DV->getFirstDomain());

  DomainValue *Next = DV->Next;
  DV->clear();
  Avail.push_back(DV);
  // Also release the next DomainValue in the chain.
  DV = Next;
}
51}

/// Follow the Next chain starting at DVRef to its last DomainValue and return
/// it. If a chain was followed, DVRef is re-pointed at that last value: the
/// new target is retained and the old one is released. A null DVRef, or one
/// without a Next link, is returned unchanged.
///
/// NOTE: the bare numbers, arrows and "Assuming ..."/"Returning ..." lines
/// interleaved with the code below are static-analyzer path notes from the
/// report, not program text.
53DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) {
DomainValue *DV = DVRef;
// Fast path: nothing to resolve when there is no value or no chain.
if (!DV || !DV->Next)
23
←
Assuming 'DV' is non-null→
24
←
Assuming field 'Next' is null→
25
←
Taking true branch→
  return DV;
26
←
Returning without writing to 'DVRef->Instrs.Size', which participates in a condition later→
27
←
Returning pointer (loaded from 'DV'), which participates in a condition later→

// DV has a chain. Find the end.
do
  DV = DV->Next;
while (DV->Next);

// Update DVRef to point to DV.
// The new target is retained before the old head is released: release()
// walks down the Next chain, so it could otherwise drop DV's last reference.
retain(DV);
release(DVRef);
DVRef = DV;
return DV;
68}

70void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) {
assert(unsigned(rx) < NumRegs && "Invalid index")(static_cast<void> (0));
assert(!LiveRegs.empty() && "Must enter basic block first.")(static_cast<void> (0));

if (LiveRegs[rx] == dv)
  return;
if (LiveRegs[rx])
  release(LiveRegs[rx]);
LiveRegs[rx] = retain(dv);
79}

81void ExecutionDomainFix::kill(int rx) {
assert(unsigned(rx) < NumRegs && "Invalid index")(static_cast<void> (0));
assert(!LiveRegs.empty() && "Must enter basic block first.")(static_cast<void> (0));
if (!LiveRegs[rx])
  return;

release(LiveRegs[rx]);
LiveRegs[rx] = nullptr;
89}

91void ExecutionDomainFix::force(int rx, unsigned domain) {
assert(unsigned(rx) < NumRegs && "Invalid index")(static_cast<void> (0));
assert(!LiveRegs.empty() && "Must enter basic block first.")(static_cast<void> (0));
if (DomainValue *dv = LiveRegs[rx]) {
  if (dv->isCollapsed())
    dv->addDomain(domain);
  else if (dv->hasDomain(domain))
    collapse(dv, domain);
  else {
    // This is an incompatible open DomainValue. Collapse it to whatever and
    // force the new value into domain. This costs a domain crossing.
    collapse(dv, dv->getFirstDomain());
    assert(LiveRegs[rx] && "Not live after collapse?")(static_cast<void> (0));
    LiveRegs[rx]->addDomain(domain);
  }
} else {
  // Set up basic collapsed DomainValue.
  setLiveReg(rx, alloc(domain));
}
110}

112void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) {
assert(dv->hasDomain(domain) && "Cannot collapse")(static_cast<void> (0));

// Collapse all the instructions.
while (!dv->Instrs.empty())
  TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
dv->setSingleDomain(domain);

// If there are multiple users, give them new, unique DomainValues.
if (!LiveRegs.empty() && dv->Refs > 1)
  for (unsigned rx = 0; rx != NumRegs; ++rx)
    if (LiveRegs[rx] == dv)
      setLiveReg(rx, alloc(domain));
125}

127bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed")(static_cast<void> (0));
assert(!B->isCollapsed() && "Cannot merge from collapsed")(static_cast<void> (0));
if (A == B)
  return true;
// Restrict to the domains that A and B have in common.
unsigned common = A->getCommonDomains(B->AvailableDomains);
if (!common)
  return false;
A->AvailableDomains = common;
A->Instrs.append(B->Instrs.begin(), B->Instrs.end());

// Clear the old DomainValue so we won't try to swizzle instructions twice.
B->clear();
// All uses of B are referred to A.
B->Next = retain(A);

for (unsigned rx = 0; rx != NumRegs; ++rx) {
  assert(!LiveRegs.empty() && "no space allocated for live registers")(static_cast<void> (0));
  if (LiveRegs[rx] == B)
    setLiveReg(rx, A);
}
return true;
150}

/// Set up LiveRegs for the block described by TraversedMBB by combining the
/// live-out DomainValues saved in MBBOutRegsInfos for each predecessor.
/// A block without predecessors starts with every register untracked.
/// For each register index, the first predecessor value seen is adopted as
/// is; later ones are merged into, collapsed onto, or forced into the value
/// already in LiveRegs, depending on which of the two is collapsed.
///
/// NOTE: the bare numbers, arrows and "Assuming ..."/"Calling ..." lines
/// interleaved with the code below are static-analyzer path notes from the
/// report, not program text.
152void ExecutionDomainFix::enterBasicBlock(
  const LoopTraversal::TraversedMBBInfo &TraversedMBB) {

MachineBasicBlock *MBB = TraversedMBB.MBB;

// Set up LiveRegs to represent registers entering MBB.
// Set default domain values to 'no domain' (nullptr)
if (LiveRegs.empty())
14
←
Assuming the condition is false→
15
←
Taking false branch→
  LiveRegs.assign(NumRegs, nullptr);

// This is the entry block.
if (MBB->pred_empty()) {
16
←
Assuming the condition is false→
17
←
Taking false branch→
  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n")do { } while (false);
  return;
}

// Try to coalesce live-out registers from predecessors.
for (MachineBasicBlock *pred : MBB->predecessors()) {
  assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&(static_cast<void> (0))
         "Should have pre-allocated MBBInfos for all MBBs")(static_cast<void> (0));
  LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
  // Incoming is null if this is a backedge from a BB
  // we haven't processed yet
  if (Incoming.empty())
18
←
Assuming the condition is false→
19
←
Taking false branch→
    continue;

  for (unsigned rx = 0; rx != NumRegs; ++rx) {
20
←
Assuming 'rx' is not equal to field 'NumRegs'→
21
←
Loop condition is true.  Entering loop body→
    // resolve() also updates Incoming[rx] in place when it follows a chain.
    DomainValue *pdv = resolve(Incoming[rx]);
22
←
Calling 'ExecutionDomainFix::resolve'→
28
←
Returning from 'ExecutionDomainFix::resolve'→
    if (!pdv28.1
'pdv' is non-null
1
'pdv' is non-null
1
'pdv' is non-null
1
'pdv' is non-null
)
29
←
Taking false branch→
      continue;
    // First predecessor value for this register: adopt it unchanged.
    if (!LiveRegs[rx]) {
30
←
Assuming pointer value is null→
31
←
Taking false branch→
      setLiveReg(rx, pdv);
      continue;
    }

    // We have a live DomainValue from more than one predecessor.
    if (LiveRegs[rx]->isCollapsed()) {
32
←
Calling 'DomainValue::isCollapsed'→
38
←
Returning from 'DomainValue::isCollapsed'→
39
←
Taking true branch→
      // We are already collapsed, but predecessor is not. Force it.
      // NOTE(review): this is the path the analyzer reports. If the collapsed
      // value's AvailableDomains mask were empty, getFirstDomain() would
      // return 32 and hasDomain(32) would shift by the full width of
      // unsigned. With assertions compiled out nothing here rules that out.
      unsigned Domain = LiveRegs[rx]->getFirstDomain();
40
←
Calling 'DomainValue::getFirstDomain'→
50
←
Returning from 'DomainValue::getFirstDomain'→
51
←
'Domain' initialized to 32→
      if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
52
←
Calling 'DomainValue::isCollapsed'→
58
←
Returning from 'DomainValue::isCollapsed'→
59
←
Passing the value 32 via 1st parameter 'domain'→
60
←
Calling 'DomainValue::hasDomain'→
        collapse(pdv, Domain);
      continue;
    }

    // Currently open, merge in predecessor.
    // The result of merge() is not checked here; a failed merge leaves
    // LiveRegs[rx] as it was.
    if (!pdv->isCollapsed())
      merge(LiveRegs[rx], pdv);
    else
      force(rx, pdv->getFirstDomain());
  }
}
LLVM_DEBUG(dbgs() << printMBBReference(*MBB)do { } while (false)
                  << (!TraversedMBB.IsDone ? ": incomplete\n"do { } while (false)
                                           : ": all preds known\n"))do { } while (false);
206}

208void ExecutionDomainFix::leaveBasicBlock(
  const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
assert(!LiveRegs.empty() && "Must enter basic block first.")(static_cast<void> (0));
unsigned MBBNumber = TraversedMBB.MBB->getNumber();
assert(MBBNumber < MBBOutRegsInfos.size() &&(static_cast<void> (0))
       "Unexpected basic block number.")(static_cast<void> (0));
// Save register clearances at end of MBB - used by enterBasicBlock().
for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) {
  release(OldLiveReg);
}
MBBOutRegsInfos[MBBNumber] = LiveRegs;
LiveRegs.clear();
220}

222bool ExecutionDomainFix::visitInstr(MachineInstr *MI) {
// Update instructions with explicit execution domains.
std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
if (DomP.first) {
  if (DomP.second)
    visitSoftInstr(MI, DomP.second);
  else
    visitHardInstr(MI, DomP.first);
}

return !DomP.first;
233}

235void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) {
assert(!MI->isDebugInstr() && "Won't process debug values")(static_cast<void> (0));
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
              e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
     i != e; ++i) {
  MachineOperand &MO = MI->getOperand(i);
  if (!MO.isReg())
    continue;
  if (MO.isUse())
    continue;
  for (int rx : regIndices(MO.getReg())) {
    // This instruction explicitly defines rx.
    LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI)do { } while (false);

    // Kill off domains redefined by generic instructions.
    if (Kill)
      kill(rx);
  }
}
255}

257void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
// Collapse all uses.
for (unsigned i = mi->getDesc().getNumDefs(),
              e = mi->getDesc().getNumOperands();
     i != e; ++i) {
  MachineOperand &mo = mi->getOperand(i);
  if (!mo.isReg())
    continue;
  for (int rx : regIndices(mo.getReg())) {
    force(rx, domain);
  }
}

// Kill all defs and force them.
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
  MachineOperand &mo = mi->getOperand(i);
  if (!mo.isReg())
    continue;
  for (int rx : regIndices(mo.getReg())) {
    kill(rx);
    force(rx, domain);
  }
}
280}

/// Handle an instruction that can execute in any of the domains in `mask`.
///
/// Steps, in order:
///  1. Scan the explicit use operands. Collapsed operands narrow `available`
///     to the domains they share with it; compatible open operands are
///     remembered in `used`; incompatible open operands are killed.
///  2. If a single domain remains, fix the instruction to it and treat it as
///     a hard instruction.
///  3. Otherwise order the remembered registers by reaching definition and
///     merge their DomainValues, latest first, into one value `dv`.
///  4. Append mi to dv and point all defs and untracked uses at dv.
282void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Bitmask of available domains for this instruction after taking collapsed
// operands into account.
unsigned available = mask;

// Scan the explicit use operands for incoming domains.
SmallVector<int, 4> used;
if (!LiveRegs.empty())
  for (unsigned i = mi->getDesc().getNumDefs(),
                e = mi->getDesc().getNumOperands();
       i != e; ++i) {
    MachineOperand &mo = mi->getOperand(i);
    if (!mo.isReg())
      continue;
    for (int rx : regIndices(mo.getReg())) {
      DomainValue *dv = LiveRegs[rx];
      if (dv == nullptr)
        continue;
      // Bitmask of domains that dv and available have in common.
      unsigned common = dv->getCommonDomains(available);
      // Is it possible to use this collapsed register for free?
      if (dv->isCollapsed()) {
        // Restrict available domains to the ones in common with the operand.
        // If there are no common domains, we must pay the cross-domain
        // penalty for this operand.
        if (common)
          available = common;
      } else if (common)
        // Open DomainValue is compatible, save it for merging.
        used.push_back(rx);
      else
        // Open DomainValue is not compatible with instruction. It is useless
        // now.
        kill(rx);
    }
  }

// If the collapsed operands force a single domain, propagate the collapse.
if (isPowerOf2_32(available)) {
  // With exactly one bit set, the bit index is the domain number.
  unsigned domain = countTrailingZeros(available);
  TII->setExecutionDomain(*mi, domain);
  visitHardInstr(mi, domain);
  return;
}

// Kill off any remaining uses that don't match available, and build a list of
// incoming DomainValues that we want to merge.
SmallVector<int, 4> Regs;
for (int rx : used) {
  assert(!LiveRegs.empty() && "no space allocated for live registers")(static_cast<void> (0));
  DomainValue *&LR = LiveRegs[rx];
  // This useless DomainValue could have been missed above.
  // (`available` may have been narrowed after rx was added to `used`.)
  if (!LR->getCommonDomains(available)) {
    kill(rx);
    continue;
  }
  // Sorted insertion.
  // Enables giving priority to the latest domains during merging.
  const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
  auto I = partition_point(Regs, [&](int I) {
    return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
  });
  Regs.insert(I, rx);
}

// Regs is now sorted by reaching definition. Try to merge them all, giving
// priority to the latest ones (taken from the back of Regs).
DomainValue *dv = nullptr;
while (!Regs.empty()) {
  if (!dv) {
    dv = LiveRegs[Regs.pop_back_val()];
    // Force the first dv to match the current instruction.
    dv->AvailableDomains = dv->getCommonDomains(available);
    assert(dv->AvailableDomains && "Domain should have been filtered")(static_cast<void> (0));
    continue;
  }

  DomainValue *Latest = LiveRegs[Regs.pop_back_val()];
  // Skip already merged values.
  if (Latest == dv || Latest->Next)
    continue;
  if (merge(dv, Latest))
    continue;

  // If latest didn't merge, it is useless now. Kill all registers using it.
  for (int i : used) {
    assert(!LiveRegs.empty() && "no space allocated for live registers")(static_cast<void> (0));
    if (LiveRegs[i] == Latest)
      kill(i);
  }
}

// dv is the DomainValue we are going to use for this instruction.
// If no operand contributed one, start a fresh open value limited to
// `available`.
if (!dv) {
  dv = alloc();
  dv->AvailableDomains = available;
}
dv->Instrs.push_back(mi);

// Finally set all defs and non-collapsed uses to dv. We must iterate through
// all the operators, including imp-def ones.
for (const MachineOperand &mo : mi->operands()) {
  if (!mo.isReg())
    continue;
  for (int rx : regIndices(mo.getReg())) {
    if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) {
      kill(rx);
      setLiveReg(rx, dv);
    }
  }
}
393}

/// Process one traversal step for a basic block: set up LiveRegs from the
/// predecessors, walk every non-debug instruction, then save the live-out
/// state.
///
/// NOTE: the bare number, arrow and "Calling ..." lines interleaved with the
/// code below are static-analyzer path notes from the report, not program
/// text.
395void ExecutionDomainFix::processBasicBlock(
  const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
enterBasicBlock(TraversedMBB);
13
←
Calling 'ExecutionDomainFix::enterBasicBlock'→
// Instructions are only visited (and their domains updated) when this is the
// primary pass over the block. On any other pass Kill stays false, so
// processDefs() walks the defs without killing anything.
for (MachineInstr &MI : *TraversedMBB.MBB) {
  if (!MI.isDebugInstr()) {
    bool Kill = false;
    if (TraversedMBB.PrimaryPass)
      Kill = visitInstr(&MI);
    processDefs(&MI, Kill);
  }
}
leaveBasicBlock(TraversedMBB);
411}

/// Pass entry point. Caches the target hooks for mf, returns early when the
/// function is skipped or uses none of the registers in RC, builds AliasMap
/// on first use, processes every block in the order produced by
/// LoopTraversal, and finally releases all saved live-out DomainValues and
/// resets the allocator. Always returns false.
///
/// NOTE: the bare numbers, arrows and "Assuming ..."/"Calling ..." lines
/// interleaved with the code below are static-analyzer path notes from the
/// report, not program text.
413bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
if (skipFunction(mf.getFunction()))
1
Assuming the condition is false→
2
←
Taking false branch→
  return false;
MF = &mf;
TII = MF->getSubtarget().getInstrInfo();
TRI = MF->getSubtarget().getRegisterInfo();
LiveRegs.clear();
assert(NumRegs == RC->getNumRegs() && "Bad regclass")(static_cast<void> (0));

LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "do { } while (false)
3
←
Loop condition is false.  Exiting loop→
                  << TRI->getRegClassName(RC) << " **********\n")do { } while (false);

// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
const MachineRegisterInfo &MRI = mf.getRegInfo();
for (unsigned Reg : *RC) {
4
←
Assuming '__begin1' is not equal to '__end1'→
  if (MRI.isPhysRegUsed(Reg)) {
5
←
Assuming the condition is true→
6
←
Taking true branch→
    anyregs = true;
    break;
  }
}
if (!anyregs7.1
'anyregs' is true
1
'anyregs' is true
1
'anyregs' is true
1
'anyregs' is true
)
7
←
 Execution continues on line 435→
8
←
Taking false branch→
  return false;

RDA = &getAnalysis<ReachingDefAnalysis>();

// Initialize the AliasMap on the first use.
if (AliasMap.empty()) {
9
←
Assuming the condition is false→
10
←
Taking false branch→
  // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
  // therefore the LiveRegs array.
  AliasMap.resize(TRI->getNumRegs());
  for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
    for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid();
         ++AI)
      AliasMap[*AI].push_back(i);
}

// Initialize the MBBOutRegsInfos
// (one live-out slot per block ID; leaveBasicBlock() indexes it by block
// number).
MBBOutRegsInfos.resize(mf.getNumBlockIDs());

// Traverse the basic blocks.
LoopTraversal Traversal;
LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
for (const LoopTraversal::TraversedMBBInfo &TraversedMBB : TraversedMBBOrder)
11
←
Assuming '__begin1' is not equal to '__end1'→
  processBasicBlock(TraversedMBB);
12
←
Calling 'ExecutionDomainFix::processBasicBlock'→

// Drop the references still held by the saved live-out states. Releasing
// the last reference to an open DomainValue collapses it (see release()).
for (const LiveRegsDVInfo &OutLiveRegs : MBBOutRegsInfos)
  for (DomainValue *OutLiveReg : OutLiveRegs)
    if (OutLiveReg)
      release(OutLiveReg);

// Reset per-function state so the pass object can be reused.
MBBOutRegsInfos.clear();
Avail.clear();
Allocator.DestroyAll();

return false;
470}

←

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/CodeGen/ExecutionDomainFix.h

→

1//==-- llvm/CodeGen/ExecutionDomainFix.h - Execution Domain Fix -*- C++ -*--==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file Execution Domain Fix pass.
10///
11/// Some X86 SSE instructions like mov, and, or, xor are available in different
12/// variants for different operand types. These variant instructions are
13/// equivalent, but on Nehalem and newer cpus there is extra latency
14/// transferring data between integer and floating point domains.  ARM cores
15/// have similar issues when they are configured with both VFP and NEON
16/// pipelines.
17///
18/// This pass changes the variant instructions to minimize domain crossings.
19//
20//===----------------------------------------------------------------------===//
21 
22#ifndef LLVM_CODEGEN_EXECUTIONDOMAINFIX_H
23#define LLVM_CODEGEN_EXECUTIONDOMAINFIX_H
24 
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/CodeGen/LoopTraversal.h"
27#include "llvm/CodeGen/MachineFunctionPass.h"
28#include "llvm/CodeGen/ReachingDefAnalysis.h"
29#include "llvm/CodeGen/TargetRegisterInfo.h"
30 
31namespace llvm {
32 
33class MachineInstr;
34class TargetInstrInfo;
35 
36/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
37/// of execution domains.
38///
39/// An open DomainValue represents a set of instructions that can still switch
40/// execution domain. Multiple registers may refer to the same open
41/// DomainValue - they will eventually be collapsed to the same execution
42/// domain.
43///
44/// A collapsed DomainValue represents a single register that has been forced
45/// into one of more execution domains. There is a separate collapsed
46/// DomainValue for each register, but it may contain multiple execution
47/// domains. A register value is initially created in a single execution
48/// domain, but if we were forced to pay the penalty of a domain crossing, we
49/// keep track of the fact that the register is now available in multiple
50/// domains.
51struct DomainValue {
52  /// Basic reference counting.
53  unsigned Refs = 0;
54 
55  /// Bitmask of available domains. For an open DomainValue, it is the still
56  /// possible domains for collapsing. For a collapsed DomainValue it is the
57  /// domains where the register is available for free.
58  unsigned AvailableDomains;
59 
60  /// Pointer to the next DomainValue in a chain.  When two DomainValues are
61  /// merged, Victim.Next is set to point to Victor, so old DomainValue
62  /// references can be updated by following the chain.
63  DomainValue *Next;
64 
65  /// Twiddleable instructions using or defining these registers.
66  SmallVector<MachineInstr *, 8> Instrs;
67 
68  DomainValue() { clear(); }
69 
70  /// A collapsed DomainValue has no instructions to twiddle - it simply keeps
71  /// track of the domains where the registers are already available.
72  bool isCollapsed() const { return Instrs.empty(); }
33
←
Calling 'SmallVectorBase::empty'→
36
←
Returning from 'SmallVectorBase::empty'→
37
←
Returning the value 1, which participates in a condition later→
53
←
Calling 'SmallVectorBase::empty'→
56
←
Returning from 'SmallVectorBase::empty'→
57
←
Returning zero, which participates in a condition later→
73 
74  /// Is domain available?
75  bool hasDomain(unsigned domain) const {
76    assert(domain <(static_cast<void> (0))
77               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&(static_cast<void> (0))
78           "undefined behavior")(static_cast<void> (0));
79    return AvailableDomains & (1u << domain);
61
←
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
80  }
81 
82  /// Mark domain as available.
83  void addDomain(unsigned domain) {
84    assert(domain <(static_cast<void> (0))
85               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&(static_cast<void> (0))
86           "undefined behavior")(static_cast<void> (0));
87    AvailableDomains |= 1u << domain;
88  }
89 
90  // Restrict to a single domain available.
91  void setSingleDomain(unsigned domain) {
92    assert(domain <(static_cast<void> (0))
93               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&(static_cast<void> (0))
94           "undefined behavior")(static_cast<void> (0));
95    AvailableDomains = 1u << domain;
96  }
97 
98  /// Return bitmask of domains that are available and in mask.
99  unsigned getCommonDomains(unsigned mask) const {
100    return AvailableDomains & mask;
101  }
102 
103  /// First domain available.
104  unsigned getFirstDomain() const {
105    return countTrailingZeros(AvailableDomains);
41
←
Calling 'countTrailingZeros<unsigned int>'→
48
←
Returning from 'countTrailingZeros<unsigned int>'→
49
←
Returning the value 32→
106  }
107 
108  /// Clear this DomainValue and point to next which has all its data.
109  void clear() {
110    AvailableDomains = 0;
111    Next = nullptr;
112    Instrs.clear();
113  }
114};
115 
/// Machine function pass that tracks, per register of the register class RC,
/// which execution domains a value is available in (as DomainValues) and
/// switches instructions between domains through TargetInstrInfo.
/// The pass ID and register class are supplied by the constructor's caller.
116class ExecutionDomainFix : public MachineFunctionPass {
  // Storage for DomainValues, plus a free list of recycled ones.
117  SpecificBumpPtrAllocator<DomainValue> Allocator;
118  SmallVector<DomainValue *, 16> Avail;
119 
120  const TargetRegisterClass *const RC;
  // MF, TII, TRI and RDA are assigned in runOnMachineFunction(); they have no
  // initializer here.
121  MachineFunction *MF;
122  const TargetInstrInfo *TII;
123  const TargetRegisterInfo *TRI;
  // Maps a physical register number to indices into RC (and LiveRegs).
124  std::vector<SmallVector<int, 1>> AliasMap;
125  const unsigned NumRegs;
126  /// Value currently in each register, or NULL when no value is being tracked.
127  /// This counts as a DomainValue reference.
128  using LiveRegsDVInfo = std::vector<DomainValue *>;
129  LiveRegsDVInfo LiveRegs;
130  /// Keeps domain information for all registers. Note that this
131  /// is different from the usual definition notion of liveness. The CPU
132  /// doesn't care whether or not we consider a register killed.
133  using OutRegsInfoMap = SmallVector<LiveRegsDVInfo, 4>;
134  OutRegsInfoMap MBBOutRegsInfos;
135 
136  ReachingDefAnalysis *RDA;
137 
138public:
139  ExecutionDomainFix(char &PassID, const TargetRegisterClass &RC)
140      : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {}
141 
142  void getAnalysisUsage(AnalysisUsage &AU) const override {
143    AU.setPreservesAll();
144    AU.addRequired<ReachingDefAnalysis>();
145    MachineFunctionPass::getAnalysisUsage(AU);
146  }
147 
148  bool runOnMachineFunction(MachineFunction &MF) override;
149 
150  MachineFunctionProperties getRequiredProperties() const override {
151    return MachineFunctionProperties().set(
152        MachineFunctionProperties::Property::NoVRegs);
153  }
154 
155private:
156  /// Translate TRI register number to a list of indices into our smaller tables
157  /// of interesting registers.
158  iterator_range<SmallVectorImpl<int>::const_iterator>
159  regIndices(unsigned Reg) const;
160 
161  /// DomainValue allocation.
  /// A negative domain (the default) leaves the new value without any domain.
162  DomainValue *alloc(int domain = -1);
163 
164  /// Add reference to DV.
  /// Accepts null; returns DV so calls can be nested in an assignment.
165  DomainValue *retain(DomainValue *DV) {
166    if (DV)
167      ++DV->Refs;
168    return DV;
169  }
170 
171  /// Release a reference to DV.  When the last reference is released,
172  /// collapse if needed.
173  void release(DomainValue *);
174 
175  /// Follow the chain of dead DomainValues until a live DomainValue is reached.
176  /// Update the referenced pointer when necessary.
177  DomainValue *resolve(DomainValue *&);
178 
179  /// Set LiveRegs[rx] = dv, updating reference counts.
180  void setLiveReg(int rx, DomainValue *DV);
181 
182  /// Kill register rx, recycle or collapse any DomainValue.
183  void kill(int rx);
184 
185  /// Force register rx into domain.
186  void force(int rx, unsigned domain);
187 
188  /// Collapse open DomainValue into given domain. If there are multiple
189  /// registers using dv, they each get a unique collapsed DomainValue.
190  void collapse(DomainValue *dv, unsigned domain);
191 
192  /// All instructions and registers in B are moved to A, and B is released.
  /// Returns false, without changing anything, when A and B share no domain.
193  bool merge(DomainValue *A, DomainValue *B);
194 
195  /// Set up LiveRegs by merging predecessor live-out values.
196  void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
197 
198  /// Update live-out values.
199  void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
200 
201  /// Process the given basic block.
202  void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
203 
204  /// Visit the given instruction.
  /// Returns true when the instruction has no execution domain.
205  bool visitInstr(MachineInstr *);
206 
207  /// Walk the registers defined by MI.
208  /// If Kill is set, also kill off DomainValues clobbered by the defs.
209  void processDefs(MachineInstr *, bool Kill);
210 
211  /// A soft instruction can be changed to work in other domains given by mask.
212  void visitSoftInstr(MachineInstr *, unsigned mask);
213 
214  /// A hard instruction only works in one domain. All input registers will be
215  /// forced into that domain.
216  void visitHardInstr(MachineInstr *, unsigned domain);
217};
218 
219} // namespace llvm
220 
221#endif // LLVM_CODEGEN_EXECUTIONDOMAINFIX_H

←

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/ADT/SmallVector.h

→

1//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SmallVector class.
10//
11//===----------------------------------------------------------------------===//

13#ifndef LLVM_ADT_SMALLVECTOR_H
14#define LLVM_ADT_SMALLVECTOR_H

16#include "llvm/ADT/iterator_range.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/MemAlloc.h"
20#include "llvm/Support/type_traits.h"
21#include <algorithm>
22#include <cassert>
23#include <cstddef>
24#include <cstdlib>
25#include <cstring>
26#include <functional>
27#include <initializer_list>
28#include <iterator>
29#include <limits>
30#include <memory>
31#include <new>
32#include <type_traits>
33#include <utility>

35namespace llvm {

37/// This is all the stuff common to all SmallVectors.
38///
39/// The template parameter specifies the type which should be used to hold the
40/// Size and Capacity of the SmallVector, so it can be adjusted.
41/// Using 32 bit size is desirable to shrink the size of the SmallVector.
42/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
43/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
44/// buffering bitcode output - which can exceed 4GB.
// Type-independent state of a SmallVector: the buffer pointer plus the size
// and capacity counters, both stored as Size_T.
//
// NOTE: the bare numbers, arrows and "Assuming ..."/"Returning ..." lines
// interleaved with the code below are static-analyzer path notes from the
// report, not program text.
45template <class Size_T> class SmallVectorBase {
46protected:
void *BeginX;
// Size starts at 0; Capacity is always set by the constructor.
Size_T Size = 0, Capacity;

/// The maximum value of the Size_T used.
static constexpr size_t SizeTypeMax() {
  return std::numeric_limits<Size_T>::max();
}

SmallVectorBase() = delete;
/// Start out empty, using \p FirstEl as the buffer with room for
/// \p TotalCapacity elements.
SmallVectorBase(void *FirstEl, size_t TotalCapacity)
    : BeginX(FirstEl), Capacity(TotalCapacity) {}

/// This is a helper for \a grow() that's out of line to reduce code
/// duplication.  This function will report a fatal error if it can't grow at
/// least to \p MinSize.
void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);

/// This is an implementation of the grow() method which only works
/// on POD-like data types and is out of line to reduce code duplication.
/// This function will report a fatal error if it cannot increase capacity.
void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);

69public:
/// Number of elements currently stored.
size_t size() const { return Size; }
/// Number of elements the current buffer can hold.
size_t capacity() const { return Capacity; }

/// True when no elements are stored (Size is zero).
LLVM_NODISCARD[[clang::warn_unused_result]] bool empty() const { return !Size; }
34
←
Assuming field 'Size' is 0→
35
←
Returning the value 1, which participates in a condition later→
54
←
Assuming field 'Size' is not equal to 0→
55
←
Returning zero, which participates in a condition later→

/// Set the array size to \p N, which the current array must have enough
/// capacity for.
///
/// This does not construct or destroy any elements in the vector.
///
/// Clients can use this in conjunction with capacity() to write past the end
/// of the buffer when they know that more elements are available, and only
/// update the size later. This avoids the cost of value initializing elements
/// which will only be overwritten.
void set_size(size_t N) {
  assert(N <= capacity())(static_cast<void> (0));
  Size = N;
}
88};

/// The integer type used for a SmallVector<T>'s Size and Capacity: uint64_t
/// when sizeof(T) < 4 and pointers are at least 8 bytes wide, uint32_t
/// otherwise.
template <class T>
using SmallVectorSizeType =
    std::conditional_t<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
                       uint32_t>;

95/// Figure out the offset of the first element.
96template <class T, typename = void> struct SmallVectorAlignmentAndSize {
alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof(
    SmallVectorBase<SmallVectorSizeType<T>>)];
alignas(T) char FirstEl[sizeof(T)];
100};

102/// This is the part of SmallVectorTemplateBase which does not depend on whether
103/// the type T is a POD. The extra dummy template argument is used by ArrayRef
104/// to avoid unnecessarily requiring T to be complete.
105template <typename T, typename = void>
106class SmallVectorTemplateCommon
  : public SmallVectorBase<SmallVectorSizeType<T>> {
using Base = SmallVectorBase<SmallVectorSizeType<T>>;

/// Find the address of the first element.  For this pointer math to be valid
/// with small-size of 0 for T with lots of alignment, it's important that
/// SmallVectorStorage is properly-aligned even for small-size of 0.
void *getFirstEl() const {
  return const_cast<void *>(reinterpret_cast<const void *>(
      reinterpret_cast<const char *>(this) +
      offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)__builtin_offsetof(SmallVectorAlignmentAndSize<T>, FirstEl
)));
}
// Space after 'FirstEl' is clobbered, do not add any instance vars after it.

120protected:
SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}

void grow_pod(size_t MinSize, size_t TSize) {
  Base::grow_pod(getFirstEl(), MinSize, TSize);
}

/// Return true if this is a smallvector which has not had dynamic
/// memory allocated for it.
bool isSmall() const { return this->BeginX == getFirstEl(); }

/// Put this vector in a state of being small.
void resetToSmall() {
  this->BeginX = getFirstEl();
  this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
}

/// Return true if V is an internal reference to the given range.
bool isReferenceToRange(const void *V, const void *First, const void *Last) const {
  // Use std::less to avoid UB.
  std::less<> LessThan;
  return !LessThan(V, First) && LessThan(V, Last);
}

/// Return true if V is an internal reference to this vector.
bool isReferenceToStorage(const void *V) const {
  return isReferenceToRange(V, this->begin(), this->end());
}

/// Return true if First and Last form a valid (possibly empty) range in this
/// vector's storage.
bool isRangeInStorage(const void *First, const void *Last) const {
  // Use std::less to avoid UB.
  std::less<> LessThan;
  return !LessThan(First, this->begin()) && !LessThan(Last, First) &&
         !LessThan(this->end(), Last);
}

/// Return true unless Elt will be invalidated by resizing the vector to
/// NewSize.
bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
  // Past the end.
  if (LLVM_LIKELY(!isReferenceToStorage(Elt))__builtin_expect((bool)(!isReferenceToStorage(Elt)), true))
    return true;

  // Return false if Elt will be destroyed by shrinking.
  if (NewSize <= this->size())
    return Elt < this->begin() + NewSize;

  // Return false if we need to grow.
  return NewSize <= this->capacity();
}

/// Check whether Elt will be invalidated by resizing the vector to NewSize.
void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
  assert(isSafeToReferenceAfterResize(Elt, NewSize) &&(static_cast<void> (0))
         "Attempting to reference an element of the vector in an operation "(static_cast<void> (0))
         "that invalidates it")(static_cast<void> (0));
}

/// Check whether Elt will be invalidated by increasing the size of the
/// vector by N.
void assertSafeToAdd(const void *Elt, size_t N = 1) {
  this->assertSafeToReferenceAfterResize(Elt, this->size() + N);
}

/// Check whether any part of the range will be invalidated by clearing.
void assertSafeToReferenceAfterClear(const T *From, const T *To) {
  if (From == To)
    return;
  this->assertSafeToReferenceAfterResize(From, 0);
  this->assertSafeToReferenceAfterResize(To - 1, 0);
}
template <
    class ItTy,
    std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
                     bool> = false>
void assertSafeToReferenceAfterClear(ItTy, ItTy) {}

/// Check whether any part of the range will be invalidated by growing.
void assertSafeToAddRange(const T *From, const T *To) {
  if (From == To)
    return;
  this->assertSafeToAdd(From, To - From);
  this->assertSafeToAdd(To - 1, To - From);
}
template <
    class ItTy,
    std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
                     bool> = false>
void assertSafeToAddRange(ItTy, ItTy) {}

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
template <class U>
static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt,
                                                 size_t N) {
  size_t NewSize = This->size() + N;
  if (LLVM_LIKELY(NewSize <= This->capacity())__builtin_expect((bool)(NewSize <= This->capacity()), true
))
    return &Elt;

  bool ReferencesStorage = false;
  int64_t Index = -1;
  if (!U::TakesParamByValue) {
    if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))__builtin_expect((bool)(This->isReferenceToStorage(&Elt
)), false)) {
      ReferencesStorage = true;
      Index = &Elt - This->begin();
    }
  }
  This->grow(NewSize);
  return ReferencesStorage ? This->begin() + Index : &Elt;
}

233public:
using size_type = size_t;
using difference_type = ptrdiff_t;
using value_type = T;
using iterator = T *;
using const_iterator = const T *;

using const_reverse_iterator = std::reverse_iterator<const_iterator>;
using reverse_iterator = std::reverse_iterator<iterator>;

using reference = T &;
using const_reference = const T &;
using pointer = T *;
using const_pointer = const T *;

using Base::capacity;
using Base::empty;
using Base::size;

// forward iterator creation methods.
iterator begin() { return (iterator)this->BeginX; }
const_iterator begin() const { return (const_iterator)this->BeginX; }
iterator end() { return begin() + size(); }
const_iterator end() const { return begin() + size(); }

// reverse iterator creation methods.
reverse_iterator rbegin()            { return reverse_iterator(end()); }
const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
reverse_iterator rend()              { return reverse_iterator(begin()); }
const_reverse_iterator rend() const { return const_reverse_iterator(begin());}

size_type size_in_bytes() const { return size() * sizeof(T); }
size_type max_size() const {
  return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
}

size_t capacity_in_bytes() const { return capacity() * sizeof(T); }

/// Return a pointer to the vector's buffer, even if empty().
pointer data() { return pointer(begin()); }
/// Return a pointer to the vector's buffer, even if empty().
const_pointer data() const { return const_pointer(begin()); }

reference operator[](size_type idx) {
  assert(idx < size())(static_cast<void> (0));
  return begin()[idx];
}
const_reference operator[](size_type idx) const {
  assert(idx < size())(static_cast<void> (0));
  return begin()[idx];
}

reference front() {
  assert(!empty())(static_cast<void> (0));
  return begin()[0];
}
const_reference front() const {
  assert(!empty())(static_cast<void> (0));
  return begin()[0];
}

reference back() {
  assert(!empty())(static_cast<void> (0));
  return end()[-1];
}
const_reference back() const {
  assert(!empty())(static_cast<void> (0));
  return end()[-1];
}
302};

304/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put
305/// method implementations that are designed to work with non-trivial T's.
306///
307/// We approximate is_trivially_copyable with trivial move/copy construction and
308/// trivial destruction. While the standard doesn't specify that you're allowed
309/// copy these types with memcpy, there is no way for the type to observe this.
310/// This catches the important case of std::pair<POD, POD>, which is not
311/// trivially assignable.
312template <typename T, bool = (is_trivially_copy_constructible<T>::value) &&
                           (is_trivially_move_constructible<T>::value) &&
                           std::is_trivially_destructible<T>::value>
315class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
friend class SmallVectorTemplateCommon<T>;

318protected:
static constexpr bool TakesParamByValue = false;
using ValueParamT = const T &;

SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}

static void destroy_range(T *S, T *E) {
  while (S != E) {
    --E;
    E->~T();
  }
}

/// Move the range [I, E) into the uninitialized memory starting with "Dest",
/// constructing elements as needed.
template<typename It1, typename It2>
static void uninitialized_move(It1 I, It1 E, It2 Dest) {
  std::uninitialized_copy(std::make_move_iterator(I),
                          std::make_move_iterator(E), Dest);
}

/// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
/// constructing elements as needed.
template<typename It1, typename It2>
static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
  std::uninitialized_copy(I, E, Dest);
}

/// Grow the allocated memory (without initializing new elements), doubling
/// the size of the allocated memory. Guarantees space for at least one more
/// element, or MinSize more elements if specified.
void grow(size_t MinSize = 0);

/// Create a new allocation big enough for \p MinSize and pass back its size
/// in \p NewCapacity. This is the first section of \a grow().
T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
  return static_cast<T *>(
      SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
          MinSize, sizeof(T), NewCapacity));
}

/// Move existing elements over to the new allocation \p NewElts, the middle
/// section of \a grow().
void moveElementsForGrow(T *NewElts);

/// Transfer ownership of the allocation, finishing up \a grow().
void takeAllocationForGrow(T *NewElts, size_t NewCapacity);

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
  return this->reserveForParamAndGetAddressImpl(this, Elt, N);
}

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
  return const_cast<T *>(
      this->reserveForParamAndGetAddressImpl(this, Elt, N));
}

static T &&forward_value_param(T &&V) { return std::move(V); }
static const T &forward_value_param(const T &V) { return V; }

void growAndAssign(size_t NumElts, const T &Elt) {
  // Grow manually in case Elt is an internal reference.
  size_t NewCapacity;
  T *NewElts = mallocForGrow(NumElts, NewCapacity);
  std::uninitialized_fill_n(NewElts, NumElts, Elt);
  this->destroy_range(this->begin(), this->end());
  takeAllocationForGrow(NewElts, NewCapacity);
  this->set_size(NumElts);
}

template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
  // Grow manually in case one of Args is an internal reference.
  size_t NewCapacity;
  T *NewElts = mallocForGrow(0, NewCapacity);
  ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
  moveElementsForGrow(NewElts);
  takeAllocationForGrow(NewElts, NewCapacity);
  this->set_size(this->size() + 1);
  return this->back();
}

403public:
void push_back(const T &Elt) {
  const T *EltPtr = reserveForParamAndGetAddress(Elt);
  ::new ((void *)this->end()) T(*EltPtr);
  this->set_size(this->size() + 1);
}

void push_back(T &&Elt) {
  T *EltPtr = reserveForParamAndGetAddress(Elt);
  ::new ((void *)this->end()) T(::std::move(*EltPtr));
  this->set_size(this->size() + 1);
}

void pop_back() {
  this->set_size(this->size() - 1);
  this->end()->~T();
}
420};

422// Define this out-of-line to dissuade the C++ compiler from inlining it.
423template <typename T, bool TriviallyCopyable>
424void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
size_t NewCapacity;
T *NewElts = mallocForGrow(MinSize, NewCapacity);
moveElementsForGrow(NewElts);
takeAllocationForGrow(NewElts, NewCapacity);
429}

431// Define this out-of-line to dissuade the C++ compiler from inlining it.
432template <typename T, bool TriviallyCopyable>
433void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
  T *NewElts) {
// Move the elements over.
this->uninitialized_move(this->begin(), this->end(), NewElts);

// Destroy the original elements.
destroy_range(this->begin(), this->end());
440}

442// Define this out-of-line to dissuade the C++ compiler from inlining it.
443template <typename T, bool TriviallyCopyable>
444void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
  T *NewElts, size_t NewCapacity) {
// If this wasn't grown from the inline copy, deallocate the old space.
if (!this->isSmall())
  free(this->begin());

this->BeginX = NewElts;
this->Capacity = NewCapacity;
452}

454/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
455/// method implementations that are designed to work with trivially copyable
456/// T's. This allows using memcpy in place of copy/move construction and
457/// skipping destruction.
458template <typename T>
459class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
friend class SmallVectorTemplateCommon<T>;

462protected:
/// True if it's cheap enough to take parameters by value. Doing so avoids
/// overhead related to mitigations for reference invalidation.
static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *);

/// Either const T& or T, depending on whether it's cheap enough to take
/// parameters by value.
using ValueParamT =
    typename std::conditional<TakesParamByValue, T, const T &>::type;

SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}

// No need to do a destroy loop for POD's.
static void destroy_range(T *, T *) {}

/// Move the range [I, E) onto the uninitialized memory
/// starting with "Dest", constructing elements into it as needed.
template<typename It1, typename It2>
static void uninitialized_move(It1 I, It1 E, It2 Dest) {
  // Just do a copy.
  uninitialized_copy(I, E, Dest);
}

/// Copy the range [I, E) onto the uninitialized memory
/// starting with "Dest", constructing elements into it as needed.
template<typename It1, typename It2>
static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
  // Arbitrary iterator types; just use the basic implementation.
  std::uninitialized_copy(I, E, Dest);
}

/// Copy the range [I, E) onto the uninitialized memory
/// starting with "Dest", constructing elements into it as needed.
template <typename T1, typename T2>
static void uninitialized_copy(
    T1 *I, T1 *E, T2 *Dest,
    std::enable_if_t<std::is_same<typename std::remove_const<T1>::type,
                                  T2>::value> * = nullptr) {
  // Use memcpy for PODs iterated by pointers (which includes SmallVector
  // iterators): std::uninitialized_copy optimizes to memmove, but we can
  // use memcpy here. Note that I and E are iterators and thus might be
  // invalid for memcpy if they are equal.
  if (I != E)
    memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
}

/// Double the size of the allocated memory, guaranteeing space for at
/// least one more element or MinSize if specified.
void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
  return this->reserveForParamAndGetAddressImpl(this, Elt, N);
}

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
  return const_cast<T *>(
      this->reserveForParamAndGetAddressImpl(this, Elt, N));
}

/// Copy \p V or return a reference, depending on \a ValueParamT.
static ValueParamT forward_value_param(ValueParamT V) { return V; }

void growAndAssign(size_t NumElts, T Elt) {
  // Elt has been copied in case it's an internal reference, side-stepping
  // reference invalidation problems without losing the realloc optimization.
  this->set_size(0);
  this->grow(NumElts);
  std::uninitialized_fill_n(this->begin(), NumElts, Elt);
  this->set_size(NumElts);
}

template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
  // Use push_back with a copy in case Args has an internal reference,
  // side-stepping reference invalidation problems without losing the realloc
  // optimization.
  push_back(T(std::forward<ArgTypes>(Args)...));
  return this->back();
}

545public:
void push_back(ValueParamT Elt) {
  const T *EltPtr = reserveForParamAndGetAddress(Elt);
  memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T));
  this->set_size(this->size() + 1);
}

void pop_back() { this->set_size(this->size() - 1); }
553};

555/// This class consists of common code factored out of the SmallVector class to
556/// reduce code duplication based on the SmallVector 'N' template parameter.
557template <typename T>
558class SmallVectorImpl : public SmallVectorTemplateBase<T> {
using SuperClass = SmallVectorTemplateBase<T>;

561public:
using iterator = typename SuperClass::iterator;
using const_iterator = typename SuperClass::const_iterator;
using reference = typename SuperClass::reference;
using size_type = typename SuperClass::size_type;

567protected:
using SmallVectorTemplateBase<T>::TakesParamByValue;
using ValueParamT = typename SuperClass::ValueParamT;

// Default ctor - Initialize to empty.
explicit SmallVectorImpl(unsigned N)
    : SmallVectorTemplateBase<T>(N) {}

575public:
SmallVectorImpl(const SmallVectorImpl &) = delete;

~SmallVectorImpl() {
  // Subclass has already destructed this vector's elements.
  // If this wasn't grown from the inline copy, deallocate the old space.
  if (!this->isSmall())
    free(this->begin());
}

void clear() {
  this->destroy_range(this->begin(), this->end());
  this->Size = 0;
}

590private:
template <bool ForOverwrite> void resizeImpl(size_type N) {
  if (N < this->size()) {
    this->pop_back_n(this->size() - N);
  } else if (N > this->size()) {
    this->reserve(N);
    for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
      if (ForOverwrite)
        new (&*I) T;
      else
        new (&*I) T();
    this->set_size(N);
  }
}

605public:
void resize(size_type N) { resizeImpl<false>(N); }

/// Like resize, but \ref T is POD, the new values won't be initialized.
void resize_for_overwrite(size_type N) { resizeImpl<true>(N); }

void resize(size_type N, ValueParamT NV) {
  if (N == this->size())
    return;

  if (N < this->size()) {
    this->pop_back_n(this->size() - N);
    return;
  }

  // N > this->size(). Defer to append.
  this->append(N - this->size(), NV);
}

void reserve(size_type N) {
  if (this->capacity() < N)
    this->grow(N);
}

void pop_back_n(size_type NumItems) {
  assert(this->size() >= NumItems)(static_cast<void> (0));
  this->destroy_range(this->end() - NumItems, this->end());
  this->set_size(this->size() - NumItems);
}

LLVM_NODISCARD[[clang::warn_unused_result]] T pop_back_val() {
  T Result = ::std::move(this->back());
  this->pop_back();
  return Result;
}

void swap(SmallVectorImpl &RHS);

/// Add the specified range to the end of the SmallVector.
template <typename in_iter,
          typename = std::enable_if_t<std::is_convertible<
              typename std::iterator_traits<in_iter>::iterator_category,
              std::input_iterator_tag>::value>>
void append(in_iter in_start, in_iter in_end) {
  this->assertSafeToAddRange(in_start, in_end);
  size_type NumInputs = std::distance(in_start, in_end);
  this->reserve(this->size() + NumInputs);
  this->uninitialized_copy(in_start, in_end, this->end());
  this->set_size(this->size() + NumInputs);
}

/// Append \p NumInputs copies of \p Elt to the end.
void append(size_type NumInputs, ValueParamT Elt) {
  const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs);
  std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr);
  this->set_size(this->size() + NumInputs);
}

void append(std::initializer_list<T> IL) {
  append(IL.begin(), IL.end());
}

void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); }

void assign(size_type NumElts, ValueParamT Elt) {
  // Note that Elt could be an internal reference.
  if (NumElts > this->capacity()) {
    this->growAndAssign(NumElts, Elt);
    return;
  }

  // Assign over existing elements.
  std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
  if (NumElts > this->size())
    std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
  else if (NumElts < this->size())
    this->destroy_range(this->begin() + NumElts, this->end());
  this->set_size(NumElts);
}

// FIXME: Consider assigning over existing elements, rather than clearing &
// re-initializing them - for all assign(...) variants.

template <typename in_iter,
          typename = std::enable_if_t<std::is_convertible<
              typename std::iterator_traits<in_iter>::iterator_category,
              std::input_iterator_tag>::value>>
void assign(in_iter in_start, in_iter in_end) {
  this->assertSafeToReferenceAfterClear(in_start, in_end);
  clear();
  append(in_start, in_end);
}

void assign(std::initializer_list<T> IL) {
  clear();
  append(IL);
}

void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); }

iterator erase(const_iterator CI) {
  // Just cast away constness because this is a non-const member function.
  iterator I = const_cast<iterator>(CI);

  assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds.")(static_cast<void> (0));

  iterator N = I;
  // Shift all elts down one.
  std::move(I+1, this->end(), I);
  // Drop the last elt.
  this->pop_back();
  return(N);
}

iterator erase(const_iterator CS, const_iterator CE) {
  // Just cast away constness because this is a non-const member function.
  iterator S = const_cast<iterator>(CS);
  iterator E = const_cast<iterator>(CE);

  assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds.")(static_cast<void> (0));

  iterator N = S;
  // Shift all elts down.
  iterator I = std::move(E, this->end(), S);
  // Drop the last elts.
  this->destroy_range(I, this->end());
  this->set_size(I - this->begin());
  return(N);
}

735private:
template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) {
  // Callers ensure that ArgType is derived from T.
  static_assert(
      std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>,
                   T>::value,
      "ArgType must be derived from T!");

  if (I == this->end()) {  // Important special case for empty vector.
    this->push_back(::std::forward<ArgType>(Elt));
    return this->end()-1;
  }

  assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")(static_cast<void> (0));

  // Grow if necessary.
  size_t Index = I - this->begin();
  std::remove_reference_t<ArgType> *EltPtr =
      this->reserveForParamAndGetAddress(Elt);
  I = this->begin() + Index;

  ::new ((void*) this->end()) T(::std::move(this->back()));
  // Push everything else over.
  std::move_backward(I, this->end()-1, this->end());
  this->set_size(this->size() + 1);

  // If we just moved the element we're inserting, be sure to update
  // the reference (never happens if TakesParamByValue).
  static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value,
                "ArgType must be 'T' when taking by value!");
  if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end()))
    ++EltPtr;

  *I = ::std::forward<ArgType>(*EltPtr);
  return I;
}

772public:
iterator insert(iterator I, T &&Elt) {
  return insert_one_impl(I, this->forward_value_param(std::move(Elt)));
}

iterator insert(iterator I, const T &Elt) {
  return insert_one_impl(I, this->forward_value_param(Elt));
}

iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) {
  // Convert iterator to elt# to avoid invalidating iterator when we reserve()
  size_t InsertElt = I - this->begin();

  if (I == this->end()) {  // Important special case for empty vector.
    append(NumToInsert, Elt);
    return this->begin()+InsertElt;
  }

  assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")(static_cast<void> (0));

  // Ensure there is enough space, and get the (maybe updated) address of
  // Elt.
  const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert);

  // Uninvalidate the iterator.
  I = this->begin()+InsertElt;

  // If there are more elements between the insertion point and the end of the
  // range than there are being inserted, we can use a simple approach to
  // insertion.  Since we already reserved space, we know that this won't
  // reallocate the vector.
  if (size_t(this->end()-I) >= NumToInsert) {
    T *OldEnd = this->end();
    append(std::move_iterator<iterator>(this->end() - NumToInsert),
           std::move_iterator<iterator>(this->end()));

    // Copy the existing elements that get replaced.
    std::move_backward(I, OldEnd-NumToInsert, OldEnd);

    // If we just moved the element we're inserting, be sure to update
    // the reference (never happens if TakesParamByValue).
    if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
      EltPtr += NumToInsert;

    std::fill_n(I, NumToInsert, *EltPtr);
    return I;
  }

  // Otherwise, we're inserting more elements than exist already, and we're
  // not inserting at the end.

  // Move over the elements that we're about to overwrite.
  T *OldEnd = this->end();
  this->set_size(this->size() + NumToInsert);
  size_t NumOverwritten = OldEnd-I;
  this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);

  // If we just moved the element we're inserting, be sure to update
  // the reference (never happens if TakesParamByValue).
  if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
    EltPtr += NumToInsert;

  // Replace the overwritten part.
  std::fill_n(I, NumOverwritten, *EltPtr);

  // Insert the non-overwritten middle part.
  std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr);
  return I;
}

template <typename ItTy,
          typename = std::enable_if_t<std::is_convertible<
              typename std::iterator_traits<ItTy>::iterator_category,
              std::input_iterator_tag>::value>>
iterator insert(iterator I, ItTy From, ItTy To) {
  // Convert iterator to elt# to avoid invalidating iterator when we reserve()
  size_t InsertElt = I - this->begin();

  if (I == this->end()) {  // Important special case for empty vector.
    append(From, To);
    return this->begin()+InsertElt;
  }

  assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")(static_cast<void> (0));

  // Check that the reserve that follows doesn't invalidate the iterators.
  this->assertSafeToAddRange(From, To);

  size_t NumToInsert = std::distance(From, To);

  // Ensure there is enough space.
  reserve(this->size() + NumToInsert);

  // Uninvalidate the iterator.
  I = this->begin()+InsertElt;

  // If there are more elements between the insertion point and the end of the
  // range than there are being inserted, we can use a simple approach to
  // insertion.  Since we already reserved space, we know that this won't
  // reallocate the vector.
  if (size_t(this->end()-I) >= NumToInsert) {
    T *OldEnd = this->end();
    append(std::move_iterator<iterator>(this->end() - NumToInsert),
           std::move_iterator<iterator>(this->end()));

    // Copy the existing elements that get replaced.
    std::move_backward(I, OldEnd-NumToInsert, OldEnd);

    std::copy(From, To, I);
    return I;
  }

  // Otherwise, we're inserting more elements than exist already, and we're
  // not inserting at the end.

  // Move over the elements that we're about to overwrite.
  T *OldEnd = this->end();
  this->set_size(this->size() + NumToInsert);
  size_t NumOverwritten = OldEnd-I;
  this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);

  // Replace the overwritten part.
  for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
    *J = *From;
    ++J; ++From;
  }

  // Insert the non-overwritten middle part.
  this->uninitialized_copy(From, To, OldEnd);
  return I;
}

void insert(iterator I, std::initializer_list<T> IL) {
  insert(I, IL.begin(), IL.end());
}

template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
  if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity
()), false))
    return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);

  ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
  this->set_size(this->size() + 1);
  return this->back();
}

SmallVectorImpl &operator=(const SmallVectorImpl &RHS);

SmallVectorImpl &operator=(SmallVectorImpl &&RHS);

bool operator==(const SmallVectorImpl &RHS) const {
  if (this->size() != RHS.size()) return false;
  return std::equal(this->begin(), this->end(), RHS.begin());
}
bool operator!=(const SmallVectorImpl &RHS) const {
  return !(*this == RHS);
}

bool operator<(const SmallVectorImpl &RHS) const {
  return std::lexicographical_compare(this->begin(), this->end(),
                                      RHS.begin(), RHS.end());
}
933};

935template <typename T>
936void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
if (this == &RHS) return;

// We can only avoid copying elements if neither vector is small.
if (!this->isSmall() && !RHS.isSmall()) {
  std::swap(this->BeginX, RHS.BeginX);
  std::swap(this->Size, RHS.Size);
  std::swap(this->Capacity, RHS.Capacity);
  return;
}
this->reserve(RHS.size());
RHS.reserve(this->size());

// Swap the shared elements.
size_t NumShared = this->size();
if (NumShared > RHS.size()) NumShared = RHS.size();
for (size_type i = 0; i != NumShared; ++i)
  std::swap((*this)[i], RHS[i]);

// Copy over the extra elts.
if (this->size() > RHS.size()) {
  size_t EltDiff = this->size() - RHS.size();
  this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
  RHS.set_size(RHS.size() + EltDiff);
  this->destroy_range(this->begin()+NumShared, this->end());
  this->set_size(NumShared);
} else if (RHS.size() > this->size()) {
  size_t EltDiff = RHS.size() - this->size();
  this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
  this->set_size(this->size() + EltDiff);
  this->destroy_range(RHS.begin()+NumShared, RHS.end());
  RHS.set_size(NumShared);
}
969}

971template <typename T>
972SmallVectorImpl<T> &SmallVectorImpl<T>::
operator=(const SmallVectorImpl<T> &RHS) {
// Avoid self-assignment.
if (this == &RHS) return *this;

// If we already have sufficient space, assign the common elements, then
// destroy any excess.
size_t RHSSize = RHS.size();
size_t CurSize = this->size();
if (CurSize >= RHSSize) {
  // Assign common elements.
  iterator NewEnd;
  if (RHSSize)
    NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
  else
    NewEnd = this->begin();

  // Destroy excess elements.
  this->destroy_range(NewEnd, this->end());

  // Trim.
  this->set_size(RHSSize);
  return *this;
}

// If we have to grow to have enough elements, destroy the current elements.
// This allows us to avoid copying them during the grow.
// FIXME: don't do this if they're efficiently moveable.
if (this->capacity() < RHSSize) {
  // Destroy current elements.
  this->clear();
  CurSize = 0;
  this->grow(RHSSize);
} else if (CurSize) {
  // Otherwise, use assignment for the already-constructed elements.
  std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
}

// Copy construct the new elements in place.
this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
                         this->begin()+CurSize);

// Set end.
this->set_size(RHSSize);
return *this;
1017}

1019template <typename T>
1020SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
// Avoid self-assignment.
if (this == &RHS) return *this;

// If the RHS isn't small, clear this vector and then steal its buffer.
if (!RHS.isSmall()) {
  this->destroy_range(this->begin(), this->end());
  if (!this->isSmall()) free(this->begin());
  this->BeginX = RHS.BeginX;
  this->Size = RHS.Size;
  this->Capacity = RHS.Capacity;
  RHS.resetToSmall();
  return *this;
}

// If we already have sufficient space, assign the common elements, then
// destroy any excess.
size_t RHSSize = RHS.size();
size_t CurSize = this->size();
if (CurSize >= RHSSize) {
  // Assign common elements.
  iterator NewEnd = this->begin();
  if (RHSSize)
    NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);

  // Destroy excess elements and trim the bounds.
  this->destroy_range(NewEnd, this->end());
  this->set_size(RHSSize);

  // Clear the RHS.
  RHS.clear();

  return *this;
}

// If we have to grow to have enough elements, destroy the current elements.
// This allows us to avoid copying them during the grow.
// FIXME: this may not actually make any sense if we can efficiently move
// elements.
if (this->capacity() < RHSSize) {
  // Destroy current elements.
  this->clear();
  CurSize = 0;
  this->grow(RHSSize);
} else if (CurSize) {
  // Otherwise, use assignment for the already-constructed elements.
  std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
}

// Move-construct the new elements in place.
this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
                         this->begin()+CurSize);

// Set end.
this->set_size(RHSSize);

RHS.clear();
return *this;
1078}

/// Storage for the SmallVector elements.  This is specialized for the N=0 case
/// to avoid allocating unnecessary storage.
template <typename T, unsigned N>
struct SmallVectorStorage {
  /// Raw, suitably aligned bytes for N objects of type T.
  alignas(T) char InlineElts[N * sizeof(T)];
};

/// We need the storage to be properly aligned even for small-size of 0 so that
/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
/// well-defined.
template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {};

/// Forward declaration of SmallVector so that
/// calculateSmallVectorDefaultInlinedElements can reference
/// `sizeof(SmallVector<T, 0>)`.
template <typename T, unsigned N> class LLVM_GSL_OWNER SmallVector;

1097/// Helper class for calculating the default number of inline elements for
1098/// `SmallVector<T>`.
1099///
1100/// This should be migrated to a constexpr function when our minimum
1101/// compiler support is enough for multi-statement constexpr functions.
1102template <typename T> struct CalculateSmallVectorDefaultInlinedElements {
// Parameter controlling the default number of inlined elements
// for `SmallVector<T>`.
//
// The default number of inlined elements ensures that
// 1. There is at least one inlined element.
// 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless
// it contradicts 1.
static constexpr size_t kPreferredSmallVectorSizeof = 64;

// static_assert that sizeof(T) is not "too big".
//
// Because our policy guarantees at least one inlined element, it is possible
// for an arbitrarily large inlined element to allocate an arbitrarily large
// amount of inline storage. We generally consider it an antipattern for a
// SmallVector to allocate an excessive amount of inline storage, so we want
// to call attention to these cases and make sure that users are making an
// intentional decision if they request a lot of inline storage.
//
// We want this assertion to trigger in pathological cases, but otherwise
// not be too easy to hit. To accomplish that, the cutoff is actually somewhat
// larger than kPreferredSmallVectorSizeof (otherwise,
// `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that
// pattern seems useful in practice).
//
// One wrinkle is that this assertion is in theory non-portable, since
// sizeof(T) is in general platform-dependent. However, we don't expect this
// to be much of an issue, because most LLVM development happens on 64-bit
// hosts, and therefore sizeof(T) is expected to *decrease* when compiled for
// 32-bit hosts, dodging the issue. The reverse situation, where development
// happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a
// 64-bit host, is expected to be very rare.
static_assert(
    sizeof(T) <= 256,
    "You are trying to use a default number of inlined elements for "
    "`SmallVector<T>` but `sizeof(T)` is really big! Please use an "
    "explicit number of inlined elements with `SmallVector<T, N>` to make "
    "sure you really want that much inline storage.");

// Discount the size of the header itself when calculating the maximum inline
// bytes.
static constexpr size_t PreferredInlineBytes =
    kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>);
static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T);
static constexpr size_t value =
    NumElementsThatFit == 0 ? 1 : NumElementsThatFit;
1148};

1150/// This is a 'vector' (really, a variable-sized array), optimized
1151/// for the case when the array is small.  It contains some number of elements
1152/// in-place, which allows it to avoid heap allocation when the actual number of
1153/// elements is below that threshold.  This allows normal "small" cases to be
1154/// fast without losing generality for large inputs.
1155///
1156/// \note
1157/// In the absence of a well-motivated choice for the number of inlined
1158/// elements \p N, it is recommended to use \c SmallVector<T> (that is,
1159/// omitting the \p N). This will choose a default number of inlined elements
1160/// reasonable for allocation on the stack (for example, trying to keep \c
1161/// sizeof(SmallVector<T>) around 64 bytes).
1162///
1163/// \warning This does not attempt to be exception safe.
1164///
1165/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h
1166template <typename T,
        unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value>
1168class LLVM_GSL_OWNER[[gsl::Owner]] SmallVector : public SmallVectorImpl<T>,
                                 SmallVectorStorage<T, N> {
1170public:
SmallVector() : SmallVectorImpl<T>(N) {}

~SmallVector() {
  // Destroy the constructed elements in the vector.
  this->destroy_range(this->begin(), this->end());
}

explicit SmallVector(size_t Size, const T &Value = T())
  : SmallVectorImpl<T>(N) {
  this->assign(Size, Value);
}

template <typename ItTy,
          typename = std::enable_if_t<std::is_convertible<
              typename std::iterator_traits<ItTy>::iterator_category,
              std::input_iterator_tag>::value>>
SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
  this->append(S, E);
}

template <typename RangeTy>
explicit SmallVector(const iterator_range<RangeTy> &R)
    : SmallVectorImpl<T>(N) {
  this->append(R.begin(), R.end());
}

SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
  this->assign(IL);
}

SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
  if (!RHS.empty())
    SmallVectorImpl<T>::operator=(RHS);
}

SmallVector &operator=(const SmallVector &RHS) {
  SmallVectorImpl<T>::operator=(RHS);
  return *this;
}

SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
  if (!RHS.empty())
    SmallVectorImpl<T>::operator=(::std::move(RHS));
}

SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
  if (!RHS.empty())
    SmallVectorImpl<T>::operator=(::std::move(RHS));
}

SmallVector &operator=(SmallVector &&RHS) {
  SmallVectorImpl<T>::operator=(::std::move(RHS));
  return *this;
}

SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
  SmallVectorImpl<T>::operator=(::std::move(RHS));
  return *this;
}

SmallVector &operator=(std::initializer_list<T> IL) {
  this->assign(IL);
  return *this;
}
1235};

1237template <typename T, unsigned N>
1238inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
return X.capacity_in_bytes();
1240}

1242/// Given a range of type R, iterate the entire range and return a
1243/// SmallVector with elements of the vector.  This is useful, for example,
1244/// when you want to iterate a range and then sort the results.
1245template <unsigned Size, typename R>
1246SmallVector<typename std::remove_const<typename std::remove_reference<
              decltype(*std::begin(std::declval<R &>()))>::type>::type,
          Size>
1249to_vector(R &&Range) {
return {std::begin(Range), std::end(Range)};
1251}

1253} // end namespace llvm

1255namespace std {

/// Implement std::swap in terms of SmallVector swap.
template<typename T>
inline void
swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) {
  LHS.swap(RHS);
}

/// Implement std::swap in terms of SmallVector swap.
template<typename T, unsigned N>
inline void
swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
  LHS.swap(RHS);
}

1271} // end namespace std

1273#endif // LLVM_ADT_SMALLVECTOR_H

←

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12 
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15 
16#include "llvm/Support/Compiler.h"
17#include <cassert>
18#include <climits>
19#include <cmath>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24 
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28 
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40 
41namespace llvm {
42 
/// The behavior an operation has on an input of 0.
enum ZeroBehavior {
  /// The returned value is undefined.
  ZB_Undefined,
  /// The returned value is numeric_limits<T>::max()
  ZB_Max,
  /// The returned value is numeric_limits<T>::digits
  ZB_Width
};
52 
/// Mathematical constants.
///
/// Each constant is followed by its hexadecimal floating-point form and,
/// where one exists, the OEIS sequence for its decimal expansion.
namespace numbers {
// TODO: Track C++20 std::numbers.
// TODO: Favor using the hexadecimal FP constants (requires C++17).
constexpr double e          = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
                 egamma     = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
                 ln2        = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
                 ln10       = 2.3025850929940456840, // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392
                 log2e      = 1.4426950408889634074, // (0x1.71547652b82feP+0)
                 log10e     = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
                 pi         = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
                 inv_pi     = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
                 sqrtpi     = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
                 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
                 sqrt2      = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
                 inv_sqrt2  = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
                 sqrt3      = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
                 inv_sqrt3  = .57735026918962576451, // (0x1.279a74590331cP-1)
                 phi        = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
constexpr float ef          = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
                egammaf     = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
                ln2f        = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
                ln10f       = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
                log2ef      = 1.44269504F, // (0x1.715476P+0)
                log10ef     = .434294482F, // (0x1.bcb7b2P-2)
                pif         = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
                inv_pif     = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
                sqrtpif     = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
                inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
                sqrt2f      = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
                inv_sqrt2f  = .707106781F, // (0x1.6a09e6P-1)
                sqrt3f      = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
                inv_sqrt3f  = .577350269F, // (0x1.279a74P-1)
                phif        = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
} // namespace numbers
88 
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91  static unsigned count(T Val, ZeroBehavior) {
92    if (!Val)
93      return std::numeric_limits<T>::digits;
94    if (Val & 0x1)
95      return 0;
96 
97    // Bisection method.
98    unsigned ZeroBits = 0;
99    T Shift = std::numeric_limits<T>::digits >> 1;
100    T Mask = std::numeric_limits<T>::max() >> Shift;
101    while (Shift) {
102      if ((Val & Mask) == 0) {
103        Val >>= Shift;
104        ZeroBits |= Shift;
105      }
106      Shift >>= 1;
107      Mask >>= Shift;
108    }
109    return ZeroBits;
110  }
111};
112 
113#if defined(__GNUC__4) || defined(_MSC_VER)
/// Specialization for 4-byte types. When ZB is not ZB_Undefined and Val is 0
/// it returns 32; otherwise it defers to __builtin_ctz or, under _MSC_VER, to
/// _BitScanForward.
// NOTE(review): the bare numbers, arrows and quoted sentences interleaved
// below are static-analyzer path notes from the report, not C++ source. They
// trace the Val == 0 path that returns 32, the value the report header flags
// as an out-of-range left-shift amount for 'unsigned int'.
114template <typename T> struct TrailingZerosCounter<T, 4> {
115  static unsigned count(T Val, ZeroBehavior ZB) {
116    if (ZB42.1
'ZB' is not equal to ZB_Undefined
42.1
'ZB' is not equal to ZB_Undefined
42.1
'ZB' is not equal to ZB_Undefined
42.1
'ZB' is not equal to ZB_Undefined
 != ZB_Undefined && Val == 0)
43
←
Assuming 'Val' is equal to 0→
44
←
Taking true branch→
117      return 32;
45
←
Returning the value 32→
118 
119#if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4)
120    return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122    unsigned long Index;
123    _BitScanForward(&Index, Val);
124    return Index;
125#endif
126  }
127};
128 
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131  static unsigned count(T Val, ZeroBehavior ZB) {
132    if (ZB != ZB_Undefined && Val == 0)
133      return 64;
134 
135#if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4)
136    return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138    unsigned long Index;
139    _BitScanForward64(&Index, Val);
140    return Index;
141#endif
142  }
143};
144#endif
145#endif
146} // namespace detail
147 
148/// Count number of 0's from the least significant bit to the most
149///   stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154///   valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157  static_assert(std::numeric_limits<T>::is_integer &&
158                    !std::numeric_limits<T>::is_signed,
159                "Only unsigned integral types are allowed.");
  // Dispatch on sizeof(T) to the matching counter. With the default ZB_Width a
  // zero Val yields the full bit width (32 for a 4-byte T), so in that case
  // the result is not a valid shift amount for a value of type T.
  // NOTE(review): the bare numbers and arrows after the return statement are
  // static-analyzer path notes from the report, not C++ source.
160  return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
42
←
Calling 'TrailingZerosCounter::count'→
46
←
Returning from 'TrailingZerosCounter::count'→
47
←
Returning the value 32→
161}
162 
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165  static unsigned count(T Val, ZeroBehavior) {
166    if (!Val)
167      return std::numeric_limits<T>::digits;
168 
169    // Bisection method.
170    unsigned ZeroBits = 0;
171    for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172      T Tmp = Val >> Shift;
173      if (Tmp)
174        Val = Tmp;
175      else
176        ZeroBits |= Shift;
177    }
178    return ZeroBits;
179  }
180};
181 
182#if defined(__GNUC__4) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184  static unsigned count(T Val, ZeroBehavior ZB) {
185    if (ZB != ZB_Undefined && Val == 0)
186      return 32;
187 
188#if __has_builtin(__builtin_clz)1 || defined(__GNUC__4)
189    return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191    unsigned long Index;
192    _BitScanReverse(&Index, Val);
193    return Index ^ 31;
194#endif
195  }
196};
197 
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200  static unsigned count(T Val, ZeroBehavior ZB) {
201    if (ZB != ZB_Undefined && Val == 0)
202      return 64;
203 
204#if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4)
205    return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207    unsigned long Index;
208    _BitScanReverse64(&Index, Val);
209    return Index ^ 63;
210#endif
211  }
212};
213#endif
214#endif
215} // namespace detail
216 
217/// Count number of 0's from the most significant bit to the least
218///   stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223///   valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226  static_assert(std::numeric_limits<T>::is_integer &&
227                    !std::numeric_limits<T>::is_signed,
228                "Only unsigned integral types are allowed.");
229  return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231 
232/// Get the index of the first set bit starting from the least
233///   significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238///   valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240  if (ZB == ZB_Max && Val == 0)
241    return std::numeric_limits<T>::max();
242 
243  return countTrailingZeros(Val, ZB_Undefined);
244}
245 
/// Create a bitmask with the N right-most bits set to 1, and all other
/// bits set to 0.  Only unsigned types are allowed.
template <typename T> T maskTrailingOnes(unsigned N) {
  static_assert(std::is_unsigned<T>::value, "Invalid type!");
  const unsigned Bits = CHAR_BIT * sizeof(T);
  assert(N <= Bits && "Invalid bit index");
  // N == 0 is handled separately: it would otherwise shift by the full width
  // of T.
  return N == 0 ? 0 : (T(-1) >> (Bits - N));
}

/// Create a bitmask with the N left-most bits set to 1, and all other
/// bits set to 0.  Only unsigned types are allowed.
template <typename T> T maskLeadingOnes(unsigned N) {
  return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
}

/// Create a bitmask with the N right-most bits set to 0, and all other
/// bits set to 1.  Only unsigned types are allowed.
template <typename T> T maskTrailingZeros(unsigned N) {
  return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
}

/// Create a bitmask with the N left-most bits set to 0, and all other
/// bits set to 1.  Only unsigned types are allowed.
template <typename T> T maskLeadingZeros(unsigned N) {
  return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
}
272 
273/// Get the index of the last set bit starting from the least
274///   significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279///   valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281  if (ZB == ZB_Max && Val == 0)
282    return std::numeric_limits<T>::max();
283 
284  // Use ^ instead of - because both gcc and llvm can remove the associated ^
285  // in the __builtin_clz intrinsic on x86.
286  return countLeadingZeros(Val, ZB_Undefined) ^
287         (std::numeric_limits<T>::digits - 1);
288}
289 
/// Macro compressed bit reversal table for 256 bits.
///
/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
static const unsigned char BitReverseTable256[256] = {
#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
  R6(0), R6(2), R6(1), R6(3)
#undef R2
#undef R4
#undef R6
};

/// Reverse the bits in \p Val.
///
/// Generic implementation: each byte is bit-reversed through the lookup table
/// and the byte order is reversed at the same time.
template <typename T>
T reverseBits(T Val) {
  unsigned char in[sizeof(Val)];
  unsigned char out[sizeof(Val)];
  std::memcpy(in, &Val, sizeof(Val));
  for (unsigned i = 0; i < sizeof(Val); ++i)
    out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
  std::memcpy(&Val, out, sizeof(Val));
  return Val;
}

// Use the compiler's bit-reverse builtins for the fixed-width types when they
// are available.
#if __has_builtin(__builtin_bitreverse8)
template<>
inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
  return __builtin_bitreverse8(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse16)
template<>
inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
  return __builtin_bitreverse16(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse32)
template<>
inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
  return __builtin_bitreverse32(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse64)
template<>
inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
  return __builtin_bitreverse64(Val);
}
#endif
342 
343// NOTE: The following support functions use the _32/_64 extensions instead of
344// type overloading so that signed and unsigned integers can be used without
345// ambiguity.
346 
/// Return the high 32 bits of a 64 bit value.
constexpr inline uint32_t Hi_32(uint64_t Value) {
  return static_cast<uint32_t>(Value >> 32);
}

/// Return the low 32 bits of a 64 bit value.
constexpr inline uint32_t Lo_32(uint64_t Value) {
  return static_cast<uint32_t>(Value);
}

/// Make a 64-bit integer from a high / low pair of 32-bit integers.
constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
  // Widen High before shifting so the shift happens in 64 bits.
  return ((uint64_t)High << 32) | (uint64_t)Low;
}
361 
/// Checks if an integer fits into the given bit width.
template <unsigned N> constexpr inline bool isInt(int64_t x) {
  // The N >= 64 test short-circuits so the shifts are never evaluated for
  // widths that cover the whole int64_t range.
  return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
}
// Template specializations to get better code for common cases.
template <> constexpr inline bool isInt<8>(int64_t x) {
  return static_cast<int8_t>(x) == x;
}
template <> constexpr inline bool isInt<16>(int64_t x) {
  return static_cast<int16_t>(x) == x;
}
template <> constexpr inline bool isInt<32>(int64_t x) {
  return static_cast<int32_t>(x) == x;
}

/// Checks if a signed integer is an N bit number shifted left by S.
template <unsigned N, unsigned S>
constexpr inline bool isShiftedInt(int64_t x) {
  static_assert(
      N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
  static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
  return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
}
385 
/// Checks if an unsigned integer fits into the given bit width.
///
/// This is written as two functions rather than as simply
///
///   return N >= 64 || X < (UINT64_C(1) << N);
///
/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
/// left too many places.
template <unsigned N>
constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
  static_assert(N > 0, "isUInt<0> doesn't make sense");
  return X < (UINT64_C(1) << (N));
}
template <unsigned N>
constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) {
  return true;
}

// Template specializations to get better code for common cases.
template <> constexpr inline bool isUInt<8>(uint64_t x) {
  return static_cast<uint8_t>(x) == x;
}
template <> constexpr inline bool isUInt<16>(uint64_t x) {
  return static_cast<uint16_t>(x) == x;
}
template <> constexpr inline bool isUInt<32>(uint64_t x) {
  return static_cast<uint32_t>(x) == x;
}

/// Checks if a unsigned integer is an N bit number shifted left by S.
template <unsigned N, unsigned S>
constexpr inline bool isShiftedUInt(uint64_t x) {
  static_assert(
      N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
  static_assert(N + S <= 64,
                "isShiftedUInt<N, S> with N + S > 64 is too wide.");
  // Per the two static_asserts above, S must be strictly less than 64.  So
  // 1 << S is not undefined behavior.
  return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
}
426 
/// Gets the maximum value for a N-bit unsigned integer.
inline uint64_t maxUIntN(uint64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // uint64_t(1) << 64 is undefined behavior, so we can't do
  //   (uint64_t(1) << N) - 1
  // without checking first that N != 64.  But this works and doesn't have a
  // branch.
  return UINT64_MAX >> (64 - N);
}

/// Gets the minimum value for a N-bit signed integer.
inline int64_t minIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // Computed in unsigned arithmetic: 1 + ~(2^(N-1)) is the two's complement
  // negation of 2^(N-1).
  return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
}

/// Gets the maximum value for a N-bit signed integer.
inline int64_t maxIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // This relies on two's complement wraparound when N == 64, so we convert to
  // int64_t only at the very end to avoid UB.
  return (UINT64_C(1) << (N - 1)) - 1;
}

/// Checks if an unsigned integer fits into the given (dynamic) bit width.
inline bool isUIntN(unsigned N, uint64_t x) {
  return N >= 64 || x <= maxUIntN(N);
}

/// Checks if an signed integer fits into the given (dynamic) bit width.
inline bool isIntN(unsigned N, int64_t x) {
  return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
}
463 
/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (32 bit version).
/// Ex. isMask_32(0x0000FFFFU) == true.
constexpr inline bool isMask_32(uint32_t Value) {
  // Adding one to a low mask carries through every set bit, so the sum shares
  // no bits with the original value.
  return Value && ((Value + 1) & Value) == 0;
}

/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (64 bit version).
constexpr inline bool isMask_64(uint64_t Value) {
  return Value && ((Value + 1) & Value) == 0;
}

/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
constexpr inline bool isShiftedMask_32(uint32_t Value) {
  // (Value - 1) | Value fills in the trailing zeros, reducing the question to
  // the plain low-mask test.
  return Value && isMask_32((Value - 1) | Value);
}

/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (64 bit version.)
constexpr inline bool isShiftedMask_64(uint64_t Value) {
  return Value && isMask_64((Value - 1) | Value);
}
488 
/// Return true if the argument is a power of two > 0.
/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
constexpr inline bool isPowerOf2_32(uint32_t Value) {
  // Value & (Value - 1) clears the lowest set bit; a power of two has no
  // other bit left.
  return Value && !(Value & (Value - 1));
}

/// Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr inline bool isPowerOf2_64(uint64_t Value) {
  return Value && !(Value & (Value - 1));
}
499 
500/// Count the number of ones from the most significant bit to the first
501/// zero bit.
502///
503/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
504/// Only unsigned integral types are allowed.
505///
506/// \param ZB the behavior on an input of all ones. Only ZB_Width and
507/// ZB_Undefined are valid arguments.
508template <typename T>
509unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
510  static_assert(std::numeric_limits<T>::is_integer &&
511                    !std::numeric_limits<T>::is_signed,
512                "Only unsigned integral types are allowed.");
513  return countLeadingZeros<T>(~Value, ZB);
514}
515 
516/// Count the number of ones from the least significant bit to the first
517/// zero bit.
518///
519/// Ex. countTrailingOnes(0x00FF00FF) == 8.
520/// Only unsigned integral types are allowed.
521///
522/// \param ZB the behavior on an input of all ones. Only ZB_Width and
523/// ZB_Undefined are valid arguments.
524template <typename T>
525unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
526  static_assert(std::numeric_limits<T>::is_integer &&
527                    !std::numeric_limits<T>::is_signed,
528                "Only unsigned integral types are allowed.");
529  return countTrailingZeros<T>(~Value, ZB);
530}
531 
namespace detail {
/// Population-count implementation selected by the byte size of \p T.
///
/// The primary template handles types of at most 4 bytes by widening the
/// value to 32 bits; larger sizes must be covered by a specialization.
template <typename T, std::size_t SizeOfT> struct PopulationCounter {
  /// Return the number of set bits in \p Value.
  static unsigned count(T Value) {
    // Generic version, forward to 32 bits.
    static_assert(SizeOfT <= 4, "Not implemented!");
#if defined(__GNUC__)
    return __builtin_popcount(Value);
#else
    // Portable parallel bit count: sum adjacent bits into 2-bit fields, then
    // 4-bit fields, then add the per-byte counts with a multiply and read the
    // total from the top byte.
    uint32_t v = Value;
    v = v - ((v >> 1) & 0x55555555);
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
    return (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
#endif
  }
};

/// Specialization for 8-byte types.
template <typename T> struct PopulationCounter<T, 8> {
  /// Return the number of set bits in \p Value.
  static unsigned count(T Value) {
#if defined(__GNUC__)
    return __builtin_popcountll(Value);
#else
    // Same parallel bit count as the 32-bit version, widened to 64 bits.
    uint64_t v = Value;
    v = v - ((v >> 1) & 0x5555555555555555ULL);
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
    return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
#endif
  }
};
} // namespace detail
562 
563/// Count the number of set bits in a value.
564/// Ex. countPopulation(0xF000F000) = 8
565/// Returns 0 if the word is zero.
566template <typename T>
567inline unsigned countPopulation(T Value) {
568  static_assert(std::numeric_limits<T>::is_integer &&
569                    !std::numeric_limits<T>::is_signed,
570                "Only unsigned integral types are allowed.");
571  return detail::PopulationCounter<T, sizeof(T)>::count(Value);
572}
573 
574/// Compile time Log2.
575/// Valid only for positive powers of two.
576template <size_t kValue> constexpr inline size_t CTLog2() {
577  static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
578                "Value is not a valid power of 2");
579  return 1 + CTLog2<kValue / 2>();
580}
581 
582template <> constexpr inline size_t CTLog2<1>() { return 0; }
583 
/// Return the log base 2 of the specified value.
inline double Log2(double Value) {
#if !defined(__ANDROID_API__) || __ANDROID_API__ >= 18
  return log2(Value);
#else
  // Android API levels below 18 take this path instead of calling log2():
  // derive the result from natural logarithms.
  return __builtin_log(Value) / __builtin_log(2.0);
#endif
}
592 
593/// Return the floor log base 2 of the specified value, -1 if the value is zero.
594/// (32 bit edition.)
595/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
596inline unsigned Log2_32(uint32_t Value) {
597  return 31 - countLeadingZeros(Value);
598}
599 
600/// Return the floor log base 2 of the specified value, -1 if the value is zero.
601/// (64 bit edition.)
602inline unsigned Log2_64(uint64_t Value) {
603  return 63 - countLeadingZeros(Value);
604}
605 
606/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
607/// (32 bit edition).
608/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
609inline unsigned Log2_32_Ceil(uint32_t Value) {
610  return 32 - countLeadingZeros(Value - 1);
611}
612 
613/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
614/// (64 bit edition.)
615inline unsigned Log2_64_Ceil(uint64_t Value) {
616  return 64 - countLeadingZeros(Value - 1);
617}
618 
/// Return the greatest common divisor of the values using Euclid's algorithm.
/// If \p B is zero, \p A is returned unchanged.
template <typename T>
inline T greatestCommonDivisor(T A, T B) {
  // Repeatedly replace (A, B) with (B, A mod B) until the remainder is zero.
  while (B) {
    T Remainder = A % B;
    A = B;
    B = Remainder;
  }
  return A;
}
629 
630inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
631  return greatestCommonDivisor<uint64_t>(A, B);
632}
633 
/// This function takes a 64-bit integer and returns the bit equivalent double.
inline double BitsToDouble(uint64_t Bits) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // Copy the object representation with memcpy instead of casting pointers.
  double Result;
  memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}
641 
/// This function takes a 32-bit integer and returns the bit equivalent float.
inline float BitsToFloat(uint32_t Bits) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // Copy the object representation with memcpy instead of casting pointers.
  float Result;
  memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}
649 
/// This function takes a double and returns the bit equivalent 64-bit integer.
/// Note that copying doubles around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint64_t DoubleToBits(double Double) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // Copy the object representation with memcpy instead of casting pointers.
  uint64_t Result;
  memcpy(&Result, &Double, sizeof(Result));
  return Result;
}
659 
/// This function takes a float and returns the bit equivalent 32-bit integer.
/// Note that copying floats around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint32_t FloatToBits(float Float) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // Copy the object representation with memcpy instead of casting pointers.
  uint32_t Result;
  memcpy(&Result, &Float, sizeof(Result));
  return Result;
}
669 
/// A and B are either alignments or offsets. Return the minimum alignment that
/// may be assumed after adding the two together.
constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
  // The largest power of 2 that divides both A and B is the lowest set bit
  // of (A | B).
  const uint64_t Combined = A | B;
  // Isolate that bit with X & -X. The negation is spelled "~X + 1" to avoid
  // MSVC warning C4146 (unary minus applied to an unsigned type).
  return Combined & (~Combined + 1);
}
680 
/// Returns the next power of two (in 64-bits) that is strictly greater than A.
/// Returns zero on overflow.
inline uint64_t NextPowerOf2(uint64_t A) {
  // Smear the highest set bit into every lower position (shifts of 1, 2, 4,
  // 8, 16 and 32), producing a mask of the form 2^k - 1.
  for (unsigned Shift = 1; Shift < 64; Shift <<= 1)
    A |= (A >> Shift);
  // Incrementing that mask gives 2^k, or wraps to zero when all bits are set.
  return A + 1;
}
692 
693/// Returns the power of two which is less than or equal to the given value.
694/// Essentially, it is a floor operation across the domain of powers of two.
695inline uint64_t PowerOf2Floor(uint64_t A) {
696  if (!A) return 0;
697  return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
698}
699 
700/// Returns the power of two which is greater than or equal to the given value.
701/// Essentially, it is a ceil operation across the domain of powers of two.
702inline uint64_t PowerOf2Ceil(uint64_t A) {
703  if (!A)
704    return 0;
705  return NextPowerOf2(A - 1);
706}
707 
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
///
/// If non-zero \p Skew is specified, the return value will be a minimal
/// integer that is greater than or equal to \p Value and equal to
/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
///
/// Examples:
/// \code
///   alignTo(5, 8) = 8
///   alignTo(17, 8) = 24
///   alignTo(~0LL, 8) = 0
///   alignTo(321, 255) = 510
///
///   alignTo(5, 8, 7) = 7
///   alignTo(17, 8, 1) = 17
///   alignTo(~0LL, 8, 3) = 3
///   alignTo(321, 255, 42) = 552
/// \endcode
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  // Reduce the skew into [0, Align) before it is used as an offset.
  Skew %= Align;
  // Round (Value - Skew) up to a multiple of Align, then add the skew back.
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}
733 
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
  static_assert(Align != 0u, "Align must be non-zero");
  // Bump past the next boundary, then drop the remainder to land on it.
  const uint64_t Bumped = Value + Align - 1;
  return Bumped - Bumped % Align;
}
740 
/// Returns the integer ceil(Numerator / Denominator).
///
/// \p Denominator must be non-zero. The result is exact for every numerator,
/// including values close to the 64-bit maximum: no intermediate sum is
/// formed that could wrap around.
inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
  assert(Denominator != 0u && "Division by zero");
  // For non-zero N, ceil(N / D) == (N - 1) / D + 1; zero maps to zero.
  const uint64_t Bias = (Numerator != 0) ? 1 : 0;
  return (Numerator - Bias) / Denominator + Bias;
}
745 
/// Returns the integer nearest(Numerator / Denominator), with exact halves
/// rounded up.
///
/// \p Denominator must be non-zero. The rounding decision is made on the
/// remainder, so no intermediate sum is formed that could wrap around for
/// large numerators.
inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
  assert(Denominator != 0u && "Division by zero");
  const uint64_t Quotient = Numerator / Denominator;
  const uint64_t Remainder = Numerator % Denominator;
  // Round up when the remainder is at least half of the denominator, i.e.
  // Remainder >= ceil(Denominator / 2).
  return Quotient + (Remainder > (Denominator - 1) / 2 ? 1 : 0);
}
750 
/// Returns the largest uint64_t less than or equal to \p Value and is
/// \p Skew mod \p Align. \p Align must be non-zero.
///
/// \p Skew is first reduced modulo \p Align.
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  // Round (Value - Skew) down to a multiple of Align, then add the skew back.
  return (Value - Skew) / Align * Align + Skew;
}
758 
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 32, "Bit width out of range.");
  // Move bit B-1 into the sign position, then shift back as a signed value
  // so that it is replicated into the upper bits.
  constexpr unsigned Shift = 32 - B;
  return static_cast<int32_t>(X << Shift) >> Shift;
}
766 
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
inline int32_t SignExtend32(uint32_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 32 && "Bit width out of range.");
  // Move bit B-1 into the sign position, then shift back as a signed value
  // so that it is replicated into the upper bits.
  return int32_t(X << (32 - B)) >> (32 - B);
}
774 
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 64, "Bit width out of range.");
  // Move bit B-1 into the sign position, then shift back as a signed value
  // so that it is replicated into the upper bits.
  constexpr unsigned Shift = 64 - B;
  return static_cast<int64_t>(x << Shift) >> Shift;
}
782 
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
inline int64_t SignExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 64 && "Bit width out of range.");
  // Move bit B-1 into the sign position, then shift back as a signed value
  // so that it is replicated into the upper bits.
  return int64_t(X << (64 - B)) >> (64 - B);
}
790 
/// Subtract two unsigned integers, X and Y, of type T and return the absolute
/// value of the result.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
  // Always subtract the smaller operand from the larger one so the unsigned
  // subtraction cannot wrap.
  if (X > Y)
    return X - Y;
  return Y - X;
}
797 
/// Add two unsigned integers, X and Y, of type T.  Clamp the result to the
/// maximum representable value of T on overflow.  ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
  // Write the flag to a local scratch variable when the caller passes none.
  bool Scratch;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Scratch;
  // Hacker's Delight, p. 29: a wrapped sum is smaller than its operands.
  const T Sum = X + Y;
  Overflowed = (Sum < X || Sum < Y);
  return Overflowed ? std::numeric_limits<T>::max() : Sum;
}
814 
/// Multiply two unsigned integers, X and Y, of type T.  Clamp the result to the
/// maximum representable value of T on overflow.  ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
  // Write the flag to a local scratch variable when the caller passes none.
  bool Scratch;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Scratch;
  Overflowed = false;

  // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
  // because it fails for uint16_t (where multiplication can have undefined
  // behavior due to promotion to int), and requires a division in addition
  // to the multiplication.

  const T Max = std::numeric_limits<T>::max();
  const int Log2Max = Log2_64(Max);

  // Log2(Z) would be either Log2Z or Log2Z + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
  // will necessarily be less than Log2Max as desired.
  const int Log2Z = Log2_64(X) + Log2_64(Y);

  // Comfortably in range: the plain product cannot overflow.
  if (Log2Z < Log2Max)
    return X * Y;

  // Certainly out of range: clamp.
  if (Log2Z > Log2Max) {
    Overflowed = true;
    return Max;
  }

  // We're going to use the top bit, and maybe overflow one
  // bit past it. Multiply all but the bottom bit then add
  // that on at the end.
  T Z = (X >> 1) * Y;
  if (Z & ~(Max >> 1)) {
    Overflowed = true;
    return Max;
  }
  Z <<= 1;
  return (X & 1) ? SaturatingAdd(Z, Y, ResultOverflowed) : Z;
}
859 
/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
/// the product. Clamp the result to the maximum representable value of T on
/// overflow. ResultOverflowed indicates if the result is larger than the
/// maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
  // Write the flag to a local scratch variable when the caller passes none.
  bool Scratch;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Scratch;

  const T Product = SaturatingMultiply(X, Y, &Overflowed);
  // If the multiplication overflowed, its result is returned as-is and the
  // addition is skipped.
  return Overflowed ? Product : SaturatingAdd(A, Product, &Overflowed);
}
876 
877/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
878extern const float huge_valf;
879 
880 
/// Add two signed integers, computing the two's complement truncated result,
/// returning true if overflow occurred.
///
/// \param X the first addend.
/// \param Y the second addend.
/// \param Result receives the truncated sum.
/// \returns a non-zero value if the exact sum does not fit in \p T, zero
/// otherwise (the boolean outcome is converted to \p T).
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_add_overflow)
  return __builtin_add_overflow(X, Y, &Result);
#else
  // Perform the unsigned addition.
  using U = std::make_unsigned_t<T>;
  const U UX = static_cast<U>(X);
  const U UY = static_cast<U>(Y);
  const U UResult = UX + UY;

  // Convert to signed.
  Result = static_cast<T>(UResult);

  // Adding two positive numbers should result in a positive number.
  if (X > 0 && Y > 0)
    return Result <= 0;
  // Adding two negatives should result in a negative number.
  if (X < 0 && Y < 0)
    return Result >= 0;
  // Operands of mixed sign (or a zero operand) cannot overflow.
  return false;
#endif
}
906 
/// Subtract two signed integers, computing the two's complement truncated
/// result, returning true if an overflow occurred.
///
/// \param X the minuend.
/// \param Y the subtrahend.
/// \param Result receives the truncated difference X - Y.
/// \returns a non-zero value if the exact difference does not fit in \p T,
/// zero otherwise (the boolean outcome is converted to \p T).
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_sub_overflow)
  return __builtin_sub_overflow(X, Y, &Result);
#else
  // Perform the unsigned subtraction.
  using U = std::make_unsigned_t<T>;
  const U UX = static_cast<U>(X);
  const U UY = static_cast<U>(Y);
  const U UResult = UX - UY;

  // Convert to signed.
  Result = static_cast<T>(UResult);

  // Subtracting a positive number from a negative results in a negative number.
  if (X <= 0 && Y > 0)
    return Result >= 0;
  // Subtracting a negative number from a positive results in a positive number.
  if (X >= 0 && Y < 0)
    return Result <= 0;
  // Operands of the same sign cannot overflow.
  return false;
#endif
}
932 
/// Multiply two signed integers, computing the two's complement truncated
/// result, returning true if an overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
  using U = std::make_unsigned_t<T>;
  const bool XIsNegative = X < 0;
  const bool YIsNegative = Y < 0;

  // Perform the unsigned multiplication on absolute values.
  const U UX = XIsNegative ? (0 - static_cast<U>(X)) : static_cast<U>(X);
  const U UY = YIsNegative ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
  const U UResult = UX * UY;

  // Convert to signed: the product is negative when exactly one operand is.
  const bool IsNegative = XIsNegative != YIsNegative;
  Result = IsNegative ? (0 - UResult) : UResult;

  // If any of the args was 0, result is 0 and no overflow occurs.
  if (UX == 0 || UY == 0)
    return false;

  // UX and UY are in [1, 2^n], where n is the number of digits.
  // The largest allowed magnitude is max() + 1 for a negative product and
  // max() for a positive one; compare that limit divided by one argument
  // against the other.
  const U Limit = static_cast<U>(std::numeric_limits<T>::max()) +
                  (IsNegative ? U(1) : U(0));
  return UX > Limit / UY;
}
959 
960} // End llvm namespace
961 
962#endif