doxygen/StringTableBuilder_8cpp_source.html

//===- StringTableBuilder.cpp - String table building utility -------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#include "llvm/MC/StringTableBuilder.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/CachedHashString.h"

#include "llvm/ADT/SmallString.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/BinaryFormat/COFF.h"

#include "llvm/Support/Endian.h"

#include "llvm/Support/MathExtras.h"

#include "llvm/Support/raw_ostream.h"

#include <cassert>

#include <cstddef>

#include <cstdint>

#include <cstring>

#include <utility>

#include <vector>


using namespace llvm;


// Out-of-line definition of the defaulted destructor.
StringTableBuilder::~StringTableBuilder() = default;


// Resets Size to the number of bytes the table kind K reserves ahead of the
// first string, so that the offsets handed out by add() already account for
// that prefix.
void StringTableBuilder::initSize() {
  switch (K) {
  case XCOFF:
  case WinCOFF:
    // Four leading bytes are set aside for the table size, which is only
    // written later.
    Size = 4;
    return;

  case MachOLinked:
  case MachO64Linked:
    // Two leading bytes are reserved for the " " string that
    // finalizeStringTable() registers at offset 0.
    Size = 2;
    return;

  case ELF:
  case DXContainer:
  case MachO:
  case MachO64:
    // The table begins with a single NUL byte.
    Size = 1;
    return;

  case RAW:
  case DWARF:
    // Nothing is reserved; the first string starts at offset 0.
    Size = 0;
    return;
  }
}


// Creates a builder for a string table of kind K whose strings are placed at
// offsets that are multiples of Alignment. Size is seeded through initSize()
// so that it starts at the number of bytes the format reserves up front.
StringTableBuilder::StringTableBuilder(Kind K, Align Alignment)

    : K(K), Alignment(Alignment) {

  initSize();

}


// Emits the finalized string table to OS. The table is first rendered into a
// scratch buffer of exactly getSize() bytes by the raw-pointer overload of
// write(), and that buffer is then streamed out in one piece.
void StringTableBuilder::write(raw_ostream &OS) const {
  assert(isFinalized());

  // NOTE(review): write(uint8_t *) only copies the string bytes, so the
  // terminators and padding in between rely on resize() value-initializing
  // the new bytes -- confirm against SmallVector::resize.
  SmallString<0> Scratch;
  Scratch.resize(getSize());

  write(reinterpret_cast<uint8_t *>(Scratch.data()));
  OS << Scratch;
}


// One entry of the string table: the string (with its cached hash) paired
// with the byte offset assigned to it inside the table.
using StringPair = std::pair<CachedHashStringRef, size_t>;


void StringTableBuilder::write(uint8_t *Buf) const {

  assert(isFinalized());

  for (const StringPair &P : StringIndexMap) {

    StringRef Data = P.first.val();

    if (!Data.empty())

      memcpy(Buf + P.second, Data.data(), Data.size());

  }

  // The COFF formats store the size of the string table in the first 4 bytes.

  // For Windows, the format is little-endian; for AIX, it is big-endian.

  if (K == WinCOFF)

    support::endian::write32le(Buf, Size);

  else if (K == XCOFF)

    support::endian::write32be(Buf, Size);

}


// Returns the byte that sits Pos positions before the end of P's string
// (Pos == 0 yields the last byte) as a non-negative value, or -1 when the
// string has no byte at that position because it is too short.
static int charTailAt(StringPair *P, size_t Pos) {
  const StringRef Str = P->first.val();
  const size_t Len = Str.size();
  return Pos < Len ? static_cast<unsigned char>(Str[Len - 1 - Pos]) : -1;
}


// Three-way radix quicksort over the strings read back-to-front. This is much
// faster than std::sort with strcmp because characters that are already known
// to be equal are never compared again.
//
// Vec is reordered in place. Pos is the distance from the end of each string
// of the character currently used as the sort key; all strings in Vec are
// expected to agree on the Pos characters closer to the end.
static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) {
  // Each iteration handles one key position. The loop stands in for the
  // recursive call on the "equal" partition at Pos + 1.
  while (Vec.size() > 1) {
    // Partition so that [0, Lo) holds items greater than the pivot,
    // [Lo, Hi) items equal to it, and [Hi, Vec.size()) items less than it.
    const int Pivot = charTailAt(Vec[0], Pos);
    size_t Lo = 0;
    size_t Hi = Vec.size();
    for (size_t Cur = 1; Cur < Hi;) {
      const int Key = charTailAt(Vec[Cur], Pos);
      if (Key > Pivot) {
        std::swap(Vec[Lo++], Vec[Cur++]);
      } else if (Key < Pivot) {
        // The element swapped in from the back has not been examined yet,
        // so Cur stays where it is.
        std::swap(Vec[--Hi], Vec[Cur]);
      } else {
        ++Cur;
      }
    }

    // The outer partitions still differ at this position; sort them on the
    // same key.
    multikeySort(Vec.slice(0, Lo), Pos);
    multikeySort(Vec.slice(Hi), Pos);

    // A pivot of -1 means every string in the middle partition has run out
    // of characters, so there is nothing further to distinguish them by.
    if (Pivot == -1)
      return;

    // Continue with the middle partition on the next character.
    Vec = Vec.slice(Lo, Hi - Lo);
    ++Pos;
  }
}


// Finalizes the table with optimization enabled: finalizeStringTable() sorts
// the strings and recomputes every offset, letting a string share the tail of
// another one where possible. Must not be used for DWARF tables (asserted).
void StringTableBuilder::finalize() {

  assert(K != DWARF);

  finalizeStringTable(/*Optimize=*/true);

}


// Finalizes the table with optimization disabled: the offsets that add()
// already assigned are kept as they are, and finalizeStringTable() only
// applies the format-specific padding and special entries.
void StringTableBuilder::finalizeInOrder() {

  finalizeStringTable(/*Optimize=*/false);

}


void StringTableBuilder::finalizeStringTable(bool Optimize) {

  Finalized = true;


  if (Optimize) {

    std::vector<StringPair *> Strings;

    Strings.reserve(StringIndexMap.size());

    for (StringPair &P : StringIndexMap)

      Strings.push_back(&P);


    multikeySort(Strings, 0);

    initSize();


    StringRef Previous;

    for (StringPair *P : Strings) {

      StringRef S = P->first.val();

      if (Previous.ends_with(S)) {

        size_t Pos = Size - S.size() - (K != RAW);

        if (isAligned(Alignment, Pos)) {

          P->second = Pos;

          continue;

        }

      }


      Size = alignTo(Size, Alignment);

      P->second = Size;


      Size += S.size();

      if (K != RAW)

        ++Size;

      Previous = S;

    }

  }


  if (K == MachO || K == MachOLinked || K == DXContainer)

    Size = alignTo(Size, 4); // Pad to multiple of 4.

  if (K == MachO64 || K == MachO64Linked)

    Size = alignTo(Size, 8); // Pad to multiple of 8.


  // According to ld64 the string table of a final linked Mach-O binary starts

  // with " ", i.e. the first byte is ' ' and the second byte is zero. In

  // 'initSize()' we reserved the first two bytes for holding this string.

  if (K == MachOLinked || K == MachO64Linked)

    StringIndexMap[CachedHashStringRef(" ")] = 0;


  // The first byte in an ELF string table must be null, according to the ELF

  // specification. In 'initSize()' we reserved the first byte to hold null for

  // this purpose and here we actually add the string to allow 'getOffset()' to

  // be called on an empty string.

  if (K == ELF)

    StringIndexMap[CachedHashStringRef("")] = 0;

}


void StringTableBuilder::clear() {

  Finalized = false;

  StringIndexMap.clear();

}


// Looks up the offset that was assigned to S inside the table. The builder
// must already be finalized and S must be present in it; both conditions are
// only checked by assertions.
size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
  assert(isFinalized());

  const auto Entry = StringIndexMap.find(S);
  assert(Entry != StringIndexMap.end() && "String is not in table!");

  return Entry->second;
}


// Registers S with the builder and returns the offset assigned to it.
//
// A string that is already present keeps its existing offset and leaves the
// table size unchanged. A new string is placed at the next position that
// satisfies Alignment, and Size grows by its length plus one terminator byte
// (no terminator for RAW tables). Must not be called once the builder is
// finalized.
size_t StringTableBuilder::add(CachedHashStringRef S) {
  // WinCOFF tables only accept strings longer than COFF::NameSize.
  if (K == WinCOFF)
    assert(S.size() > COFF::NameSize && "Short string in COFF string table!");

  assert(!isFinalized());

  // Insert with a placeholder offset; the flag tells whether S is new.
  auto Inserted = StringIndexMap.insert(std::make_pair(S, 0));
  auto &Offset = Inserted.first->second;
  if (!Inserted.second)
    return Offset;

  Offset = alignTo(Size, Alignment);
  Size = Offset + S.size() + (K != RAW);
  return Offset;
}

ArrayRef.h

COFF.h

CachedHashString.h
This file defines CachedHashString and CachedHashStringRef.

Endian.h

I
#define I(x, y, z)
Definition: MD5.cpp:58

MathExtras.h

P
#define P(N)

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

OS
raw_pwrite_stream & OS
Definition: SampleProfWriter.cpp:51

SmallString.h
This file defines the SmallString class.

StringRef.h

charTailAt
static int charTailAt(StringPair *P, size_t Pos)
Definition: StringTableBuilder.cpp:87

StringPair
std::pair< CachedHashStringRef, size_t > StringPair
Definition: StringTableBuilder.cpp:69

multikeySort
static void multikeySort(MutableArrayRef< StringPair * > Vec, int Pos)
Definition: StringTableBuilder.cpp:96

StringTableBuilder.h

llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168

llvm::CachedHashStringRef
A container which contains a StringRef plus a precomputed hash.
Definition: CachedHashString.h:29

llvm::CachedHashStringRef::size
uint32_t size() const
Definition: CachedHashString.h:46

llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310

llvm::MutableArrayRef::slice
MutableArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:379

llvm::SmallString
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:51

llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150

llvm::StringRef::ends_with
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:277

llvm::StringTableBuilder::clear
void clear()
Definition: StringTableBuilder.cpp:190

llvm::StringTableBuilder::finalizeInOrder
void finalizeInOrder()
Finalize the string table without reordering it.
Definition: StringTableBuilder.cpp:134

llvm::StringTableBuilder::getOffset
size_t getOffset(CachedHashStringRef S) const
Get the offset of a string in the string table.
Definition: StringTableBuilder.cpp:195

llvm::StringTableBuilder::Kind
Kind
Definition: StringTableBuilder.h:26

llvm::StringTableBuilder::DXContainer
@ DXContainer
Definition: StringTableBuilder.h:36

llvm::StringTableBuilder::MachO64Linked
@ MachO64Linked
Definition: StringTableBuilder.h:32

llvm::StringTableBuilder::WinCOFF
@ WinCOFF
Definition: StringTableBuilder.h:28

llvm::StringTableBuilder::XCOFF
@ XCOFF
Definition: StringTableBuilder.h:35

llvm::StringTableBuilder::RAW
@ RAW
Definition: StringTableBuilder.h:33

llvm::StringTableBuilder::MachOLinked
@ MachOLinked
Definition: StringTableBuilder.h:31

llvm::StringTableBuilder::MachO64
@ MachO64
Definition: StringTableBuilder.h:30

llvm::StringTableBuilder::MachO
@ MachO
Definition: StringTableBuilder.h:29

llvm::StringTableBuilder::ELF
@ ELF
Definition: StringTableBuilder.h:27

llvm::StringTableBuilder::DWARF
@ DWARF
Definition: StringTableBuilder.h:34

llvm::StringTableBuilder::~StringTableBuilder
~StringTableBuilder()

llvm::StringTableBuilder::isFinalized
bool isFinalized() const
Definition: StringTableBuilder.h:86

llvm::StringTableBuilder::write
void write(raw_ostream &OS) const
Definition: StringTableBuilder.cpp:61

llvm::StringTableBuilder::add
size_t add(CachedHashStringRef S)
Add a string to the builder.
Definition: StringTableBuilder.cpp:202

llvm::StringTableBuilder::finalize
void finalize()
Analyze the strings and build the final table.
Definition: StringTableBuilder.cpp:129

llvm::StringTableBuilder::StringTableBuilder
StringTableBuilder(Kind K, Align Alignment=Align(1))
Definition: StringTableBuilder.cpp:56

llvm::StringTableBuilder::getSize
size_t getSize() const
Definition: StringTableBuilder.h:80

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52

uint8_t

llvm::COFF::NameSize
@ NameSize
Definition: COFF.h:57

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::support::endian::write32le
void write32le(void *P, uint32_t V)
Definition: Endian.h:468

llvm::support::endian::write32be
void write32be(void *P, uint32_t V)
Definition: Endian.h:477

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145

llvm::ExpandVariadicsMode::Optimize
@ Optimize

llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155

llvm::Data
@ Data
Definition: SIMachineScheduler.h:55

std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860

raw_ostream.h

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39