LCOV - code coverage report
Current view: top level - include/llvm/Transforms/IPO - LowerTypeTests.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 15 15 100.0 %
Date: 2018-10-20 13:21:21 Functions: 1 1 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- LowerTypeTests.h - type metadata lowering pass -----------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines parts of the type test lowering pass implementation that
      11             : // may be usefully unit tested.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #ifndef LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H
      16             : #define LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H
      17             : 
      18             : #include "llvm/ADT/SmallVector.h"
      19             : #include "llvm/IR/PassManager.h"
      20             : #include <cstdint>
      21             : #include <cstring>
      22             : #include <limits>
      23             : #include <set>
      24             : #include <vector>
      25             : 
      26             : namespace llvm {
      27             : 
      28             : class Module;
      29             : class ModuleSummaryIndex;
      30             : class raw_ostream;
      31             : 
      32             : namespace lowertypetests {
      33             : 
      34          14 : struct BitSetInfo {
      35             :   // The indices of the set bits in the bitset.
      36             :   std::set<uint64_t> Bits;
      37             : 
      38             :   // The byte offset into the combined global represented by the bitset.
      39             :   uint64_t ByteOffset;
      40             : 
      41             :   // The size of the bitset in bits.
      42             :   uint64_t BitSize;
      43             : 
      44             :   // Log2 alignment of the bit set relative to the combined global.
      45             :   // For example, a log2 alignment of 3 means that bits in the bitset
      46             :   // represent addresses 8 bytes apart.
      47             :   unsigned AlignLog2;
      48             : 
      49             :   bool isSingleOffset() const {
      50          14 :     return Bits.size() == 1;
      51             :   }
      52             : 
      53             :   bool isAllOnes() const {
      54         100 :     return Bits.size() == BitSize;
      55             :   }
      56             : 
      57             :   bool containsGlobalOffset(uint64_t Offset) const;
      58             : 
      59             :   void print(raw_ostream &OS) const;
      60             : };
      61             : 
      62             : struct BitSetBuilder {
      63             :   SmallVector<uint64_t, 16> Offsets;
      64             :   uint64_t Min = std::numeric_limits<uint64_t>::max();
      65             :   uint64_t Max = 0;
      66             : 
      67          14 :   BitSetBuilder() = default;
      68             : 
      69             :   void addOffset(uint64_t Offset) {
      70          40 :     if (Min > Offset)
      71          13 :       Min = Offset;
      72          40 :     if (Max < Offset)
      73          30 :       Max = Offset;
      74             : 
      75          40 :     Offsets.push_back(Offset);
      76             :   }
      77             : 
      78             :   BitSetInfo build();
      79             : };
      80             : 
      81             : /// This class implements a layout algorithm for globals referenced by bit sets
      82             : /// that tries to keep members of small bit sets together. This can
      83             : /// significantly reduce bit set sizes in many cases.
      84             : ///
      85             : /// It works by assembling fragments of layout from sets of referenced globals.
      86             : /// Each set of referenced globals causes the algorithm to create a new
      87             : /// fragment, which is assembled by appending each referenced global in the set
      88             : /// into the fragment. If a referenced global has already been referenced by an
      89             : /// fragment created earlier, we instead delete that fragment and append its
      90             : /// contents into the fragment we are assembling.
      91             : ///
      92             : /// By starting with the smallest fragments, we minimize the size of the
      93             : /// fragments that are copied into larger fragments. This is most intuitively
      94             : /// thought about when considering the case where the globals are virtual tables
      95             : /// and the bit sets represent their derived classes: in a single inheritance
      96             : /// hierarchy, the optimum layout would involve a depth-first search of the
      97             : /// class hierarchy (and in fact the computed layout ends up looking a lot like
      98             : /// a DFS), but a naive DFS would not work well in the presence of multiple
      99             : /// inheritance. This aspect of the algorithm ends up fitting smaller
     100             : /// hierarchies inside larger ones where that would be beneficial.
     101             : ///
     102             : /// For example, consider this class hierarchy:
     103             : ///
     104             : /// A       B
     105             : ///   \   / | \
     106             : ///     C   D   E
     107             : ///
     108             : /// We have five bit sets: bsA (A, C), bsB (B, C, D, E), bsC (C), bsD (D) and
     109             : /// bsE (E). If we laid out our objects by DFS traversing B followed by A, our
     110             : /// layout would be {B, C, D, E, A}. This is optimal for bsB as it needs to
     111             : /// cover the only 4 objects in its hierarchy, but not for bsA as it needs to
     112             : /// cover 5 objects, i.e. the entire layout. Our algorithm proceeds as follows:
     113             : ///
     114             : /// Add bsC, fragments {{C}}
     115             : /// Add bsD, fragments {{C}, {D}}
     116             : /// Add bsE, fragments {{C}, {D}, {E}}
     117             : /// Add bsA, fragments {{A, C}, {D}, {E}}
     118             : /// Add bsB, fragments {{B, A, C, D, E}}
     119             : ///
     120             : /// This layout is optimal for bsA, as it now only needs to cover two (i.e. 3
     121             : /// fewer) objects, at the cost of bsB needing to cover 1 more object.
     122             : ///
     123             : /// The bit set lowering pass assigns an object index to each object that needs
     124             : /// to be laid out, and calls addFragment for each bit set passing the object
     125             : /// indices of its referenced globals. It then assembles a layout from the
     126             : /// computed layout in the Fragments field.
     127             : struct GlobalLayoutBuilder {
     128             :   /// The computed layout. Each element of this vector contains a fragment of
     129             :   /// layout (which may be empty) consisting of object indices.
     130             :   std::vector<std::vector<uint64_t>> Fragments;
     131             : 
     132             :   /// Mapping from object index to fragment index.
     133             :   std::vector<uint64_t> FragmentMap;
     134             : 
     135          82 :   GlobalLayoutBuilder(uint64_t NumObjects)
     136          82 :       : Fragments(1), FragmentMap(NumObjects) {}
     137             : 
     138             :   /// Add F to the layout while trying to keep its indices contiguous.
     139             :   /// If a previously seen fragment uses any of F's indices, that
     140             :   /// fragment will be laid out inside F.
     141             :   void addFragment(const std::set<uint64_t> &F);
     142             : };
     143             : 
     144             : /// This class is used to build a byte array containing overlapping bit sets. By
     145             : /// loading from indexed offsets into the byte array and applying a mask, a
     146             : /// program can test bits from the bit set with a relatively short instruction
     147             : /// sequence. For example, suppose we have 15 bit sets to lay out:
     148             : ///
     149             : /// A (16 bits), B (15 bits), C (14 bits), D (13 bits), E (12 bits),
     150             : /// F (11 bits), G (10 bits), H (9 bits), I (7 bits), J (6 bits), K (5 bits),
     151             : /// L (4 bits), M (3 bits), N (2 bits), O (1 bit)
     152             : ///
     153             : /// These bits can be laid out in a 16-byte array like this:
     154             : ///
     155             : ///       Byte Offset
     156             : ///     0123456789ABCDEF
     157             : /// Bit
     158             : ///   7 HHHHHHHHHIIIIIII
     159             : ///   6 GGGGGGGGGGJJJJJJ
     160             : ///   5 FFFFFFFFFFFKKKKK
     161             : ///   4 EEEEEEEEEEEELLLL
     162             : ///   3 DDDDDDDDDDDDDMMM
     163             : ///   2 CCCCCCCCCCCCCCNN
     164             : ///   1 BBBBBBBBBBBBBBBO
     165             : ///   0 AAAAAAAAAAAAAAAA
     166             : ///
     167             : /// For example, to test bit X of A, we evaluate ((bits[X] & 1) != 0), or to
     168             : /// test bit X of I, we evaluate ((bits[9 + X] & 0x80) != 0). This can be done
     169             : /// in 1-2 machine instructions on x86, or 4-6 instructions on ARM.
     170             : ///
     171             : /// This is a byte array, rather than (say) a 2-byte array or a 4-byte array,
     172             : /// because for one thing it gives us better packing (the more bins there are,
     173             : /// the less evenly they will be filled), and for another, the instruction
     174             : /// sequences can be slightly shorter, both on x86 and ARM.
     175          64 : struct ByteArrayBuilder {
     176             :   /// The byte array built so far.
     177             :   std::vector<uint8_t> Bytes;
     178             : 
     179             :   enum { BitsPerByte = 8 };
     180             : 
     181             :   /// The number of bytes allocated so far for each of the bits.
     182             :   uint64_t BitAllocs[BitsPerByte];
     183             : 
     184          66 :   ByteArrayBuilder() {
     185          66 :     memset(BitAllocs, 0, sizeof(BitAllocs));
     186             :   }
     187             : 
     188             :   /// Allocate BitSize bits in the byte array where Bits contains the bits to
     189             :   /// set. AllocByteOffset is set to the offset within the byte array and
     190             :   /// AllocMask is set to the bitmask for those bits. This uses the LPT (Longest
     191             :   /// Processing Time) multiprocessor scheduling algorithm to lay out the bits
     192             :   /// efficiently; the pass allocates bit sets in decreasing size order.
     193             :   void allocate(const std::set<uint64_t> &Bits, uint64_t BitSize,
     194             :                 uint64_t &AllocByteOffset, uint8_t &AllocMask);
     195             : };
     196             : 
     197             : } // end namespace lowertypetests
     198             : 
     199             : class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
     200             : public:
     201             :   ModuleSummaryIndex *ExportSummary;
     202             :   const ModuleSummaryIndex *ImportSummary;
     203             :   LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
     204             :                      const ModuleSummaryIndex *ImportSummary)
     205          30 :       : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {}
     206             :   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
     207             : };
     208             : 
     209             : } // end namespace llvm
     210             : 
     211             : #endif // LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H

Generated by: LCOV version 1.13