LCOV - code coverage report
Current view: top level - include/llvm/Transforms/IPO - LowerTypeTests.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 16 16 100.0 %
Date: 2017-09-14 15:23:50 Functions: 2 2 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- LowerTypeTests.h - type metadata lowering pass -----------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file defines parts of the type test lowering pass implementation that
      11             : // may be usefully unit tested.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #ifndef LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H
      16             : #define LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H
      17             : 
      18             : #include "llvm/ADT/SmallVector.h"
      19             : #include "llvm/IR/Module.h"
      20             : #include "llvm/IR/PassManager.h"
      21             : #include <cstdint>
      22             : #include <cstring>
      23             : #include <limits>
      24             : #include <set>
      25             : #include <vector>
      26             : 
      27             : namespace llvm {
      28             : 
      29             : class raw_ostream;
      30             : 
      31             : namespace lowertypetests {
      32             : 
      33         316 : struct BitSetInfo {
      34             :   // The indices of the set bits in the bitset.
      35             :   std::set<uint64_t> Bits;
      36             : 
      37             :   // The byte offset into the combined global represented by the bitset.
      38             :   uint64_t ByteOffset;
      39             : 
      40             :   // The size of the bitset in bits.
      41             :   uint64_t BitSize;
      42             : 
      43             :   // Log2 alignment of the bit set relative to the combined global.
      44             :   // For example, a log2 alignment of 3 means that bits in the bitset
      45             :   // represent addresses 8 bytes apart.
      46             :   unsigned AlignLog2;
      47             : 
      48             :   bool isSingleOffset() const {
      49          28 :     return Bits.size() == 1;
      50             :   }
      51             : 
      52             :   bool isAllOnes() const {
      53         158 :     return Bits.size() == BitSize;
      54             :   }
      55             : 
      56             :   bool containsGlobalOffset(uint64_t Offset) const;
      57             : 
      58             :   void print(raw_ostream &OS) const;
      59             : };
      60             : 
      61         158 : struct BitSetBuilder {
      62             :   SmallVector<uint64_t, 16> Offsets;
      63             :   uint64_t Min = std::numeric_limits<uint64_t>::max();
      64             :   uint64_t Max = 0;
      65             : 
      66         158 :   BitSetBuilder() = default;
      67             : 
      68             :   void addOffset(uint64_t Offset) {
      69         400 :     if (Min > Offset)
      70          89 :       Min = Offset;
      71         400 :     if (Max < Offset)
      72         336 :       Max = Offset;
      73             : 
      74         400 :     Offsets.push_back(Offset);
      75             :   }
      76             : 
      77             :   BitSetInfo build();
      78             : };
      79             : 
      80             : /// This class implements a layout algorithm for globals referenced by bit sets
      81             : /// that tries to keep members of small bit sets together. This can
      82             : /// significantly reduce bit set sizes in many cases.
      83             : ///
      84             : /// It works by assembling fragments of layout from sets of referenced globals.
      85             : /// Each set of referenced globals causes the algorithm to create a new
      86             : /// fragment, which is assembled by appending each referenced global in the set
      87             : /// into the fragment. If a referenced global has already been referenced by an
      88             : /// fragment created earlier, we instead delete that fragment and append its
      89             : /// contents into the fragment we are assembling.
      90             : ///
      91             : /// By starting with the smallest fragments, we minimize the size of the
      92             : /// fragments that are copied into larger fragments. This is most intuitively
      93             : /// thought about when considering the case where the globals are virtual tables
      94             : /// and the bit sets represent their derived classes: in a single inheritance
      95             : /// hierarchy, the optimum layout would involve a depth-first search of the
      96             : /// class hierarchy (and in fact the computed layout ends up looking a lot like
      97             : /// a DFS), but a naive DFS would not work well in the presence of multiple
      98             : /// inheritance. This aspect of the algorithm ends up fitting smaller
      99             : /// hierarchies inside larger ones where that would be beneficial.
     100             : ///
     101             : /// For example, consider this class hierarchy:
     102             : ///
     103             : /// A       B
     104             : ///   \   / | \
     105             : ///     C   D   E
     106             : ///
     107             : /// We have five bit sets: bsA (A, C), bsB (B, C, D, E), bsC (C), bsD (D) and
     108             : /// bsE (E). If we laid out our objects by DFS traversing B followed by A, our
     109             : /// layout would be {B, C, D, E, A}. This is optimal for bsB as it needs to
     110             : /// cover the only 4 objects in its hierarchy, but not for bsA as it needs to
     111             : /// cover 5 objects, i.e. the entire layout. Our algorithm proceeds as follows:
     112             : ///
     113             : /// Add bsC, fragments {{C}}
     114             : /// Add bsD, fragments {{C}, {D}}
     115             : /// Add bsE, fragments {{C}, {D}, {E}}
     116             : /// Add bsA, fragments {{A, C}, {D}, {E}}
     117             : /// Add bsB, fragments {{B, A, C, D, E}}
     118             : ///
     119             : /// This layout is optimal for bsA, as it now only needs to cover two (i.e. 3
     120             : /// fewer) objects, at the cost of bsB needing to cover 1 more object.
     121             : ///
     122             : /// The bit set lowering pass assigns an object index to each object that needs
     123             : /// to be laid out, and calls addFragment for each bit set passing the object
     124             : /// indices of its referenced globals. It then assembles a layout from the
     125             : /// computed layout in the Fragments field.
     126         114 : struct GlobalLayoutBuilder {
     127             :   /// The computed layout. Each element of this vector contains a fragment of
     128             :   /// layout (which may be empty) consisting of object indices.
     129             :   std::vector<std::vector<uint64_t>> Fragments;
     130             : 
     131             :   /// Mapping from object index to fragment index.
     132             :   std::vector<uint64_t> FragmentMap;
     133             : 
     134          57 :   GlobalLayoutBuilder(uint64_t NumObjects)
     135         285 :       : Fragments(1), FragmentMap(NumObjects) {}
     136             : 
     137             :   /// Add F to the layout while trying to keep its indices contiguous.
     138             :   /// If a previously seen fragment uses any of F's indices, that
     139             :   /// fragment will be laid out inside F.
     140             :   void addFragment(const std::set<uint64_t> &F);
     141             : };
     142             : 
     143             : /// This class is used to build a byte array containing overlapping bit sets. By
     144             : /// loading from indexed offsets into the byte array and applying a mask, a
     145             : /// program can test bits from the bit set with a relatively short instruction
     146             : /// sequence. For example, suppose we have 15 bit sets to lay out:
     147             : ///
     148             : /// A (16 bits), B (15 bits), C (14 bits), D (13 bits), E (12 bits),
     149             : /// F (11 bits), G (10 bits), H (9 bits), I (7 bits), J (6 bits), K (5 bits),
     150             : /// L (4 bits), M (3 bits), N (2 bits), O (1 bit)
     151             : ///
     152             : /// These bits can be laid out in a 16-byte array like this:
     153             : ///
     154             : ///       Byte Offset
     155             : ///     0123456789ABCDEF
     156             : /// Bit
     157             : ///   7 HHHHHHHHHIIIIIII
     158             : ///   6 GGGGGGGGGGJJJJJJ
     159             : ///   5 FFFFFFFFFFFKKKKK
     160             : ///   4 EEEEEEEEEEEELLLL
     161             : ///   3 DDDDDDDDDDDDDMMM
     162             : ///   2 CCCCCCCCCCCCCCNN
     163             : ///   1 BBBBBBBBBBBBBBBO
     164             : ///   0 AAAAAAAAAAAAAAAA
     165             : ///
     166             : /// For example, to test bit X of A, we evaluate ((bits[X] & 1) != 0), or to
     167             : /// test bit X of I, we evaluate ((bits[9 + X] & 0x80) != 0). This can be done
     168             : /// in 1-2 machine instructions on x86, or 4-6 instructions on ARM.
     169             : ///
     170             : /// This is a byte array, rather than (say) a 2-byte array or a 4-byte array,
     171             : /// because for one thing it gives us better packing (the more bins there are,
     172             : /// the less evenly they will be filled), and for another, the instruction
     173             : /// sequences can be slightly shorter, both on x86 and ARM.
     174          96 : struct ByteArrayBuilder {
     175             :   /// The byte array built so far.
     176             :   std::vector<uint8_t> Bytes;
     177             : 
     178             :   enum { BitsPerByte = 8 };
     179             : 
     180             :   /// The number of bytes allocated so far for each of the bits.
     181             :   uint64_t BitAllocs[BitsPerByte];
     182             : 
     183          96 :   ByteArrayBuilder() {
     184          48 :     memset(BitAllocs, 0, sizeof(BitAllocs));
     185             :   }
     186             : 
     187             :   /// Allocate BitSize bits in the byte array where Bits contains the bits to
     188             :   /// set. AllocByteOffset is set to the offset within the byte array and
     189             :   /// AllocMask is set to the bitmask for those bits. This uses the LPT (Longest
     190             :   /// Processing Time) multiprocessor scheduling algorithm to lay out the bits
     191             :   /// efficiently; the pass allocates bit sets in decreasing size order.
     192             :   void allocate(const std::set<uint64_t> &Bits, uint64_t BitSize,
     193             :                 uint64_t &AllocByteOffset, uint8_t &AllocMask);
     194             : };
     195             : 
     196             : } // end namespace lowertypetests
     197             : 
     198             : class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
     199             : public:
     200             :   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
     201             : };
     202             : 
     203             : } // end namespace llvm
     204             : 
     205             : #endif // LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H

Generated by: LCOV version 1.13