Line data Source code
1 : //===- LowerTypeTests.h - type metadata lowering pass -----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines parts of the type test lowering pass implementation that
11 : // may be usefully unit tested.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #ifndef LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H
16 : #define LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H
17 :
18 : #include "llvm/ADT/SmallVector.h"
19 : #include "llvm/IR/PassManager.h"
20 : #include <cstdint>
21 : #include <cstring>
22 : #include <limits>
23 : #include <set>
24 : #include <vector>
25 :
26 : namespace llvm {
27 :
28 : class Module;
29 : class ModuleSummaryIndex;
30 : class raw_ostream;
31 :
32 : namespace lowertypetests {
33 :
34 14 : struct BitSetInfo {
35 : // The indices of the set bits in the bitset.
36 : std::set<uint64_t> Bits;
37 :
38 : // The byte offset into the combined global represented by the bitset.
39 : uint64_t ByteOffset;
40 :
41 : // The size of the bitset in bits.
42 : uint64_t BitSize;
43 :
44 : // Log2 alignment of the bit set relative to the combined global.
45 : // For example, a log2 alignment of 3 means that bits in the bitset
46 : // represent addresses 8 bytes apart.
47 : unsigned AlignLog2;
48 :
49 : bool isSingleOffset() const {
50 14 : return Bits.size() == 1;
51 : }
52 :
53 : bool isAllOnes() const {
54 100 : return Bits.size() == BitSize;
55 : }
56 :
57 : bool containsGlobalOffset(uint64_t Offset) const;
58 :
59 : void print(raw_ostream &OS) const;
60 : };
61 :
62 : struct BitSetBuilder {
63 : SmallVector<uint64_t, 16> Offsets;
64 : uint64_t Min = std::numeric_limits<uint64_t>::max();
65 : uint64_t Max = 0;
66 :
67 14 : BitSetBuilder() = default;
68 :
69 : void addOffset(uint64_t Offset) {
70 40 : if (Min > Offset)
71 13 : Min = Offset;
72 40 : if (Max < Offset)
73 30 : Max = Offset;
74 :
75 40 : Offsets.push_back(Offset);
76 : }
77 :
78 : BitSetInfo build();
79 : };
80 :
81 : /// This class implements a layout algorithm for globals referenced by bit sets
82 : /// that tries to keep members of small bit sets together. This can
83 : /// significantly reduce bit set sizes in many cases.
84 : ///
85 : /// It works by assembling fragments of layout from sets of referenced globals.
86 : /// Each set of referenced globals causes the algorithm to create a new
87 : /// fragment, which is assembled by appending each referenced global in the set
88 : /// into the fragment. If a referenced global has already been referenced by an
89 : /// fragment created earlier, we instead delete that fragment and append its
90 : /// contents into the fragment we are assembling.
91 : ///
92 : /// By starting with the smallest fragments, we minimize the size of the
93 : /// fragments that are copied into larger fragments. This is most intuitively
94 : /// thought about when considering the case where the globals are virtual tables
95 : /// and the bit sets represent their derived classes: in a single inheritance
96 : /// hierarchy, the optimum layout would involve a depth-first search of the
97 : /// class hierarchy (and in fact the computed layout ends up looking a lot like
98 : /// a DFS), but a naive DFS would not work well in the presence of multiple
99 : /// inheritance. This aspect of the algorithm ends up fitting smaller
100 : /// hierarchies inside larger ones where that would be beneficial.
101 : ///
102 : /// For example, consider this class hierarchy:
103 : ///
104 : /// A B
105 : /// \ / | \
106 : /// C D E
107 : ///
108 : /// We have five bit sets: bsA (A, C), bsB (B, C, D, E), bsC (C), bsD (D) and
109 : /// bsE (E). If we laid out our objects by DFS traversing B followed by A, our
110 : /// layout would be {B, C, D, E, A}. This is optimal for bsB as it needs to
111 : /// cover the only 4 objects in its hierarchy, but not for bsA as it needs to
112 : /// cover 5 objects, i.e. the entire layout. Our algorithm proceeds as follows:
113 : ///
114 : /// Add bsC, fragments {{C}}
115 : /// Add bsD, fragments {{C}, {D}}
116 : /// Add bsE, fragments {{C}, {D}, {E}}
117 : /// Add bsA, fragments {{A, C}, {D}, {E}}
118 : /// Add bsB, fragments {{B, A, C, D, E}}
119 : ///
120 : /// This layout is optimal for bsA, as it now only needs to cover two (i.e. 3
121 : /// fewer) objects, at the cost of bsB needing to cover 1 more object.
122 : ///
123 : /// The bit set lowering pass assigns an object index to each object that needs
124 : /// to be laid out, and calls addFragment for each bit set passing the object
125 : /// indices of its referenced globals. It then assembles a layout from the
126 : /// computed layout in the Fragments field.
127 : struct GlobalLayoutBuilder {
128 : /// The computed layout. Each element of this vector contains a fragment of
129 : /// layout (which may be empty) consisting of object indices.
130 : std::vector<std::vector<uint64_t>> Fragments;
131 :
132 : /// Mapping from object index to fragment index.
133 : std::vector<uint64_t> FragmentMap;
134 :
135 82 : GlobalLayoutBuilder(uint64_t NumObjects)
136 82 : : Fragments(1), FragmentMap(NumObjects) {}
137 :
138 : /// Add F to the layout while trying to keep its indices contiguous.
139 : /// If a previously seen fragment uses any of F's indices, that
140 : /// fragment will be laid out inside F.
141 : void addFragment(const std::set<uint64_t> &F);
142 : };
143 :
144 : /// This class is used to build a byte array containing overlapping bit sets. By
145 : /// loading from indexed offsets into the byte array and applying a mask, a
146 : /// program can test bits from the bit set with a relatively short instruction
147 : /// sequence. For example, suppose we have 15 bit sets to lay out:
148 : ///
149 : /// A (16 bits), B (15 bits), C (14 bits), D (13 bits), E (12 bits),
150 : /// F (11 bits), G (10 bits), H (9 bits), I (7 bits), J (6 bits), K (5 bits),
151 : /// L (4 bits), M (3 bits), N (2 bits), O (1 bit)
152 : ///
153 : /// These bits can be laid out in a 16-byte array like this:
154 : ///
155 : /// Byte Offset
156 : /// 0123456789ABCDEF
157 : /// Bit
158 : /// 7 HHHHHHHHHIIIIIII
159 : /// 6 GGGGGGGGGGJJJJJJ
160 : /// 5 FFFFFFFFFFFKKKKK
161 : /// 4 EEEEEEEEEEEELLLL
162 : /// 3 DDDDDDDDDDDDDMMM
163 : /// 2 CCCCCCCCCCCCCCNN
164 : /// 1 BBBBBBBBBBBBBBBO
165 : /// 0 AAAAAAAAAAAAAAAA
166 : ///
167 : /// For example, to test bit X of A, we evaluate ((bits[X] & 1) != 0), or to
168 : /// test bit X of I, we evaluate ((bits[9 + X] & 0x80) != 0). This can be done
169 : /// in 1-2 machine instructions on x86, or 4-6 instructions on ARM.
170 : ///
171 : /// This is a byte array, rather than (say) a 2-byte array or a 4-byte array,
172 : /// because for one thing it gives us better packing (the more bins there are,
173 : /// the less evenly they will be filled), and for another, the instruction
174 : /// sequences can be slightly shorter, both on x86 and ARM.
175 64 : struct ByteArrayBuilder {
176 : /// The byte array built so far.
177 : std::vector<uint8_t> Bytes;
178 :
179 : enum { BitsPerByte = 8 };
180 :
181 : /// The number of bytes allocated so far for each of the bits.
182 : uint64_t BitAllocs[BitsPerByte];
183 :
184 66 : ByteArrayBuilder() {
185 66 : memset(BitAllocs, 0, sizeof(BitAllocs));
186 : }
187 :
188 : /// Allocate BitSize bits in the byte array where Bits contains the bits to
189 : /// set. AllocByteOffset is set to the offset within the byte array and
190 : /// AllocMask is set to the bitmask for those bits. This uses the LPT (Longest
191 : /// Processing Time) multiprocessor scheduling algorithm to lay out the bits
192 : /// efficiently; the pass allocates bit sets in decreasing size order.
193 : void allocate(const std::set<uint64_t> &Bits, uint64_t BitSize,
194 : uint64_t &AllocByteOffset, uint8_t &AllocMask);
195 : };
196 :
197 : } // end namespace lowertypetests
198 :
199 : class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
200 : public:
201 : ModuleSummaryIndex *ExportSummary;
202 : const ModuleSummaryIndex *ImportSummary;
203 : LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
204 : const ModuleSummaryIndex *ImportSummary)
205 30 : : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {}
206 : PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
207 : };
208 :
209 : } // end namespace llvm
210 :
211 : #endif // LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H
|