File: | build/source/llvm/include/llvm/MC/LaneBitmask.h |
Warning: | line 84, column 34 The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'Type' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- CodeGenRegisters.cpp - Register and RegisterClass Info -------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file defines structures to encapsulate information gleaned from the | |||
10 | // target register and register class definitions. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "CodeGenRegisters.h" | |||
15 | #include "llvm/ADT/ArrayRef.h" | |||
16 | #include "llvm/ADT/BitVector.h" | |||
17 | #include "llvm/ADT/DenseMap.h" | |||
18 | #include "llvm/ADT/IntEqClasses.h" | |||
19 | #include "llvm/ADT/STLExtras.h" | |||
20 | #include "llvm/ADT/SetVector.h" | |||
21 | #include "llvm/ADT/SmallPtrSet.h" | |||
22 | #include "llvm/ADT/SmallSet.h" | |||
23 | #include "llvm/ADT/SmallVector.h" | |||
24 | #include "llvm/ADT/StringRef.h" | |||
25 | #include "llvm/ADT/Twine.h" | |||
26 | #include "llvm/Support/Debug.h" | |||
27 | #include "llvm/Support/raw_ostream.h" | |||
28 | #include "llvm/TableGen/Error.h" | |||
29 | #include "llvm/TableGen/Record.h" | |||
30 | #include <algorithm> | |||
31 | #include <cassert> | |||
32 | #include <cstdint> | |||
33 | #include <iterator> | |||
34 | #include <map> | |||
35 | #include <queue> | |||
36 | #include <set> | |||
37 | #include <string> | |||
38 | #include <tuple> | |||
39 | #include <utility> | |||
40 | #include <vector> | |||
41 | ||||
42 | using namespace llvm; | |||
43 | ||||
#define DEBUG_TYPE "regalloc-emitter"
45 | ||||
46 | //===----------------------------------------------------------------------===// | |||
47 | // CodeGenSubRegIndex | |||
48 | //===----------------------------------------------------------------------===// | |||
49 | ||||
50 | CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum) | |||
51 | : TheDef(R), EnumValue(Enum), AllSuperRegsCovered(true), Artificial(true) { | |||
52 | Name = std::string(R->getName()); | |||
53 | if (R->getValue("Namespace")) | |||
54 | Namespace = std::string(R->getValueAsString("Namespace")); | |||
55 | Size = R->getValueAsInt("Size"); | |||
56 | Offset = R->getValueAsInt("Offset"); | |||
57 | } | |||
58 | ||||
// Construct a synthesized sub-register index that has no TableGen record
// (e.g. an inferred concatenated index). Size and Offset are unknown (-1).
CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace,
                                       unsigned Enum)
  : TheDef(nullptr), Name(std::string(N)), Namespace(std::string(Nspace)),
    Size(-1), Offset(-1), EnumValue(Enum), AllSuperRegsCovered(true),
    Artificial(true) {}
64 | ||||
65 | std::string CodeGenSubRegIndex::getQualifiedName() const { | |||
66 | std::string N = getNamespace(); | |||
67 | if (!N.empty()) | |||
68 | N += "::"; | |||
69 | N += getName(); | |||
70 | return N; | |||
71 | } | |||
72 | ||||
73 | void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) { | |||
74 | if (!TheDef) | |||
75 | return; | |||
76 | ||||
77 | std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf"); | |||
78 | if (!Comps.empty()) { | |||
79 | if (Comps.size() != 2) | |||
80 | PrintFatalError(TheDef->getLoc(), | |||
81 | "ComposedOf must have exactly two entries"); | |||
82 | CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]); | |||
83 | CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]); | |||
84 | CodeGenSubRegIndex *X = A->addComposite(B, this); | |||
85 | if (X) | |||
86 | PrintFatalError(TheDef->getLoc(), "Ambiguous ComposedOf entries"); | |||
87 | } | |||
88 | ||||
89 | std::vector<Record*> Parts = | |||
90 | TheDef->getValueAsListOfDefs("CoveringSubRegIndices"); | |||
91 | if (!Parts.empty()) { | |||
92 | if (Parts.size() < 2) | |||
93 | PrintFatalError(TheDef->getLoc(), | |||
94 | "CoveredBySubRegs must have two or more entries"); | |||
95 | SmallVector<CodeGenSubRegIndex*, 8> IdxParts; | |||
96 | for (Record *Part : Parts) | |||
97 | IdxParts.push_back(RegBank.getSubRegIdx(Part)); | |||
98 | setConcatenationOf(IdxParts); | |||
99 | } | |||
100 | } | |||
101 | ||||
102 | LaneBitmask CodeGenSubRegIndex::computeLaneMask() const { | |||
103 | // Already computed? | |||
104 | if (LaneMask.any()) | |||
105 | return LaneMask; | |||
106 | ||||
107 | // Recursion guard, shouldn't be required. | |||
108 | LaneMask = LaneBitmask::getAll(); | |||
109 | ||||
110 | // The lane mask is simply the union of all sub-indices. | |||
111 | LaneBitmask M; | |||
112 | for (const auto &C : Composed) | |||
113 | M |= C.second->computeLaneMask(); | |||
114 | assert(M.any() && "Missing lane mask, sub-register cycle?")(static_cast <bool> (M.any() && "Missing lane mask, sub-register cycle?" ) ? void (0) : __assert_fail ("M.any() && \"Missing lane mask, sub-register cycle?\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 114, __extension__ __PRETTY_FUNCTION__)); | |||
115 | LaneMask = M; | |||
116 | return LaneMask; | |||
117 | } | |||
118 | ||||
119 | void CodeGenSubRegIndex::setConcatenationOf( | |||
120 | ArrayRef<CodeGenSubRegIndex*> Parts) { | |||
121 | if (ConcatenationOf.empty()) | |||
122 | ConcatenationOf.assign(Parts.begin(), Parts.end()); | |||
123 | else | |||
124 | assert(std::equal(Parts.begin(), Parts.end(),(static_cast <bool> (std::equal(Parts.begin(), Parts.end (), ConcatenationOf.begin()) && "parts consistent") ? void (0) : __assert_fail ("std::equal(Parts.begin(), Parts.end(), ConcatenationOf.begin()) && \"parts consistent\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 125, __extension__ __PRETTY_FUNCTION__)) | |||
125 | ConcatenationOf.begin()) && "parts consistent")(static_cast <bool> (std::equal(Parts.begin(), Parts.end (), ConcatenationOf.begin()) && "parts consistent") ? void (0) : __assert_fail ("std::equal(Parts.begin(), Parts.end(), ConcatenationOf.begin()) && \"parts consistent\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 125, __extension__ __PRETTY_FUNCTION__)); | |||
126 | } | |||
127 | ||||
// Flatten ConcatenationOf so it contains only leaf indices: any part that is
// itself a concatenation is replaced, in place, by its (already flattened)
// parts.
void CodeGenSubRegIndex::computeConcatTransitiveClosure() {
  for (SmallVectorImpl<CodeGenSubRegIndex*>::iterator
       I = ConcatenationOf.begin(); I != ConcatenationOf.end(); /*empty*/) {
    CodeGenSubRegIndex *SubIdx = *I;
    // Flatten the part first, so the splice below inserts only leaves.
    SubIdx->computeConcatTransitiveClosure();
#ifndef NDEBUG
    for (CodeGenSubRegIndex *SRI : SubIdx->ConcatenationOf)
      assert(SRI->ConcatenationOf.empty() && "No transitive closure?");
#endif

    if (SubIdx->ConcatenationOf.empty()) {
      // Already a leaf; keep it and move on.
      ++I;
    } else {
      // Replace the compound part with its parts. erase/insert may
      // invalidate iterators, so re-seat I each time and then skip past the
      // newly inserted elements.
      I = ConcatenationOf.erase(I);
      I = ConcatenationOf.insert(I, SubIdx->ConcatenationOf.begin(),
                                 SubIdx->ConcatenationOf.end());
      I += SubIdx->ConcatenationOf.size();
    }
  }
}
148 | ||||
149 | //===----------------------------------------------------------------------===// | |||
150 | // CodeGenRegister | |||
151 | //===----------------------------------------------------------------------===// | |||
152 | ||||
// Construct a register from its TableGen record. Enum is the register's
// position in the target's register enum. The sub-register structure is not
// built here; see buildObjectGraph() and computeSubRegs().
CodeGenRegister::CodeGenRegister(Record *R, unsigned Enum)
    : TheDef(R), EnumValue(Enum),
      CostPerUse(R->getValueAsListOfInts("CostPerUse")),
      CoveredBySubRegs(R->getValueAsBit("CoveredBySubRegs")),
      HasDisjunctSubRegs(false), Constant(R->getValueAsBit("isConstant")),
      SubRegsComplete(false), SuperRegsComplete(false), TopoSig(~0u) {
  Artificial = R->getValueAsBit("isArtificial");
}
161 | ||||
162 | void CodeGenRegister::buildObjectGraph(CodeGenRegBank &RegBank) { | |||
163 | std::vector<Record*> SRIs = TheDef->getValueAsListOfDefs("SubRegIndices"); | |||
164 | std::vector<Record*> SRs = TheDef->getValueAsListOfDefs("SubRegs"); | |||
165 | ||||
166 | if (SRIs.size() != SRs.size()) | |||
167 | PrintFatalError(TheDef->getLoc(), | |||
168 | "SubRegs and SubRegIndices must have the same size"); | |||
169 | ||||
170 | for (unsigned i = 0, e = SRIs.size(); i != e; ++i) { | |||
171 | ExplicitSubRegIndices.push_back(RegBank.getSubRegIdx(SRIs[i])); | |||
172 | ExplicitSubRegs.push_back(RegBank.getReg(SRs[i])); | |||
173 | } | |||
174 | ||||
175 | // Also compute leading super-registers. Each register has a list of | |||
176 | // covered-by-subregs super-registers where it appears as the first explicit | |||
177 | // sub-register. | |||
178 | // | |||
179 | // This is used by computeSecondarySubRegs() to find candidates. | |||
180 | if (CoveredBySubRegs && !ExplicitSubRegs.empty()) | |||
181 | ExplicitSubRegs.front()->LeadingSuperRegs.push_back(this); | |||
182 | ||||
183 | // Add ad hoc alias links. This is a symmetric relationship between two | |||
184 | // registers, so build a symmetric graph by adding links in both ends. | |||
185 | std::vector<Record*> Aliases = TheDef->getValueAsListOfDefs("Aliases"); | |||
186 | for (Record *Alias : Aliases) { | |||
187 | CodeGenRegister *Reg = RegBank.getReg(Alias); | |||
188 | ExplicitAliases.push_back(Reg); | |||
189 | Reg->ExplicitAliases.push_back(this); | |||
190 | } | |||
191 | } | |||
192 | ||||
// Return the register's name from its TableGen record. Only valid for
// registers that have a record (synthesized tuple members do too).
StringRef CodeGenRegister::getName() const {
  assert(TheDef && "no def");
  return TheDef->getName();
}
197 | ||||
namespace {

// Iterate over all register units in a set of registers.
// Flattens each register's unit list into a single forward iteration;
// getReg() identifies the register that owns the current unit.
class RegUnitIterator {
  CodeGenRegister::Vec::const_iterator RegI, RegE;
  CodeGenRegister::RegUnitList::iterator UnitI, UnitE;
  // Shared empty unit list, used to give UnitI/UnitE valid end iterators
  // when the register set itself is empty.
  static CodeGenRegister::RegUnitList Sentinel;

public:
  RegUnitIterator(const CodeGenRegister::Vec &Regs):
    RegI(Regs.begin()), RegE(Regs.end()) {

    if (RegI == RegE) {
      UnitI = Sentinel.end();
      UnitE = Sentinel.end();
    } else {
      UnitI = (*RegI)->getRegUnits().begin();
      UnitE = (*RegI)->getRegUnits().end();
      // Skip any leading registers whose unit lists are empty.
      advance();
    }
  }

  bool isValid() const { return UnitI != UnitE; }

  // Current register unit.
  unsigned operator* () const { assert(isValid()); return *UnitI; }

  // Register owning the current unit.
  const CodeGenRegister *getReg() const { assert(isValid()); return *RegI; }

  /// Preincrement. Move to the next unit.
  void operator++() {
    assert(isValid() && "Cannot advance beyond the last operand");
    ++UnitI;
    advance();
  }

protected:
  // Move to the next register with a non-empty unit list, or leave
  // UnitI == UnitE when every register has been consumed.
  void advance() {
    while (UnitI == UnitE) {
      if (++RegI == RegE)
        break;
      UnitI = (*RegI)->getRegUnits().begin();
      UnitE = (*RegI)->getRegUnits().end();
    }
  }
};

CodeGenRegister::RegUnitList RegUnitIterator::Sentinel;

} // end anonymous namespace
247 | ||||
// Return true if this unit appears in RegUnits.
// RegUnits is taken by non-const reference; test() may not be const on the
// underlying bit-vector type.
static bool hasRegUnit(CodeGenRegister::RegUnitList &RegUnits, unsigned Unit) {
  return RegUnits.test(Unit);
}
252 | ||||
253 | // Inherit register units from subregisters. | |||
254 | // Return true if the RegUnits changed. | |||
255 | bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) { | |||
256 | bool changed = false; | |||
257 | for (const auto &SubReg : SubRegs) { | |||
258 | CodeGenRegister *SR = SubReg.second; | |||
259 | // Merge the subregister's units into this register's RegUnits. | |||
260 | changed |= (RegUnits |= SR->RegUnits); | |||
261 | } | |||
262 | ||||
263 | return changed; | |||
264 | } | |||
265 | ||||
// Compute, memoize, and return this register's complete sub-register map:
// explicit sub-registers, sub-registers inherited through them, and names
// for inherited "orphans" found via sub-register index composition. Also
// initializes the register's native register units. Recurses into
// sub-registers, so the register hierarchy is processed in postorder.
const CodeGenRegister::SubRegMap &
CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) {
  // Only compute this map once.
  if (SubRegsComplete)
    return SubRegs;
  SubRegsComplete = true;

  HasDisjunctSubRegs = ExplicitSubRegs.size() > 1;

  // First insert the explicit subregs and make sure they are fully indexed.
  for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) {
    CodeGenRegister *SR = ExplicitSubRegs[i];
    CodeGenSubRegIndex *Idx = ExplicitSubRegIndices[i];
    // An index used by a non-artificial register is itself non-artificial.
    if (!SR->Artificial)
      Idx->Artificial = false;
    if (!SubRegs.insert(std::make_pair(Idx, SR)).second)
      PrintFatalError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() +
                      " appears twice in Register " + getName());
    // Map explicit sub-registers first, so the names take precedence.
    // The inherited sub-registers are mapped below.
    SubReg2Idx.insert(std::make_pair(SR, Idx));
  }

  // Keep track of inherited subregs and how they can be reached.
  SmallPtrSet<CodeGenRegister*, 8> Orphans;

  // Clone inherited subregs and place duplicate entries in Orphans.
  // Here the order is important - earlier subregs take precedence.
  for (CodeGenRegister *ESR : ExplicitSubRegs) {
    const SubRegMap &Map = ESR->computeSubRegs(RegBank);
    HasDisjunctSubRegs |= ESR->HasDisjunctSubRegs;

    for (const auto &SR : Map) {
      // If the index was already mapped, the inherited register has no
      // direct name here yet - it is an orphan until resolved below.
      if (!SubRegs.insert(SR).second)
        Orphans.insert(SR.second);
    }
  }

  // Expand any composed subreg indices.
  // If dsub_2 has ComposedOf = [qsub_1, dsub_0], and this register has a
  // qsub_1 subreg, add a dsub_2 subreg. Keep growing Indices and process
  // expanded subreg indices recursively.
  SmallVector<CodeGenSubRegIndex*, 8> Indices = ExplicitSubRegIndices;
  for (unsigned i = 0; i != Indices.size(); ++i) {
    CodeGenSubRegIndex *Idx = Indices[i];
    const CodeGenSubRegIndex::CompMap &Comps = Idx->getComposites();
    CodeGenRegister *SR = SubRegs[Idx];
    const SubRegMap &Map = SR->computeSubRegs(RegBank);

    // Look at the possible compositions of Idx.
    // They may not all be supported by SR.
    for (auto Comp : Comps) {
      SubRegMap::const_iterator SRI = Map.find(Comp.first);
      if (SRI == Map.end())
        continue; // Idx + I->first doesn't exist in SR.
      // Add I->second as a name for the subreg SRI->second, assuming it is
      // orphaned, and the name isn't already used for something else.
      if (SubRegs.count(Comp.second) || !Orphans.erase(SRI->second))
        continue;
      // We found a new name for the orphaned sub-register.
      SubRegs.insert(std::make_pair(Comp.second, SRI->second));
      Indices.push_back(Comp.second);
    }
  }

  // Now Orphans contains the inherited subregisters without a direct index.
  // Create inferred indexes for all missing entries.
  // Work backwards in the Indices vector in order to compose subregs bottom-up.
  // Consider this subreg sequence:
  //
  //   qsub_1 -> dsub_0 -> ssub_0
  //
  // The qsub_1 -> dsub_0 composition becomes dsub_2, so the ssub_0 register
  // can be reached in two different ways:
  //
  //   qsub_1 -> ssub_0
  //   dsub_2 -> ssub_0
  //
  // We pick the latter composition because another register may have [dsub_0,
  // dsub_1, dsub_2] subregs without necessarily having a qsub_1 subreg.  The
  // dsub_2 -> ssub_0 composition can be shared.
  while (!Indices.empty() && !Orphans.empty()) {
    CodeGenSubRegIndex *Idx = Indices.pop_back_val();
    CodeGenRegister *SR = SubRegs[Idx];
    const SubRegMap &Map = SR->computeSubRegs(RegBank);
    for (const auto &SubReg : Map)
      if (Orphans.erase(SubReg.second))
        SubRegs[RegBank.getCompositeSubRegIndex(Idx, SubReg.first)] = SubReg.second;
  }

  // Compute the inverse SubReg -> Idx map.
  for (const auto &SubReg : SubRegs) {
    // A register containing itself as a sub-register is a cycle.
    if (SubReg.second == this) {
      ArrayRef<SMLoc> Loc;
      if (TheDef)
        Loc = TheDef->getLoc();
      PrintFatalError(Loc, "Register " + getName() +
                      " has itself as a sub-register");
    }

    // Compute AllSuperRegsCovered.
    if (!CoveredBySubRegs)
      SubReg.first->AllSuperRegsCovered = false;

    // Ensure that every sub-register has a unique name.
    DenseMap<const CodeGenRegister*, CodeGenSubRegIndex*>::iterator Ins =
      SubReg2Idx.insert(std::make_pair(SubReg.second, SubReg.first)).first;
    if (Ins->second == SubReg.first)
      continue;
    // Trouble: Two different names for SubReg.second.
    ArrayRef<SMLoc> Loc;
    if (TheDef)
      Loc = TheDef->getLoc();
    PrintFatalError(Loc, "Sub-register can't have two names: " +
                    SubReg.second->getName() + " available as " +
                    SubReg.first->getName() + " and " + Ins->second->getName());
  }

  // Derive possible names for sub-register concatenations from any explicit
  // sub-registers. By doing this before computeSecondarySubRegs(), we ensure
  // that getConcatSubRegIndex() won't invent any concatenated indices that the
  // user already specified.
  for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) {
    CodeGenRegister *SR = ExplicitSubRegs[i];
    if (!SR->CoveredBySubRegs || SR->ExplicitSubRegs.size() <= 1 ||
        SR->Artificial)
      continue;

    // SR is composed of multiple sub-regs. Find their names in this register.
    SmallVector<CodeGenSubRegIndex*, 8> Parts;
    for (unsigned j = 0, e = SR->ExplicitSubRegs.size(); j != e; ++j) {
      CodeGenSubRegIndex &I = *SR->ExplicitSubRegIndices[j];
      if (!I.Artificial)
        Parts.push_back(getSubRegIndex(SR->ExplicitSubRegs[j]));
    }

    // Offer this as an existing spelling for the concatenation of Parts.
    CodeGenSubRegIndex &Idx = *ExplicitSubRegIndices[i];
    Idx.setConcatenationOf(Parts);
  }

  // Initialize RegUnitList. Because getSubRegs is called recursively, this
  // processes the register hierarchy in postorder.
  //
  // Inherit all sub-register units. It is good enough to look at the explicit
  // sub-registers, the other registers won't contribute any more units.
  for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) {
    CodeGenRegister *SR = ExplicitSubRegs[i];
    RegUnits |= SR->RegUnits;
  }

  // Absent any ad hoc aliasing, we create one register unit per leaf register.
  // These units correspond to the maximal cliques in the register overlap
  // graph which is optimal.
  //
  // When there is ad hoc aliasing, we simply create one unit per edge in the
  // undirected ad hoc aliasing graph. Technically, we could do better by
  // identifying maximal cliques in the ad hoc graph, but cliques larger than 2
  // are extremely rare anyway (I've never seen one), so we don't bother with
  // the added complexity.
  for (unsigned i = 0, e = ExplicitAliases.size(); i != e; ++i) {
    CodeGenRegister *AR = ExplicitAliases[i];
    // Only visit each edge once.
    if (AR->SubRegsComplete)
      continue;
    // Create a RegUnit representing this alias edge, and add it to both
    // registers.
    unsigned Unit = RegBank.newRegUnit(this, AR);
    RegUnits.set(Unit);
    AR->RegUnits.set(Unit);
  }

  // Finally, create units for leaf registers without ad hoc aliases. Note that
  // a leaf register with ad hoc aliases doesn't get its own unit - it isn't
  // necessary. This means the aliasing leaf registers can share a single unit.
  if (RegUnits.empty())
    RegUnits.set(RegBank.newRegUnit(this));

  // We have now computed the native register units. More may be adopted later
  // for balancing purposes.
  NativeRegUnits = RegUnits;

  return SubRegs;
}
450 | ||||
// In a register that is covered by its sub-registers, try to find redundant
// sub-registers. For example:
//
//   QQ0 = {Q0, Q1}
//   Q0 = {D0, D1}
//   Q1 = {D2, D3}
//
// We can infer that D1_D2 is also a sub-register, even if it wasn't named in
// the register definition.
//
// The explicitly specified registers form a tree. This function discovers
// sub-register relationships that would force a DAG.
//
void CodeGenRegister::computeSecondarySubRegs(CodeGenRegBank &RegBank) {
  SmallVector<SubRegMap::value_type, 8> NewSubRegs;

  // Process every current sub-register, plus any that get inferred below.
  std::queue<std::pair<CodeGenSubRegIndex*,CodeGenRegister*>> SubRegQueue;
  for (std::pair<CodeGenSubRegIndex*,CodeGenRegister*> P : SubRegs)
    SubRegQueue.push(P);

  // Look at the leading super-registers of each sub-register. Those are the
  // candidates for new sub-registers, assuming they are fully contained in
  // this register.
  while (!SubRegQueue.empty()) {
    CodeGenSubRegIndex *SubRegIdx;
    const CodeGenRegister *SubReg;
    std::tie(SubRegIdx, SubReg) = SubRegQueue.front();
    SubRegQueue.pop();

    const CodeGenRegister::SuperRegList &Leads = SubReg->LeadingSuperRegs;
    for (unsigned i = 0, e = Leads.size(); i != e; ++i) {
      CodeGenRegister *Cand = const_cast<CodeGenRegister*>(Leads[i]);
      // Already got this sub-register?
      if (Cand == this || getSubRegIndex(Cand))
        continue;
      // Check if each component of Cand is already a sub-register.
      assert(!Cand->ExplicitSubRegs.empty() &&
             "Super-register has no sub-registers");
      if (Cand->ExplicitSubRegs.size() == 1)
        continue;
      SmallVector<CodeGenSubRegIndex*, 8> Parts;
      // We know that the first component is (SubRegIdx,SubReg). However we
      // may still need to split it into smaller subregister parts.
      assert(Cand->ExplicitSubRegs[0] == SubReg && "LeadingSuperRegs correct");
      assert(getSubRegIndex(SubReg) == SubRegIdx && "LeadingSuperRegs correct");
      for (CodeGenRegister *SubReg : Cand->ExplicitSubRegs) {
        if (CodeGenSubRegIndex *SubRegIdx = getSubRegIndex(SubReg)) {
          // Use the flattened parts of compound indices so the concatenated
          // index is built from leaves only.
          if (SubRegIdx->ConcatenationOf.empty())
            Parts.push_back(SubRegIdx);
          else
            append_range(Parts, SubRegIdx->ConcatenationOf);
        } else {
          // Sub-register doesn't exist.
          Parts.clear();
          break;
        }
      }
      // There is nothing to do if some Cand sub-register is not part of this
      // register.
      if (Parts.empty())
        continue;

      // Each part of Cand is a sub-register of this. Make the full Cand also
      // a sub-register with a concatenated sub-register index.
      CodeGenSubRegIndex *Concat = RegBank.getConcatSubRegIndex(Parts);
      std::pair<CodeGenSubRegIndex*,CodeGenRegister*> NewSubReg =
        std::make_pair(Concat, Cand);

      if (!SubRegs.insert(NewSubReg).second)
        continue;

      // We inserted a new subregister.
      NewSubRegs.push_back(NewSubReg);
      SubRegQueue.push(NewSubReg);
      SubReg2Idx.insert(std::make_pair(Cand, Concat));
    }
  }

  // Create sub-register index composition maps for the synthesized indices.
  for (unsigned i = 0, e = NewSubRegs.size(); i != e; ++i) {
    CodeGenSubRegIndex *NewIdx = NewSubRegs[i].first;
    CodeGenRegister *NewSubReg = NewSubRegs[i].second;
    for (auto SubReg : NewSubReg->SubRegs) {
      CodeGenSubRegIndex *SubIdx = getSubRegIndex(SubReg.second);
      if (!SubIdx)
        PrintFatalError(TheDef->getLoc(), "No SubRegIndex for " +
                        SubReg.second->getName() +
                        " in " + getName());
      NewIdx->addComposite(SubReg.first, SubIdx);
    }
  }
}
543 | ||||
544 | void CodeGenRegister::computeSuperRegs(CodeGenRegBank &RegBank) { | |||
545 | // Only visit each register once. | |||
546 | if (SuperRegsComplete) | |||
547 | return; | |||
548 | SuperRegsComplete = true; | |||
549 | ||||
550 | // Make sure all sub-registers have been visited first, so the super-reg | |||
551 | // lists will be topologically ordered. | |||
552 | for (auto SubReg : SubRegs) | |||
553 | SubReg.second->computeSuperRegs(RegBank); | |||
554 | ||||
555 | // Now add this as a super-register on all sub-registers. | |||
556 | // Also compute the TopoSigId in post-order. | |||
557 | TopoSigId Id; | |||
558 | for (auto SubReg : SubRegs) { | |||
559 | // Topological signature computed from SubIdx, TopoId(SubReg). | |||
560 | // Loops and idempotent indices have TopoSig = ~0u. | |||
561 | Id.push_back(SubReg.first->EnumValue); | |||
562 | Id.push_back(SubReg.second->TopoSig); | |||
563 | ||||
564 | // Don't add duplicate entries. | |||
565 | if (!SubReg.second->SuperRegs.empty() && | |||
566 | SubReg.second->SuperRegs.back() == this) | |||
567 | continue; | |||
568 | SubReg.second->SuperRegs.push_back(this); | |||
569 | } | |||
570 | TopoSig = RegBank.getTopoSig(Id); | |||
571 | } | |||
572 | ||||
573 | void | |||
574 | CodeGenRegister::addSubRegsPreOrder(SetVector<const CodeGenRegister*> &OSet, | |||
575 | CodeGenRegBank &RegBank) const { | |||
576 | assert(SubRegsComplete && "Must precompute sub-registers")(static_cast <bool> (SubRegsComplete && "Must precompute sub-registers" ) ? void (0) : __assert_fail ("SubRegsComplete && \"Must precompute sub-registers\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 576, __extension__ __PRETTY_FUNCTION__)); | |||
577 | for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { | |||
578 | CodeGenRegister *SR = ExplicitSubRegs[i]; | |||
579 | if (OSet.insert(SR)) | |||
580 | SR->addSubRegsPreOrder(OSet, RegBank); | |||
581 | } | |||
582 | // Add any secondary sub-registers that weren't part of the explicit tree. | |||
583 | for (auto SubReg : SubRegs) | |||
584 | OSet.insert(SubReg.second); | |||
585 | } | |||
586 | ||||
587 | // Get the sum of this register's unit weights. | |||
588 | unsigned CodeGenRegister::getWeight(const CodeGenRegBank &RegBank) const { | |||
589 | unsigned Weight = 0; | |||
590 | for (unsigned RegUnit : RegUnits) { | |||
591 | Weight += RegBank.getRegUnit(RegUnit).Weight; | |||
592 | } | |||
593 | return Weight; | |||
594 | } | |||
595 | ||||
596 | //===----------------------------------------------------------------------===// | |||
597 | // RegisterTuples | |||
598 | //===----------------------------------------------------------------------===// | |||
599 | ||||
600 | // A RegisterTuples def is used to generate pseudo-registers from lists of | |||
601 | // sub-registers. We provide a SetTheory expander class that returns the new | |||
602 | // registers. | |||
namespace {

// SetTheory expander for RegisterTuples defs: zips the evaluated SubRegs
// lists into synthesized pseudo-register Records, one per tuple column.
// The synthesized Records are owned by SynthDefs (not the RecordKeeper).
struct TupleExpander : SetTheory::Expander {
  // Reference to SynthDefs in the containing CodeGenRegBank, to keep track of
  // the synthesized definitions for their lifetime.
  std::vector<std::unique_ptr<Record>> &SynthDefs;

  TupleExpander(std::vector<std::unique_ptr<Record>> &SynthDefs)
      : SynthDefs(SynthDefs) {}

  // Evaluate Def's SubRegs lists and append one synthesized register Record
  // per tuple to Elts. Fatal error on malformed defs (size mismatch, or
  // fewer than two sub-registers per tuple).
  void expand(SetTheory &ST, Record *Def, SetTheory::RecSet &Elts) override {
    std::vector<Record*> Indices = Def->getValueAsListOfDefs("SubRegIndices");
    unsigned Dim = Indices.size();
    ListInit *SubRegs = Def->getValueAsListInit("SubRegs");
    // Each sub-register index must pair with exactly one SubRegs list.
    if (Dim != SubRegs->size())
      PrintFatalError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch");
    if (Dim < 2)
      PrintFatalError(Def->getLoc(),
                      "Tuples must have at least 2 sub-registers");

    // Evaluate the sub-register lists to be zipped.
    // Length is the number of tuples produced: the shortest list wins.
    unsigned Length = ~0u;
    SmallVector<SetTheory::RecSet, 4> Lists(Dim);
    for (unsigned i = 0; i != Dim; ++i) {
      ST.evaluate(SubRegs->getElement(i), Lists[i], Def->getLoc());
      Length = std::min(Length, unsigned(Lists[i].size()));
    }

    if (Length == 0)
      return;

    // Precompute some types.
    Record *RegisterCl = Def->getRecords().getClass("Register");
    RecTy *RegisterRecTy = RecordRecTy::get(RegisterCl);
    std::vector<StringRef> RegNames =
        Def->getValueAsListOfStrings("RegAsmNames");

    // Zip them up.
    RecordKeeper &RK = Def->getRecords();
    for (unsigned n = 0; n != Length; ++n) {
      // The tuple name is the '_'-joined names of its sub-registers; the
      // first sub-register (Proto) donates most of its fields.
      std::string Name;
      Record *Proto = Lists[0][n];
      std::vector<Init*> Tuple;
      for (unsigned i = 0; i != Dim; ++i) {
        Record *Reg = Lists[i][n];
        if (i) Name += '_';
        Name += Reg->getName();
        Tuple.push_back(DefInit::get(Reg));
      }

      // Take the cost list of the first register in the tuple.
      ListInit *CostList = Proto->getValueAsListInit("CostPerUse");
      SmallVector<Init *, 2> CostPerUse;
      CostPerUse.insert(CostPerUse.end(), CostList->begin(), CostList->end());

      // RegAsmNames is optional; when present it must cover every tuple.
      StringInit *AsmName = StringInit::get(RK, "");
      if (!RegNames.empty()) {
        if (RegNames.size() <= n)
          PrintFatalError(Def->getLoc(),
                          "Register tuple definition missing name for '" +
                              Name + "'.");
        AsmName = StringInit::get(RK, RegNames[n]);
      }

      // Create a new Record representing the synthesized register. This record
      // is only for consumption by CodeGenRegister, it is not added to the
      // RecordKeeper.
      SynthDefs.emplace_back(
          std::make_unique<Record>(Name, Def->getLoc(), Def->getRecords()));
      Record *NewReg = SynthDefs.back().get();
      Elts.insert(NewReg);

      // Copy Proto super-classes.
      ArrayRef<std::pair<Record *, SMRange>> Supers = Proto->getSuperClasses();
      for (const auto &SuperPair : Supers)
        NewReg->addSuperClass(SuperPair.first, SuperPair.second);

      // Copy Proto fields.
      for (unsigned i = 0, e = Proto->getValues().size(); i != e; ++i) {
        RecordVal RV = Proto->getValues()[i];

        // Skip existing fields, like NAME.
        if (NewReg->getValue(RV.getNameInit()))
          continue;

        StringRef Field = RV.getName();

        // Replace the sub-register list with Tuple.
        if (Field == "SubRegs")
          RV.setValue(ListInit::get(Tuple, RegisterRecTy));

        if (Field == "AsmName")
          RV.setValue(AsmName);

        // CostPerUse is aggregated from all Tuple members.
        if (Field == "CostPerUse")
          RV.setValue(ListInit::get(CostPerUse, CostList->getElementType()));

        // Composite registers are always covered by sub-registers.
        if (Field == "CoveredBySubRegs")
          RV.setValue(BitInit::get(RK, true));

        // Copy fields from the RegisterTuples def.
        if (Field == "SubRegIndices" ||
            Field == "CompositeIndices") {
          NewReg->addValue(*Def->getValue(Field));
          continue;
        }

        // Some fields get their default uninitialized value.
        if (Field == "DwarfNumbers" ||
            Field == "DwarfAlias" ||
            Field == "Aliases") {
          if (const RecordVal *DefRV = RegisterCl->getValue(Field))
            NewReg->addValue(*DefRV);
          continue;
        }

        // Everything else is copied from Proto.
        NewReg->addValue(RV);
      }
    }
  }
};

} // end anonymous namespace
729 | ||||
730 | //===----------------------------------------------------------------------===// | |||
731 | // CodeGenRegisterClass | |||
732 | //===----------------------------------------------------------------------===// | |||
733 | ||||
734 | static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) { | |||
735 | llvm::sort(M, deref<std::less<>>()); | |||
736 | M.erase(std::unique(M.begin(), M.end(), deref<std::equal_to<>>()), M.end()); | |||
737 | } | |||
738 | ||||
// Build a register class from its TableGen def: value types, allocation
// orders, members, size/alignment info, and misc flags are all read here.
CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
    : TheDef(R), Name(std::string(R->getName())),
      TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), TSFlags(0) {
  GeneratePressureSet = R->getValueAsBit("GeneratePressureSet");
  std::vector<Record*> TypeList = R->getValueAsListOfDefs("RegTypes");
  if (TypeList.empty())
    PrintFatalError(R->getLoc(), "RegTypes list must not be empty!");
  // Every RegTypes entry must derive from ValueType; resolve each per HW mode.
  for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
    Record *Type = TypeList[i];
    if (!Type->isSubClassOf("ValueType"))
      PrintFatalError(R->getLoc(),
                      "RegTypes list member '" + Type->getName() +
                          "' does not derive from the ValueType class!");
    VTs.push_back(getValueTypeByHwMode(Type, RegBank.getHwModes()));
  }

  // Allocation order 0 is the full set. AltOrders provides others.
  const SetTheory::RecVec *Elements = RegBank.getSets().expand(R);
  ListInit *AltOrders = R->getValueAsListInit("AltOrders");
  Orders.resize(1 + AltOrders->size());

  // Default allocation order always contains all registers.
  // The class is artificial only if every member register is artificial.
  Artificial = true;
  for (unsigned i = 0, e = Elements->size(); i != e; ++i) {
    Orders[0].push_back((*Elements)[i]);
    const CodeGenRegister *Reg = RegBank.getReg((*Elements)[i]);
    Members.push_back(Reg);
    Artificial &= Reg->Artificial;
    TopoSigs.set(Reg->getTopoSig());
  }
  // Members must be sorted and unique for contains()/testSubClass().
  sortAndUniqueRegisters(Members);

  // Alternative allocation orders may be subsets.
  SetTheory::RecSet Order;
  for (unsigned i = 0, e = AltOrders->size(); i != e; ++i) {
    RegBank.getSets().evaluate(AltOrders->getElement(i), Order, R->getLoc());
    Orders[1 + i].append(Order.begin(), Order.end());
    // Verify that all altorder members are regclass members.
    // (Order is drained here, so it is empty again for the next iteration.)
    while (!Order.empty()) {
      CodeGenRegister *Reg = RegBank.getReg(Order.back());
      Order.pop_back();
      if (!contains(Reg))
        PrintFatalError(R->getLoc(), " AltOrder register " + Reg->getName() +
                                         " is not a class member");
    }
  }

  Namespace = R->getValueAsString("Namespace");

  // RegInfos, when present, supplies per-HW-mode size/alignment info.
  if (const RecordVal *RV = R->getValue("RegInfos"))
    if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue()))
      RSI = RegSizeInfoByHwMode(DI->getDef(), RegBank.getHwModes());
  unsigned Size = R->getValueAsInt("Size");
  assert((RSI.hasDefault() || Size != 0 || VTs[0].isSimple()) &&
         "Impossible to determine register size");
  // Without explicit RegInfos, derive the default-mode size from the "Size"
  // field, or failing that from the first (simple) value type.
  if (!RSI.hasDefault()) {
    RegSizeInfo RI;
    RI.RegSize = RI.SpillSize = Size ? Size
                                     : VTs[0].getSimple().getSizeInBits();
    RI.SpillAlignment = R->getValueAsInt("Alignment");
    RSI.insertRegSizeForMode(DefaultMode, RI);
  }

  CopyCost = R->getValueAsInt("CopyCost");
  Allocatable = R->getValueAsBit("isAllocatable");
  AltOrderSelect = R->getValueAsString("AltOrderSelect");
  // AllocationPriority is stored in a 5-bit field; reject wider values.
  int AllocationPriority = R->getValueAsInt("AllocationPriority");
  if (!isUInt<5>(AllocationPriority))
    PrintFatalError(R->getLoc(), "AllocationPriority out of range [0,31]");
  this->AllocationPriority = AllocationPriority;

  GlobalPriority = R->getValueAsBit("GlobalPriority");

  // Pack the TSFlags bits field into an integer, LSB first.
  BitsInit *TSF = R->getValueAsBitsInit("TSFlags");
  for (unsigned I = 0, E = TSF->getNumBits(); I != E; ++I) {
    BitInit *Bit = cast<BitInit>(TSF->getBit(I));
    TSFlags |= uint8_t(Bit->getValue()) << I;
  }
}
818 | ||||
819 | // Create an inferred register class that was missing from the .td files. | |||
820 | // Most properties will be inherited from the closest super-class after the | |||
821 | // class structure has been computed. | |||
822 | CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, | |||
823 | StringRef Name, Key Props) | |||
824 | : Members(*Props.Members), TheDef(nullptr), Name(std::string(Name)), | |||
825 | TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), RSI(Props.RSI), | |||
826 | CopyCost(0), Allocatable(true), AllocationPriority(0), | |||
827 | GlobalPriority(false), TSFlags(0) { | |||
828 | Artificial = true; | |||
829 | GeneratePressureSet = false; | |||
830 | for (const auto R : Members) { | |||
831 | TopoSigs.set(R->getTopoSig()); | |||
832 | Artificial &= R->Artificial; | |||
833 | } | |||
834 | } | |||
835 | ||||
// Compute inherited properties for a synthesized register class.
void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) {
  assert(!getDef() && "Only synthesized classes can inherit properties");
  assert(!SuperClasses.empty() && "Synthesized class without super class");

  // The last super-class is the smallest one.
  CodeGenRegisterClass &Super = *SuperClasses.back();

  // Most properties are copied directly.
  // Exceptions are members, size, and alignment
  Namespace = Super.Namespace;
  VTs = Super.VTs;
  CopyCost = Super.CopyCost;
  // Check for allocatable superclasses: allocatable if any super-class is,
  // not just the closest one.
  Allocatable = any_of(SuperClasses, [&](const CodeGenRegisterClass *S) {
    return S->Allocatable;
  });
  AltOrderSelect = Super.AltOrderSelect;
  AllocationPriority = Super.AllocationPriority;
  GlobalPriority = Super.GlobalPriority;
  TSFlags = Super.TSFlags;
  GeneratePressureSet |= Super.GeneratePressureSet;

  // Copy all allocation orders, filter out foreign registers from the larger
  // super-class.
  Orders.resize(Super.Orders.size());
  for (unsigned i = 0, ie = Super.Orders.size(); i != ie; ++i)
    for (unsigned j = 0, je = Super.Orders[i].size(); j != je; ++j)
      if (contains(RegBank.getReg(Super.Orders[i][j])))
        Orders[i].push_back(Super.Orders[i][j]);
}
867 | ||||
868 | bool CodeGenRegisterClass::hasType(const ValueTypeByHwMode &VT) const { | |||
869 | if (llvm::is_contained(VTs, VT)) | |||
870 | return true; | |||
871 | ||||
872 | // If VT is not identical to any of this class's types, but is a simple | |||
873 | // type, check if any of the types for this class contain it under some | |||
874 | // mode. | |||
875 | // The motivating example came from RISC-V, where (likely because of being | |||
876 | // guarded by "64-bit" predicate), the type of X5 was {*:[i64]}, but the | |||
877 | // type in GRC was {*:[i32], m1:[i64]}. | |||
878 | if (VT.isSimple()) { | |||
879 | MVT T = VT.getSimple(); | |||
880 | for (const ValueTypeByHwMode &OurVT : VTs) { | |||
881 | if (llvm::count_if(OurVT, [T](auto &&P) { return P.second == T; })) | |||
882 | return true; | |||
883 | } | |||
884 | } | |||
885 | return false; | |||
886 | } | |||
887 | ||||
888 | bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const { | |||
889 | return std::binary_search(Members.begin(), Members.end(), Reg, | |||
890 | deref<std::less<>>()); | |||
891 | } | |||
892 | ||||
893 | unsigned CodeGenRegisterClass::getWeight(const CodeGenRegBank& RegBank) const { | |||
894 | if (TheDef && !TheDef->isValueUnset("Weight")) | |||
895 | return TheDef->getValueAsInt("Weight"); | |||
896 | ||||
897 | if (Members.empty() || Artificial) | |||
898 | return 0; | |||
899 | ||||
900 | return (*Members.begin())->getWeight(RegBank); | |||
901 | } | |||
902 | ||||
903 | namespace llvm { | |||
904 | ||||
905 | raw_ostream &operator<<(raw_ostream &OS, const CodeGenRegisterClass::Key &K) { | |||
906 | OS << "{ " << K.RSI; | |||
907 | for (const auto R : *K.Members) | |||
908 | OS << ", " << R->getName(); | |||
909 | return OS << " }"; | |||
910 | } | |||
911 | ||||
912 | } // end namespace llvm | |||
913 | ||||
914 | // This is a simple lexicographical order that can be used to search for sets. | |||
915 | // It is not the same as the topological order provided by TopoOrderRC. | |||
bool CodeGenRegisterClass::Key::
operator<(const CodeGenRegisterClass::Key &B) const {
  assert(Members && B.Members);
  // Compare the member sets first, then the size info, lexicographically.
  return std::tie(*Members, RSI) < std::tie(*B.Members, B.RSI);
}
921 | ||||
922 | // Returns true if RC is a strict subclass. | |||
923 | // RC is a sub-class of this class if it is a valid replacement for any | |||
// instruction operand where a register of this class is required. It must
925 | // satisfy these conditions: | |||
926 | // | |||
927 | // 1. All RC registers are also in this. | |||
928 | // 2. The RC spill size must not be smaller than our spill size. | |||
929 | // 3. RC spill alignment must be compatible with ours. | |||
930 | // | |||
931 | static bool testSubClass(const CodeGenRegisterClass *A, | |||
932 | const CodeGenRegisterClass *B) { | |||
933 | return A->RSI.isSubClassOf(B->RSI) && | |||
934 | std::includes(A->getMembers().begin(), A->getMembers().end(), | |||
935 | B->getMembers().begin(), B->getMembers().end(), | |||
936 | deref<std::less<>>()); | |||
937 | } | |||
938 | ||||
939 | /// Sorting predicate for register classes. This provides a topological | |||
940 | /// ordering that arranges all register classes before their sub-classes. | |||
941 | /// | |||
942 | /// Register classes with the same registers, spill size, and alignment form a | |||
943 | /// clique. They will be ordered alphabetically. | |||
944 | /// | |||
945 | static bool TopoOrderRC(const CodeGenRegisterClass &PA, | |||
946 | const CodeGenRegisterClass &PB) { | |||
947 | auto *A = &PA; | |||
948 | auto *B = &PB; | |||
949 | if (A == B) | |||
950 | return false; | |||
951 | ||||
952 | if (A->RSI < B->RSI) | |||
953 | return true; | |||
954 | if (A->RSI != B->RSI) | |||
955 | return false; | |||
956 | ||||
957 | // Order by descending set size. Note that the classes' allocation order may | |||
958 | // not have been computed yet. The Members set is always vaild. | |||
959 | if (A->getMembers().size() > B->getMembers().size()) | |||
960 | return true; | |||
961 | if (A->getMembers().size() < B->getMembers().size()) | |||
962 | return false; | |||
963 | ||||
964 | // Finally order by name as a tie breaker. | |||
965 | return StringRef(A->getName()) < B->getName(); | |||
966 | } | |||
967 | ||||
968 | std::string CodeGenRegisterClass::getQualifiedName() const { | |||
969 | if (Namespace.empty()) | |||
970 | return getName(); | |||
971 | else | |||
972 | return (Namespace + "::" + getName()).str(); | |||
973 | } | |||
974 | ||||
975 | // Compute sub-classes of all register classes. | |||
976 | // Assume the classes are ordered topologically. | |||
void CodeGenRegisterClass::computeSubClasses(CodeGenRegBank &RegBank) {
  auto &RegClasses = RegBank.getRegClasses();

  // Visit backwards so sub-classes are seen first.
  for (auto I = RegClasses.rbegin(), E = RegClasses.rend(); I != E; ++I) {
    CodeGenRegisterClass &RC = *I;
    RC.SubClasses.resize(RegClasses.size());
    // Every class is trivially a sub-class of itself.
    RC.SubClasses.set(RC.EnumValue);
    if (RC.Artificial)
      continue;

    // Normally, all subclasses have IDs >= rci, unless RC is part of a clique.
    // I.base() is the forward iterator one past RC, i.e. classes after RC in
    // topological order.
    for (auto I2 = I.base(), E2 = RegClasses.end(); I2 != E2; ++I2) {
      CodeGenRegisterClass &SubRC = *I2;
      if (RC.SubClasses.test(SubRC.EnumValue))
        continue;
      if (!testSubClass(&RC, &SubRC))
        continue;
      // SubRC is a sub-class. Grab all its sub-classes so we won't have to
      // check them again.
      RC.SubClasses |= SubRC.SubClasses;
    }

    // Sweep up missed clique members. They will be immediately preceding RC.
    for (auto I2 = std::next(I); I2 != E && testSubClass(&RC, &*I2); ++I2)
      RC.SubClasses.set(I2->EnumValue);
  }

  // Compute the SuperClasses lists from the SubClasses vectors.
  for (auto &RC : RegClasses) {
    const BitVector &SC = RC.getSubClasses();
    auto I = RegClasses.begin();
    // Walk the list iterator forward in lock-step with the set bits; s is
    // the enum value I currently points at.
    for (int s = 0, next_s = SC.find_first(); next_s != -1;
         next_s = SC.find_next(s)) {
      std::advance(I, next_s - s);
      s = next_s;
      if (&*I == &RC)
        continue;
      I->SuperClasses.push_back(&RC);
    }
  }

  // With the class hierarchy in place, let synthesized register classes inherit
  // properties from their closest super-class. The iteration order here can
  // propagate properties down multiple levels.
  for (auto &RC : RegClasses)
    if (!RC.getDef())
      RC.inheritProperties(RegBank);
}
1026 | ||||
// Find the largest sub-class of *this whose registers all have a SubIdx
// sub-register, paired with the largest class containing those
// sub-registers. Returns std::nullopt when no such pairing exists.
std::optional<std::pair<CodeGenRegisterClass *, CodeGenRegisterClass *>>
CodeGenRegisterClass::getMatchingSubClassWithSubRegs(
    CodeGenRegBank &RegBank, const CodeGenSubRegIndex *SubIdx) const {
  // Order classes by descending member count; ties prefer *this over an
  // identical class so the original class wins.
  auto SizeOrder = [this](const CodeGenRegisterClass *A,
                          const CodeGenRegisterClass *B) {
    // If there are multiple, identical register classes, prefer the original
    // register class.
    if (A == B)
      return false;
    if (A->getMembers().size() == B->getMembers().size())
      return A == this;
    return A->getMembers().size() > B->getMembers().size();
  };

  auto &RegClasses = RegBank.getRegClasses();

  // Find all the subclasses of this one that fully support the sub-register
  // index and order them by size. BiggestSuperRC should always be first.
  CodeGenRegisterClass *BiggestSuperRegRC = getSubClassWithSubReg(SubIdx);
  if (!BiggestSuperRegRC)
    return std::nullopt;
  BitVector SuperRegRCsBV = BiggestSuperRegRC->getSubClasses();
  std::vector<CodeGenRegisterClass *> SuperRegRCs;
  for (auto &RC : RegClasses)
    if (SuperRegRCsBV[RC.EnumValue])
      SuperRegRCs.emplace_back(&RC);
  // stable_sort keeps equal-sized classes in topological order.
  llvm::stable_sort(SuperRegRCs, SizeOrder);

  assert(SuperRegRCs.front() == BiggestSuperRegRC &&
         "Biggest class wasn't first");

  // Find all the subreg classes and order them by size too.
  std::vector<std::pair<CodeGenRegisterClass *, BitVector>> SuperRegClasses;
  for (auto &RC: RegClasses) {
    BitVector SuperRegClassesBV(RegClasses.size());
    RC.getSuperRegClasses(SubIdx, SuperRegClassesBV);
    if (SuperRegClassesBV.any())
      SuperRegClasses.push_back(std::make_pair(&RC, SuperRegClassesBV));
  }
  llvm::sort(SuperRegClasses,
             [&](const std::pair<CodeGenRegisterClass *, BitVector> &A,
                 const std::pair<CodeGenRegisterClass *, BitVector> &B) {
               return SizeOrder(A.first, B.first);
             });

  // Find the biggest subclass and subreg class such that R:subidx is in the
  // subreg class for all R in subclass.
  //
  // For example:
  // All registers in X86's GR64 have a sub_32bit subregister but no class
  // exists that contains all the 32-bit subregisters because GR64 contains RIP
  // but GR32 does not contain EIP. Instead, we constrain SuperRegRC to
  // GR32_with_sub_8bit (which is identical to GR32_with_sub_32bit) and then,
  // having excluded RIP, we are able to find a SubRegRC (GR32).
  CodeGenRegisterClass *ChosenSuperRegClass = nullptr;
  CodeGenRegisterClass *SubRegRC = nullptr;
  for (auto *SuperRegRC : SuperRegRCs) {
    for (const auto &SuperRegClassPair : SuperRegClasses) {
      const BitVector &SuperRegClassBV = SuperRegClassPair.second;
      if (SuperRegClassBV[SuperRegRC->EnumValue]) {
        SubRegRC = SuperRegClassPair.first;
        ChosenSuperRegClass = SuperRegRC;

        // If SubRegRC is bigger than SuperRegRC then there are members of
        // SubRegRC that don't have super registers via SubIdx. Keep looking to
        // find a better fit and fall back on this one if there isn't one.
        //
        // This is intended to prevent X86 from making odd choices such as
        // picking LOW32_ADDR_ACCESS_RBP instead of GR32 in the example above.
        // LOW32_ADDR_ACCESS_RBP is a valid choice but contains registers that
        // aren't subregisters of SuperRegRC whereas GR32 has a direct 1:1
        // mapping.
        if (SuperRegRC->getMembers().size() >= SubRegRC->getMembers().size())
          return std::make_pair(ChosenSuperRegClass, SubRegRC);
      }
    }

    // If we found a fit but it wasn't quite ideal because SubRegRC had excess
    // registers, then we're done.
    if (ChosenSuperRegClass)
      return std::make_pair(ChosenSuperRegClass, SubRegRC);
  }

  return std::nullopt;
}
1112 | ||||
1113 | void CodeGenRegisterClass::getSuperRegClasses(const CodeGenSubRegIndex *SubIdx, | |||
1114 | BitVector &Out) const { | |||
1115 | auto FindI = SuperRegClasses.find(SubIdx); | |||
1116 | if (FindI == SuperRegClasses.end()) | |||
1117 | return; | |||
1118 | for (CodeGenRegisterClass *RC : FindI->second) | |||
1119 | Out.set(RC->EnumValue); | |||
1120 | } | |||
1121 | ||||
1122 | // Populate a unique sorted list of units from a register set. | |||
1123 | void CodeGenRegisterClass::buildRegUnitSet(const CodeGenRegBank &RegBank, | |||
1124 | std::vector<unsigned> &RegUnits) const { | |||
1125 | std::vector<unsigned> TmpUnits; | |||
1126 | for (RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI) { | |||
1127 | const RegUnit &RU = RegBank.getRegUnit(*UnitI); | |||
1128 | if (!RU.Artificial) | |||
1129 | TmpUnits.push_back(*UnitI); | |||
1130 | } | |||
1131 | llvm::sort(TmpUnits); | |||
1132 | std::unique_copy(TmpUnits.begin(), TmpUnits.end(), | |||
1133 | std::back_inserter(RegUnits)); | |||
1134 | } | |||
1135 | ||||
1136 | //===----------------------------------------------------------------------===// | |||
1137 | // CodeGenRegisterCategory | |||
1138 | //===----------------------------------------------------------------------===// | |||
1139 | ||||
1140 | CodeGenRegisterCategory::CodeGenRegisterCategory(CodeGenRegBank &RegBank, | |||
1141 | Record *R) | |||
1142 | : TheDef(R), Name(std::string(R->getName())) { | |||
1143 | for (Record *RegClass : R->getValueAsListOfDefs("Classes")) | |||
1144 | Classes.push_back(RegBank.getRegClass(RegClass)); | |||
1145 | } | |||
1146 | ||||
1147 | //===----------------------------------------------------------------------===// | |||
1148 | // CodeGenRegBank | |||
1149 | //===----------------------------------------------------------------------===// | |||
1150 | ||||
// Build the whole register bank from the TableGen records. The phases below
// are order-dependent: sub-register indices, then registers and tuples, then
// the sub-register graph, and finally register classes and categories.
CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records,
                               const CodeGenHwModes &Modes) : CGH(Modes) {
  // Configure register Sets to understand register classes and tuples.
  Sets.addFieldExpander("RegisterClass", "MemberList");
  Sets.addFieldExpander("CalleeSavedRegs", "SaveList");
  Sets.addExpander("RegisterTuples",
                   std::make_unique<TupleExpander>(SynthDefs));

  // Read in the user-defined (named) sub-register indices.
  // More indices will be synthesized later.
  std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex");
  llvm::sort(SRIs, LessRecord());
  for (unsigned i = 0, e = SRIs.size(); i != e; ++i)
    getSubRegIdx(SRIs[i]);
  // Build composite maps from ComposedOf fields.
  for (auto &Idx : SubRegIndices)
    Idx.updateComponents(*this);

  // Read in the register definitions.
  std::vector<Record*> Regs = Records.getAllDerivedDefinitions("Register");
  llvm::sort(Regs, LessRecordRegister());
  // Assign the enumeration values.
  for (unsigned i = 0, e = Regs.size(); i != e; ++i)
    getReg(Regs[i]);

  // Expand tuples and number the new registers.
  std::vector<Record*> Tups =
      Records.getAllDerivedDefinitions("RegisterTuples");

  for (Record *R : Tups) {
    // Sets.expand invokes the TupleExpander registered above.
    std::vector<Record *> TupRegs = *Sets.expand(R);
    llvm::sort(TupRegs, LessRecordRegister());
    for (Record *RC : TupRegs)
      getReg(RC);
  }

  // Now all the registers are known. Build the object graph of explicit
  // register-register references.
  for (auto &Reg : Registers)
    Reg.buildObjectGraph(*this);

  // Compute register name map.
  for (auto &Reg : Registers)
    // FIXME: This could just be RegistersByName[name] = register, except that
    // causes some failures in MIPS - perhaps they have duplicate register name
    // entries? (or maybe there's a reason for it - I don't know much about this
    // code, just drive-by refactoring)
    RegistersByName.insert(
        std::make_pair(Reg.TheDef->getValueAsString("AsmName"), &Reg));

  // Precompute all sub-register maps.
  // This will create Composite entries for all inferred sub-register indices.
  for (auto &Reg : Registers)
    Reg.computeSubRegs(*this);

  // Compute transitive closure of subregister index ConcatenationOf vectors
  // and initialize ConcatIdx map.
  for (CodeGenSubRegIndex &SRI : SubRegIndices) {
    SRI.computeConcatTransitiveClosure();
    if (!SRI.ConcatenationOf.empty())
      ConcatIdx.insert(std::make_pair(
          SmallVector<CodeGenSubRegIndex*,8>(SRI.ConcatenationOf.begin(),
                                             SRI.ConcatenationOf.end()),
          &SRI));
  }

  // Infer even more sub-registers by combining leading super-registers.
  for (auto &Reg : Registers)
    if (Reg.CoveredBySubRegs)
      Reg.computeSecondarySubRegs(*this);

  // After the sub-register graph is complete, compute the topologically
  // ordered SuperRegs list.
  for (auto &Reg : Registers)
    Reg.computeSuperRegs(*this);

  // For each pair of Reg:SR, if both are non-artificial, mark the
  // corresponding sub-register index as non-artificial.
  for (auto &Reg : Registers) {
    if (Reg.Artificial)
      continue;
    for (auto P : Reg.getSubRegs()) {
      const CodeGenRegister *SR = P.second;
      if (!SR->Artificial)
        P.first->Artificial = false;
    }
  }

  // Native register units are associated with a leaf register. They've all been
  // discovered now.
  NumNativeRegUnits = RegUnits.size();

  // Read in register class definitions.
  std::vector<Record*> RCs = Records.getAllDerivedDefinitions("RegisterClass");
  if (RCs.empty())
    PrintFatalError("No 'RegisterClass' subclasses defined!");

  // Allocate user-defined register classes.
  // Artificial classes are created but kept out of the lookup maps.
  for (auto *R : RCs) {
    RegClasses.emplace_back(*this, R);
    CodeGenRegisterClass &RC = RegClasses.back();
    if (!RC.Artificial)
      addToMaps(&RC);
  }

  // Infer missing classes to create a full algebra.
  computeInferredRegisterClasses();

  // Order register classes topologically and assign enum values.
  RegClasses.sort(TopoOrderRC);
  unsigned i = 0;
  for (auto &RC : RegClasses)
    RC.EnumValue = i++;
  CodeGenRegisterClass::computeSubClasses(*this);

  // Read in the register category definitions.
  std::vector<Record *> RCats =
      Records.getAllDerivedDefinitions("RegisterCategory");
  for (auto *R : RCats)
    RegCategories.emplace_back(*this, R);
}
1271 | ||||
1272 | // Create a synthetic CodeGenSubRegIndex without a corresponding Record. | |||
1273 | CodeGenSubRegIndex* | |||
1274 | CodeGenRegBank::createSubRegIndex(StringRef Name, StringRef Namespace) { | |||
1275 | SubRegIndices.emplace_back(Name, Namespace, SubRegIndices.size() + 1); | |||
1276 | return &SubRegIndices.back(); | |||
1277 | } | |||
1278 | ||||
1279 | CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) { | |||
1280 | CodeGenSubRegIndex *&Idx = Def2SubRegIdx[Def]; | |||
1281 | if (Idx) | |||
1282 | return Idx; | |||
1283 | SubRegIndices.emplace_back(Def, SubRegIndices.size() + 1); | |||
1284 | Idx = &SubRegIndices.back(); | |||
1285 | return Idx; | |||
1286 | } | |||
1287 | ||||
// Return the sub-register index created for Def, or null if none exists.
// Read-only counterpart of getSubRegIdx(): never creates a new index.
const CodeGenSubRegIndex *
CodeGenRegBank::findSubRegIdx(const Record* Def) const {
  return Def2SubRegIdx.lookup(Def);
}
1292 | ||||
1293 | CodeGenRegister *CodeGenRegBank::getReg(Record *Def) { | |||
1294 | CodeGenRegister *&Reg = Def2Reg[Def]; | |||
1295 | if (Reg) | |||
1296 | return Reg; | |||
1297 | Registers.emplace_back(Def, Registers.size() + 1); | |||
1298 | Reg = &Registers.back(); | |||
1299 | return Reg; | |||
1300 | } | |||
1301 | ||||
1302 | void CodeGenRegBank::addToMaps(CodeGenRegisterClass *RC) { | |||
1303 | if (Record *Def = RC->getDef()) | |||
1304 | Def2RC.insert(std::make_pair(Def, RC)); | |||
1305 | ||||
1306 | // Duplicate classes are rejected by insert(). | |||
1307 | // That's OK, we only care about the properties handled by CGRC::Key. | |||
1308 | CodeGenRegisterClass::Key K(*RC); | |||
1309 | Key2RC.insert(std::make_pair(K, RC)); | |||
1310 | } | |||
1311 | ||||
1312 | // Create a synthetic sub-class if it is missing. | |||
1313 | CodeGenRegisterClass* | |||
1314 | CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC, | |||
1315 | const CodeGenRegister::Vec *Members, | |||
1316 | StringRef Name) { | |||
1317 | // Synthetic sub-class has the same size and alignment as RC. | |||
1318 | CodeGenRegisterClass::Key K(Members, RC->RSI); | |||
1319 | RCKeyMap::const_iterator FoundI = Key2RC.find(K); | |||
1320 | if (FoundI != Key2RC.end()) | |||
1321 | return FoundI->second; | |||
1322 | ||||
1323 | // Sub-class doesn't exist, create a new one. | |||
1324 | RegClasses.emplace_back(*this, Name, K); | |||
1325 | addToMaps(&RegClasses.back()); | |||
1326 | return &RegClasses.back(); | |||
1327 | } | |||
1328 | ||||
1329 | CodeGenRegisterClass *CodeGenRegBank::getRegClass(const Record *Def) const { | |||
1330 | if (CodeGenRegisterClass *RC = Def2RC.lookup(Def)) | |||
1331 | return RC; | |||
1332 | ||||
1333 | PrintFatalError(Def->getLoc(), "Not a known RegisterClass!"); | |||
1334 | } | |||
1335 | ||||
1336 | CodeGenSubRegIndex* | |||
1337 | CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A, | |||
1338 | CodeGenSubRegIndex *B) { | |||
1339 | // Look for an existing entry. | |||
1340 | CodeGenSubRegIndex *Comp = A->compose(B); | |||
1341 | if (Comp) | |||
1342 | return Comp; | |||
1343 | ||||
1344 | // None exists, synthesize one. | |||
1345 | std::string Name = A->getName() + "_then_" + B->getName(); | |||
1346 | Comp = createSubRegIndex(Name, A->getNamespace()); | |||
1347 | A->addComposite(B, Comp); | |||
1348 | return Comp; | |||
1349 | } | |||
1350 | ||||
1351 | CodeGenSubRegIndex *CodeGenRegBank:: | |||
1352 | getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8> &Parts) { | |||
1353 | assert(Parts.size() > 1 && "Need two parts to concatenate")(static_cast <bool> (Parts.size() > 1 && "Need two parts to concatenate" ) ? void (0) : __assert_fail ("Parts.size() > 1 && \"Need two parts to concatenate\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1353, __extension__ __PRETTY_FUNCTION__)); | |||
1354 | #ifndef NDEBUG | |||
1355 | for (CodeGenSubRegIndex *Idx : Parts) { | |||
1356 | assert(Idx->ConcatenationOf.empty() && "No transitive closure?")(static_cast <bool> (Idx->ConcatenationOf.empty() && "No transitive closure?") ? void (0) : __assert_fail ("Idx->ConcatenationOf.empty() && \"No transitive closure?\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1356, __extension__ __PRETTY_FUNCTION__)); | |||
1357 | } | |||
1358 | #endif | |||
1359 | ||||
1360 | // Look for an existing entry. | |||
1361 | CodeGenSubRegIndex *&Idx = ConcatIdx[Parts]; | |||
1362 | if (Idx) | |||
1363 | return Idx; | |||
1364 | ||||
1365 | // None exists, synthesize one. | |||
1366 | std::string Name = Parts.front()->getName(); | |||
1367 | // Determine whether all parts are contiguous. | |||
1368 | bool isContinuous = true; | |||
1369 | unsigned Size = Parts.front()->Size; | |||
1370 | unsigned LastOffset = Parts.front()->Offset; | |||
1371 | unsigned LastSize = Parts.front()->Size; | |||
1372 | unsigned UnknownSize = (uint16_t)-1; | |||
1373 | for (unsigned i = 1, e = Parts.size(); i != e; ++i) { | |||
1374 | Name += '_'; | |||
1375 | Name += Parts[i]->getName(); | |||
1376 | if (Size == UnknownSize || Parts[i]->Size == UnknownSize) | |||
1377 | Size = UnknownSize; | |||
1378 | else | |||
1379 | Size += Parts[i]->Size; | |||
1380 | if (LastSize == UnknownSize || Parts[i]->Offset != (LastOffset + LastSize)) | |||
1381 | isContinuous = false; | |||
1382 | LastOffset = Parts[i]->Offset; | |||
1383 | LastSize = Parts[i]->Size; | |||
1384 | } | |||
1385 | Idx = createSubRegIndex(Name, Parts.front()->getNamespace()); | |||
1386 | Idx->Size = Size; | |||
1387 | Idx->Offset = isContinuous ? Parts.front()->Offset : -1; | |||
1388 | Idx->ConcatenationOf.assign(Parts.begin(), Parts.end()); | |||
1389 | return Idx; | |||
1390 | } | |||
1391 | ||||
1392 | void CodeGenRegBank::computeComposites() { | |||
1393 | using RegMap = std::map<const CodeGenRegister*, const CodeGenRegister*>; | |||
1394 | ||||
1395 | // Subreg -> { Reg->Reg }, where the right-hand side is the mapping from | |||
1396 | // register to (sub)register associated with the action of the left-hand | |||
1397 | // side subregister. | |||
1398 | std::map<const CodeGenSubRegIndex*, RegMap> SubRegAction; | |||
1399 | for (const CodeGenRegister &R : Registers) { | |||
1400 | const CodeGenRegister::SubRegMap &SM = R.getSubRegs(); | |||
1401 | for (std::pair<const CodeGenSubRegIndex*, const CodeGenRegister*> P : SM) | |||
1402 | SubRegAction[P.first].insert({&R, P.second}); | |||
1403 | } | |||
1404 | ||||
1405 | // Calculate the composition of two subregisters as compositions of their | |||
1406 | // associated actions. | |||
1407 | auto compose = [&SubRegAction] (const CodeGenSubRegIndex *Sub1, | |||
1408 | const CodeGenSubRegIndex *Sub2) { | |||
1409 | RegMap C; | |||
1410 | const RegMap &Img1 = SubRegAction.at(Sub1); | |||
1411 | const RegMap &Img2 = SubRegAction.at(Sub2); | |||
1412 | for (std::pair<const CodeGenRegister*, const CodeGenRegister*> P : Img1) { | |||
1413 | auto F = Img2.find(P.second); | |||
1414 | if (F != Img2.end()) | |||
1415 | C.insert({P.first, F->second}); | |||
1416 | } | |||
1417 | return C; | |||
1418 | }; | |||
1419 | ||||
1420 | // Check if the two maps agree on the intersection of their domains. | |||
1421 | auto agree = [] (const RegMap &Map1, const RegMap &Map2) { | |||
1422 | // Technically speaking, an empty map agrees with any other map, but | |||
1423 | // this could flag false positives. We're interested in non-vacuous | |||
1424 | // agreements. | |||
1425 | if (Map1.empty() || Map2.empty()) | |||
1426 | return false; | |||
1427 | for (std::pair<const CodeGenRegister*, const CodeGenRegister*> P : Map1) { | |||
1428 | auto F = Map2.find(P.first); | |||
1429 | if (F == Map2.end() || P.second != F->second) | |||
1430 | return false; | |||
1431 | } | |||
1432 | return true; | |||
1433 | }; | |||
1434 | ||||
1435 | using CompositePair = std::pair<const CodeGenSubRegIndex*, | |||
1436 | const CodeGenSubRegIndex*>; | |||
1437 | SmallSet<CompositePair,4> UserDefined; | |||
1438 | for (const CodeGenSubRegIndex &Idx : SubRegIndices) | |||
1439 | for (auto P : Idx.getComposites()) | |||
1440 | UserDefined.insert(std::make_pair(&Idx, P.first)); | |||
1441 | ||||
1442 | // Keep track of TopoSigs visited. We only need to visit each TopoSig once, | |||
1443 | // and many registers will share TopoSigs on regular architectures. | |||
1444 | BitVector TopoSigs(getNumTopoSigs()); | |||
1445 | ||||
1446 | for (const auto &Reg1 : Registers) { | |||
1447 | // Skip identical subreg structures already processed. | |||
1448 | if (TopoSigs.test(Reg1.getTopoSig())) | |||
1449 | continue; | |||
1450 | TopoSigs.set(Reg1.getTopoSig()); | |||
1451 | ||||
1452 | const CodeGenRegister::SubRegMap &SRM1 = Reg1.getSubRegs(); | |||
1453 | for (auto I1 : SRM1) { | |||
1454 | CodeGenSubRegIndex *Idx1 = I1.first; | |||
1455 | CodeGenRegister *Reg2 = I1.second; | |||
1456 | // Ignore identity compositions. | |||
1457 | if (&Reg1 == Reg2) | |||
1458 | continue; | |||
1459 | const CodeGenRegister::SubRegMap &SRM2 = Reg2->getSubRegs(); | |||
1460 | // Try composing Idx1 with another SubRegIndex. | |||
1461 | for (auto I2 : SRM2) { | |||
1462 | CodeGenSubRegIndex *Idx2 = I2.first; | |||
1463 | CodeGenRegister *Reg3 = I2.second; | |||
1464 | // Ignore identity compositions. | |||
1465 | if (Reg2 == Reg3) | |||
1466 | continue; | |||
1467 | // OK Reg1:IdxPair == Reg3. Find the index with Reg:Idx == Reg3. | |||
1468 | CodeGenSubRegIndex *Idx3 = Reg1.getSubRegIndex(Reg3); | |||
1469 | assert(Idx3 && "Sub-register doesn't have an index")(static_cast <bool> (Idx3 && "Sub-register doesn't have an index" ) ? void (0) : __assert_fail ("Idx3 && \"Sub-register doesn't have an index\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1469, __extension__ __PRETTY_FUNCTION__)); | |||
1470 | ||||
1471 | // Conflicting composition? Emit a warning but allow it. | |||
1472 | if (CodeGenSubRegIndex *Prev = Idx1->addComposite(Idx2, Idx3)) { | |||
1473 | // If the composition was not user-defined, always emit a warning. | |||
1474 | if (!UserDefined.count({Idx1, Idx2}) || | |||
1475 | agree(compose(Idx1, Idx2), SubRegAction.at(Idx3))) | |||
1476 | PrintWarning(Twine("SubRegIndex ") + Idx1->getQualifiedName() + | |||
1477 | " and " + Idx2->getQualifiedName() + | |||
1478 | " compose ambiguously as " + Prev->getQualifiedName() + | |||
1479 | " or " + Idx3->getQualifiedName()); | |||
1480 | } | |||
1481 | } | |||
1482 | } | |||
1483 | } | |||
1484 | } | |||
1485 | ||||
1486 | // Compute lane masks. This is similar to register units, but at the | |||
1487 | // sub-register index level. Each bit in the lane mask is like a register unit | |||
1488 | // class, and two lane masks will have a bit in common if two sub-register | |||
1489 | // indices overlap in some register. | |||
1490 | // | |||
1491 | // Conservatively share a lane mask bit if two sub-register indices overlap in | |||
1492 | // some registers, but not in others. That shouldn't happen a lot. | |||
1493 | void CodeGenRegBank::computeSubRegLaneMasks() { | |||
1494 | // First assign individual bits to all the leaf indices. | |||
1495 | unsigned Bit = 0; | |||
1496 | // Determine mask of lanes that cover their registers. | |||
1497 | CoveringLanes = LaneBitmask::getAll(); | |||
1498 | for (auto &Idx : SubRegIndices) { | |||
1499 | if (Idx.getComposites().empty()) { | |||
1500 | if (Bit > LaneBitmask::BitWidth) { | |||
1501 | PrintFatalError( | |||
1502 | Twine("Ran out of lanemask bits to represent subregister ") | |||
1503 | + Idx.getName()); | |||
1504 | } | |||
1505 | Idx.LaneMask = LaneBitmask::getLane(Bit); | |||
1506 | ++Bit; | |||
1507 | } else { | |||
1508 | Idx.LaneMask = LaneBitmask::getNone(); | |||
1509 | } | |||
1510 | } | |||
1511 | ||||
1512 | // Compute transformation sequences for composeSubRegIndexLaneMask. The idea | |||
1513 | // here is that for each possible target subregister we look at the leafs | |||
1514 | // in the subregister graph that compose for this target and create | |||
1515 | // transformation sequences for the lanemasks. Each step in the sequence | |||
1516 | // consists of a bitmask and a bitrotate operation. As the rotation amounts | |||
1517 | // are usually the same for many subregisters we can easily combine the steps | |||
1518 | // by combining the masks. | |||
1519 | for (const auto &Idx : SubRegIndices) { | |||
1520 | const auto &Composites = Idx.getComposites(); | |||
1521 | auto &LaneTransforms = Idx.CompositionLaneMaskTransform; | |||
1522 | ||||
1523 | if (Composites.empty()) { | |||
1524 | // Moving from a class with no subregisters we just had a single lane: | |||
1525 | // The subregister must be a leaf subregister and only occupies 1 bit. | |||
1526 | // Move the bit from the class without subregisters into that position. | |||
1527 | unsigned DstBit = Idx.LaneMask.getHighestLane(); | |||
1528 | assert(Idx.LaneMask == LaneBitmask::getLane(DstBit) &&(static_cast <bool> (Idx.LaneMask == LaneBitmask::getLane (DstBit) && "Must be a leaf subregister") ? void (0) : __assert_fail ("Idx.LaneMask == LaneBitmask::getLane(DstBit) && \"Must be a leaf subregister\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1529, __extension__ __PRETTY_FUNCTION__)) | |||
1529 | "Must be a leaf subregister")(static_cast <bool> (Idx.LaneMask == LaneBitmask::getLane (DstBit) && "Must be a leaf subregister") ? void (0) : __assert_fail ("Idx.LaneMask == LaneBitmask::getLane(DstBit) && \"Must be a leaf subregister\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1529, __extension__ __PRETTY_FUNCTION__)); | |||
1530 | MaskRolPair MaskRol = { LaneBitmask::getLane(0), (uint8_t)DstBit }; | |||
1531 | LaneTransforms.push_back(MaskRol); | |||
1532 | } else { | |||
1533 | // Go through all leaf subregisters and find the ones that compose with | |||
1534 | // Idx. These make out all possible valid bits in the lane mask we want to | |||
1535 | // transform. Looking only at the leafs ensure that only a single bit in | |||
1536 | // the mask is set. | |||
1537 | unsigned NextBit = 0; | |||
1538 | for (auto &Idx2 : SubRegIndices) { | |||
1539 | // Skip non-leaf subregisters. | |||
1540 | if (!Idx2.getComposites().empty()) | |||
1541 | continue; | |||
1542 | // Replicate the behaviour from the lane mask generation loop above. | |||
1543 | unsigned SrcBit = NextBit; | |||
1544 | LaneBitmask SrcMask = LaneBitmask::getLane(SrcBit); | |||
1545 | if (NextBit < LaneBitmask::BitWidth-1) | |||
1546 | ++NextBit; | |||
1547 | assert(Idx2.LaneMask == SrcMask)(static_cast <bool> (Idx2.LaneMask == SrcMask) ? void ( 0) : __assert_fail ("Idx2.LaneMask == SrcMask", "llvm/utils/TableGen/CodeGenRegisters.cpp" , 1547, __extension__ __PRETTY_FUNCTION__)); | |||
1548 | ||||
1549 | // Get the composed subregister if there is any. | |||
1550 | auto C = Composites.find(&Idx2); | |||
1551 | if (C == Composites.end()) | |||
1552 | continue; | |||
1553 | const CodeGenSubRegIndex *Composite = C->second; | |||
1554 | // The Composed subreg should be a leaf subreg too | |||
1555 | assert(Composite->getComposites().empty())(static_cast <bool> (Composite->getComposites().empty ()) ? void (0) : __assert_fail ("Composite->getComposites().empty()" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1555, __extension__ __PRETTY_FUNCTION__)); | |||
1556 | ||||
1557 | // Create Mask+Rotate operation and merge with existing ops if possible. | |||
1558 | unsigned DstBit = Composite->LaneMask.getHighestLane(); | |||
1559 | int Shift = DstBit - SrcBit; | |||
1560 | uint8_t RotateLeft = Shift >= 0 ? (uint8_t)Shift | |||
1561 | : LaneBitmask::BitWidth + Shift; | |||
1562 | for (auto &I : LaneTransforms) { | |||
1563 | if (I.RotateLeft == RotateLeft) { | |||
1564 | I.Mask |= SrcMask; | |||
1565 | SrcMask = LaneBitmask::getNone(); | |||
1566 | } | |||
1567 | } | |||
1568 | if (SrcMask.any()) { | |||
1569 | MaskRolPair MaskRol = { SrcMask, RotateLeft }; | |||
1570 | LaneTransforms.push_back(MaskRol); | |||
1571 | } | |||
1572 | } | |||
1573 | } | |||
1574 | ||||
1575 | // Optimize if the transformation consists of one step only: Set mask to | |||
1576 | // 0xffffffff (including some irrelevant invalid bits) so that it should | |||
1577 | // merge with more entries later while compressing the table. | |||
1578 | if (LaneTransforms.size() == 1) | |||
1579 | LaneTransforms[0].Mask = LaneBitmask::getAll(); | |||
1580 | ||||
1581 | // Further compression optimization: For invalid compositions resulting | |||
1582 | // in a sequence with 0 entries we can just pick any other. Choose | |||
1583 | // Mask 0xffffffff with Rotation 0. | |||
1584 | if (LaneTransforms.size() == 0) { | |||
1585 | MaskRolPair P = { LaneBitmask::getAll(), 0 }; | |||
1586 | LaneTransforms.push_back(P); | |||
1587 | } | |||
1588 | } | |||
1589 | ||||
1590 | // FIXME: What if ad-hoc aliasing introduces overlaps that aren't represented | |||
1591 | // by the sub-register graph? This doesn't occur in any known targets. | |||
1592 | ||||
1593 | // Inherit lanes from composites. | |||
1594 | for (const auto &Idx : SubRegIndices) { | |||
1595 | LaneBitmask Mask = Idx.computeLaneMask(); | |||
1596 | // If some super-registers without CoveredBySubRegs use this index, we can | |||
1597 | // no longer assume that the lanes are covering their registers. | |||
1598 | if (!Idx.AllSuperRegsCovered) | |||
1599 | CoveringLanes &= ~Mask; | |||
1600 | } | |||
1601 | ||||
1602 | // Compute lane mask combinations for register classes. | |||
1603 | for (auto &RegClass : RegClasses) { | |||
1604 | LaneBitmask LaneMask; | |||
1605 | for (const auto &SubRegIndex : SubRegIndices) { | |||
1606 | if (RegClass.getSubClassWithSubReg(&SubRegIndex) == nullptr) | |||
1607 | continue; | |||
1608 | LaneMask |= SubRegIndex.LaneMask; | |||
1609 | } | |||
1610 | ||||
1611 | // For classes without any subregisters set LaneMask to 1 instead of 0. | |||
1612 | // This makes it easier for client code to handle classes uniformly. | |||
1613 | if (LaneMask.none()) | |||
1614 | LaneMask = LaneBitmask::getLane(0); | |||
1615 | ||||
1616 | RegClass.LaneMask = LaneMask; | |||
1617 | } | |||
1618 | } | |||
1619 | ||||
namespace {

// UberRegSet is a helper class for computeRegUnitWeights. Each UberRegSet is
// the transitive closure of the union of overlapping register
// classes. Together, the UberRegSets form a partition of the registers. If we
// consider overlapping register classes to be connected, then each UberRegSet
// is a set of connected components.
//
// An UberRegSet will likely be a horizontal slice of register names of
// the same width. Nontrivial subregisters should then be in a separate
// UberRegSet. But this property isn't required for valid computation of
// register unit weights.
//
// A Weight field caches the max per-register unit weight in each UberRegSet.
//
// A set of SingularDeterminants flags single units of some register in this set
// for which the unit weight equals the set weight. These units should not have
// their weight increased.
struct UberRegSet {
  // Registers belonging to this partition element.
  CodeGenRegister::Vec Regs;
  // Cached maximum per-register unit weight within this set.
  unsigned Weight = 0;
  // Units that singularly determine the set weight (must not be inflated).
  CodeGenRegister::RegUnitList SingularDeterminants;

  UberRegSet() = default;
};

} // end anonymous namespace
1647 | ||||
1648 | // Partition registers into UberRegSets, where each set is the transitive | |||
1649 | // closure of the union of overlapping register classes. | |||
1650 | // | |||
1651 | // UberRegSets[0] is a special non-allocatable set. | |||
1652 | static void computeUberSets(std::vector<UberRegSet> &UberSets, | |||
1653 | std::vector<UberRegSet*> &RegSets, | |||
1654 | CodeGenRegBank &RegBank) { | |||
1655 | const auto &Registers = RegBank.getRegisters(); | |||
1656 | ||||
1657 | // The Register EnumValue is one greater than its index into Registers. | |||
1658 | assert(Registers.size() == Registers.back().EnumValue &&(static_cast <bool> (Registers.size() == Registers.back ().EnumValue && "register enum value mismatch") ? void (0) : __assert_fail ("Registers.size() == Registers.back().EnumValue && \"register enum value mismatch\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1659, __extension__ __PRETTY_FUNCTION__)) | |||
1659 | "register enum value mismatch")(static_cast <bool> (Registers.size() == Registers.back ().EnumValue && "register enum value mismatch") ? void (0) : __assert_fail ("Registers.size() == Registers.back().EnumValue && \"register enum value mismatch\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1659, __extension__ __PRETTY_FUNCTION__)); | |||
1660 | ||||
1661 | // For simplicitly make the SetID the same as EnumValue. | |||
1662 | IntEqClasses UberSetIDs(Registers.size() + 1); | |||
1663 | BitVector AllocatableRegs(Registers.size() + 1); | |||
1664 | for (auto &RegClass : RegBank.getRegClasses()) { | |||
1665 | if (!RegClass.Allocatable) | |||
1666 | continue; | |||
1667 | ||||
1668 | const CodeGenRegister::Vec &Regs = RegClass.getMembers(); | |||
1669 | if (Regs.empty()) | |||
1670 | continue; | |||
1671 | ||||
1672 | unsigned USetID = UberSetIDs.findLeader((*Regs.begin())->EnumValue); | |||
1673 | assert(USetID && "register number 0 is invalid")(static_cast <bool> (USetID && "register number 0 is invalid" ) ? void (0) : __assert_fail ("USetID && \"register number 0 is invalid\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1673, __extension__ __PRETTY_FUNCTION__)); | |||
1674 | ||||
1675 | AllocatableRegs.set((*Regs.begin())->EnumValue); | |||
1676 | for (const CodeGenRegister *CGR : llvm::drop_begin(Regs)) { | |||
1677 | AllocatableRegs.set(CGR->EnumValue); | |||
1678 | UberSetIDs.join(USetID, CGR->EnumValue); | |||
1679 | } | |||
1680 | } | |||
1681 | // Combine non-allocatable regs. | |||
1682 | for (const auto &Reg : Registers) { | |||
1683 | unsigned RegNum = Reg.EnumValue; | |||
1684 | if (AllocatableRegs.test(RegNum)) | |||
1685 | continue; | |||
1686 | ||||
1687 | UberSetIDs.join(0, RegNum); | |||
1688 | } | |||
1689 | UberSetIDs.compress(); | |||
1690 | ||||
1691 | // Make the first UberSet a special unallocatable set. | |||
1692 | unsigned ZeroID = UberSetIDs[0]; | |||
1693 | ||||
1694 | // Insert Registers into the UberSets formed by union-find. | |||
1695 | // Do not resize after this. | |||
1696 | UberSets.resize(UberSetIDs.getNumClasses()); | |||
1697 | unsigned i = 0; | |||
1698 | for (const CodeGenRegister &Reg : Registers) { | |||
1699 | unsigned USetID = UberSetIDs[Reg.EnumValue]; | |||
1700 | if (!USetID) | |||
1701 | USetID = ZeroID; | |||
1702 | else if (USetID == ZeroID) | |||
1703 | USetID = 0; | |||
1704 | ||||
1705 | UberRegSet *USet = &UberSets[USetID]; | |||
1706 | USet->Regs.push_back(&Reg); | |||
1707 | RegSets[i++] = USet; | |||
1708 | } | |||
1709 | } | |||
1710 | ||||
1711 | // Recompute each UberSet weight after changing unit weights. | |||
1712 | static void computeUberWeights(std::vector<UberRegSet> &UberSets, | |||
1713 | CodeGenRegBank &RegBank) { | |||
1714 | // Skip the first unallocatable set. | |||
1715 | for (std::vector<UberRegSet>::iterator I = std::next(UberSets.begin()), | |||
1716 | E = UberSets.end(); I != E; ++I) { | |||
1717 | ||||
1718 | // Initialize all unit weights in this set, and remember the max units/reg. | |||
1719 | const CodeGenRegister *Reg = nullptr; | |||
1720 | unsigned MaxWeight = 0, Weight = 0; | |||
1721 | for (RegUnitIterator UnitI(I->Regs); UnitI.isValid(); ++UnitI) { | |||
1722 | if (Reg != UnitI.getReg()) { | |||
1723 | if (Weight > MaxWeight) | |||
1724 | MaxWeight = Weight; | |||
1725 | Reg = UnitI.getReg(); | |||
1726 | Weight = 0; | |||
1727 | } | |||
1728 | if (!RegBank.getRegUnit(*UnitI).Artificial) { | |||
1729 | unsigned UWeight = RegBank.getRegUnit(*UnitI).Weight; | |||
1730 | if (!UWeight) { | |||
1731 | UWeight = 1; | |||
1732 | RegBank.increaseRegUnitWeight(*UnitI, UWeight); | |||
1733 | } | |||
1734 | Weight += UWeight; | |||
1735 | } | |||
1736 | } | |||
1737 | if (Weight > MaxWeight) | |||
1738 | MaxWeight = Weight; | |||
1739 | if (I->Weight != MaxWeight) { | |||
1740 | LLVM_DEBUG(dbgs() << "UberSet " << I - UberSets.begin() << " Weight "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1741 | << MaxWeight;do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1742 | for (auto &Unitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1743 | : I->Regs) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1744 | << " " << Unit->getName();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1745 | dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false); | |||
1746 | // Update the set weight. | |||
1747 | I->Weight = MaxWeight; | |||
1748 | } | |||
1749 | ||||
1750 | // Find singular determinants. | |||
1751 | for (const auto R : I->Regs) { | |||
1752 | if (R->getRegUnits().count() == 1 && R->getWeight(RegBank) == I->Weight) { | |||
1753 | I->SingularDeterminants |= R->getRegUnits(); | |||
1754 | } | |||
1755 | } | |||
1756 | } | |||
1757 | } | |||
1758 | ||||
// normalizeWeight is a computeRegUnitWeights helper that adjusts the weight of
// a register and its subregisters so that they have the same weight as their
// UberSet. Self-recursion processes the subregister tree in postorder so
// subregisters are normalized first.
//
// Side effects:
// - creates new adopted register units
// - causes superregisters to inherit adopted units
// - increases the weight of "singular" units
// - induces recomputation of UberWeights.
//
// Returns true if any unit weight or unit set was changed.
static bool normalizeWeight(CodeGenRegister *Reg,
                            std::vector<UberRegSet> &UberSets,
                            std::vector<UberRegSet*> &RegSets,
                            BitVector &NormalRegs,
                            CodeGenRegister::RegUnitList &NormalUnits,
                            CodeGenRegBank &RegBank) {
  // Visit each register at most once per top-level invocation.
  NormalRegs.resize(std::max(Reg->EnumValue + 1, NormalRegs.size()));
  if (NormalRegs.test(Reg->EnumValue))
    return false;
  NormalRegs.set(Reg->EnumValue);

  bool Changed = false;
  const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs();
  for (auto SRI : SRM) {
    if (SRI.second == Reg)
      continue; // self-cycles happen

    Changed |= normalizeWeight(SRI.second, UberSets, RegSets, NormalRegs,
                               NormalUnits, RegBank);
  }
  // Postorder register normalization.

  // Inherit register units newly adopted by subregisters.
  if (Reg->inheritRegUnits(RegBank))
    computeUberWeights(UberSets, RegBank);

  // Check if this register is too skinny for its UberRegSet.
  UberRegSet *UberSet = RegSets[RegBank.getRegIndex(Reg)];

  unsigned RegWeight = Reg->getWeight(RegBank);
  if (UberSet->Weight > RegWeight) {
    // A register unit's weight can be adjusted only if it is the singular unit
    // for this register, has not been used to normalize a subregister's set,
    // and has not already been used to singularly determine this UberRegSet.
    unsigned AdjustUnit = *Reg->getRegUnits().begin();
    if (Reg->getRegUnits().count() != 1
        || hasRegUnit(NormalUnits, AdjustUnit)
        || hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) {
      // We don't have an adjustable unit, so adopt a new one.
      AdjustUnit = RegBank.newRegUnit(UberSet->Weight - RegWeight);
      Reg->adoptRegUnit(AdjustUnit);
      // Adopting a unit does not immediately require recomputing set weights.
    }
    else {
      // Adjust the existing single unit.
      if (!RegBank.getRegUnit(AdjustUnit).Artificial)
        RegBank.increaseRegUnitWeight(AdjustUnit, UberSet->Weight - RegWeight);
      // The unit may be shared among sets and registers within this set.
      computeUberWeights(UberSets, RegBank);
    }
    Changed = true;
  }

  // Mark these units normalized so superregisters can't change their weights.
  NormalUnits |= Reg->getRegUnits();

  return Changed;
}
1827 | ||||
1828 | // Compute a weight for each register unit created during getSubRegs. | |||
1829 | // | |||
1830 | // The goal is that two registers in the same class will have the same weight, | |||
1831 | // where each register's weight is defined as sum of its units' weights. | |||
1832 | void CodeGenRegBank::computeRegUnitWeights() { | |||
1833 | std::vector<UberRegSet> UberSets; | |||
1834 | std::vector<UberRegSet*> RegSets(Registers.size()); | |||
1835 | computeUberSets(UberSets, RegSets, *this); | |||
1836 | // UberSets and RegSets are now immutable. | |||
1837 | ||||
1838 | computeUberWeights(UberSets, *this); | |||
1839 | ||||
1840 | // Iterate over each Register, normalizing the unit weights until reaching | |||
1841 | // a fix point. | |||
1842 | unsigned NumIters = 0; | |||
1843 | for (bool Changed = true; Changed; ++NumIters) { | |||
1844 | assert(NumIters <= NumNativeRegUnits && "Runaway register unit weights")(static_cast <bool> (NumIters <= NumNativeRegUnits && "Runaway register unit weights") ? void (0) : __assert_fail ( "NumIters <= NumNativeRegUnits && \"Runaway register unit weights\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1844, __extension__ __PRETTY_FUNCTION__)); | |||
1845 | (void) NumIters; | |||
1846 | Changed = false; | |||
1847 | for (auto &Reg : Registers) { | |||
1848 | CodeGenRegister::RegUnitList NormalUnits; | |||
1849 | BitVector NormalRegs; | |||
1850 | Changed |= normalizeWeight(&Reg, UberSets, RegSets, NormalRegs, | |||
1851 | NormalUnits, *this); | |||
1852 | } | |||
1853 | } | |||
1854 | } | |||
1855 | ||||
1856 | // Find a set in UniqueSets with the same elements as Set. | |||
1857 | // Return an iterator into UniqueSets. | |||
1858 | static std::vector<RegUnitSet>::const_iterator | |||
1859 | findRegUnitSet(const std::vector<RegUnitSet> &UniqueSets, | |||
1860 | const RegUnitSet &Set) { | |||
1861 | std::vector<RegUnitSet>::const_iterator | |||
1862 | I = UniqueSets.begin(), E = UniqueSets.end(); | |||
1863 | for(;I != E; ++I) { | |||
1864 | if (I->Units == Set.Units) | |||
1865 | break; | |||
1866 | } | |||
1867 | return I; | |||
1868 | } | |||
1869 | ||||
// Return true if the RUSubSet is a subset of RUSuperSet.
// Both unit lists must be sorted ascending — std::includes requires sorted
// ranges; unit lists here are built in sorted order.
static bool isRegUnitSubSet(const std::vector<unsigned> &RUSubSet,
                            const std::vector<unsigned> &RUSuperSet) {
  return std::includes(RUSuperSet.begin(), RUSuperSet.end(),
                       RUSubSet.begin(), RUSubSet.end());
}
1876 | ||||
1877 | /// Iteratively prune unit sets. Prune subsets that are close to the superset, | |||
1878 | /// but with one or two registers removed. We occasionally have registers like | |||
1879 | /// APSR and PC thrown in with the general registers. We also see many | |||
1880 | /// special-purpose register subsets, such as tail-call and Thumb | |||
1881 | /// encodings. Generating all possible overlapping sets is combinatorial and | |||
1882 | /// overkill for modeling pressure. Ideally we could fix this statically in | |||
1883 | /// tablegen by (1) having the target define register classes that only include | |||
1884 | /// the allocatable registers and marking other classes as non-allocatable and | |||
1885 | /// (2) having a way to mark special purpose classes as "don't-care" classes for | |||
1886 | /// the purpose of pressure. However, we make an attempt to handle targets that | |||
1887 | /// are not nicely defined by merging nearly identical register unit sets | |||
1888 | /// statically. This generates smaller tables. Then, dynamically, we adjust the | |||
1889 | /// set limit by filtering the reserved registers. | |||
1890 | /// | |||
1891 | /// Merge sets only if the units have the same weight. For example, on ARM, | |||
1892 | /// Q-tuples with ssub index 0 include all S regs but also include D16+. We | |||
1893 | /// should not expand the S set to include D regs. | |||
1894 | void CodeGenRegBank::pruneUnitSets() { | |||
1895 | assert(RegClassUnitSets.empty() && "this invalidates RegClassUnitSets")(static_cast <bool> (RegClassUnitSets.empty() && "this invalidates RegClassUnitSets") ? void (0) : __assert_fail ("RegClassUnitSets.empty() && \"this invalidates RegClassUnitSets\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1895, __extension__ __PRETTY_FUNCTION__)); | |||
1896 | ||||
1897 | // Form an equivalence class of UnitSets with no significant difference. | |||
1898 | std::vector<unsigned> SuperSetIDs; | |||
1899 | for (unsigned SubIdx = 0, EndIdx = RegUnitSets.size(); | |||
1900 | SubIdx != EndIdx; ++SubIdx) { | |||
1901 | const RegUnitSet &SubSet = RegUnitSets[SubIdx]; | |||
1902 | unsigned SuperIdx = 0; | |||
1903 | for (; SuperIdx != EndIdx; ++SuperIdx) { | |||
1904 | if (SuperIdx == SubIdx) | |||
1905 | continue; | |||
1906 | ||||
1907 | unsigned UnitWeight = RegUnits[SubSet.Units[0]].Weight; | |||
1908 | const RegUnitSet &SuperSet = RegUnitSets[SuperIdx]; | |||
1909 | if (isRegUnitSubSet(SubSet.Units, SuperSet.Units) | |||
1910 | && (SubSet.Units.size() + 3 > SuperSet.Units.size()) | |||
1911 | && UnitWeight == RegUnits[SuperSet.Units[0]].Weight | |||
1912 | && UnitWeight == RegUnits[SuperSet.Units.back()].Weight) { | |||
1913 | LLVM_DEBUG(dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdx << "\n"; } } while (false) | |||
1914 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdx << "\n"; } } while (false); | |||
1915 | // We can pick any of the set names for the merged set. Go for the | |||
1916 | // shortest one to avoid picking the name of one of the classes that are | |||
1917 | // artificially created by tablegen. So "FPR128_lo" instead of | |||
1918 | // "QQQQ_with_qsub3_in_FPR128_lo". | |||
1919 | if (RegUnitSets[SubIdx].Name.size() < RegUnitSets[SuperIdx].Name.size()) | |||
1920 | RegUnitSets[SuperIdx].Name = RegUnitSets[SubIdx].Name; | |||
1921 | break; | |||
1922 | } | |||
1923 | } | |||
1924 | if (SuperIdx == EndIdx) | |||
1925 | SuperSetIDs.push_back(SubIdx); | |||
1926 | } | |||
1927 | // Populate PrunedUnitSets with each equivalence class's superset. | |||
1928 | std::vector<RegUnitSet> PrunedUnitSets(SuperSetIDs.size()); | |||
1929 | for (unsigned i = 0, e = SuperSetIDs.size(); i != e; ++i) { | |||
1930 | unsigned SuperIdx = SuperSetIDs[i]; | |||
1931 | PrunedUnitSets[i].Name = RegUnitSets[SuperIdx].Name; | |||
1932 | PrunedUnitSets[i].Units.swap(RegUnitSets[SuperIdx].Units); | |||
1933 | } | |||
1934 | RegUnitSets.swap(PrunedUnitSets); | |||
1935 | } | |||
1936 | ||||
1937 | // Create a RegUnitSet for each RegClass that contains all units in the class | |||
1938 | // including adopted units that are necessary to model register pressure. Then | |||
1939 | // iteratively compute RegUnitSets such that the union of any two overlapping | |||
1940 | // RegUnitSets is repreresented. | |||
1941 | // | |||
1942 | // RegisterInfoEmitter will map each RegClass to its RegUnitClass and any | |||
1943 | // RegUnitSet that is a superset of that RegUnitClass. | |||
1944 | void CodeGenRegBank::computeRegUnitSets() { | |||
1945 | assert(RegUnitSets.empty() && "dirty RegUnitSets")(static_cast <bool> (RegUnitSets.empty() && "dirty RegUnitSets" ) ? void (0) : __assert_fail ("RegUnitSets.empty() && \"dirty RegUnitSets\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1945, __extension__ __PRETTY_FUNCTION__)); | |||
1946 | ||||
1947 | // Compute a unique RegUnitSet for each RegClass. | |||
1948 | auto &RegClasses = getRegClasses(); | |||
1949 | for (auto &RC : RegClasses) { | |||
1950 | if (!RC.Allocatable || RC.Artificial || !RC.GeneratePressureSet) | |||
1951 | continue; | |||
1952 | ||||
1953 | // Speculatively grow the RegUnitSets to hold the new set. | |||
1954 | RegUnitSets.resize(RegUnitSets.size() + 1); | |||
1955 | RegUnitSets.back().Name = RC.getName(); | |||
1956 | ||||
1957 | // Compute a sorted list of units in this class. | |||
1958 | RC.buildRegUnitSet(*this, RegUnitSets.back().Units); | |||
1959 | ||||
1960 | // Find an existing RegUnitSet. | |||
1961 | std::vector<RegUnitSet>::const_iterator SetI = | |||
1962 | findRegUnitSet(RegUnitSets, RegUnitSets.back()); | |||
1963 | if (SetI != std::prev(RegUnitSets.end())) | |||
1964 | RegUnitSets.pop_back(); | |||
1965 | } | |||
1966 | ||||
1967 | if (RegUnitSets.empty()) | |||
1968 | PrintFatalError("RegUnitSets cannot be empty!"); | |||
1969 | ||||
1970 | LLVM_DEBUG(dbgs() << "\nBefore pruning:\n"; for (unsigned USIdx = 0,do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1971 | USEnd = RegUnitSets.size();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1972 | USIdx < USEnd; ++USIdx) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1973 | dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1974 | for (auto &U : RegUnitSets[USIdx].Units)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1975 | printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1976 | dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1977 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false); | |||
1978 | ||||
1979 | // Iteratively prune unit sets. | |||
1980 | pruneUnitSets(); | |||
1981 | ||||
1982 | LLVM_DEBUG(dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0,do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1983 | USEnd = RegUnitSets.size();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1984 | USIdx < USEnd; ++USIdx) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1985 | dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1986 | for (auto &U : RegUnitSets[USIdx].Units)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1987 | printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1988 | dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1989 | } dbgs() << "\nUnion sets:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false); | |||
1990 | ||||
1991 | // Iterate over all unit sets, including new ones added by this loop. | |||
1992 | unsigned NumRegUnitSubSets = RegUnitSets.size(); | |||
1993 | for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) { | |||
1994 | // In theory, this is combinatorial. In practice, it needs to be bounded | |||
1995 | // by a small number of sets for regpressure to be efficient. | |||
1996 | // If the assert is hit, we need to implement pruning. | |||
1997 | assert(Idx < (2*NumRegUnitSubSets) && "runaway unit set inference")(static_cast <bool> (Idx < (2*NumRegUnitSubSets) && "runaway unit set inference") ? void (0) : __assert_fail ("Idx < (2*NumRegUnitSubSets) && \"runaway unit set inference\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 1997, __extension__ __PRETTY_FUNCTION__)); | |||
1998 | ||||
1999 | // Compare new sets with all original classes. | |||
2000 | for (unsigned SearchIdx = (Idx >= NumRegUnitSubSets) ? 0 : Idx+1; | |||
2001 | SearchIdx != EndIdx; ++SearchIdx) { | |||
2002 | std::set<unsigned> Intersection; | |||
2003 | std::set_intersection(RegUnitSets[Idx].Units.begin(), | |||
2004 | RegUnitSets[Idx].Units.end(), | |||
2005 | RegUnitSets[SearchIdx].Units.begin(), | |||
2006 | RegUnitSets[SearchIdx].Units.end(), | |||
2007 | std::inserter(Intersection, Intersection.begin())); | |||
2008 | if (Intersection.empty()) | |||
2009 | continue; | |||
2010 | ||||
2011 | // Speculatively grow the RegUnitSets to hold the new set. | |||
2012 | RegUnitSets.resize(RegUnitSets.size() + 1); | |||
2013 | RegUnitSets.back().Name = | |||
2014 | RegUnitSets[Idx].Name + "_with_" + RegUnitSets[SearchIdx].Name; | |||
2015 | ||||
2016 | std::set_union(RegUnitSets[Idx].Units.begin(), | |||
2017 | RegUnitSets[Idx].Units.end(), | |||
2018 | RegUnitSets[SearchIdx].Units.begin(), | |||
2019 | RegUnitSets[SearchIdx].Units.end(), | |||
2020 | std::inserter(RegUnitSets.back().Units, | |||
2021 | RegUnitSets.back().Units.begin())); | |||
2022 | ||||
2023 | // Find an existing RegUnitSet, or add the union to the unique sets. | |||
2024 | std::vector<RegUnitSet>::const_iterator SetI = | |||
2025 | findRegUnitSet(RegUnitSets, RegUnitSets.back()); | |||
2026 | if (SetI != std::prev(RegUnitSets.end())) | |||
2027 | RegUnitSets.pop_back(); | |||
2028 | else { | |||
2029 | LLVM_DEBUG(dbgs() << "UnitSet " << RegUnitSets.size() - 1 << " "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false) | |||
2030 | << RegUnitSets.back().Name << ":";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false) | |||
2031 | for (auto &Udo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false) | |||
2032 | : RegUnitSets.back().Units) printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false) | |||
2033 | dbgs() << "\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false); | |||
2034 | } | |||
2035 | } | |||
2036 | } | |||
2037 | ||||
2038 | // Iteratively prune unit sets after inferring supersets. | |||
2039 | pruneUnitSets(); | |||
2040 | ||||
2041 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
2042 | dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
2043 | USIdx < USEnd; ++USIdx) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
2044 | dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
2045 | for (auto &U : RegUnitSets[USIdx].Units)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
2046 | printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
2047 | dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
2048 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false); | |||
2049 | ||||
2050 | // For each register class, list the UnitSets that are supersets. | |||
2051 | RegClassUnitSets.resize(RegClasses.size()); | |||
2052 | int RCIdx = -1; | |||
2053 | for (auto &RC : RegClasses) { | |||
2054 | ++RCIdx; | |||
2055 | if (!RC.Allocatable) | |||
2056 | continue; | |||
2057 | ||||
2058 | // Recompute the sorted list of units in this class. | |||
2059 | std::vector<unsigned> RCRegUnits; | |||
2060 | RC.buildRegUnitSet(*this, RCRegUnits); | |||
2061 | ||||
2062 | // Don't increase pressure for unallocatable regclasses. | |||
2063 | if (RCRegUnits.empty()) | |||
2064 | continue; | |||
2065 | ||||
2066 | LLVM_DEBUG(dbgs() << "RC " << RC.getName() << " Units:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "RC " << RC.getName () << " Units:\n"; for (auto U : RCRegUnits) printRegUnitName (U); dbgs() << "\n UnitSetIDs:"; } } while (false) | |||
2067 | for (auto Udo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "RC " << RC.getName () << " Units:\n"; for (auto U : RCRegUnits) printRegUnitName (U); dbgs() << "\n UnitSetIDs:"; } } while (false) | |||
2068 | : RCRegUnits) printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "RC " << RC.getName () << " Units:\n"; for (auto U : RCRegUnits) printRegUnitName (U); dbgs() << "\n UnitSetIDs:"; } } while (false) | |||
2069 | dbgs() << "\n UnitSetIDs:")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "RC " << RC.getName () << " Units:\n"; for (auto U : RCRegUnits) printRegUnitName (U); dbgs() << "\n UnitSetIDs:"; } } while (false); | |||
2070 | ||||
2071 | // Find all supersets. | |||
2072 | for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); | |||
2073 | USIdx != USEnd; ++USIdx) { | |||
2074 | if (isRegUnitSubSet(RCRegUnits, RegUnitSets[USIdx].Units)) { | |||
2075 | LLVM_DEBUG(dbgs() << " " << USIdx)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << " " << USIdx; } } while (false); | |||
2076 | RegClassUnitSets[RCIdx].push_back(USIdx); | |||
2077 | } | |||
2078 | } | |||
2079 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; } } while (false ); | |||
2080 | assert((!RegClassUnitSets[RCIdx].empty() || !RC.GeneratePressureSet) &&(static_cast <bool> ((!RegClassUnitSets[RCIdx].empty() || !RC.GeneratePressureSet) && "missing unit set for regclass" ) ? void (0) : __assert_fail ("(!RegClassUnitSets[RCIdx].empty() || !RC.GeneratePressureSet) && \"missing unit set for regclass\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 2081, __extension__ __PRETTY_FUNCTION__)) | |||
2081 | "missing unit set for regclass")(static_cast <bool> ((!RegClassUnitSets[RCIdx].empty() || !RC.GeneratePressureSet) && "missing unit set for regclass" ) ? void (0) : __assert_fail ("(!RegClassUnitSets[RCIdx].empty() || !RC.GeneratePressureSet) && \"missing unit set for regclass\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 2081, __extension__ __PRETTY_FUNCTION__)); | |||
2082 | } | |||
2083 | ||||
2084 | // For each register unit, ensure that we have the list of UnitSets that | |||
2085 | // contain the unit. Normally, this matches an existing list of UnitSets for a | |||
2086 | // register class. If not, we create a new entry in RegClassUnitSets as a | |||
2087 | // "fake" register class. | |||
2088 | for (unsigned UnitIdx = 0, UnitEnd = NumNativeRegUnits; | |||
2089 | UnitIdx < UnitEnd; ++UnitIdx) { | |||
2090 | std::vector<unsigned> RUSets; | |||
2091 | for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) { | |||
2092 | RegUnitSet &RUSet = RegUnitSets[i]; | |||
2093 | if (!is_contained(RUSet.Units, UnitIdx)) | |||
2094 | continue; | |||
2095 | RUSets.push_back(i); | |||
2096 | } | |||
2097 | unsigned RCUnitSetsIdx = 0; | |||
2098 | for (unsigned e = RegClassUnitSets.size(); | |||
2099 | RCUnitSetsIdx != e; ++RCUnitSetsIdx) { | |||
2100 | if (RegClassUnitSets[RCUnitSetsIdx] == RUSets) { | |||
2101 | break; | |||
2102 | } | |||
2103 | } | |||
2104 | RegUnits[UnitIdx].RegClassUnitSetsIdx = RCUnitSetsIdx; | |||
2105 | if (RCUnitSetsIdx == RegClassUnitSets.size()) { | |||
2106 | // Create a new list of UnitSets as a "fake" register class. | |||
2107 | RegClassUnitSets.resize(RCUnitSetsIdx + 1); | |||
2108 | RegClassUnitSets[RCUnitSetsIdx].swap(RUSets); | |||
2109 | } | |||
2110 | } | |||
2111 | } | |||
2112 | ||||
2113 | void CodeGenRegBank::computeRegUnitLaneMasks() { | |||
2114 | for (auto &Register : Registers) { | |||
2115 | // Create an initial lane mask for all register units. | |||
2116 | const auto &RegUnits = Register.getRegUnits(); | |||
2117 | CodeGenRegister::RegUnitLaneMaskList | |||
2118 | RegUnitLaneMasks(RegUnits.count(), LaneBitmask::getNone()); | |||
2119 | // Iterate through SubRegisters. | |||
2120 | typedef CodeGenRegister::SubRegMap SubRegMap; | |||
2121 | const SubRegMap &SubRegs = Register.getSubRegs(); | |||
2122 | for (auto S : SubRegs) { | |||
2123 | CodeGenRegister *SubReg = S.second; | |||
2124 | // Ignore non-leaf subregisters, their lane masks are fully covered by | |||
2125 | // the leaf subregisters anyway. | |||
2126 | if (!SubReg->getSubRegs().empty()) | |||
2127 | continue; | |||
2128 | CodeGenSubRegIndex *SubRegIndex = S.first; | |||
2129 | const CodeGenRegister *SubRegister = S.second; | |||
2130 | LaneBitmask LaneMask = SubRegIndex->LaneMask; | |||
2131 | // Distribute LaneMask to Register Units touched. | |||
2132 | for (unsigned SUI : SubRegister->getRegUnits()) { | |||
2133 | bool Found = false; | |||
2134 | unsigned u = 0; | |||
2135 | for (unsigned RU : RegUnits) { | |||
2136 | if (SUI == RU) { | |||
2137 | RegUnitLaneMasks[u] |= LaneMask; | |||
2138 | assert(!Found)(static_cast <bool> (!Found) ? void (0) : __assert_fail ("!Found", "llvm/utils/TableGen/CodeGenRegisters.cpp", 2138, __extension__ __PRETTY_FUNCTION__)); | |||
2139 | Found = true; | |||
2140 | } | |||
2141 | ++u; | |||
2142 | } | |||
2143 | (void)Found; | |||
2144 | assert(Found)(static_cast <bool> (Found) ? void (0) : __assert_fail ( "Found", "llvm/utils/TableGen/CodeGenRegisters.cpp", 2144, __extension__ __PRETTY_FUNCTION__)); | |||
2145 | } | |||
2146 | } | |||
2147 | Register.setRegUnitLaneMasks(RegUnitLaneMasks); | |||
2148 | } | |||
2149 | } | |||
2150 | ||||
2151 | void CodeGenRegBank::computeDerivedInfo() { | |||
2152 | computeComposites(); | |||
2153 | computeSubRegLaneMasks(); | |||
| ||||
2154 | ||||
2155 | // Compute a weight for each register unit created during getSubRegs. | |||
2156 | // This may create adopted register units (with unit # >= NumNativeRegUnits). | |||
2157 | computeRegUnitWeights(); | |||
2158 | ||||
2159 | // Compute a unique set of RegUnitSets. One for each RegClass and inferred | |||
2160 | // supersets for the union of overlapping sets. | |||
2161 | computeRegUnitSets(); | |||
2162 | ||||
2163 | computeRegUnitLaneMasks(); | |||
2164 | ||||
2165 | // Compute register class HasDisjunctSubRegs/CoveredBySubRegs flag. | |||
2166 | for (CodeGenRegisterClass &RC : RegClasses) { | |||
2167 | RC.HasDisjunctSubRegs = false; | |||
2168 | RC.CoveredBySubRegs = true; | |||
2169 | for (const CodeGenRegister *Reg : RC.getMembers()) { | |||
2170 | RC.HasDisjunctSubRegs |= Reg->HasDisjunctSubRegs; | |||
2171 | RC.CoveredBySubRegs &= Reg->CoveredBySubRegs; | |||
2172 | } | |||
2173 | } | |||
2174 | ||||
2175 | // Get the weight of each set. | |||
2176 | for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) | |||
2177 | RegUnitSets[Idx].Weight = getRegUnitSetWeight(RegUnitSets[Idx].Units); | |||
2178 | ||||
2179 | // Find the order of each set. | |||
2180 | RegUnitSetOrder.reserve(RegUnitSets.size()); | |||
2181 | for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) | |||
2182 | RegUnitSetOrder.push_back(Idx); | |||
2183 | ||||
2184 | llvm::stable_sort(RegUnitSetOrder, [this](unsigned ID1, unsigned ID2) { | |||
2185 | return getRegPressureSet(ID1).Units.size() < | |||
2186 | getRegPressureSet(ID2).Units.size(); | |||
2187 | }); | |||
2188 | for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) { | |||
2189 | RegUnitSets[RegUnitSetOrder[Idx]].Order = Idx; | |||
2190 | } | |||
2191 | } | |||
2192 | ||||
2193 | // | |||
2194 | // Synthesize missing register class intersections. | |||
2195 | // | |||
2196 | // Make sure that sub-classes of RC exists such that getCommonSubClass(RC, X) | |||
2197 | // returns a maximal register class for all X. | |||
2198 | // | |||
2199 | void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { | |||
2200 | assert(!RegClasses.empty())(static_cast <bool> (!RegClasses.empty()) ? void (0) : __assert_fail ("!RegClasses.empty()", "llvm/utils/TableGen/CodeGenRegisters.cpp" , 2200, __extension__ __PRETTY_FUNCTION__)); | |||
2201 | // Stash the iterator to the last element so that this loop doesn't visit | |||
2202 | // elements added by the getOrCreateSubClass call within it. | |||
2203 | for (auto I = RegClasses.begin(), E = std::prev(RegClasses.end()); | |||
2204 | I != std::next(E); ++I) { | |||
2205 | CodeGenRegisterClass *RC1 = RC; | |||
2206 | CodeGenRegisterClass *RC2 = &*I; | |||
2207 | if (RC1 == RC2) | |||
2208 | continue; | |||
2209 | ||||
2210 | // Compute the set intersection of RC1 and RC2. | |||
2211 | const CodeGenRegister::Vec &Memb1 = RC1->getMembers(); | |||
2212 | const CodeGenRegister::Vec &Memb2 = RC2->getMembers(); | |||
2213 | CodeGenRegister::Vec Intersection; | |||
2214 | std::set_intersection(Memb1.begin(), Memb1.end(), Memb2.begin(), | |||
2215 | Memb2.end(), | |||
2216 | std::inserter(Intersection, Intersection.begin()), | |||
2217 | deref<std::less<>>()); | |||
2218 | ||||
2219 | // Skip disjoint class pairs. | |||
2220 | if (Intersection.empty()) | |||
2221 | continue; | |||
2222 | ||||
2223 | // If RC1 and RC2 have different spill sizes or alignments, use the | |||
2224 | // stricter one for sub-classing. If they are equal, prefer RC1. | |||
2225 | if (RC2->RSI.hasStricterSpillThan(RC1->RSI)) | |||
2226 | std::swap(RC1, RC2); | |||
2227 | ||||
2228 | getOrCreateSubClass(RC1, &Intersection, | |||
2229 | RC1->getName() + "_and_" + RC2->getName()); | |||
2230 | } | |||
2231 | } | |||
2232 | ||||
2233 | // | |||
2234 | // Synthesize missing sub-classes for getSubClassWithSubReg(). | |||
2235 | // | |||
2236 | // Make sure that the set of registers in RC with a given SubIdx sub-register | |||
2237 | // form a register class. Update RC->SubClassWithSubReg. | |||
2238 | // | |||
2239 | void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) { | |||
2240 | // Map SubRegIndex to set of registers in RC supporting that SubRegIndex. | |||
2241 | typedef std::map<const CodeGenSubRegIndex *, CodeGenRegister::Vec, | |||
2242 | deref<std::less<>>> | |||
2243 | SubReg2SetMap; | |||
2244 | ||||
2245 | // Compute the set of registers supporting each SubRegIndex. | |||
2246 | SubReg2SetMap SRSets; | |||
2247 | for (const auto R : RC->getMembers()) { | |||
2248 | if (R->Artificial) | |||
2249 | continue; | |||
2250 | const CodeGenRegister::SubRegMap &SRM = R->getSubRegs(); | |||
2251 | for (auto I : SRM) { | |||
2252 | if (!I.first->Artificial) | |||
2253 | SRSets[I.first].push_back(R); | |||
2254 | } | |||
2255 | } | |||
2256 | ||||
2257 | for (auto I : SRSets) | |||
2258 | sortAndUniqueRegisters(I.second); | |||
2259 | ||||
2260 | // Find matching classes for all SRSets entries. Iterate in SubRegIndex | |||
2261 | // numerical order to visit synthetic indices last. | |||
2262 | for (const auto &SubIdx : SubRegIndices) { | |||
2263 | if (SubIdx.Artificial) | |||
2264 | continue; | |||
2265 | SubReg2SetMap::const_iterator I = SRSets.find(&SubIdx); | |||
2266 | // Unsupported SubRegIndex. Skip it. | |||
2267 | if (I == SRSets.end()) | |||
2268 | continue; | |||
2269 | // In most cases, all RC registers support the SubRegIndex. | |||
2270 | if (I->second.size() == RC->getMembers().size()) { | |||
2271 | RC->setSubClassWithSubReg(&SubIdx, RC); | |||
2272 | continue; | |||
2273 | } | |||
2274 | // This is a real subset. See if we have a matching class. | |||
2275 | CodeGenRegisterClass *SubRC = | |||
2276 | getOrCreateSubClass(RC, &I->second, | |||
2277 | RC->getName() + "_with_" + I->first->getName()); | |||
2278 | RC->setSubClassWithSubReg(&SubIdx, SubRC); | |||
2279 | } | |||
2280 | } | |||
2281 | ||||
2282 | // | |||
2283 | // Synthesize missing sub-classes of RC for getMatchingSuperRegClass(). | |||
2284 | // | |||
2285 | // Create sub-classes of RC such that getMatchingSuperRegClass(RC, SubIdx, X) | |||
2286 | // has a maximal result for any SubIdx and any X >= FirstSubRegRC. | |||
2287 | // | |||
2288 | ||||
2289 | void CodeGenRegBank::inferMatchingSuperRegClass(CodeGenRegisterClass *RC, | |||
2290 | std::list<CodeGenRegisterClass>::iterator FirstSubRegRC) { | |||
2291 | SmallVector<std::pair<const CodeGenRegister*, | |||
2292 | const CodeGenRegister*>, 16> SSPairs; | |||
2293 | BitVector TopoSigs(getNumTopoSigs()); | |||
2294 | ||||
2295 | // Iterate in SubRegIndex numerical order to visit synthetic indices last. | |||
2296 | for (auto &SubIdx : SubRegIndices) { | |||
2297 | // Skip indexes that aren't fully supported by RC's registers. This was | |||
2298 | // computed by inferSubClassWithSubReg() above which should have been | |||
2299 | // called first. | |||
2300 | if (RC->getSubClassWithSubReg(&SubIdx) != RC) | |||
2301 | continue; | |||
2302 | ||||
2303 | // Build list of (Super, Sub) pairs for this SubIdx. | |||
2304 | SSPairs.clear(); | |||
2305 | TopoSigs.reset(); | |||
2306 | for (const auto Super : RC->getMembers()) { | |||
2307 | const CodeGenRegister *Sub = Super->getSubRegs().find(&SubIdx)->second; | |||
2308 | assert(Sub && "Missing sub-register")(static_cast <bool> (Sub && "Missing sub-register" ) ? void (0) : __assert_fail ("Sub && \"Missing sub-register\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 2308, __extension__ __PRETTY_FUNCTION__)); | |||
2309 | SSPairs.push_back(std::make_pair(Super, Sub)); | |||
2310 | TopoSigs.set(Sub->getTopoSig()); | |||
2311 | } | |||
2312 | ||||
2313 | // Iterate over sub-register class candidates. Ignore classes created by | |||
2314 | // this loop. They will never be useful. | |||
2315 | // Store an iterator to the last element (not end) so that this loop doesn't | |||
2316 | // visit newly inserted elements. | |||
2317 | assert(!RegClasses.empty())(static_cast <bool> (!RegClasses.empty()) ? void (0) : __assert_fail ("!RegClasses.empty()", "llvm/utils/TableGen/CodeGenRegisters.cpp" , 2317, __extension__ __PRETTY_FUNCTION__)); | |||
2318 | for (auto I = FirstSubRegRC, E = std::prev(RegClasses.end()); | |||
2319 | I != std::next(E); ++I) { | |||
2320 | CodeGenRegisterClass &SubRC = *I; | |||
2321 | if (SubRC.Artificial) | |||
2322 | continue; | |||
2323 | // Topological shortcut: SubRC members have the wrong shape. | |||
2324 | if (!TopoSigs.anyCommon(SubRC.getTopoSigs())) | |||
2325 | continue; | |||
2326 | // Compute the subset of RC that maps into SubRC. | |||
2327 | CodeGenRegister::Vec SubSetVec; | |||
2328 | for (unsigned i = 0, e = SSPairs.size(); i != e; ++i) | |||
2329 | if (SubRC.contains(SSPairs[i].second)) | |||
2330 | SubSetVec.push_back(SSPairs[i].first); | |||
2331 | ||||
2332 | if (SubSetVec.empty()) | |||
2333 | continue; | |||
2334 | ||||
2335 | // RC injects completely into SubRC. | |||
2336 | sortAndUniqueRegisters(SubSetVec); | |||
2337 | if (SubSetVec.size() == SSPairs.size()) { | |||
2338 | SubRC.addSuperRegClass(&SubIdx, RC); | |||
2339 | continue; | |||
2340 | } | |||
2341 | ||||
2342 | // Only a subset of RC maps into SubRC. Make sure it is represented by a | |||
2343 | // class. | |||
2344 | getOrCreateSubClass(RC, &SubSetVec, RC->getName() + "_with_" + | |||
2345 | SubIdx.getName() + "_in_" + | |||
2346 | SubRC.getName()); | |||
2347 | } | |||
2348 | } | |||
2349 | } | |||
2350 | ||||
2351 | // | |||
2352 | // Infer missing register classes. | |||
2353 | // | |||
2354 | void CodeGenRegBank::computeInferredRegisterClasses() { | |||
2355 | assert(!RegClasses.empty())(static_cast <bool> (!RegClasses.empty()) ? void (0) : __assert_fail ("!RegClasses.empty()", "llvm/utils/TableGen/CodeGenRegisters.cpp" , 2355, __extension__ __PRETTY_FUNCTION__)); | |||
2356 | // When this function is called, the register classes have not been sorted | |||
2357 | // and assigned EnumValues yet. That means getSubClasses(), | |||
2358 | // getSuperClasses(), and hasSubClass() functions are defunct. | |||
2359 | ||||
2360 | // Use one-before-the-end so it doesn't move forward when new elements are | |||
2361 | // added. | |||
2362 | auto FirstNewRC = std::prev(RegClasses.end()); | |||
2363 | ||||
2364 | // Visit all register classes, including the ones being added by the loop. | |||
2365 | // Watch out for iterator invalidation here. | |||
2366 | for (auto I = RegClasses.begin(), E = RegClasses.end(); I != E; ++I) { | |||
2367 | CodeGenRegisterClass *RC = &*I; | |||
2368 | if (RC->Artificial) | |||
2369 | continue; | |||
2370 | ||||
2371 | // Synthesize answers for getSubClassWithSubReg(). | |||
2372 | inferSubClassWithSubReg(RC); | |||
2373 | ||||
2374 | // Synthesize answers for getCommonSubClass(). | |||
2375 | inferCommonSubClass(RC); | |||
2376 | ||||
2377 | // Synthesize answers for getMatchingSuperRegClass(). | |||
2378 | inferMatchingSuperRegClass(RC); | |||
2379 | ||||
2380 | // New register classes are created while this loop is running, and we need | |||
2381 | // to visit all of them. I particular, inferMatchingSuperRegClass needs | |||
2382 | // to match old super-register classes with sub-register classes created | |||
2383 | // after inferMatchingSuperRegClass was called. At this point, | |||
2384 | // inferMatchingSuperRegClass has checked SuperRC = [0..rci] with SubRC = | |||
2385 | // [0..FirstNewRC). We need to cover SubRC = [FirstNewRC..rci]. | |||
2386 | if (I == FirstNewRC) { | |||
2387 | auto NextNewRC = std::prev(RegClasses.end()); | |||
2388 | for (auto I2 = RegClasses.begin(), E2 = std::next(FirstNewRC); I2 != E2; | |||
2389 | ++I2) | |||
2390 | inferMatchingSuperRegClass(&*I2, E2); | |||
2391 | FirstNewRC = NextNewRC; | |||
2392 | } | |||
2393 | } | |||
2394 | } | |||
2395 | ||||
2396 | /// getRegisterClassForRegister - Find the register class that contains the | |||
2397 | /// specified physical register. If the register is not in a register class, | |||
2398 | /// return null. If the register is in multiple classes, and the classes have a | |||
2399 | /// superset-subset relationship and the same set of types, return the | |||
2400 | /// superclass. Otherwise return null. | |||
2401 | const CodeGenRegisterClass* | |||
2402 | CodeGenRegBank::getRegClassForRegister(Record *R) { | |||
2403 | const CodeGenRegister *Reg = getReg(R); | |||
2404 | const CodeGenRegisterClass *FoundRC = nullptr; | |||
2405 | for (const auto &RC : getRegClasses()) { | |||
2406 | if (!RC.contains(Reg)) | |||
2407 | continue; | |||
2408 | ||||
2409 | // If this is the first class that contains the register, | |||
2410 | // make a note of it and go on to the next class. | |||
2411 | if (!FoundRC) { | |||
2412 | FoundRC = &RC; | |||
2413 | continue; | |||
2414 | } | |||
2415 | ||||
2416 | // If a register's classes have different types, return null. | |||
2417 | if (RC.getValueTypes() != FoundRC->getValueTypes()) | |||
2418 | return nullptr; | |||
2419 | ||||
2420 | // Check to see if the previously found class that contains | |||
2421 | // the register is a subclass of the current class. If so, | |||
2422 | // prefer the superclass. | |||
2423 | if (RC.hasSubClass(FoundRC)) { | |||
2424 | FoundRC = &RC; | |||
2425 | continue; | |||
2426 | } | |||
2427 | ||||
2428 | // Check to see if the previously found class that contains | |||
2429 | // the register is a superclass of the current class. If so, | |||
2430 | // prefer the superclass. | |||
2431 | if (FoundRC->hasSubClass(&RC)) | |||
2432 | continue; | |||
2433 | ||||
2434 | // Multiple classes, and neither is a superclass of the other. | |||
2435 | // Return null. | |||
2436 | return nullptr; | |||
2437 | } | |||
2438 | return FoundRC; | |||
2439 | } | |||
2440 | ||||
2441 | const CodeGenRegisterClass * | |||
2442 | CodeGenRegBank::getMinimalPhysRegClass(Record *RegRecord, | |||
2443 | ValueTypeByHwMode *VT) { | |||
2444 | const CodeGenRegister *Reg = getReg(RegRecord); | |||
2445 | const CodeGenRegisterClass *BestRC = nullptr; | |||
2446 | for (const auto &RC : getRegClasses()) { | |||
2447 | if ((!VT || RC.hasType(*VT)) && | |||
2448 | RC.contains(Reg) && (!BestRC || BestRC->hasSubClass(&RC))) | |||
2449 | BestRC = &RC; | |||
2450 | } | |||
2451 | ||||
2452 | assert(BestRC && "Couldn't find the register class")(static_cast <bool> (BestRC && "Couldn't find the register class" ) ? void (0) : __assert_fail ("BestRC && \"Couldn't find the register class\"" , "llvm/utils/TableGen/CodeGenRegisters.cpp", 2452, __extension__ __PRETTY_FUNCTION__)); | |||
2453 | return BestRC; | |||
2454 | } | |||
2455 | ||||
2456 | BitVector CodeGenRegBank::computeCoveredRegisters(ArrayRef<Record*> Regs) { | |||
2457 | SetVector<const CodeGenRegister*> Set; | |||
2458 | ||||
2459 | // First add Regs with all sub-registers. | |||
2460 | for (unsigned i = 0, e = Regs.size(); i != e; ++i) { | |||
2461 | CodeGenRegister *Reg = getReg(Regs[i]); | |||
2462 | if (Set.insert(Reg)) | |||
2463 | // Reg is new, add all sub-registers. | |||
2464 | // The pre-ordering is not important here. | |||
2465 | Reg->addSubRegsPreOrder(Set, *this); | |||
2466 | } | |||
2467 | ||||
2468 | // Second, find all super-registers that are completely covered by the set. | |||
2469 | for (unsigned i = 0; i != Set.size(); ++i) { | |||
2470 | const CodeGenRegister::SuperRegList &SR = Set[i]->getSuperRegs(); | |||
2471 | for (unsigned j = 0, e = SR.size(); j != e; ++j) { | |||
2472 | const CodeGenRegister *Super = SR[j]; | |||
2473 | if (!Super->CoveredBySubRegs || Set.count(Super)) | |||
2474 | continue; | |||
2475 | // This new super-register is covered by its sub-registers. | |||
2476 | bool AllSubsInSet = true; | |||
2477 | const CodeGenRegister::SubRegMap &SRM = Super->getSubRegs(); | |||
2478 | for (auto I : SRM) | |||
2479 | if (!Set.count(I.second)) { | |||
2480 | AllSubsInSet = false; | |||
2481 | break; | |||
2482 | } | |||
2483 | // All sub-registers in Set, add Super as well. | |||
2484 | // We will visit Super later to recheck its super-registers. | |||
2485 | if (AllSubsInSet) | |||
2486 | Set.insert(Super); | |||
2487 | } | |||
2488 | } | |||
2489 | ||||
2490 | // Convert to BitVector. | |||
2491 | BitVector BV(Registers.size() + 1); | |||
2492 | for (unsigned i = 0, e = Set.size(); i != e; ++i) | |||
2493 | BV.set(Set[i]->EnumValue); | |||
2494 | return BV; | |||
2495 | } | |||
2496 | ||||
2497 | void CodeGenRegBank::printRegUnitName(unsigned Unit) const { | |||
2498 | if (Unit < NumNativeRegUnits) | |||
2499 | dbgs() << ' ' << RegUnits[Unit].Roots[0]->getName(); | |||
2500 | else | |||
2501 | dbgs() << " #" << Unit; | |||
2502 | } |
1 | //===- llvm/MC/LaneBitmask.h ------------------------------------*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// | |||
9 | /// \file | |||
10 | /// A common definition of LaneBitmask for use in TableGen and CodeGen. | |||
11 | /// | |||
12 | /// A lane mask is a bitmask representing the covering of a register with | |||
13 | /// sub-registers. | |||
14 | /// | |||
15 | /// This is typically used to track liveness at sub-register granularity. | |||
16 | /// Lane masks for sub-register indices are similar to register units for | |||
17 | /// physical registers. The individual bits in a lane mask can't be assigned | |||
18 | /// any specific meaning. They can be used to check if two sub-register | |||
19 | /// indices overlap. | |||
20 | /// | |||
21 | /// Iff the target has a register such that: | |||
22 | /// | |||
23 | /// getSubReg(Reg, A) overlaps getSubReg(Reg, B) | |||
24 | /// | |||
25 | /// then: | |||
26 | /// | |||
27 | /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0 | |||
28 | ||||
29 | #ifndef LLVM_MC_LANEBITMASK_H | |||
30 | #define LLVM_MC_LANEBITMASK_H | |||
31 | ||||
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
37 | ||||
38 | namespace llvm { | |||
39 | ||||
40 | struct LaneBitmask { | |||
41 | // When changing the underlying type, change the format string as well. | |||
42 | using Type = uint64_t; | |||
43 | enum : unsigned { BitWidth = 8*sizeof(Type) }; | |||
44 | constexpr static const char *const FormatStr = "%016llX"; | |||
45 | ||||
46 | constexpr LaneBitmask() = default; | |||
47 | explicit constexpr LaneBitmask(Type V) : Mask(V) {} | |||
48 | ||||
49 | constexpr bool operator== (LaneBitmask M) const { return Mask == M.Mask; } | |||
50 | constexpr bool operator!= (LaneBitmask M) const { return Mask != M.Mask; } | |||
51 | constexpr bool operator< (LaneBitmask M) const { return Mask < M.Mask; } | |||
52 | constexpr bool none() const { return Mask == 0; } | |||
53 | constexpr bool any() const { return Mask != 0; } | |||
54 | constexpr bool all() const { return ~Mask == 0; } | |||
55 | ||||
56 | constexpr LaneBitmask operator~() const { | |||
57 | return LaneBitmask(~Mask); | |||
58 | } | |||
59 | constexpr LaneBitmask operator|(LaneBitmask M) const { | |||
60 | return LaneBitmask(Mask | M.Mask); | |||
61 | } | |||
62 | constexpr LaneBitmask operator&(LaneBitmask M) const { | |||
63 | return LaneBitmask(Mask & M.Mask); | |||
64 | } | |||
65 | LaneBitmask &operator|=(LaneBitmask M) { | |||
66 | Mask |= M.Mask; | |||
67 | return *this; | |||
68 | } | |||
69 | LaneBitmask &operator&=(LaneBitmask M) { | |||
70 | Mask &= M.Mask; | |||
71 | return *this; | |||
72 | } | |||
73 | ||||
74 | constexpr Type getAsInteger() const { return Mask; } | |||
75 | ||||
76 | unsigned getNumLanes() const { return llvm::popcount(Mask); } | |||
77 | unsigned getHighestLane() const { | |||
78 | return Log2_64(Mask); | |||
79 | } | |||
80 | ||||
81 | static constexpr LaneBitmask getNone() { return LaneBitmask(0); } | |||
82 | static constexpr LaneBitmask getAll() { return ~LaneBitmask(0); } | |||
83 | static constexpr LaneBitmask getLane(unsigned Lane) { | |||
84 | return LaneBitmask(Type(1) << Lane); | |||
| ||||
85 | } | |||
86 | ||||
87 | private: | |||
88 | Type Mask = 0; | |||
89 | }; | |||
90 | ||||
91 | /// Create Printable object to print LaneBitmasks on a \ref raw_ostream. | |||
92 | inline Printable PrintLaneMask(LaneBitmask LaneMask) { | |||
93 | return Printable([LaneMask](raw_ostream &OS) { | |||
94 | OS << format(LaneBitmask::FormatStr, LaneMask.getAsInteger()); | |||
95 | }); | |||
96 | } | |||
97 | ||||
98 | } // end namespace llvm | |||
99 | ||||
100 | #endif // LLVM_MC_LANEBITMASK_H |
1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains some functions that are useful for math stuff. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_SUPPORT_MATHEXTRAS_H |
14 | #define LLVM_SUPPORT_MATHEXTRAS_H |
15 | |
16 | #include "llvm/ADT/bit.h" |
17 | #include "llvm/Support/Compiler.h" |
18 | #include <cassert> |
19 | #include <climits> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | #include <limits> |
23 | #include <type_traits> |
24 | |
25 | namespace llvm { |
26 | |
27 | /// Mathematical constants. |
28 | namespace numbers { |
29 | // TODO: Track C++20 std::numbers. |
30 | // TODO: Favor using the hexadecimal FP constants (requires C++17). |
31 | constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 |
32 | egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 |
33 | ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 |
34 | ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392 |
35 | log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) |
36 | log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) |
37 | pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 |
38 | inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 |
39 | sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 |
40 | inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 |
41 | sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219 |
42 | inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) |
43 | sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 |
44 | inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) |
45 | phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 |
46 | constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 |
47 | egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 |
48 | ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 |
49 | ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 |
50 | log2ef = 1.44269504F, // (0x1.715476P+0) |
51 | log10ef = .434294482F, // (0x1.bcb7b2P-2) |
52 | pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 |
53 | inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 |
54 | sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 |
55 | inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 |
56 | sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 |
57 | inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) |
58 | sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 |
59 | inv_sqrt3f = .577350269F, // (0x1.279a74P-1) |
60 | phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 |
61 | } // namespace numbers |
62 | |
63 | /// Count number of 0's from the least significant bit to the most |
64 | /// stopping at the first 1. |
65 | /// |
66 | /// Only unsigned integral types are allowed. |
67 | /// |
68 | /// Returns std::numeric_limits<T>::digits on an input of 0. |
69 | template <typename T> |
70 | LLVM_DEPRECATED("Use llvm::countr_zero instead.", "llvm::countr_zero")__attribute__((deprecated("Use llvm::countr_zero instead.", "llvm::countr_zero" ))) |
71 | unsigned countTrailingZeros(T Val) { |
72 | static_assert(std::is_unsigned_v<T>, |
73 | "Only unsigned integral types are allowed."); |
74 | return llvm::countr_zero(Val); |
75 | } |
76 | |
77 | /// Count number of 0's from the most significant bit to the least |
78 | /// stopping at the first 1. |
79 | /// |
80 | /// Only unsigned integral types are allowed. |
81 | /// |
82 | /// Returns std::numeric_limits<T>::digits on an input of 0. |
83 | template <typename T> |
84 | LLVM_DEPRECATED("Use llvm::countl_zero instead.", "llvm::countl_zero")__attribute__((deprecated("Use llvm::countl_zero instead.", "llvm::countl_zero" ))) |
85 | unsigned countLeadingZeros(T Val) { |
86 | static_assert(std::is_unsigned_v<T>, |
87 | "Only unsigned integral types are allowed."); |
88 | return llvm::countl_zero(Val); |
89 | } |
90 | |
/// Create a bitmask with the N right-most bits set to 1, and all other
/// bits set to 0. Only unsigned types are allowed.
template <typename T> T maskTrailingOnes(unsigned N) {
  static_assert(std::is_unsigned_v<T>, "Invalid type!");
  const unsigned Bits = CHAR_BIT * sizeof(T);
  assert(N <= Bits && "Invalid bit index");
  // N == 0 must be special-cased: T(-1) >> Bits would be UB.
  if (N == 0)
    return 0;
  return T(-1) >> (Bits - N);
}

/// Create a bitmask with the N left-most bits set to 1, and all other
/// bits set to 0. Only unsigned types are allowed.
template <typename T> T maskLeadingOnes(unsigned N) {
  return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
}

/// Create a bitmask with the N right-most bits set to 0, and all other
/// bits set to 1. Only unsigned types are allowed.
template <typename T> T maskTrailingZeros(unsigned N) {
  return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
}

/// Create a bitmask with the N left-most bits set to 0, and all other
/// bits set to 1. Only unsigned types are allowed.
template <typename T> T maskLeadingZeros(unsigned N) {
  return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
}
117 | |
/// Macro compressed bit reversal table for 256 bits.
///
/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
static const unsigned char BitReverseTable256[256] = {
#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
    R6(0), R6(2), R6(1), R6(3)
#undef R2
#undef R4
#undef R6
};

/// Reverse the bits in \p Val.
template <typename T> T reverseBits(T Val) {
  // Fast path: use a single-instruction builtin for the standard widths
  // when the compiler provides one.
#if __has_builtin(__builtin_bitreverse8)
  if constexpr (std::is_same_v<T, uint8_t>)
    return __builtin_bitreverse8(Val);
#endif
#if __has_builtin(__builtin_bitreverse16)
  if constexpr (std::is_same_v<T, uint16_t>)
    return __builtin_bitreverse16(Val);
#endif
#if __has_builtin(__builtin_bitreverse32)
  if constexpr (std::is_same_v<T, uint32_t>)
    return __builtin_bitreverse32(Val);
#endif
#if __has_builtin(__builtin_bitreverse64)
  if constexpr (std::is_same_v<T, uint64_t>)
    return __builtin_bitreverse64(Val);
#endif

  // Generic fallback: reverse byte order while reversing the bits of each
  // byte through the lookup table.
  unsigned char Src[sizeof(Val)];
  unsigned char Dst[sizeof(Val)];
  std::memcpy(Src, &Val, sizeof(Val));
  for (unsigned Idx = 0; Idx < sizeof(Val); ++Idx)
    Dst[(sizeof(Val) - Idx) - 1] = BitReverseTable256[Src[Idx]];
  std::memcpy(&Val, Dst, sizeof(Val));
  return Val;
}
158 | |
159 | // NOTE: The following support functions use the _32/_64 extensions instead of |
160 | // type overloading so that signed and unsigned integers can be used without |
161 | // ambiguity. |
162 | |
/// Return the high 32 bits of a 64 bit value.
constexpr inline uint32_t Hi_32(uint64_t Value) {
  return static_cast<uint32_t>(Value >> 32);
}

/// Return the low 32 bits of a 64 bit value.
constexpr inline uint32_t Lo_32(uint64_t Value) {
  // Truncation keeps only the low 32 bits.
  return static_cast<uint32_t>(Value);
}

/// Make a 64-bit integer from a high / low pair of 32-bit integers.
constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
  return (static_cast<uint64_t>(High) << 32) | static_cast<uint64_t>(Low);
}
177 | |
178 | /// Checks if an integer fits into the given bit width. |
179 | template <unsigned N> constexpr inline bool isInt(int64_t x) { |
180 | if constexpr (N == 8) |
181 | return static_cast<int8_t>(x) == x; |
182 | if constexpr (N == 16) |
183 | return static_cast<int16_t>(x) == x; |
184 | if constexpr (N == 32) |
185 | return static_cast<int32_t>(x) == x; |
186 | if constexpr (N < 64) |
187 | return -(INT64_C(1)1L << (N - 1)) <= x && x < (INT64_C(1)1L << (N - 1)); |
188 | (void)x; // MSVC v19.25 warns that x is unused. |
189 | return true; |
190 | } |
191 | |
192 | /// Checks if a signed integer is an N bit number shifted left by S. |
193 | template <unsigned N, unsigned S> |
194 | constexpr inline bool isShiftedInt(int64_t x) { |
195 | static_assert( |
196 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); |
197 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); |
198 | return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); |
199 | } |
200 | |
201 | /// Checks if an unsigned integer fits into the given bit width. |
202 | template <unsigned N> constexpr inline bool isUInt(uint64_t x) { |
203 | static_assert(N > 0, "isUInt<0> doesn't make sense"); |
204 | if constexpr (N == 8) |
205 | return static_cast<uint8_t>(x) == x; |
206 | if constexpr (N == 16) |
207 | return static_cast<uint16_t>(x) == x; |
208 | if constexpr (N == 32) |
209 | return static_cast<uint32_t>(x) == x; |
210 | if constexpr (N < 64) |
211 | return x < (UINT64_C(1)1UL << (N)); |
212 | (void)x; // MSVC v19.25 warns that x is unused. |
213 | return true; |
214 | } |
215 | |
216 | /// Checks if a unsigned integer is an N bit number shifted left by S. |
217 | template <unsigned N, unsigned S> |
218 | constexpr inline bool isShiftedUInt(uint64_t x) { |
219 | static_assert( |
220 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); |
221 | static_assert(N + S <= 64, |
222 | "isShiftedUInt<N, S> with N + S > 64 is too wide."); |
223 | // Per the two static_asserts above, S must be strictly less than 64. So |
224 | // 1 << S is not undefined behavior. |
225 | return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); |
226 | } |
227 | |
/// Gets the maximum value for a N-bit unsigned integer.
inline uint64_t maxUIntN(uint64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // uint64_t(1) << 64 is undefined behavior, so we can't do
  //   (uint64_t(1) << N) - 1
  // without checking first that N != 64. Shifting the all-ones value right
  // instead works for the full range and has no branch.
  return UINT64_MAX >> (64 - N);
}

/// Gets the minimum value for a N-bit signed integer.
inline int64_t minIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // Two's-complement negation of (1 << (N-1)), done in unsigned arithmetic
  // so N == 64 does not overflow.
  return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
}

/// Gets the maximum value for a N-bit signed integer.
inline int64_t maxIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // This relies on two's complement wraparound when N == 64, so we convert to
  // int64_t only at the very end to avoid UB.
  return (UINT64_C(1) << (N - 1)) - 1;
}

/// Checks if an unsigned integer fits into the given (dynamic) bit width.
inline bool isUIntN(unsigned N, uint64_t x) {
  return N >= 64 || x <= maxUIntN(N);
}

/// Checks if an signed integer fits into the given (dynamic) bit width.
inline bool isIntN(unsigned N, int64_t x) {
  return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
}
264 | |
/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (32 bit version).
/// Ex. isMask_32(0x0000FFFFU) == true.
constexpr inline bool isMask_32(uint32_t Value) {
  // A mask plus one is a power of two, so (Value+1) & Value == 0.
  return Value && ((Value + 1) & Value) == 0;
}

/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (64 bit version).
constexpr inline bool isMask_64(uint64_t Value) {
  return Value && ((Value + 1) & Value) == 0;
}

/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
constexpr inline bool isShiftedMask_32(uint32_t Value) {
  // Filling the trailing zeros with ones must produce a trailing mask.
  return Value && isMask_32((Value - 1) | Value);
}

/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (64 bit version.)
constexpr inline bool isShiftedMask_64(uint64_t Value) {
  return Value && isMask_64((Value - 1) | Value);
}
289 | |
290 | /// Return true if the argument is a power of two > 0. |
291 | /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) |
292 | constexpr inline bool isPowerOf2_32(uint32_t Value) { |
293 | return llvm::has_single_bit(Value); |
294 | } |
295 | |
296 | /// Return true if the argument is a power of two > 0 (64 bit edition.) |
297 | constexpr inline bool isPowerOf2_64(uint64_t Value) { |
298 | return llvm::has_single_bit(Value); |
299 | } |
300 | |
301 | /// Count the number of ones from the most significant bit to the first |
302 | /// zero bit. |
303 | /// |
304 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. |
305 | /// Only unsigned integral types are allowed. |
306 | /// |
307 | /// Returns std::numeric_limits<T>::digits on an input of all ones. |
308 | template <typename T> |
309 | LLVM_DEPRECATED("Use llvm::countl_one instead.", "llvm::countl_one")__attribute__((deprecated("Use llvm::countl_one instead.", "llvm::countl_one" ))) |
310 | unsigned countLeadingOnes(T Value) { |
311 | static_assert(std::is_unsigned_v<T>, |
312 | "Only unsigned integral types are allowed."); |
313 | return llvm::countl_one<T>(Value); |
314 | } |
315 | |
316 | /// Count the number of ones from the least significant bit to the first |
317 | /// zero bit. |
318 | /// |
319 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. |
320 | /// Only unsigned integral types are allowed. |
321 | /// |
322 | /// Returns std::numeric_limits<T>::digits on an input of all ones. |
323 | template <typename T> |
324 | LLVM_DEPRECATED("Use llvm::countr_one instead.", "llvm::countr_one")__attribute__((deprecated("Use llvm::countr_one instead.", "llvm::countr_one" ))) |
325 | unsigned countTrailingOnes(T Value) { |
326 | static_assert(std::is_unsigned_v<T>, |
327 | "Only unsigned integral types are allowed."); |
328 | return llvm::countr_one<T>(Value); |
329 | } |
330 | |
331 | /// Count the number of set bits in a value. |
332 | /// Ex. countPopulation(0xF000F000) = 8 |
333 | /// Returns 0 if the word is zero. |
334 | template <typename T> |
335 | LLVM_DEPRECATED("Use llvm::popcount instead.", "llvm::popcount")__attribute__((deprecated("Use llvm::popcount instead.", "llvm::popcount" ))) |
336 | inline unsigned countPopulation(T Value) { |
337 | static_assert(std::is_unsigned_v<T>, |
338 | "Only unsigned integral types are allowed."); |
339 | return (unsigned)llvm::popcount(Value); |
340 | } |
341 | |
342 | /// Return true if the argument contains a non-empty sequence of ones with the |
343 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. |
344 | /// If true, \p MaskIdx will specify the index of the lowest set bit and \p |
345 | /// MaskLen is updated to specify the length of the mask, else neither are |
346 | /// updated. |
347 | inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx, |
348 | unsigned &MaskLen) { |
349 | if (!isShiftedMask_32(Value)) |
350 | return false; |
351 | MaskIdx = llvm::countr_zero(Value); |
352 | MaskLen = llvm::popcount(Value); |
353 | return true; |
354 | } |
355 | |
356 | /// Return true if the argument contains a non-empty sequence of ones with the |
357 | /// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index |
358 | /// of the lowest set bit and \p MaskLen is updated to specify the length of the |
359 | /// mask, else neither are updated. |
360 | inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx, |
361 | unsigned &MaskLen) { |
362 | if (!isShiftedMask_64(Value)) |
363 | return false; |
364 | MaskIdx = llvm::countr_zero(Value); |
365 | MaskLen = llvm::popcount(Value); |
366 | return true; |
367 | } |
368 | |
369 | /// Compile time Log2. |
370 | /// Valid only for positive powers of two. |
371 | template <size_t kValue> constexpr inline size_t CTLog2() { |
372 | static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), |
373 | "Value is not a valid power of 2"); |
374 | return 1 + CTLog2<kValue / 2>(); |
375 | } |
376 | |
377 | template <> constexpr inline size_t CTLog2<1>() { return 0; } |
378 | |
/// Return the floor log base 2 of the specified value, -1 if the value is zero.
/// (32 bit edition.)
/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
inline unsigned Log2_32(uint32_t Value) {
  // Index of the highest set bit; for a zero input the subtraction yields -1
  // wrapped into unsigned, matching the documented contract.
  return 31 - llvm::countl_zero(Value);
}
385 | |
/// Return the floor log base 2 of the specified value, -1 if the value is zero.
/// (64 bit edition.)
inline unsigned Log2_64(uint64_t Value) {
  // Index of the highest set bit; for a zero input the subtraction yields -1
  // wrapped into unsigned, matching the documented contract.
  return 63 - llvm::countl_zero(Value);
}
391 | |
/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
/// (32 bit edition).
/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
inline unsigned Log2_32_Ceil(uint32_t Value) {
  // Value - 1 rounds exact powers of two down one bit so they are not
  // over-counted; a zero input wraps to ~0u, giving the documented 32.
  return 32 - llvm::countl_zero(Value - 1);
}
398 | |
/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
/// (64 bit edition.)
inline unsigned Log2_64_Ceil(uint64_t Value) {
  // Value - 1 rounds exact powers of two down one bit so they are not
  // over-counted; a zero input wraps to ~0ULL, giving the documented 64.
  return 64 - llvm::countl_zero(Value - 1);
}
404 | |
/// This function takes a 64-bit integer and returns the bit equivalent double.
LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<double>")
inline double BitsToDouble(uint64_t Bits) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // Deprecated thin wrapper over the replacement named in the message.
  return llvm::bit_cast<double>(Bits);
}
411 | |
/// This function takes a 32-bit integer and returns the bit equivalent float.
LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<float>")
inline float BitsToFloat(uint32_t Bits) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // Deprecated thin wrapper over the replacement named in the message.
  return llvm::bit_cast<float>(Bits);
}
418 | |
/// This function takes a double and returns the bit equivalent 64-bit integer.
/// Note that copying doubles around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<uint64_t>")
inline uint64_t DoubleToBits(double Double) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // Deprecated thin wrapper over the replacement named in the message.
  return llvm::bit_cast<uint64_t>(Double);
}
427 | |
/// This function takes a float and returns the bit equivalent 32-bit integer.
/// Note that copying floats around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
LLVM_DEPRECATED("use llvm::bit_cast instead", "llvm::bit_cast<uint32_t>")
inline uint32_t FloatToBits(float Float) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // Deprecated thin wrapper over the replacement named in the message.
  return llvm::bit_cast<uint32_t>(Float);
}
436 | |
/// A and B are either alignments or offsets. Return the minimum alignment that
/// may be assumed after adding the two together.
constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
  // The answer is the lowest set bit of (A | B): the largest power of 2 that
  // divides both values.  "~Bits + 1" is two's-complement negation written
  // without unary minus, which would trigger MSVC warning C4146.
  const uint64_t Bits = A | B;
  return Bits & (~Bits + 1);
}
447 | |
/// Returns the next power of two (in 64-bits) that is strictly greater than A.
/// Returns zero on overflow.
constexpr inline uint64_t NextPowerOf2(uint64_t A) {
  // Smear the highest set bit into every lower position (shifts 1,2,4,...,32
  // cover all 64 bits), then add one to reach the next power of two.  If A's
  // top bit was set, the increment wraps to zero — the documented overflow.
  for (unsigned Shift = 1; Shift < 64; Shift <<= 1)
    A |= (A >> Shift);
  return A + 1;
}
459 | |
/// Returns the power of two which is less than or equal to the given value.
/// Essentially, it is a floor operation across the domain of powers of two.
LLVM_DEPRECATED("use llvm::bit_floor instead", "llvm::bit_floor")
inline uint64_t PowerOf2Floor(uint64_t A) {
  // Deprecated thin wrapper over the replacement named in the message.
  return llvm::bit_floor(A);
}
466 | |
/// Returns the power of two which is greater than or equal to the given value.
/// Essentially, it is a ceil operation across the domain of powers of two.
inline uint64_t PowerOf2Ceil(uint64_t A) {
  if (A == 0)
    return 0;
  // NextPowerOf2(A - 1), written out: smear the top set bit of A - 1 into all
  // lower positions, then add one.  Starting from A - 1 keeps exact powers of
  // two unchanged while everything else rounds up.
  uint64_t X = A - 1;
  X |= X >> 1;
  X |= X >> 2;
  X |= X >> 4;
  X |= X >> 8;
  X |= X >> 16;
  X |= X >> 32;
  return X + 1;
}
474 | |
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
///
/// Examples:
/// \code
///   alignTo(5, 8) = 8
///   alignTo(17, 8) = 24
///   alignTo(~0LL, 8) = 0
///   alignTo(321, 255) = 510
/// \endcode
inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
  assert(Align != 0u && "Align can't be 0.");
  // Bump Value to at least the next multiple, then strip the remainder.
  // Equivalent to Bumped / Align * Align, including the mod-2**64 wraparound.
  const uint64_t Bumped = Value + Align - 1;
  return Bumped - Bumped % Align;
}
489 | |
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align, which must be a non-zero power of
/// two.
inline uint64_t alignToPowerOf2(uint64_t Value, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 &&
         "Align must be a power of 2");
  // ~(Align - 1) masks off the low log2(Align) bits; it is bit-identical to
  // the two's-complement -Align.
  return (Value + Align - 1) & ~(Align - 1);
}
495 | |
/// Returns a minimal integer (mod 2**64) that is greater than or equal to
/// \p Value and equal to \p Align * N + \p Skew for some integer N.  If \p
/// Skew is larger than \p Align, its value is adjusted to 'Skew mod Align'.
/// \p Align must be non-zero.
///
/// Examples:
/// \code
///   alignTo(5, 8, 7) = 7
///   alignTo(17, 8, 1) = 17
///   alignTo(~0LL, 8, 3) = 3
///   alignTo(321, 255, 42) = 552
/// \endcode
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  // Shift into the unskewed domain, round up to a multiple of Align (the
  // two-argument alignTo, inlined), then shift back.
  const uint64_t Bumped = Value - Skew + Align - 1;
  return Bumped - Bumped % Align + Skew;
}
513 | |
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
  static_assert(Align != 0u, "Align must be non-zero");
  // Bump Value to at least the next multiple, then strip the remainder.
  const uint64_t Bumped = Value + Align - 1;
  return Bumped - Bumped % Align;
}
520 | |
/// Returns the integer ceil(Numerator / Denominator).
inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
  // Identical to alignTo(Numerator, Denominator) / Denominator: bumping the
  // numerator by Denominator - 1 makes any nonzero remainder round up.
  return (Numerator + Denominator - 1) / Denominator;
}
525 | |
/// Returns the integer nearest(Numerator / Denominator).
inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
  // Adding half the denominator before the truncating divide rounds the
  // quotient to the nearest integer (ties round up for even denominators).
  const uint64_t HalfDen = Denominator / 2;
  return (Numerator + HalfDen) / Denominator;
}
530 | |
/// Returns the largest uint64_t less than or equal to \p Value and is
/// \p Skew mod \p Align. \p Align must be non-zero
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  // Shift into the unskewed domain, truncate down to a multiple of Align,
  // then shift back.
  const uint64_t Delta = Value - Skew;
  return Delta - Delta % Align + Skew;
}
538 | |
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 32, "Bit width out of range.");
  // Shift bit B-1 up to the sign position, then arithmetic-shift it back
  // down, replicating it across the upper bits.
  constexpr unsigned ShiftAmt = 32 - B;
  return int32_t(X << ShiftAmt) >> ShiftAmt;
}
546 | |
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
inline int32_t SignExtend32(uint32_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 32 && "Bit width out of range.");
  // Shift bit B-1 up to the sign position, then arithmetic-shift it back
  // down, replicating it across the upper bits.
  const unsigned ShiftAmt = 32 - B;
  return int32_t(X << ShiftAmt) >> ShiftAmt;
}
554 | |
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 64, "Bit width out of range.");
  // Shift bit B-1 up to the sign position, then arithmetic-shift it back
  // down, replicating it across the upper bits.
  constexpr unsigned ShiftAmt = 64 - B;
  return int64_t(x << ShiftAmt) >> ShiftAmt;
}
562 | |
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
inline int64_t SignExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 64 && "Bit width out of range.");
  // Shift bit B-1 up to the sign position, then arithmetic-shift it back
  // down, replicating it across the upper bits.
  const unsigned ShiftAmt = 64 - B;
  return int64_t(X << ShiftAmt) >> ShiftAmt;
}
570 | |
/// Subtract two unsigned integers, X and Y, of type T and return the absolute
/// value of the result.
template <typename T>
std::enable_if_t<std::is_unsigned_v<T>, T> AbsoluteDifference(T X, T Y) {
  // Always subtract the smaller from the larger so the unsigned result never
  // wraps.
  if (X > Y)
    return X - Y;
  return Y - X;
}
577 | |
/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned_v<T>, T>
SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
  // Hacker's Delight, p. 29: unsigned addition wrapped iff the sum compares
  // below either operand.
  T Sum = X + Y;
  const bool DidOverflow = (Sum < X || Sum < Y);
  if (ResultOverflowed)
    *ResultOverflowed = DidOverflow;
  return DidOverflow ? std::numeric_limits<T>::max() : Sum;
}
594 | |
/// Add multiple unsigned integers of type T. Clamp the result to the
/// maximum representable value of T on overflow.
template <class T, class... Ts>
std::enable_if_t<std::is_unsigned_v<T>, T> SaturatingAdd(T X, T Y, T Z,
                                                         Ts... Args) {
  bool DidOverflow = false;
  const T Partial = SaturatingAdd(X, Y, &DidOverflow);
  if (!DidOverflow)
    return SaturatingAdd(Partial, Z, Args...);
  // Once saturated, adding max + 1 keeps every remaining step saturated.
  return SaturatingAdd(std::numeric_limits<T>::max(), T(1), Args...);
}
606 | |
/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned_v<T>, T>
SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
  // Route flag writes into a local dummy when the caller passed no pointer,
  // so the code below can assign unconditionally.
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
  // because it fails for uint16_t (where multiplication can have undefined
  // behavior due to promotion to int), and requires a division in addition
  // to the multiplication.

  Overflowed = false;

  // Log2(Z) would be either Log2Z or Log2Z + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
  // will necessarily be less than Log2Max as desired.
  int Log2Z = Log2_64(X) + Log2_64(Y);
  const T Max = std::numeric_limits<T>::max();
  int Log2Max = Log2_64(Max);
  if (Log2Z < Log2Max) {
    // Product needs strictly fewer bits than Max provides: cannot overflow.
    return X * Y;
  }
  if (Log2Z > Log2Max) {
    // Product needs more bits than Max provides: guaranteed overflow.
    Overflowed = true;
    return Max;
  }

  // We're going to use the top bit, and maybe overflow one
  // bit past it. Multiply all but the bottom bit then add
  // that on at the end.
  T Z = (X >> 1) * Y;
  if (Z & ~(Max >> 1)) {
    // The halved product already occupies the top bit, so doubling it below
    // would overflow.
    Overflowed = true;
    return Max;
  }
  Z <<= 1;
  // Restore the contribution of X's low bit, lost when X was halved above;
  // SaturatingAdd handles the possible final one-bit overflow.
  if (X & 1)
    return SaturatingAdd(Z, Y, ResultOverflowed);

  return Z;
}
651 | |
/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
/// the product. Clamp the result to the maximum representable value of T on
/// overflow. ResultOverflowed indicates if the result is larger than the
/// maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned_v<T>, T>
SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // A saturated product stays saturated — skip the add in that case.
  const T Product = SaturatingMultiply(X, Y, &Overflowed);
  return Overflowed ? Product : SaturatingAdd(A, Product, &Overflowed);
}
668 | |
/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
/// Declaration only — the definition lives in a separate translation unit.
extern const float huge_valf;
671 | |
672 | |
/// Add two signed integers, computing the two's complement truncated result,
/// returning true if overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed_v<T>, T> AddOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_add_overflow)
  return __builtin_add_overflow(X, Y, &Result);
#else
  // Do the arithmetic in the unsigned domain, where wraparound is defined.
  using U = std::make_unsigned_t<T>;
  const U Sum = static_cast<U>(X) + static_cast<U>(Y);

  // Convert back to signed; this is the truncated result.
  Result = static_cast<T>(Sum);

  // Overflow occurred iff the result's sign contradicts the operands':
  // two positives must not sum to <= 0, two negatives must not sum to >= 0,
  // and mixed signs can never overflow.
  if (X > 0 && Y > 0)
    return Result <= 0;
  if (X < 0 && Y < 0)
    return Result >= 0;
  return false;
#endif
}
698 | |
/// Subtract two signed integers, computing the two's complement truncated
/// result, returning true if an overflow ocurred.
template <typename T>
std::enable_if_t<std::is_signed_v<T>, T> SubOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_sub_overflow)
  return __builtin_sub_overflow(X, Y, &Result);
#else
  // Do the subtraction in the unsigned domain, where wraparound is defined.
  using U = std::make_unsigned_t<T>;
  const U Diff = static_cast<U>(X) - static_cast<U>(Y);

  // Convert back to signed; this is the truncated result.
  Result = static_cast<T>(Diff);

  // Overflow occurred iff the result's sign contradicts the operands':
  // non-positive minus positive must stay negative, non-negative minus
  // negative must stay positive; same-sign subtraction can never overflow.
  if (X <= 0 && Y > 0)
    return Result >= 0;
  if (X >= 0 && Y < 0)
    return Result <= 0;
  return false;
#endif
}
724 | |
/// Multiply two signed integers, computing the two's complement truncated
/// result, returning true if an overflow ocurred.
template <typename T>
std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_mul_overflow)
  // Use the compiler intrinsic when available, for consistency with
  // AddOverflow/SubOverflow above and to avoid the manual range checks.
  return __builtin_mul_overflow(X, Y, &Result);
#else
  // Perform the unsigned multiplication on absolute values.
  using U = std::make_unsigned_t<T>;
  const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
  const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
  const U UResult = UX * UY;

  // Convert to signed, restoring the sign of the true product.
  const bool IsNegative = (X < 0) ^ (Y < 0);
  Result = IsNegative ? (0 - UResult) : UResult;

  // If any of the args was 0, result is 0 and no overflow occurs.
  if (UX == 0 || UY == 0)
    return false;

  // UX and UY are in [1, 2^n], where n is the number of digits.
  // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
  // positive) divided by an argument compares to the other.
  if (IsNegative)
    return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
  else
    return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
#endif
}
751 | |
752 | } // End llvm namespace |
753 | |
754 | #endif |