File: | llvm/include/llvm/MC/LaneBitmask.h |
Warning: | line 86, column 34 The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'llvm::LaneBitmask::Type' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- CodeGenRegisters.cpp - Register and RegisterClass Info -------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file defines structures to encapsulate information gleaned from the | |||
10 | // target register and register class definitions. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #include "CodeGenRegisters.h" | |||
15 | #include "CodeGenTarget.h" | |||
16 | #include "llvm/ADT/ArrayRef.h" | |||
17 | #include "llvm/ADT/BitVector.h" | |||
18 | #include "llvm/ADT/DenseMap.h" | |||
19 | #include "llvm/ADT/IntEqClasses.h" | |||
20 | #include "llvm/ADT/SetVector.h" | |||
21 | #include "llvm/ADT/SmallPtrSet.h" | |||
22 | #include "llvm/ADT/SmallSet.h" | |||
23 | #include "llvm/ADT/SmallVector.h" | |||
24 | #include "llvm/ADT/STLExtras.h" | |||
25 | #include "llvm/ADT/StringExtras.h" | |||
26 | #include "llvm/ADT/StringRef.h" | |||
27 | #include "llvm/ADT/Twine.h" | |||
28 | #include "llvm/Support/Debug.h" | |||
29 | #include "llvm/Support/MathExtras.h" | |||
30 | #include "llvm/Support/raw_ostream.h" | |||
31 | #include "llvm/TableGen/Error.h" | |||
32 | #include "llvm/TableGen/Record.h" | |||
33 | #include <algorithm> | |||
34 | #include <cassert> | |||
35 | #include <cstdint> | |||
36 | #include <iterator> | |||
37 | #include <map> | |||
38 | #include <queue> | |||
39 | #include <set> | |||
40 | #include <string> | |||
41 | #include <tuple> | |||
42 | #include <utility> | |||
43 | #include <vector> | |||
44 | ||||
45 | using namespace llvm; | |||
46 | ||||
47 | #define DEBUG_TYPE"regalloc-emitter" "regalloc-emitter" | |||
48 | ||||
49 | //===----------------------------------------------------------------------===// | |||
50 | // CodeGenSubRegIndex | |||
51 | //===----------------------------------------------------------------------===// | |||
52 | ||||
53 | CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum) | |||
54 | : TheDef(R), EnumValue(Enum), AllSuperRegsCovered(true), Artificial(true) { | |||
55 | Name = std::string(R->getName()); | |||
56 | if (R->getValue("Namespace")) | |||
57 | Namespace = std::string(R->getValueAsString("Namespace")); | |||
58 | Size = R->getValueAsInt("Size"); | |||
59 | Offset = R->getValueAsInt("Offset"); | |||
60 | } | |||
61 | ||||
62 | CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace, | |||
63 | unsigned Enum) | |||
64 | : TheDef(nullptr), Name(std::string(N)), Namespace(std::string(Nspace)), | |||
65 | Size(-1), Offset(-1), EnumValue(Enum), AllSuperRegsCovered(true), | |||
66 | Artificial(true) {} | |||
67 | ||||
68 | std::string CodeGenSubRegIndex::getQualifiedName() const { | |||
69 | std::string N = getNamespace(); | |||
70 | if (!N.empty()) | |||
71 | N += "::"; | |||
72 | N += getName(); | |||
73 | return N; | |||
74 | } | |||
75 | ||||
76 | void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) { | |||
77 | if (!TheDef) | |||
78 | return; | |||
79 | ||||
80 | std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf"); | |||
81 | if (!Comps.empty()) { | |||
82 | if (Comps.size() != 2) | |||
83 | PrintFatalError(TheDef->getLoc(), | |||
84 | "ComposedOf must have exactly two entries"); | |||
85 | CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]); | |||
86 | CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]); | |||
87 | CodeGenSubRegIndex *X = A->addComposite(B, this); | |||
88 | if (X) | |||
89 | PrintFatalError(TheDef->getLoc(), "Ambiguous ComposedOf entries"); | |||
90 | } | |||
91 | ||||
92 | std::vector<Record*> Parts = | |||
93 | TheDef->getValueAsListOfDefs("CoveringSubRegIndices"); | |||
94 | if (!Parts.empty()) { | |||
95 | if (Parts.size() < 2) | |||
96 | PrintFatalError(TheDef->getLoc(), | |||
97 | "CoveredBySubRegs must have two or more entries"); | |||
98 | SmallVector<CodeGenSubRegIndex*, 8> IdxParts; | |||
99 | for (Record *Part : Parts) | |||
100 | IdxParts.push_back(RegBank.getSubRegIdx(Part)); | |||
101 | setConcatenationOf(IdxParts); | |||
102 | } | |||
103 | } | |||
104 | ||||
105 | LaneBitmask CodeGenSubRegIndex::computeLaneMask() const { | |||
106 | // Already computed? | |||
107 | if (LaneMask.any()) | |||
108 | return LaneMask; | |||
109 | ||||
110 | // Recursion guard, shouldn't be required. | |||
111 | LaneMask = LaneBitmask::getAll(); | |||
112 | ||||
113 | // The lane mask is simply the union of all sub-indices. | |||
114 | LaneBitmask M; | |||
115 | for (const auto &C : Composed) | |||
116 | M |= C.second->computeLaneMask(); | |||
117 | assert(M.any() && "Missing lane mask, sub-register cycle?")(static_cast <bool> (M.any() && "Missing lane mask, sub-register cycle?" ) ? void (0) : __assert_fail ("M.any() && \"Missing lane mask, sub-register cycle?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 117, __extension__ __PRETTY_FUNCTION__)); | |||
118 | LaneMask = M; | |||
119 | return LaneMask; | |||
120 | } | |||
121 | ||||
122 | void CodeGenSubRegIndex::setConcatenationOf( | |||
123 | ArrayRef<CodeGenSubRegIndex*> Parts) { | |||
124 | if (ConcatenationOf.empty()) | |||
125 | ConcatenationOf.assign(Parts.begin(), Parts.end()); | |||
126 | else | |||
127 | assert(std::equal(Parts.begin(), Parts.end(),(static_cast <bool> (std::equal(Parts.begin(), Parts.end (), ConcatenationOf.begin()) && "parts consistent") ? void (0) : __assert_fail ("std::equal(Parts.begin(), Parts.end(), ConcatenationOf.begin()) && \"parts consistent\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 128, __extension__ __PRETTY_FUNCTION__)) | |||
128 | ConcatenationOf.begin()) && "parts consistent")(static_cast <bool> (std::equal(Parts.begin(), Parts.end (), ConcatenationOf.begin()) && "parts consistent") ? void (0) : __assert_fail ("std::equal(Parts.begin(), Parts.end(), ConcatenationOf.begin()) && \"parts consistent\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 128, __extension__ __PRETTY_FUNCTION__)); | |||
129 | } | |||
130 | ||||
131 | void CodeGenSubRegIndex::computeConcatTransitiveClosure() { | |||
132 | for (SmallVectorImpl<CodeGenSubRegIndex*>::iterator | |||
133 | I = ConcatenationOf.begin(); I != ConcatenationOf.end(); /*empty*/) { | |||
134 | CodeGenSubRegIndex *SubIdx = *I; | |||
135 | SubIdx->computeConcatTransitiveClosure(); | |||
136 | #ifndef NDEBUG | |||
137 | for (CodeGenSubRegIndex *SRI : SubIdx->ConcatenationOf) | |||
138 | assert(SRI->ConcatenationOf.empty() && "No transitive closure?")(static_cast <bool> (SRI->ConcatenationOf.empty() && "No transitive closure?") ? void (0) : __assert_fail ("SRI->ConcatenationOf.empty() && \"No transitive closure?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 138, __extension__ __PRETTY_FUNCTION__)); | |||
139 | #endif | |||
140 | ||||
141 | if (SubIdx->ConcatenationOf.empty()) { | |||
142 | ++I; | |||
143 | } else { | |||
144 | I = ConcatenationOf.erase(I); | |||
145 | I = ConcatenationOf.insert(I, SubIdx->ConcatenationOf.begin(), | |||
146 | SubIdx->ConcatenationOf.end()); | |||
147 | I += SubIdx->ConcatenationOf.size(); | |||
148 | } | |||
149 | } | |||
150 | } | |||
151 | ||||
152 | //===----------------------------------------------------------------------===// | |||
153 | // CodeGenRegister | |||
154 | //===----------------------------------------------------------------------===// | |||
155 | ||||
156 | CodeGenRegister::CodeGenRegister(Record *R, unsigned Enum) | |||
157 | : TheDef(R), EnumValue(Enum), | |||
158 | CostPerUse(R->getValueAsListOfInts("CostPerUse")), | |||
159 | CoveredBySubRegs(R->getValueAsBit("CoveredBySubRegs")), | |||
160 | HasDisjunctSubRegs(false), SubRegsComplete(false), | |||
161 | SuperRegsComplete(false), TopoSig(~0u) { | |||
162 | Artificial = R->getValueAsBit("isArtificial"); | |||
163 | } | |||
164 | ||||
165 | void CodeGenRegister::buildObjectGraph(CodeGenRegBank &RegBank) { | |||
166 | std::vector<Record*> SRIs = TheDef->getValueAsListOfDefs("SubRegIndices"); | |||
167 | std::vector<Record*> SRs = TheDef->getValueAsListOfDefs("SubRegs"); | |||
168 | ||||
169 | if (SRIs.size() != SRs.size()) | |||
170 | PrintFatalError(TheDef->getLoc(), | |||
171 | "SubRegs and SubRegIndices must have the same size"); | |||
172 | ||||
173 | for (unsigned i = 0, e = SRIs.size(); i != e; ++i) { | |||
174 | ExplicitSubRegIndices.push_back(RegBank.getSubRegIdx(SRIs[i])); | |||
175 | ExplicitSubRegs.push_back(RegBank.getReg(SRs[i])); | |||
176 | } | |||
177 | ||||
178 | // Also compute leading super-registers. Each register has a list of | |||
179 | // covered-by-subregs super-registers where it appears as the first explicit | |||
180 | // sub-register. | |||
181 | // | |||
182 | // This is used by computeSecondarySubRegs() to find candidates. | |||
183 | if (CoveredBySubRegs && !ExplicitSubRegs.empty()) | |||
184 | ExplicitSubRegs.front()->LeadingSuperRegs.push_back(this); | |||
185 | ||||
186 | // Add ad hoc alias links. This is a symmetric relationship between two | |||
187 | // registers, so build a symmetric graph by adding links in both ends. | |||
188 | std::vector<Record*> Aliases = TheDef->getValueAsListOfDefs("Aliases"); | |||
189 | for (Record *Alias : Aliases) { | |||
190 | CodeGenRegister *Reg = RegBank.getReg(Alias); | |||
191 | ExplicitAliases.push_back(Reg); | |||
192 | Reg->ExplicitAliases.push_back(this); | |||
193 | } | |||
194 | } | |||
195 | ||||
196 | StringRef CodeGenRegister::getName() const { | |||
197 | assert(TheDef && "no def")(static_cast <bool> (TheDef && "no def") ? void (0) : __assert_fail ("TheDef && \"no def\"", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 197, __extension__ __PRETTY_FUNCTION__)); | |||
198 | return TheDef->getName(); | |||
199 | } | |||
200 | ||||
201 | namespace { | |||
202 | ||||
203 | // Iterate over all register units in a set of registers. | |||
204 | class RegUnitIterator { | |||
205 | CodeGenRegister::Vec::const_iterator RegI, RegE; | |||
206 | CodeGenRegister::RegUnitList::iterator UnitI, UnitE; | |||
207 | ||||
208 | public: | |||
209 | RegUnitIterator(const CodeGenRegister::Vec &Regs): | |||
210 | RegI(Regs.begin()), RegE(Regs.end()) { | |||
211 | ||||
212 | if (RegI != RegE) { | |||
213 | UnitI = (*RegI)->getRegUnits().begin(); | |||
214 | UnitE = (*RegI)->getRegUnits().end(); | |||
215 | advance(); | |||
216 | } | |||
217 | } | |||
218 | ||||
219 | bool isValid() const { return UnitI != UnitE; } | |||
220 | ||||
221 | unsigned operator* () const { assert(isValid())(static_cast <bool> (isValid()) ? void (0) : __assert_fail ("isValid()", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 221, __extension__ __PRETTY_FUNCTION__)); return *UnitI; } | |||
222 | ||||
223 | const CodeGenRegister *getReg() const { assert(isValid())(static_cast <bool> (isValid()) ? void (0) : __assert_fail ("isValid()", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 223, __extension__ __PRETTY_FUNCTION__)); return *RegI; } | |||
224 | ||||
225 | /// Preincrement. Move to the next unit. | |||
226 | void operator++() { | |||
227 | assert(isValid() && "Cannot advance beyond the last operand")(static_cast <bool> (isValid() && "Cannot advance beyond the last operand" ) ? void (0) : __assert_fail ("isValid() && \"Cannot advance beyond the last operand\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 227, __extension__ __PRETTY_FUNCTION__)); | |||
228 | ++UnitI; | |||
229 | advance(); | |||
230 | } | |||
231 | ||||
232 | protected: | |||
233 | void advance() { | |||
234 | while (UnitI == UnitE) { | |||
235 | if (++RegI == RegE) | |||
236 | break; | |||
237 | UnitI = (*RegI)->getRegUnits().begin(); | |||
238 | UnitE = (*RegI)->getRegUnits().end(); | |||
239 | } | |||
240 | } | |||
241 | }; | |||
242 | ||||
243 | } // end anonymous namespace | |||
244 | ||||
245 | // Return true of this unit appears in RegUnits. | |||
246 | static bool hasRegUnit(CodeGenRegister::RegUnitList &RegUnits, unsigned Unit) { | |||
247 | return RegUnits.test(Unit); | |||
248 | } | |||
249 | ||||
250 | // Inherit register units from subregisters. | |||
251 | // Return true if the RegUnits changed. | |||
252 | bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) { | |||
253 | bool changed = false; | |||
254 | for (const auto &SubReg : SubRegs) { | |||
255 | CodeGenRegister *SR = SubReg.second; | |||
256 | // Merge the subregister's units into this register's RegUnits. | |||
257 | changed |= (RegUnits |= SR->RegUnits); | |||
258 | } | |||
259 | ||||
260 | return changed; | |||
261 | } | |||
262 | ||||
263 | const CodeGenRegister::SubRegMap & | |||
264 | CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { | |||
265 | // Only compute this map once. | |||
266 | if (SubRegsComplete) | |||
267 | return SubRegs; | |||
268 | SubRegsComplete = true; | |||
269 | ||||
270 | HasDisjunctSubRegs = ExplicitSubRegs.size() > 1; | |||
271 | ||||
272 | // First insert the explicit subregs and make sure they are fully indexed. | |||
273 | for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { | |||
274 | CodeGenRegister *SR = ExplicitSubRegs[i]; | |||
275 | CodeGenSubRegIndex *Idx = ExplicitSubRegIndices[i]; | |||
276 | if (!SR->Artificial) | |||
277 | Idx->Artificial = false; | |||
278 | if (!SubRegs.insert(std::make_pair(Idx, SR)).second) | |||
279 | PrintFatalError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() + | |||
280 | " appears twice in Register " + getName()); | |||
281 | // Map explicit sub-registers first, so the names take precedence. | |||
282 | // The inherited sub-registers are mapped below. | |||
283 | SubReg2Idx.insert(std::make_pair(SR, Idx)); | |||
284 | } | |||
285 | ||||
286 | // Keep track of inherited subregs and how they can be reached. | |||
287 | SmallPtrSet<CodeGenRegister*, 8> Orphans; | |||
288 | ||||
289 | // Clone inherited subregs and place duplicate entries in Orphans. | |||
290 | // Here the order is important - earlier subregs take precedence. | |||
291 | for (CodeGenRegister *ESR : ExplicitSubRegs) { | |||
292 | const SubRegMap &Map = ESR->computeSubRegs(RegBank); | |||
293 | HasDisjunctSubRegs |= ESR->HasDisjunctSubRegs; | |||
294 | ||||
295 | for (const auto &SR : Map) { | |||
296 | if (!SubRegs.insert(SR).second) | |||
297 | Orphans.insert(SR.second); | |||
298 | } | |||
299 | } | |||
300 | ||||
301 | // Expand any composed subreg indices. | |||
302 | // If dsub_2 has ComposedOf = [qsub_1, dsub_0], and this register has a | |||
303 | // qsub_1 subreg, add a dsub_2 subreg. Keep growing Indices and process | |||
304 | // expanded subreg indices recursively. | |||
305 | SmallVector<CodeGenSubRegIndex*, 8> Indices = ExplicitSubRegIndices; | |||
306 | for (unsigned i = 0; i != Indices.size(); ++i) { | |||
307 | CodeGenSubRegIndex *Idx = Indices[i]; | |||
308 | const CodeGenSubRegIndex::CompMap &Comps = Idx->getComposites(); | |||
309 | CodeGenRegister *SR = SubRegs[Idx]; | |||
310 | const SubRegMap &Map = SR->computeSubRegs(RegBank); | |||
311 | ||||
312 | // Look at the possible compositions of Idx. | |||
313 | // They may not all be supported by SR. | |||
314 | for (auto Comp : Comps) { | |||
315 | SubRegMap::const_iterator SRI = Map.find(Comp.first); | |||
316 | if (SRI == Map.end()) | |||
317 | continue; // Idx + I->first doesn't exist in SR. | |||
318 | // Add I->second as a name for the subreg SRI->second, assuming it is | |||
319 | // orphaned, and the name isn't already used for something else. | |||
320 | if (SubRegs.count(Comp.second) || !Orphans.erase(SRI->second)) | |||
321 | continue; | |||
322 | // We found a new name for the orphaned sub-register. | |||
323 | SubRegs.insert(std::make_pair(Comp.second, SRI->second)); | |||
324 | Indices.push_back(Comp.second); | |||
325 | } | |||
326 | } | |||
327 | ||||
328 | // Now Orphans contains the inherited subregisters without a direct index. | |||
329 | // Create inferred indexes for all missing entries. | |||
330 | // Work backwards in the Indices vector in order to compose subregs bottom-up. | |||
331 | // Consider this subreg sequence: | |||
332 | // | |||
333 | // qsub_1 -> dsub_0 -> ssub_0 | |||
334 | // | |||
335 | // The qsub_1 -> dsub_0 composition becomes dsub_2, so the ssub_0 register | |||
336 | // can be reached in two different ways: | |||
337 | // | |||
338 | // qsub_1 -> ssub_0 | |||
339 | // dsub_2 -> ssub_0 | |||
340 | // | |||
341 | // We pick the latter composition because another register may have [dsub_0, | |||
342 | // dsub_1, dsub_2] subregs without necessarily having a qsub_1 subreg. The | |||
343 | // dsub_2 -> ssub_0 composition can be shared. | |||
344 | while (!Indices.empty() && !Orphans.empty()) { | |||
345 | CodeGenSubRegIndex *Idx = Indices.pop_back_val(); | |||
346 | CodeGenRegister *SR = SubRegs[Idx]; | |||
347 | const SubRegMap &Map = SR->computeSubRegs(RegBank); | |||
348 | for (const auto &SubReg : Map) | |||
349 | if (Orphans.erase(SubReg.second)) | |||
350 | SubRegs[RegBank.getCompositeSubRegIndex(Idx, SubReg.first)] = SubReg.second; | |||
351 | } | |||
352 | ||||
353 | // Compute the inverse SubReg -> Idx map. | |||
354 | for (const auto &SubReg : SubRegs) { | |||
355 | if (SubReg.second == this) { | |||
356 | ArrayRef<SMLoc> Loc; | |||
357 | if (TheDef) | |||
358 | Loc = TheDef->getLoc(); | |||
359 | PrintFatalError(Loc, "Register " + getName() + | |||
360 | " has itself as a sub-register"); | |||
361 | } | |||
362 | ||||
363 | // Compute AllSuperRegsCovered. | |||
364 | if (!CoveredBySubRegs) | |||
365 | SubReg.first->AllSuperRegsCovered = false; | |||
366 | ||||
367 | // Ensure that every sub-register has a unique name. | |||
368 | DenseMap<const CodeGenRegister*, CodeGenSubRegIndex*>::iterator Ins = | |||
369 | SubReg2Idx.insert(std::make_pair(SubReg.second, SubReg.first)).first; | |||
370 | if (Ins->second == SubReg.first) | |||
371 | continue; | |||
372 | // Trouble: Two different names for SubReg.second. | |||
373 | ArrayRef<SMLoc> Loc; | |||
374 | if (TheDef) | |||
375 | Loc = TheDef->getLoc(); | |||
376 | PrintFatalError(Loc, "Sub-register can't have two names: " + | |||
377 | SubReg.second->getName() + " available as " + | |||
378 | SubReg.first->getName() + " and " + Ins->second->getName()); | |||
379 | } | |||
380 | ||||
381 | // Derive possible names for sub-register concatenations from any explicit | |||
382 | // sub-registers. By doing this before computeSecondarySubRegs(), we ensure | |||
383 | // that getConcatSubRegIndex() won't invent any concatenated indices that the | |||
384 | // user already specified. | |||
385 | for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { | |||
386 | CodeGenRegister *SR = ExplicitSubRegs[i]; | |||
387 | if (!SR->CoveredBySubRegs || SR->ExplicitSubRegs.size() <= 1 || | |||
388 | SR->Artificial) | |||
389 | continue; | |||
390 | ||||
391 | // SR is composed of multiple sub-regs. Find their names in this register. | |||
392 | SmallVector<CodeGenSubRegIndex*, 8> Parts; | |||
393 | for (unsigned j = 0, e = SR->ExplicitSubRegs.size(); j != e; ++j) { | |||
394 | CodeGenSubRegIndex &I = *SR->ExplicitSubRegIndices[j]; | |||
395 | if (!I.Artificial) | |||
396 | Parts.push_back(getSubRegIndex(SR->ExplicitSubRegs[j])); | |||
397 | } | |||
398 | ||||
399 | // Offer this as an existing spelling for the concatenation of Parts. | |||
400 | CodeGenSubRegIndex &Idx = *ExplicitSubRegIndices[i]; | |||
401 | Idx.setConcatenationOf(Parts); | |||
402 | } | |||
403 | ||||
404 | // Initialize RegUnitList. Because getSubRegs is called recursively, this | |||
405 | // processes the register hierarchy in postorder. | |||
406 | // | |||
407 | // Inherit all sub-register units. It is good enough to look at the explicit | |||
408 | // sub-registers, the other registers won't contribute any more units. | |||
409 | for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { | |||
410 | CodeGenRegister *SR = ExplicitSubRegs[i]; | |||
411 | RegUnits |= SR->RegUnits; | |||
412 | } | |||
413 | ||||
414 | // Absent any ad hoc aliasing, we create one register unit per leaf register. | |||
415 | // These units correspond to the maximal cliques in the register overlap | |||
416 | // graph which is optimal. | |||
417 | // | |||
418 | // When there is ad hoc aliasing, we simply create one unit per edge in the | |||
419 | // undirected ad hoc aliasing graph. Technically, we could do better by | |||
420 | // identifying maximal cliques in the ad hoc graph, but cliques larger than 2 | |||
421 | // are extremely rare anyway (I've never seen one), so we don't bother with | |||
422 | // the added complexity. | |||
423 | for (unsigned i = 0, e = ExplicitAliases.size(); i != e; ++i) { | |||
424 | CodeGenRegister *AR = ExplicitAliases[i]; | |||
425 | // Only visit each edge once. | |||
426 | if (AR->SubRegsComplete) | |||
427 | continue; | |||
428 | // Create a RegUnit representing this alias edge, and add it to both | |||
429 | // registers. | |||
430 | unsigned Unit = RegBank.newRegUnit(this, AR); | |||
431 | RegUnits.set(Unit); | |||
432 | AR->RegUnits.set(Unit); | |||
433 | } | |||
434 | ||||
435 | // Finally, create units for leaf registers without ad hoc aliases. Note that | |||
436 | // a leaf register with ad hoc aliases doesn't get its own unit - it isn't | |||
437 | // necessary. This means the aliasing leaf registers can share a single unit. | |||
438 | if (RegUnits.empty()) | |||
439 | RegUnits.set(RegBank.newRegUnit(this)); | |||
440 | ||||
441 | // We have now computed the native register units. More may be adopted later | |||
442 | // for balancing purposes. | |||
443 | NativeRegUnits = RegUnits; | |||
444 | ||||
445 | return SubRegs; | |||
446 | } | |||
447 | ||||
448 | // In a register that is covered by its sub-registers, try to find redundant | |||
449 | // sub-registers. For example: | |||
450 | // | |||
451 | // QQ0 = {Q0, Q1} | |||
452 | // Q0 = {D0, D1} | |||
453 | // Q1 = {D2, D3} | |||
454 | // | |||
455 | // We can infer that D1_D2 is also a sub-register, even if it wasn't named in | |||
456 | // the register definition. | |||
457 | // | |||
458 | // The explicitly specified registers form a tree. This function discovers | |||
459 | // sub-register relationships that would force a DAG. | |||
460 | // | |||
461 | void CodeGenRegister::computeSecondarySubRegs(CodeGenRegBank &RegBank) { | |||
462 | SmallVector<SubRegMap::value_type, 8> NewSubRegs; | |||
463 | ||||
464 | std::queue<std::pair<CodeGenSubRegIndex*,CodeGenRegister*>> SubRegQueue; | |||
465 | for (std::pair<CodeGenSubRegIndex*,CodeGenRegister*> P : SubRegs) | |||
466 | SubRegQueue.push(P); | |||
467 | ||||
468 | // Look at the leading super-registers of each sub-register. Those are the | |||
469 | // candidates for new sub-registers, assuming they are fully contained in | |||
470 | // this register. | |||
471 | while (!SubRegQueue.empty()) { | |||
472 | CodeGenSubRegIndex *SubRegIdx; | |||
473 | const CodeGenRegister *SubReg; | |||
474 | std::tie(SubRegIdx, SubReg) = SubRegQueue.front(); | |||
475 | SubRegQueue.pop(); | |||
476 | ||||
477 | const CodeGenRegister::SuperRegList &Leads = SubReg->LeadingSuperRegs; | |||
478 | for (unsigned i = 0, e = Leads.size(); i != e; ++i) { | |||
479 | CodeGenRegister *Cand = const_cast<CodeGenRegister*>(Leads[i]); | |||
480 | // Already got this sub-register? | |||
481 | if (Cand == this || getSubRegIndex(Cand)) | |||
482 | continue; | |||
483 | // Check if each component of Cand is already a sub-register. | |||
484 | assert(!Cand->ExplicitSubRegs.empty() &&(static_cast <bool> (!Cand->ExplicitSubRegs.empty() && "Super-register has no sub-registers") ? void (0) : __assert_fail ("!Cand->ExplicitSubRegs.empty() && \"Super-register has no sub-registers\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 485, __extension__ __PRETTY_FUNCTION__)) | |||
485 | "Super-register has no sub-registers")(static_cast <bool> (!Cand->ExplicitSubRegs.empty() && "Super-register has no sub-registers") ? void (0) : __assert_fail ("!Cand->ExplicitSubRegs.empty() && \"Super-register has no sub-registers\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 485, __extension__ __PRETTY_FUNCTION__)); | |||
486 | if (Cand->ExplicitSubRegs.size() == 1) | |||
487 | continue; | |||
488 | SmallVector<CodeGenSubRegIndex*, 8> Parts; | |||
489 | // We know that the first component is (SubRegIdx,SubReg). However we | |||
490 | // may still need to split it into smaller subregister parts. | |||
491 | assert(Cand->ExplicitSubRegs[0] == SubReg && "LeadingSuperRegs correct")(static_cast <bool> (Cand->ExplicitSubRegs[0] == SubReg && "LeadingSuperRegs correct") ? void (0) : __assert_fail ("Cand->ExplicitSubRegs[0] == SubReg && \"LeadingSuperRegs correct\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 491, __extension__ __PRETTY_FUNCTION__)); | |||
492 | assert(getSubRegIndex(SubReg) == SubRegIdx && "LeadingSuperRegs correct")(static_cast <bool> (getSubRegIndex(SubReg) == SubRegIdx && "LeadingSuperRegs correct") ? void (0) : __assert_fail ("getSubRegIndex(SubReg) == SubRegIdx && \"LeadingSuperRegs correct\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 492, __extension__ __PRETTY_FUNCTION__)); | |||
493 | for (CodeGenRegister *SubReg : Cand->ExplicitSubRegs) { | |||
494 | if (CodeGenSubRegIndex *SubRegIdx = getSubRegIndex(SubReg)) { | |||
495 | if (SubRegIdx->ConcatenationOf.empty()) | |||
496 | Parts.push_back(SubRegIdx); | |||
497 | else | |||
498 | append_range(Parts, SubRegIdx->ConcatenationOf); | |||
499 | } else { | |||
500 | // Sub-register doesn't exist. | |||
501 | Parts.clear(); | |||
502 | break; | |||
503 | } | |||
504 | } | |||
505 | // There is nothing to do if some Cand sub-register is not part of this | |||
506 | // register. | |||
507 | if (Parts.empty()) | |||
508 | continue; | |||
509 | ||||
510 | // Each part of Cand is a sub-register of this. Make the full Cand also | |||
511 | // a sub-register with a concatenated sub-register index. | |||
512 | CodeGenSubRegIndex *Concat = RegBank.getConcatSubRegIndex(Parts); | |||
513 | std::pair<CodeGenSubRegIndex*,CodeGenRegister*> NewSubReg = | |||
514 | std::make_pair(Concat, Cand); | |||
515 | ||||
516 | if (!SubRegs.insert(NewSubReg).second) | |||
517 | continue; | |||
518 | ||||
519 | // We inserted a new subregister. | |||
520 | NewSubRegs.push_back(NewSubReg); | |||
521 | SubRegQueue.push(NewSubReg); | |||
522 | SubReg2Idx.insert(std::make_pair(Cand, Concat)); | |||
523 | } | |||
524 | } | |||
525 | ||||
526 | // Create sub-register index composition maps for the synthesized indices. | |||
527 | for (unsigned i = 0, e = NewSubRegs.size(); i != e; ++i) { | |||
528 | CodeGenSubRegIndex *NewIdx = NewSubRegs[i].first; | |||
529 | CodeGenRegister *NewSubReg = NewSubRegs[i].second; | |||
530 | for (auto SubReg : NewSubReg->SubRegs) { | |||
531 | CodeGenSubRegIndex *SubIdx = getSubRegIndex(SubReg.second); | |||
532 | if (!SubIdx) | |||
533 | PrintFatalError(TheDef->getLoc(), "No SubRegIndex for " + | |||
534 | SubReg.second->getName() + | |||
535 | " in " + getName()); | |||
536 | NewIdx->addComposite(SubReg.first, SubIdx); | |||
537 | } | |||
538 | } | |||
539 | } | |||
540 | ||||
541 | void CodeGenRegister::computeSuperRegs(CodeGenRegBank &RegBank) { | |||
542 | // Only visit each register once. | |||
543 | if (SuperRegsComplete) | |||
544 | return; | |||
545 | SuperRegsComplete = true; | |||
546 | ||||
547 | // Make sure all sub-registers have been visited first, so the super-reg | |||
548 | // lists will be topologically ordered. | |||
549 | for (auto SubReg : SubRegs) | |||
550 | SubReg.second->computeSuperRegs(RegBank); | |||
551 | ||||
552 | // Now add this as a super-register on all sub-registers. | |||
553 | // Also compute the TopoSigId in post-order. | |||
554 | TopoSigId Id; | |||
555 | for (auto SubReg : SubRegs) { | |||
556 | // Topological signature computed from SubIdx, TopoId(SubReg). | |||
557 | // Loops and idempotent indices have TopoSig = ~0u. | |||
558 | Id.push_back(SubReg.first->EnumValue); | |||
559 | Id.push_back(SubReg.second->TopoSig); | |||
560 | ||||
561 | // Don't add duplicate entries. | |||
562 | if (!SubReg.second->SuperRegs.empty() && | |||
563 | SubReg.second->SuperRegs.back() == this) | |||
564 | continue; | |||
565 | SubReg.second->SuperRegs.push_back(this); | |||
566 | } | |||
567 | TopoSig = RegBank.getTopoSig(Id); | |||
568 | } | |||
569 | ||||
570 | void | |||
571 | CodeGenRegister::addSubRegsPreOrder(SetVector<const CodeGenRegister*> &OSet, | |||
572 | CodeGenRegBank &RegBank) const { | |||
573 | assert(SubRegsComplete && "Must precompute sub-registers")(static_cast <bool> (SubRegsComplete && "Must precompute sub-registers" ) ? void (0) : __assert_fail ("SubRegsComplete && \"Must precompute sub-registers\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 573, __extension__ __PRETTY_FUNCTION__)); | |||
574 | for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { | |||
575 | CodeGenRegister *SR = ExplicitSubRegs[i]; | |||
576 | if (OSet.insert(SR)) | |||
577 | SR->addSubRegsPreOrder(OSet, RegBank); | |||
578 | } | |||
579 | // Add any secondary sub-registers that weren't part of the explicit tree. | |||
580 | for (auto SubReg : SubRegs) | |||
581 | OSet.insert(SubReg.second); | |||
582 | } | |||
583 | ||||
584 | // Get the sum of this register's unit weights. | |||
585 | unsigned CodeGenRegister::getWeight(const CodeGenRegBank &RegBank) const { | |||
586 | unsigned Weight = 0; | |||
587 | for (unsigned RegUnit : RegUnits) { | |||
588 | Weight += RegBank.getRegUnit(RegUnit).Weight; | |||
589 | } | |||
590 | return Weight; | |||
591 | } | |||
592 | ||||
593 | //===----------------------------------------------------------------------===// | |||
594 | // RegisterTuples | |||
595 | //===----------------------------------------------------------------------===// | |||
596 | ||||
597 | // A RegisterTuples def is used to generate pseudo-registers from lists of | |||
598 | // sub-registers. We provide a SetTheory expander class that returns the new | |||
599 | // registers. | |||
600 | namespace { | |||
601 | ||||
602 | struct TupleExpander : SetTheory::Expander { | |||
603 | // Reference to SynthDefs in the containing CodeGenRegBank, to keep track of | |||
604 | // the synthesized definitions for their lifetime. | |||
605 | std::vector<std::unique_ptr<Record>> &SynthDefs; | |||
606 | ||||
607 | TupleExpander(std::vector<std::unique_ptr<Record>> &SynthDefs) | |||
608 | : SynthDefs(SynthDefs) {} | |||
609 | ||||
610 | void expand(SetTheory &ST, Record *Def, SetTheory::RecSet &Elts) override { | |||
611 | std::vector<Record*> Indices = Def->getValueAsListOfDefs("SubRegIndices"); | |||
612 | unsigned Dim = Indices.size(); | |||
613 | ListInit *SubRegs = Def->getValueAsListInit("SubRegs"); | |||
614 | if (Dim != SubRegs->size()) | |||
615 | PrintFatalError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch"); | |||
616 | if (Dim < 2) | |||
617 | PrintFatalError(Def->getLoc(), | |||
618 | "Tuples must have at least 2 sub-registers"); | |||
619 | ||||
620 | // Evaluate the sub-register lists to be zipped. | |||
621 | unsigned Length = ~0u; | |||
622 | SmallVector<SetTheory::RecSet, 4> Lists(Dim); | |||
623 | for (unsigned i = 0; i != Dim; ++i) { | |||
624 | ST.evaluate(SubRegs->getElement(i), Lists[i], Def->getLoc()); | |||
625 | Length = std::min(Length, unsigned(Lists[i].size())); | |||
626 | } | |||
627 | ||||
628 | if (Length == 0) | |||
629 | return; | |||
630 | ||||
631 | // Precompute some types. | |||
632 | Record *RegisterCl = Def->getRecords().getClass("Register"); | |||
633 | RecTy *RegisterRecTy = RecordRecTy::get(RegisterCl); | |||
634 | std::vector<StringRef> RegNames = | |||
635 | Def->getValueAsListOfStrings("RegAsmNames"); | |||
636 | ||||
637 | // Zip them up. | |||
638 | for (unsigned n = 0; n != Length; ++n) { | |||
639 | std::string Name; | |||
640 | Record *Proto = Lists[0][n]; | |||
641 | std::vector<Init*> Tuple; | |||
642 | for (unsigned i = 0; i != Dim; ++i) { | |||
643 | Record *Reg = Lists[i][n]; | |||
644 | if (i) Name += '_'; | |||
645 | Name += Reg->getName(); | |||
646 | Tuple.push_back(DefInit::get(Reg)); | |||
647 | } | |||
648 | ||||
649 | // Take the cost list of the first register in the tuple. | |||
650 | ListInit *CostList = Proto->getValueAsListInit("CostPerUse"); | |||
651 | SmallVector<Init *, 2> CostPerUse; | |||
652 | CostPerUse.insert(CostPerUse.end(), CostList->begin(), CostList->end()); | |||
653 | ||||
654 | StringInit *AsmName = StringInit::get(""); | |||
655 | if (!RegNames.empty()) { | |||
656 | if (RegNames.size() <= n) | |||
657 | PrintFatalError(Def->getLoc(), | |||
658 | "Register tuple definition missing name for '" + | |||
659 | Name + "'."); | |||
660 | AsmName = StringInit::get(RegNames[n]); | |||
661 | } | |||
662 | ||||
663 | // Create a new Record representing the synthesized register. This record | |||
664 | // is only for consumption by CodeGenRegister, it is not added to the | |||
665 | // RecordKeeper. | |||
666 | SynthDefs.emplace_back( | |||
667 | std::make_unique<Record>(Name, Def->getLoc(), Def->getRecords())); | |||
668 | Record *NewReg = SynthDefs.back().get(); | |||
669 | Elts.insert(NewReg); | |||
670 | ||||
671 | // Copy Proto super-classes. | |||
672 | ArrayRef<std::pair<Record *, SMRange>> Supers = Proto->getSuperClasses(); | |||
673 | for (const auto &SuperPair : Supers) | |||
674 | NewReg->addSuperClass(SuperPair.first, SuperPair.second); | |||
675 | ||||
676 | // Copy Proto fields. | |||
677 | for (unsigned i = 0, e = Proto->getValues().size(); i != e; ++i) { | |||
678 | RecordVal RV = Proto->getValues()[i]; | |||
679 | ||||
680 | // Skip existing fields, like NAME. | |||
681 | if (NewReg->getValue(RV.getNameInit())) | |||
682 | continue; | |||
683 | ||||
684 | StringRef Field = RV.getName(); | |||
685 | ||||
686 | // Replace the sub-register list with Tuple. | |||
687 | if (Field == "SubRegs") | |||
688 | RV.setValue(ListInit::get(Tuple, RegisterRecTy)); | |||
689 | ||||
690 | if (Field == "AsmName") | |||
691 | RV.setValue(AsmName); | |||
692 | ||||
693 | // CostPerUse is aggregated from all Tuple members. | |||
694 | if (Field == "CostPerUse") | |||
695 | RV.setValue(ListInit::get(CostPerUse, CostList->getElementType())); | |||
696 | ||||
697 | // Composite registers are always covered by sub-registers. | |||
698 | if (Field == "CoveredBySubRegs") | |||
699 | RV.setValue(BitInit::get(true)); | |||
700 | ||||
701 | // Copy fields from the RegisterTuples def. | |||
702 | if (Field == "SubRegIndices" || | |||
703 | Field == "CompositeIndices") { | |||
704 | NewReg->addValue(*Def->getValue(Field)); | |||
705 | continue; | |||
706 | } | |||
707 | ||||
708 | // Some fields get their default uninitialized value. | |||
709 | if (Field == "DwarfNumbers" || | |||
710 | Field == "DwarfAlias" || | |||
711 | Field == "Aliases") { | |||
712 | if (const RecordVal *DefRV = RegisterCl->getValue(Field)) | |||
713 | NewReg->addValue(*DefRV); | |||
714 | continue; | |||
715 | } | |||
716 | ||||
717 | // Everything else is copied from Proto. | |||
718 | NewReg->addValue(RV); | |||
719 | } | |||
720 | } | |||
721 | } | |||
722 | }; | |||
723 | ||||
724 | } // end anonymous namespace | |||
725 | ||||
726 | //===----------------------------------------------------------------------===// | |||
727 | // CodeGenRegisterClass | |||
728 | //===----------------------------------------------------------------------===// | |||
729 | ||||
730 | static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) { | |||
731 | llvm::sort(M, deref<std::less<>>()); | |||
732 | M.erase(std::unique(M.begin(), M.end(), deref<std::equal_to<>>()), M.end()); | |||
733 | } | |||
734 | ||||
735 | CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) | |||
736 | : TheDef(R), Name(std::string(R->getName())), | |||
737 | TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1) { | |||
738 | GeneratePressureSet = R->getValueAsBit("GeneratePressureSet"); | |||
739 | std::vector<Record*> TypeList = R->getValueAsListOfDefs("RegTypes"); | |||
740 | if (TypeList.empty()) | |||
741 | PrintFatalError(R->getLoc(), "RegTypes list must not be empty!"); | |||
742 | for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { | |||
743 | Record *Type = TypeList[i]; | |||
744 | if (!Type->isSubClassOf("ValueType")) | |||
745 | PrintFatalError(R->getLoc(), | |||
746 | "RegTypes list member '" + Type->getName() + | |||
747 | "' does not derive from the ValueType class!"); | |||
748 | VTs.push_back(getValueTypeByHwMode(Type, RegBank.getHwModes())); | |||
749 | } | |||
750 | ||||
751 | // Allocation order 0 is the full set. AltOrders provides others. | |||
752 | const SetTheory::RecVec *Elements = RegBank.getSets().expand(R); | |||
753 | ListInit *AltOrders = R->getValueAsListInit("AltOrders"); | |||
754 | Orders.resize(1 + AltOrders->size()); | |||
755 | ||||
756 | // Default allocation order always contains all registers. | |||
757 | Artificial = true; | |||
758 | for (unsigned i = 0, e = Elements->size(); i != e; ++i) { | |||
759 | Orders[0].push_back((*Elements)[i]); | |||
760 | const CodeGenRegister *Reg = RegBank.getReg((*Elements)[i]); | |||
761 | Members.push_back(Reg); | |||
762 | Artificial &= Reg->Artificial; | |||
763 | TopoSigs.set(Reg->getTopoSig()); | |||
764 | } | |||
765 | sortAndUniqueRegisters(Members); | |||
766 | ||||
767 | // Alternative allocation orders may be subsets. | |||
768 | SetTheory::RecSet Order; | |||
769 | for (unsigned i = 0, e = AltOrders->size(); i != e; ++i) { | |||
770 | RegBank.getSets().evaluate(AltOrders->getElement(i), Order, R->getLoc()); | |||
771 | Orders[1 + i].append(Order.begin(), Order.end()); | |||
772 | // Verify that all altorder members are regclass members. | |||
773 | while (!Order.empty()) { | |||
774 | CodeGenRegister *Reg = RegBank.getReg(Order.back()); | |||
775 | Order.pop_back(); | |||
776 | if (!contains(Reg)) | |||
777 | PrintFatalError(R->getLoc(), " AltOrder register " + Reg->getName() + | |||
778 | " is not a class member"); | |||
779 | } | |||
780 | } | |||
781 | ||||
782 | Namespace = R->getValueAsString("Namespace"); | |||
783 | ||||
784 | if (const RecordVal *RV = R->getValue("RegInfos")) | |||
785 | if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) | |||
786 | RSI = RegSizeInfoByHwMode(DI->getDef(), RegBank.getHwModes()); | |||
787 | unsigned Size = R->getValueAsInt("Size"); | |||
788 | assert((RSI.hasDefault() || Size != 0 || VTs[0].isSimple()) &&(static_cast <bool> ((RSI.hasDefault() || Size != 0 || VTs [0].isSimple()) && "Impossible to determine register size" ) ? void (0) : __assert_fail ("(RSI.hasDefault() || Size != 0 || VTs[0].isSimple()) && \"Impossible to determine register size\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 789, __extension__ __PRETTY_FUNCTION__)) | |||
789 | "Impossible to determine register size")(static_cast <bool> ((RSI.hasDefault() || Size != 0 || VTs [0].isSimple()) && "Impossible to determine register size" ) ? void (0) : __assert_fail ("(RSI.hasDefault() || Size != 0 || VTs[0].isSimple()) && \"Impossible to determine register size\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 789, __extension__ __PRETTY_FUNCTION__)); | |||
790 | if (!RSI.hasDefault()) { | |||
791 | RegSizeInfo RI; | |||
792 | RI.RegSize = RI.SpillSize = Size ? Size | |||
793 | : VTs[0].getSimple().getSizeInBits(); | |||
794 | RI.SpillAlignment = R->getValueAsInt("Alignment"); | |||
795 | RSI.insertRegSizeForMode(DefaultMode, RI); | |||
796 | } | |||
797 | ||||
798 | CopyCost = R->getValueAsInt("CopyCost"); | |||
799 | Allocatable = R->getValueAsBit("isAllocatable"); | |||
800 | AltOrderSelect = R->getValueAsString("AltOrderSelect"); | |||
801 | int AllocationPriority = R->getValueAsInt("AllocationPriority"); | |||
802 | if (AllocationPriority < 0 || AllocationPriority > 63) | |||
803 | PrintFatalError(R->getLoc(), "AllocationPriority out of range [0,63]"); | |||
804 | this->AllocationPriority = AllocationPriority; | |||
805 | } | |||
806 | ||||
807 | // Create an inferred register class that was missing from the .td files. | |||
808 | // Most properties will be inherited from the closest super-class after the | |||
809 | // class structure has been computed. | |||
810 | CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, | |||
811 | StringRef Name, Key Props) | |||
812 | : Members(*Props.Members), TheDef(nullptr), Name(std::string(Name)), | |||
813 | TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), RSI(Props.RSI), | |||
814 | CopyCost(0), Allocatable(true), AllocationPriority(0) { | |||
815 | Artificial = true; | |||
816 | GeneratePressureSet = false; | |||
817 | for (const auto R : Members) { | |||
818 | TopoSigs.set(R->getTopoSig()); | |||
819 | Artificial &= R->Artificial; | |||
820 | } | |||
821 | } | |||
822 | ||||
823 | // Compute inherited propertied for a synthesized register class. | |||
824 | void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) { | |||
825 | assert(!getDef() && "Only synthesized classes can inherit properties")(static_cast <bool> (!getDef() && "Only synthesized classes can inherit properties" ) ? void (0) : __assert_fail ("!getDef() && \"Only synthesized classes can inherit properties\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 825, __extension__ __PRETTY_FUNCTION__)); | |||
826 | assert(!SuperClasses.empty() && "Synthesized class without super class")(static_cast <bool> (!SuperClasses.empty() && "Synthesized class without super class" ) ? void (0) : __assert_fail ("!SuperClasses.empty() && \"Synthesized class without super class\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 826, __extension__ __PRETTY_FUNCTION__)); | |||
827 | ||||
828 | // The last super-class is the smallest one. | |||
829 | CodeGenRegisterClass &Super = *SuperClasses.back(); | |||
830 | ||||
831 | // Most properties are copied directly. | |||
832 | // Exceptions are members, size, and alignment | |||
833 | Namespace = Super.Namespace; | |||
834 | VTs = Super.VTs; | |||
835 | CopyCost = Super.CopyCost; | |||
836 | // Check for allocatable superclasses. | |||
837 | Allocatable = any_of(SuperClasses, [&](const CodeGenRegisterClass *S) { | |||
838 | return S->Allocatable; | |||
839 | }); | |||
840 | AltOrderSelect = Super.AltOrderSelect; | |||
841 | AllocationPriority = Super.AllocationPriority; | |||
842 | GeneratePressureSet |= Super.GeneratePressureSet; | |||
843 | ||||
844 | // Copy all allocation orders, filter out foreign registers from the larger | |||
845 | // super-class. | |||
846 | Orders.resize(Super.Orders.size()); | |||
847 | for (unsigned i = 0, ie = Super.Orders.size(); i != ie; ++i) | |||
848 | for (unsigned j = 0, je = Super.Orders[i].size(); j != je; ++j) | |||
849 | if (contains(RegBank.getReg(Super.Orders[i][j]))) | |||
850 | Orders[i].push_back(Super.Orders[i][j]); | |||
851 | } | |||
852 | ||||
853 | bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const { | |||
854 | return std::binary_search(Members.begin(), Members.end(), Reg, | |||
855 | deref<std::less<>>()); | |||
856 | } | |||
857 | ||||
858 | unsigned CodeGenRegisterClass::getWeight(const CodeGenRegBank& RegBank) const { | |||
859 | if (TheDef && !TheDef->isValueUnset("Weight")) | |||
860 | return TheDef->getValueAsInt("Weight"); | |||
861 | ||||
862 | if (Members.empty() || Artificial) | |||
863 | return 0; | |||
864 | ||||
865 | return (*Members.begin())->getWeight(RegBank); | |||
866 | } | |||
867 | ||||
868 | namespace llvm { | |||
869 | ||||
870 | raw_ostream &operator<<(raw_ostream &OS, const CodeGenRegisterClass::Key &K) { | |||
871 | OS << "{ " << K.RSI; | |||
872 | for (const auto R : *K.Members) | |||
873 | OS << ", " << R->getName(); | |||
874 | return OS << " }"; | |||
875 | } | |||
876 | ||||
877 | } // end namespace llvm | |||
878 | ||||
879 | // This is a simple lexicographical order that can be used to search for sets. | |||
880 | // It is not the same as the topological order provided by TopoOrderRC. | |||
881 | bool CodeGenRegisterClass::Key:: | |||
882 | operator<(const CodeGenRegisterClass::Key &B) const { | |||
883 | assert(Members && B.Members)(static_cast <bool> (Members && B.Members) ? void (0) : __assert_fail ("Members && B.Members", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 883, __extension__ __PRETTY_FUNCTION__)); | |||
884 | return std::tie(*Members, RSI) < std::tie(*B.Members, B.RSI); | |||
885 | } | |||
886 | ||||
887 | // Returns true if RC is a strict subclass. | |||
888 | // RC is a sub-class of this class if it is a valid replacement for any | |||
889 | // instruction operand where a register of this classis required. It must | |||
890 | // satisfy these conditions: | |||
891 | // | |||
892 | // 1. All RC registers are also in this. | |||
893 | // 2. The RC spill size must not be smaller than our spill size. | |||
894 | // 3. RC spill alignment must be compatible with ours. | |||
895 | // | |||
896 | static bool testSubClass(const CodeGenRegisterClass *A, | |||
897 | const CodeGenRegisterClass *B) { | |||
898 | return A->RSI.isSubClassOf(B->RSI) && | |||
899 | std::includes(A->getMembers().begin(), A->getMembers().end(), | |||
900 | B->getMembers().begin(), B->getMembers().end(), | |||
901 | deref<std::less<>>()); | |||
902 | } | |||
903 | ||||
904 | /// Sorting predicate for register classes. This provides a topological | |||
905 | /// ordering that arranges all register classes before their sub-classes. | |||
906 | /// | |||
907 | /// Register classes with the same registers, spill size, and alignment form a | |||
908 | /// clique. They will be ordered alphabetically. | |||
909 | /// | |||
910 | static bool TopoOrderRC(const CodeGenRegisterClass &PA, | |||
911 | const CodeGenRegisterClass &PB) { | |||
912 | auto *A = &PA; | |||
913 | auto *B = &PB; | |||
914 | if (A == B) | |||
915 | return false; | |||
916 | ||||
917 | if (A->RSI < B->RSI) | |||
918 | return true; | |||
919 | if (A->RSI != B->RSI) | |||
920 | return false; | |||
921 | ||||
922 | // Order by descending set size. Note that the classes' allocation order may | |||
923 | // not have been computed yet. The Members set is always vaild. | |||
924 | if (A->getMembers().size() > B->getMembers().size()) | |||
925 | return true; | |||
926 | if (A->getMembers().size() < B->getMembers().size()) | |||
927 | return false; | |||
928 | ||||
929 | // Finally order by name as a tie breaker. | |||
930 | return StringRef(A->getName()) < B->getName(); | |||
931 | } | |||
932 | ||||
933 | std::string CodeGenRegisterClass::getQualifiedName() const { | |||
934 | if (Namespace.empty()) | |||
935 | return getName(); | |||
936 | else | |||
937 | return (Namespace + "::" + getName()).str(); | |||
938 | } | |||
939 | ||||
940 | // Compute sub-classes of all register classes. | |||
941 | // Assume the classes are ordered topologically. | |||
942 | void CodeGenRegisterClass::computeSubClasses(CodeGenRegBank &RegBank) { | |||
943 | auto &RegClasses = RegBank.getRegClasses(); | |||
944 | ||||
945 | // Visit backwards so sub-classes are seen first. | |||
946 | for (auto I = RegClasses.rbegin(), E = RegClasses.rend(); I != E; ++I) { | |||
947 | CodeGenRegisterClass &RC = *I; | |||
948 | RC.SubClasses.resize(RegClasses.size()); | |||
949 | RC.SubClasses.set(RC.EnumValue); | |||
950 | if (RC.Artificial) | |||
951 | continue; | |||
952 | ||||
953 | // Normally, all subclasses have IDs >= rci, unless RC is part of a clique. | |||
954 | for (auto I2 = I.base(), E2 = RegClasses.end(); I2 != E2; ++I2) { | |||
955 | CodeGenRegisterClass &SubRC = *I2; | |||
956 | if (RC.SubClasses.test(SubRC.EnumValue)) | |||
957 | continue; | |||
958 | if (!testSubClass(&RC, &SubRC)) | |||
959 | continue; | |||
960 | // SubRC is a sub-class. Grap all its sub-classes so we won't have to | |||
961 | // check them again. | |||
962 | RC.SubClasses |= SubRC.SubClasses; | |||
963 | } | |||
964 | ||||
965 | // Sweep up missed clique members. They will be immediately preceding RC. | |||
966 | for (auto I2 = std::next(I); I2 != E && testSubClass(&RC, &*I2); ++I2) | |||
967 | RC.SubClasses.set(I2->EnumValue); | |||
968 | } | |||
969 | ||||
970 | // Compute the SuperClasses lists from the SubClasses vectors. | |||
971 | for (auto &RC : RegClasses) { | |||
972 | const BitVector &SC = RC.getSubClasses(); | |||
973 | auto I = RegClasses.begin(); | |||
974 | for (int s = 0, next_s = SC.find_first(); next_s != -1; | |||
975 | next_s = SC.find_next(s)) { | |||
976 | std::advance(I, next_s - s); | |||
977 | s = next_s; | |||
978 | if (&*I == &RC) | |||
979 | continue; | |||
980 | I->SuperClasses.push_back(&RC); | |||
981 | } | |||
982 | } | |||
983 | ||||
984 | // With the class hierarchy in place, let synthesized register classes inherit | |||
985 | // properties from their closest super-class. The iteration order here can | |||
986 | // propagate properties down multiple levels. | |||
987 | for (auto &RC : RegClasses) | |||
988 | if (!RC.getDef()) | |||
989 | RC.inheritProperties(RegBank); | |||
990 | } | |||
991 | ||||
992 | Optional<std::pair<CodeGenRegisterClass *, CodeGenRegisterClass *>> | |||
993 | CodeGenRegisterClass::getMatchingSubClassWithSubRegs( | |||
994 | CodeGenRegBank &RegBank, const CodeGenSubRegIndex *SubIdx) const { | |||
995 | auto SizeOrder = [this](const CodeGenRegisterClass *A, | |||
996 | const CodeGenRegisterClass *B) { | |||
997 | // If there are multiple, identical register classes, prefer the original | |||
998 | // register class. | |||
999 | if (A == B) | |||
1000 | return false; | |||
1001 | if (A->getMembers().size() == B->getMembers().size()) | |||
1002 | return A == this; | |||
1003 | return A->getMembers().size() > B->getMembers().size(); | |||
1004 | }; | |||
1005 | ||||
1006 | auto &RegClasses = RegBank.getRegClasses(); | |||
1007 | ||||
1008 | // Find all the subclasses of this one that fully support the sub-register | |||
1009 | // index and order them by size. BiggestSuperRC should always be first. | |||
1010 | CodeGenRegisterClass *BiggestSuperRegRC = getSubClassWithSubReg(SubIdx); | |||
1011 | if (!BiggestSuperRegRC) | |||
1012 | return None; | |||
1013 | BitVector SuperRegRCsBV = BiggestSuperRegRC->getSubClasses(); | |||
1014 | std::vector<CodeGenRegisterClass *> SuperRegRCs; | |||
1015 | for (auto &RC : RegClasses) | |||
1016 | if (SuperRegRCsBV[RC.EnumValue]) | |||
1017 | SuperRegRCs.emplace_back(&RC); | |||
1018 | llvm::stable_sort(SuperRegRCs, SizeOrder); | |||
1019 | ||||
1020 | assert(SuperRegRCs.front() == BiggestSuperRegRC &&(static_cast <bool> (SuperRegRCs.front() == BiggestSuperRegRC && "Biggest class wasn't first") ? void (0) : __assert_fail ("SuperRegRCs.front() == BiggestSuperRegRC && \"Biggest class wasn't first\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1021, __extension__ __PRETTY_FUNCTION__)) | |||
1021 | "Biggest class wasn't first")(static_cast <bool> (SuperRegRCs.front() == BiggestSuperRegRC && "Biggest class wasn't first") ? void (0) : __assert_fail ("SuperRegRCs.front() == BiggestSuperRegRC && \"Biggest class wasn't first\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1021, __extension__ __PRETTY_FUNCTION__)); | |||
1022 | ||||
1023 | // Find all the subreg classes and order them by size too. | |||
1024 | std::vector<std::pair<CodeGenRegisterClass *, BitVector>> SuperRegClasses; | |||
1025 | for (auto &RC: RegClasses) { | |||
1026 | BitVector SuperRegClassesBV(RegClasses.size()); | |||
1027 | RC.getSuperRegClasses(SubIdx, SuperRegClassesBV); | |||
1028 | if (SuperRegClassesBV.any()) | |||
1029 | SuperRegClasses.push_back(std::make_pair(&RC, SuperRegClassesBV)); | |||
1030 | } | |||
1031 | llvm::sort(SuperRegClasses, | |||
1032 | [&](const std::pair<CodeGenRegisterClass *, BitVector> &A, | |||
1033 | const std::pair<CodeGenRegisterClass *, BitVector> &B) { | |||
1034 | return SizeOrder(A.first, B.first); | |||
1035 | }); | |||
1036 | ||||
1037 | // Find the biggest subclass and subreg class such that R:subidx is in the | |||
1038 | // subreg class for all R in subclass. | |||
1039 | // | |||
1040 | // For example: | |||
1041 | // All registers in X86's GR64 have a sub_32bit subregister but no class | |||
1042 | // exists that contains all the 32-bit subregisters because GR64 contains RIP | |||
1043 | // but GR32 does not contain EIP. Instead, we constrain SuperRegRC to | |||
1044 | // GR32_with_sub_8bit (which is identical to GR32_with_sub_32bit) and then, | |||
1045 | // having excluded RIP, we are able to find a SubRegRC (GR32). | |||
1046 | CodeGenRegisterClass *ChosenSuperRegClass = nullptr; | |||
1047 | CodeGenRegisterClass *SubRegRC = nullptr; | |||
1048 | for (auto *SuperRegRC : SuperRegRCs) { | |||
1049 | for (const auto &SuperRegClassPair : SuperRegClasses) { | |||
1050 | const BitVector &SuperRegClassBV = SuperRegClassPair.second; | |||
1051 | if (SuperRegClassBV[SuperRegRC->EnumValue]) { | |||
1052 | SubRegRC = SuperRegClassPair.first; | |||
1053 | ChosenSuperRegClass = SuperRegRC; | |||
1054 | ||||
1055 | // If SubRegRC is bigger than SuperRegRC then there are members of | |||
1056 | // SubRegRC that don't have super registers via SubIdx. Keep looking to | |||
1057 | // find a better fit and fall back on this one if there isn't one. | |||
1058 | // | |||
1059 | // This is intended to prevent X86 from making odd choices such as | |||
1060 | // picking LOW32_ADDR_ACCESS_RBP instead of GR32 in the example above. | |||
1061 | // LOW32_ADDR_ACCESS_RBP is a valid choice but contains registers that | |||
1062 | // aren't subregisters of SuperRegRC whereas GR32 has a direct 1:1 | |||
1063 | // mapping. | |||
1064 | if (SuperRegRC->getMembers().size() >= SubRegRC->getMembers().size()) | |||
1065 | return std::make_pair(ChosenSuperRegClass, SubRegRC); | |||
1066 | } | |||
1067 | } | |||
1068 | ||||
1069 | // If we found a fit but it wasn't quite ideal because SubRegRC had excess | |||
1070 | // registers, then we're done. | |||
1071 | if (ChosenSuperRegClass) | |||
1072 | return std::make_pair(ChosenSuperRegClass, SubRegRC); | |||
1073 | } | |||
1074 | ||||
1075 | return None; | |||
1076 | } | |||
1077 | ||||
1078 | void CodeGenRegisterClass::getSuperRegClasses(const CodeGenSubRegIndex *SubIdx, | |||
1079 | BitVector &Out) const { | |||
1080 | auto FindI = SuperRegClasses.find(SubIdx); | |||
1081 | if (FindI == SuperRegClasses.end()) | |||
1082 | return; | |||
1083 | for (CodeGenRegisterClass *RC : FindI->second) | |||
1084 | Out.set(RC->EnumValue); | |||
1085 | } | |||
1086 | ||||
1087 | // Populate a unique sorted list of units from a register set. | |||
1088 | void CodeGenRegisterClass::buildRegUnitSet(const CodeGenRegBank &RegBank, | |||
1089 | std::vector<unsigned> &RegUnits) const { | |||
1090 | std::vector<unsigned> TmpUnits; | |||
1091 | for (RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI) { | |||
1092 | const RegUnit &RU = RegBank.getRegUnit(*UnitI); | |||
1093 | if (!RU.Artificial) | |||
1094 | TmpUnits.push_back(*UnitI); | |||
1095 | } | |||
1096 | llvm::sort(TmpUnits); | |||
1097 | std::unique_copy(TmpUnits.begin(), TmpUnits.end(), | |||
1098 | std::back_inserter(RegUnits)); | |||
1099 | } | |||
1100 | ||||
1101 | //===----------------------------------------------------------------------===// | |||
1102 | // CodeGenRegBank | |||
1103 | //===----------------------------------------------------------------------===// | |||
1104 | ||||
1105 | CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records, | |||
1106 | const CodeGenHwModes &Modes) : CGH(Modes) { | |||
1107 | // Configure register Sets to understand register classes and tuples. | |||
1108 | Sets.addFieldExpander("RegisterClass", "MemberList"); | |||
1109 | Sets.addFieldExpander("CalleeSavedRegs", "SaveList"); | |||
1110 | Sets.addExpander("RegisterTuples", | |||
1111 | std::make_unique<TupleExpander>(SynthDefs)); | |||
1112 | ||||
1113 | // Read in the user-defined (named) sub-register indices. | |||
1114 | // More indices will be synthesized later. | |||
1115 | std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex"); | |||
1116 | llvm::sort(SRIs, LessRecord()); | |||
1117 | for (unsigned i = 0, e = SRIs.size(); i != e; ++i) | |||
1118 | getSubRegIdx(SRIs[i]); | |||
1119 | // Build composite maps from ComposedOf fields. | |||
1120 | for (auto &Idx : SubRegIndices) | |||
1121 | Idx.updateComponents(*this); | |||
1122 | ||||
1123 | // Read in the register definitions. | |||
1124 | std::vector<Record*> Regs = Records.getAllDerivedDefinitions("Register"); | |||
1125 | llvm::sort(Regs, LessRecordRegister()); | |||
1126 | // Assign the enumeration values. | |||
1127 | for (unsigned i = 0, e = Regs.size(); i != e; ++i) | |||
1128 | getReg(Regs[i]); | |||
1129 | ||||
1130 | // Expand tuples and number the new registers. | |||
1131 | std::vector<Record*> Tups = | |||
1132 | Records.getAllDerivedDefinitions("RegisterTuples"); | |||
1133 | ||||
1134 | for (Record *R : Tups) { | |||
1135 | std::vector<Record *> TupRegs = *Sets.expand(R); | |||
1136 | llvm::sort(TupRegs, LessRecordRegister()); | |||
1137 | for (Record *RC : TupRegs) | |||
1138 | getReg(RC); | |||
1139 | } | |||
1140 | ||||
1141 | // Now all the registers are known. Build the object graph of explicit | |||
1142 | // register-register references. | |||
1143 | for (auto &Reg : Registers) | |||
1144 | Reg.buildObjectGraph(*this); | |||
1145 | ||||
1146 | // Compute register name map. | |||
1147 | for (auto &Reg : Registers) | |||
1148 | // FIXME: This could just be RegistersByName[name] = register, except that | |||
1149 | // causes some failures in MIPS - perhaps they have duplicate register name | |||
1150 | // entries? (or maybe there's a reason for it - I don't know much about this | |||
1151 | // code, just drive-by refactoring) | |||
1152 | RegistersByName.insert( | |||
1153 | std::make_pair(Reg.TheDef->getValueAsString("AsmName"), &Reg)); | |||
1154 | ||||
1155 | // Precompute all sub-register maps. | |||
1156 | // This will create Composite entries for all inferred sub-register indices. | |||
1157 | for (auto &Reg : Registers) | |||
1158 | Reg.computeSubRegs(*this); | |||
1159 | ||||
1160 | // Compute transitive closure of subregister index ConcatenationOf vectors | |||
1161 | // and initialize ConcatIdx map. | |||
1162 | for (CodeGenSubRegIndex &SRI : SubRegIndices) { | |||
1163 | SRI.computeConcatTransitiveClosure(); | |||
1164 | if (!SRI.ConcatenationOf.empty()) | |||
1165 | ConcatIdx.insert(std::make_pair( | |||
1166 | SmallVector<CodeGenSubRegIndex*,8>(SRI.ConcatenationOf.begin(), | |||
1167 | SRI.ConcatenationOf.end()), &SRI)); | |||
1168 | } | |||
1169 | ||||
1170 | // Infer even more sub-registers by combining leading super-registers. | |||
1171 | for (auto &Reg : Registers) | |||
1172 | if (Reg.CoveredBySubRegs) | |||
1173 | Reg.computeSecondarySubRegs(*this); | |||
1174 | ||||
1175 | // After the sub-register graph is complete, compute the topologically | |||
1176 | // ordered SuperRegs list. | |||
1177 | for (auto &Reg : Registers) | |||
1178 | Reg.computeSuperRegs(*this); | |||
1179 | ||||
1180 | // For each pair of Reg:SR, if both are non-artificial, mark the | |||
1181 | // corresponding sub-register index as non-artificial. | |||
1182 | for (auto &Reg : Registers) { | |||
1183 | if (Reg.Artificial) | |||
1184 | continue; | |||
1185 | for (auto P : Reg.getSubRegs()) { | |||
1186 | const CodeGenRegister *SR = P.second; | |||
1187 | if (!SR->Artificial) | |||
1188 | P.first->Artificial = false; | |||
1189 | } | |||
1190 | } | |||
1191 | ||||
1192 | // Native register units are associated with a leaf register. They've all been | |||
1193 | // discovered now. | |||
1194 | NumNativeRegUnits = RegUnits.size(); | |||
1195 | ||||
1196 | // Read in register class definitions. | |||
1197 | std::vector<Record*> RCs = Records.getAllDerivedDefinitions("RegisterClass"); | |||
1198 | if (RCs.empty()) | |||
1199 | PrintFatalError("No 'RegisterClass' subclasses defined!"); | |||
1200 | ||||
1201 | // Allocate user-defined register classes. | |||
1202 | for (auto *R : RCs) { | |||
1203 | RegClasses.emplace_back(*this, R); | |||
1204 | CodeGenRegisterClass &RC = RegClasses.back(); | |||
1205 | if (!RC.Artificial) | |||
1206 | addToMaps(&RC); | |||
1207 | } | |||
1208 | ||||
1209 | // Infer missing classes to create a full algebra. | |||
1210 | computeInferredRegisterClasses(); | |||
1211 | ||||
1212 | // Order register classes topologically and assign enum values. | |||
1213 | RegClasses.sort(TopoOrderRC); | |||
1214 | unsigned i = 0; | |||
1215 | for (auto &RC : RegClasses) | |||
1216 | RC.EnumValue = i++; | |||
1217 | CodeGenRegisterClass::computeSubClasses(*this); | |||
1218 | } | |||
1219 | ||||
1220 | // Create a synthetic CodeGenSubRegIndex without a corresponding Record. | |||
1221 | CodeGenSubRegIndex* | |||
1222 | CodeGenRegBank::createSubRegIndex(StringRef Name, StringRef Namespace) { | |||
1223 | SubRegIndices.emplace_back(Name, Namespace, SubRegIndices.size() + 1); | |||
1224 | return &SubRegIndices.back(); | |||
1225 | } | |||
1226 | ||||
1227 | CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) { | |||
1228 | CodeGenSubRegIndex *&Idx = Def2SubRegIdx[Def]; | |||
1229 | if (Idx) | |||
1230 | return Idx; | |||
1231 | SubRegIndices.emplace_back(Def, SubRegIndices.size() + 1); | |||
1232 | Idx = &SubRegIndices.back(); | |||
1233 | return Idx; | |||
1234 | } | |||
1235 | ||||
1236 | const CodeGenSubRegIndex * | |||
1237 | CodeGenRegBank::findSubRegIdx(const Record* Def) const { | |||
1238 | return Def2SubRegIdx.lookup(Def); | |||
1239 | } | |||
1240 | ||||
1241 | CodeGenRegister *CodeGenRegBank::getReg(Record *Def) { | |||
1242 | CodeGenRegister *&Reg = Def2Reg[Def]; | |||
1243 | if (Reg) | |||
1244 | return Reg; | |||
1245 | Registers.emplace_back(Def, Registers.size() + 1); | |||
1246 | Reg = &Registers.back(); | |||
1247 | return Reg; | |||
1248 | } | |||
1249 | ||||
1250 | void CodeGenRegBank::addToMaps(CodeGenRegisterClass *RC) { | |||
1251 | if (Record *Def = RC->getDef()) | |||
1252 | Def2RC.insert(std::make_pair(Def, RC)); | |||
1253 | ||||
1254 | // Duplicate classes are rejected by insert(). | |||
1255 | // That's OK, we only care about the properties handled by CGRC::Key. | |||
1256 | CodeGenRegisterClass::Key K(*RC); | |||
1257 | Key2RC.insert(std::make_pair(K, RC)); | |||
1258 | } | |||
1259 | ||||
1260 | // Create a synthetic sub-class if it is missing. | |||
1261 | CodeGenRegisterClass* | |||
1262 | CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC, | |||
1263 | const CodeGenRegister::Vec *Members, | |||
1264 | StringRef Name) { | |||
1265 | // Synthetic sub-class has the same size and alignment as RC. | |||
1266 | CodeGenRegisterClass::Key K(Members, RC->RSI); | |||
1267 | RCKeyMap::const_iterator FoundI = Key2RC.find(K); | |||
1268 | if (FoundI != Key2RC.end()) | |||
1269 | return FoundI->second; | |||
1270 | ||||
1271 | // Sub-class doesn't exist, create a new one. | |||
1272 | RegClasses.emplace_back(*this, Name, K); | |||
1273 | addToMaps(&RegClasses.back()); | |||
1274 | return &RegClasses.back(); | |||
1275 | } | |||
1276 | ||||
1277 | CodeGenRegisterClass *CodeGenRegBank::getRegClass(const Record *Def) const { | |||
1278 | if (CodeGenRegisterClass *RC = Def2RC.lookup(Def)) | |||
1279 | return RC; | |||
1280 | ||||
1281 | PrintFatalError(Def->getLoc(), "Not a known RegisterClass!"); | |||
1282 | } | |||
1283 | ||||
1284 | CodeGenSubRegIndex* | |||
1285 | CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A, | |||
1286 | CodeGenSubRegIndex *B) { | |||
1287 | // Look for an existing entry. | |||
1288 | CodeGenSubRegIndex *Comp = A->compose(B); | |||
1289 | if (Comp) | |||
1290 | return Comp; | |||
1291 | ||||
1292 | // None exists, synthesize one. | |||
1293 | std::string Name = A->getName() + "_then_" + B->getName(); | |||
1294 | Comp = createSubRegIndex(Name, A->getNamespace()); | |||
1295 | A->addComposite(B, Comp); | |||
1296 | return Comp; | |||
1297 | } | |||
1298 | ||||
1299 | CodeGenSubRegIndex *CodeGenRegBank:: | |||
1300 | getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex *, 8> &Parts) { | |||
1301 | assert(Parts.size() > 1 && "Need two parts to concatenate")(static_cast <bool> (Parts.size() > 1 && "Need two parts to concatenate" ) ? void (0) : __assert_fail ("Parts.size() > 1 && \"Need two parts to concatenate\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1301, __extension__ __PRETTY_FUNCTION__)); | |||
1302 | #ifndef NDEBUG | |||
1303 | for (CodeGenSubRegIndex *Idx : Parts) { | |||
1304 | assert(Idx->ConcatenationOf.empty() && "No transitive closure?")(static_cast <bool> (Idx->ConcatenationOf.empty() && "No transitive closure?") ? void (0) : __assert_fail ("Idx->ConcatenationOf.empty() && \"No transitive closure?\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1304, __extension__ __PRETTY_FUNCTION__)); | |||
1305 | } | |||
1306 | #endif | |||
1307 | ||||
1308 | // Look for an existing entry. | |||
1309 | CodeGenSubRegIndex *&Idx = ConcatIdx[Parts]; | |||
1310 | if (Idx) | |||
1311 | return Idx; | |||
1312 | ||||
1313 | // None exists, synthesize one. | |||
1314 | std::string Name = Parts.front()->getName(); | |||
1315 | // Determine whether all parts are contiguous. | |||
1316 | bool isContinuous = true; | |||
1317 | unsigned Size = Parts.front()->Size; | |||
1318 | unsigned LastOffset = Parts.front()->Offset; | |||
1319 | unsigned LastSize = Parts.front()->Size; | |||
1320 | for (unsigned i = 1, e = Parts.size(); i != e; ++i) { | |||
1321 | Name += '_'; | |||
1322 | Name += Parts[i]->getName(); | |||
1323 | Size += Parts[i]->Size; | |||
1324 | if (Parts[i]->Offset != (LastOffset + LastSize)) | |||
1325 | isContinuous = false; | |||
1326 | LastOffset = Parts[i]->Offset; | |||
1327 | LastSize = Parts[i]->Size; | |||
1328 | } | |||
1329 | Idx = createSubRegIndex(Name, Parts.front()->getNamespace()); | |||
1330 | Idx->Size = Size; | |||
1331 | Idx->Offset = isContinuous ? Parts.front()->Offset : -1; | |||
1332 | Idx->ConcatenationOf.assign(Parts.begin(), Parts.end()); | |||
1333 | return Idx; | |||
1334 | } | |||
1335 | ||||
1336 | void CodeGenRegBank::computeComposites() { | |||
1337 | using RegMap = std::map<const CodeGenRegister*, const CodeGenRegister*>; | |||
1338 | ||||
1339 | // Subreg -> { Reg->Reg }, where the right-hand side is the mapping from | |||
1340 | // register to (sub)register associated with the action of the left-hand | |||
1341 | // side subregister. | |||
1342 | std::map<const CodeGenSubRegIndex*, RegMap> SubRegAction; | |||
1343 | for (const CodeGenRegister &R : Registers) { | |||
1344 | const CodeGenRegister::SubRegMap &SM = R.getSubRegs(); | |||
1345 | for (std::pair<const CodeGenSubRegIndex*, const CodeGenRegister*> P : SM) | |||
1346 | SubRegAction[P.first].insert({&R, P.second}); | |||
1347 | } | |||
1348 | ||||
1349 | // Calculate the composition of two subregisters as compositions of their | |||
1350 | // associated actions. | |||
1351 | auto compose = [&SubRegAction] (const CodeGenSubRegIndex *Sub1, | |||
1352 | const CodeGenSubRegIndex *Sub2) { | |||
1353 | RegMap C; | |||
1354 | const RegMap &Img1 = SubRegAction.at(Sub1); | |||
1355 | const RegMap &Img2 = SubRegAction.at(Sub2); | |||
1356 | for (std::pair<const CodeGenRegister*, const CodeGenRegister*> P : Img1) { | |||
1357 | auto F = Img2.find(P.second); | |||
1358 | if (F != Img2.end()) | |||
1359 | C.insert({P.first, F->second}); | |||
1360 | } | |||
1361 | return C; | |||
1362 | }; | |||
1363 | ||||
1364 | // Check if the two maps agree on the intersection of their domains. | |||
1365 | auto agree = [] (const RegMap &Map1, const RegMap &Map2) { | |||
1366 | // Technically speaking, an empty map agrees with any other map, but | |||
1367 | // this could flag false positives. We're interested in non-vacuous | |||
1368 | // agreements. | |||
1369 | if (Map1.empty() || Map2.empty()) | |||
1370 | return false; | |||
1371 | for (std::pair<const CodeGenRegister*, const CodeGenRegister*> P : Map1) { | |||
1372 | auto F = Map2.find(P.first); | |||
1373 | if (F == Map2.end() || P.second != F->second) | |||
1374 | return false; | |||
1375 | } | |||
1376 | return true; | |||
1377 | }; | |||
1378 | ||||
1379 | using CompositePair = std::pair<const CodeGenSubRegIndex*, | |||
1380 | const CodeGenSubRegIndex*>; | |||
1381 | SmallSet<CompositePair,4> UserDefined; | |||
1382 | for (const CodeGenSubRegIndex &Idx : SubRegIndices) | |||
1383 | for (auto P : Idx.getComposites()) | |||
1384 | UserDefined.insert(std::make_pair(&Idx, P.first)); | |||
1385 | ||||
1386 | // Keep track of TopoSigs visited. We only need to visit each TopoSig once, | |||
1387 | // and many registers will share TopoSigs on regular architectures. | |||
1388 | BitVector TopoSigs(getNumTopoSigs()); | |||
1389 | ||||
1390 | for (const auto &Reg1 : Registers) { | |||
1391 | // Skip identical subreg structures already processed. | |||
1392 | if (TopoSigs.test(Reg1.getTopoSig())) | |||
1393 | continue; | |||
1394 | TopoSigs.set(Reg1.getTopoSig()); | |||
1395 | ||||
1396 | const CodeGenRegister::SubRegMap &SRM1 = Reg1.getSubRegs(); | |||
1397 | for (auto I1 : SRM1) { | |||
1398 | CodeGenSubRegIndex *Idx1 = I1.first; | |||
1399 | CodeGenRegister *Reg2 = I1.second; | |||
1400 | // Ignore identity compositions. | |||
1401 | if (&Reg1 == Reg2) | |||
1402 | continue; | |||
1403 | const CodeGenRegister::SubRegMap &SRM2 = Reg2->getSubRegs(); | |||
1404 | // Try composing Idx1 with another SubRegIndex. | |||
1405 | for (auto I2 : SRM2) { | |||
1406 | CodeGenSubRegIndex *Idx2 = I2.first; | |||
1407 | CodeGenRegister *Reg3 = I2.second; | |||
1408 | // Ignore identity compositions. | |||
1409 | if (Reg2 == Reg3) | |||
1410 | continue; | |||
1411 | // OK Reg1:IdxPair == Reg3. Find the index with Reg:Idx == Reg3. | |||
1412 | CodeGenSubRegIndex *Idx3 = Reg1.getSubRegIndex(Reg3); | |||
1413 | assert(Idx3 && "Sub-register doesn't have an index")(static_cast <bool> (Idx3 && "Sub-register doesn't have an index" ) ? void (0) : __assert_fail ("Idx3 && \"Sub-register doesn't have an index\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1413, __extension__ __PRETTY_FUNCTION__)); | |||
1414 | ||||
1415 | // Conflicting composition? Emit a warning but allow it. | |||
1416 | if (CodeGenSubRegIndex *Prev = Idx1->addComposite(Idx2, Idx3)) { | |||
1417 | // If the composition was not user-defined, always emit a warning. | |||
1418 | if (!UserDefined.count({Idx1, Idx2}) || | |||
1419 | agree(compose(Idx1, Idx2), SubRegAction.at(Idx3))) | |||
1420 | PrintWarning(Twine("SubRegIndex ") + Idx1->getQualifiedName() + | |||
1421 | " and " + Idx2->getQualifiedName() + | |||
1422 | " compose ambiguously as " + Prev->getQualifiedName() + | |||
1423 | " or " + Idx3->getQualifiedName()); | |||
1424 | } | |||
1425 | } | |||
1426 | } | |||
1427 | } | |||
1428 | } | |||
1429 | ||||
1430 | // Compute lane masks. This is similar to register units, but at the | |||
1431 | // sub-register index level. Each bit in the lane mask is like a register unit | |||
1432 | // class, and two lane masks will have a bit in common if two sub-register | |||
1433 | // indices overlap in some register. | |||
1434 | // | |||
1435 | // Conservatively share a lane mask bit if two sub-register indices overlap in | |||
1436 | // some registers, but not in others. That shouldn't happen a lot. | |||
1437 | void CodeGenRegBank::computeSubRegLaneMasks() { | |||
1438 | // First assign individual bits to all the leaf indices. | |||
1439 | unsigned Bit = 0; | |||
1440 | // Determine mask of lanes that cover their registers. | |||
1441 | CoveringLanes = LaneBitmask::getAll(); | |||
1442 | for (auto &Idx : SubRegIndices) { | |||
1443 | if (Idx.getComposites().empty()) { | |||
1444 | if (Bit > LaneBitmask::BitWidth) { | |||
1445 | PrintFatalError( | |||
1446 | Twine("Ran out of lanemask bits to represent subregister ") | |||
1447 | + Idx.getName()); | |||
1448 | } | |||
1449 | Idx.LaneMask = LaneBitmask::getLane(Bit); | |||
1450 | ++Bit; | |||
1451 | } else { | |||
1452 | Idx.LaneMask = LaneBitmask::getNone(); | |||
1453 | } | |||
1454 | } | |||
1455 | ||||
1456 | // Compute transformation sequences for composeSubRegIndexLaneMask. The idea | |||
1457 | // here is that for each possible target subregister we look at the leafs | |||
1458 | // in the subregister graph that compose for this target and create | |||
1459 | // transformation sequences for the lanemasks. Each step in the sequence | |||
1460 | // consists of a bitmask and a bitrotate operation. As the rotation amounts | |||
1461 | // are usually the same for many subregisters we can easily combine the steps | |||
1462 | // by combining the masks. | |||
1463 | for (const auto &Idx : SubRegIndices) { | |||
1464 | const auto &Composites = Idx.getComposites(); | |||
1465 | auto &LaneTransforms = Idx.CompositionLaneMaskTransform; | |||
1466 | ||||
1467 | if (Composites.empty()) { | |||
1468 | // Moving from a class with no subregisters we just had a single lane: | |||
1469 | // The subregister must be a leaf subregister and only occupies 1 bit. | |||
1470 | // Move the bit from the class without subregisters into that position. | |||
1471 | unsigned DstBit = Idx.LaneMask.getHighestLane(); | |||
1472 | assert(Idx.LaneMask == LaneBitmask::getLane(DstBit) &&(static_cast <bool> (Idx.LaneMask == LaneBitmask::getLane (DstBit) && "Must be a leaf subregister") ? void (0) : __assert_fail ("Idx.LaneMask == LaneBitmask::getLane(DstBit) && \"Must be a leaf subregister\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1473, __extension__ __PRETTY_FUNCTION__)) | |||
1473 | "Must be a leaf subregister")(static_cast <bool> (Idx.LaneMask == LaneBitmask::getLane (DstBit) && "Must be a leaf subregister") ? void (0) : __assert_fail ("Idx.LaneMask == LaneBitmask::getLane(DstBit) && \"Must be a leaf subregister\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1473, __extension__ __PRETTY_FUNCTION__)); | |||
1474 | MaskRolPair MaskRol = { LaneBitmask::getLane(0), (uint8_t)DstBit }; | |||
1475 | LaneTransforms.push_back(MaskRol); | |||
1476 | } else { | |||
1477 | // Go through all leaf subregisters and find the ones that compose with | |||
1478 | // Idx. These make out all possible valid bits in the lane mask we want to | |||
1479 | // transform. Looking only at the leafs ensure that only a single bit in | |||
1480 | // the mask is set. | |||
1481 | unsigned NextBit = 0; | |||
1482 | for (auto &Idx2 : SubRegIndices) { | |||
1483 | // Skip non-leaf subregisters. | |||
1484 | if (!Idx2.getComposites().empty()) | |||
1485 | continue; | |||
1486 | // Replicate the behaviour from the lane mask generation loop above. | |||
1487 | unsigned SrcBit = NextBit; | |||
1488 | LaneBitmask SrcMask = LaneBitmask::getLane(SrcBit); | |||
1489 | if (NextBit < LaneBitmask::BitWidth-1) | |||
1490 | ++NextBit; | |||
1491 | assert(Idx2.LaneMask == SrcMask)(static_cast <bool> (Idx2.LaneMask == SrcMask) ? void ( 0) : __assert_fail ("Idx2.LaneMask == SrcMask", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1491, __extension__ __PRETTY_FUNCTION__)); | |||
1492 | ||||
1493 | // Get the composed subregister if there is any. | |||
1494 | auto C = Composites.find(&Idx2); | |||
1495 | if (C == Composites.end()) | |||
1496 | continue; | |||
1497 | const CodeGenSubRegIndex *Composite = C->second; | |||
1498 | // The Composed subreg should be a leaf subreg too | |||
1499 | assert(Composite->getComposites().empty())(static_cast <bool> (Composite->getComposites().empty ()) ? void (0) : __assert_fail ("Composite->getComposites().empty()" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1499, __extension__ __PRETTY_FUNCTION__)); | |||
1500 | ||||
1501 | // Create Mask+Rotate operation and merge with existing ops if possible. | |||
1502 | unsigned DstBit = Composite->LaneMask.getHighestLane(); | |||
1503 | int Shift = DstBit - SrcBit; | |||
1504 | uint8_t RotateLeft = Shift >= 0 ? (uint8_t)Shift | |||
1505 | : LaneBitmask::BitWidth + Shift; | |||
1506 | for (auto &I : LaneTransforms) { | |||
1507 | if (I.RotateLeft == RotateLeft) { | |||
1508 | I.Mask |= SrcMask; | |||
1509 | SrcMask = LaneBitmask::getNone(); | |||
1510 | } | |||
1511 | } | |||
1512 | if (SrcMask.any()) { | |||
1513 | MaskRolPair MaskRol = { SrcMask, RotateLeft }; | |||
1514 | LaneTransforms.push_back(MaskRol); | |||
1515 | } | |||
1516 | } | |||
1517 | } | |||
1518 | ||||
1519 | // Optimize if the transformation consists of one step only: Set mask to | |||
1520 | // 0xffffffff (including some irrelevant invalid bits) so that it should | |||
1521 | // merge with more entries later while compressing the table. | |||
1522 | if (LaneTransforms.size() == 1) | |||
1523 | LaneTransforms[0].Mask = LaneBitmask::getAll(); | |||
1524 | ||||
1525 | // Further compression optimization: For invalid compositions resulting | |||
1526 | // in a sequence with 0 entries we can just pick any other. Choose | |||
1527 | // Mask 0xffffffff with Rotation 0. | |||
1528 | if (LaneTransforms.size() == 0) { | |||
1529 | MaskRolPair P = { LaneBitmask::getAll(), 0 }; | |||
1530 | LaneTransforms.push_back(P); | |||
1531 | } | |||
1532 | } | |||
1533 | ||||
1534 | // FIXME: What if ad-hoc aliasing introduces overlaps that aren't represented | |||
1535 | // by the sub-register graph? This doesn't occur in any known targets. | |||
1536 | ||||
1537 | // Inherit lanes from composites. | |||
1538 | for (const auto &Idx : SubRegIndices) { | |||
1539 | LaneBitmask Mask = Idx.computeLaneMask(); | |||
1540 | // If some super-registers without CoveredBySubRegs use this index, we can | |||
1541 | // no longer assume that the lanes are covering their registers. | |||
1542 | if (!Idx.AllSuperRegsCovered) | |||
1543 | CoveringLanes &= ~Mask; | |||
1544 | } | |||
1545 | ||||
1546 | // Compute lane mask combinations for register classes. | |||
1547 | for (auto &RegClass : RegClasses) { | |||
1548 | LaneBitmask LaneMask; | |||
1549 | for (const auto &SubRegIndex : SubRegIndices) { | |||
1550 | if (RegClass.getSubClassWithSubReg(&SubRegIndex) == nullptr) | |||
1551 | continue; | |||
1552 | LaneMask |= SubRegIndex.LaneMask; | |||
1553 | } | |||
1554 | ||||
1555 | // For classes without any subregisters set LaneMask to 1 instead of 0. | |||
1556 | // This makes it easier for client code to handle classes uniformly. | |||
1557 | if (LaneMask.none()) | |||
1558 | LaneMask = LaneBitmask::getLane(0); | |||
1559 | ||||
1560 | RegClass.LaneMask = LaneMask; | |||
1561 | } | |||
1562 | } | |||
1563 | ||||
1564 | namespace { | |||
1565 | ||||
1566 | // UberRegSet is a helper class for computeRegUnitWeights. Each UberRegSet is | |||
1567 | // the transitive closure of the union of overlapping register | |||
1568 | // classes. Together, the UberRegSets form a partition of the registers. If we | |||
1569 | // consider overlapping register classes to be connected, then each UberRegSet | |||
1570 | // is a set of connected components. | |||
1571 | // | |||
1572 | // An UberRegSet will likely be a horizontal slice of register names of | |||
1573 | // the same width. Nontrivial subregisters should then be in a separate | |||
1574 | // UberRegSet. But this property isn't required for valid computation of | |||
1575 | // register unit weights. | |||
1576 | // | |||
1577 | // A Weight field caches the max per-register unit weight in each UberRegSet. | |||
1578 | // | |||
1579 | // A set of SingularDeterminants flags single units of some register in this set | |||
1580 | // for which the unit weight equals the set weight. These units should not have | |||
1581 | // their weight increased. | |||
1582 | struct UberRegSet { | |||
1583 | CodeGenRegister::Vec Regs; | |||
1584 | unsigned Weight = 0; | |||
1585 | CodeGenRegister::RegUnitList SingularDeterminants; | |||
1586 | ||||
1587 | UberRegSet() = default; | |||
1588 | }; | |||
1589 | ||||
1590 | } // end anonymous namespace | |||
1591 | ||||
1592 | // Partition registers into UberRegSets, where each set is the transitive | |||
1593 | // closure of the union of overlapping register classes. | |||
1594 | // | |||
1595 | // UberRegSets[0] is a special non-allocatable set. | |||
1596 | static void computeUberSets(std::vector<UberRegSet> &UberSets, | |||
1597 | std::vector<UberRegSet*> &RegSets, | |||
1598 | CodeGenRegBank &RegBank) { | |||
1599 | const auto &Registers = RegBank.getRegisters(); | |||
1600 | ||||
1601 | // The Register EnumValue is one greater than its index into Registers. | |||
1602 | assert(Registers.size() == Registers.back().EnumValue &&(static_cast <bool> (Registers.size() == Registers.back ().EnumValue && "register enum value mismatch") ? void (0) : __assert_fail ("Registers.size() == Registers.back().EnumValue && \"register enum value mismatch\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1603, __extension__ __PRETTY_FUNCTION__)) | |||
1603 | "register enum value mismatch")(static_cast <bool> (Registers.size() == Registers.back ().EnumValue && "register enum value mismatch") ? void (0) : __assert_fail ("Registers.size() == Registers.back().EnumValue && \"register enum value mismatch\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1603, __extension__ __PRETTY_FUNCTION__)); | |||
1604 | ||||
1605 | // For simplicitly make the SetID the same as EnumValue. | |||
1606 | IntEqClasses UberSetIDs(Registers.size()+1); | |||
1607 | std::set<unsigned> AllocatableRegs; | |||
1608 | for (auto &RegClass : RegBank.getRegClasses()) { | |||
1609 | if (!RegClass.Allocatable) | |||
1610 | continue; | |||
1611 | ||||
1612 | const CodeGenRegister::Vec &Regs = RegClass.getMembers(); | |||
1613 | if (Regs.empty()) | |||
1614 | continue; | |||
1615 | ||||
1616 | unsigned USetID = UberSetIDs.findLeader((*Regs.begin())->EnumValue); | |||
1617 | assert(USetID && "register number 0 is invalid")(static_cast <bool> (USetID && "register number 0 is invalid" ) ? void (0) : __assert_fail ("USetID && \"register number 0 is invalid\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1617, __extension__ __PRETTY_FUNCTION__)); | |||
1618 | ||||
1619 | AllocatableRegs.insert((*Regs.begin())->EnumValue); | |||
1620 | for (auto I = std::next(Regs.begin()), E = Regs.end(); I != E; ++I) { | |||
1621 | AllocatableRegs.insert((*I)->EnumValue); | |||
1622 | UberSetIDs.join(USetID, (*I)->EnumValue); | |||
1623 | } | |||
1624 | } | |||
1625 | // Combine non-allocatable regs. | |||
1626 | for (const auto &Reg : Registers) { | |||
1627 | unsigned RegNum = Reg.EnumValue; | |||
1628 | if (AllocatableRegs.count(RegNum)) | |||
1629 | continue; | |||
1630 | ||||
1631 | UberSetIDs.join(0, RegNum); | |||
1632 | } | |||
1633 | UberSetIDs.compress(); | |||
1634 | ||||
1635 | // Make the first UberSet a special unallocatable set. | |||
1636 | unsigned ZeroID = UberSetIDs[0]; | |||
1637 | ||||
1638 | // Insert Registers into the UberSets formed by union-find. | |||
1639 | // Do not resize after this. | |||
1640 | UberSets.resize(UberSetIDs.getNumClasses()); | |||
1641 | unsigned i = 0; | |||
1642 | for (const CodeGenRegister &Reg : Registers) { | |||
1643 | unsigned USetID = UberSetIDs[Reg.EnumValue]; | |||
1644 | if (!USetID) | |||
1645 | USetID = ZeroID; | |||
1646 | else if (USetID == ZeroID) | |||
1647 | USetID = 0; | |||
1648 | ||||
1649 | UberRegSet *USet = &UberSets[USetID]; | |||
1650 | USet->Regs.push_back(&Reg); | |||
1651 | sortAndUniqueRegisters(USet->Regs); | |||
1652 | RegSets[i++] = USet; | |||
1653 | } | |||
1654 | } | |||
1655 | ||||
1656 | // Recompute each UberSet weight after changing unit weights. | |||
1657 | static void computeUberWeights(std::vector<UberRegSet> &UberSets, | |||
1658 | CodeGenRegBank &RegBank) { | |||
1659 | // Skip the first unallocatable set. | |||
1660 | for (std::vector<UberRegSet>::iterator I = std::next(UberSets.begin()), | |||
1661 | E = UberSets.end(); I != E; ++I) { | |||
1662 | ||||
1663 | // Initialize all unit weights in this set, and remember the max units/reg. | |||
1664 | const CodeGenRegister *Reg = nullptr; | |||
1665 | unsigned MaxWeight = 0, Weight = 0; | |||
1666 | for (RegUnitIterator UnitI(I->Regs); UnitI.isValid(); ++UnitI) { | |||
1667 | if (Reg != UnitI.getReg()) { | |||
1668 | if (Weight > MaxWeight) | |||
1669 | MaxWeight = Weight; | |||
1670 | Reg = UnitI.getReg(); | |||
1671 | Weight = 0; | |||
1672 | } | |||
1673 | if (!RegBank.getRegUnit(*UnitI).Artificial) { | |||
1674 | unsigned UWeight = RegBank.getRegUnit(*UnitI).Weight; | |||
1675 | if (!UWeight) { | |||
1676 | UWeight = 1; | |||
1677 | RegBank.increaseRegUnitWeight(*UnitI, UWeight); | |||
1678 | } | |||
1679 | Weight += UWeight; | |||
1680 | } | |||
1681 | } | |||
1682 | if (Weight > MaxWeight) | |||
1683 | MaxWeight = Weight; | |||
1684 | if (I->Weight != MaxWeight) { | |||
1685 | LLVM_DEBUG(dbgs() << "UberSet " << I - UberSets.begin() << " Weight "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1686 | << MaxWeight;do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1687 | for (auto &Unitdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1688 | : I->Regs) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1689 | << " " << Unit->getName();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false) | |||
1690 | dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; for (auto &Unit : I->Regs) dbgs() << " " << Unit ->getName(); dbgs() << "\n"; } } while (false); | |||
1691 | // Update the set weight. | |||
1692 | I->Weight = MaxWeight; | |||
1693 | } | |||
1694 | ||||
1695 | // Find singular determinants. | |||
1696 | for (const auto R : I->Regs) { | |||
1697 | if (R->getRegUnits().count() == 1 && R->getWeight(RegBank) == I->Weight) { | |||
1698 | I->SingularDeterminants |= R->getRegUnits(); | |||
1699 | } | |||
1700 | } | |||
1701 | } | |||
1702 | } | |||
1703 | ||||
1704 | // normalizeWeight is a computeRegUnitWeights helper that adjusts the weight of | |||
1705 | // a register and its subregisters so that they have the same weight as their | |||
1706 | // UberSet. Self-recursion processes the subregister tree in postorder so | |||
1707 | // subregisters are normalized first. | |||
1708 | // | |||
1709 | // Side effects: | |||
1710 | // - creates new adopted register units | |||
1711 | // - causes superregisters to inherit adopted units | |||
1712 | // - increases the weight of "singular" units | |||
1713 | // - induces recomputation of UberWeights. | |||
1714 | static bool normalizeWeight(CodeGenRegister *Reg, | |||
1715 | std::vector<UberRegSet> &UberSets, | |||
1716 | std::vector<UberRegSet*> &RegSets, | |||
1717 | BitVector &NormalRegs, | |||
1718 | CodeGenRegister::RegUnitList &NormalUnits, | |||
1719 | CodeGenRegBank &RegBank) { | |||
1720 | NormalRegs.resize(std::max(Reg->EnumValue + 1, NormalRegs.size())); | |||
1721 | if (NormalRegs.test(Reg->EnumValue)) | |||
1722 | return false; | |||
1723 | NormalRegs.set(Reg->EnumValue); | |||
1724 | ||||
1725 | bool Changed = false; | |||
1726 | const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs(); | |||
1727 | for (auto SRI : SRM) { | |||
1728 | if (SRI.second == Reg) | |||
1729 | continue; // self-cycles happen | |||
1730 | ||||
1731 | Changed |= normalizeWeight(SRI.second, UberSets, RegSets, NormalRegs, | |||
1732 | NormalUnits, RegBank); | |||
1733 | } | |||
1734 | // Postorder register normalization. | |||
1735 | ||||
1736 | // Inherit register units newly adopted by subregisters. | |||
1737 | if (Reg->inheritRegUnits(RegBank)) | |||
1738 | computeUberWeights(UberSets, RegBank); | |||
1739 | ||||
1740 | // Check if this register is too skinny for its UberRegSet. | |||
1741 | UberRegSet *UberSet = RegSets[RegBank.getRegIndex(Reg)]; | |||
1742 | ||||
1743 | unsigned RegWeight = Reg->getWeight(RegBank); | |||
1744 | if (UberSet->Weight > RegWeight) { | |||
1745 | // A register unit's weight can be adjusted only if it is the singular unit | |||
1746 | // for this register, has not been used to normalize a subregister's set, | |||
1747 | // and has not already been used to singularly determine this UberRegSet. | |||
1748 | unsigned AdjustUnit = *Reg->getRegUnits().begin(); | |||
1749 | if (Reg->getRegUnits().count() != 1 | |||
1750 | || hasRegUnit(NormalUnits, AdjustUnit) | |||
1751 | || hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) { | |||
1752 | // We don't have an adjustable unit, so adopt a new one. | |||
1753 | AdjustUnit = RegBank.newRegUnit(UberSet->Weight - RegWeight); | |||
1754 | Reg->adoptRegUnit(AdjustUnit); | |||
1755 | // Adopting a unit does not immediately require recomputing set weights. | |||
1756 | } | |||
1757 | else { | |||
1758 | // Adjust the existing single unit. | |||
1759 | if (!RegBank.getRegUnit(AdjustUnit).Artificial) | |||
1760 | RegBank.increaseRegUnitWeight(AdjustUnit, UberSet->Weight - RegWeight); | |||
1761 | // The unit may be shared among sets and registers within this set. | |||
1762 | computeUberWeights(UberSets, RegBank); | |||
1763 | } | |||
1764 | Changed = true; | |||
1765 | } | |||
1766 | ||||
1767 | // Mark these units normalized so superregisters can't change their weights. | |||
1768 | NormalUnits |= Reg->getRegUnits(); | |||
1769 | ||||
1770 | return Changed; | |||
1771 | } | |||
1772 | ||||
1773 | // Compute a weight for each register unit created during getSubRegs. | |||
1774 | // | |||
1775 | // The goal is that two registers in the same class will have the same weight, | |||
1776 | // where each register's weight is defined as sum of its units' weights. | |||
1777 | void CodeGenRegBank::computeRegUnitWeights() { | |||
1778 | std::vector<UberRegSet> UberSets; | |||
1779 | std::vector<UberRegSet*> RegSets(Registers.size()); | |||
1780 | computeUberSets(UberSets, RegSets, *this); | |||
1781 | // UberSets and RegSets are now immutable. | |||
1782 | ||||
1783 | computeUberWeights(UberSets, *this); | |||
1784 | ||||
1785 | // Iterate over each Register, normalizing the unit weights until reaching | |||
1786 | // a fix point. | |||
1787 | unsigned NumIters = 0; | |||
1788 | for (bool Changed = true; Changed; ++NumIters) { | |||
1789 | assert(NumIters <= NumNativeRegUnits && "Runaway register unit weights")(static_cast <bool> (NumIters <= NumNativeRegUnits && "Runaway register unit weights") ? void (0) : __assert_fail ( "NumIters <= NumNativeRegUnits && \"Runaway register unit weights\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1789, __extension__ __PRETTY_FUNCTION__)); | |||
1790 | Changed = false; | |||
1791 | for (auto &Reg : Registers) { | |||
1792 | CodeGenRegister::RegUnitList NormalUnits; | |||
1793 | BitVector NormalRegs; | |||
1794 | Changed |= normalizeWeight(&Reg, UberSets, RegSets, NormalRegs, | |||
1795 | NormalUnits, *this); | |||
1796 | } | |||
1797 | } | |||
1798 | } | |||
1799 | ||||
1800 | // Find a set in UniqueSets with the same elements as Set. | |||
1801 | // Return an iterator into UniqueSets. | |||
1802 | static std::vector<RegUnitSet>::const_iterator | |||
1803 | findRegUnitSet(const std::vector<RegUnitSet> &UniqueSets, | |||
1804 | const RegUnitSet &Set) { | |||
1805 | std::vector<RegUnitSet>::const_iterator | |||
1806 | I = UniqueSets.begin(), E = UniqueSets.end(); | |||
1807 | for(;I != E; ++I) { | |||
1808 | if (I->Units == Set.Units) | |||
1809 | break; | |||
1810 | } | |||
1811 | return I; | |||
1812 | } | |||
1813 | ||||
1814 | // Return true if the RUSubSet is a subset of RUSuperSet. | |||
1815 | static bool isRegUnitSubSet(const std::vector<unsigned> &RUSubSet, | |||
1816 | const std::vector<unsigned> &RUSuperSet) { | |||
1817 | return std::includes(RUSuperSet.begin(), RUSuperSet.end(), | |||
1818 | RUSubSet.begin(), RUSubSet.end()); | |||
1819 | } | |||
1820 | ||||
1821 | /// Iteratively prune unit sets. Prune subsets that are close to the superset, | |||
1822 | /// but with one or two registers removed. We occasionally have registers like | |||
1823 | /// APSR and PC thrown in with the general registers. We also see many | |||
1824 | /// special-purpose register subsets, such as tail-call and Thumb | |||
1825 | /// encodings. Generating all possible overlapping sets is combinatorial and | |||
1826 | /// overkill for modeling pressure. Ideally we could fix this statically in | |||
1827 | /// tablegen by (1) having the target define register classes that only include | |||
1828 | /// the allocatable registers and marking other classes as non-allocatable and | |||
1829 | /// (2) having a way to mark special purpose classes as "don't-care" classes for | |||
1830 | /// the purpose of pressure. However, we make an attempt to handle targets that | |||
1831 | /// are not nicely defined by merging nearly identical register unit sets | |||
1832 | /// statically. This generates smaller tables. Then, dynamically, we adjust the | |||
1833 | /// set limit by filtering the reserved registers. | |||
1834 | /// | |||
1835 | /// Merge sets only if the units have the same weight. For example, on ARM, | |||
1836 | /// Q-tuples with ssub index 0 include all S regs but also include D16+. We | |||
1837 | /// should not expand the S set to include D regs. | |||
1838 | void CodeGenRegBank::pruneUnitSets() { | |||
1839 | assert(RegClassUnitSets.empty() && "this invalidates RegClassUnitSets")(static_cast <bool> (RegClassUnitSets.empty() && "this invalidates RegClassUnitSets") ? void (0) : __assert_fail ("RegClassUnitSets.empty() && \"this invalidates RegClassUnitSets\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1839, __extension__ __PRETTY_FUNCTION__)); | |||
1840 | ||||
1841 | // Form an equivalence class of UnitSets with no significant difference. | |||
1842 | std::vector<unsigned> SuperSetIDs; | |||
1843 | for (unsigned SubIdx = 0, EndIdx = RegUnitSets.size(); | |||
1844 | SubIdx != EndIdx; ++SubIdx) { | |||
1845 | const RegUnitSet &SubSet = RegUnitSets[SubIdx]; | |||
1846 | unsigned SuperIdx = 0; | |||
1847 | for (; SuperIdx != EndIdx; ++SuperIdx) { | |||
1848 | if (SuperIdx == SubIdx) | |||
1849 | continue; | |||
1850 | ||||
1851 | unsigned UnitWeight = RegUnits[SubSet.Units[0]].Weight; | |||
1852 | const RegUnitSet &SuperSet = RegUnitSets[SuperIdx]; | |||
1853 | if (isRegUnitSubSet(SubSet.Units, SuperSet.Units) | |||
1854 | && (SubSet.Units.size() + 3 > SuperSet.Units.size()) | |||
1855 | && UnitWeight == RegUnits[SuperSet.Units[0]].Weight | |||
1856 | && UnitWeight == RegUnits[SuperSet.Units.back()].Weight) { | |||
1857 | LLVM_DEBUG(dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdxdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdx << "\n"; } } while (false) | |||
1858 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdx << "\n"; } } while (false); | |||
1859 | // We can pick any of the set names for the merged set. Go for the | |||
1860 | // shortest one to avoid picking the name of one of the classes that are | |||
1861 | // artificially created by tablegen. So "FPR128_lo" instead of | |||
1862 | // "QQQQ_with_qsub3_in_FPR128_lo". | |||
1863 | if (RegUnitSets[SubIdx].Name.size() < RegUnitSets[SuperIdx].Name.size()) | |||
1864 | RegUnitSets[SuperIdx].Name = RegUnitSets[SubIdx].Name; | |||
1865 | break; | |||
1866 | } | |||
1867 | } | |||
1868 | if (SuperIdx == EndIdx) | |||
1869 | SuperSetIDs.push_back(SubIdx); | |||
1870 | } | |||
1871 | // Populate PrunedUnitSets with each equivalence class's superset. | |||
1872 | std::vector<RegUnitSet> PrunedUnitSets(SuperSetIDs.size()); | |||
1873 | for (unsigned i = 0, e = SuperSetIDs.size(); i != e; ++i) { | |||
1874 | unsigned SuperIdx = SuperSetIDs[i]; | |||
1875 | PrunedUnitSets[i].Name = RegUnitSets[SuperIdx].Name; | |||
1876 | PrunedUnitSets[i].Units.swap(RegUnitSets[SuperIdx].Units); | |||
1877 | } | |||
1878 | RegUnitSets.swap(PrunedUnitSets); | |||
1879 | } | |||
1880 | ||||
1881 | // Create a RegUnitSet for each RegClass that contains all units in the class | |||
1882 | // including adopted units that are necessary to model register pressure. Then | |||
1883 | // iteratively compute RegUnitSets such that the union of any two overlapping | |||
1884 | // RegUnitSets is repreresented. | |||
1885 | // | |||
1886 | // RegisterInfoEmitter will map each RegClass to its RegUnitClass and any | |||
1887 | // RegUnitSet that is a superset of that RegUnitClass. | |||
1888 | void CodeGenRegBank::computeRegUnitSets() { | |||
1889 | assert(RegUnitSets.empty() && "dirty RegUnitSets")(static_cast <bool> (RegUnitSets.empty() && "dirty RegUnitSets" ) ? void (0) : __assert_fail ("RegUnitSets.empty() && \"dirty RegUnitSets\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1889, __extension__ __PRETTY_FUNCTION__)); | |||
1890 | ||||
1891 | // Compute a unique RegUnitSet for each RegClass. | |||
1892 | auto &RegClasses = getRegClasses(); | |||
1893 | for (auto &RC : RegClasses) { | |||
1894 | if (!RC.Allocatable || RC.Artificial || !RC.GeneratePressureSet) | |||
1895 | continue; | |||
1896 | ||||
1897 | // Speculatively grow the RegUnitSets to hold the new set. | |||
1898 | RegUnitSets.resize(RegUnitSets.size() + 1); | |||
1899 | RegUnitSets.back().Name = RC.getName(); | |||
1900 | ||||
1901 | // Compute a sorted list of units in this class. | |||
1902 | RC.buildRegUnitSet(*this, RegUnitSets.back().Units); | |||
1903 | ||||
1904 | // Find an existing RegUnitSet. | |||
1905 | std::vector<RegUnitSet>::const_iterator SetI = | |||
1906 | findRegUnitSet(RegUnitSets, RegUnitSets.back()); | |||
1907 | if (SetI != std::prev(RegUnitSets.end())) | |||
1908 | RegUnitSets.pop_back(); | |||
1909 | } | |||
1910 | ||||
1911 | LLVM_DEBUG(dbgs() << "\nBefore pruning:\n"; for (unsigned USIdx = 0,do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1912 | USEnd = RegUnitSets.size();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1913 | USIdx < USEnd; ++USIdx) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1914 | dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1915 | for (auto &U : RegUnitSets[USIdx].Units)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1916 | printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1917 | dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false) | |||
1918 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore pruning:\n" ; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; }; } } while (false); | |||
1919 | ||||
1920 | // Iteratively prune unit sets. | |||
1921 | pruneUnitSets(); | |||
1922 | ||||
1923 | LLVM_DEBUG(dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0,do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1924 | USEnd = RegUnitSets.size();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1925 | USIdx < USEnd; ++USIdx) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1926 | dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1927 | for (auto &U : RegUnitSets[USIdx].Units)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1928 | printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1929 | dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false) | |||
1930 | } dbgs() << "\nUnion sets:\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\nBefore union:\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd ; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets[USIdx].Units) printRegUnitName(U); dbgs () << "\n"; } dbgs() << "\nUnion sets:\n"; } } while (false); | |||
1931 | ||||
1932 | // Iterate over all unit sets, including new ones added by this loop. | |||
1933 | unsigned NumRegUnitSubSets = RegUnitSets.size(); | |||
1934 | for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) { | |||
1935 | // In theory, this is combinatorial. In practice, it needs to be bounded | |||
1936 | // by a small number of sets for regpressure to be efficient. | |||
1937 | // If the assert is hit, we need to implement pruning. | |||
1938 | assert(Idx < (2*NumRegUnitSubSets) && "runaway unit set inference")(static_cast <bool> (Idx < (2*NumRegUnitSubSets) && "runaway unit set inference") ? void (0) : __assert_fail ("Idx < (2*NumRegUnitSubSets) && \"runaway unit set inference\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 1938, __extension__ __PRETTY_FUNCTION__)); | |||
1939 | ||||
1940 | // Compare new sets with all original classes. | |||
1941 | for (unsigned SearchIdx = (Idx >= NumRegUnitSubSets) ? 0 : Idx+1; | |||
1942 | SearchIdx != EndIdx; ++SearchIdx) { | |||
1943 | std::set<unsigned> Intersection; | |||
1944 | std::set_intersection(RegUnitSets[Idx].Units.begin(), | |||
1945 | RegUnitSets[Idx].Units.end(), | |||
1946 | RegUnitSets[SearchIdx].Units.begin(), | |||
1947 | RegUnitSets[SearchIdx].Units.end(), | |||
1948 | std::inserter(Intersection, Intersection.begin())); | |||
1949 | if (Intersection.empty()) | |||
1950 | continue; | |||
1951 | ||||
1952 | // Speculatively grow the RegUnitSets to hold the new set. | |||
1953 | RegUnitSets.resize(RegUnitSets.size() + 1); | |||
1954 | RegUnitSets.back().Name = | |||
1955 | RegUnitSets[Idx].Name + "_with_" + RegUnitSets[SearchIdx].Name; | |||
1956 | ||||
1957 | std::set_union(RegUnitSets[Idx].Units.begin(), | |||
1958 | RegUnitSets[Idx].Units.end(), | |||
1959 | RegUnitSets[SearchIdx].Units.begin(), | |||
1960 | RegUnitSets[SearchIdx].Units.end(), | |||
1961 | std::inserter(RegUnitSets.back().Units, | |||
1962 | RegUnitSets.back().Units.begin())); | |||
1963 | ||||
1964 | // Find an existing RegUnitSet, or add the union to the unique sets. | |||
1965 | std::vector<RegUnitSet>::const_iterator SetI = | |||
1966 | findRegUnitSet(RegUnitSets, RegUnitSets.back()); | |||
1967 | if (SetI != std::prev(RegUnitSets.end())) | |||
1968 | RegUnitSets.pop_back(); | |||
1969 | else { | |||
1970 | LLVM_DEBUG(dbgs() << "UnitSet " << RegUnitSets.size() - 1 << " "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false) | |||
1971 | << RegUnitSets.back().Name << ":";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false) | |||
1972 | for (auto &Udo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false) | |||
1973 | : RegUnitSets.back().Units) printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false) | |||
1974 | dbgs() << "\n";)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "UnitSet " << RegUnitSets .size() - 1 << " " << RegUnitSets.back().Name << ":"; for (auto &U : RegUnitSets.back().Units) printRegUnitName (U); dbgs() << "\n";; } } while (false); | |||
1975 | } | |||
1976 | } | |||
1977 | } | |||
1978 | ||||
1979 | // Iteratively prune unit sets after inferring supersets. | |||
1980 | pruneUnitSets(); | |||
1981 | ||||
1982 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
1983 | dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
1984 | USIdx < USEnd; ++USIdx) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
1985 | dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
1986 | for (auto &U : RegUnitSets[USIdx].Units)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
1987 | printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
1988 | dbgs() << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false) | |||
1989 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; for (auto &U : RegUnitSets [USIdx].Units) printRegUnitName(U); dbgs() << "\n"; }; } } while (false); | |||
1990 | ||||
1991 | // For each register class, list the UnitSets that are supersets. | |||
1992 | RegClassUnitSets.resize(RegClasses.size()); | |||
1993 | int RCIdx = -1; | |||
1994 | for (auto &RC : RegClasses) { | |||
1995 | ++RCIdx; | |||
1996 | if (!RC.Allocatable) | |||
1997 | continue; | |||
1998 | ||||
1999 | // Recompute the sorted list of units in this class. | |||
2000 | std::vector<unsigned> RCRegUnits; | |||
2001 | RC.buildRegUnitSet(*this, RCRegUnits); | |||
2002 | ||||
2003 | // Don't increase pressure for unallocatable regclasses. | |||
2004 | if (RCRegUnits.empty()) | |||
2005 | continue; | |||
2006 | ||||
2007 | LLVM_DEBUG(dbgs() << "RC " << RC.getName() << " Units:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "RC " << RC.getName () << " Units:\n"; for (auto U : RCRegUnits) printRegUnitName (U); dbgs() << "\n UnitSetIDs:"; } } while (false) | |||
2008 | for (auto Udo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "RC " << RC.getName () << " Units:\n"; for (auto U : RCRegUnits) printRegUnitName (U); dbgs() << "\n UnitSetIDs:"; } } while (false) | |||
2009 | : RCRegUnits) printRegUnitName(U);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "RC " << RC.getName () << " Units:\n"; for (auto U : RCRegUnits) printRegUnitName (U); dbgs() << "\n UnitSetIDs:"; } } while (false) | |||
2010 | dbgs() << "\n UnitSetIDs:")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "RC " << RC.getName () << " Units:\n"; for (auto U : RCRegUnits) printRegUnitName (U); dbgs() << "\n UnitSetIDs:"; } } while (false); | |||
2011 | ||||
2012 | // Find all supersets. | |||
2013 | for (unsigned USIdx = 0, USEnd = RegUnitSets.size(); | |||
2014 | USIdx != USEnd; ++USIdx) { | |||
2015 | if (isRegUnitSubSet(RCRegUnits, RegUnitSets[USIdx].Units)) { | |||
2016 | LLVM_DEBUG(dbgs() << " " << USIdx)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << " " << USIdx; } } while (false); | |||
2017 | RegClassUnitSets[RCIdx].push_back(USIdx); | |||
2018 | } | |||
2019 | } | |||
2020 | LLVM_DEBUG(dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("regalloc-emitter")) { dbgs() << "\n"; } } while (false ); | |||
2021 | assert(!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass")(static_cast <bool> (!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass") ? void (0) : __assert_fail ( "!RegClassUnitSets[RCIdx].empty() && \"missing unit set for regclass\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 2021, __extension__ __PRETTY_FUNCTION__)); | |||
2022 | } | |||
2023 | ||||
2024 | // For each register unit, ensure that we have the list of UnitSets that | |||
2025 | // contain the unit. Normally, this matches an existing list of UnitSets for a | |||
2026 | // register class. If not, we create a new entry in RegClassUnitSets as a | |||
2027 | // "fake" register class. | |||
2028 | for (unsigned UnitIdx = 0, UnitEnd = NumNativeRegUnits; | |||
2029 | UnitIdx < UnitEnd; ++UnitIdx) { | |||
2030 | std::vector<unsigned> RUSets; | |||
2031 | for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) { | |||
2032 | RegUnitSet &RUSet = RegUnitSets[i]; | |||
2033 | if (!is_contained(RUSet.Units, UnitIdx)) | |||
2034 | continue; | |||
2035 | RUSets.push_back(i); | |||
2036 | } | |||
2037 | unsigned RCUnitSetsIdx = 0; | |||
2038 | for (unsigned e = RegClassUnitSets.size(); | |||
2039 | RCUnitSetsIdx != e; ++RCUnitSetsIdx) { | |||
2040 | if (RegClassUnitSets[RCUnitSetsIdx] == RUSets) { | |||
2041 | break; | |||
2042 | } | |||
2043 | } | |||
2044 | RegUnits[UnitIdx].RegClassUnitSetsIdx = RCUnitSetsIdx; | |||
2045 | if (RCUnitSetsIdx == RegClassUnitSets.size()) { | |||
2046 | // Create a new list of UnitSets as a "fake" register class. | |||
2047 | RegClassUnitSets.resize(RCUnitSetsIdx + 1); | |||
2048 | RegClassUnitSets[RCUnitSetsIdx].swap(RUSets); | |||
2049 | } | |||
2050 | } | |||
2051 | } | |||
2052 | ||||
2053 | void CodeGenRegBank::computeRegUnitLaneMasks() { | |||
2054 | for (auto &Register : Registers) { | |||
2055 | // Create an initial lane mask for all register units. | |||
2056 | const auto &RegUnits = Register.getRegUnits(); | |||
2057 | CodeGenRegister::RegUnitLaneMaskList | |||
2058 | RegUnitLaneMasks(RegUnits.count(), LaneBitmask::getNone()); | |||
2059 | // Iterate through SubRegisters. | |||
2060 | typedef CodeGenRegister::SubRegMap SubRegMap; | |||
2061 | const SubRegMap &SubRegs = Register.getSubRegs(); | |||
2062 | for (auto S : SubRegs) { | |||
2063 | CodeGenRegister *SubReg = S.second; | |||
2064 | // Ignore non-leaf subregisters, their lane masks are fully covered by | |||
2065 | // the leaf subregisters anyway. | |||
2066 | if (!SubReg->getSubRegs().empty()) | |||
2067 | continue; | |||
2068 | CodeGenSubRegIndex *SubRegIndex = S.first; | |||
2069 | const CodeGenRegister *SubRegister = S.second; | |||
2070 | LaneBitmask LaneMask = SubRegIndex->LaneMask; | |||
2071 | // Distribute LaneMask to Register Units touched. | |||
2072 | for (unsigned SUI : SubRegister->getRegUnits()) { | |||
2073 | bool Found = false; | |||
2074 | unsigned u = 0; | |||
2075 | for (unsigned RU : RegUnits) { | |||
2076 | if (SUI == RU) { | |||
2077 | RegUnitLaneMasks[u] |= LaneMask; | |||
2078 | assert(!Found)(static_cast <bool> (!Found) ? void (0) : __assert_fail ("!Found", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 2078, __extension__ __PRETTY_FUNCTION__)); | |||
2079 | Found = true; | |||
2080 | } | |||
2081 | ++u; | |||
2082 | } | |||
2083 | (void)Found; | |||
2084 | assert(Found)(static_cast <bool> (Found) ? void (0) : __assert_fail ( "Found", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 2084, __extension__ __PRETTY_FUNCTION__)); | |||
2085 | } | |||
2086 | } | |||
2087 | Register.setRegUnitLaneMasks(RegUnitLaneMasks); | |||
2088 | } | |||
2089 | } | |||
2090 | ||||
2091 | void CodeGenRegBank::computeDerivedInfo() { | |||
2092 | computeComposites(); | |||
2093 | computeSubRegLaneMasks(); | |||
| ||||
2094 | ||||
2095 | // Compute a weight for each register unit created during getSubRegs. | |||
2096 | // This may create adopted register units (with unit # >= NumNativeRegUnits). | |||
2097 | computeRegUnitWeights(); | |||
2098 | ||||
2099 | // Compute a unique set of RegUnitSets. One for each RegClass and inferred | |||
2100 | // supersets for the union of overlapping sets. | |||
2101 | computeRegUnitSets(); | |||
2102 | ||||
2103 | computeRegUnitLaneMasks(); | |||
2104 | ||||
2105 | // Compute register class HasDisjunctSubRegs/CoveredBySubRegs flag. | |||
2106 | for (CodeGenRegisterClass &RC : RegClasses) { | |||
2107 | RC.HasDisjunctSubRegs = false; | |||
2108 | RC.CoveredBySubRegs = true; | |||
2109 | for (const CodeGenRegister *Reg : RC.getMembers()) { | |||
2110 | RC.HasDisjunctSubRegs |= Reg->HasDisjunctSubRegs; | |||
2111 | RC.CoveredBySubRegs &= Reg->CoveredBySubRegs; | |||
2112 | } | |||
2113 | } | |||
2114 | ||||
2115 | // Get the weight of each set. | |||
2116 | for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) | |||
2117 | RegUnitSets[Idx].Weight = getRegUnitSetWeight(RegUnitSets[Idx].Units); | |||
2118 | ||||
2119 | // Find the order of each set. | |||
2120 | RegUnitSetOrder.reserve(RegUnitSets.size()); | |||
2121 | for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) | |||
2122 | RegUnitSetOrder.push_back(Idx); | |||
2123 | ||||
2124 | llvm::stable_sort(RegUnitSetOrder, [this](unsigned ID1, unsigned ID2) { | |||
2125 | return getRegPressureSet(ID1).Units.size() < | |||
2126 | getRegPressureSet(ID2).Units.size(); | |||
2127 | }); | |||
2128 | for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) { | |||
2129 | RegUnitSets[RegUnitSetOrder[Idx]].Order = Idx; | |||
2130 | } | |||
2131 | } | |||
2132 | ||||
2133 | // | |||
2134 | // Synthesize missing register class intersections. | |||
2135 | // | |||
2136 | // Make sure that sub-classes of RC exists such that getCommonSubClass(RC, X) | |||
2137 | // returns a maximal register class for all X. | |||
2138 | // | |||
2139 | void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { | |||
2140 | assert(!RegClasses.empty())(static_cast <bool> (!RegClasses.empty()) ? void (0) : __assert_fail ("!RegClasses.empty()", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 2140, __extension__ __PRETTY_FUNCTION__)); | |||
2141 | // Stash the iterator to the last element so that this loop doesn't visit | |||
2142 | // elements added by the getOrCreateSubClass call within it. | |||
2143 | for (auto I = RegClasses.begin(), E = std::prev(RegClasses.end()); | |||
2144 | I != std::next(E); ++I) { | |||
2145 | CodeGenRegisterClass *RC1 = RC; | |||
2146 | CodeGenRegisterClass *RC2 = &*I; | |||
2147 | if (RC1 == RC2) | |||
2148 | continue; | |||
2149 | ||||
2150 | // Compute the set intersection of RC1 and RC2. | |||
2151 | const CodeGenRegister::Vec &Memb1 = RC1->getMembers(); | |||
2152 | const CodeGenRegister::Vec &Memb2 = RC2->getMembers(); | |||
2153 | CodeGenRegister::Vec Intersection; | |||
2154 | std::set_intersection(Memb1.begin(), Memb1.end(), Memb2.begin(), | |||
2155 | Memb2.end(), | |||
2156 | std::inserter(Intersection, Intersection.begin()), | |||
2157 | deref<std::less<>>()); | |||
2158 | ||||
2159 | // Skip disjoint class pairs. | |||
2160 | if (Intersection.empty()) | |||
2161 | continue; | |||
2162 | ||||
2163 | // If RC1 and RC2 have different spill sizes or alignments, use the | |||
2164 | // stricter one for sub-classing. If they are equal, prefer RC1. | |||
2165 | if (RC2->RSI.hasStricterSpillThan(RC1->RSI)) | |||
2166 | std::swap(RC1, RC2); | |||
2167 | ||||
2168 | getOrCreateSubClass(RC1, &Intersection, | |||
2169 | RC1->getName() + "_and_" + RC2->getName()); | |||
2170 | } | |||
2171 | } | |||
2172 | ||||
2173 | // | |||
2174 | // Synthesize missing sub-classes for getSubClassWithSubReg(). | |||
2175 | // | |||
2176 | // Make sure that the set of registers in RC with a given SubIdx sub-register | |||
2177 | // form a register class. Update RC->SubClassWithSubReg. | |||
2178 | // | |||
2179 | void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) { | |||
2180 | // Map SubRegIndex to set of registers in RC supporting that SubRegIndex. | |||
2181 | typedef std::map<const CodeGenSubRegIndex *, CodeGenRegister::Vec, | |||
2182 | deref<std::less<>>> | |||
2183 | SubReg2SetMap; | |||
2184 | ||||
2185 | // Compute the set of registers supporting each SubRegIndex. | |||
2186 | SubReg2SetMap SRSets; | |||
2187 | for (const auto R : RC->getMembers()) { | |||
2188 | if (R->Artificial) | |||
2189 | continue; | |||
2190 | const CodeGenRegister::SubRegMap &SRM = R->getSubRegs(); | |||
2191 | for (auto I : SRM) { | |||
2192 | if (!I.first->Artificial) | |||
2193 | SRSets[I.first].push_back(R); | |||
2194 | } | |||
2195 | } | |||
2196 | ||||
2197 | for (auto I : SRSets) | |||
2198 | sortAndUniqueRegisters(I.second); | |||
2199 | ||||
2200 | // Find matching classes for all SRSets entries. Iterate in SubRegIndex | |||
2201 | // numerical order to visit synthetic indices last. | |||
2202 | for (const auto &SubIdx : SubRegIndices) { | |||
2203 | if (SubIdx.Artificial) | |||
2204 | continue; | |||
2205 | SubReg2SetMap::const_iterator I = SRSets.find(&SubIdx); | |||
2206 | // Unsupported SubRegIndex. Skip it. | |||
2207 | if (I == SRSets.end()) | |||
2208 | continue; | |||
2209 | // In most cases, all RC registers support the SubRegIndex. | |||
2210 | if (I->second.size() == RC->getMembers().size()) { | |||
2211 | RC->setSubClassWithSubReg(&SubIdx, RC); | |||
2212 | continue; | |||
2213 | } | |||
2214 | // This is a real subset. See if we have a matching class. | |||
2215 | CodeGenRegisterClass *SubRC = | |||
2216 | getOrCreateSubClass(RC, &I->second, | |||
2217 | RC->getName() + "_with_" + I->first->getName()); | |||
2218 | RC->setSubClassWithSubReg(&SubIdx, SubRC); | |||
2219 | } | |||
2220 | } | |||
2221 | ||||
2222 | // | |||
2223 | // Synthesize missing sub-classes of RC for getMatchingSuperRegClass(). | |||
2224 | // | |||
2225 | // Create sub-classes of RC such that getMatchingSuperRegClass(RC, SubIdx, X) | |||
2226 | // has a maximal result for any SubIdx and any X >= FirstSubRegRC. | |||
2227 | // | |||
2228 | ||||
2229 | void CodeGenRegBank::inferMatchingSuperRegClass(CodeGenRegisterClass *RC, | |||
2230 | std::list<CodeGenRegisterClass>::iterator FirstSubRegRC) { | |||
2231 | SmallVector<std::pair<const CodeGenRegister*, | |||
2232 | const CodeGenRegister*>, 16> SSPairs; | |||
2233 | BitVector TopoSigs(getNumTopoSigs()); | |||
2234 | ||||
2235 | // Iterate in SubRegIndex numerical order to visit synthetic indices last. | |||
2236 | for (auto &SubIdx : SubRegIndices) { | |||
2237 | // Skip indexes that aren't fully supported by RC's registers. This was | |||
2238 | // computed by inferSubClassWithSubReg() above which should have been | |||
2239 | // called first. | |||
2240 | if (RC->getSubClassWithSubReg(&SubIdx) != RC) | |||
2241 | continue; | |||
2242 | ||||
2243 | // Build list of (Super, Sub) pairs for this SubIdx. | |||
2244 | SSPairs.clear(); | |||
2245 | TopoSigs.reset(); | |||
2246 | for (const auto Super : RC->getMembers()) { | |||
2247 | const CodeGenRegister *Sub = Super->getSubRegs().find(&SubIdx)->second; | |||
2248 | assert(Sub && "Missing sub-register")(static_cast <bool> (Sub && "Missing sub-register" ) ? void (0) : __assert_fail ("Sub && \"Missing sub-register\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 2248, __extension__ __PRETTY_FUNCTION__)); | |||
2249 | SSPairs.push_back(std::make_pair(Super, Sub)); | |||
2250 | TopoSigs.set(Sub->getTopoSig()); | |||
2251 | } | |||
2252 | ||||
2253 | // Iterate over sub-register class candidates. Ignore classes created by | |||
2254 | // this loop. They will never be useful. | |||
2255 | // Store an iterator to the last element (not end) so that this loop doesn't | |||
2256 | // visit newly inserted elements. | |||
2257 | assert(!RegClasses.empty())(static_cast <bool> (!RegClasses.empty()) ? void (0) : __assert_fail ("!RegClasses.empty()", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 2257, __extension__ __PRETTY_FUNCTION__)); | |||
2258 | for (auto I = FirstSubRegRC, E = std::prev(RegClasses.end()); | |||
2259 | I != std::next(E); ++I) { | |||
2260 | CodeGenRegisterClass &SubRC = *I; | |||
2261 | if (SubRC.Artificial) | |||
2262 | continue; | |||
2263 | // Topological shortcut: SubRC members have the wrong shape. | |||
2264 | if (!TopoSigs.anyCommon(SubRC.getTopoSigs())) | |||
2265 | continue; | |||
2266 | // Compute the subset of RC that maps into SubRC. | |||
2267 | CodeGenRegister::Vec SubSetVec; | |||
2268 | for (unsigned i = 0, e = SSPairs.size(); i != e; ++i) | |||
2269 | if (SubRC.contains(SSPairs[i].second)) | |||
2270 | SubSetVec.push_back(SSPairs[i].first); | |||
2271 | ||||
2272 | if (SubSetVec.empty()) | |||
2273 | continue; | |||
2274 | ||||
2275 | // RC injects completely into SubRC. | |||
2276 | sortAndUniqueRegisters(SubSetVec); | |||
2277 | if (SubSetVec.size() == SSPairs.size()) { | |||
2278 | SubRC.addSuperRegClass(&SubIdx, RC); | |||
2279 | continue; | |||
2280 | } | |||
2281 | ||||
2282 | // Only a subset of RC maps into SubRC. Make sure it is represented by a | |||
2283 | // class. | |||
2284 | getOrCreateSubClass(RC, &SubSetVec, RC->getName() + "_with_" + | |||
2285 | SubIdx.getName() + "_in_" + | |||
2286 | SubRC.getName()); | |||
2287 | } | |||
2288 | } | |||
2289 | } | |||
2290 | ||||
2291 | // | |||
2292 | // Infer missing register classes. | |||
2293 | // | |||
2294 | void CodeGenRegBank::computeInferredRegisterClasses() { | |||
2295 | assert(!RegClasses.empty())(static_cast <bool> (!RegClasses.empty()) ? void (0) : __assert_fail ("!RegClasses.empty()", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 2295, __extension__ __PRETTY_FUNCTION__)); | |||
2296 | // When this function is called, the register classes have not been sorted | |||
2297 | // and assigned EnumValues yet. That means getSubClasses(), | |||
2298 | // getSuperClasses(), and hasSubClass() functions are defunct. | |||
2299 | ||||
2300 | // Use one-before-the-end so it doesn't move forward when new elements are | |||
2301 | // added. | |||
2302 | auto FirstNewRC = std::prev(RegClasses.end()); | |||
2303 | ||||
2304 | // Visit all register classes, including the ones being added by the loop. | |||
2305 | // Watch out for iterator invalidation here. | |||
2306 | for (auto I = RegClasses.begin(), E = RegClasses.end(); I != E; ++I) { | |||
2307 | CodeGenRegisterClass *RC = &*I; | |||
2308 | if (RC->Artificial) | |||
2309 | continue; | |||
2310 | ||||
2311 | // Synthesize answers for getSubClassWithSubReg(). | |||
2312 | inferSubClassWithSubReg(RC); | |||
2313 | ||||
2314 | // Synthesize answers for getCommonSubClass(). | |||
2315 | inferCommonSubClass(RC); | |||
2316 | ||||
2317 | // Synthesize answers for getMatchingSuperRegClass(). | |||
2318 | inferMatchingSuperRegClass(RC); | |||
2319 | ||||
2320 | // New register classes are created while this loop is running, and we need | |||
2321 | // to visit all of them. I particular, inferMatchingSuperRegClass needs | |||
2322 | // to match old super-register classes with sub-register classes created | |||
2323 | // after inferMatchingSuperRegClass was called. At this point, | |||
2324 | // inferMatchingSuperRegClass has checked SuperRC = [0..rci] with SubRC = | |||
2325 | // [0..FirstNewRC). We need to cover SubRC = [FirstNewRC..rci]. | |||
2326 | if (I == FirstNewRC) { | |||
2327 | auto NextNewRC = std::prev(RegClasses.end()); | |||
2328 | for (auto I2 = RegClasses.begin(), E2 = std::next(FirstNewRC); I2 != E2; | |||
2329 | ++I2) | |||
2330 | inferMatchingSuperRegClass(&*I2, E2); | |||
2331 | FirstNewRC = NextNewRC; | |||
2332 | } | |||
2333 | } | |||
2334 | } | |||
2335 | ||||
2336 | /// getRegisterClassForRegister - Find the register class that contains the | |||
2337 | /// specified physical register. If the register is not in a register class, | |||
2338 | /// return null. If the register is in multiple classes, and the classes have a | |||
2339 | /// superset-subset relationship and the same set of types, return the | |||
2340 | /// superclass. Otherwise return null. | |||
2341 | const CodeGenRegisterClass* | |||
2342 | CodeGenRegBank::getRegClassForRegister(Record *R) { | |||
2343 | const CodeGenRegister *Reg = getReg(R); | |||
2344 | const CodeGenRegisterClass *FoundRC = nullptr; | |||
2345 | for (const auto &RC : getRegClasses()) { | |||
2346 | if (!RC.contains(Reg)) | |||
2347 | continue; | |||
2348 | ||||
2349 | // If this is the first class that contains the register, | |||
2350 | // make a note of it and go on to the next class. | |||
2351 | if (!FoundRC) { | |||
2352 | FoundRC = &RC; | |||
2353 | continue; | |||
2354 | } | |||
2355 | ||||
2356 | // If a register's classes have different types, return null. | |||
2357 | if (RC.getValueTypes() != FoundRC->getValueTypes()) | |||
2358 | return nullptr; | |||
2359 | ||||
2360 | // Check to see if the previously found class that contains | |||
2361 | // the register is a subclass of the current class. If so, | |||
2362 | // prefer the superclass. | |||
2363 | if (RC.hasSubClass(FoundRC)) { | |||
2364 | FoundRC = &RC; | |||
2365 | continue; | |||
2366 | } | |||
2367 | ||||
2368 | // Check to see if the previously found class that contains | |||
2369 | // the register is a superclass of the current class. If so, | |||
2370 | // prefer the superclass. | |||
2371 | if (FoundRC->hasSubClass(&RC)) | |||
2372 | continue; | |||
2373 | ||||
2374 | // Multiple classes, and neither is a superclass of the other. | |||
2375 | // Return null. | |||
2376 | return nullptr; | |||
2377 | } | |||
2378 | return FoundRC; | |||
2379 | } | |||
2380 | ||||
2381 | const CodeGenRegisterClass * | |||
2382 | CodeGenRegBank::getMinimalPhysRegClass(Record *RegRecord, | |||
2383 | ValueTypeByHwMode *VT) { | |||
2384 | const CodeGenRegister *Reg = getReg(RegRecord); | |||
2385 | const CodeGenRegisterClass *BestRC = nullptr; | |||
2386 | for (const auto &RC : getRegClasses()) { | |||
2387 | if ((!VT || RC.hasType(*VT)) && | |||
2388 | RC.contains(Reg) && (!BestRC || BestRC->hasSubClass(&RC))) | |||
2389 | BestRC = &RC; | |||
2390 | } | |||
2391 | ||||
2392 | assert(BestRC && "Couldn't find the register class")(static_cast <bool> (BestRC && "Couldn't find the register class" ) ? void (0) : __assert_fail ("BestRC && \"Couldn't find the register class\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/utils/TableGen/CodeGenRegisters.cpp" , 2392, __extension__ __PRETTY_FUNCTION__)); | |||
2393 | return BestRC; | |||
2394 | } | |||
2395 | ||||
2396 | BitVector CodeGenRegBank::computeCoveredRegisters(ArrayRef<Record*> Regs) { | |||
2397 | SetVector<const CodeGenRegister*> Set; | |||
2398 | ||||
2399 | // First add Regs with all sub-registers. | |||
2400 | for (unsigned i = 0, e = Regs.size(); i != e; ++i) { | |||
2401 | CodeGenRegister *Reg = getReg(Regs[i]); | |||
2402 | if (Set.insert(Reg)) | |||
2403 | // Reg is new, add all sub-registers. | |||
2404 | // The pre-ordering is not important here. | |||
2405 | Reg->addSubRegsPreOrder(Set, *this); | |||
2406 | } | |||
2407 | ||||
2408 | // Second, find all super-registers that are completely covered by the set. | |||
2409 | for (unsigned i = 0; i != Set.size(); ++i) { | |||
2410 | const CodeGenRegister::SuperRegList &SR = Set[i]->getSuperRegs(); | |||
2411 | for (unsigned j = 0, e = SR.size(); j != e; ++j) { | |||
2412 | const CodeGenRegister *Super = SR[j]; | |||
2413 | if (!Super->CoveredBySubRegs || Set.count(Super)) | |||
2414 | continue; | |||
2415 | // This new super-register is covered by its sub-registers. | |||
2416 | bool AllSubsInSet = true; | |||
2417 | const CodeGenRegister::SubRegMap &SRM = Super->getSubRegs(); | |||
2418 | for (auto I : SRM) | |||
2419 | if (!Set.count(I.second)) { | |||
2420 | AllSubsInSet = false; | |||
2421 | break; | |||
2422 | } | |||
2423 | // All sub-registers in Set, add Super as well. | |||
2424 | // We will visit Super later to recheck its super-registers. | |||
2425 | if (AllSubsInSet) | |||
2426 | Set.insert(Super); | |||
2427 | } | |||
2428 | } | |||
2429 | ||||
2430 | // Convert to BitVector. | |||
2431 | BitVector BV(Registers.size() + 1); | |||
2432 | for (unsigned i = 0, e = Set.size(); i != e; ++i) | |||
2433 | BV.set(Set[i]->EnumValue); | |||
2434 | return BV; | |||
2435 | } | |||
2436 | ||||
2437 | void CodeGenRegBank::printRegUnitName(unsigned Unit) const { | |||
2438 | if (Unit < NumNativeRegUnits) | |||
2439 | dbgs() << ' ' << RegUnits[Unit].Roots[0]->getName(); | |||
2440 | else | |||
2441 | dbgs() << " #" << Unit; | |||
2442 | } |
1 | //===- llvm/MC/LaneBitmask.h ------------------------------------*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// | |||
9 | /// \file | |||
10 | /// A common definition of LaneBitmask for use in TableGen and CodeGen. | |||
11 | /// | |||
12 | /// A lane mask is a bitmask representing the covering of a register with | |||
13 | /// sub-registers. | |||
14 | /// | |||
15 | /// This is typically used to track liveness at sub-register granularity. | |||
16 | /// Lane masks for sub-register indices are similar to register units for | |||
17 | /// physical registers. The individual bits in a lane mask can't be assigned | |||
18 | /// any specific meaning. They can be used to check if two sub-register | |||
19 | /// indices overlap. | |||
20 | /// | |||
21 | /// Iff the target has a register such that: | |||
22 | /// | |||
23 | /// getSubReg(Reg, A) overlaps getSubReg(Reg, B) | |||
24 | /// | |||
25 | /// then: | |||
26 | /// | |||
27 | /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0 | |||
28 | ||||
29 | #ifndef LLVM_MC_LANEBITMASK_H | |||
30 | #define LLVM_MC_LANEBITMASK_H | |||
31 | ||||
32 | #include "llvm/Support/Compiler.h" | |||
33 | #include "llvm/Support/Format.h" | |||
34 | #include "llvm/Support/MathExtras.h" | |||
35 | #include "llvm/Support/Printable.h" | |||
36 | #include "llvm/Support/raw_ostream.h" | |||
37 | ||||
38 | namespace llvm { | |||
39 | ||||
40 | struct LaneBitmask { | |||
41 | // When changing the underlying type, change the format string as well. | |||
42 | using Type = uint64_t; | |||
43 | enum : unsigned { BitWidth = 8*sizeof(Type) }; | |||
44 | constexpr static const char *const FormatStr = "%016llX"; | |||
45 | ||||
46 | constexpr LaneBitmask() = default; | |||
47 | explicit constexpr LaneBitmask(Type V) : Mask(V) {} | |||
48 | ||||
49 | constexpr bool operator== (LaneBitmask M) const { return Mask == M.Mask; } | |||
50 | constexpr bool operator!= (LaneBitmask M) const { return Mask != M.Mask; } | |||
51 | constexpr bool operator< (LaneBitmask M) const { return Mask < M.Mask; } | |||
52 | constexpr bool none() const { return Mask == 0; } | |||
53 | constexpr bool any() const { return Mask != 0; } | |||
54 | constexpr bool all() const { return ~Mask == 0; } | |||
55 | ||||
56 | constexpr LaneBitmask operator~() const { | |||
57 | return LaneBitmask(~Mask); | |||
58 | } | |||
59 | constexpr LaneBitmask operator|(LaneBitmask M) const { | |||
60 | return LaneBitmask(Mask | M.Mask); | |||
61 | } | |||
62 | constexpr LaneBitmask operator&(LaneBitmask M) const { | |||
63 | return LaneBitmask(Mask & M.Mask); | |||
64 | } | |||
65 | LaneBitmask &operator|=(LaneBitmask M) { | |||
66 | Mask |= M.Mask; | |||
67 | return *this; | |||
68 | } | |||
69 | LaneBitmask &operator&=(LaneBitmask M) { | |||
70 | Mask &= M.Mask; | |||
71 | return *this; | |||
72 | } | |||
73 | ||||
74 | constexpr Type getAsInteger() const { return Mask; } | |||
75 | ||||
76 | unsigned getNumLanes() const { | |||
77 | return countPopulation(Mask); | |||
78 | } | |||
79 | unsigned getHighestLane() const { | |||
80 | return Log2_64(Mask); | |||
81 | } | |||
82 | ||||
83 | static constexpr LaneBitmask getNone() { return LaneBitmask(0); } | |||
84 | static constexpr LaneBitmask getAll() { return ~LaneBitmask(0); } | |||
85 | static constexpr LaneBitmask getLane(unsigned Lane) { | |||
86 | return LaneBitmask(Type(1) << Lane); | |||
| ||||
87 | } | |||
88 | ||||
89 | private: | |||
90 | Type Mask = 0; | |||
91 | }; | |||
92 | ||||
93 | /// Create Printable object to print LaneBitmasks on a \ref raw_ostream. | |||
94 | inline Printable PrintLaneMask(LaneBitmask LaneMask) { | |||
95 | return Printable([LaneMask](raw_ostream &OS) { | |||
96 | OS << format(LaneBitmask::FormatStr, LaneMask.getAsInteger()); | |||
97 | }); | |||
98 | } | |||
99 | ||||
100 | } // end namespace llvm | |||
101 | ||||
102 | #endif // LLVM_MC_LANEBITMASK_H |
1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains some functions that are useful for math stuff. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_SUPPORT_MATHEXTRAS_H |
14 | #define LLVM_SUPPORT_MATHEXTRAS_H |
15 | |
16 | #include "llvm/Support/Compiler.h" |
17 | #include <cassert> |
18 | #include <climits> |
19 | #include <cmath> |
20 | #include <cstdint> |
21 | #include <cstring> |
22 | #include <limits> |
23 | #include <type_traits> |
24 | |
25 | #ifdef __ANDROID_NDK__ |
26 | #include <android/api-level.h> |
27 | #endif |
28 | |
29 | #ifdef _MSC_VER |
30 | // Declare these intrinsics manually rather including intrin.h. It's very |
31 | // expensive, and MathExtras.h is popular. |
32 | // #include <intrin.h> |
33 | extern "C" { |
34 | unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); |
35 | unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); |
36 | unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); |
37 | unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); |
38 | } |
39 | #endif |
40 | |
41 | namespace llvm { |
42 | |
43 | /// The behavior an operation has on an input of 0. |
44 | enum ZeroBehavior { |
45 | /// The returned value is undefined. |
46 | ZB_Undefined, |
47 | /// The returned value is numeric_limits<T>::max() |
48 | ZB_Max, |
49 | /// The returned value is numeric_limits<T>::digits |
50 | ZB_Width |
51 | }; |
52 | |
53 | /// Mathematical constants. |
54 | namespace numbers { |
55 | // TODO: Track C++20 std::numbers. |
56 | // TODO: Favor using the hexadecimal FP constants (requires C++17). |
57 | constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 |
58 | egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 |
59 | ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 |
60 | ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392 |
61 | log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) |
62 | log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) |
63 | pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 |
64 | inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 |
65 | sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 |
66 | inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 |
67 | sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219 |
68 | inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) |
69 | sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 |
70 | inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) |
71 | phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 |
72 | constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 |
73 | egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 |
74 | ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 |
75 | ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 |
76 | log2ef = 1.44269504F, // (0x1.715476P+0) |
77 | log10ef = .434294482F, // (0x1.bcb7b2P-2) |
78 | pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 |
79 | inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 |
80 | sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 |
81 | inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 |
82 | sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 |
83 | inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) |
84 | sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 |
85 | inv_sqrt3f = .577350269F, // (0x1.279a74P-1) |
86 | phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 |
87 | } // namespace numbers |
88 | |
89 | namespace detail { |
90 | template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter { |
91 | static unsigned count(T Val, ZeroBehavior) { |
92 | if (!Val) |
93 | return std::numeric_limits<T>::digits; |
94 | if (Val & 0x1) |
95 | return 0; |
96 | |
97 | // Bisection method. |
98 | unsigned ZeroBits = 0; |
99 | T Shift = std::numeric_limits<T>::digits >> 1; |
100 | T Mask = std::numeric_limits<T>::max() >> Shift; |
101 | while (Shift) { |
102 | if ((Val & Mask) == 0) { |
103 | Val >>= Shift; |
104 | ZeroBits |= Shift; |
105 | } |
106 | Shift >>= 1; |
107 | Mask >>= Shift; |
108 | } |
109 | return ZeroBits; |
110 | } |
111 | }; |
112 | |
113 | #if defined(__GNUC__4) || defined(_MSC_VER) |
114 | template <typename T> struct TrailingZerosCounter<T, 4> { |
115 | static unsigned count(T Val, ZeroBehavior ZB) { |
116 | if (ZB != ZB_Undefined && Val == 0) |
117 | return 32; |
118 | |
119 | #if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4) |
120 | return __builtin_ctz(Val); |
121 | #elif defined(_MSC_VER) |
122 | unsigned long Index; |
123 | _BitScanForward(&Index, Val); |
124 | return Index; |
125 | #endif |
126 | } |
127 | }; |
128 | |
129 | #if !defined(_MSC_VER) || defined(_M_X64) |
130 | template <typename T> struct TrailingZerosCounter<T, 8> { |
131 | static unsigned count(T Val, ZeroBehavior ZB) { |
132 | if (ZB != ZB_Undefined && Val == 0) |
133 | return 64; |
134 | |
135 | #if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4) |
136 | return __builtin_ctzll(Val); |
137 | #elif defined(_MSC_VER) |
138 | unsigned long Index; |
139 | _BitScanForward64(&Index, Val); |
140 | return Index; |
141 | #endif |
142 | } |
143 | }; |
144 | #endif |
145 | #endif |
146 | } // namespace detail |
147 | |
148 | /// Count number of 0's from the least significant bit to the most |
149 | /// stopping at the first 1. |
150 | /// |
151 | /// Only unsigned integral types are allowed. |
152 | /// |
153 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |
154 | /// valid arguments. |
155 | template <typename T> |
156 | unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |
157 | static_assert(std::numeric_limits<T>::is_integer && |
158 | !std::numeric_limits<T>::is_signed, |
159 | "Only unsigned integral types are allowed."); |
160 | return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB); |
161 | } |
162 | |
163 | namespace detail { |
164 | template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter { |
165 | static unsigned count(T Val, ZeroBehavior) { |
166 | if (!Val) |
167 | return std::numeric_limits<T>::digits; |
168 | |
169 | // Bisection method. |
170 | unsigned ZeroBits = 0; |
171 | for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) { |
172 | T Tmp = Val >> Shift; |
173 | if (Tmp) |
174 | Val = Tmp; |
175 | else |
176 | ZeroBits |= Shift; |
177 | } |
178 | return ZeroBits; |
179 | } |
180 | }; |
181 | |
182 | #if defined(__GNUC__4) || defined(_MSC_VER) |
183 | template <typename T> struct LeadingZerosCounter<T, 4> { |
184 | static unsigned count(T Val, ZeroBehavior ZB) { |
185 | if (ZB != ZB_Undefined && Val == 0) |
186 | return 32; |
187 | |
188 | #if __has_builtin(__builtin_clz)1 || defined(__GNUC__4) |
189 | return __builtin_clz(Val); |
190 | #elif defined(_MSC_VER) |
191 | unsigned long Index; |
192 | _BitScanReverse(&Index, Val); |
193 | return Index ^ 31; |
194 | #endif |
195 | } |
196 | }; |
197 | |
198 | #if !defined(_MSC_VER) || defined(_M_X64) |
199 | template <typename T> struct LeadingZerosCounter<T, 8> { |
200 | static unsigned count(T Val, ZeroBehavior ZB) { |
201 | if (ZB != ZB_Undefined && Val == 0) |
202 | return 64; |
203 | |
204 | #if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4) |
205 | return __builtin_clzll(Val); |
206 | #elif defined(_MSC_VER) |
207 | unsigned long Index; |
208 | _BitScanReverse64(&Index, Val); |
209 | return Index ^ 63; |
210 | #endif |
211 | } |
212 | }; |
213 | #endif |
214 | #endif |
215 | } // namespace detail |
216 | |
217 | /// Count number of 0's from the most significant bit to the least |
218 | /// stopping at the first 1. |
219 | /// |
220 | /// Only unsigned integral types are allowed. |
221 | /// |
222 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |
223 | /// valid arguments. |
224 | template <typename T> |
225 | unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |
226 | static_assert(std::numeric_limits<T>::is_integer && |
227 | !std::numeric_limits<T>::is_signed, |
228 | "Only unsigned integral types are allowed."); |
229 | return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB); |
230 | } |
231 | |
232 | /// Get the index of the first set bit starting from the least |
233 | /// significant bit. |
234 | /// |
235 | /// Only unsigned integral types are allowed. |
236 | /// |
237 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |
238 | /// valid arguments. |
239 | template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { |
240 | if (ZB == ZB_Max && Val == 0) |
241 | return std::numeric_limits<T>::max(); |
242 | |
243 | return countTrailingZeros(Val, ZB_Undefined); |
244 | } |
245 | |
246 | /// Create a bitmask with the N right-most bits set to 1, and all other |
247 | /// bits set to 0. Only unsigned types are allowed. |
248 | template <typename T> T maskTrailingOnes(unsigned N) { |
249 | static_assert(std::is_unsigned<T>::value, "Invalid type!"); |
250 | const unsigned Bits = CHAR_BIT8 * sizeof(T); |
251 | assert(N <= Bits && "Invalid bit index")(static_cast <bool> (N <= Bits && "Invalid bit index" ) ? void (0) : __assert_fail ("N <= Bits && \"Invalid bit index\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 251, __extension__ __PRETTY_FUNCTION__)); |
252 | return N == 0 ? 0 : (T(-1) >> (Bits - N)); |
253 | } |
254 | |
255 | /// Create a bitmask with the N left-most bits set to 1, and all other |
256 | /// bits set to 0. Only unsigned types are allowed. |
257 | template <typename T> T maskLeadingOnes(unsigned N) { |
258 | return ~maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N); |
259 | } |
260 | |
261 | /// Create a bitmask with the N right-most bits set to 0, and all other |
262 | /// bits set to 1. Only unsigned types are allowed. |
263 | template <typename T> T maskTrailingZeros(unsigned N) { |
264 | return maskLeadingOnes<T>(CHAR_BIT8 * sizeof(T) - N); |
265 | } |
266 | |
267 | /// Create a bitmask with the N left-most bits set to 0, and all other |
268 | /// bits set to 1. Only unsigned types are allowed. |
269 | template <typename T> T maskLeadingZeros(unsigned N) { |
270 | return maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N); |
271 | } |
272 | |
273 | /// Get the index of the last set bit starting from the least |
274 | /// significant bit. |
275 | /// |
276 | /// Only unsigned integral types are allowed. |
277 | /// |
278 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |
279 | /// valid arguments. |
280 | template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { |
281 | if (ZB == ZB_Max && Val == 0) |
282 | return std::numeric_limits<T>::max(); |
283 | |
284 | // Use ^ instead of - because both gcc and llvm can remove the associated ^ |
285 | // in the __builtin_clz intrinsic on x86. |
286 | return countLeadingZeros(Val, ZB_Undefined) ^ |
287 | (std::numeric_limits<T>::digits - 1); |
288 | } |
289 | |
290 | /// Macro compressed bit reversal table for 256 bits. |
291 | /// |
292 | /// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable |
293 | static const unsigned char BitReverseTable256[256] = { |
294 | #define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64 |
295 | #define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16) |
296 | #define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4) |
297 | R6(0), R6(2), R6(1), R6(3) |
298 | #undef R2 |
299 | #undef R4 |
300 | #undef R6 |
301 | }; |
302 | |
303 | /// Reverse the bits in \p Val. |
304 | template <typename T> |
305 | T reverseBits(T Val) { |
306 | unsigned char in[sizeof(Val)]; |
307 | unsigned char out[sizeof(Val)]; |
308 | std::memcpy(in, &Val, sizeof(Val)); |
309 | for (unsigned i = 0; i < sizeof(Val); ++i) |
310 | out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; |
311 | std::memcpy(&Val, out, sizeof(Val)); |
312 | return Val; |
313 | } |
314 | |
315 | #if __has_builtin(__builtin_bitreverse8)1 |
316 | template<> |
317 | inline uint8_t reverseBits<uint8_t>(uint8_t Val) { |
318 | return __builtin_bitreverse8(Val); |
319 | } |
320 | #endif |
321 | |
322 | #if __has_builtin(__builtin_bitreverse16)1 |
323 | template<> |
324 | inline uint16_t reverseBits<uint16_t>(uint16_t Val) { |
325 | return __builtin_bitreverse16(Val); |
326 | } |
327 | #endif |
328 | |
329 | #if __has_builtin(__builtin_bitreverse32)1 |
330 | template<> |
331 | inline uint32_t reverseBits<uint32_t>(uint32_t Val) { |
332 | return __builtin_bitreverse32(Val); |
333 | } |
334 | #endif |
335 | |
336 | #if __has_builtin(__builtin_bitreverse64)1 |
337 | template<> |
338 | inline uint64_t reverseBits<uint64_t>(uint64_t Val) { |
339 | return __builtin_bitreverse64(Val); |
340 | } |
341 | #endif |
342 | |
343 | // NOTE: The following support functions use the _32/_64 extensions instead of |
344 | // type overloading so that signed and unsigned integers can be used without |
345 | // ambiguity. |
346 | |
347 | /// Return the high 32 bits of a 64 bit value. |
348 | constexpr inline uint32_t Hi_32(uint64_t Value) { |
349 | return static_cast<uint32_t>(Value >> 32); |
350 | } |
351 | |
352 | /// Return the low 32 bits of a 64 bit value. |
353 | constexpr inline uint32_t Lo_32(uint64_t Value) { |
354 | return static_cast<uint32_t>(Value); |
355 | } |
356 | |
357 | /// Make a 64-bit integer from a high / low pair of 32-bit integers. |
358 | constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { |
359 | return ((uint64_t)High << 32) | (uint64_t)Low; |
360 | } |
361 | |
362 | /// Checks if an integer fits into the given bit width. |
363 | template <unsigned N> constexpr inline bool isInt(int64_t x) { |
364 | return N >= 64 || (-(INT64_C(1)1L<<(N-1)) <= x && x < (INT64_C(1)1L<<(N-1))); |
365 | } |
366 | // Template specializations to get better code for common cases. |
367 | template <> constexpr inline bool isInt<8>(int64_t x) { |
368 | return static_cast<int8_t>(x) == x; |
369 | } |
370 | template <> constexpr inline bool isInt<16>(int64_t x) { |
371 | return static_cast<int16_t>(x) == x; |
372 | } |
373 | template <> constexpr inline bool isInt<32>(int64_t x) { |
374 | return static_cast<int32_t>(x) == x; |
375 | } |
376 | |
377 | /// Checks if a signed integer is an N bit number shifted left by S. |
378 | template <unsigned N, unsigned S> |
379 | constexpr inline bool isShiftedInt(int64_t x) { |
380 | static_assert( |
381 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); |
382 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); |
383 | return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); |
384 | } |
385 | |
386 | /// Checks if an unsigned integer fits into the given bit width. |
387 | /// |
388 | /// This is written as two functions rather than as simply |
389 | /// |
390 | /// return N >= 64 || X < (UINT64_C(1) << N); |
391 | /// |
392 | /// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting |
393 | /// left too many places. |
394 | template <unsigned N> |
395 | constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) { |
396 | static_assert(N > 0, "isUInt<0> doesn't make sense"); |
397 | return X < (UINT64_C(1)1UL << (N)); |
398 | } |
399 | template <unsigned N> |
400 | constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) { |
401 | return true; |
402 | } |
403 | |
404 | // Template specializations to get better code for common cases. |
405 | template <> constexpr inline bool isUInt<8>(uint64_t x) { |
406 | return static_cast<uint8_t>(x) == x; |
407 | } |
408 | template <> constexpr inline bool isUInt<16>(uint64_t x) { |
409 | return static_cast<uint16_t>(x) == x; |
410 | } |
411 | template <> constexpr inline bool isUInt<32>(uint64_t x) { |
412 | return static_cast<uint32_t>(x) == x; |
413 | } |
414 | |
415 | /// Checks if a unsigned integer is an N bit number shifted left by S. |
416 | template <unsigned N, unsigned S> |
417 | constexpr inline bool isShiftedUInt(uint64_t x) { |
418 | static_assert( |
419 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); |
420 | static_assert(N + S <= 64, |
421 | "isShiftedUInt<N, S> with N + S > 64 is too wide."); |
422 | // Per the two static_asserts above, S must be strictly less than 64. So |
423 | // 1 << S is not undefined behavior. |
424 | return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); |
425 | } |
426 | |
427 | /// Gets the maximum value for a N-bit unsigned integer. |
428 | inline uint64_t maxUIntN(uint64_t N) { |
429 | assert(N > 0 && N <= 64 && "integer width out of range")(static_cast <bool> (N > 0 && N <= 64 && "integer width out of range") ? void (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 429, __extension__ __PRETTY_FUNCTION__)); |
430 | |
431 | // uint64_t(1) << 64 is undefined behavior, so we can't do |
432 | // (uint64_t(1) << N) - 1 |
433 | // without checking first that N != 64. But this works and doesn't have a |
434 | // branch. |
435 | return UINT64_MAX(18446744073709551615UL) >> (64 - N); |
436 | } |
437 | |
438 | /// Gets the minimum value for a N-bit signed integer. |
439 | inline int64_t minIntN(int64_t N) { |
440 | assert(N > 0 && N <= 64 && "integer width out of range")(static_cast <bool> (N > 0 && N <= 64 && "integer width out of range") ? void (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 440, __extension__ __PRETTY_FUNCTION__)); |
441 | |
442 | return UINT64_C(1)1UL + ~(UINT64_C(1)1UL << (N - 1)); |
443 | } |
444 | |
445 | /// Gets the maximum value for a N-bit signed integer. |
446 | inline int64_t maxIntN(int64_t N) { |
447 | assert(N > 0 && N <= 64 && "integer width out of range")(static_cast <bool> (N > 0 && N <= 64 && "integer width out of range") ? void (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 447, __extension__ __PRETTY_FUNCTION__)); |
448 | |
449 | // This relies on two's complement wraparound when N == 64, so we convert to |
450 | // int64_t only at the very end to avoid UB. |
451 | return (UINT64_C(1)1UL << (N - 1)) - 1; |
452 | } |
453 | |
454 | /// Checks if an unsigned integer fits into the given (dynamic) bit width. |
455 | inline bool isUIntN(unsigned N, uint64_t x) { |
456 | return N >= 64 || x <= maxUIntN(N); |
457 | } |
458 | |
459 | /// Checks if an signed integer fits into the given (dynamic) bit width. |
460 | inline bool isIntN(unsigned N, int64_t x) { |
461 | return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); |
462 | } |
463 | |
464 | /// Return true if the argument is a non-empty sequence of ones starting at the |
465 | /// least significant bit with the remainder zero (32 bit version). |
466 | /// Ex. isMask_32(0x0000FFFFU) == true. |
467 | constexpr inline bool isMask_32(uint32_t Value) { |
468 | return Value && ((Value + 1) & Value) == 0; |
469 | } |
470 | |
471 | /// Return true if the argument is a non-empty sequence of ones starting at the |
472 | /// least significant bit with the remainder zero (64 bit version). |
473 | constexpr inline bool isMask_64(uint64_t Value) { |
474 | return Value && ((Value + 1) & Value) == 0; |
475 | } |
476 | |
477 | /// Return true if the argument contains a non-empty sequence of ones with the |
478 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. |
479 | constexpr inline bool isShiftedMask_32(uint32_t Value) { |
480 | return Value && isMask_32((Value - 1) | Value); |
481 | } |
482 | |
483 | /// Return true if the argument contains a non-empty sequence of ones with the |
484 | /// remainder zero (64 bit version.) |
485 | constexpr inline bool isShiftedMask_64(uint64_t Value) { |
486 | return Value && isMask_64((Value - 1) | Value); |
487 | } |
488 | |
489 | /// Return true if the argument is a power of two > 0. |
490 | /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) |
491 | constexpr inline bool isPowerOf2_32(uint32_t Value) { |
492 | return Value && !(Value & (Value - 1)); |
493 | } |
494 | |
495 | /// Return true if the argument is a power of two > 0 (64 bit edition.) |
496 | constexpr inline bool isPowerOf2_64(uint64_t Value) { |
497 | return Value && !(Value & (Value - 1)); |
498 | } |
499 | |
500 | /// Count the number of ones from the most significant bit to the first |
501 | /// zero bit. |
502 | /// |
503 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. |
504 | /// Only unsigned integral types are allowed. |
505 | /// |
506 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |
507 | /// ZB_Undefined are valid arguments. |
508 | template <typename T> |
509 | unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |
510 | static_assert(std::numeric_limits<T>::is_integer && |
511 | !std::numeric_limits<T>::is_signed, |
512 | "Only unsigned integral types are allowed."); |
513 | return countLeadingZeros<T>(~Value, ZB); |
514 | } |
515 | |
516 | /// Count the number of ones from the least significant bit to the first |
517 | /// zero bit. |
518 | /// |
519 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. |
520 | /// Only unsigned integral types are allowed. |
521 | /// |
522 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |
523 | /// ZB_Undefined are valid arguments. |
524 | template <typename T> |
525 | unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |
526 | static_assert(std::numeric_limits<T>::is_integer && |
527 | !std::numeric_limits<T>::is_signed, |
528 | "Only unsigned integral types are allowed."); |
529 | return countTrailingZeros<T>(~Value, ZB); |
530 | } |
531 | |
532 | namespace detail { |
533 | template <typename T, std::size_t SizeOfT> struct PopulationCounter { |
534 | static unsigned count(T Value) { |
535 | // Generic version, forward to 32 bits. |
536 | static_assert(SizeOfT <= 4, "Not implemented!"); |
537 | #if defined(__GNUC__4) |
538 | return __builtin_popcount(Value); |
539 | #else |
540 | uint32_t v = Value; |
541 | v = v - ((v >> 1) & 0x55555555); |
542 | v = (v & 0x33333333) + ((v >> 2) & 0x33333333); |
543 | return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; |
544 | #endif |
545 | } |
546 | }; |
547 | |
548 | template <typename T> struct PopulationCounter<T, 8> { |
549 | static unsigned count(T Value) { |
550 | #if defined(__GNUC__4) |
551 | return __builtin_popcountll(Value); |
552 | #else |
553 | uint64_t v = Value; |
554 | v = v - ((v >> 1) & 0x5555555555555555ULL); |
555 | v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); |
556 | v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; |
557 | return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56); |
558 | #endif |
559 | } |
560 | }; |
561 | } // namespace detail |
562 | |
563 | /// Count the number of set bits in a value. |
564 | /// Ex. countPopulation(0xF000F000) = 8 |
565 | /// Returns 0 if the word is zero. |
566 | template <typename T> |
567 | inline unsigned countPopulation(T Value) { |
568 | static_assert(std::numeric_limits<T>::is_integer && |
569 | !std::numeric_limits<T>::is_signed, |
570 | "Only unsigned integral types are allowed."); |
571 | return detail::PopulationCounter<T, sizeof(T)>::count(Value); |
572 | } |
573 | |
574 | /// Compile time Log2. |
575 | /// Valid only for positive powers of two. |
576 | template <size_t kValue> constexpr inline size_t CTLog2() { |
577 | static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), |
578 | "Value is not a valid power of 2"); |
579 | return 1 + CTLog2<kValue / 2>(); |
580 | } |
581 | |
582 | template <> constexpr inline size_t CTLog2<1>() { return 0; } |
583 | |
584 | /// Return the log base 2 of the specified value. |
585 | inline double Log2(double Value) { |
586 | #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 |
587 | return __builtin_log(Value) / __builtin_log(2.0); |
588 | #else |
589 | return log2(Value); |
590 | #endif |
591 | } |
592 | |
593 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
594 | /// (32 bit edition.) |
595 | /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 |
596 | inline unsigned Log2_32(uint32_t Value) { |
597 | return 31 - countLeadingZeros(Value); |
598 | } |
599 | |
600 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |
601 | /// (64 bit edition.) |
602 | inline unsigned Log2_64(uint64_t Value) { |
603 | return 63 - countLeadingZeros(Value); |
604 | } |
605 | |
606 | /// Return the ceil log base 2 of the specified value, 32 if the value is zero. |
607 | /// (32 bit edition). |
608 | /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 |
609 | inline unsigned Log2_32_Ceil(uint32_t Value) { |
610 | return 32 - countLeadingZeros(Value - 1); |
611 | } |
612 | |
613 | /// Return the ceil log base 2 of the specified value, 64 if the value is zero. |
614 | /// (64 bit edition.) |
615 | inline unsigned Log2_64_Ceil(uint64_t Value) { |
616 | return 64 - countLeadingZeros(Value - 1); |
617 | } |
618 | |
619 | /// Return the greatest common divisor of the values using Euclid's algorithm. |
620 | template <typename T> |
621 | inline T greatestCommonDivisor(T A, T B) { |
622 | while (B) { |
623 | T Tmp = B; |
624 | B = A % B; |
625 | A = Tmp; |
626 | } |
627 | return A; |
628 | } |
629 | |
630 | inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { |
631 | return greatestCommonDivisor<uint64_t>(A, B); |
632 | } |
633 | |
634 | /// This function takes a 64-bit integer and returns the bit equivalent double. |
635 | inline double BitsToDouble(uint64_t Bits) { |
636 | double D; |
637 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); |
638 | memcpy(&D, &Bits, sizeof(Bits)); |
639 | return D; |
640 | } |
641 | |
642 | /// This function takes a 32-bit integer and returns the bit equivalent float. |
643 | inline float BitsToFloat(uint32_t Bits) { |
644 | float F; |
645 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); |
646 | memcpy(&F, &Bits, sizeof(Bits)); |
647 | return F; |
648 | } |
649 | |
650 | /// This function takes a double and returns the bit equivalent 64-bit integer. |
651 | /// Note that copying doubles around changes the bits of NaNs on some hosts, |
652 | /// notably x86, so this routine cannot be used if these bits are needed. |
653 | inline uint64_t DoubleToBits(double Double) { |
654 | uint64_t Bits; |
655 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); |
656 | memcpy(&Bits, &Double, sizeof(Double)); |
657 | return Bits; |
658 | } |
659 | |
660 | /// This function takes a float and returns the bit equivalent 32-bit integer. |
661 | /// Note that copying floats around changes the bits of NaNs on some hosts, |
662 | /// notably x86, so this routine cannot be used if these bits are needed. |
663 | inline uint32_t FloatToBits(float Float) { |
664 | uint32_t Bits; |
665 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); |
666 | memcpy(&Bits, &Float, sizeof(Float)); |
667 | return Bits; |
668 | } |
669 | |
670 | /// A and B are either alignments or offsets. Return the minimum alignment that |
671 | /// may be assumed after adding the two together. |
672 | constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { |
673 | // The largest power of 2 that divides both A and B. |
674 | // |
675 | // Replace "-Value" by "1+~Value" in the following commented code to avoid |
676 | // MSVC warning C4146 |
677 | // return (A | B) & -(A | B); |
678 | return (A | B) & (1 + ~(A | B)); |
679 | } |
680 | |
681 | /// Returns the next power of two (in 64-bits) that is strictly greater than A. |
682 | /// Returns zero on overflow. |
683 | inline uint64_t NextPowerOf2(uint64_t A) { |
684 | A |= (A >> 1); |
685 | A |= (A >> 2); |
686 | A |= (A >> 4); |
687 | A |= (A >> 8); |
688 | A |= (A >> 16); |
689 | A |= (A >> 32); |
690 | return A + 1; |
691 | } |
692 | |
693 | /// Returns the power of two which is less than or equal to the given value. |
694 | /// Essentially, it is a floor operation across the domain of powers of two. |
695 | inline uint64_t PowerOf2Floor(uint64_t A) { |
696 | if (!A) return 0; |
697 | return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); |
698 | } |
699 | |
700 | /// Returns the power of two which is greater than or equal to the given value. |
701 | /// Essentially, it is a ceil operation across the domain of powers of two. |
702 | inline uint64_t PowerOf2Ceil(uint64_t A) { |
703 | if (!A) |
704 | return 0; |
705 | return NextPowerOf2(A - 1); |
706 | } |
707 | |
708 | /// Returns the next integer (mod 2**64) that is greater than or equal to |
709 | /// \p Value and is a multiple of \p Align. \p Align must be non-zero. |
710 | /// |
711 | /// If non-zero \p Skew is specified, the return value will be a minimal |
712 | /// integer that is greater than or equal to \p Value and equal to |
713 | /// \p Align * N + \p Skew for some integer N. If \p Skew is larger than |
714 | /// \p Align, its value is adjusted to '\p Skew mod \p Align'. |
715 | /// |
716 | /// Examples: |
717 | /// \code |
718 | /// alignTo(5, 8) = 8 |
719 | /// alignTo(17, 8) = 24 |
720 | /// alignTo(~0LL, 8) = 0 |
721 | /// alignTo(321, 255) = 510 |
722 | /// |
723 | /// alignTo(5, 8, 7) = 7 |
724 | /// alignTo(17, 8, 1) = 17 |
725 | /// alignTo(~0LL, 8, 3) = 3 |
726 | /// alignTo(321, 255, 42) = 552 |
727 | /// \endcode |
728 | inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { |
729 | assert(Align != 0u && "Align can't be 0.")(static_cast <bool> (Align != 0u && "Align can't be 0." ) ? void (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 729, __extension__ __PRETTY_FUNCTION__)); |
730 | Skew %= Align; |
731 | return (Value + Align - 1 - Skew) / Align * Align + Skew; |
732 | } |
733 | |
734 | /// Returns the next integer (mod 2**64) that is greater than or equal to |
735 | /// \p Value and is a multiple of \c Align. \c Align must be non-zero. |
736 | template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) { |
737 | static_assert(Align != 0u, "Align must be non-zero"); |
738 | return (Value + Align - 1) / Align * Align; |
739 | } |
740 | |
741 | /// Returns the integer ceil(Numerator / Denominator). |
742 | inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { |
743 | return alignTo(Numerator, Denominator) / Denominator; |
744 | } |
745 | |
746 | /// Returns the integer nearest(Numerator / Denominator). |
747 | inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { |
748 | return (Numerator + (Denominator / 2)) / Denominator; |
749 | } |
750 | |
751 | /// Returns the largest uint64_t less than or equal to \p Value and is |
752 | /// \p Skew mod \p Align. \p Align must be non-zero |
753 | inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { |
754 | assert(Align != 0u && "Align can't be 0.")(static_cast <bool> (Align != 0u && "Align can't be 0." ) ? void (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 754, __extension__ __PRETTY_FUNCTION__)); |
755 | Skew %= Align; |
756 | return (Value - Skew) / Align * Align + Skew; |
757 | } |
758 | |
759 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |
760 | /// Requires 0 < B <= 32. |
761 | template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) { |
762 | static_assert(B > 0, "Bit width can't be 0."); |
763 | static_assert(B <= 32, "Bit width out of range."); |
764 | return int32_t(X << (32 - B)) >> (32 - B); |
765 | } |
766 | |
767 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |
768 | /// Requires 0 < B <= 32. |
769 | inline int32_t SignExtend32(uint32_t X, unsigned B) { |
770 | assert(B > 0 && "Bit width can't be 0.")(static_cast <bool> (B > 0 && "Bit width can't be 0." ) ? void (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 770, __extension__ __PRETTY_FUNCTION__)); |
771 | assert(B <= 32 && "Bit width out of range.")(static_cast <bool> (B <= 32 && "Bit width out of range." ) ? void (0) : __assert_fail ("B <= 32 && \"Bit width out of range.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 771, __extension__ __PRETTY_FUNCTION__)); |
772 | return int32_t(X << (32 - B)) >> (32 - B); |
773 | } |
774 | |
775 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |
776 | /// Requires 0 < B <= 64. |
777 | template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) { |
778 | static_assert(B > 0, "Bit width can't be 0."); |
779 | static_assert(B <= 64, "Bit width out of range."); |
780 | return int64_t(x << (64 - B)) >> (64 - B); |
781 | } |
782 | |
783 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |
784 | /// Requires 0 < B <= 64. |
785 | inline int64_t SignExtend64(uint64_t X, unsigned B) { |
786 | assert(B > 0 && "Bit width can't be 0.")(static_cast <bool> (B > 0 && "Bit width can't be 0." ) ? void (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 786, __extension__ __PRETTY_FUNCTION__)); |
787 | assert(B <= 64 && "Bit width out of range.")(static_cast <bool> (B <= 64 && "Bit width out of range." ) ? void (0) : __assert_fail ("B <= 64 && \"Bit width out of range.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h" , 787, __extension__ __PRETTY_FUNCTION__)); |
788 | return int64_t(X << (64 - B)) >> (64 - B); |
789 | } |
790 | |
791 | /// Subtract two unsigned integers, X and Y, of type T and return the absolute |
792 | /// value of the result. |
793 | template <typename T> |
794 | std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) { |
795 | return X > Y ? (X - Y) : (Y - X); |
796 | } |
797 | |
798 | /// Add two unsigned integers, X and Y, of type T. Clamp the result to the |
799 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |
800 | /// the result is larger than the maximum representable value of type T. |
801 | template <typename T> |
802 | std::enable_if_t<std::is_unsigned<T>::value, T> |
803 | SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { |
804 | bool Dummy; |
805 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
806 | // Hacker's Delight, p. 29 |
807 | T Z = X + Y; |
808 | Overflowed = (Z < X || Z < Y); |
809 | if (Overflowed) |
810 | return std::numeric_limits<T>::max(); |
811 | else |
812 | return Z; |
813 | } |
814 | |
815 | /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the |
816 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |
817 | /// the result is larger than the maximum representable value of type T. |
818 | template <typename T> |
819 | std::enable_if_t<std::is_unsigned<T>::value, T> |
820 | SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { |
821 | bool Dummy; |
822 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
823 | |
824 | // Hacker's Delight, p. 30 has a different algorithm, but we don't use that |
825 | // because it fails for uint16_t (where multiplication can have undefined |
826 | // behavior due to promotion to int), and requires a division in addition |
827 | // to the multiplication. |
828 | |
829 | Overflowed = false; |
830 | |
831 | // Log2(Z) would be either Log2Z or Log2Z + 1. |
832 | // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z |
833 | // will necessarily be less than Log2Max as desired. |
834 | int Log2Z = Log2_64(X) + Log2_64(Y); |
835 | const T Max = std::numeric_limits<T>::max(); |
836 | int Log2Max = Log2_64(Max); |
837 | if (Log2Z < Log2Max) { |
838 | return X * Y; |
839 | } |
840 | if (Log2Z > Log2Max) { |
841 | Overflowed = true; |
842 | return Max; |
843 | } |
844 | |
845 | // We're going to use the top bit, and maybe overflow one |
846 | // bit past it. Multiply all but the bottom bit then add |
847 | // that on at the end. |
848 | T Z = (X >> 1) * Y; |
849 | if (Z & ~(Max >> 1)) { |
850 | Overflowed = true; |
851 | return Max; |
852 | } |
853 | Z <<= 1; |
854 | if (X & 1) |
855 | return SaturatingAdd(Z, Y, ResultOverflowed); |
856 | |
857 | return Z; |
858 | } |
859 | |
860 | /// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to |
861 | /// the product. Clamp the result to the maximum representable value of T on |
862 | /// overflow. ResultOverflowed indicates if the result is larger than the |
863 | /// maximum representable value of type T. |
864 | template <typename T> |
865 | std::enable_if_t<std::is_unsigned<T>::value, T> |
866 | SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { |
867 | bool Dummy; |
868 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |
869 | |
870 | T Product = SaturatingMultiply(X, Y, &Overflowed); |
871 | if (Overflowed) |
872 | return Product; |
873 | |
874 | return SaturatingAdd(A, Product, &Overflowed); |
875 | } |
876 | |
877 | /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. |
878 | extern const float huge_valf; |
879 | |
880 | |
881 | /// Add two signed integers, computing the two's complement truncated result, |
882 | /// returning true if overflow occured. |
883 | template <typename T> |
884 | std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) { |
885 | #if __has_builtin(__builtin_add_overflow)1 |
886 | return __builtin_add_overflow(X, Y, &Result); |
887 | #else |
888 | // Perform the unsigned addition. |
889 | using U = std::make_unsigned_t<T>; |
890 | const U UX = static_cast<U>(X); |
891 | const U UY = static_cast<U>(Y); |
892 | const U UResult = UX + UY; |
893 | |
894 | // Convert to signed. |
895 | Result = static_cast<T>(UResult); |
896 | |
897 | // Adding two positive numbers should result in a positive number. |
898 | if (X > 0 && Y > 0) |
899 | return Result <= 0; |
900 | // Adding two negatives should result in a negative number. |
901 | if (X < 0 && Y < 0) |
902 | return Result >= 0; |
903 | return false; |
904 | #endif |
905 | } |
906 | |
907 | /// Subtract two signed integers, computing the two's complement truncated |
908 | /// result, returning true if an overflow ocurred. |
909 | template <typename T> |
910 | std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) { |
911 | #if __has_builtin(__builtin_sub_overflow)1 |
912 | return __builtin_sub_overflow(X, Y, &Result); |
913 | #else |
914 | // Perform the unsigned addition. |
915 | using U = std::make_unsigned_t<T>; |
916 | const U UX = static_cast<U>(X); |
917 | const U UY = static_cast<U>(Y); |
918 | const U UResult = UX - UY; |
919 | |
920 | // Convert to signed. |
921 | Result = static_cast<T>(UResult); |
922 | |
923 | // Subtracting a positive number from a negative results in a negative number. |
924 | if (X <= 0 && Y > 0) |
925 | return Result >= 0; |
926 | // Subtracting a negative number from a positive results in a positive number. |
927 | if (X >= 0 && Y < 0) |
928 | return Result <= 0; |
929 | return false; |
930 | #endif |
931 | } |
932 | |
933 | /// Multiply two signed integers, computing the two's complement truncated |
934 | /// result, returning true if an overflow ocurred. |
935 | template <typename T> |
936 | std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) { |
937 | // Perform the unsigned multiplication on absolute values. |
938 | using U = std::make_unsigned_t<T>; |
939 | const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X); |
940 | const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y); |
941 | const U UResult = UX * UY; |
942 | |
943 | // Convert to signed. |
944 | const bool IsNegative = (X < 0) ^ (Y < 0); |
945 | Result = IsNegative ? (0 - UResult) : UResult; |
946 | |
947 | // If any of the args was 0, result is 0 and no overflow occurs. |
948 | if (UX == 0 || UY == 0) |
949 | return false; |
950 | |
951 | // UX and UY are in [1, 2^n], where n is the number of digits. |
952 | // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for |
953 | // positive) divided by an argument compares to the other. |
954 | if (IsNegative) |
955 | return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY; |
956 | else |
957 | return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY; |
958 | } |
959 | |
960 | } // End llvm namespace |
961 | |
962 | #endif |