File: | llvm/include/llvm/CodeGen/ExecutionDomainFix.h |
Warning: | line 84, column 60 The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | |||||||||
9 | #include "llvm/CodeGen/ExecutionDomainFix.h" | ||||||||
10 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||||||
11 | #include "llvm/CodeGen/TargetInstrInfo.h" | ||||||||
12 | #include "llvm/Support/Debug.h" | ||||||||
13 | |||||||||
14 | using namespace llvm; | ||||||||
15 | |||||||||
16 | #define DEBUG_TYPE"execution-deps-fix" "execution-deps-fix" | ||||||||
17 | |||||||||
18 | iterator_range<SmallVectorImpl<int>::const_iterator> | ||||||||
19 | ExecutionDomainFix::regIndices(unsigned Reg) const { | ||||||||
20 | assert(Reg < AliasMap.size() && "Invalid register")((Reg < AliasMap.size() && "Invalid register") ? static_cast <void> (0) : __assert_fail ("Reg < AliasMap.size() && \"Invalid register\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 20, __PRETTY_FUNCTION__)); | ||||||||
21 | const auto &Entry = AliasMap[Reg]; | ||||||||
22 | return make_range(Entry.begin(), Entry.end()); | ||||||||
23 | } | ||||||||
24 | |||||||||
25 | DomainValue *ExecutionDomainFix::alloc(int domain) { | ||||||||
26 | DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue | ||||||||
27 | : Avail.pop_back_val(); | ||||||||
28 | if (domain >= 0) | ||||||||
29 | dv->addDomain(domain); | ||||||||
30 | assert(dv->Refs == 0 && "Reference count wasn't cleared")((dv->Refs == 0 && "Reference count wasn't cleared" ) ? static_cast<void> (0) : __assert_fail ("dv->Refs == 0 && \"Reference count wasn't cleared\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 30, __PRETTY_FUNCTION__)); | ||||||||
31 | assert(!dv->Next && "Chained DomainValue shouldn't have been recycled")((!dv->Next && "Chained DomainValue shouldn't have been recycled" ) ? static_cast<void> (0) : __assert_fail ("!dv->Next && \"Chained DomainValue shouldn't have been recycled\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 31, __PRETTY_FUNCTION__)); | ||||||||
32 | return dv; | ||||||||
33 | } | ||||||||
34 | |||||||||
35 | void ExecutionDomainFix::release(DomainValue *DV) { | ||||||||
36 | while (DV) { | ||||||||
37 | assert(DV->Refs && "Bad DomainValue")((DV->Refs && "Bad DomainValue") ? static_cast< void> (0) : __assert_fail ("DV->Refs && \"Bad DomainValue\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 37, __PRETTY_FUNCTION__)); | ||||||||
38 | if (--DV->Refs) | ||||||||
39 | return; | ||||||||
40 | |||||||||
41 | // There are no more DV references. Collapse any contained instructions. | ||||||||
42 | if (DV->AvailableDomains && !DV->isCollapsed()) | ||||||||
43 | collapse(DV, DV->getFirstDomain()); | ||||||||
44 | |||||||||
45 | DomainValue *Next = DV->Next; | ||||||||
46 | DV->clear(); | ||||||||
47 | Avail.push_back(DV); | ||||||||
48 | // Also release the next DomainValue in the chain. | ||||||||
49 | DV = Next; | ||||||||
50 | } | ||||||||
51 | } | ||||||||
52 | |||||||||
53 | DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) { | ||||||||
54 | DomainValue *DV = DVRef; | ||||||||
55 | if (!DV || !DV->Next) | ||||||||
56 | return DV; | ||||||||
57 | |||||||||
58 | // DV has a chain. Find the end. | ||||||||
59 | do | ||||||||
60 | DV = DV->Next; | ||||||||
61 | while (DV->Next); | ||||||||
62 | |||||||||
63 | // Update DVRef to point to DV. | ||||||||
64 | retain(DV); | ||||||||
65 | release(DVRef); | ||||||||
66 | DVRef = DV; | ||||||||
67 | return DV; | ||||||||
68 | } | ||||||||
69 | |||||||||
70 | void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) { | ||||||||
71 | assert(unsigned(rx) < NumRegs && "Invalid index")((unsigned(rx) < NumRegs && "Invalid index") ? static_cast <void> (0) : __assert_fail ("unsigned(rx) < NumRegs && \"Invalid index\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 71, __PRETTY_FUNCTION__)); | ||||||||
72 | assert(!LiveRegs.empty() && "Must enter basic block first.")((!LiveRegs.empty() && "Must enter basic block first." ) ? static_cast<void> (0) : __assert_fail ("!LiveRegs.empty() && \"Must enter basic block first.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 72, __PRETTY_FUNCTION__)); | ||||||||
73 | |||||||||
74 | if (LiveRegs[rx] == dv) | ||||||||
75 | return; | ||||||||
76 | if (LiveRegs[rx]) | ||||||||
77 | release(LiveRegs[rx]); | ||||||||
78 | LiveRegs[rx] = retain(dv); | ||||||||
79 | } | ||||||||
80 | |||||||||
81 | void ExecutionDomainFix::kill(int rx) { | ||||||||
82 | assert(unsigned(rx) < NumRegs && "Invalid index")((unsigned(rx) < NumRegs && "Invalid index") ? static_cast <void> (0) : __assert_fail ("unsigned(rx) < NumRegs && \"Invalid index\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 82, __PRETTY_FUNCTION__)); | ||||||||
83 | assert(!LiveRegs.empty() && "Must enter basic block first.")((!LiveRegs.empty() && "Must enter basic block first." ) ? static_cast<void> (0) : __assert_fail ("!LiveRegs.empty() && \"Must enter basic block first.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 83, __PRETTY_FUNCTION__)); | ||||||||
84 | if (!LiveRegs[rx]) | ||||||||
85 | return; | ||||||||
86 | |||||||||
87 | release(LiveRegs[rx]); | ||||||||
88 | LiveRegs[rx] = nullptr; | ||||||||
89 | } | ||||||||
90 | |||||||||
91 | void ExecutionDomainFix::force(int rx, unsigned domain) { | ||||||||
92 | assert(unsigned(rx) < NumRegs && "Invalid index")((unsigned(rx) < NumRegs && "Invalid index") ? static_cast <void> (0) : __assert_fail ("unsigned(rx) < NumRegs && \"Invalid index\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 92, __PRETTY_FUNCTION__)); | ||||||||
93 | assert(!LiveRegs.empty() && "Must enter basic block first.")((!LiveRegs.empty() && "Must enter basic block first." ) ? static_cast<void> (0) : __assert_fail ("!LiveRegs.empty() && \"Must enter basic block first.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 93, __PRETTY_FUNCTION__)); | ||||||||
94 | if (DomainValue *dv = LiveRegs[rx]) { | ||||||||
95 | if (dv->isCollapsed()) | ||||||||
96 | dv->addDomain(domain); | ||||||||
97 | else if (dv->hasDomain(domain)) | ||||||||
98 | collapse(dv, domain); | ||||||||
99 | else { | ||||||||
100 | // This is an incompatible open DomainValue. Collapse it to whatever and | ||||||||
101 | // force the new value into domain. This costs a domain crossing. | ||||||||
102 | collapse(dv, dv->getFirstDomain()); | ||||||||
103 | assert(LiveRegs[rx] && "Not live after collapse?")((LiveRegs[rx] && "Not live after collapse?") ? static_cast <void> (0) : __assert_fail ("LiveRegs[rx] && \"Not live after collapse?\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 103, __PRETTY_FUNCTION__)); | ||||||||
104 | LiveRegs[rx]->addDomain(domain); | ||||||||
105 | } | ||||||||
106 | } else { | ||||||||
107 | // Set up basic collapsed DomainValue. | ||||||||
108 | setLiveReg(rx, alloc(domain)); | ||||||||
109 | } | ||||||||
110 | } | ||||||||
111 | |||||||||
112 | void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) { | ||||||||
113 | assert(dv->hasDomain(domain) && "Cannot collapse")((dv->hasDomain(domain) && "Cannot collapse") ? static_cast <void> (0) : __assert_fail ("dv->hasDomain(domain) && \"Cannot collapse\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 113, __PRETTY_FUNCTION__)); | ||||||||
114 | |||||||||
115 | // Collapse all the instructions. | ||||||||
116 | while (!dv->Instrs.empty()) | ||||||||
117 | TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); | ||||||||
118 | dv->setSingleDomain(domain); | ||||||||
119 | |||||||||
120 | // If there are multiple users, give them new, unique DomainValues. | ||||||||
121 | if (!LiveRegs.empty() && dv->Refs > 1) | ||||||||
122 | for (unsigned rx = 0; rx != NumRegs; ++rx) | ||||||||
123 | if (LiveRegs[rx] == dv) | ||||||||
124 | setLiveReg(rx, alloc(domain)); | ||||||||
125 | } | ||||||||
126 | |||||||||
127 | bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) { | ||||||||
128 | assert(!A->isCollapsed() && "Cannot merge into collapsed")((!A->isCollapsed() && "Cannot merge into collapsed" ) ? static_cast<void> (0) : __assert_fail ("!A->isCollapsed() && \"Cannot merge into collapsed\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 128, __PRETTY_FUNCTION__)); | ||||||||
129 | assert(!B->isCollapsed() && "Cannot merge from collapsed")((!B->isCollapsed() && "Cannot merge from collapsed" ) ? static_cast<void> (0) : __assert_fail ("!B->isCollapsed() && \"Cannot merge from collapsed\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 129, __PRETTY_FUNCTION__)); | ||||||||
130 | if (A == B) | ||||||||
131 | return true; | ||||||||
132 | // Restrict to the domains that A and B have in common. | ||||||||
133 | unsigned common = A->getCommonDomains(B->AvailableDomains); | ||||||||
134 | if (!common) | ||||||||
135 | return false; | ||||||||
136 | A->AvailableDomains = common; | ||||||||
137 | A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); | ||||||||
138 | |||||||||
139 | // Clear the old DomainValue so we won't try to swizzle instructions twice. | ||||||||
140 | B->clear(); | ||||||||
141 | // All uses of B are referred to A. | ||||||||
142 | B->Next = retain(A); | ||||||||
143 | |||||||||
144 | for (unsigned rx = 0; rx != NumRegs; ++rx) { | ||||||||
145 | assert(!LiveRegs.empty() && "no space allocated for live registers")((!LiveRegs.empty() && "no space allocated for live registers" ) ? static_cast<void> (0) : __assert_fail ("!LiveRegs.empty() && \"no space allocated for live registers\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 145, __PRETTY_FUNCTION__)); | ||||||||
146 | if (LiveRegs[rx] == B) | ||||||||
147 | setLiveReg(rx, A); | ||||||||
148 | } | ||||||||
149 | return true; | ||||||||
150 | } | ||||||||
151 | |||||||||
152 | void ExecutionDomainFix::enterBasicBlock( | ||||||||
153 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { | ||||||||
154 | |||||||||
155 | MachineBasicBlock *MBB = TraversedMBB.MBB; | ||||||||
156 | |||||||||
157 | // Set up LiveRegs to represent registers entering MBB. | ||||||||
158 | // Set default domain values to 'no domain' (nullptr) | ||||||||
159 | if (LiveRegs.empty()) | ||||||||
160 | LiveRegs.assign(NumRegs, nullptr); | ||||||||
161 | |||||||||
162 | // This is the entry block. | ||||||||
163 | if (MBB->pred_empty()) { | ||||||||
164 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("execution-deps-fix")) { dbgs() << printMBBReference(* MBB) << ": entry\n"; } } while (false); | ||||||||
165 | return; | ||||||||
166 | } | ||||||||
167 | |||||||||
168 | // Try to coalesce live-out registers from predecessors. | ||||||||
169 | for (MachineBasicBlock *pred : MBB->predecessors()) { | ||||||||
170 | assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&((unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && "Should have pre-allocated MBBInfos for all MBBs") ? static_cast <void> (0) : __assert_fail ("unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && \"Should have pre-allocated MBBInfos for all MBBs\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 171, __PRETTY_FUNCTION__)) | ||||||||
171 | "Should have pre-allocated MBBInfos for all MBBs")((unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && "Should have pre-allocated MBBInfos for all MBBs") ? static_cast <void> (0) : __assert_fail ("unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && \"Should have pre-allocated MBBInfos for all MBBs\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 171, __PRETTY_FUNCTION__)); | ||||||||
172 | LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; | ||||||||
173 | // Incoming is null if this is a backedge from a BB | ||||||||
174 | // we haven't processed yet | ||||||||
175 | if (Incoming.empty()) | ||||||||
176 | continue; | ||||||||
177 | |||||||||
178 | for (unsigned rx = 0; rx != NumRegs; ++rx) { | ||||||||
179 | DomainValue *pdv = resolve(Incoming[rx]); | ||||||||
180 | if (!pdv
| ||||||||
181 | continue; | ||||||||
182 | if (!LiveRegs[rx]) { | ||||||||
183 | setLiveReg(rx, pdv); | ||||||||
184 | continue; | ||||||||
185 | } | ||||||||
186 | |||||||||
187 | // We have a live DomainValue from more than one predecessor. | ||||||||
188 | if (LiveRegs[rx]->isCollapsed()) { | ||||||||
189 | // We are already collapsed, but predecessor is not. Force it. | ||||||||
190 | unsigned Domain = LiveRegs[rx]->getFirstDomain(); | ||||||||
191 | if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) | ||||||||
192 | collapse(pdv, Domain); | ||||||||
193 | continue; | ||||||||
194 | } | ||||||||
195 | |||||||||
196 | // Currently open, merge in predecessor. | ||||||||
197 | if (!pdv->isCollapsed()) | ||||||||
198 | merge(LiveRegs[rx], pdv); | ||||||||
199 | else | ||||||||
200 | force(rx, pdv->getFirstDomain()); | ||||||||
201 | } | ||||||||
202 | } | ||||||||
203 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("execution-deps-fix")) { dbgs() << printMBBReference(* MBB) << (!TraversedMBB.IsDone ? ": incomplete\n" : ": all preds known\n" ); } } while (false) | ||||||||
204 | << (!TraversedMBB.IsDone ? ": incomplete\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("execution-deps-fix")) { dbgs() << printMBBReference(* MBB) << (!TraversedMBB.IsDone ? ": incomplete\n" : ": all preds known\n" ); } } while (false) | ||||||||
205 | : ": all preds known\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("execution-deps-fix")) { dbgs() << printMBBReference(* MBB) << (!TraversedMBB.IsDone ? ": incomplete\n" : ": all preds known\n" ); } } while (false); | ||||||||
206 | } | ||||||||
207 | |||||||||
208 | void ExecutionDomainFix::leaveBasicBlock( | ||||||||
209 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { | ||||||||
210 | assert(!LiveRegs.empty() && "Must enter basic block first.")((!LiveRegs.empty() && "Must enter basic block first." ) ? static_cast<void> (0) : __assert_fail ("!LiveRegs.empty() && \"Must enter basic block first.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 210, __PRETTY_FUNCTION__)); | ||||||||
211 | unsigned MBBNumber = TraversedMBB.MBB->getNumber(); | ||||||||
212 | assert(MBBNumber < MBBOutRegsInfos.size() &&((MBBNumber < MBBOutRegsInfos.size() && "Unexpected basic block number." ) ? static_cast<void> (0) : __assert_fail ("MBBNumber < MBBOutRegsInfos.size() && \"Unexpected basic block number.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 213, __PRETTY_FUNCTION__)) | ||||||||
213 | "Unexpected basic block number.")((MBBNumber < MBBOutRegsInfos.size() && "Unexpected basic block number." ) ? static_cast<void> (0) : __assert_fail ("MBBNumber < MBBOutRegsInfos.size() && \"Unexpected basic block number.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 213, __PRETTY_FUNCTION__)); | ||||||||
214 | // Save register clearances at end of MBB - used by enterBasicBlock(). | ||||||||
215 | for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) { | ||||||||
216 | release(OldLiveReg); | ||||||||
217 | } | ||||||||
218 | MBBOutRegsInfos[MBBNumber] = LiveRegs; | ||||||||
219 | LiveRegs.clear(); | ||||||||
220 | } | ||||||||
221 | |||||||||
222 | bool ExecutionDomainFix::visitInstr(MachineInstr *MI) { | ||||||||
223 | // Update instructions with explicit execution domains. | ||||||||
224 | std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI); | ||||||||
225 | if (DomP.first) { | ||||||||
226 | if (DomP.second) | ||||||||
227 | visitSoftInstr(MI, DomP.second); | ||||||||
228 | else | ||||||||
229 | visitHardInstr(MI, DomP.first); | ||||||||
230 | } | ||||||||
231 | |||||||||
232 | return !DomP.first; | ||||||||
233 | } | ||||||||
234 | |||||||||
235 | void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) { | ||||||||
236 | assert(!MI->isDebugInstr() && "Won't process debug values")((!MI->isDebugInstr() && "Won't process debug values" ) ? static_cast<void> (0) : __assert_fail ("!MI->isDebugInstr() && \"Won't process debug values\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 236, __PRETTY_FUNCTION__)); | ||||||||
237 | const MCInstrDesc &MCID = MI->getDesc(); | ||||||||
238 | for (unsigned i = 0, | ||||||||
239 | e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); | ||||||||
240 | i != e; ++i) { | ||||||||
241 | MachineOperand &MO = MI->getOperand(i); | ||||||||
242 | if (!MO.isReg()) | ||||||||
243 | continue; | ||||||||
244 | if (MO.isUse()) | ||||||||
245 | continue; | ||||||||
246 | for (int rx : regIndices(MO.getReg())) { | ||||||||
247 | // This instruction explicitly defines rx. | ||||||||
248 | LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("execution-deps-fix")) { dbgs() << printReg(RC->getRegister (rx), TRI) << ":\t" << *MI; } } while (false); | ||||||||
249 | |||||||||
250 | // Kill off domains redefined by generic instructions. | ||||||||
251 | if (Kill) | ||||||||
252 | kill(rx); | ||||||||
253 | } | ||||||||
254 | } | ||||||||
255 | } | ||||||||
256 | |||||||||
257 | void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) { | ||||||||
258 | // Collapse all uses. | ||||||||
259 | for (unsigned i = mi->getDesc().getNumDefs(), | ||||||||
260 | e = mi->getDesc().getNumOperands(); | ||||||||
261 | i != e; ++i) { | ||||||||
262 | MachineOperand &mo = mi->getOperand(i); | ||||||||
263 | if (!mo.isReg()) | ||||||||
264 | continue; | ||||||||
265 | for (int rx : regIndices(mo.getReg())) { | ||||||||
266 | force(rx, domain); | ||||||||
267 | } | ||||||||
268 | } | ||||||||
269 | |||||||||
270 | // Kill all defs and force them. | ||||||||
271 | for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { | ||||||||
272 | MachineOperand &mo = mi->getOperand(i); | ||||||||
273 | if (!mo.isReg()) | ||||||||
274 | continue; | ||||||||
275 | for (int rx : regIndices(mo.getReg())) { | ||||||||
276 | kill(rx); | ||||||||
277 | force(rx, domain); | ||||||||
278 | } | ||||||||
279 | } | ||||||||
280 | } | ||||||||
281 | |||||||||
282 | void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { | ||||||||
283 | // Bitmask of available domains for this instruction after taking collapsed | ||||||||
284 | // operands into account. | ||||||||
285 | unsigned available = mask; | ||||||||
286 | |||||||||
287 | // Scan the explicit use operands for incoming domains. | ||||||||
288 | SmallVector<int, 4> used; | ||||||||
289 | if (!LiveRegs.empty()) | ||||||||
290 | for (unsigned i = mi->getDesc().getNumDefs(), | ||||||||
291 | e = mi->getDesc().getNumOperands(); | ||||||||
292 | i != e; ++i) { | ||||||||
293 | MachineOperand &mo = mi->getOperand(i); | ||||||||
294 | if (!mo.isReg()) | ||||||||
295 | continue; | ||||||||
296 | for (int rx : regIndices(mo.getReg())) { | ||||||||
297 | DomainValue *dv = LiveRegs[rx]; | ||||||||
298 | if (dv == nullptr) | ||||||||
299 | continue; | ||||||||
300 | // Bitmask of domains that dv and available have in common. | ||||||||
301 | unsigned common = dv->getCommonDomains(available); | ||||||||
302 | // Is it possible to use this collapsed register for free? | ||||||||
303 | if (dv->isCollapsed()) { | ||||||||
304 | // Restrict available domains to the ones in common with the operand. | ||||||||
305 | // If there are no common domains, we must pay the cross-domain | ||||||||
306 | // penalty for this operand. | ||||||||
307 | if (common) | ||||||||
308 | available = common; | ||||||||
309 | } else if (common) | ||||||||
310 | // Open DomainValue is compatible, save it for merging. | ||||||||
311 | used.push_back(rx); | ||||||||
312 | else | ||||||||
313 | // Open DomainValue is not compatible with instruction. It is useless | ||||||||
314 | // now. | ||||||||
315 | kill(rx); | ||||||||
316 | } | ||||||||
317 | } | ||||||||
318 | |||||||||
319 | // If the collapsed operands force a single domain, propagate the collapse. | ||||||||
320 | if (isPowerOf2_32(available)) { | ||||||||
321 | unsigned domain = countTrailingZeros(available); | ||||||||
322 | TII->setExecutionDomain(*mi, domain); | ||||||||
323 | visitHardInstr(mi, domain); | ||||||||
324 | return; | ||||||||
325 | } | ||||||||
326 | |||||||||
327 | // Kill off any remaining uses that don't match available, and build a list of | ||||||||
328 | // incoming DomainValues that we want to merge. | ||||||||
329 | SmallVector<int, 4> Regs; | ||||||||
330 | for (int rx : used) { | ||||||||
331 | assert(!LiveRegs.empty() && "no space allocated for live registers")((!LiveRegs.empty() && "no space allocated for live registers" ) ? static_cast<void> (0) : __assert_fail ("!LiveRegs.empty() && \"no space allocated for live registers\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 331, __PRETTY_FUNCTION__)); | ||||||||
332 | DomainValue *&LR = LiveRegs[rx]; | ||||||||
333 | // This useless DomainValue could have been missed above. | ||||||||
334 | if (!LR->getCommonDomains(available)) { | ||||||||
335 | kill(rx); | ||||||||
336 | continue; | ||||||||
337 | } | ||||||||
338 | // Sorted insertion. | ||||||||
339 | // Enables giving priority to the latest domains during merging. | ||||||||
340 | const int Def = RDA->getReachingDef(mi, RC->getRegister(rx)); | ||||||||
341 | auto I = partition_point(Regs, [&](int I) { | ||||||||
342 | return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def; | ||||||||
343 | }); | ||||||||
344 | Regs.insert(I, rx); | ||||||||
345 | } | ||||||||
346 | |||||||||
347 | // doms are now sorted in order of appearance. Try to merge them all, giving | ||||||||
348 | // priority to the latest ones. | ||||||||
349 | DomainValue *dv = nullptr; | ||||||||
350 | while (!Regs.empty()) { | ||||||||
351 | if (!dv) { | ||||||||
352 | dv = LiveRegs[Regs.pop_back_val()]; | ||||||||
353 | // Force the first dv to match the current instruction. | ||||||||
354 | dv->AvailableDomains = dv->getCommonDomains(available); | ||||||||
355 | assert(dv->AvailableDomains && "Domain should have been filtered")((dv->AvailableDomains && "Domain should have been filtered" ) ? static_cast<void> (0) : __assert_fail ("dv->AvailableDomains && \"Domain should have been filtered\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 355, __PRETTY_FUNCTION__)); | ||||||||
356 | continue; | ||||||||
357 | } | ||||||||
358 | |||||||||
359 | DomainValue *Latest = LiveRegs[Regs.pop_back_val()]; | ||||||||
360 | // Skip already merged values. | ||||||||
361 | if (Latest == dv || Latest->Next) | ||||||||
362 | continue; | ||||||||
363 | if (merge(dv, Latest)) | ||||||||
364 | continue; | ||||||||
365 | |||||||||
366 | // If latest didn't merge, it is useless now. Kill all registers using it. | ||||||||
367 | for (int i : used) { | ||||||||
368 | assert(!LiveRegs.empty() && "no space allocated for live registers")((!LiveRegs.empty() && "no space allocated for live registers" ) ? static_cast<void> (0) : __assert_fail ("!LiveRegs.empty() && \"no space allocated for live registers\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 368, __PRETTY_FUNCTION__)); | ||||||||
369 | if (LiveRegs[i] == Latest) | ||||||||
370 | kill(i); | ||||||||
371 | } | ||||||||
372 | } | ||||||||
373 | |||||||||
374 | // dv is the DomainValue we are going to use for this instruction. | ||||||||
375 | if (!dv) { | ||||||||
376 | dv = alloc(); | ||||||||
377 | dv->AvailableDomains = available; | ||||||||
378 | } | ||||||||
379 | dv->Instrs.push_back(mi); | ||||||||
380 | |||||||||
381 | // Finally set all defs and non-collapsed uses to dv. We must iterate through | ||||||||
382 | // all the operators, including imp-def ones. | ||||||||
383 | for (MachineOperand &mo : mi->operands()) { | ||||||||
384 | if (!mo.isReg()) | ||||||||
385 | continue; | ||||||||
386 | for (int rx : regIndices(mo.getReg())) { | ||||||||
387 | if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) { | ||||||||
388 | kill(rx); | ||||||||
389 | setLiveReg(rx, dv); | ||||||||
390 | } | ||||||||
391 | } | ||||||||
392 | } | ||||||||
393 | } | ||||||||
394 | |||||||||
395 | void ExecutionDomainFix::processBasicBlock( | ||||||||
396 | const LoopTraversal::TraversedMBBInfo &TraversedMBB) { | ||||||||
397 | enterBasicBlock(TraversedMBB); | ||||||||
398 | // If this block is not done, it makes little sense to make any decisions | ||||||||
399 | // based on clearance information. We need to make a second pass anyway, | ||||||||
400 | // and by then we'll have better information, so we can avoid doing the work | ||||||||
401 | // to try and break dependencies now. | ||||||||
402 | for (MachineInstr &MI : *TraversedMBB.MBB) { | ||||||||
403 | if (!MI.isDebugInstr()) { | ||||||||
404 | bool Kill = false; | ||||||||
405 | if (TraversedMBB.PrimaryPass) | ||||||||
406 | Kill = visitInstr(&MI); | ||||||||
407 | processDefs(&MI, Kill); | ||||||||
408 | } | ||||||||
409 | } | ||||||||
410 | leaveBasicBlock(TraversedMBB); | ||||||||
411 | } | ||||||||
412 | |||||||||
413 | bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) { | ||||||||
414 | if (skipFunction(mf.getFunction())) | ||||||||
| |||||||||
415 | return false; | ||||||||
416 | MF = &mf; | ||||||||
417 | TII = MF->getSubtarget().getInstrInfo(); | ||||||||
418 | TRI = MF->getSubtarget().getRegisterInfo(); | ||||||||
419 | LiveRegs.clear(); | ||||||||
420 | assert(NumRegs == RC->getNumRegs() && "Bad regclass")((NumRegs == RC->getNumRegs() && "Bad regclass") ? static_cast<void> (0) : __assert_fail ("NumRegs == RC->getNumRegs() && \"Bad regclass\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/CodeGen/ExecutionDomainFix.cpp" , 420, __PRETTY_FUNCTION__)); | ||||||||
421 | |||||||||
422 | LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("execution-deps-fix")) { dbgs() << "********** FIX EXECUTION DOMAIN: " << TRI->getRegClassName(RC) << " **********\n" ; } } while (false) | ||||||||
423 | << TRI->getRegClassName(RC) << " **********\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("execution-deps-fix")) { dbgs() << "********** FIX EXECUTION DOMAIN: " << TRI->getRegClassName(RC) << " **********\n" ; } } while (false); | ||||||||
424 | |||||||||
425 | // If no relevant registers are used in the function, we can skip it | ||||||||
426 | // completely. | ||||||||
427 | bool anyregs = false; | ||||||||
428 | const MachineRegisterInfo &MRI = mf.getRegInfo(); | ||||||||
429 | for (unsigned Reg : *RC) { | ||||||||
430 | if (MRI.isPhysRegUsed(Reg)) { | ||||||||
431 | anyregs = true; | ||||||||
432 | break; | ||||||||
433 | } | ||||||||
434 | } | ||||||||
435 | if (!anyregs
| ||||||||
436 | return false; | ||||||||
437 | |||||||||
438 | RDA = &getAnalysis<ReachingDefAnalysis>(); | ||||||||
439 | |||||||||
440 | // Initialize the AliasMap on the first use. | ||||||||
441 | if (AliasMap.empty()) { | ||||||||
442 | // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and | ||||||||
443 | // therefore the LiveRegs array. | ||||||||
444 | AliasMap.resize(TRI->getNumRegs()); | ||||||||
445 | for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) | ||||||||
446 | for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); | ||||||||
447 | ++AI) | ||||||||
448 | AliasMap[*AI].push_back(i); | ||||||||
449 | } | ||||||||
450 | |||||||||
451 | // Initialize the MBBOutRegsInfos | ||||||||
452 | MBBOutRegsInfos.resize(mf.getNumBlockIDs()); | ||||||||
453 | |||||||||
454 | // Traverse the basic blocks. | ||||||||
455 | LoopTraversal Traversal; | ||||||||
456 | LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); | ||||||||
457 | for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { | ||||||||
458 | processBasicBlock(TraversedMBB); | ||||||||
459 | } | ||||||||
460 | |||||||||
461 | for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) { | ||||||||
462 | for (DomainValue *OutLiveReg : OutLiveRegs) { | ||||||||
463 | if (OutLiveReg) | ||||||||
464 | release(OutLiveReg); | ||||||||
465 | } | ||||||||
466 | } | ||||||||
467 | MBBOutRegsInfos.clear(); | ||||||||
468 | Avail.clear(); | ||||||||
469 | Allocator.DestroyAll(); | ||||||||
470 | |||||||||
471 | return false; | ||||||||
472 | } |
1 | //==-- llvm/CodeGen/ExecutionDomainFix.h - Execution Domain Fix -*- C++ -*--==// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | /// \file Execution Domain Fix pass. | |||
10 | /// | |||
11 | /// Some X86 SSE instructions like mov, and, or, xor are available in different | |||
12 | /// variants for different operand types. These variant instructions are | |||
13 | /// equivalent, but on Nehalem and newer cpus there is extra latency | |||
14 | /// transferring data between integer and floating point domains. ARM cores | |||
15 | /// have similar issues when they are configured with both VFP and NEON | |||
16 | /// pipelines. | |||
17 | /// | |||
18 | /// This pass changes the variant instructions to minimize domain crossings. | |||
19 | // | |||
20 | //===----------------------------------------------------------------------===// | |||
21 | ||||
22 | #ifndef LLVM_CODEGEN_EXECUTIONDOMAINFIX_H | |||
23 | #define LLVM_CODEGEN_EXECUTIONDOMAINFIX_H | |||
24 | ||||
25 | #include "llvm/ADT/SmallVector.h" | |||
26 | #include "llvm/CodeGen/LoopTraversal.h" | |||
27 | #include "llvm/CodeGen/MachineFunctionPass.h" | |||
28 | #include "llvm/CodeGen/ReachingDefAnalysis.h" | |||
29 | #include "llvm/CodeGen/TargetRegisterInfo.h" | |||
30 | ||||
31 | namespace llvm { | |||
32 | ||||
33 | class MachineBasicBlock; | |||
34 | class MachineInstr; | |||
35 | class TargetInstrInfo; | |||
36 | ||||
37 | /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track | |||
38 | /// of execution domains. | |||
39 | /// | |||
40 | /// An open DomainValue represents a set of instructions that can still switch | |||
41 | /// execution domain. Multiple registers may refer to the same open | |||
42 | /// DomainValue - they will eventually be collapsed to the same execution | |||
43 | /// domain. | |||
44 | /// | |||
45 | /// A collapsed DomainValue represents a single register that has been forced | |||
46 | /// into one of more execution domains. There is a separate collapsed | |||
47 | /// DomainValue for each register, but it may contain multiple execution | |||
48 | /// domains. A register value is initially created in a single execution | |||
49 | /// domain, but if we were forced to pay the penalty of a domain crossing, we | |||
50 | /// keep track of the fact that the register is now available in multiple | |||
51 | /// domains. | |||
52 | struct DomainValue { | |||
53 | /// Basic reference counting. | |||
54 | unsigned Refs = 0; | |||
55 | ||||
56 | /// Bitmask of available domains. For an open DomainValue, it is the still | |||
57 | /// possible domains for collapsing. For a collapsed DomainValue it is the | |||
58 | /// domains where the register is available for free. | |||
59 | unsigned AvailableDomains; | |||
60 | ||||
61 | /// Pointer to the next DomainValue in a chain. When two DomainValues are | |||
62 | /// merged, Victim.Next is set to point to Victor, so old DomainValue | |||
63 | /// references can be updated by following the chain. | |||
64 | DomainValue *Next; | |||
65 | ||||
66 | /// Twiddleable instructions using or defining these registers. | |||
67 | SmallVector<MachineInstr *, 8> Instrs; | |||
68 | ||||
69 | DomainValue() { clear(); } | |||
70 | ||||
71 | /// A collapsed DomainValue has no instructions to twiddle - it simply keeps | |||
72 | /// track of the domains where the registers are already available. | |||
73 | bool isCollapsed() const { return Instrs.empty(); } | |||
74 | ||||
75 | /// Is domain available? | |||
76 | bool hasDomain(unsigned domain) const { | |||
77 | assert(domain <((domain < static_cast<unsigned>(std::numeric_limits <unsigned>::digits) && "undefined behavior") ? static_cast <void> (0) : __assert_fail ("domain < static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && \"undefined behavior\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ExecutionDomainFix.h" , 79, __PRETTY_FUNCTION__)) | |||
78 | static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&((domain < static_cast<unsigned>(std::numeric_limits <unsigned>::digits) && "undefined behavior") ? static_cast <void> (0) : __assert_fail ("domain < static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && \"undefined behavior\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ExecutionDomainFix.h" , 79, __PRETTY_FUNCTION__)) | |||
79 | "undefined behavior")((domain < static_cast<unsigned>(std::numeric_limits <unsigned>::digits) && "undefined behavior") ? static_cast <void> (0) : __assert_fail ("domain < static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && \"undefined behavior\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/ExecutionDomainFix.h" , 79, __PRETTY_FUNCTION__)); | |||
80 | return AvailableDomains & (1u << domain); | |||
81 | } | |||
82 | ||||
83 | /// Mark domain as available. | |||
84 | void addDomain(unsigned domain) { AvailableDomains |= 1u << domain; } | |||
| ||||
85 | ||||
86 | // Restrict to a single domain available. | |||
87 | void setSingleDomain(unsigned domain) { AvailableDomains = 1u << domain; } | |||
88 | ||||
89 | /// Return bitmask of domains that are available and in mask. | |||
90 | unsigned getCommonDomains(unsigned mask) const { | |||
91 | return AvailableDomains & mask; | |||
92 | } | |||
93 | ||||
94 | /// First domain available. | |||
95 | unsigned getFirstDomain() const { | |||
96 | return countTrailingZeros(AvailableDomains); | |||
97 | } | |||
98 | ||||
99 | /// Clear this DomainValue and point to next which has all its data. | |||
100 | void clear() { | |||
101 | AvailableDomains = 0; | |||
102 | Next = nullptr; | |||
103 | Instrs.clear(); | |||
104 | } | |||
105 | }; | |||
106 | ||||
107 | class ExecutionDomainFix : public MachineFunctionPass { | |||
108 | SpecificBumpPtrAllocator<DomainValue> Allocator; | |||
109 | SmallVector<DomainValue *, 16> Avail; | |||
110 | ||||
111 | const TargetRegisterClass *const RC; | |||
112 | MachineFunction *MF; | |||
113 | const TargetInstrInfo *TII; | |||
114 | const TargetRegisterInfo *TRI; | |||
115 | std::vector<SmallVector<int, 1>> AliasMap; | |||
116 | const unsigned NumRegs; | |||
117 | /// Value currently in each register, or NULL when no value is being tracked. | |||
118 | /// This counts as a DomainValue reference. | |||
119 | using LiveRegsDVInfo = std::vector<DomainValue *>; | |||
120 | LiveRegsDVInfo LiveRegs; | |||
121 | /// Keeps domain information for all registers. Note that this | |||
122 | /// is different from the usual definition notion of liveness. The CPU | |||
123 | /// doesn't care whether or not we consider a register killed. | |||
124 | using OutRegsInfoMap = SmallVector<LiveRegsDVInfo, 4>; | |||
125 | OutRegsInfoMap MBBOutRegsInfos; | |||
126 | ||||
127 | ReachingDefAnalysis *RDA; | |||
128 | ||||
129 | public: | |||
130 | ExecutionDomainFix(char &PassID, const TargetRegisterClass &RC) | |||
131 | : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} | |||
132 | ||||
133 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
134 | AU.setPreservesAll(); | |||
135 | AU.addRequired<ReachingDefAnalysis>(); | |||
136 | MachineFunctionPass::getAnalysisUsage(AU); | |||
137 | } | |||
138 | ||||
139 | bool runOnMachineFunction(MachineFunction &MF) override; | |||
140 | ||||
141 | MachineFunctionProperties getRequiredProperties() const override { | |||
142 | return MachineFunctionProperties().set( | |||
143 | MachineFunctionProperties::Property::NoVRegs); | |||
144 | } | |||
145 | ||||
146 | private: | |||
147 | /// Translate TRI register number to a list of indices into our smaller tables | |||
148 | /// of interesting registers. | |||
149 | iterator_range<SmallVectorImpl<int>::const_iterator> | |||
150 | regIndices(unsigned Reg) const; | |||
151 | ||||
152 | /// DomainValue allocation. | |||
153 | DomainValue *alloc(int domain = -1); | |||
154 | ||||
155 | /// Add reference to DV. | |||
156 | DomainValue *retain(DomainValue *DV) { | |||
157 | if (DV) | |||
158 | ++DV->Refs; | |||
159 | return DV; | |||
160 | } | |||
161 | ||||
162 | /// Release a reference to DV. When the last reference is released, | |||
163 | /// collapse if needed. | |||
164 | void release(DomainValue *); | |||
165 | ||||
166 | /// Follow the chain of dead DomainValues until a live DomainValue is reached. | |||
167 | /// Update the referenced pointer when necessary. | |||
168 | DomainValue *resolve(DomainValue *&); | |||
169 | ||||
170 | /// Set LiveRegs[rx] = dv, updating reference counts. | |||
171 | void setLiveReg(int rx, DomainValue *DV); | |||
172 | ||||
173 | /// Kill register rx, recycle or collapse any DomainValue. | |||
174 | void kill(int rx); | |||
175 | ||||
176 | /// Force register rx into domain. | |||
177 | void force(int rx, unsigned domain); | |||
178 | ||||
179 | /// Collapse open DomainValue into given domain. If there are multiple | |||
180 | /// registers using dv, they each get a unique collapsed DomainValue. | |||
181 | void collapse(DomainValue *dv, unsigned domain); | |||
182 | ||||
183 | /// All instructions and registers in B are moved to A, and B is released. | |||
184 | bool merge(DomainValue *A, DomainValue *B); | |||
185 | ||||
186 | /// Set up LiveRegs by merging predecessor live-out values. | |||
187 | void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); | |||
188 | ||||
189 | /// Update live-out values. | |||
190 | void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); | |||
191 | ||||
192 | /// Process he given basic block. | |||
193 | void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); | |||
194 | ||||
195 | /// Visit given insturcion. | |||
196 | bool visitInstr(MachineInstr *); | |||
197 | ||||
198 | /// Update def-ages for registers defined by MI. | |||
199 | /// If Kill is set, also kill off DomainValues clobbered by the defs. | |||
200 | void processDefs(MachineInstr *, bool Kill); | |||
201 | ||||
202 | /// A soft instruction can be changed to work in other domains given by mask. | |||
203 | void visitSoftInstr(MachineInstr *, unsigned mask); | |||
204 | ||||
205 | /// A hard instruction only works in one domain. All input registers will be | |||
206 | /// forced into that domain. | |||
207 | void visitHardInstr(MachineInstr *, unsigned domain); | |||
208 | }; | |||
209 | ||||
210 | } // namespace llvm | |||
211 | ||||
212 | #endif // LLVM_CODEGEN_EXECUTIONDOMAINFIX_H |
1 | //===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the SmallVector class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ADT_SMALLVECTOR_H |
14 | #define LLVM_ADT_SMALLVECTOR_H |
15 | |
16 | #include "llvm/ADT/iterator_range.h" |
17 | #include "llvm/Support/AlignOf.h" |
18 | #include "llvm/Support/Compiler.h" |
19 | #include "llvm/Support/MathExtras.h" |
20 | #include "llvm/Support/MemAlloc.h" |
21 | #include "llvm/Support/type_traits.h" |
22 | #include "llvm/Support/ErrorHandling.h" |
23 | #include <algorithm> |
24 | #include <cassert> |
25 | #include <cstddef> |
26 | #include <cstdlib> |
27 | #include <cstring> |
28 | #include <initializer_list> |
29 | #include <iterator> |
30 | #include <memory> |
31 | #include <new> |
32 | #include <type_traits> |
33 | #include <utility> |
34 | |
35 | namespace llvm { |
36 | |
37 | /// This is all the non-templated stuff common to all SmallVectors. |
38 | class SmallVectorBase { |
39 | protected: |
40 | void *BeginX; |
41 | unsigned Size = 0, Capacity; |
42 | |
43 | SmallVectorBase() = delete; |
44 | SmallVectorBase(void *FirstEl, size_t TotalCapacity) |
45 | : BeginX(FirstEl), Capacity(TotalCapacity) {} |
46 | |
47 | /// This is an implementation of the grow() method which only works |
48 | /// on POD-like data types and is out of line to reduce code duplication. |
49 | void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize); |
50 | |
51 | public: |
52 | size_t size() const { return Size; } |
53 | size_t capacity() const { return Capacity; } |
54 | |
55 | LLVM_NODISCARD[[clang::warn_unused_result]] bool empty() const { return !Size; } |
56 | |
57 | /// Set the array size to \p N, which the current array must have enough |
58 | /// capacity for. |
59 | /// |
60 | /// This does not construct or destroy any elements in the vector. |
61 | /// |
62 | /// Clients can use this in conjunction with capacity() to write past the end |
63 | /// of the buffer when they know that more elements are available, and only |
64 | /// update the size later. This avoids the cost of value initializing elements |
65 | /// which will only be overwritten. |
66 | void set_size(size_t N) { |
67 | assert(N <= capacity())((N <= capacity()) ? static_cast<void> (0) : __assert_fail ("N <= capacity()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 67, __PRETTY_FUNCTION__)); |
68 | Size = N; |
69 | } |
70 | }; |
71 | |
72 | /// Figure out the offset of the first element. |
73 | template <class T, typename = void> struct SmallVectorAlignmentAndSize { |
74 | AlignedCharArrayUnion<SmallVectorBase> Base; |
75 | AlignedCharArrayUnion<T> FirstEl; |
76 | }; |
77 | |
78 | /// This is the part of SmallVectorTemplateBase which does not depend on whether |
79 | /// the type T is a POD. The extra dummy template argument is used by ArrayRef |
80 | /// to avoid unnecessarily requiring T to be complete. |
81 | template <typename T, typename = void> |
82 | class SmallVectorTemplateCommon : public SmallVectorBase { |
83 | /// Find the address of the first element. For this pointer math to be valid |
84 | /// with small-size of 0 for T with lots of alignment, it's important that |
85 | /// SmallVectorStorage is properly-aligned even for small-size of 0. |
86 | void *getFirstEl() const { |
87 | return const_cast<void *>(reinterpret_cast<const void *>( |
88 | reinterpret_cast<const char *>(this) + |
89 | offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)__builtin_offsetof(SmallVectorAlignmentAndSize<T>, FirstEl ))); |
90 | } |
91 | // Space after 'FirstEl' is clobbered, do not add any instance vars after it. |
92 | |
93 | protected: |
94 | SmallVectorTemplateCommon(size_t Size) |
95 | : SmallVectorBase(getFirstEl(), Size) {} |
96 | |
97 | void grow_pod(size_t MinCapacity, size_t TSize) { |
98 | SmallVectorBase::grow_pod(getFirstEl(), MinCapacity, TSize); |
99 | } |
100 | |
101 | /// Return true if this is a smallvector which has not had dynamic |
102 | /// memory allocated for it. |
103 | bool isSmall() const { return BeginX == getFirstEl(); } |
104 | |
105 | /// Put this vector in a state of being small. |
106 | void resetToSmall() { |
107 | BeginX = getFirstEl(); |
108 | Size = Capacity = 0; // FIXME: Setting Capacity to 0 is suspect. |
109 | } |
110 | |
111 | public: |
112 | using size_type = size_t; |
113 | using difference_type = ptrdiff_t; |
114 | using value_type = T; |
115 | using iterator = T *; |
116 | using const_iterator = const T *; |
117 | |
118 | using const_reverse_iterator = std::reverse_iterator<const_iterator>; |
119 | using reverse_iterator = std::reverse_iterator<iterator>; |
120 | |
121 | using reference = T &; |
122 | using const_reference = const T &; |
123 | using pointer = T *; |
124 | using const_pointer = const T *; |
125 | |
126 | // forward iterator creation methods. |
127 | iterator begin() { return (iterator)this->BeginX; } |
128 | const_iterator begin() const { return (const_iterator)this->BeginX; } |
129 | iterator end() { return begin() + size(); } |
130 | const_iterator end() const { return begin() + size(); } |
131 | |
132 | // reverse iterator creation methods. |
133 | reverse_iterator rbegin() { return reverse_iterator(end()); } |
134 | const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } |
135 | reverse_iterator rend() { return reverse_iterator(begin()); } |
136 | const_reverse_iterator rend() const { return const_reverse_iterator(begin());} |
137 | |
138 | size_type size_in_bytes() const { return size() * sizeof(T); } |
139 | size_type max_size() const { return size_type(-1) / sizeof(T); } |
140 | |
141 | size_t capacity_in_bytes() const { return capacity() * sizeof(T); } |
142 | |
143 | /// Return a pointer to the vector's buffer, even if empty(). |
144 | pointer data() { return pointer(begin()); } |
145 | /// Return a pointer to the vector's buffer, even if empty(). |
146 | const_pointer data() const { return const_pointer(begin()); } |
147 | |
148 | reference operator[](size_type idx) { |
149 | assert(idx < size())((idx < size()) ? static_cast<void> (0) : __assert_fail ("idx < size()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 149, __PRETTY_FUNCTION__)); |
150 | return begin()[idx]; |
151 | } |
152 | const_reference operator[](size_type idx) const { |
153 | assert(idx < size())((idx < size()) ? static_cast<void> (0) : __assert_fail ("idx < size()", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 153, __PRETTY_FUNCTION__)); |
154 | return begin()[idx]; |
155 | } |
156 | |
157 | reference front() { |
158 | assert(!empty())((!empty()) ? static_cast<void> (0) : __assert_fail ("!empty()" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 158, __PRETTY_FUNCTION__)); |
159 | return begin()[0]; |
160 | } |
161 | const_reference front() const { |
162 | assert(!empty())((!empty()) ? static_cast<void> (0) : __assert_fail ("!empty()" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 162, __PRETTY_FUNCTION__)); |
163 | return begin()[0]; |
164 | } |
165 | |
166 | reference back() { |
167 | assert(!empty())((!empty()) ? static_cast<void> (0) : __assert_fail ("!empty()" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 167, __PRETTY_FUNCTION__)); |
168 | return end()[-1]; |
169 | } |
170 | const_reference back() const { |
171 | assert(!empty())((!empty()) ? static_cast<void> (0) : __assert_fail ("!empty()" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 171, __PRETTY_FUNCTION__)); |
172 | return end()[-1]; |
173 | } |
174 | }; |
175 | |
176 | /// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put method |
177 | /// implementations that are designed to work with non-POD-like T's. |
178 | template <typename T, bool = is_trivially_copyable<T>::value> |
179 | class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { |
180 | protected: |
181 | SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} |
182 | |
183 | static void destroy_range(T *S, T *E) { |
184 | while (S != E) { |
185 | --E; |
186 | E->~T(); |
187 | } |
188 | } |
189 | |
190 | /// Move the range [I, E) into the uninitialized memory starting with "Dest", |
191 | /// constructing elements as needed. |
192 | template<typename It1, typename It2> |
193 | static void uninitialized_move(It1 I, It1 E, It2 Dest) { |
194 | std::uninitialized_copy(std::make_move_iterator(I), |
195 | std::make_move_iterator(E), Dest); |
196 | } |
197 | |
198 | /// Copy the range [I, E) onto the uninitialized memory starting with "Dest", |
199 | /// constructing elements as needed. |
200 | template<typename It1, typename It2> |
201 | static void uninitialized_copy(It1 I, It1 E, It2 Dest) { |
202 | std::uninitialized_copy(I, E, Dest); |
203 | } |
204 | |
205 | /// Grow the allocated memory (without initializing new elements), doubling |
206 | /// the size of the allocated memory. Guarantees space for at least one more |
207 | /// element, or MinSize more elements if specified. |
208 | void grow(size_t MinSize = 0); |
209 | |
210 | public: |
211 | void push_back(const T &Elt) { |
212 | if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity ()), false)) |
213 | this->grow(); |
214 | ::new ((void*) this->end()) T(Elt); |
215 | this->set_size(this->size() + 1); |
216 | } |
217 | |
218 | void push_back(T &&Elt) { |
219 | if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity ()), false)) |
220 | this->grow(); |
221 | ::new ((void*) this->end()) T(::std::move(Elt)); |
222 | this->set_size(this->size() + 1); |
223 | } |
224 | |
225 | void pop_back() { |
226 | this->set_size(this->size() - 1); |
227 | this->end()->~T(); |
228 | } |
229 | }; |
230 | |
231 | // Define this out-of-line to dissuade the C++ compiler from inlining it. |
232 | template <typename T, bool TriviallyCopyable> |
233 | void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) { |
234 | if (MinSize > UINT32_MAX(4294967295U)) |
235 | report_bad_alloc_error("SmallVector capacity overflow during allocation"); |
236 | |
237 | // Always grow, even from zero. |
238 | size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2)); |
239 | NewCapacity = std::min(std::max(NewCapacity, MinSize), size_t(UINT32_MAX(4294967295U))); |
240 | T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T))); |
241 | |
242 | // Move the elements over. |
243 | this->uninitialized_move(this->begin(), this->end(), NewElts); |
244 | |
245 | // Destroy the original elements. |
246 | destroy_range(this->begin(), this->end()); |
247 | |
248 | // If this wasn't grown from the inline copy, deallocate the old space. |
249 | if (!this->isSmall()) |
250 | free(this->begin()); |
251 | |
252 | this->BeginX = NewElts; |
253 | this->Capacity = NewCapacity; |
254 | } |
255 | |
256 | /// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put |
257 | /// method implementations that are designed to work with POD-like T's. |
258 | template <typename T> |
259 | class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> { |
260 | protected: |
261 | SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} |
262 | |
263 | // No need to do a destroy loop for POD's. |
264 | static void destroy_range(T *, T *) {} |
265 | |
266 | /// Move the range [I, E) onto the uninitialized memory |
267 | /// starting with "Dest", constructing elements into it as needed. |
268 | template<typename It1, typename It2> |
269 | static void uninitialized_move(It1 I, It1 E, It2 Dest) { |
270 | // Just do a copy. |
271 | uninitialized_copy(I, E, Dest); |
272 | } |
273 | |
274 | /// Copy the range [I, E) onto the uninitialized memory |
275 | /// starting with "Dest", constructing elements into it as needed. |
276 | template<typename It1, typename It2> |
277 | static void uninitialized_copy(It1 I, It1 E, It2 Dest) { |
278 | // Arbitrary iterator types; just use the basic implementation. |
279 | std::uninitialized_copy(I, E, Dest); |
280 | } |
281 | |
282 | /// Copy the range [I, E) onto the uninitialized memory |
283 | /// starting with "Dest", constructing elements into it as needed. |
284 | template <typename T1, typename T2> |
285 | static void uninitialized_copy( |
286 | T1 *I, T1 *E, T2 *Dest, |
287 | std::enable_if_t<std::is_same<typename std::remove_const<T1>::type, |
288 | T2>::value> * = nullptr) { |
289 | // Use memcpy for PODs iterated by pointers (which includes SmallVector |
290 | // iterators): std::uninitialized_copy optimizes to memmove, but we can |
291 | // use memcpy here. Note that I and E are iterators and thus might be |
292 | // invalid for memcpy if they are equal. |
293 | if (I != E) |
294 | memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T)); |
295 | } |
296 | |
297 | /// Double the size of the allocated memory, guaranteeing space for at |
298 | /// least one more element or MinSize if specified. |
299 | void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); } |
300 | |
301 | public: |
302 | void push_back(const T &Elt) { |
303 | if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity ()), false)) |
304 | this->grow(); |
305 | memcpy(reinterpret_cast<void *>(this->end()), &Elt, sizeof(T)); |
306 | this->set_size(this->size() + 1); |
307 | } |
308 | |
309 | void pop_back() { this->set_size(this->size() - 1); } |
310 | }; |
311 | |
312 | /// This class consists of common code factored out of the SmallVector class to |
313 | /// reduce code duplication based on the SmallVector 'N' template parameter. |
314 | template <typename T> |
315 | class SmallVectorImpl : public SmallVectorTemplateBase<T> { |
316 | using SuperClass = SmallVectorTemplateBase<T>; |
317 | |
318 | public: |
319 | using iterator = typename SuperClass::iterator; |
320 | using const_iterator = typename SuperClass::const_iterator; |
321 | using reference = typename SuperClass::reference; |
322 | using size_type = typename SuperClass::size_type; |
323 | |
324 | protected: |
325 | // Default ctor - Initialize to empty. |
326 | explicit SmallVectorImpl(unsigned N) |
327 | : SmallVectorTemplateBase<T>(N) {} |
328 | |
329 | public: |
330 | SmallVectorImpl(const SmallVectorImpl &) = delete; |
331 | |
332 | ~SmallVectorImpl() { |
333 | // Subclass has already destructed this vector's elements. |
334 | // If this wasn't grown from the inline copy, deallocate the old space. |
335 | if (!this->isSmall()) |
336 | free(this->begin()); |
337 | } |
338 | |
339 | void clear() { |
340 | this->destroy_range(this->begin(), this->end()); |
341 | this->Size = 0; |
342 | } |
343 | |
344 | void resize(size_type N) { |
345 | if (N < this->size()) { |
346 | this->destroy_range(this->begin()+N, this->end()); |
347 | this->set_size(N); |
348 | } else if (N > this->size()) { |
349 | if (this->capacity() < N) |
350 | this->grow(N); |
351 | for (auto I = this->end(), E = this->begin() + N; I != E; ++I) |
352 | new (&*I) T(); |
353 | this->set_size(N); |
354 | } |
355 | } |
356 | |
357 | void resize(size_type N, const T &NV) { |
358 | if (N < this->size()) { |
359 | this->destroy_range(this->begin()+N, this->end()); |
360 | this->set_size(N); |
361 | } else if (N > this->size()) { |
362 | if (this->capacity() < N) |
363 | this->grow(N); |
364 | std::uninitialized_fill(this->end(), this->begin()+N, NV); |
365 | this->set_size(N); |
366 | } |
367 | } |
368 | |
369 | void reserve(size_type N) { |
370 | if (this->capacity() < N) |
371 | this->grow(N); |
372 | } |
373 | |
374 | LLVM_NODISCARD[[clang::warn_unused_result]] T pop_back_val() { |
375 | T Result = ::std::move(this->back()); |
376 | this->pop_back(); |
377 | return Result; |
378 | } |
379 | |
380 | void swap(SmallVectorImpl &RHS); |
381 | |
382 | /// Add the specified range to the end of the SmallVector. |
383 | template <typename in_iter, |
384 | typename = std::enable_if_t<std::is_convertible< |
385 | typename std::iterator_traits<in_iter>::iterator_category, |
386 | std::input_iterator_tag>::value>> |
387 | void append(in_iter in_start, in_iter in_end) { |
388 | size_type NumInputs = std::distance(in_start, in_end); |
389 | if (NumInputs > this->capacity() - this->size()) |
390 | this->grow(this->size()+NumInputs); |
391 | |
392 | this->uninitialized_copy(in_start, in_end, this->end()); |
393 | this->set_size(this->size() + NumInputs); |
394 | } |
395 | |
396 | /// Append \p NumInputs copies of \p Elt to the end. |
397 | void append(size_type NumInputs, const T &Elt) { |
398 | if (NumInputs > this->capacity() - this->size()) |
399 | this->grow(this->size()+NumInputs); |
400 | |
401 | std::uninitialized_fill_n(this->end(), NumInputs, Elt); |
402 | this->set_size(this->size() + NumInputs); |
403 | } |
404 | |
405 | void append(std::initializer_list<T> IL) { |
406 | append(IL.begin(), IL.end()); |
407 | } |
408 | |
409 | // FIXME: Consider assigning over existing elements, rather than clearing & |
410 | // re-initializing them - for all assign(...) variants. |
411 | |
412 | void assign(size_type NumElts, const T &Elt) { |
413 | clear(); |
414 | if (this->capacity() < NumElts) |
415 | this->grow(NumElts); |
416 | this->set_size(NumElts); |
417 | std::uninitialized_fill(this->begin(), this->end(), Elt); |
418 | } |
419 | |
420 | template <typename in_iter, |
421 | typename = std::enable_if_t<std::is_convertible< |
422 | typename std::iterator_traits<in_iter>::iterator_category, |
423 | std::input_iterator_tag>::value>> |
424 | void assign(in_iter in_start, in_iter in_end) { |
425 | clear(); |
426 | append(in_start, in_end); |
427 | } |
428 | |
429 | void assign(std::initializer_list<T> IL) { |
430 | clear(); |
431 | append(IL); |
432 | } |
433 | |
434 | iterator erase(const_iterator CI) { |
435 | // Just cast away constness because this is a non-const member function. |
436 | iterator I = const_cast<iterator>(CI); |
437 | |
438 | assert(I >= this->begin() && "Iterator to erase is out of bounds.")((I >= this->begin() && "Iterator to erase is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Iterator to erase is out of bounds.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 438, __PRETTY_FUNCTION__)); |
439 | assert(I < this->end() && "Erasing at past-the-end iterator.")((I < this->end() && "Erasing at past-the-end iterator." ) ? static_cast<void> (0) : __assert_fail ("I < this->end() && \"Erasing at past-the-end iterator.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 439, __PRETTY_FUNCTION__)); |
440 | |
441 | iterator N = I; |
442 | // Shift all elts down one. |
443 | std::move(I+1, this->end(), I); |
444 | // Drop the last elt. |
445 | this->pop_back(); |
446 | return(N); |
447 | } |
448 | |
449 | iterator erase(const_iterator CS, const_iterator CE) { |
450 | // Just cast away constness because this is a non-const member function. |
451 | iterator S = const_cast<iterator>(CS); |
452 | iterator E = const_cast<iterator>(CE); |
453 | |
454 | assert(S >= this->begin() && "Range to erase is out of bounds.")((S >= this->begin() && "Range to erase is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("S >= this->begin() && \"Range to erase is out of bounds.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 454, __PRETTY_FUNCTION__)); |
455 | assert(S <= E && "Trying to erase invalid range.")((S <= E && "Trying to erase invalid range.") ? static_cast <void> (0) : __assert_fail ("S <= E && \"Trying to erase invalid range.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 455, __PRETTY_FUNCTION__)); |
456 | assert(E <= this->end() && "Trying to erase past the end.")((E <= this->end() && "Trying to erase past the end." ) ? static_cast<void> (0) : __assert_fail ("E <= this->end() && \"Trying to erase past the end.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 456, __PRETTY_FUNCTION__)); |
457 | |
458 | iterator N = S; |
459 | // Shift all elts down. |
460 | iterator I = std::move(E, this->end(), S); |
461 | // Drop the last elts. |
462 | this->destroy_range(I, this->end()); |
463 | this->set_size(I - this->begin()); |
464 | return(N); |
465 | } |
466 | |
467 | iterator insert(iterator I, T &&Elt) { |
468 | if (I == this->end()) { // Important special case for empty vector. |
469 | this->push_back(::std::move(Elt)); |
470 | return this->end()-1; |
471 | } |
472 | |
473 | assert(I >= this->begin() && "Insertion iterator is out of bounds.")((I >= this->begin() && "Insertion iterator is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Insertion iterator is out of bounds.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 473, __PRETTY_FUNCTION__)); |
474 | assert(I <= this->end() && "Inserting past the end of the vector.")((I <= this->end() && "Inserting past the end of the vector." ) ? static_cast<void> (0) : __assert_fail ("I <= this->end() && \"Inserting past the end of the vector.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 474, __PRETTY_FUNCTION__)); |
475 | |
476 | if (this->size() >= this->capacity()) { |
477 | size_t EltNo = I-this->begin(); |
478 | this->grow(); |
479 | I = this->begin()+EltNo; |
480 | } |
481 | |
482 | ::new ((void*) this->end()) T(::std::move(this->back())); |
483 | // Push everything else over. |
484 | std::move_backward(I, this->end()-1, this->end()); |
485 | this->set_size(this->size() + 1); |
486 | |
487 | // If we just moved the element we're inserting, be sure to update |
488 | // the reference. |
489 | T *EltPtr = &Elt; |
490 | if (I <= EltPtr && EltPtr < this->end()) |
491 | ++EltPtr; |
492 | |
493 | *I = ::std::move(*EltPtr); |
494 | return I; |
495 | } |
496 | |
497 | iterator insert(iterator I, const T &Elt) { |
498 | if (I == this->end()) { // Important special case for empty vector. |
499 | this->push_back(Elt); |
500 | return this->end()-1; |
501 | } |
502 | |
503 | assert(I >= this->begin() && "Insertion iterator is out of bounds.")((I >= this->begin() && "Insertion iterator is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Insertion iterator is out of bounds.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 503, __PRETTY_FUNCTION__)); |
504 | assert(I <= this->end() && "Inserting past the end of the vector.")((I <= this->end() && "Inserting past the end of the vector." ) ? static_cast<void> (0) : __assert_fail ("I <= this->end() && \"Inserting past the end of the vector.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 504, __PRETTY_FUNCTION__)); |
505 | |
506 | if (this->size() >= this->capacity()) { |
507 | size_t EltNo = I-this->begin(); |
508 | this->grow(); |
509 | I = this->begin()+EltNo; |
510 | } |
511 | ::new ((void*) this->end()) T(std::move(this->back())); |
512 | // Push everything else over. |
513 | std::move_backward(I, this->end()-1, this->end()); |
514 | this->set_size(this->size() + 1); |
515 | |
516 | // If we just moved the element we're inserting, be sure to update |
517 | // the reference. |
518 | const T *EltPtr = &Elt; |
519 | if (I <= EltPtr && EltPtr < this->end()) |
520 | ++EltPtr; |
521 | |
522 | *I = *EltPtr; |
523 | return I; |
524 | } |
525 | |
526 | iterator insert(iterator I, size_type NumToInsert, const T &Elt) { |
527 | // Convert iterator to elt# to avoid invalidating iterator when we reserve() |
528 | size_t InsertElt = I - this->begin(); |
529 | |
530 | if (I == this->end()) { // Important special case for empty vector. |
531 | append(NumToInsert, Elt); |
532 | return this->begin()+InsertElt; |
533 | } |
534 | |
535 | assert(I >= this->begin() && "Insertion iterator is out of bounds.")((I >= this->begin() && "Insertion iterator is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Insertion iterator is out of bounds.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 535, __PRETTY_FUNCTION__)); |
536 | assert(I <= this->end() && "Inserting past the end of the vector.")((I <= this->end() && "Inserting past the end of the vector." ) ? static_cast<void> (0) : __assert_fail ("I <= this->end() && \"Inserting past the end of the vector.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 536, __PRETTY_FUNCTION__)); |
537 | |
538 | // Ensure there is enough space. |
539 | reserve(this->size() + NumToInsert); |
540 | |
541 | // Uninvalidate the iterator. |
542 | I = this->begin()+InsertElt; |
543 | |
544 | // If there are more elements between the insertion point and the end of the |
545 | // range than there are being inserted, we can use a simple approach to |
546 | // insertion. Since we already reserved space, we know that this won't |
547 | // reallocate the vector. |
548 | if (size_t(this->end()-I) >= NumToInsert) { |
549 | T *OldEnd = this->end(); |
550 | append(std::move_iterator<iterator>(this->end() - NumToInsert), |
551 | std::move_iterator<iterator>(this->end())); |
552 | |
553 | // Copy the existing elements that get replaced. |
554 | std::move_backward(I, OldEnd-NumToInsert, OldEnd); |
555 | |
556 | std::fill_n(I, NumToInsert, Elt); |
557 | return I; |
558 | } |
559 | |
560 | // Otherwise, we're inserting more elements than exist already, and we're |
561 | // not inserting at the end. |
562 | |
563 | // Move over the elements that we're about to overwrite. |
564 | T *OldEnd = this->end(); |
565 | this->set_size(this->size() + NumToInsert); |
566 | size_t NumOverwritten = OldEnd-I; |
567 | this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten); |
568 | |
569 | // Replace the overwritten part. |
570 | std::fill_n(I, NumOverwritten, Elt); |
571 | |
572 | // Insert the non-overwritten middle part. |
573 | std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt); |
574 | return I; |
575 | } |
576 | |
577 | template <typename ItTy, |
578 | typename = std::enable_if_t<std::is_convertible< |
579 | typename std::iterator_traits<ItTy>::iterator_category, |
580 | std::input_iterator_tag>::value>> |
581 | iterator insert(iterator I, ItTy From, ItTy To) { |
582 | // Convert iterator to elt# to avoid invalidating iterator when we reserve() |
583 | size_t InsertElt = I - this->begin(); |
584 | |
585 | if (I == this->end()) { // Important special case for empty vector. |
586 | append(From, To); |
587 | return this->begin()+InsertElt; |
588 | } |
589 | |
590 | assert(I >= this->begin() && "Insertion iterator is out of bounds.")((I >= this->begin() && "Insertion iterator is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Insertion iterator is out of bounds.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 590, __PRETTY_FUNCTION__)); |
591 | assert(I <= this->end() && "Inserting past the end of the vector.")((I <= this->end() && "Inserting past the end of the vector." ) ? static_cast<void> (0) : __assert_fail ("I <= this->end() && \"Inserting past the end of the vector.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/ADT/SmallVector.h" , 591, __PRETTY_FUNCTION__)); |
592 | |
593 | size_t NumToInsert = std::distance(From, To); |
594 | |
595 | // Ensure there is enough space. |
596 | reserve(this->size() + NumToInsert); |
597 | |
598 | // Uninvalidate the iterator. |
599 | I = this->begin()+InsertElt; |
600 | |
601 | // If there are more elements between the insertion point and the end of the |
602 | // range than there are being inserted, we can use a simple approach to |
603 | // insertion. Since we already reserved space, we know that this won't |
604 | // reallocate the vector. |
605 | if (size_t(this->end()-I) >= NumToInsert) { |
606 | T *OldEnd = this->end(); |
607 | append(std::move_iterator<iterator>(this->end() - NumToInsert), |
608 | std::move_iterator<iterator>(this->end())); |
609 | |
610 | // Copy the existing elements that get replaced. |
611 | std::move_backward(I, OldEnd-NumToInsert, OldEnd); |
612 | |
613 | std::copy(From, To, I); |
614 | return I; |
615 | } |
616 | |
617 | // Otherwise, we're inserting more elements than exist already, and we're |
618 | // not inserting at the end. |
619 | |
620 | // Move over the elements that we're about to overwrite. |
621 | T *OldEnd = this->end(); |
622 | this->set_size(this->size() + NumToInsert); |
623 | size_t NumOverwritten = OldEnd-I; |
624 | this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten); |
625 | |
626 | // Replace the overwritten part. |
627 | for (T *J = I; NumOverwritten > 0; --NumOverwritten) { |
628 | *J = *From; |
629 | ++J; ++From; |
630 | } |
631 | |
632 | // Insert the non-overwritten middle part. |
633 | this->uninitialized_copy(From, To, OldEnd); |
634 | return I; |
635 | } |
636 | |
637 | void insert(iterator I, std::initializer_list<T> IL) { |
638 | insert(I, IL.begin(), IL.end()); |
639 | } |
640 | |
641 | template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) { |
642 | if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity ()), false)) |
643 | this->grow(); |
644 | ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...); |
645 | this->set_size(this->size() + 1); |
646 | return this->back(); |
647 | } |
648 | |
649 | SmallVectorImpl &operator=(const SmallVectorImpl &RHS); |
650 | |
651 | SmallVectorImpl &operator=(SmallVectorImpl &&RHS); |
652 | |
653 | bool operator==(const SmallVectorImpl &RHS) const { |
654 | if (this->size() != RHS.size()) return false; |
655 | return std::equal(this->begin(), this->end(), RHS.begin()); |
656 | } |
657 | bool operator!=(const SmallVectorImpl &RHS) const { |
658 | return !(*this == RHS); |
659 | } |
660 | |
661 | bool operator<(const SmallVectorImpl &RHS) const { |
662 | return std::lexicographical_compare(this->begin(), this->end(), |
663 | RHS.begin(), RHS.end()); |
664 | } |
665 | }; |
666 | |
667 | template <typename T> |
668 | void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) { |
669 | if (this == &RHS) return; |
670 | |
671 | // We can only avoid copying elements if neither vector is small. |
672 | if (!this->isSmall() && !RHS.isSmall()) { |
673 | std::swap(this->BeginX, RHS.BeginX); |
674 | std::swap(this->Size, RHS.Size); |
675 | std::swap(this->Capacity, RHS.Capacity); |
676 | return; |
677 | } |
678 | if (RHS.size() > this->capacity()) |
679 | this->grow(RHS.size()); |
680 | if (this->size() > RHS.capacity()) |
681 | RHS.grow(this->size()); |
682 | |
683 | // Swap the shared elements. |
684 | size_t NumShared = this->size(); |
685 | if (NumShared > RHS.size()) NumShared = RHS.size(); |
686 | for (size_type i = 0; i != NumShared; ++i) |
687 | std::swap((*this)[i], RHS[i]); |
688 | |
689 | // Copy over the extra elts. |
690 | if (this->size() > RHS.size()) { |
691 | size_t EltDiff = this->size() - RHS.size(); |
692 | this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end()); |
693 | RHS.set_size(RHS.size() + EltDiff); |
694 | this->destroy_range(this->begin()+NumShared, this->end()); |
695 | this->set_size(NumShared); |
696 | } else if (RHS.size() > this->size()) { |
697 | size_t EltDiff = RHS.size() - this->size(); |
698 | this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end()); |
699 | this->set_size(this->size() + EltDiff); |
700 | this->destroy_range(RHS.begin()+NumShared, RHS.end()); |
701 | RHS.set_size(NumShared); |
702 | } |
703 | } |
704 | |
705 | template <typename T> |
706 | SmallVectorImpl<T> &SmallVectorImpl<T>:: |
707 | operator=(const SmallVectorImpl<T> &RHS) { |
708 | // Avoid self-assignment. |
709 | if (this == &RHS) return *this; |
710 | |
711 | // If we already have sufficient space, assign the common elements, then |
712 | // destroy any excess. |
713 | size_t RHSSize = RHS.size(); |
714 | size_t CurSize = this->size(); |
715 | if (CurSize >= RHSSize) { |
716 | // Assign common elements. |
717 | iterator NewEnd; |
718 | if (RHSSize) |
719 | NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin()); |
720 | else |
721 | NewEnd = this->begin(); |
722 | |
723 | // Destroy excess elements. |
724 | this->destroy_range(NewEnd, this->end()); |
725 | |
726 | // Trim. |
727 | this->set_size(RHSSize); |
728 | return *this; |
729 | } |
730 | |
731 | // If we have to grow to have enough elements, destroy the current elements. |
732 | // This allows us to avoid copying them during the grow. |
733 | // FIXME: don't do this if they're efficiently moveable. |
734 | if (this->capacity() < RHSSize) { |
735 | // Destroy current elements. |
736 | this->destroy_range(this->begin(), this->end()); |
737 | this->set_size(0); |
738 | CurSize = 0; |
739 | this->grow(RHSSize); |
740 | } else if (CurSize) { |
741 | // Otherwise, use assignment for the already-constructed elements. |
742 | std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin()); |
743 | } |
744 | |
745 | // Copy construct the new elements in place. |
746 | this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(), |
747 | this->begin()+CurSize); |
748 | |
749 | // Set end. |
750 | this->set_size(RHSSize); |
751 | return *this; |
752 | } |
753 | |
754 | template <typename T> |
755 | SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) { |
756 | // Avoid self-assignment. |
757 | if (this == &RHS) return *this; |
758 | |
759 | // If the RHS isn't small, clear this vector and then steal its buffer. |
760 | if (!RHS.isSmall()) { |
761 | this->destroy_range(this->begin(), this->end()); |
762 | if (!this->isSmall()) free(this->begin()); |
763 | this->BeginX = RHS.BeginX; |
764 | this->Size = RHS.Size; |
765 | this->Capacity = RHS.Capacity; |
766 | RHS.resetToSmall(); |
767 | return *this; |
768 | } |
769 | |
770 | // If we already have sufficient space, assign the common elements, then |
771 | // destroy any excess. |
772 | size_t RHSSize = RHS.size(); |
773 | size_t CurSize = this->size(); |
774 | if (CurSize >= RHSSize) { |
775 | // Assign common elements. |
776 | iterator NewEnd = this->begin(); |
777 | if (RHSSize) |
778 | NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd); |
779 | |
780 | // Destroy excess elements and trim the bounds. |
781 | this->destroy_range(NewEnd, this->end()); |
782 | this->set_size(RHSSize); |
783 | |
784 | // Clear the RHS. |
785 | RHS.clear(); |
786 | |
787 | return *this; |
788 | } |
789 | |
790 | // If we have to grow to have enough elements, destroy the current elements. |
791 | // This allows us to avoid copying them during the grow. |
792 | // FIXME: this may not actually make any sense if we can efficiently move |
793 | // elements. |
794 | if (this->capacity() < RHSSize) { |
795 | // Destroy current elements. |
796 | this->destroy_range(this->begin(), this->end()); |
797 | this->set_size(0); |
798 | CurSize = 0; |
799 | this->grow(RHSSize); |
800 | } else if (CurSize) { |
801 | // Otherwise, use assignment for the already-constructed elements. |
802 | std::move(RHS.begin(), RHS.begin()+CurSize, this->begin()); |
803 | } |
804 | |
805 | // Move-construct the new elements in place. |
806 | this->uninitialized_move(RHS.begin()+CurSize, RHS.end(), |
807 | this->begin()+CurSize); |
808 | |
809 | // Set end. |
810 | this->set_size(RHSSize); |
811 | |
812 | RHS.clear(); |
813 | return *this; |
814 | } |
815 | |
816 | /// Storage for the SmallVector elements. This is specialized for the N=0 case |
817 | /// to avoid allocating unnecessary storage. |
818 | template <typename T, unsigned N> |
819 | struct SmallVectorStorage { |
820 | AlignedCharArrayUnion<T> InlineElts[N]; |
821 | }; |
822 | |
823 | /// We need the storage to be properly aligned even for small-size of 0 so that |
824 | /// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is |
825 | /// well-defined. |
826 | template <typename T> struct alignas(alignof(T)) SmallVectorStorage<T, 0> {}; |
827 | |
828 | /// This is a 'vector' (really, a variable-sized array), optimized |
829 | /// for the case when the array is small. It contains some number of elements |
830 | /// in-place, which allows it to avoid heap allocation when the actual number of |
831 | /// elements is below that threshold. This allows normal "small" cases to be |
832 | /// fast without losing generality for large inputs. |
833 | /// |
834 | /// Note that this does not attempt to be exception safe. |
835 | /// |
836 | template <typename T, unsigned N> |
837 | class SmallVector : public SmallVectorImpl<T>, SmallVectorStorage<T, N> { |
838 | public: |
839 | SmallVector() : SmallVectorImpl<T>(N) {} |
840 | |
841 | ~SmallVector() { |
842 | // Destroy the constructed elements in the vector. |
843 | this->destroy_range(this->begin(), this->end()); |
844 | } |
845 | |
846 | explicit SmallVector(size_t Size, const T &Value = T()) |
847 | : SmallVectorImpl<T>(N) { |
848 | this->assign(Size, Value); |
849 | } |
850 | |
851 | template <typename ItTy, |
852 | typename = std::enable_if_t<std::is_convertible< |
853 | typename std::iterator_traits<ItTy>::iterator_category, |
854 | std::input_iterator_tag>::value>> |
855 | SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) { |
856 | this->append(S, E); |
857 | } |
858 | |
859 | template <typename RangeTy> |
860 | explicit SmallVector(const iterator_range<RangeTy> &R) |
861 | : SmallVectorImpl<T>(N) { |
862 | this->append(R.begin(), R.end()); |
863 | } |
864 | |
865 | SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) { |
866 | this->assign(IL); |
867 | } |
868 | |
869 | SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) { |
870 | if (!RHS.empty()) |
871 | SmallVectorImpl<T>::operator=(RHS); |
872 | } |
873 | |
874 | const SmallVector &operator=(const SmallVector &RHS) { |
875 | SmallVectorImpl<T>::operator=(RHS); |
876 | return *this; |
877 | } |
878 | |
879 | SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) { |
880 | if (!RHS.empty()) |
881 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
882 | } |
883 | |
884 | SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) { |
885 | if (!RHS.empty()) |
886 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
887 | } |
888 | |
889 | const SmallVector &operator=(SmallVector &&RHS) { |
890 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
891 | return *this; |
892 | } |
893 | |
894 | const SmallVector &operator=(SmallVectorImpl<T> &&RHS) { |
895 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
896 | return *this; |
897 | } |
898 | |
899 | const SmallVector &operator=(std::initializer_list<T> IL) { |
900 | this->assign(IL); |
901 | return *this; |
902 | } |
903 | }; |
904 | |
905 | template <typename T, unsigned N> |
906 | inline size_t capacity_in_bytes(const SmallVector<T, N> &X) { |
907 | return X.capacity_in_bytes(); |
908 | } |
909 | |
910 | /// Given a range of type R, iterate the entire range and return a |
911 | /// SmallVector with elements of the vector. This is useful, for example, |
912 | /// when you want to iterate a range and then sort the results. |
913 | template <unsigned Size, typename R> |
914 | SmallVector<typename std::remove_const<typename std::remove_reference< |
915 | decltype(*std::begin(std::declval<R &>()))>::type>::type, |
916 | Size> |
917 | to_vector(R &&Range) { |
918 | return {std::begin(Range), std::end(Range)}; |
919 | } |
920 | |
921 | } // end namespace llvm |
922 | |
923 | namespace std { |
924 | |
925 | /// Implement std::swap in terms of SmallVector swap. |
926 | template<typename T> |
927 | inline void |
928 | swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) { |
929 | LHS.swap(RHS); |
930 | } |
931 | |
932 | /// Implement std::swap in terms of SmallVector swap. |
933 | template<typename T, unsigned N> |
934 | inline void |
935 | swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) { |
936 | LHS.swap(RHS); |
937 | } |
938 | |
939 | } // end namespace std |
940 | |
941 | #endif // LLVM_ADT_SMALLVECTOR_H |
1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | // This file contains some functions that are useful for math stuff. | ||||||||
10 | // | ||||||||
11 | //===----------------------------------------------------------------------===// | ||||||||
12 | |||||||||
13 | #ifndef LLVM_SUPPORT_MATHEXTRAS_H | ||||||||
14 | #define LLVM_SUPPORT_MATHEXTRAS_H | ||||||||
15 | |||||||||
16 | #include "llvm/Support/Compiler.h" | ||||||||
17 | #include <algorithm> | ||||||||
18 | #include <cassert> | ||||||||
19 | #include <climits> | ||||||||
20 | #include <cmath> | ||||||||
21 | #include <cstdint> | ||||||||
22 | #include <cstring> | ||||||||
23 | #include <limits> | ||||||||
24 | #include <type_traits> | ||||||||
25 | |||||||||
26 | #ifdef __ANDROID_NDK__ | ||||||||
27 | #include <android/api-level.h> | ||||||||
28 | #endif | ||||||||
29 | |||||||||
30 | #ifdef _MSC_VER | ||||||||
31 | // Declare these intrinsics manually rather including intrin.h. It's very | ||||||||
32 | // expensive, and MathExtras.h is popular. | ||||||||
33 | // #include <intrin.h> | ||||||||
34 | extern "C" { | ||||||||
35 | unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); | ||||||||
36 | unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); | ||||||||
37 | unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); | ||||||||
38 | unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); | ||||||||
39 | } | ||||||||
40 | #endif | ||||||||
41 | |||||||||
42 | namespace llvm { | ||||||||
43 | |||||||||
44 | /// The behavior an operation has on an input of 0. | ||||||||
45 | enum ZeroBehavior { | ||||||||
46 | /// The returned value is undefined. | ||||||||
47 | ZB_Undefined, | ||||||||
48 | /// The returned value is numeric_limits<T>::max() | ||||||||
49 | ZB_Max, | ||||||||
50 | /// The returned value is numeric_limits<T>::digits | ||||||||
51 | ZB_Width | ||||||||
52 | }; | ||||||||
53 | |||||||||
54 | /// Mathematical constants. | ||||||||
55 | namespace numbers { | ||||||||
56 | // TODO: Track C++20 std::numbers. | ||||||||
57 | // TODO: Favor using the hexadecimal FP constants (requires C++17). | ||||||||
58 | constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 | ||||||||
59 | egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 | ||||||||
60 | ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 | ||||||||
61 | ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392 | ||||||||
62 | log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) | ||||||||
63 | log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) | ||||||||
64 | pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 | ||||||||
65 | inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 | ||||||||
66 | sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 | ||||||||
67 | inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 | ||||||||
68 | sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219 | ||||||||
69 | inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) | ||||||||
70 | sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 | ||||||||
71 | inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) | ||||||||
72 | phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 | ||||||||
73 | constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 | ||||||||
74 | egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 | ||||||||
75 | ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 | ||||||||
76 | ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 | ||||||||
77 | log2ef = 1.44269504F, // (0x1.715476P+0) | ||||||||
78 | log10ef = .434294482F, // (0x1.bcb7b2P-2) | ||||||||
79 | pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 | ||||||||
80 | inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 | ||||||||
81 | sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 | ||||||||
82 | inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 | ||||||||
83 | sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 | ||||||||
84 | inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) | ||||||||
85 | sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 | ||||||||
86 | inv_sqrt3f = .577350269F, // (0x1.279a74P-1) | ||||||||
87 | phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 | ||||||||
88 | } // namespace numbers | ||||||||
89 | |||||||||
90 | namespace detail { | ||||||||
91 | template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter { | ||||||||
92 | static unsigned count(T Val, ZeroBehavior) { | ||||||||
93 | if (!Val) | ||||||||
94 | return std::numeric_limits<T>::digits; | ||||||||
95 | if (Val & 0x1) | ||||||||
96 | return 0; | ||||||||
97 | |||||||||
98 | // Bisection method. | ||||||||
99 | unsigned ZeroBits = 0; | ||||||||
100 | T Shift = std::numeric_limits<T>::digits >> 1; | ||||||||
101 | T Mask = std::numeric_limits<T>::max() >> Shift; | ||||||||
102 | while (Shift) { | ||||||||
103 | if ((Val & Mask) == 0) { | ||||||||
104 | Val >>= Shift; | ||||||||
105 | ZeroBits |= Shift; | ||||||||
106 | } | ||||||||
107 | Shift >>= 1; | ||||||||
108 | Mask >>= Shift; | ||||||||
109 | } | ||||||||
110 | return ZeroBits; | ||||||||
111 | } | ||||||||
112 | }; | ||||||||
113 | |||||||||
114 | #if defined(__GNUC__4) || defined(_MSC_VER) | ||||||||
115 | template <typename T> struct TrailingZerosCounter<T, 4> { | ||||||||
116 | static unsigned count(T Val, ZeroBehavior ZB) { | ||||||||
117 | if (ZB
| ||||||||
118 | return 32; | ||||||||
119 | |||||||||
120 | #if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4) | ||||||||
121 | return __builtin_ctz(Val); | ||||||||
122 | #elif defined(_MSC_VER) | ||||||||
123 | unsigned long Index; | ||||||||
124 | _BitScanForward(&Index, Val); | ||||||||
125 | return Index; | ||||||||
126 | #endif | ||||||||
127 | } | ||||||||
128 | }; | ||||||||
129 | |||||||||
130 | #if !defined(_MSC_VER) || defined(_M_X64) | ||||||||
131 | template <typename T> struct TrailingZerosCounter<T, 8> { | ||||||||
132 | static unsigned count(T Val, ZeroBehavior ZB) { | ||||||||
133 | if (ZB != ZB_Undefined && Val == 0) | ||||||||
134 | return 64; | ||||||||
135 | |||||||||
136 | #if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4) | ||||||||
137 | return __builtin_ctzll(Val); | ||||||||
138 | #elif defined(_MSC_VER) | ||||||||
139 | unsigned long Index; | ||||||||
140 | _BitScanForward64(&Index, Val); | ||||||||
141 | return Index; | ||||||||
142 | #endif | ||||||||
143 | } | ||||||||
144 | }; | ||||||||
145 | #endif | ||||||||
146 | #endif | ||||||||
147 | } // namespace detail | ||||||||
148 | |||||||||
149 | /// Count number of 0's from the least significant bit to the most | ||||||||
150 | /// stopping at the first 1. | ||||||||
151 | /// | ||||||||
152 | /// Only unsigned integral types are allowed. | ||||||||
153 | /// | ||||||||
154 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are | ||||||||
155 | /// valid arguments. | ||||||||
156 | template <typename T> | ||||||||
157 | unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { | ||||||||
158 | static_assert(std::numeric_limits<T>::is_integer && | ||||||||
159 | !std::numeric_limits<T>::is_signed, | ||||||||
160 | "Only unsigned integral types are allowed."); | ||||||||
161 | return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB); | ||||||||
162 | } | ||||||||
163 | |||||||||
164 | namespace detail { | ||||||||
165 | template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter { | ||||||||
166 | static unsigned count(T Val, ZeroBehavior) { | ||||||||
167 | if (!Val) | ||||||||
168 | return std::numeric_limits<T>::digits; | ||||||||
169 | |||||||||
170 | // Bisection method. | ||||||||
171 | unsigned ZeroBits = 0; | ||||||||
172 | for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) { | ||||||||
173 | T Tmp = Val >> Shift; | ||||||||
174 | if (Tmp) | ||||||||
175 | Val = Tmp; | ||||||||
176 | else | ||||||||
177 | ZeroBits |= Shift; | ||||||||
178 | } | ||||||||
179 | return ZeroBits; | ||||||||
180 | } | ||||||||
181 | }; | ||||||||
182 | |||||||||
183 | #if defined(__GNUC__4) || defined(_MSC_VER) | ||||||||
184 | template <typename T> struct LeadingZerosCounter<T, 4> { | ||||||||
185 | static unsigned count(T Val, ZeroBehavior ZB) { | ||||||||
186 | if (ZB != ZB_Undefined && Val == 0) | ||||||||
187 | return 32; | ||||||||
188 | |||||||||
189 | #if __has_builtin(__builtin_clz)1 || defined(__GNUC__4) | ||||||||
190 | return __builtin_clz(Val); | ||||||||
191 | #elif defined(_MSC_VER) | ||||||||
192 | unsigned long Index; | ||||||||
193 | _BitScanReverse(&Index, Val); | ||||||||
194 | return Index ^ 31; | ||||||||
195 | #endif | ||||||||
196 | } | ||||||||
197 | }; | ||||||||
198 | |||||||||
199 | #if !defined(_MSC_VER) || defined(_M_X64) | ||||||||
200 | template <typename T> struct LeadingZerosCounter<T, 8> { | ||||||||
201 | static unsigned count(T Val, ZeroBehavior ZB) { | ||||||||
202 | if (ZB != ZB_Undefined && Val == 0) | ||||||||
203 | return 64; | ||||||||
204 | |||||||||
205 | #if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4) | ||||||||
206 | return __builtin_clzll(Val); | ||||||||
207 | #elif defined(_MSC_VER) | ||||||||
208 | unsigned long Index; | ||||||||
209 | _BitScanReverse64(&Index, Val); | ||||||||
210 | return Index ^ 63; | ||||||||
211 | #endif | ||||||||
212 | } | ||||||||
213 | }; | ||||||||
214 | #endif | ||||||||
215 | #endif | ||||||||
216 | } // namespace detail | ||||||||
217 | |||||||||
218 | /// Count number of 0's from the most significant bit to the least | ||||||||
219 | /// stopping at the first 1. | ||||||||
220 | /// | ||||||||
221 | /// Only unsigned integral types are allowed. | ||||||||
222 | /// | ||||||||
223 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are | ||||||||
224 | /// valid arguments. | ||||||||
225 | template <typename T> | ||||||||
226 | unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { | ||||||||
227 | static_assert(std::numeric_limits<T>::is_integer && | ||||||||
228 | !std::numeric_limits<T>::is_signed, | ||||||||
229 | "Only unsigned integral types are allowed."); | ||||||||
230 | return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB); | ||||||||
231 | } | ||||||||
232 | |||||||||
233 | /// Get the index of the first set bit starting from the least | ||||||||
234 | /// significant bit. | ||||||||
235 | /// | ||||||||
236 | /// Only unsigned integral types are allowed. | ||||||||
237 | /// | ||||||||
238 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are | ||||||||
239 | /// valid arguments. | ||||||||
240 | template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { | ||||||||
241 | if (ZB == ZB_Max && Val == 0) | ||||||||
242 | return std::numeric_limits<T>::max(); | ||||||||
243 | |||||||||
244 | return countTrailingZeros(Val, ZB_Undefined); | ||||||||
245 | } | ||||||||
246 | |||||||||
247 | /// Create a bitmask with the N right-most bits set to 1, and all other | ||||||||
248 | /// bits set to 0. Only unsigned types are allowed. | ||||||||
249 | template <typename T> T maskTrailingOnes(unsigned N) { | ||||||||
250 | static_assert(std::is_unsigned<T>::value, "Invalid type!"); | ||||||||
251 | const unsigned Bits = CHAR_BIT8 * sizeof(T); | ||||||||
252 | assert(N <= Bits && "Invalid bit index")((N <= Bits && "Invalid bit index") ? static_cast< void> (0) : __assert_fail ("N <= Bits && \"Invalid bit index\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 252, __PRETTY_FUNCTION__)); | ||||||||
253 | return N == 0 ? 0 : (T(-1) >> (Bits - N)); | ||||||||
254 | } | ||||||||
255 | |||||||||
256 | /// Create a bitmask with the N left-most bits set to 1, and all other | ||||||||
257 | /// bits set to 0. Only unsigned types are allowed. | ||||||||
258 | template <typename T> T maskLeadingOnes(unsigned N) { | ||||||||
259 | return ~maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N); | ||||||||
260 | } | ||||||||
261 | |||||||||
262 | /// Create a bitmask with the N right-most bits set to 0, and all other | ||||||||
263 | /// bits set to 1. Only unsigned types are allowed. | ||||||||
264 | template <typename T> T maskTrailingZeros(unsigned N) { | ||||||||
265 | return maskLeadingOnes<T>(CHAR_BIT8 * sizeof(T) - N); | ||||||||
266 | } | ||||||||
267 | |||||||||
268 | /// Create a bitmask with the N left-most bits set to 0, and all other | ||||||||
269 | /// bits set to 1. Only unsigned types are allowed. | ||||||||
270 | template <typename T> T maskLeadingZeros(unsigned N) { | ||||||||
271 | return maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N); | ||||||||
272 | } | ||||||||
273 | |||||||||
274 | /// Get the index of the last set bit starting from the least | ||||||||
275 | /// significant bit. | ||||||||
276 | /// | ||||||||
277 | /// Only unsigned integral types are allowed. | ||||||||
278 | /// | ||||||||
279 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are | ||||||||
280 | /// valid arguments. | ||||||||
281 | template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { | ||||||||
282 | if (ZB == ZB_Max && Val == 0) | ||||||||
283 | return std::numeric_limits<T>::max(); | ||||||||
284 | |||||||||
285 | // Use ^ instead of - because both gcc and llvm can remove the associated ^ | ||||||||
286 | // in the __builtin_clz intrinsic on x86. | ||||||||
287 | return countLeadingZeros(Val, ZB_Undefined) ^ | ||||||||
288 | (std::numeric_limits<T>::digits - 1); | ||||||||
289 | } | ||||||||
290 | |||||||||
291 | /// Macro compressed bit reversal table for 256 bits. | ||||||||
292 | /// | ||||||||
293 | /// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable | ||||||||
294 | static const unsigned char BitReverseTable256[256] = { | ||||||||
295 | #define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64 | ||||||||
296 | #define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16) | ||||||||
297 | #define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4) | ||||||||
298 | R6(0), R6(2), R6(1), R6(3) | ||||||||
299 | #undef R2 | ||||||||
300 | #undef R4 | ||||||||
301 | #undef R6 | ||||||||
302 | }; | ||||||||
303 | |||||||||
304 | /// Reverse the bits in \p Val. | ||||||||
305 | template <typename T> | ||||||||
306 | T reverseBits(T Val) { | ||||||||
307 | unsigned char in[sizeof(Val)]; | ||||||||
308 | unsigned char out[sizeof(Val)]; | ||||||||
309 | std::memcpy(in, &Val, sizeof(Val)); | ||||||||
310 | for (unsigned i = 0; i < sizeof(Val); ++i) | ||||||||
311 | out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; | ||||||||
312 | std::memcpy(&Val, out, sizeof(Val)); | ||||||||
313 | return Val; | ||||||||
314 | } | ||||||||
315 | |||||||||
316 | // NOTE: The following support functions use the _32/_64 extensions instead of | ||||||||
317 | // type overloading so that signed and unsigned integers can be used without | ||||||||
318 | // ambiguity. | ||||||||
319 | |||||||||
320 | /// Return the high 32 bits of a 64 bit value. | ||||||||
321 | constexpr inline uint32_t Hi_32(uint64_t Value) { | ||||||||
322 | return static_cast<uint32_t>(Value >> 32); | ||||||||
323 | } | ||||||||
324 | |||||||||
325 | /// Return the low 32 bits of a 64 bit value. | ||||||||
326 | constexpr inline uint32_t Lo_32(uint64_t Value) { | ||||||||
327 | return static_cast<uint32_t>(Value); | ||||||||
328 | } | ||||||||
329 | |||||||||
330 | /// Make a 64-bit integer from a high / low pair of 32-bit integers. | ||||||||
331 | constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { | ||||||||
332 | return ((uint64_t)High << 32) | (uint64_t)Low; | ||||||||
333 | } | ||||||||
334 | |||||||||
335 | /// Checks if an integer fits into the given bit width. | ||||||||
336 | template <unsigned N> constexpr inline bool isInt(int64_t x) { | ||||||||
337 | return N >= 64 || (-(INT64_C(1)1L<<(N-1)) <= x && x < (INT64_C(1)1L<<(N-1))); | ||||||||
338 | } | ||||||||
339 | // Template specializations to get better code for common cases. | ||||||||
340 | template <> constexpr inline bool isInt<8>(int64_t x) { | ||||||||
341 | return static_cast<int8_t>(x) == x; | ||||||||
342 | } | ||||||||
343 | template <> constexpr inline bool isInt<16>(int64_t x) { | ||||||||
344 | return static_cast<int16_t>(x) == x; | ||||||||
345 | } | ||||||||
346 | template <> constexpr inline bool isInt<32>(int64_t x) { | ||||||||
347 | return static_cast<int32_t>(x) == x; | ||||||||
348 | } | ||||||||
349 | |||||||||
350 | /// Checks if a signed integer is an N bit number shifted left by S. | ||||||||
351 | template <unsigned N, unsigned S> | ||||||||
352 | constexpr inline bool isShiftedInt(int64_t x) { | ||||||||
353 | static_assert( | ||||||||
354 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); | ||||||||
355 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); | ||||||||
356 | return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); | ||||||||
357 | } | ||||||||
358 | |||||||||
359 | /// Checks if an unsigned integer fits into the given bit width. | ||||||||
360 | /// | ||||||||
361 | /// This is written as two functions rather than as simply | ||||||||
362 | /// | ||||||||
363 | /// return N >= 64 || X < (UINT64_C(1) << N); | ||||||||
364 | /// | ||||||||
365 | /// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting | ||||||||
366 | /// left too many places. | ||||||||
367 | template <unsigned N> | ||||||||
368 | constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) { | ||||||||
369 | static_assert(N > 0, "isUInt<0> doesn't make sense"); | ||||||||
370 | return X < (UINT64_C(1)1UL << (N)); | ||||||||
371 | } | ||||||||
372 | template <unsigned N> | ||||||||
373 | constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t X) { | ||||||||
374 | return true; | ||||||||
375 | } | ||||||||
376 | |||||||||
377 | // Template specializations to get better code for common cases. | ||||||||
378 | template <> constexpr inline bool isUInt<8>(uint64_t x) { | ||||||||
379 | return static_cast<uint8_t>(x) == x; | ||||||||
380 | } | ||||||||
381 | template <> constexpr inline bool isUInt<16>(uint64_t x) { | ||||||||
382 | return static_cast<uint16_t>(x) == x; | ||||||||
383 | } | ||||||||
384 | template <> constexpr inline bool isUInt<32>(uint64_t x) { | ||||||||
385 | return static_cast<uint32_t>(x) == x; | ||||||||
386 | } | ||||||||
387 | |||||||||
388 | /// Checks if a unsigned integer is an N bit number shifted left by S. | ||||||||
389 | template <unsigned N, unsigned S> | ||||||||
390 | constexpr inline bool isShiftedUInt(uint64_t x) { | ||||||||
391 | static_assert( | ||||||||
392 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); | ||||||||
393 | static_assert(N + S <= 64, | ||||||||
394 | "isShiftedUInt<N, S> with N + S > 64 is too wide."); | ||||||||
395 | // Per the two static_asserts above, S must be strictly less than 64. So | ||||||||
396 | // 1 << S is not undefined behavior. | ||||||||
397 | return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); | ||||||||
398 | } | ||||||||
399 | |||||||||
400 | /// Gets the maximum value for a N-bit unsigned integer. | ||||||||
401 | inline uint64_t maxUIntN(uint64_t N) { | ||||||||
402 | assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range" ) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 402, __PRETTY_FUNCTION__)); | ||||||||
403 | |||||||||
404 | // uint64_t(1) << 64 is undefined behavior, so we can't do | ||||||||
405 | // (uint64_t(1) << N) - 1 | ||||||||
406 | // without checking first that N != 64. But this works and doesn't have a | ||||||||
407 | // branch. | ||||||||
408 | return UINT64_MAX(18446744073709551615UL) >> (64 - N); | ||||||||
409 | } | ||||||||
410 | |||||||||
411 | /// Gets the minimum value for a N-bit signed integer. | ||||||||
412 | inline int64_t minIntN(int64_t N) { | ||||||||
413 | assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range" ) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 413, __PRETTY_FUNCTION__)); | ||||||||
414 | |||||||||
415 | return -(UINT64_C(1)1UL<<(N-1)); | ||||||||
416 | } | ||||||||
417 | |||||||||
418 | /// Gets the maximum value for a N-bit signed integer. | ||||||||
419 | inline int64_t maxIntN(int64_t N) { | ||||||||
420 | assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range" ) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 420, __PRETTY_FUNCTION__)); | ||||||||
421 | |||||||||
422 | // This relies on two's complement wraparound when N == 64, so we convert to | ||||||||
423 | // int64_t only at the very end to avoid UB. | ||||||||
424 | return (UINT64_C(1)1UL << (N - 1)) - 1; | ||||||||
425 | } | ||||||||
426 | |||||||||
427 | /// Checks if an unsigned integer fits into the given (dynamic) bit width. | ||||||||
428 | inline bool isUIntN(unsigned N, uint64_t x) { | ||||||||
429 | return N >= 64 || x <= maxUIntN(N); | ||||||||
430 | } | ||||||||
431 | |||||||||
432 | /// Checks if an signed integer fits into the given (dynamic) bit width. | ||||||||
433 | inline bool isIntN(unsigned N, int64_t x) { | ||||||||
434 | return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); | ||||||||
435 | } | ||||||||
436 | |||||||||
437 | /// Return true if the argument is a non-empty sequence of ones starting at the | ||||||||
438 | /// least significant bit with the remainder zero (32 bit version). | ||||||||
439 | /// Ex. isMask_32(0x0000FFFFU) == true. | ||||||||
440 | constexpr inline bool isMask_32(uint32_t Value) { | ||||||||
441 | return Value && ((Value + 1) & Value) == 0; | ||||||||
442 | } | ||||||||
443 | |||||||||
444 | /// Return true if the argument is a non-empty sequence of ones starting at the | ||||||||
445 | /// least significant bit with the remainder zero (64 bit version). | ||||||||
446 | constexpr inline bool isMask_64(uint64_t Value) { | ||||||||
447 | return Value && ((Value + 1) & Value) == 0; | ||||||||
448 | } | ||||||||
449 | |||||||||
450 | /// Return true if the argument contains a non-empty sequence of ones with the | ||||||||
451 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. | ||||||||
452 | constexpr inline bool isShiftedMask_32(uint32_t Value) { | ||||||||
453 | return Value && isMask_32((Value - 1) | Value); | ||||||||
454 | } | ||||||||
455 | |||||||||
456 | /// Return true if the argument contains a non-empty sequence of ones with the | ||||||||
457 | /// remainder zero (64 bit version.) | ||||||||
458 | constexpr inline bool isShiftedMask_64(uint64_t Value) { | ||||||||
459 | return Value && isMask_64((Value - 1) | Value); | ||||||||
460 | } | ||||||||
461 | |||||||||
462 | /// Return true if the argument is a power of two > 0. | ||||||||
463 | /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) | ||||||||
464 | constexpr inline bool isPowerOf2_32(uint32_t Value) { | ||||||||
465 | return Value && !(Value & (Value - 1)); | ||||||||
466 | } | ||||||||
467 | |||||||||
468 | /// Return true if the argument is a power of two > 0 (64 bit edition.) | ||||||||
469 | constexpr inline bool isPowerOf2_64(uint64_t Value) { | ||||||||
470 | return Value && !(Value & (Value - 1)); | ||||||||
471 | } | ||||||||
472 | |||||||||
473 | /// Count the number of ones from the most significant bit to the first | ||||||||
474 | /// zero bit. | ||||||||
475 | /// | ||||||||
476 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. | ||||||||
477 | /// Only unsigned integral types are allowed. | ||||||||
478 | /// | ||||||||
479 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and | ||||||||
480 | /// ZB_Undefined are valid arguments. | ||||||||
481 | template <typename T> | ||||||||
482 | unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { | ||||||||
483 | static_assert(std::numeric_limits<T>::is_integer && | ||||||||
484 | !std::numeric_limits<T>::is_signed, | ||||||||
485 | "Only unsigned integral types are allowed."); | ||||||||
486 | return countLeadingZeros<T>(~Value, ZB); | ||||||||
487 | } | ||||||||
488 | |||||||||
489 | /// Count the number of ones from the least significant bit to the first | ||||||||
490 | /// zero bit. | ||||||||
491 | /// | ||||||||
492 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. | ||||||||
493 | /// Only unsigned integral types are allowed. | ||||||||
494 | /// | ||||||||
495 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and | ||||||||
496 | /// ZB_Undefined are valid arguments. | ||||||||
497 | template <typename T> | ||||||||
498 | unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { | ||||||||
499 | static_assert(std::numeric_limits<T>::is_integer && | ||||||||
500 | !std::numeric_limits<T>::is_signed, | ||||||||
501 | "Only unsigned integral types are allowed."); | ||||||||
502 | return countTrailingZeros<T>(~Value, ZB); | ||||||||
503 | } | ||||||||
504 | |||||||||
505 | namespace detail { | ||||||||
506 | template <typename T, std::size_t SizeOfT> struct PopulationCounter { | ||||||||
507 | static unsigned count(T Value) { | ||||||||
508 | // Generic version, forward to 32 bits. | ||||||||
509 | static_assert(SizeOfT <= 4, "Not implemented!"); | ||||||||
510 | #if defined(__GNUC__4) | ||||||||
511 | return __builtin_popcount(Value); | ||||||||
512 | #else | ||||||||
513 | uint32_t v = Value; | ||||||||
514 | v = v - ((v >> 1) & 0x55555555); | ||||||||
515 | v = (v & 0x33333333) + ((v >> 2) & 0x33333333); | ||||||||
516 | return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; | ||||||||
517 | #endif | ||||||||
518 | } | ||||||||
519 | }; | ||||||||
520 | |||||||||
521 | template <typename T> struct PopulationCounter<T, 8> { | ||||||||
522 | static unsigned count(T Value) { | ||||||||
523 | #if defined(__GNUC__4) | ||||||||
524 | return __builtin_popcountll(Value); | ||||||||
525 | #else | ||||||||
526 | uint64_t v = Value; | ||||||||
527 | v = v - ((v >> 1) & 0x5555555555555555ULL); | ||||||||
528 | v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); | ||||||||
529 | v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; | ||||||||
530 | return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56); | ||||||||
531 | #endif | ||||||||
532 | } | ||||||||
533 | }; | ||||||||
534 | } // namespace detail | ||||||||
535 | |||||||||
536 | /// Count the number of set bits in a value. | ||||||||
537 | /// Ex. countPopulation(0xF000F000) = 8 | ||||||||
538 | /// Returns 0 if the word is zero. | ||||||||
539 | template <typename T> | ||||||||
540 | inline unsigned countPopulation(T Value) { | ||||||||
541 | static_assert(std::numeric_limits<T>::is_integer && | ||||||||
542 | !std::numeric_limits<T>::is_signed, | ||||||||
543 | "Only unsigned integral types are allowed."); | ||||||||
544 | return detail::PopulationCounter<T, sizeof(T)>::count(Value); | ||||||||
545 | } | ||||||||
546 | |||||||||
547 | /// Compile time Log2. | ||||||||
548 | /// Valid only for positive powers of two. | ||||||||
549 | template <size_t kValue> constexpr inline size_t CTLog2() { | ||||||||
550 | static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), | ||||||||
551 | "Value is not a valid power of 2"); | ||||||||
552 | return 1 + CTLog2<kValue / 2>(); | ||||||||
553 | } | ||||||||
554 | |||||||||
555 | template <> constexpr inline size_t CTLog2<1>() { return 0; } | ||||||||
556 | |||||||||
557 | /// Return the log base 2 of the specified value. | ||||||||
558 | inline double Log2(double Value) { | ||||||||
559 | #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 | ||||||||
560 | return __builtin_log(Value) / __builtin_log(2.0); | ||||||||
561 | #else | ||||||||
562 | return log2(Value); | ||||||||
563 | #endif | ||||||||
564 | } | ||||||||
565 | |||||||||
566 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. | ||||||||
567 | /// (32 bit edition.) | ||||||||
568 | /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 | ||||||||
569 | inline unsigned Log2_32(uint32_t Value) { | ||||||||
570 | return 31 - countLeadingZeros(Value); | ||||||||
571 | } | ||||||||
572 | |||||||||
573 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. | ||||||||
574 | /// (64 bit edition.) | ||||||||
575 | inline unsigned Log2_64(uint64_t Value) { | ||||||||
576 | return 63 - countLeadingZeros(Value); | ||||||||
577 | } | ||||||||
578 | |||||||||
579 | /// Return the ceil log base 2 of the specified value, 32 if the value is zero. | ||||||||
580 | /// (32 bit edition). | ||||||||
581 | /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 | ||||||||
582 | inline unsigned Log2_32_Ceil(uint32_t Value) { | ||||||||
583 | return 32 - countLeadingZeros(Value - 1); | ||||||||
584 | } | ||||||||
585 | |||||||||
586 | /// Return the ceil log base 2 of the specified value, 64 if the value is zero. | ||||||||
587 | /// (64 bit edition.) | ||||||||
588 | inline unsigned Log2_64_Ceil(uint64_t Value) { | ||||||||
589 | return 64 - countLeadingZeros(Value - 1); | ||||||||
590 | } | ||||||||
591 | |||||||||
592 | /// Return the greatest common divisor of the values using Euclid's algorithm. | ||||||||
593 | template <typename T> | ||||||||
594 | inline T greatestCommonDivisor(T A, T B) { | ||||||||
595 | while (B) { | ||||||||
596 | T Tmp = B; | ||||||||
597 | B = A % B; | ||||||||
598 | A = Tmp; | ||||||||
599 | } | ||||||||
600 | return A; | ||||||||
601 | } | ||||||||
602 | |||||||||
603 | inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { | ||||||||
604 | return greatestCommonDivisor<uint64_t>(A, B); | ||||||||
605 | } | ||||||||
606 | |||||||||
607 | /// This function takes a 64-bit integer and returns the bit equivalent double. | ||||||||
608 | inline double BitsToDouble(uint64_t Bits) { | ||||||||
609 | double D; | ||||||||
610 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); | ||||||||
611 | memcpy(&D, &Bits, sizeof(Bits)); | ||||||||
612 | return D; | ||||||||
613 | } | ||||||||
614 | |||||||||
615 | /// This function takes a 32-bit integer and returns the bit equivalent float. | ||||||||
616 | inline float BitsToFloat(uint32_t Bits) { | ||||||||
617 | float F; | ||||||||
618 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); | ||||||||
619 | memcpy(&F, &Bits, sizeof(Bits)); | ||||||||
620 | return F; | ||||||||
621 | } | ||||||||
622 | |||||||||
623 | /// This function takes a double and returns the bit equivalent 64-bit integer. | ||||||||
624 | /// Note that copying doubles around changes the bits of NaNs on some hosts, | ||||||||
625 | /// notably x86, so this routine cannot be used if these bits are needed. | ||||||||
626 | inline uint64_t DoubleToBits(double Double) { | ||||||||
627 | uint64_t Bits; | ||||||||
628 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); | ||||||||
629 | memcpy(&Bits, &Double, sizeof(Double)); | ||||||||
630 | return Bits; | ||||||||
631 | } | ||||||||
632 | |||||||||
633 | /// This function takes a float and returns the bit equivalent 32-bit integer. | ||||||||
634 | /// Note that copying floats around changes the bits of NaNs on some hosts, | ||||||||
635 | /// notably x86, so this routine cannot be used if these bits are needed. | ||||||||
636 | inline uint32_t FloatToBits(float Float) { | ||||||||
637 | uint32_t Bits; | ||||||||
638 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); | ||||||||
639 | memcpy(&Bits, &Float, sizeof(Float)); | ||||||||
640 | return Bits; | ||||||||
641 | } | ||||||||
642 | |||||||||
643 | /// A and B are either alignments or offsets. Return the minimum alignment that | ||||||||
644 | /// may be assumed after adding the two together. | ||||||||
645 | constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { | ||||||||
646 | // The largest power of 2 that divides both A and B. | ||||||||
647 | // | ||||||||
648 | // Replace "-Value" by "1+~Value" in the following commented code to avoid | ||||||||
649 | // MSVC warning C4146 | ||||||||
650 | // return (A | B) & -(A | B); | ||||||||
651 | return (A | B) & (1 + ~(A | B)); | ||||||||
652 | } | ||||||||
653 | |||||||||
654 | /// Returns the next power of two (in 64-bits) that is strictly greater than A. | ||||||||
655 | /// Returns zero on overflow. | ||||||||
656 | inline uint64_t NextPowerOf2(uint64_t A) { | ||||||||
657 | A |= (A >> 1); | ||||||||
658 | A |= (A >> 2); | ||||||||
659 | A |= (A >> 4); | ||||||||
660 | A |= (A >> 8); | ||||||||
661 | A |= (A >> 16); | ||||||||
662 | A |= (A >> 32); | ||||||||
663 | return A + 1; | ||||||||
664 | } | ||||||||
665 | |||||||||
666 | /// Returns the power of two which is less than or equal to the given value. | ||||||||
667 | /// Essentially, it is a floor operation across the domain of powers of two. | ||||||||
668 | inline uint64_t PowerOf2Floor(uint64_t A) { | ||||||||
669 | if (!A) return 0; | ||||||||
670 | return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); | ||||||||
671 | } | ||||||||
672 | |||||||||
673 | /// Returns the power of two which is greater than or equal to the given value. | ||||||||
674 | /// Essentially, it is a ceil operation across the domain of powers of two. | ||||||||
675 | inline uint64_t PowerOf2Ceil(uint64_t A) { | ||||||||
676 | if (!A) | ||||||||
677 | return 0; | ||||||||
678 | return NextPowerOf2(A - 1); | ||||||||
679 | } | ||||||||
680 | |||||||||
681 | /// Returns the next integer (mod 2**64) that is greater than or equal to | ||||||||
682 | /// \p Value and is a multiple of \p Align. \p Align must be non-zero. | ||||||||
683 | /// | ||||||||
684 | /// If non-zero \p Skew is specified, the return value will be a minimal | ||||||||
685 | /// integer that is greater than or equal to \p Value and equal to | ||||||||
686 | /// \p Align * N + \p Skew for some integer N. If \p Skew is larger than | ||||||||
687 | /// \p Align, its value is adjusted to '\p Skew mod \p Align'. | ||||||||
688 | /// | ||||||||
689 | /// Examples: | ||||||||
690 | /// \code | ||||||||
691 | /// alignTo(5, 8) = 8 | ||||||||
692 | /// alignTo(17, 8) = 24 | ||||||||
693 | /// alignTo(~0LL, 8) = 0 | ||||||||
694 | /// alignTo(321, 255) = 510 | ||||||||
695 | /// | ||||||||
696 | /// alignTo(5, 8, 7) = 7 | ||||||||
697 | /// alignTo(17, 8, 1) = 17 | ||||||||
698 | /// alignTo(~0LL, 8, 3) = 3 | ||||||||
699 | /// alignTo(321, 255, 42) = 552 | ||||||||
700 | /// \endcode | ||||||||
701 | inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { | ||||||||
702 | assert(Align != 0u && "Align can't be 0.")((Align != 0u && "Align can't be 0.") ? static_cast< void> (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 702, __PRETTY_FUNCTION__)); | ||||||||
703 | Skew %= Align; | ||||||||
704 | return (Value + Align - 1 - Skew) / Align * Align + Skew; | ||||||||
705 | } | ||||||||
706 | |||||||||
707 | /// Returns the next integer (mod 2**64) that is greater than or equal to | ||||||||
708 | /// \p Value and is a multiple of \c Align. \c Align must be non-zero. | ||||||||
709 | template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) { | ||||||||
710 | static_assert(Align != 0u, "Align must be non-zero"); | ||||||||
711 | return (Value + Align - 1) / Align * Align; | ||||||||
712 | } | ||||||||
713 | |||||||||
714 | /// Returns the integer ceil(Numerator / Denominator). | ||||||||
715 | inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { | ||||||||
716 | return alignTo(Numerator, Denominator) / Denominator; | ||||||||
717 | } | ||||||||
718 | |||||||||
719 | /// Returns the integer nearest(Numerator / Denominator). | ||||||||
720 | inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { | ||||||||
721 | return (Numerator + (Denominator / 2)) / Denominator; | ||||||||
722 | } | ||||||||
723 | |||||||||
724 | /// Returns the largest uint64_t less than or equal to \p Value and is | ||||||||
725 | /// \p Skew mod \p Align. \p Align must be non-zero | ||||||||
726 | inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { | ||||||||
727 | assert(Align != 0u && "Align can't be 0.")((Align != 0u && "Align can't be 0.") ? static_cast< void> (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 727, __PRETTY_FUNCTION__)); | ||||||||
728 | Skew %= Align; | ||||||||
729 | return (Value - Skew) / Align * Align + Skew; | ||||||||
730 | } | ||||||||
731 | |||||||||
732 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. | ||||||||
733 | /// Requires 0 < B <= 32. | ||||||||
734 | template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) { | ||||||||
735 | static_assert(B > 0, "Bit width can't be 0."); | ||||||||
736 | static_assert(B <= 32, "Bit width out of range."); | ||||||||
737 | return int32_t(X << (32 - B)) >> (32 - B); | ||||||||
738 | } | ||||||||
739 | |||||||||
740 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. | ||||||||
741 | /// Requires 0 < B < 32. | ||||||||
742 | inline int32_t SignExtend32(uint32_t X, unsigned B) { | ||||||||
743 | assert(B > 0 && "Bit width can't be 0.")((B > 0 && "Bit width can't be 0.") ? static_cast< void> (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 743, __PRETTY_FUNCTION__)); | ||||||||
744 | assert(B <= 32 && "Bit width out of range.")((B <= 32 && "Bit width out of range.") ? static_cast <void> (0) : __assert_fail ("B <= 32 && \"Bit width out of range.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 744, __PRETTY_FUNCTION__)); | ||||||||
745 | return int32_t(X << (32 - B)) >> (32 - B); | ||||||||
746 | } | ||||||||
747 | |||||||||
748 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. | ||||||||
749 | /// Requires 0 < B < 64. | ||||||||
750 | template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) { | ||||||||
751 | static_assert(B > 0, "Bit width can't be 0."); | ||||||||
752 | static_assert(B <= 64, "Bit width out of range."); | ||||||||
753 | return int64_t(x << (64 - B)) >> (64 - B); | ||||||||
754 | } | ||||||||
755 | |||||||||
756 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. | ||||||||
757 | /// Requires 0 < B < 64. | ||||||||
758 | inline int64_t SignExtend64(uint64_t X, unsigned B) { | ||||||||
759 | assert(B > 0 && "Bit width can't be 0.")((B > 0 && "Bit width can't be 0.") ? static_cast< void> (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 759, __PRETTY_FUNCTION__)); | ||||||||
760 | assert(B <= 64 && "Bit width out of range.")((B <= 64 && "Bit width out of range.") ? static_cast <void> (0) : __assert_fail ("B <= 64 && \"Bit width out of range.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 760, __PRETTY_FUNCTION__)); | ||||||||
761 | return int64_t(X << (64 - B)) >> (64 - B); | ||||||||
762 | } | ||||||||
763 | |||||||||
764 | /// Subtract two unsigned integers, X and Y, of type T and return the absolute | ||||||||
765 | /// value of the result. | ||||||||
766 | template <typename T> | ||||||||
767 | std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) { | ||||||||
768 | return std::max(X, Y) - std::min(X, Y); | ||||||||
769 | } | ||||||||
770 | |||||||||
771 | /// Add two unsigned integers, X and Y, of type T. Clamp the result to the | ||||||||
772 | /// maximum representable value of T on overflow. ResultOverflowed indicates if | ||||||||
773 | /// the result is larger than the maximum representable value of type T. | ||||||||
774 | template <typename T> | ||||||||
775 | std::enable_if_t<std::is_unsigned<T>::value, T> | ||||||||
776 | SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { | ||||||||
777 | bool Dummy; | ||||||||
778 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; | ||||||||
779 | // Hacker's Delight, p. 29 | ||||||||
780 | T Z = X + Y; | ||||||||
781 | Overflowed = (Z < X || Z < Y); | ||||||||
782 | if (Overflowed) | ||||||||
783 | return std::numeric_limits<T>::max(); | ||||||||
784 | else | ||||||||
785 | return Z; | ||||||||
786 | } | ||||||||
787 | |||||||||
788 | /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the | ||||||||
789 | /// maximum representable value of T on overflow. ResultOverflowed indicates if | ||||||||
790 | /// the result is larger than the maximum representable value of type T. | ||||||||
791 | template <typename T> | ||||||||
792 | std::enable_if_t<std::is_unsigned<T>::value, T> | ||||||||
793 | SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { | ||||||||
794 | bool Dummy; | ||||||||
795 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; | ||||||||
796 | |||||||||
797 | // Hacker's Delight, p. 30 has a different algorithm, but we don't use that | ||||||||
798 | // because it fails for uint16_t (where multiplication can have undefined | ||||||||
799 | // behavior due to promotion to int), and requires a division in addition | ||||||||
800 | // to the multiplication. | ||||||||
801 | |||||||||
802 | Overflowed = false; | ||||||||
803 | |||||||||
804 | // Log2(Z) would be either Log2Z or Log2Z + 1. | ||||||||
805 | // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z | ||||||||
806 | // will necessarily be less than Log2Max as desired. | ||||||||
807 | int Log2Z = Log2_64(X) + Log2_64(Y); | ||||||||
808 | const T Max = std::numeric_limits<T>::max(); | ||||||||
809 | int Log2Max = Log2_64(Max); | ||||||||
810 | if (Log2Z < Log2Max) { | ||||||||
811 | return X * Y; | ||||||||
812 | } | ||||||||
813 | if (Log2Z > Log2Max) { | ||||||||
814 | Overflowed = true; | ||||||||
815 | return Max; | ||||||||
816 | } | ||||||||
817 | |||||||||
818 | // We're going to use the top bit, and maybe overflow one | ||||||||
819 | // bit past it. Multiply all but the bottom bit then add | ||||||||
820 | // that on at the end. | ||||||||
821 | T Z = (X >> 1) * Y; | ||||||||
822 | if (Z & ~(Max >> 1)) { | ||||||||
823 | Overflowed = true; | ||||||||
824 | return Max; | ||||||||
825 | } | ||||||||
826 | Z <<= 1; | ||||||||
827 | if (X & 1) | ||||||||
828 | return SaturatingAdd(Z, Y, ResultOverflowed); | ||||||||
829 | |||||||||
830 | return Z; | ||||||||
831 | } | ||||||||
832 | |||||||||
833 | /// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to | ||||||||
834 | /// the product. Clamp the result to the maximum representable value of T on | ||||||||
835 | /// overflow. ResultOverflowed indicates if the result is larger than the | ||||||||
836 | /// maximum representable value of type T. | ||||||||
837 | template <typename T> | ||||||||
838 | std::enable_if_t<std::is_unsigned<T>::value, T> | ||||||||
839 | SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { | ||||||||
840 | bool Dummy; | ||||||||
841 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; | ||||||||
842 | |||||||||
843 | T Product = SaturatingMultiply(X, Y, &Overflowed); | ||||||||
844 | if (Overflowed) | ||||||||
845 | return Product; | ||||||||
846 | |||||||||
847 | return SaturatingAdd(A, Product, &Overflowed); | ||||||||
848 | } | ||||||||
849 | |||||||||
850 | /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. | ||||||||
851 | extern const float huge_valf; | ||||||||
852 | |||||||||
853 | |||||||||
854 | /// Add two signed integers, computing the two's complement truncated result, | ||||||||
855 | /// returning true if overflow occured. | ||||||||
856 | template <typename T> | ||||||||
857 | std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) { | ||||||||
858 | #if __has_builtin(__builtin_add_overflow)1 | ||||||||
859 | return __builtin_add_overflow(X, Y, &Result); | ||||||||
860 | #else | ||||||||
861 | // Perform the unsigned addition. | ||||||||
862 | using U = std::make_unsigned_t<T>; | ||||||||
863 | const U UX = static_cast<U>(X); | ||||||||
864 | const U UY = static_cast<U>(Y); | ||||||||
865 | const U UResult = UX + UY; | ||||||||
866 | |||||||||
867 | // Convert to signed. | ||||||||
868 | Result = static_cast<T>(UResult); | ||||||||
869 | |||||||||
870 | // Adding two positive numbers should result in a positive number. | ||||||||
871 | if (X > 0 && Y > 0) | ||||||||
872 | return Result <= 0; | ||||||||
873 | // Adding two negatives should result in a negative number. | ||||||||
874 | if (X < 0 && Y < 0) | ||||||||
875 | return Result >= 0; | ||||||||
876 | return false; | ||||||||
877 | #endif | ||||||||
878 | } | ||||||||
879 | |||||||||
880 | /// Subtract two signed integers, computing the two's complement truncated | ||||||||
881 | /// result, returning true if an overflow ocurred. | ||||||||
882 | template <typename T> | ||||||||
883 | std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) { | ||||||||
884 | #if __has_builtin(__builtin_sub_overflow)1 | ||||||||
885 | return __builtin_sub_overflow(X, Y, &Result); | ||||||||
886 | #else | ||||||||
887 | // Perform the unsigned addition. | ||||||||
888 | using U = std::make_unsigned_t<T>; | ||||||||
889 | const U UX = static_cast<U>(X); | ||||||||
890 | const U UY = static_cast<U>(Y); | ||||||||
891 | const U UResult = UX - UY; | ||||||||
892 | |||||||||
893 | // Convert to signed. | ||||||||
894 | Result = static_cast<T>(UResult); | ||||||||
895 | |||||||||
896 | // Subtracting a positive number from a negative results in a negative number. | ||||||||
897 | if (X <= 0 && Y > 0) | ||||||||
898 | return Result >= 0; | ||||||||
899 | // Subtracting a negative number from a positive results in a positive number. | ||||||||
900 | if (X >= 0 && Y < 0) | ||||||||
901 | return Result <= 0; | ||||||||
902 | return false; | ||||||||
903 | #endif | ||||||||
904 | } | ||||||||
905 | |||||||||
906 | /// Multiply two signed integers, computing the two's complement truncated | ||||||||
907 | /// result, returning true if an overflow ocurred. | ||||||||
908 | template <typename T> | ||||||||
909 | std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) { | ||||||||
910 | // Perform the unsigned multiplication on absolute values. | ||||||||
911 | using U = std::make_unsigned_t<T>; | ||||||||
912 | const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X); | ||||||||
913 | const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y); | ||||||||
914 | const U UResult = UX * UY; | ||||||||
915 | |||||||||
916 | // Convert to signed. | ||||||||
917 | const bool IsNegative = (X < 0) ^ (Y < 0); | ||||||||
918 | Result = IsNegative ? (0 - UResult) : UResult; | ||||||||
919 | |||||||||
920 | // If any of the args was 0, result is 0 and no overflow occurs. | ||||||||
921 | if (UX == 0 || UY == 0) | ||||||||
922 | return false; | ||||||||
923 | |||||||||
924 | // UX and UY are in [1, 2^n], where n is the number of digits. | ||||||||
925 | // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for | ||||||||
926 | // positive) divided by an argument compares to the other. | ||||||||
927 | if (IsNegative) | ||||||||
928 | return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY; | ||||||||
929 | else | ||||||||
930 | return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY; | ||||||||
931 | } | ||||||||
932 | |||||||||
933 | } // End llvm namespace | ||||||||
934 | |||||||||
935 | #endif |