Bug Summary

File: llvm/include/llvm/CodeGen/ExecutionDomainFix.h
Warning: line 87, column 28
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ExecutionDomainFix.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include 
-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen/ExecutionDomainFix.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/CodeGen/ExecutionDomainFix.cpp

1//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/CodeGen/ExecutionDomainFix.h"
10#include "llvm/CodeGen/MachineRegisterInfo.h"
11#include "llvm/CodeGen/TargetInstrInfo.h"
12#include "llvm/Support/Debug.h"
13
14using namespace llvm;
15
16#define DEBUG_TYPE "execution-deps-fix"
17
18iterator_range<SmallVectorImpl<int>::const_iterator>
19ExecutionDomainFix::regIndices(unsigned Reg) const {
20 assert(Reg < AliasMap.size() && "Invalid register")(static_cast<void> (0));
21 const auto &Entry = AliasMap[Reg];
22 return make_range(Entry.begin(), Entry.end());
23}
24
25DomainValue *ExecutionDomainFix::alloc(int domain) {
26 DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue
27 : Avail.pop_back_val();
28 if (domain >= 0)
29 dv->addDomain(domain);
30 assert(dv->Refs == 0 && "Reference count wasn't cleared")(static_cast<void> (0));
31 assert(!dv->Next && "Chained DomainValue shouldn't have been recycled")(static_cast<void> (0));
32 return dv;
33}
34
35void ExecutionDomainFix::release(DomainValue *DV) {
36 while (DV) {
37 assert(DV->Refs && "Bad DomainValue")(static_cast<void> (0));
38 if (--DV->Refs)
39 return;
40
41 // There are no more DV references. Collapse any contained instructions.
42 if (DV->AvailableDomains && !DV->isCollapsed())
43 collapse(DV, DV->getFirstDomain());
44
45 DomainValue *Next = DV->Next;
46 DV->clear();
47 Avail.push_back(DV);
48 // Also release the next DomainValue in the chain.
49 DV = Next;
50 }
51}
52
53DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) {
54 DomainValue *DV = DVRef;
55 if (!DV || !DV->Next)
23
Assuming 'DV' is non-null
24
Assuming field 'Next' is null
25
Taking true branch
56 return DV;
26
Returning without writing to 'DVRef->Instrs.Size', which participates in a condition later
27
Returning pointer (loaded from 'DV'), which participates in a condition later
57
58 // DV has a chain. Find the end.
59 do
60 DV = DV->Next;
61 while (DV->Next);
62
63 // Update DVRef to point to DV.
64 retain(DV);
65 release(DVRef);
66 DVRef = DV;
67 return DV;
68}
69
70void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) {
71 assert(unsigned(rx) < NumRegs && "Invalid index")(static_cast<void> (0));
72 assert(!LiveRegs.empty() && "Must enter basic block first.")(static_cast<void> (0));
73
74 if (LiveRegs[rx] == dv)
75 return;
76 if (LiveRegs[rx])
77 release(LiveRegs[rx]);
78 LiveRegs[rx] = retain(dv);
79}
80
81void ExecutionDomainFix::kill(int rx) {
82 assert(unsigned(rx) < NumRegs && "Invalid index")(static_cast<void> (0));
83 assert(!LiveRegs.empty() && "Must enter basic block first.")(static_cast<void> (0));
84 if (!LiveRegs[rx])
85 return;
86
87 release(LiveRegs[rx]);
88 LiveRegs[rx] = nullptr;
89}
90
91void ExecutionDomainFix::force(int rx, unsigned domain) {
92 assert(unsigned(rx) < NumRegs && "Invalid index")(static_cast<void> (0));
93 assert(!LiveRegs.empty() && "Must enter basic block first.")(static_cast<void> (0));
94 if (DomainValue *dv = LiveRegs[rx]) {
61
Assuming 'dv' is non-null
62
Taking true branch
95 if (dv->isCollapsed())
63
Calling 'DomainValue::isCollapsed'
69
Returning from 'DomainValue::isCollapsed'
70
Taking true branch
96 dv->addDomain(domain);
71
Passing the value 32 via 1st parameter 'domain'
72
Calling 'DomainValue::addDomain'
97 else if (dv->hasDomain(domain))
98 collapse(dv, domain);
99 else {
100 // This is an incompatible open DomainValue. Collapse it to whatever and
101 // force the new value into domain. This costs a domain crossing.
102 collapse(dv, dv->getFirstDomain());
103 assert(LiveRegs[rx] && "Not live after collapse?")(static_cast<void> (0));
104 LiveRegs[rx]->addDomain(domain);
105 }
106 } else {
107 // Set up basic collapsed DomainValue.
108 setLiveReg(rx, alloc(domain));
109 }
110}
111
112void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) {
113 assert(dv->hasDomain(domain) && "Cannot collapse")(static_cast<void> (0));
114
115 // Collapse all the instructions.
116 while (!dv->Instrs.empty())
117 TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
118 dv->setSingleDomain(domain);
119
120 // If there are multiple users, give them new, unique DomainValues.
121 if (!LiveRegs.empty() && dv->Refs > 1)
122 for (unsigned rx = 0; rx != NumRegs; ++rx)
123 if (LiveRegs[rx] == dv)
124 setLiveReg(rx, alloc(domain));
125}
126
127bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) {
128 assert(!A->isCollapsed() && "Cannot merge into collapsed")(static_cast<void> (0));
129 assert(!B->isCollapsed() && "Cannot merge from collapsed")(static_cast<void> (0));
130 if (A == B)
131 return true;
132 // Restrict to the domains that A and B have in common.
133 unsigned common = A->getCommonDomains(B->AvailableDomains);
134 if (!common)
135 return false;
136 A->AvailableDomains = common;
137 A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
138
139 // Clear the old DomainValue so we won't try to swizzle instructions twice.
140 B->clear();
141 // All uses of B are referred to A.
142 B->Next = retain(A);
143
144 for (unsigned rx = 0; rx != NumRegs; ++rx) {
145 assert(!LiveRegs.empty() && "no space allocated for live registers")(static_cast<void> (0));
146 if (LiveRegs[rx] == B)
147 setLiveReg(rx, A);
148 }
149 return true;
150}
151
152void ExecutionDomainFix::enterBasicBlock(
153 const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
154
155 MachineBasicBlock *MBB = TraversedMBB.MBB;
156
157 // Set up LiveRegs to represent registers entering MBB.
158 // Set default domain values to 'no domain' (nullptr)
159 if (LiveRegs.empty())
14
Assuming the condition is false
15
Taking false branch
160 LiveRegs.assign(NumRegs, nullptr);
161
162 // This is the entry block.
163 if (MBB->pred_empty()) {
16
Assuming the condition is false
17
Taking false branch
164 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n")do { } while (false);
165 return;
166 }
167
168 // Try to coalesce live-out registers from predecessors.
169 for (MachineBasicBlock *pred : MBB->predecessors()) {
170 assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() &&(static_cast<void> (0))
171 "Should have pre-allocated MBBInfos for all MBBs")(static_cast<void> (0));
172 LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()];
 173 // Incoming is empty if this is a backedge from a BB
 174 // we haven't processed yet
175 if (Incoming.empty())
18
Assuming the condition is false
19
Taking false branch
176 continue;
177
178 for (unsigned rx = 0; rx != NumRegs; ++rx) {
20
Assuming 'rx' is not equal to field 'NumRegs'
21
Loop condition is true. Entering loop body
179 DomainValue *pdv = resolve(Incoming[rx]);
22
Calling 'ExecutionDomainFix::resolve'
28
Returning from 'ExecutionDomainFix::resolve'
180 if (!pdv
28.1
'pdv' is non-null
28.1
'pdv' is non-null
28.1
'pdv' is non-null
28.1
'pdv' is non-null
)
29
Taking false branch
181 continue;
182 if (!LiveRegs[rx]) {
30
Assuming pointer value is null
31
Taking false branch
183 setLiveReg(rx, pdv);
184 continue;
185 }
186
187 // We have a live DomainValue from more than one predecessor.
188 if (LiveRegs[rx]->isCollapsed()) {
32
Calling 'DomainValue::isCollapsed'
38
Returning from 'DomainValue::isCollapsed'
39
Taking false branch
189 // We are already collapsed, but predecessor is not. Force it.
190 unsigned Domain = LiveRegs[rx]->getFirstDomain();
191 if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
192 collapse(pdv, Domain);
193 continue;
194 }
195
196 // Currently open, merge in predecessor.
197 if (!pdv->isCollapsed())
40
Calling 'DomainValue::isCollapsed'
46
Returning from 'DomainValue::isCollapsed'
47
Taking false branch
198 merge(LiveRegs[rx], pdv);
199 else
200 force(rx, pdv->getFirstDomain());
48
Calling 'DomainValue::getFirstDomain'
58
Returning from 'DomainValue::getFirstDomain'
59
Passing the value 32 via 2nd parameter 'domain'
60
Calling 'ExecutionDomainFix::force'
201 }
202 }
203 LLVM_DEBUG(dbgs() << printMBBReference(*MBB)do { } while (false)
204 << (!TraversedMBB.IsDone ? ": incomplete\n"do { } while (false)
205 : ": all preds known\n"))do { } while (false);
206}
207
208void ExecutionDomainFix::leaveBasicBlock(
209 const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
210 assert(!LiveRegs.empty() && "Must enter basic block first.")(static_cast<void> (0));
211 unsigned MBBNumber = TraversedMBB.MBB->getNumber();
212 assert(MBBNumber < MBBOutRegsInfos.size() &&(static_cast<void> (0))
213 "Unexpected basic block number.")(static_cast<void> (0));
 214 // Save the live-out DomainValues at end of MBB - used by enterBasicBlock().
215 for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) {
216 release(OldLiveReg);
217 }
218 MBBOutRegsInfos[MBBNumber] = LiveRegs;
219 LiveRegs.clear();
220}
221
222bool ExecutionDomainFix::visitInstr(MachineInstr *MI) {
223 // Update instructions with explicit execution domains.
224 std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
225 if (DomP.first) {
226 if (DomP.second)
227 visitSoftInstr(MI, DomP.second);
228 else
229 visitHardInstr(MI, DomP.first);
230 }
231
232 return !DomP.first;
233}
234
235void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) {
236 assert(!MI->isDebugInstr() && "Won't process debug values")(static_cast<void> (0));
237 const MCInstrDesc &MCID = MI->getDesc();
238 for (unsigned i = 0,
239 e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
240 i != e; ++i) {
241 MachineOperand &MO = MI->getOperand(i);
242 if (!MO.isReg())
243 continue;
244 if (MO.isUse())
245 continue;
246 for (int rx : regIndices(MO.getReg())) {
247 // This instruction explicitly defines rx.
248 LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI)do { } while (false);
249
250 // Kill off domains redefined by generic instructions.
251 if (Kill)
252 kill(rx);
253 }
254 }
255}
256
257void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
258 // Collapse all uses.
259 for (unsigned i = mi->getDesc().getNumDefs(),
260 e = mi->getDesc().getNumOperands();
261 i != e; ++i) {
262 MachineOperand &mo = mi->getOperand(i);
263 if (!mo.isReg())
264 continue;
265 for (int rx : regIndices(mo.getReg())) {
266 force(rx, domain);
267 }
268 }
269
270 // Kill all defs and force them.
271 for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
272 MachineOperand &mo = mi->getOperand(i);
273 if (!mo.isReg())
274 continue;
275 for (int rx : regIndices(mo.getReg())) {
276 kill(rx);
277 force(rx, domain);
278 }
279 }
280}
281
282void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
283 // Bitmask of available domains for this instruction after taking collapsed
284 // operands into account.
285 unsigned available = mask;
286
287 // Scan the explicit use operands for incoming domains.
288 SmallVector<int, 4> used;
289 if (!LiveRegs.empty())
290 for (unsigned i = mi->getDesc().getNumDefs(),
291 e = mi->getDesc().getNumOperands();
292 i != e; ++i) {
293 MachineOperand &mo = mi->getOperand(i);
294 if (!mo.isReg())
295 continue;
296 for (int rx : regIndices(mo.getReg())) {
297 DomainValue *dv = LiveRegs[rx];
298 if (dv == nullptr)
299 continue;
300 // Bitmask of domains that dv and available have in common.
301 unsigned common = dv->getCommonDomains(available);
302 // Is it possible to use this collapsed register for free?
303 if (dv->isCollapsed()) {
304 // Restrict available domains to the ones in common with the operand.
305 // If there are no common domains, we must pay the cross-domain
306 // penalty for this operand.
307 if (common)
308 available = common;
309 } else if (common)
310 // Open DomainValue is compatible, save it for merging.
311 used.push_back(rx);
312 else
313 // Open DomainValue is not compatible with instruction. It is useless
314 // now.
315 kill(rx);
316 }
317 }
318
319 // If the collapsed operands force a single domain, propagate the collapse.
320 if (isPowerOf2_32(available)) {
321 unsigned domain = countTrailingZeros(available);
322 TII->setExecutionDomain(*mi, domain);
323 visitHardInstr(mi, domain);
324 return;
325 }
326
327 // Kill off any remaining uses that don't match available, and build a list of
328 // incoming DomainValues that we want to merge.
329 SmallVector<int, 4> Regs;
330 for (int rx : used) {
331 assert(!LiveRegs.empty() && "no space allocated for live registers")(static_cast<void> (0));
332 DomainValue *&LR = LiveRegs[rx];
333 // This useless DomainValue could have been missed above.
334 if (!LR->getCommonDomains(available)) {
335 kill(rx);
336 continue;
337 }
338 // Sorted insertion.
339 // Enables giving priority to the latest domains during merging.
340 const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
341 auto I = partition_point(Regs, [&](int I) {
342 return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
343 });
344 Regs.insert(I, rx);
345 }
346
 347 // Regs is now sorted by reaching def, i.e. in order of appearance. Try to
 348 // merge them all, giving priority to the latest ones.
349 DomainValue *dv = nullptr;
350 while (!Regs.empty()) {
351 if (!dv) {
352 dv = LiveRegs[Regs.pop_back_val()];
353 // Force the first dv to match the current instruction.
354 dv->AvailableDomains = dv->getCommonDomains(available);
355 assert(dv->AvailableDomains && "Domain should have been filtered")(static_cast<void> (0));
356 continue;
357 }
358
359 DomainValue *Latest = LiveRegs[Regs.pop_back_val()];
360 // Skip already merged values.
361 if (Latest == dv || Latest->Next)
362 continue;
363 if (merge(dv, Latest))
364 continue;
365
366 // If latest didn't merge, it is useless now. Kill all registers using it.
367 for (int i : used) {
368 assert(!LiveRegs.empty() && "no space allocated for live registers")(static_cast<void> (0));
369 if (LiveRegs[i] == Latest)
370 kill(i);
371 }
372 }
373
374 // dv is the DomainValue we are going to use for this instruction.
375 if (!dv) {
376 dv = alloc();
377 dv->AvailableDomains = available;
378 }
379 dv->Instrs.push_back(mi);
380
 381 // Finally set all defs and non-collapsed uses to dv. We must iterate through
 382 // all the operands, including imp-def ones.
383 for (const MachineOperand &mo : mi->operands()) {
384 if (!mo.isReg())
385 continue;
386 for (int rx : regIndices(mo.getReg())) {
387 if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) {
388 kill(rx);
389 setLiveReg(rx, dv);
390 }
391 }
392 }
393}
394
395void ExecutionDomainFix::processBasicBlock(
396 const LoopTraversal::TraversedMBBInfo &TraversedMBB) {
397 enterBasicBlock(TraversedMBB);
13
Calling 'ExecutionDomainFix::enterBasicBlock'
398 // If this block is not done, it makes little sense to make any decisions
399 // based on clearance information. We need to make a second pass anyway,
400 // and by then we'll have better information, so we can avoid doing the work
401 // to try and break dependencies now.
402 for (MachineInstr &MI : *TraversedMBB.MBB) {
403 if (!MI.isDebugInstr()) {
404 bool Kill = false;
405 if (TraversedMBB.PrimaryPass)
406 Kill = visitInstr(&MI);
407 processDefs(&MI, Kill);
408 }
409 }
410 leaveBasicBlock(TraversedMBB);
411}
412
413bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) {
414 if (skipFunction(mf.getFunction()))
1
Assuming the condition is false
2
Taking false branch
415 return false;
416 MF = &mf;
417 TII = MF->getSubtarget().getInstrInfo();
418 TRI = MF->getSubtarget().getRegisterInfo();
419 LiveRegs.clear();
420 assert(NumRegs == RC->getNumRegs() && "Bad regclass")(static_cast<void> (0));
421
422 LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: "do { } while (false)
3
Loop condition is false. Exiting loop
423 << TRI->getRegClassName(RC) << " **********\n")do { } while (false);
424
425 // If no relevant registers are used in the function, we can skip it
426 // completely.
427 bool anyregs = false;
428 const MachineRegisterInfo &MRI = mf.getRegInfo();
429 for (unsigned Reg : *RC) {
4
Assuming '__begin1' is not equal to '__end1'
430 if (MRI.isPhysRegUsed(Reg)) {
5
Assuming the condition is true
6
Taking true branch
431 anyregs = true;
432 break;
433 }
434 }
435 if (!anyregs
7.1
'anyregs' is true
7.1
'anyregs' is true
7.1
'anyregs' is true
7.1
'anyregs' is true
)
7
Execution continues on line 435
8
Taking false branch
436 return false;
437
438 RDA = &getAnalysis<ReachingDefAnalysis>();
439
440 // Initialize the AliasMap on the first use.
441 if (AliasMap.empty()) {
9
Assuming the condition is false
10
Taking false branch
442 // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
443 // therefore the LiveRegs array.
444 AliasMap.resize(TRI->getNumRegs());
445 for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
446 for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid();
447 ++AI)
448 AliasMap[*AI].push_back(i);
449 }
450
451 // Initialize the MBBOutRegsInfos
452 MBBOutRegsInfos.resize(mf.getNumBlockIDs());
453
454 // Traverse the basic blocks.
455 LoopTraversal Traversal;
456 LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf);
457 for (const LoopTraversal::TraversedMBBInfo &TraversedMBB : TraversedMBBOrder)
11
Assuming '__begin1' is not equal to '__end1'
458 processBasicBlock(TraversedMBB);
12
Calling 'ExecutionDomainFix::processBasicBlock'
459
460 for (const LiveRegsDVInfo &OutLiveRegs : MBBOutRegsInfos)
461 for (DomainValue *OutLiveReg : OutLiveRegs)
462 if (OutLiveReg)
463 release(OutLiveReg);
464
465 MBBOutRegsInfos.clear();
466 Avail.clear();
467 Allocator.DestroyAll();
468
469 return false;
470}

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/CodeGen/ExecutionDomainFix.h

1//==-- llvm/CodeGen/ExecutionDomainFix.h - Execution Domain Fix -*- C++ -*--==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file Execution Domain Fix pass.
10///
11/// Some X86 SSE instructions like mov, and, or, xor are available in different
12/// variants for different operand types. These variant instructions are
13/// equivalent, but on Nehalem and newer cpus there is extra latency
14/// transferring data between integer and floating point domains. ARM cores
15/// have similar issues when they are configured with both VFP and NEON
16/// pipelines.
17///
18/// This pass changes the variant instructions to minimize domain crossings.
19//
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_EXECUTIONDOMAINFIX_H
23#define LLVM_CODEGEN_EXECUTIONDOMAINFIX_H
24
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/CodeGen/LoopTraversal.h"
27#include "llvm/CodeGen/MachineFunctionPass.h"
28#include "llvm/CodeGen/ReachingDefAnalysis.h"
29#include "llvm/CodeGen/TargetRegisterInfo.h"
30
31namespace llvm {
32
33class MachineInstr;
34class TargetInstrInfo;
35
36/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
37/// of execution domains.
38///
39/// An open DomainValue represents a set of instructions that can still switch
40/// execution domain. Multiple registers may refer to the same open
41/// DomainValue - they will eventually be collapsed to the same execution
42/// domain.
43///
44/// A collapsed DomainValue represents a single register that has been forced
45/// into one or more execution domains. There is a separate collapsed
46/// DomainValue for each register, but it may contain multiple execution
47/// domains. A register value is initially created in a single execution
48/// domain, but if we were forced to pay the penalty of a domain crossing, we
49/// keep track of the fact that the register is now available in multiple
50/// domains.
51struct DomainValue {
52 /// Basic reference counting.
53 unsigned Refs = 0;
54
55 /// Bitmask of available domains. For an open DomainValue, it is the still
56 /// possible domains for collapsing. For a collapsed DomainValue it is the
57 /// domains where the register is available for free.
58 unsigned AvailableDomains;
59
60 /// Pointer to the next DomainValue in a chain. When two DomainValues are
61 /// merged, Victim.Next is set to point to Victor, so old DomainValue
62 /// references can be updated by following the chain.
63 DomainValue *Next;
64
65 /// Twiddleable instructions using or defining these registers.
66 SmallVector<MachineInstr *, 8> Instrs;
67
68 DomainValue() { clear(); }
69
70 /// A collapsed DomainValue has no instructions to twiddle - it simply keeps
71 /// track of the domains where the registers are already available.
72 bool isCollapsed() const { return Instrs.empty(); }
33
Calling 'SmallVectorBase::empty'
36
Returning from 'SmallVectorBase::empty'
37
Returning zero, which participates in a condition later
41
Calling 'SmallVectorBase::empty'
44
Returning from 'SmallVectorBase::empty'
45
Returning the value 1, which participates in a condition later
64
Calling 'SmallVectorBase::empty'
67
Returning from 'SmallVectorBase::empty'
68
Returning the value 1, which participates in a condition later
73
74 /// Is domain available?
75 bool hasDomain(unsigned domain) const {
76 assert(domain <(static_cast<void> (0))
77 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&(static_cast<void> (0))
78 "undefined behavior")(static_cast<void> (0));
79 return AvailableDomains & (1u << domain);
80 }
81
82 /// Mark domain as available.
83 void addDomain(unsigned domain) {
84 assert(domain <(static_cast<void> (0))
85 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&(static_cast<void> (0))
86 "undefined behavior")(static_cast<void> (0));
87 AvailableDomains |= 1u << domain;
73
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
88 }
89
90 // Restrict to a single domain available.
91 void setSingleDomain(unsigned domain) {
92 assert(domain <(static_cast<void> (0))
93 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&(static_cast<void> (0))
94 "undefined behavior")(static_cast<void> (0));
95 AvailableDomains = 1u << domain;
96 }
97
98 /// Return bitmask of domains that are available and in mask.
99 unsigned getCommonDomains(unsigned mask) const {
100 return AvailableDomains & mask;
101 }
102
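 103 /// First domain available. If AvailableDomains is 0 this returns the bit width (32 in this trace), which is not a valid domain index.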
104 unsigned getFirstDomain() const {
105 return countTrailingZeros(AvailableDomains);
49
Calling 'countTrailingZeros<unsigned int>'
56
Returning from 'countTrailingZeros<unsigned int>'
57
Returning the value 32
106 }
107
 108 /// Reset this DomainValue: no available domains, no Next link, and no instructions.
109 void clear() {
110 AvailableDomains = 0;
111 Next = nullptr;
112 Instrs.clear();
113 }
114};
115
116class ExecutionDomainFix : public MachineFunctionPass {
117 SpecificBumpPtrAllocator<DomainValue> Allocator;
118 SmallVector<DomainValue *, 16> Avail;
119
120 const TargetRegisterClass *const RC;
121 MachineFunction *MF;
122 const TargetInstrInfo *TII;
123 const TargetRegisterInfo *TRI;
124 std::vector<SmallVector<int, 1>> AliasMap;
125 const unsigned NumRegs;
126 /// Value currently in each register, or NULL when no value is being tracked.
127 /// This counts as a DomainValue reference.
128 using LiveRegsDVInfo = std::vector<DomainValue *>;
129 LiveRegsDVInfo LiveRegs;
130 /// Keeps domain information for all registers. Note that this
131 /// is different from the usual definition notion of liveness. The CPU
132 /// doesn't care whether or not we consider a register killed.
133 using OutRegsInfoMap = SmallVector<LiveRegsDVInfo, 4>;
134 OutRegsInfoMap MBBOutRegsInfos;
135
136 ReachingDefAnalysis *RDA;
137
138public:
139 ExecutionDomainFix(char &PassID, const TargetRegisterClass &RC)
140 : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {}
141
142 void getAnalysisUsage(AnalysisUsage &AU) const override {
143 AU.setPreservesAll();
144 AU.addRequired<ReachingDefAnalysis>();
145 MachineFunctionPass::getAnalysisUsage(AU);
146 }
147
148 bool runOnMachineFunction(MachineFunction &MF) override;
149
150 MachineFunctionProperties getRequiredProperties() const override {
151 return MachineFunctionProperties().set(
152 MachineFunctionProperties::Property::NoVRegs);
153 }
154
155private:
156 /// Translate TRI register number to a list of indices into our smaller tables
157 /// of interesting registers.
158 iterator_range<SmallVectorImpl<int>::const_iterator>
159 regIndices(unsigned Reg) const;
160
161 /// DomainValue allocation.
162 DomainValue *alloc(int domain = -1);
163
164 /// Add reference to DV.
165 DomainValue *retain(DomainValue *DV) {
166 if (DV)
167 ++DV->Refs;
168 return DV;
169 }
170
171 /// Release a reference to DV. When the last reference is released,
172 /// collapse if needed.
173 void release(DomainValue *);
174
175 /// Follow the chain of dead DomainValues until a live DomainValue is reached.
176 /// Update the referenced pointer when necessary.
177 DomainValue *resolve(DomainValue *&);
178
179 /// Set LiveRegs[rx] = dv, updating reference counts.
180 void setLiveReg(int rx, DomainValue *DV);
181
182 /// Kill register rx, recycle or collapse any DomainValue.
183 void kill(int rx);
184
185 /// Force register rx into domain.
186 void force(int rx, unsigned domain);
187
188 /// Collapse open DomainValue into given domain. If there are multiple
189 /// registers using dv, they each get a unique collapsed DomainValue.
190 void collapse(DomainValue *dv, unsigned domain);
191
192 /// All instructions and registers in B are moved to A, and B is released.
193 bool merge(DomainValue *A, DomainValue *B);
194
195 /// Set up LiveRegs by merging predecessor live-out values.
196 void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
197
198 /// Update live-out values.
199 void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
200
 201 /// Process the given basic block.
202 void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
203
 204 /// Visit the given instruction.
205 bool visitInstr(MachineInstr *);
206
207 /// Update def-ages for registers defined by MI.
208 /// If Kill is set, also kill off DomainValues clobbered by the defs.
209 void processDefs(MachineInstr *, bool Kill);
210
211 /// A soft instruction can be changed to work in other domains given by mask.
212 void visitSoftInstr(MachineInstr *, unsigned mask);
213
214 /// A hard instruction only works in one domain. All input registers will be
215 /// forced into that domain.
216 void visitHardInstr(MachineInstr *, unsigned domain);
217};
218
219} // namespace llvm
220
221#endif // LLVM_CODEGEN_EXECUTIONDOMAINFIX_H

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/ADT/SmallVector.h

1//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SmallVector class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_SMALLVECTOR_H
14#define LLVM_ADT_SMALLVECTOR_H
15
16#include "llvm/ADT/iterator_range.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/MemAlloc.h"
20#include "llvm/Support/type_traits.h"
21#include <algorithm>
22#include <cassert>
23#include <cstddef>
24#include <cstdlib>
25#include <cstring>
26#include <functional>
27#include <initializer_list>
28#include <iterator>
29#include <limits>
30#include <memory>
31#include <new>
32#include <type_traits>
33#include <utility>
34
35namespace llvm {
36
37/// This is all the stuff common to all SmallVectors.
38///
39/// The template parameter specifies the type which should be used to hold the
40/// Size and Capacity of the SmallVector, so it can be adjusted.
41/// Using 32 bit size is desirable to shrink the size of the SmallVector.
42/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
43/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
44/// buffering bitcode output - which can exceed 4GB.
// Header shared by every SmallVector: a buffer pointer plus Size/Capacity
// counters stored as Size_T.
45template <class Size_T> class SmallVectorBase {
46protected:
// Start of the element buffer; set by the constructor below.
47 void *BeginX;
// Size starts at 0; Capacity is only set by the constructor below.
48 Size_T Size = 0, Capacity;
49
50 /// The maximum value of the Size_T used.
51 static constexpr size_t SizeTypeMax() {
52 return std::numeric_limits<Size_T>::max();
53 }
54
// No default construction: a buffer pointer and a capacity are always required.
55 SmallVectorBase() = delete;
// Stores FirstEl as the buffer start and TotalCapacity (converted from size_t
// to Size_T) as the capacity.
56 SmallVectorBase(void *FirstEl, size_t TotalCapacity)
57 : BeginX(FirstEl), Capacity(TotalCapacity) {}
58
59 /// This is a helper for \a grow() that's out of line to reduce code
60 /// duplication. This function will report a fatal error if it can't grow at
61 /// least to \p MinSize.
62 void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);
63
64 /// This is an implementation of the grow() method which only works
65 /// on POD-like data types and is out of line to reduce code duplication.
66 /// This function will report a fatal error if it cannot increase capacity.
67 void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);
68
69public:
// Accessors widen the stored Size_T counters to size_t.
70 size_t size() const { return Size; }
71 size_t capacity() const { return Capacity; }
72
// True exactly when Size is zero.
73 LLVM_NODISCARD[[clang::warn_unused_result]] bool empty() const { return !Size; }
// NOTE(review): the lines between here and listing line 74 look like
// static-analyzer path notes (step number followed by a message) emitted by the
// report, not part of the class definition.
34
Assuming field 'Size' is not equal to 0
35
Returning zero, which participates in a condition later
42
Assuming field 'Size' is 0
43
Returning the value 1, which participates in a condition later
65
Assuming field 'Size' is 0
66
Returning the value 1, which participates in a condition later
74
75 /// Set the array size to \p N, which the current array must have enough
76 /// capacity for.
77 ///
78 /// This does not construct or destroy any elements in the vector.
79 ///
80 /// Clients can use this in conjunction with capacity() to write past the end
81 /// of the buffer when they know that more elements are available, and only
82 /// update the size later. This avoids the cost of value initializing elements
83 /// which will only be overwritten.
84 void set_size(size_t N) {
// The capacity precondition is only checked through assert; the listing shows
// it expanded to a no-op in this build.
85 assert(N <= capacity())(static_cast<void> (0));
86 Size = N;
87 }
88};
89
/// Integer type used for the Size and Capacity fields of a SmallVector of
/// \p T: uint64_t when sizeof(T) < 4 and pointers are at least 8 bytes wide,
/// uint32_t otherwise.
template <class T>
using SmallVectorSizeType =
    typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
                              uint32_t>::type;
94
95/// Figure out the offset of the first element.
96template <class T, typename = void> struct SmallVectorAlignmentAndSize {
97 alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof(
98 SmallVectorBase<SmallVectorSizeType<T>>)];
99 alignas(T) char FirstEl[sizeof(T)];
100};
101
102/// This is the part of SmallVectorTemplateBase which does not depend on whether
103/// the type T is a POD. The extra dummy template argument is used by ArrayRef
104/// to avoid unnecessarily requiring T to be complete.
105template <typename T, typename = void>
106class SmallVectorTemplateCommon
107 : public SmallVectorBase<SmallVectorSizeType<T>> {
108 using Base = SmallVectorBase<SmallVectorSizeType<T>>;
109
110 /// Find the address of the first element. For this pointer math to be valid
111 /// with small-size of 0 for T with lots of alignment, it's important that
112 /// SmallVectorStorage is properly-aligned even for small-size of 0.
113 void *getFirstEl() const {
114 return const_cast<void *>(reinterpret_cast<const void *>(
115 reinterpret_cast<const char *>(this) +
116 offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)__builtin_offsetof(SmallVectorAlignmentAndSize<T>, FirstEl
)
));
117 }
118 // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
119
120protected:
121 SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}
122
123 void grow_pod(size_t MinSize, size_t TSize) {
124 Base::grow_pod(getFirstEl(), MinSize, TSize);
125 }
126
127 /// Return true if this is a smallvector which has not had dynamic
128 /// memory allocated for it.
129 bool isSmall() const { return this->BeginX == getFirstEl(); }
130
131 /// Put this vector in a state of being small.
132 void resetToSmall() {
133 this->BeginX = getFirstEl();
134 this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
135 }
136
137 /// Return true if V is an internal reference to the given range.
138 bool isReferenceToRange(const void *V, const void *First, const void *Last) const {
139 // Use std::less to avoid UB.
140 std::less<> LessThan;
141 return !LessThan(V, First) && LessThan(V, Last);
142 }
143
144 /// Return true if V is an internal reference to this vector.
145 bool isReferenceToStorage(const void *V) const {
146 return isReferenceToRange(V, this->begin(), this->end());
147 }
148
149 /// Return true if First and Last form a valid (possibly empty) range in this
150 /// vector's storage.
151 bool isRangeInStorage(const void *First, const void *Last) const {
152 // Use std::less to avoid UB.
153 std::less<> LessThan;
154 return !LessThan(First, this->begin()) && !LessThan(Last, First) &&
155 !LessThan(this->end(), Last);
156 }
157
158 /// Return true unless Elt will be invalidated by resizing the vector to
159 /// NewSize.
160 bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
161 // Past the end.
162 if (LLVM_LIKELY(!isReferenceToStorage(Elt))__builtin_expect((bool)(!isReferenceToStorage(Elt)), true))
163 return true;
164
165 // Return false if Elt will be destroyed by shrinking.
166 if (NewSize <= this->size())
167 return Elt < this->begin() + NewSize;
168
169 // Return false if we need to grow.
170 return NewSize <= this->capacity();
171 }
172
173 /// Check whether Elt will be invalidated by resizing the vector to NewSize.
174 void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
175 assert(isSafeToReferenceAfterResize(Elt, NewSize) &&(static_cast<void> (0))
176 "Attempting to reference an element of the vector in an operation "(static_cast<void> (0))
177 "that invalidates it")(static_cast<void> (0));
178 }
179
180 /// Check whether Elt will be invalidated by increasing the size of the
181 /// vector by N.
182 void assertSafeToAdd(const void *Elt, size_t N = 1) {
183 this->assertSafeToReferenceAfterResize(Elt, this->size() + N);
184 }
185
186 /// Check whether any part of the range will be invalidated by clearing.
187 void assertSafeToReferenceAfterClear(const T *From, const T *To) {
188 if (From == To)
189 return;
190 this->assertSafeToReferenceAfterResize(From, 0);
191 this->assertSafeToReferenceAfterResize(To - 1, 0);
192 }
193 template <
194 class ItTy,
195 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
196 bool> = false>
197 void assertSafeToReferenceAfterClear(ItTy, ItTy) {}
198
199 /// Check whether any part of the range will be invalidated by growing.
200 void assertSafeToAddRange(const T *From, const T *To) {
201 if (From == To)
202 return;
203 this->assertSafeToAdd(From, To - From);
204 this->assertSafeToAdd(To - 1, To - From);
205 }
206 template <
207 class ItTy,
208 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
209 bool> = false>
210 void assertSafeToAddRange(ItTy, ItTy) {}
211
212 /// Reserve enough space to add one element, and return the updated element
213 /// pointer in case it was a reference to the storage.
214 template <class U>
215 static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt,
216 size_t N) {
217 size_t NewSize = This->size() + N;
218 if (LLVM_LIKELY(NewSize <= This->capacity())__builtin_expect((bool)(NewSize <= This->capacity()), true
)
)
219 return &Elt;
220
221 bool ReferencesStorage = false;
222 int64_t Index = -1;
223 if (!U::TakesParamByValue) {
224 if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))__builtin_expect((bool)(This->isReferenceToStorage(&Elt
)), false)
) {
225 ReferencesStorage = true;
226 Index = &Elt - This->begin();
227 }
228 }
229 This->grow(NewSize);
230 return ReferencesStorage ? This->begin() + Index : &Elt;
231 }
232
233public:
234 using size_type = size_t;
235 using difference_type = ptrdiff_t;
236 using value_type = T;
237 using iterator = T *;
238 using const_iterator = const T *;
239
240 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
241 using reverse_iterator = std::reverse_iterator<iterator>;
242
243 using reference = T &;
244 using const_reference = const T &;
245 using pointer = T *;
246 using const_pointer = const T *;
247
248 using Base::capacity;
249 using Base::empty;
250 using Base::size;
251
252 // forward iterator creation methods.
253 iterator begin() { return (iterator)this->BeginX; }
254 const_iterator begin() const { return (const_iterator)this->BeginX; }
255 iterator end() { return begin() + size(); }
256 const_iterator end() const { return begin() + size(); }
257
258 // reverse iterator creation methods.
259 reverse_iterator rbegin() { return reverse_iterator(end()); }
260 const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
261 reverse_iterator rend() { return reverse_iterator(begin()); }
262 const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
263
264 size_type size_in_bytes() const { return size() * sizeof(T); }
265 size_type max_size() const {
266 return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
267 }
268
269 size_t capacity_in_bytes() const { return capacity() * sizeof(T); }
270
271 /// Return a pointer to the vector's buffer, even if empty().
272 pointer data() { return pointer(begin()); }
273 /// Return a pointer to the vector's buffer, even if empty().
274 const_pointer data() const { return const_pointer(begin()); }
275
276 reference operator[](size_type idx) {
277 assert(idx < size())(static_cast<void> (0));
278 return begin()[idx];
279 }
280 const_reference operator[](size_type idx) const {
281 assert(idx < size())(static_cast<void> (0));
282 return begin()[idx];
283 }
284
285 reference front() {
286 assert(!empty())(static_cast<void> (0));
287 return begin()[0];
288 }
289 const_reference front() const {
290 assert(!empty())(static_cast<void> (0));
291 return begin()[0];
292 }
293
294 reference back() {
295 assert(!empty())(static_cast<void> (0));
296 return end()[-1];
297 }
298 const_reference back() const {
299 assert(!empty())(static_cast<void> (0));
300 return end()[-1];
301 }
302};
303
304/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put
305/// method implementations that are designed to work with non-trivial T's.
306///
307/// We approximate is_trivially_copyable with trivial move/copy construction and
308/// trivial destruction. While the standard doesn't specify that you're allowed
309/// copy these types with memcpy, there is no way for the type to observe this.
310/// This catches the important case of std::pair<POD, POD>, which is not
311/// trivially assignable.
312template <typename T, bool = (is_trivially_copy_constructible<T>::value) &&
313 (is_trivially_move_constructible<T>::value) &&
314 std::is_trivially_destructible<T>::value>
315class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
316 friend class SmallVectorTemplateCommon<T>;
317
318protected:
319 static constexpr bool TakesParamByValue = false;
320 using ValueParamT = const T &;
321
322 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
323
324 static void destroy_range(T *S, T *E) {
325 while (S != E) {
326 --E;
327 E->~T();
328 }
329 }
330
331 /// Move the range [I, E) into the uninitialized memory starting with "Dest",
332 /// constructing elements as needed.
333 template<typename It1, typename It2>
334 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
335 std::uninitialized_copy(std::make_move_iterator(I),
336 std::make_move_iterator(E), Dest);
337 }
338
339 /// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
340 /// constructing elements as needed.
341 template<typename It1, typename It2>
342 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
343 std::uninitialized_copy(I, E, Dest);
344 }
345
346 /// Grow the allocated memory (without initializing new elements), doubling
347 /// the size of the allocated memory. Guarantees space for at least one more
348 /// element, or MinSize more elements if specified.
349 void grow(size_t MinSize = 0);
350
351 /// Create a new allocation big enough for \p MinSize and pass back its size
352 /// in \p NewCapacity. This is the first section of \a grow().
353 T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
354 return static_cast<T *>(
355 SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
356 MinSize, sizeof(T), NewCapacity));
357 }
358
359 /// Move existing elements over to the new allocation \p NewElts, the middle
360 /// section of \a grow().
361 void moveElementsForGrow(T *NewElts);
362
363 /// Transfer ownership of the allocation, finishing up \a grow().
364 void takeAllocationForGrow(T *NewElts, size_t NewCapacity);
365
366 /// Reserve enough space to add one element, and return the updated element
367 /// pointer in case it was a reference to the storage.
368 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
369 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
370 }
371
372 /// Reserve enough space to add one element, and return the updated element
373 /// pointer in case it was a reference to the storage.
374 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
375 return const_cast<T *>(
376 this->reserveForParamAndGetAddressImpl(this, Elt, N));
377 }
378
379 static T &&forward_value_param(T &&V) { return std::move(V); }
380 static const T &forward_value_param(const T &V) { return V; }
381
382 void growAndAssign(size_t NumElts, const T &Elt) {
383 // Grow manually in case Elt is an internal reference.
384 size_t NewCapacity;
385 T *NewElts = mallocForGrow(NumElts, NewCapacity);
386 std::uninitialized_fill_n(NewElts, NumElts, Elt);
387 this->destroy_range(this->begin(), this->end());
388 takeAllocationForGrow(NewElts, NewCapacity);
389 this->set_size(NumElts);
390 }
391
392 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
393 // Grow manually in case one of Args is an internal reference.
394 size_t NewCapacity;
395 T *NewElts = mallocForGrow(0, NewCapacity);
396 ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
397 moveElementsForGrow(NewElts);
398 takeAllocationForGrow(NewElts, NewCapacity);
399 this->set_size(this->size() + 1);
400 return this->back();
401 }
402
403public:
404 void push_back(const T &Elt) {
405 const T *EltPtr = reserveForParamAndGetAddress(Elt);
406 ::new ((void *)this->end()) T(*EltPtr);
407 this->set_size(this->size() + 1);
408 }
409
410 void push_back(T &&Elt) {
411 T *EltPtr = reserveForParamAndGetAddress(Elt);
412 ::new ((void *)this->end()) T(::std::move(*EltPtr));
413 this->set_size(this->size() + 1);
414 }
415
416 void pop_back() {
417 this->set_size(this->size() - 1);
418 this->end()->~T();
419 }
420};
421
422// Define this out-of-line to dissuade the C++ compiler from inlining it.
423template <typename T, bool TriviallyCopyable>
424void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
425 size_t NewCapacity;
426 T *NewElts = mallocForGrow(MinSize, NewCapacity);
427 moveElementsForGrow(NewElts);
428 takeAllocationForGrow(NewElts, NewCapacity);
429}
430
431// Define this out-of-line to dissuade the C++ compiler from inlining it.
432template <typename T, bool TriviallyCopyable>
433void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
434 T *NewElts) {
435 // Move the elements over.
436 this->uninitialized_move(this->begin(), this->end(), NewElts);
437
438 // Destroy the original elements.
439 destroy_range(this->begin(), this->end());
440}
441
442// Define this out-of-line to dissuade the C++ compiler from inlining it.
443template <typename T, bool TriviallyCopyable>
444void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
445 T *NewElts, size_t NewCapacity) {
446 // If this wasn't grown from the inline copy, deallocate the old space.
447 if (!this->isSmall())
448 free(this->begin());
449
450 this->BeginX = NewElts;
451 this->Capacity = NewCapacity;
452}
453
454/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
455/// method implementations that are designed to work with trivially copyable
456/// T's. This allows using memcpy in place of copy/move construction and
457/// skipping destruction.
458template <typename T>
459class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
460 friend class SmallVectorTemplateCommon<T>;
461
462protected:
463 /// True if it's cheap enough to take parameters by value. Doing so avoids
464 /// overhead related to mitigations for reference invalidation.
465 static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *);
466
467 /// Either const T& or T, depending on whether it's cheap enough to take
468 /// parameters by value.
469 using ValueParamT =
470 typename std::conditional<TakesParamByValue, T, const T &>::type;
471
472 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
473
474 // No need to do a destroy loop for POD's.
475 static void destroy_range(T *, T *) {}
476
477 /// Move the range [I, E) onto the uninitialized memory
478 /// starting with "Dest", constructing elements into it as needed.
479 template<typename It1, typename It2>
480 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
481 // Just do a copy.
482 uninitialized_copy(I, E, Dest);
483 }
484
485 /// Copy the range [I, E) onto the uninitialized memory
486 /// starting with "Dest", constructing elements into it as needed.
487 template<typename It1, typename It2>
488 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
489 // Arbitrary iterator types; just use the basic implementation.
490 std::uninitialized_copy(I, E, Dest);
491 }
492
493 /// Copy the range [I, E) onto the uninitialized memory
494 /// starting with "Dest", constructing elements into it as needed.
495 template <typename T1, typename T2>
496 static void uninitialized_copy(
497 T1 *I, T1 *E, T2 *Dest,
498 std::enable_if_t<std::is_same<typename std::remove_const<T1>::type,
499 T2>::value> * = nullptr) {
500 // Use memcpy for PODs iterated by pointers (which includes SmallVector
501 // iterators): std::uninitialized_copy optimizes to memmove, but we can
502 // use memcpy here. Note that I and E are iterators and thus might be
503 // invalid for memcpy if they are equal.
504 if (I != E)
505 memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
506 }
507
508 /// Double the size of the allocated memory, guaranteeing space for at
509 /// least one more element or MinSize if specified.
510 void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }
511
512 /// Reserve enough space to add one element, and return the updated element
513 /// pointer in case it was a reference to the storage.
514 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
515 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
516 }
517
518 /// Reserve enough space to add one element, and return the updated element
519 /// pointer in case it was a reference to the storage.
520 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
521 return const_cast<T *>(
522 this->reserveForParamAndGetAddressImpl(this, Elt, N));
523 }
524
525 /// Copy \p V or return a reference, depending on \a ValueParamT.
526 static ValueParamT forward_value_param(ValueParamT V) { return V; }
527
528 void growAndAssign(size_t NumElts, T Elt) {
529 // Elt has been copied in case it's an internal reference, side-stepping
530 // reference invalidation problems without losing the realloc optimization.
531 this->set_size(0);
532 this->grow(NumElts);
533 std::uninitialized_fill_n(this->begin(), NumElts, Elt);
534 this->set_size(NumElts);
535 }
536
537 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
538 // Use push_back with a copy in case Args has an internal reference,
539 // side-stepping reference invalidation problems without losing the realloc
540 // optimization.
541 push_back(T(std::forward<ArgTypes>(Args)...));
542 return this->back();
543 }
544
545public:
546 void push_back(ValueParamT Elt) {
547 const T *EltPtr = reserveForParamAndGetAddress(Elt);
548 memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T));
549 this->set_size(this->size() + 1);
550 }
551
552 void pop_back() { this->set_size(this->size() - 1); }
553};
554
555/// This class consists of common code factored out of the SmallVector class to
556/// reduce code duplication based on the SmallVector 'N' template parameter.
557template <typename T>
558class SmallVectorImpl : public SmallVectorTemplateBase<T> {
559 using SuperClass = SmallVectorTemplateBase<T>;
560
561public:
562 using iterator = typename SuperClass::iterator;
563 using const_iterator = typename SuperClass::const_iterator;
564 using reference = typename SuperClass::reference;
565 using size_type = typename SuperClass::size_type;
566
567protected:
568 using SmallVectorTemplateBase<T>::TakesParamByValue;
569 using ValueParamT = typename SuperClass::ValueParamT;
570
571 // Default ctor - Initialize to empty.
572 explicit SmallVectorImpl(unsigned N)
573 : SmallVectorTemplateBase<T>(N) {}
574
575public:
576 SmallVectorImpl(const SmallVectorImpl &) = delete;
577
578 ~SmallVectorImpl() {
579 // Subclass has already destructed this vector's elements.
580 // If this wasn't grown from the inline copy, deallocate the old space.
581 if (!this->isSmall())
582 free(this->begin());
583 }
584
585 void clear() {
586 this->destroy_range(this->begin(), this->end());
587 this->Size = 0;
588 }
589
590private:
591 template <bool ForOverwrite> void resizeImpl(size_type N) {
592 if (N < this->size()) {
593 this->pop_back_n(this->size() - N);
594 } else if (N > this->size()) {
595 this->reserve(N);
596 for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
597 if (ForOverwrite)
598 new (&*I) T;
599 else
600 new (&*I) T();
601 this->set_size(N);
602 }
603 }
604
605public:
606 void resize(size_type N) { resizeImpl<false>(N); }
607
608 /// Like resize, but \ref T is POD, the new values won't be initialized.
609 void resize_for_overwrite(size_type N) { resizeImpl<true>(N); }
610
611 void resize(size_type N, ValueParamT NV) {
612 if (N == this->size())
613 return;
614
615 if (N < this->size()) {
616 this->pop_back_n(this->size() - N);
617 return;
618 }
619
620 // N > this->size(). Defer to append.
621 this->append(N - this->size(), NV);
622 }
623
624 void reserve(size_type N) {
625 if (this->capacity() < N)
626 this->grow(N);
627 }
628
629 void pop_back_n(size_type NumItems) {
630 assert(this->size() >= NumItems)(static_cast<void> (0));
631 this->destroy_range(this->end() - NumItems, this->end());
632 this->set_size(this->size() - NumItems);
633 }
634
635 LLVM_NODISCARD[[clang::warn_unused_result]] T pop_back_val() {
636 T Result = ::std::move(this->back());
637 this->pop_back();
638 return Result;
639 }
640
641 void swap(SmallVectorImpl &RHS);
642
643 /// Add the specified range to the end of the SmallVector.
644 template <typename in_iter,
645 typename = std::enable_if_t<std::is_convertible<
646 typename std::iterator_traits<in_iter>::iterator_category,
647 std::input_iterator_tag>::value>>
648 void append(in_iter in_start, in_iter in_end) {
649 this->assertSafeToAddRange(in_start, in_end);
650 size_type NumInputs = std::distance(in_start, in_end);
651 this->reserve(this->size() + NumInputs);
652 this->uninitialized_copy(in_start, in_end, this->end());
653 this->set_size(this->size() + NumInputs);
654 }
655
656 /// Append \p NumInputs copies of \p Elt to the end.
657 void append(size_type NumInputs, ValueParamT Elt) {
658 const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs);
659 std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr);
660 this->set_size(this->size() + NumInputs);
661 }
662
663 void append(std::initializer_list<T> IL) {
664 append(IL.begin(), IL.end());
665 }
666
667 void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); }
668
669 void assign(size_type NumElts, ValueParamT Elt) {
670 // Note that Elt could be an internal reference.
671 if (NumElts > this->capacity()) {
672 this->growAndAssign(NumElts, Elt);
673 return;
674 }
675
676 // Assign over existing elements.
677 std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
678 if (NumElts > this->size())
679 std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
680 else if (NumElts < this->size())
681 this->destroy_range(this->begin() + NumElts, this->end());
682 this->set_size(NumElts);
683 }
684
685 // FIXME: Consider assigning over existing elements, rather than clearing &
686 // re-initializing them - for all assign(...) variants.
687
688 template <typename in_iter,
689 typename = std::enable_if_t<std::is_convertible<
690 typename std::iterator_traits<in_iter>::iterator_category,
691 std::input_iterator_tag>::value>>
692 void assign(in_iter in_start, in_iter in_end) {
693 this->assertSafeToReferenceAfterClear(in_start, in_end);
694 clear();
695 append(in_start, in_end);
696 }
697
698 void assign(std::initializer_list<T> IL) {
699 clear();
700 append(IL);
701 }
702
703 void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); }
704
705 iterator erase(const_iterator CI) {
706 // Just cast away constness because this is a non-const member function.
707 iterator I = const_cast<iterator>(CI);
708
709 assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds.")(static_cast<void> (0));
710
711 iterator N = I;
712 // Shift all elts down one.
713 std::move(I+1, this->end(), I);
714 // Drop the last elt.
715 this->pop_back();
716 return(N);
717 }
718
719 iterator erase(const_iterator CS, const_iterator CE) {
720 // Just cast away constness because this is a non-const member function.
721 iterator S = const_cast<iterator>(CS);
722 iterator E = const_cast<iterator>(CE);
723
724 assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds.")(static_cast<void> (0));
725
726 iterator N = S;
727 // Shift all elts down.
728 iterator I = std::move(E, this->end(), S);
729 // Drop the last elts.
730 this->destroy_range(I, this->end());
731 this->set_size(I - this->begin());
732 return(N);
733 }
734
735private:
736 template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) {
737 // Callers ensure that ArgType is derived from T.
738 static_assert(
739 std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>,
740 T>::value,
741 "ArgType must be derived from T!");
742
743 if (I == this->end()) { // Important special case for empty vector.
744 this->push_back(::std::forward<ArgType>(Elt));
745 return this->end()-1;
746 }
747
748 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")(static_cast<void> (0));
749
750 // Grow if necessary.
751 size_t Index = I - this->begin();
752 std::remove_reference_t<ArgType> *EltPtr =
753 this->reserveForParamAndGetAddress(Elt);
754 I = this->begin() + Index;
755
756 ::new ((void*) this->end()) T(::std::move(this->back()));
757 // Push everything else over.
758 std::move_backward(I, this->end()-1, this->end());
759 this->set_size(this->size() + 1);
760
761 // If we just moved the element we're inserting, be sure to update
762 // the reference (never happens if TakesParamByValue).
763 static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value,
764 "ArgType must be 'T' when taking by value!");
765 if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end()))
766 ++EltPtr;
767
768 *I = ::std::forward<ArgType>(*EltPtr);
769 return I;
770 }
771
772public:
773 iterator insert(iterator I, T &&Elt) {
774 return insert_one_impl(I, this->forward_value_param(std::move(Elt)));
775 }
776
777 iterator insert(iterator I, const T &Elt) {
778 return insert_one_impl(I, this->forward_value_param(Elt));
779 }
780
781 iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) {
782 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
783 size_t InsertElt = I - this->begin();
784
785 if (I == this->end()) { // Important special case for empty vector.
786 append(NumToInsert, Elt);
787 return this->begin()+InsertElt;
788 }
789
790 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")(static_cast<void> (0));
791
792 // Ensure there is enough space, and get the (maybe updated) address of
793 // Elt.
794 const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert);
795
796 // Uninvalidate the iterator.
797 I = this->begin()+InsertElt;
798
799 // If there are more elements between the insertion point and the end of the
800 // range than there are being inserted, we can use a simple approach to
801 // insertion. Since we already reserved space, we know that this won't
802 // reallocate the vector.
803 if (size_t(this->end()-I) >= NumToInsert) {
804 T *OldEnd = this->end();
805 append(std::move_iterator<iterator>(this->end() - NumToInsert),
806 std::move_iterator<iterator>(this->end()));
807
808 // Copy the existing elements that get replaced.
809 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
810
811 // If we just moved the element we're inserting, be sure to update
812 // the reference (never happens if TakesParamByValue).
813 if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
814 EltPtr += NumToInsert;
815
816 std::fill_n(I, NumToInsert, *EltPtr);
817 return I;
818 }
819
820 // Otherwise, we're inserting more elements than exist already, and we're
821 // not inserting at the end.
822
823 // Move over the elements that we're about to overwrite.
824 T *OldEnd = this->end();
825 this->set_size(this->size() + NumToInsert);
826 size_t NumOverwritten = OldEnd-I;
827 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
828
829 // If we just moved the element we're inserting, be sure to update
830 // the reference (never happens if TakesParamByValue).
831 if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
832 EltPtr += NumToInsert;
833
834 // Replace the overwritten part.
835 std::fill_n(I, NumOverwritten, *EltPtr);
836
837 // Insert the non-overwritten middle part.
838 std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr);
839 return I;
840 }
841
842 template <typename ItTy,
843 typename = std::enable_if_t<std::is_convertible<
844 typename std::iterator_traits<ItTy>::iterator_category,
845 std::input_iterator_tag>::value>>
846 iterator insert(iterator I, ItTy From, ItTy To) {
847 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
848 size_t InsertElt = I - this->begin();
849
850 if (I == this->end()) { // Important special case for empty vector.
851 append(From, To);
852 return this->begin()+InsertElt;
853 }
854
855 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")(static_cast<void> (0));
856
857 // Check that the reserve that follows doesn't invalidate the iterators.
858 this->assertSafeToAddRange(From, To);
859
860 size_t NumToInsert = std::distance(From, To);
861
862 // Ensure there is enough space.
863 reserve(this->size() + NumToInsert);
864
865 // Uninvalidate the iterator.
866 I = this->begin()+InsertElt;
867
868 // If there are more elements between the insertion point and the end of the
869 // range than there are being inserted, we can use a simple approach to
870 // insertion. Since we already reserved space, we know that this won't
871 // reallocate the vector.
872 if (size_t(this->end()-I) >= NumToInsert) {
873 T *OldEnd = this->end();
874 append(std::move_iterator<iterator>(this->end() - NumToInsert),
875 std::move_iterator<iterator>(this->end()));
876
877 // Copy the existing elements that get replaced.
878 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
879
880 std::copy(From, To, I);
881 return I;
882 }
883
884 // Otherwise, we're inserting more elements than exist already, and we're
885 // not inserting at the end.
886
887 // Move over the elements that we're about to overwrite.
888 T *OldEnd = this->end();
889 this->set_size(this->size() + NumToInsert);
890 size_t NumOverwritten = OldEnd-I;
891 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
892
893 // Replace the overwritten part.
894 for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
895 *J = *From;
896 ++J; ++From;
897 }
898
899 // Insert the non-overwritten middle part.
900 this->uninitialized_copy(From, To, OldEnd);
901 return I;
902 }
903
904 void insert(iterator I, std::initializer_list<T> IL) {
905 insert(I, IL.begin(), IL.end());
906 }
907
908 template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
909 if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity
()), false)
)
910 return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);
911
912 ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
913 this->set_size(this->size() + 1);
914 return this->back();
915 }
916
917 SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
918
919 SmallVectorImpl &operator=(SmallVectorImpl &&RHS);
920
921 bool operator==(const SmallVectorImpl &RHS) const {
922 if (this->size() != RHS.size()) return false;
923 return std::equal(this->begin(), this->end(), RHS.begin());
924 }
925 bool operator!=(const SmallVectorImpl &RHS) const {
926 return !(*this == RHS);
927 }
928
929 bool operator<(const SmallVectorImpl &RHS) const {
930 return std::lexicographical_compare(this->begin(), this->end(),
931 RHS.begin(), RHS.end());
932 }
933};
934
935template <typename T>
936void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
937 if (this == &RHS) return;
938
939 // We can only avoid copying elements if neither vector is small.
940 if (!this->isSmall() && !RHS.isSmall()) {
941 std::swap(this->BeginX, RHS.BeginX);
942 std::swap(this->Size, RHS.Size);
943 std::swap(this->Capacity, RHS.Capacity);
944 return;
945 }
946 this->reserve(RHS.size());
947 RHS.reserve(this->size());
948
949 // Swap the shared elements.
950 size_t NumShared = this->size();
951 if (NumShared > RHS.size()) NumShared = RHS.size();
952 for (size_type i = 0; i != NumShared; ++i)
953 std::swap((*this)[i], RHS[i]);
954
955 // Copy over the extra elts.
956 if (this->size() > RHS.size()) {
957 size_t EltDiff = this->size() - RHS.size();
958 this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
959 RHS.set_size(RHS.size() + EltDiff);
960 this->destroy_range(this->begin()+NumShared, this->end());
961 this->set_size(NumShared);
962 } else if (RHS.size() > this->size()) {
963 size_t EltDiff = RHS.size() - this->size();
964 this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
965 this->set_size(this->size() + EltDiff);
966 this->destroy_range(RHS.begin()+NumShared, RHS.end());
967 RHS.set_size(NumShared);
968 }
969}
970
971template <typename T>
972SmallVectorImpl<T> &SmallVectorImpl<T>::
973 operator=(const SmallVectorImpl<T> &RHS) {
974 // Avoid self-assignment.
975 if (this == &RHS) return *this;
976
977 // If we already have sufficient space, assign the common elements, then
978 // destroy any excess.
979 size_t RHSSize = RHS.size();
980 size_t CurSize = this->size();
981 if (CurSize >= RHSSize) {
982 // Assign common elements.
983 iterator NewEnd;
984 if (RHSSize)
985 NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
986 else
987 NewEnd = this->begin();
988
989 // Destroy excess elements.
990 this->destroy_range(NewEnd, this->end());
991
992 // Trim.
993 this->set_size(RHSSize);
994 return *this;
995 }
996
997 // If we have to grow to have enough elements, destroy the current elements.
998 // This allows us to avoid copying them during the grow.
999 // FIXME: don't do this if they're efficiently moveable.
1000 if (this->capacity() < RHSSize) {
1001 // Destroy current elements.
1002 this->clear();
1003 CurSize = 0;
1004 this->grow(RHSSize);
1005 } else if (CurSize) {
1006 // Otherwise, use assignment for the already-constructed elements.
1007 std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
1008 }
1009
1010 // Copy construct the new elements in place.
1011 this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
1012 this->begin()+CurSize);
1013
1014 // Set end.
1015 this->set_size(RHSSize);
1016 return *this;
1017}
1018
1019template <typename T>
1020SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
1021 // Avoid self-assignment.
1022 if (this == &RHS) return *this;
1023
1024 // If the RHS isn't small, clear this vector and then steal its buffer.
1025 if (!RHS.isSmall()) {
1026 this->destroy_range(this->begin(), this->end());
1027 if (!this->isSmall()) free(this->begin());
1028 this->BeginX = RHS.BeginX;
1029 this->Size = RHS.Size;
1030 this->Capacity = RHS.Capacity;
1031 RHS.resetToSmall();
1032 return *this;
1033 }
1034
1035 // If we already have sufficient space, assign the common elements, then
1036 // destroy any excess.
1037 size_t RHSSize = RHS.size();
1038 size_t CurSize = this->size();
1039 if (CurSize >= RHSSize) {
1040 // Assign common elements.
1041 iterator NewEnd = this->begin();
1042 if (RHSSize)
1043 NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);
1044
1045 // Destroy excess elements and trim the bounds.
1046 this->destroy_range(NewEnd, this->end());
1047 this->set_size(RHSSize);
1048
1049 // Clear the RHS.
1050 RHS.clear();
1051
1052 return *this;
1053 }
1054
1055 // If we have to grow to have enough elements, destroy the current elements.
1056 // This allows us to avoid copying them during the grow.
1057 // FIXME: this may not actually make any sense if we can efficiently move
1058 // elements.
1059 if (this->capacity() < RHSSize) {
1060 // Destroy current elements.
1061 this->clear();
1062 CurSize = 0;
1063 this->grow(RHSSize);
1064 } else if (CurSize) {
1065 // Otherwise, use assignment for the already-constructed elements.
1066 std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
1067 }
1068
1069 // Move-construct the new elements in place.
1070 this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
1071 this->begin()+CurSize);
1072
1073 // Set end.
1074 this->set_size(RHSSize);
1075
1076 RHS.clear();
1077 return *this;
1078}
1079
/// Raw, suitably aligned inline storage for \p N elements of type \p T.
/// The N == 0 case is specialized below so that no array is declared.
template <typename T, unsigned N>
struct SmallVectorStorage {
  alignas(alignof(T)) char InlineElts[sizeof(T) * N];
};

/// Empty storage for N == 0. It still carries T's alignment so that the
/// pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
/// well-defined.
template <typename T>
struct alignas(alignof(T)) SmallVectorStorage<T, 0> {};
1091
1092/// Forward declaration of SmallVector so that
1093/// calculateSmallVectorDefaultInlinedElements can reference
1094/// `sizeof(SmallVector<T, 0>)`.
1095template <typename T, unsigned N> class LLVM_GSL_OWNER[[gsl::Owner]] SmallVector;
1096
1097/// Helper class for calculating the default number of inline elements for
1098/// `SmallVector<T>`.
1099///
1100/// This should be migrated to a constexpr function when our minimum
1101/// compiler support is enough for multi-statement constexpr functions.
1102template <typename T> struct CalculateSmallVectorDefaultInlinedElements {
1103 // Parameter controlling the default number of inlined elements
1104 // for `SmallVector<T>`.
1105 //
1106 // The default number of inlined elements ensures that
1107 // 1. There is at least one inlined element.
1108 // 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless
1109 // it contradicts 1.
1110 static constexpr size_t kPreferredSmallVectorSizeof = 64;
1111
1112 // static_assert that sizeof(T) is not "too big".
1113 //
1114 // Because our policy guarantees at least one inlined element, it is possible
1115 // for an arbitrarily large inlined element to allocate an arbitrarily large
1116 // amount of inline storage. We generally consider it an antipattern for a
1117 // SmallVector to allocate an excessive amount of inline storage, so we want
1118 // to call attention to these cases and make sure that users are making an
1119 // intentional decision if they request a lot of inline storage.
1120 //
1121 // We want this assertion to trigger in pathological cases, but otherwise
1122 // not be too easy to hit. To accomplish that, the cutoff is actually somewhat
1123 // larger than kPreferredSmallVectorSizeof (otherwise,
1124 // `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that
1125 // pattern seems useful in practice).
1126 //
1127 // One wrinkle is that this assertion is in theory non-portable, since
1128 // sizeof(T) is in general platform-dependent. However, we don't expect this
1129 // to be much of an issue, because most LLVM development happens on 64-bit
1130 // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for
1131 // 32-bit hosts, dodging the issue. The reverse situation, where development
1132 // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a
1133 // 64-bit host, is expected to be very rare.
1134 static_assert(
1135 sizeof(T) <= 256,
1136 "You are trying to use a default number of inlined elements for "
1137 "`SmallVector<T>` but `sizeof(T)` is really big! Please use an "
1138 "explicit number of inlined elements with `SmallVector<T, N>` to make "
1139 "sure you really want that much inline storage.");
1140
1141 // Discount the size of the header itself when calculating the maximum inline
1142 // bytes.
1143 static constexpr size_t PreferredInlineBytes =
1144 kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>);
1145 static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T);
1146 static constexpr size_t value =
1147 NumElementsThatFit == 0 ? 1 : NumElementsThatFit;
1148};
1149
1150/// This is a 'vector' (really, a variable-sized array), optimized
1151/// for the case when the array is small. It contains some number of elements
1152/// in-place, which allows it to avoid heap allocation when the actual number of
1153/// elements is below that threshold. This allows normal "small" cases to be
1154/// fast without losing generality for large inputs.
1155///
1156/// \note
1157/// In the absence of a well-motivated choice for the number of inlined
1158/// elements \p N, it is recommended to use \c SmallVector<T> (that is,
1159/// omitting the \p N). This will choose a default number of inlined elements
1160/// reasonable for allocation on the stack (for example, trying to keep \c
1161/// sizeof(SmallVector<T>) around 64 bytes).
1162///
1163/// \warning This does not attempt to be exception safe.
1164///
1165/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h
1166template <typename T,
1167 unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value>
1168class LLVM_GSL_OWNER[[gsl::Owner]] SmallVector : public SmallVectorImpl<T>,
1169 SmallVectorStorage<T, N> {
1170public:
1171 SmallVector() : SmallVectorImpl<T>(N) {}
1172
1173 ~SmallVector() {
1174 // Destroy the constructed elements in the vector.
1175 this->destroy_range(this->begin(), this->end());
1176 }
1177
1178 explicit SmallVector(size_t Size, const T &Value = T())
1179 : SmallVectorImpl<T>(N) {
1180 this->assign(Size, Value);
1181 }
1182
1183 template <typename ItTy,
1184 typename = std::enable_if_t<std::is_convertible<
1185 typename std::iterator_traits<ItTy>::iterator_category,
1186 std::input_iterator_tag>::value>>
1187 SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
1188 this->append(S, E);
1189 }
1190
1191 template <typename RangeTy>
1192 explicit SmallVector(const iterator_range<RangeTy> &R)
1193 : SmallVectorImpl<T>(N) {
1194 this->append(R.begin(), R.end());
1195 }
1196
1197 SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
1198 this->assign(IL);
1199 }
1200
1201 SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
1202 if (!RHS.empty())
1203 SmallVectorImpl<T>::operator=(RHS);
1204 }
1205
1206 SmallVector &operator=(const SmallVector &RHS) {
1207 SmallVectorImpl<T>::operator=(RHS);
1208 return *this;
1209 }
1210
1211 SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
1212 if (!RHS.empty())
1213 SmallVectorImpl<T>::operator=(::std::move(RHS));
1214 }
1215
1216 SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
1217 if (!RHS.empty())
1218 SmallVectorImpl<T>::operator=(::std::move(RHS));
1219 }
1220
1221 SmallVector &operator=(SmallVector &&RHS) {
1222 SmallVectorImpl<T>::operator=(::std::move(RHS));
1223 return *this;
1224 }
1225
1226 SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
1227 SmallVectorImpl<T>::operator=(::std::move(RHS));
1228 return *this;
1229 }
1230
1231 SmallVector &operator=(std::initializer_list<T> IL) {
1232 this->assign(IL);
1233 return *this;
1234 }
1235};
1236
1237template <typename T, unsigned N>
1238inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
1239 return X.capacity_in_bytes();
1240}
1241
1242/// Given a range of type R, iterate the entire range and return a
1243/// SmallVector with elements of the vector. This is useful, for example,
1244/// when you want to iterate a range and then sort the results.
1245template <unsigned Size, typename R>
1246SmallVector<typename std::remove_const<typename std::remove_reference<
1247 decltype(*std::begin(std::declval<R &>()))>::type>::type,
1248 Size>
1249to_vector(R &&Range) {
1250 return {std::begin(Range), std::end(Range)};
1251}
1252
1253} // end namespace llvm
1254
1255namespace std {
1256
1257 /// Implement std::swap in terms of SmallVector swap.
1258 template<typename T>
1259 inline void
1260 swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) {
1261 LHS.swap(RHS);
1262 }
1263
1264 /// Implement std::swap in terms of SmallVector swap.
1265 template<typename T, unsigned N>
1266 inline void
1267 swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
1268 LHS.swap(RHS);
1269 }
1270
1271} // end namespace std
1272
1273#endif // LLVM_ADT_SMALLVECTOR_H

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include <cassert>
18#include <climits>
19#include <cmath>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
/// Selects what an operation returns when its input is 0.
enum ZeroBehavior {
  ZB_Undefined = 0, ///< The returned value is undefined.
  ZB_Max = 1,       ///< The returned value is numeric_limits<T>::max()
  ZB_Width = 2      ///< The returned value is numeric_limits<T>::digits
};
52
/// Mathematical constants, in double and float precision. The trailing
/// comment on each line gives the hexadecimal floating-point form and, where
/// available, the OEIS sequence of the decimal expansion.
namespace numbers {
// TODO: Track C++20 std::numbers.
// TODO: Favor using the hexadecimal FP constants (requires C++17).

// Double precision.
constexpr double e = 2.7182818284590452354;          // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
constexpr double egamma = .57721566490153286061;     // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
constexpr double ln2 = .69314718055994530942;        // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
constexpr double ln10 = 2.3025850929940456840;       // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392
constexpr double log2e = 1.4426950408889634074;      // (0x1.71547652b82feP+0)
constexpr double log10e = .43429448190325182765;     // (0x1.bcb7b1526e50eP-2)
constexpr double pi = 3.1415926535897932385;         // (0x1.921fb54442d18P+1) https://oeis.org/A000796
constexpr double inv_pi = .31830988618379067154;     // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
constexpr double sqrtpi = 1.7724538509055160273;     // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
constexpr double inv_sqrtpi = .56418958354775628695; // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
constexpr double sqrt2 = 1.4142135623730950488;      // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
constexpr double inv_sqrt2 = .70710678118654752440;  // (0x1.6a09e667f3bcdP-1)
constexpr double sqrt3 = 1.7320508075688772935;      // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
constexpr double inv_sqrt3 = .57735026918962576451;  // (0x1.279a74590331cP-1)
constexpr double phi = 1.6180339887498948482;        // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622

// Single precision.
constexpr float ef = 2.71828183F;          // (0x1.5bf0a8P+1) https://oeis.org/A001113
constexpr float egammaf = .577215665F;     // (0x1.2788d0P-1) https://oeis.org/A001620
constexpr float ln2f = .693147181F;        // (0x1.62e430P-1) https://oeis.org/A002162
constexpr float ln10f = 2.30258509F;       // (0x1.26bb1cP+1) https://oeis.org/A002392
constexpr float log2ef = 1.44269504F;      // (0x1.715476P+0)
constexpr float log10ef = .434294482F;     // (0x1.bcb7b2P-2)
constexpr float pif = 3.14159265F;         // (0x1.921fb6P+1) https://oeis.org/A000796
constexpr float inv_pif = .318309886F;     // (0x1.45f306P-2) https://oeis.org/A049541
constexpr float sqrtpif = 1.77245385F;     // (0x1.c5bf8aP+0) https://oeis.org/A002161
constexpr float inv_sqrtpif = .564189584F; // (0x1.20dd76P-1) https://oeis.org/A087197
constexpr float sqrt2f = 1.41421356F;      // (0x1.6a09e6P+0) https://oeis.org/A002193
constexpr float inv_sqrt2f = .707106781F;  // (0x1.6a09e6P-1)
constexpr float sqrt3f = 1.73205081F;      // (0x1.bb67aeP+0) https://oeis.org/A002194
constexpr float inv_sqrt3f = .577350269F;  // (0x1.279a74P-1)
constexpr float phif = 1.61803399F;        // (0x1.9e377aP+0) https://oeis.org/A001622
} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__4) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB
50.1
'ZB' is not equal to ZB_Undefined
50.1
'ZB' is not equal to ZB_Undefined
50.1
'ZB' is not equal to ZB_Undefined
50.1
'ZB' is not equal to ZB_Undefined
!= ZB_Undefined && Val == 0)
51
Assuming 'Val' is equal to 0
52
Taking true branch
117 return 32;
53
Returning the value 32
118
119#if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
133 return 64;
134
135#if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count number of 0's from the least significant bit to the most
149/// stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
50
Calling 'TrailingZerosCounter::count'
54
Returning from 'TrailingZerosCounter::count'
55
Returning the value 32
161}
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__4) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz)1 || defined(__GNUC__4)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count number of 0's from the most significant bit to the least
218/// stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
/// Build a mask whose \p N least significant bits are 1 and whose remaining
/// bits are 0. Only unsigned types are allowed.
template <typename T> T maskTrailingOnes(unsigned N) {
  static_assert(std::is_unsigned<T>::value, "Invalid type!");
  const unsigned Bits = CHAR_BIT * sizeof(T);
  assert(N <= Bits && "Invalid bit index");

  // N == 0 is handled separately: the general formula would shift by the
  // full width of T.
  if (N == 0)
    return 0;
  return T(-1) >> (Bits - N);
}
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
271}
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
289
/// Lookup table mapping each byte value to the byte with its bits in reverse
/// order. The 256 entries are generated by three nested helper macros, which
/// are undefined again right after the table.
///
/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
static const unsigned char BitReverseTable256[256] = {
#define LLVM_BITREV_R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
#define LLVM_BITREV_R4(n)                                                      \
  LLVM_BITREV_R2(n), LLVM_BITREV_R2(n + 2 * 16), LLVM_BITREV_R2(n + 1 * 16),   \
      LLVM_BITREV_R2(n + 3 * 16)
#define LLVM_BITREV_R6(n)                                                      \
  LLVM_BITREV_R4(n), LLVM_BITREV_R4(n + 2 * 4), LLVM_BITREV_R4(n + 1 * 4),     \
      LLVM_BITREV_R4(n + 3 * 4)
    LLVM_BITREV_R6(0), LLVM_BITREV_R6(2), LLVM_BITREV_R6(1), LLVM_BITREV_R6(3)
#undef LLVM_BITREV_R2
#undef LLVM_BITREV_R4
#undef LLVM_BITREV_R6
};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
314
315#if __has_builtin(__builtin_bitreverse8)1
316template<>
317inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
318 return __builtin_bitreverse8(Val);
319}
320#endif
321
322#if __has_builtin(__builtin_bitreverse16)1
323template<>
324inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
325 return __builtin_bitreverse16(Val);
326}
327#endif
328
329#if __has_builtin(__builtin_bitreverse32)1
330template<>
331inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
332 return __builtin_bitreverse32(Val);
333}
334#endif
335
336#if __has_builtin(__builtin_bitreverse64)1
337template<>
338inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
339 return __builtin_bitreverse64(Val);
340}
341#endif
342
343// NOTE: The following support functions use the _32/_64 extensions instead of
344// type overloading so that signed and unsigned integers can be used without
345// ambiguity.
346
/// Extract the upper 32 bits of a 64-bit value.
constexpr inline uint32_t Hi_32(uint64_t Value) {
  return static_cast<uint32_t>((Value >> 32) & UINT64_C(0xFFFFFFFF));
}
351
/// Extract the lower 32 bits of a 64-bit value.
constexpr inline uint32_t Lo_32(uint64_t Value) {
  return static_cast<uint32_t>(Value & UINT64_C(0xFFFFFFFF));
}
356
/// Combine a high and a low 32-bit half into one 64-bit integer.
constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
  return (static_cast<uint64_t>(High) << 32) | static_cast<uint64_t>(Low);
}
361
/// Checks if a signed integer fits into \p N bits.
///
/// For N >= 64 every int64_t fits. For N == 0 only the value 0 is treated as
/// representable; this case is answered without evaluating the range test,
/// whose shift count (N - 1) would wrap around for N == 0.
template <unsigned N> constexpr inline bool isInt(int64_t x) {
  return N == 0 ? x == 0
                : (N >= 64 ||
                   (-(INT64_C(1) << (N ? N - 1 : 0)) <= x &&
                    x < (INT64_C(1) << (N ? N - 1 : 0))));
}
// Template specializations to get better code for common cases: a value fits
// exactly when a round trip through the narrower type leaves it unchanged.
template <> constexpr inline bool isInt<8>(int64_t x) {
  return static_cast<int8_t>(x) == x;
}
template <> constexpr inline bool isInt<16>(int64_t x) {
  return static_cast<int16_t>(x) == x;
}
template <> constexpr inline bool isInt<32>(int64_t x) {
  return static_cast<int32_t>(x) == x;
}
376
377/// Checks if a signed integer is an N bit number shifted left by S.
378template <unsigned N, unsigned S>
379constexpr inline bool isShiftedInt(int64_t x) {
380 static_assert(
381 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
382 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
383 return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0);
384}
385
/// Checks if an unsigned integer fits into \p N bits.
///
/// This is deliberately split into two overloads instead of the single
/// expression
///
///   return N >= 64 || X < (UINT64_C(1) << N);
///
/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
/// left too many places.
template <unsigned N>
constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
  static_assert(N > 0, "isUInt<0> doesn't make sense");
  // X fits into N bits exactly when no bit at position N or above is set.
  return (X >> N) == 0;
}
/// For 64 bits and more every uint64_t fits.
template <unsigned N>
constexpr inline std::enable_if_t<(N >= 64), bool> isUInt(uint64_t) {
  return true;
}

// Template specializations to get better code for common cases.
template <> constexpr inline bool isUInt<8>(uint64_t x) {
  return x <= UINT8_MAX;
}
template <> constexpr inline bool isUInt<16>(uint64_t x) {
  return x <= UINT16_MAX;
}
template <> constexpr inline bool isUInt<32>(uint64_t x) {
  return x <= UINT32_MAX;
}
414
415/// Checks if a unsigned integer is an N bit number shifted left by S.
416template <unsigned N, unsigned S>
417constexpr inline bool isShiftedUInt(uint64_t x) {
418 static_assert(
419 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
420 static_assert(N + S <= 64,
421 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
422 // Per the two static_asserts above, S must be strictly less than 64. So
423 // 1 << S is not undefined behavior.
424 return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0);
425}
426
/// Gets the maximum value for an N-bit unsigned integer. \p N must be in the
/// range [1, 64].
inline uint64_t maxUIntN(uint64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // uint64_t(1) << 64 is undefined behavior, so we can't do
  //   (uint64_t(1) << N) - 1
  // without checking first that N != 64. Shifting the all-ones value right
  // instead works for every valid N and doesn't have a branch.
  const uint64_t UnusedBits = 64 - N;
  return UINT64_MAX >> UnusedBits;
}
437
/// Gets the minimum value for an N-bit signed integer. \p N must be in the
/// range [1, 64].
inline int64_t minIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // Negate the sign bit (complement plus one) in unsigned arithmetic; the
  // result is converted to int64_t only on return.
  const uint64_t SignBit = UINT64_C(1) << (N - 1);
  return UINT64_C(1) + ~SignBit;
}
444
/// Gets the maximum value for an N-bit signed integer. \p N must be in the
/// range [1, 64].
inline int64_t maxIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // This relies on two's complement wraparound when N == 64, so we convert to
  // int64_t only at the very end to avoid UB.
  const uint64_t SignBit = UINT64_C(1) << (N - 1);
  return SignBit - 1;
}
453
454/// Checks if an unsigned integer fits into the given (dynamic) bit width.
455inline bool isUIntN(unsigned N, uint64_t x) {
456 return N >= 64 || x <= maxUIntN(N);
457}
458
459/// Checks if an signed integer fits into the given (dynamic) bit width.
460inline bool isIntN(unsigned N, int64_t x) {
461 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
462}
463
/// Return true if the argument is a non-empty run of ones that starts at the
/// least significant bit, with every other bit zero (32 bit version).
/// Ex. isMask_32(0x0000FFFFU) == true.
constexpr inline bool isMask_32(uint32_t Value) {
  // Adding one to such a run clears every bit of it.
  return Value != 0 && (Value & (Value + 1)) == 0;
}
470
/// Return true if the argument is a non-empty run of ones that starts at the
/// least significant bit, with every other bit zero (64 bit version).
constexpr inline bool isMask_64(uint64_t Value) {
  // Adding one to such a run clears every bit of it.
  return Value != 0 && (Value & (Value + 1)) == 0;
}
476
477/// Return true if the argument contains a non-empty sequence of ones with the
478/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
479constexpr inline bool isShiftedMask_32(uint32_t Value) {
480 return Value && isMask_32((Value - 1) | Value);
481}
482
483/// Return true if the argument contains a non-empty sequence of ones with the
484/// remainder zero (64 bit version.)
485constexpr inline bool isShiftedMask_64(uint64_t Value) {
486 return Value && isMask_64((Value - 1) | Value);
487}
488
/// Returns true if \p Value is a power of two greater than zero (32 bit
/// edition). Ex. isPowerOf2_32(0x00100000U) == true.
constexpr inline bool isPowerOf2_32(uint32_t Value) {
  // Clearing the lowest set bit leaves zero only when exactly one bit is set.
  return Value != 0 && (Value & (Value - 1)) == 0;
}
494
/// Returns true if \p Value is a power of two greater than zero (64 bit
/// edition).
constexpr inline bool isPowerOf2_64(uint64_t Value) {
  // Clearing the lowest set bit leaves zero only when exactly one bit is set.
  return Value != 0 && (Value & (Value - 1)) == 0;
}
499
500/// Count the number of ones from the most significant bit to the first
501/// zero bit.
502///
503/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
504/// Only unsigned integral types are allowed.
505///
506/// \param ZB the behavior on an input of all ones. Only ZB_Width and
507/// ZB_Undefined are valid arguments.
508template <typename T>
509unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
510 static_assert(std::numeric_limits<T>::is_integer &&
511 !std::numeric_limits<T>::is_signed,
512 "Only unsigned integral types are allowed.");
513 return countLeadingZeros<T>(~Value, ZB);
514}
515
516/// Count the number of ones from the least significant bit to the first
517/// zero bit.
518///
519/// Ex. countTrailingOnes(0x00FF00FF) == 8.
520/// Only unsigned integral types are allowed.
521///
522/// \param ZB the behavior on an input of all ones. Only ZB_Width and
523/// ZB_Undefined are valid arguments.
524template <typename T>
525unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
526 static_assert(std::numeric_limits<T>::is_integer &&
527 !std::numeric_limits<T>::is_signed,
528 "Only unsigned integral types are allowed.");
529 return countTrailingZeros<T>(~Value, ZB);
530}
531
namespace detail {
/// Set-bit counter for types of at most 4 bytes; the value is widened to
/// 32 bits before counting.
template <typename T, std::size_t SizeOfT> struct PopulationCounter {
  static unsigned count(T Value) {
    // Generic version, forward to 32 bits.
    static_assert(SizeOfT <= 4, "Not implemented!");
#if defined(__GNUC__)
    return __builtin_popcount(Value);
#else
    // Parallel bit count: sum adjacent 1-bit, 2-bit, then 4-bit fields, and
    // let the multiply accumulate the four byte sums into the top byte.
    uint32_t Bits = Value;
    Bits -= (Bits >> 1) & 0x55555555;
    Bits = (Bits & 0x33333333) + ((Bits >> 2) & 0x33333333);
    Bits = (Bits + (Bits >> 4)) & 0xF0F0F0F;
    return (Bits * 0x1010101) >> 24;
#endif
  }
};

/// Set-bit counter specialised for 8-byte types.
template <typename T> struct PopulationCounter<T, 8> {
  static unsigned count(T Value) {
#if defined(__GNUC__)
    return __builtin_popcountll(Value);
#else
    // Same parallel bit count as the 32-bit version, on 64-bit fields.
    uint64_t Bits = Value;
    Bits -= (Bits >> 1) & 0x5555555555555555ULL;
    Bits = (Bits & 0x3333333333333333ULL) +
           ((Bits >> 2) & 0x3333333333333333ULL);
    Bits = (Bits + (Bits >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
    return static_cast<unsigned>((Bits * 0x0101010101010101ULL) >> 56);
#endif
  }
};
} // namespace detail
562
563/// Count the number of set bits in a value.
564/// Ex. countPopulation(0xF000F000) = 8
565/// Returns 0 if the word is zero.
566template <typename T>
567inline unsigned countPopulation(T Value) {
568 static_assert(std::numeric_limits<T>::is_integer &&
569 !std::numeric_limits<T>::is_signed,
570 "Only unsigned integral types are allowed.");
571 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
572}
573
574/// Compile time Log2.
575/// Valid only for positive powers of two.
576template <size_t kValue> constexpr inline size_t CTLog2() {
577 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
578 "Value is not a valid power of 2");
579 return 1 + CTLog2<kValue / 2>();
580}
581
582template <> constexpr inline size_t CTLog2<1>() { return 0; }
583
/// Returns the base-2 logarithm of \p Value.
inline double Log2(double Value) {
#if !defined(__ANDROID_API__) || __ANDROID_API__ >= 18
  return log2(Value);
#else
  // Android API levels below 18 take this path: derive log2 from the
  // natural logarithm instead of calling log2().
  return __builtin_log(Value) / __builtin_log(2.0);
#endif
}
592
593/// Return the floor log base 2 of the specified value, -1 if the value is zero.
594/// (32 bit edition.)
595/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
596inline unsigned Log2_32(uint32_t Value) {
597 return 31 - countLeadingZeros(Value);
598}
599
600/// Return the floor log base 2 of the specified value, -1 if the value is zero.
601/// (64 bit edition.)
602inline unsigned Log2_64(uint64_t Value) {
603 return 63 - countLeadingZeros(Value);
604}
605
606/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
607/// (32 bit edition).
608/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
609inline unsigned Log2_32_Ceil(uint32_t Value) {
610 return 32 - countLeadingZeros(Value - 1);
611}
612
613/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
614/// (64 bit edition.)
615inline unsigned Log2_64_Ceil(uint64_t Value) {
616 return 64 - countLeadingZeros(Value - 1);
617}
618
/// Returns the greatest common divisor of \p A and \p B, computed with
/// Euclid's algorithm. If \p B is zero, \p A is returned unchanged.
template <typename T>
inline T greatestCommonDivisor(T A, T B) {
  // Repeatedly replace (A, B) with (B, A mod B) until the remainder is zero.
  while (B) {
    const T Remainder = A % B;
    A = B;
    B = Remainder;
  }
  return A;
}
629
630inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
631 return greatestCommonDivisor<uint64_t>(A, B);
632}
633
/// Reinterprets the 64-bit pattern \p Bits as a double and returns it.
inline double BitsToDouble(uint64_t Bits) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // Copy the object representation byte for byte.
  double Result;
  memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}
641
/// Reinterprets the 32-bit pattern \p Bits as a float and returns it.
inline float BitsToFloat(uint32_t Bits) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // Copy the object representation byte for byte.
  float Result;
  memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}
649
/// Returns the 64-bit pattern that represents \p Double.
/// Note that copying doubles around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint64_t DoubleToBits(double Double) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // Copy the object representation byte for byte.
  uint64_t Pattern;
  memcpy(&Pattern, &Double, sizeof(Pattern));
  return Pattern;
}
659
/// Returns the 32-bit pattern that represents \p Float.
/// Note that copying floats around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint32_t FloatToBits(float Float) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // Copy the object representation byte for byte.
  uint32_t Pattern;
  memcpy(&Pattern, &Float, sizeof(Pattern));
  return Pattern;
}
669
/// A and B are either alignments or offsets. Returns the minimum alignment
/// that may be assumed after adding the two together, i.e. the largest power
/// of two that divides both A and B (0 when both are 0).
constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
  // Isolate the lowest set bit of (A | B). The usual "x & -x" is spelled
  // "x & (~x + 1)" to avoid MSVC warning C4146 (unary minus on unsigned).
  return (A | B) & (~(A | B) + 1);
}
680
/// Returns the next power of two (in 64-bits) that is strictly greater than A.
/// Returns zero on overflow.
inline uint64_t NextPowerOf2(uint64_t A) {
  // Smear the highest set bit into every lower position (shifts of 1, 2, 4,
  // 8, 16, 32), producing 2^k - 1; adding one then yields 2^k.
  for (unsigned Shift = 1; Shift < 64; Shift <<= 1)
    A |= A >> Shift;
  return A + 1;
}
692
693/// Returns the power of two which is less than or equal to the given value.
694/// Essentially, it is a floor operation across the domain of powers of two.
695inline uint64_t PowerOf2Floor(uint64_t A) {
696 if (!A) return 0;
697 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
698}
699
700/// Returns the power of two which is greater than or equal to the given value.
701/// Essentially, it is a ceil operation across the domain of powers of two.
702inline uint64_t PowerOf2Ceil(uint64_t A) {
703 if (!A)
704 return 0;
705 return NextPowerOf2(A - 1);
706}
707
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
///
/// If non-zero \p Skew is specified, the return value will be a minimal
/// integer that is greater than or equal to \p Value and equal to
/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
///
/// Examples:
/// \code
///   alignTo(5, 8) = 8
///   alignTo(17, 8) = 24
///   alignTo(~0LL, 8) = 0
///   alignTo(321, 255) = 510
///
///   alignTo(5, 8, 7) = 7
///   alignTo(17, 8, 1) = 17
///   alignTo(~0LL, 8, 3) = 3
///   alignTo(321, 255, 42) = 552
/// \endcode
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  // Number of whole Align-sized units needed once the skew is removed; the
  // intermediate sum is allowed to wrap mod 2**64 as documented.
  const uint64_t Units = (Value + Align - 1 - Skew) / Align;
  return Units * Align + Skew;
}
733
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \c Align. \c Align is a compile-time
/// constant and must be non-zero.
template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
  static_assert(Align != 0u, "Align must be non-zero");
  // Round up to whole Align-sized units, then scale back.
  return ((Value + Align - 1) / Align) * Align;
}
740
/// Returns the integer ceil(Numerator / Denominator).
///
/// \p Denominator must be non-zero. The result is exact over the whole
/// uint64_t range: it is built from the quotient and remainder, so no
/// intermediate value can wrap around.
inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
  assert(Denominator != 0u && "Division by zero");
  const uint64_t Quotient = Numerator / Denominator;
  // A non-zero remainder implies Denominator >= 2, hence Quotient <= 2^63 and
  // the increment cannot overflow.
  return Numerator % Denominator != 0 ? Quotient + 1 : Quotient;
}
745
/// Returns the integer nearest(Numerator / Denominator); exact halves round
/// up.
///
/// \p Denominator must be non-zero. The result is exact over the whole
/// uint64_t range: rounding is decided from the remainder, so no intermediate
/// sum can wrap around.
inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
  assert(Denominator != 0u && "Division by zero");
  const uint64_t Quotient = Numerator / Denominator;
  const uint64_t Remainder = Numerator % Denominator;
  // Round up when Remainder >= ceil(Denominator / 2), written as a strict
  // comparison against (Denominator - 1) / 2 so nothing needs to be added.
  const bool RoundUp = Remainder > (Denominator - 1) / 2;
  return Quotient + (RoundUp ? 1 : 0);
}
750
/// Returns the largest uint64_t less than or equal to \p Value and is
/// \p Skew mod \p Align. \p Align must be non-zero. \p Skew is first reduced
/// modulo \p Align.
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  // Whole Align-sized units that fit below Value once the skew is removed.
  const uint64_t Units = (Value - Skew) / Align;
  return Units * Align + Skew;
}
758
/// Sign-extends the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32 (enforced at compile time).
template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 32, "Bit width out of range.");
  // Move bit B-1 into the sign position, then shift back as a signed value
  // so the sign bit is replicated into the upper bits.
  return static_cast<int32_t>(X << (32 - B)) >> (32 - B);
}
766
/// Sign-extends the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32 (checked by assertions only).
inline int32_t SignExtend32(uint32_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 32 && "Bit width out of range.");
  // Move bit B-1 into the sign position, then shift back as a signed value
  // so the sign bit is replicated into the upper bits.
  const unsigned Shift = 32 - B;
  return static_cast<int32_t>(X << Shift) >> Shift;
}
774
/// Sign-extends the number in the bottom B bits of x to a 64-bit integer.
/// Requires 0 < B <= 64 (enforced at compile time).
template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 64, "Bit width out of range.");
  // Move bit B-1 into the sign position, then shift back as a signed value
  // so the sign bit is replicated into the upper bits.
  return static_cast<int64_t>(x << (64 - B)) >> (64 - B);
}
782
/// Sign-extends the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64 (checked by assertions only).
inline int64_t SignExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 64 && "Bit width out of range.");
  // Move bit B-1 into the sign position, then shift back as a signed value
  // so the sign bit is replicated into the upper bits.
  const unsigned Shift = 64 - B;
  return static_cast<int64_t>(X << Shift) >> Shift;
}
790
/// Returns |X - Y| for two unsigned integers of type T, computed without
/// ever subtracting the larger operand from the smaller one.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
  if (X > Y)
    return X - Y;
  return Y - X;
}
797
/// Adds two unsigned integers, X and Y, of type T, clamping the result to the
/// maximum representable value of T on overflow.
///
/// \param ResultOverflowed optional; when non-null it is set to true if the
/// exact sum exceeds the maximum of T and to false otherwise.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
  // Write to a local flag when the caller did not ask for the overflow bit.
  bool Unused;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Unused;
  // Hacker's Delight, p. 29: a wrapped sum is smaller than either operand.
  const T Sum = X + Y;
  Overflowed = Sum < X || Sum < Y;
  return Overflowed ? std::numeric_limits<T>::max() : Sum;
}
814
/// Multiplies two unsigned integers, X and Y, of type T, clamping the result
/// to the maximum representable value of T on overflow.
///
/// \param ResultOverflowed optional; when non-null it is set to true if the
/// exact product exceeds the maximum of T and to false otherwise.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
  // Write to a local flag when the caller did not ask for the overflow bit.
  bool Unused;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Unused;
  Overflowed = false;

  // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
  // because it fails for uint16_t (where multiplication can have undefined
  // behavior due to promotion to int), and requires a division in addition
  // to the multiplication.

  // Log2 of the product is either Log2Product or Log2Product + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Product will
  // necessarily be less than Log2Max as desired.
  const T Max = std::numeric_limits<T>::max();
  const int Log2Max = Log2_64(Max);
  const int Log2Product = Log2_64(X) + Log2_64(Y);

  // Clearly fits.
  if (Log2Product < Log2Max)
    return X * Y;

  // Clearly does not fit.
  if (Log2Product > Log2Max) {
    Overflowed = true;
    return Max;
  }

  // Borderline: the product uses the top bit and may spill one bit past it.
  // Multiply all but the bottom bit of X, then add the rest at the end.
  T Partial = (X >> 1) * Y;
  if (Partial & ~(Max >> 1)) {
    // Doubling Partial would already lose its top bit.
    Overflowed = true;
    return Max;
  }
  Partial <<= 1;
  if (X & 1)
    return SaturatingAdd(Partial, Y, ResultOverflowed);

  return Partial;
}
859
/// Computes X * Y + A for unsigned integers of type T, clamping the result to
/// the maximum representable value of T on overflow.
///
/// \param ResultOverflowed optional; when non-null it is set to true if
/// either the multiplication or the addition overflowed, false otherwise.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
  // Write to a local flag when the caller did not ask for the overflow bit.
  bool Unused;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Unused;

  const T Product = SaturatingMultiply(X, Y, &Overflowed);
  // A saturated product is already the final answer; otherwise add A.
  return Overflowed ? Product : SaturatingAdd(A, Product, &Overflowed);
}
876
877/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
878extern const float huge_valf;
879
880
/// Adds two signed integers, storing the two's complement truncated sum in
/// \p Result. The return value (of type T) is non-zero if the addition
/// overflowed and zero otherwise.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_add_overflow)
  return __builtin_add_overflow(X, Y, &Result);
#else
  // Add in the unsigned domain, where wraparound is well defined, then
  // convert the wrapped sum back to the signed type.
  using U = std::make_unsigned_t<T>;
  const U Wrapped = static_cast<U>(X) + static_cast<U>(Y);
  Result = static_cast<T>(Wrapped);

  const bool BothPositive = X > 0 && Y > 0;
  const bool BothNegative = X < 0 && Y < 0;
  // Two positives must give a positive sum; anything else means overflow.
  if (BothPositive)
    return Result <= 0;
  // Two negatives must give a negative sum; anything else means overflow.
  if (BothNegative)
    return Result >= 0;
  // Operands of mixed sign (or a zero operand) can never overflow.
  return false;
#endif
}
906
/// Subtracts \p Y from \p X, storing the two's complement truncated
/// difference in \p Result. The return value (of type T) is non-zero if the
/// subtraction overflowed and zero otherwise.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_sub_overflow)
  return __builtin_sub_overflow(X, Y, &Result);
#else
  // Subtract in the unsigned domain, where wraparound is well defined, then
  // convert the wrapped difference back to the signed type.
  using U = std::make_unsigned_t<T>;
  const U Wrapped = static_cast<U>(X) - static_cast<U>(Y);
  Result = static_cast<T>(Wrapped);

  const bool NonPositiveMinusPositive = X <= 0 && Y > 0;
  const bool NonNegativeMinusNegative = X >= 0 && Y < 0;
  // Subtracting a positive number from a non-positive one must give a
  // negative result.
  if (NonPositiveMinusPositive)
    return Result >= 0;
  // Subtracting a negative number from a non-negative one must give a
  // positive result.
  if (NonNegativeMinusNegative)
    return Result <= 0;
  // Operands of the same sign can never overflow.
  return false;
#endif
}
932
/// Multiplies two signed integers, storing the two's complement truncated
/// product in \p Result. The return value (of type T) is non-zero if the
/// multiplication overflowed and zero otherwise.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
  // Work on absolute values in the unsigned domain, where wraparound is well
  // defined. "0 - u" negates without tripping over the minimum value of T.
  using U = std::make_unsigned_t<T>;
  const auto Magnitude = [](T V) -> U {
    return V < 0 ? (0 - static_cast<U>(V)) : static_cast<U>(V);
  };
  const U AbsX = Magnitude(X);
  const U AbsY = Magnitude(Y);
  const U AbsProduct = AbsX * AbsY;

  // Re-apply the sign and convert to the signed type.
  const bool IsNegative = (X < 0) != (Y < 0);
  Result = IsNegative ? (0 - AbsProduct) : AbsProduct;

  // If any of the args was 0, result is 0 and no overflow occurs.
  if (AbsX == 0 || AbsY == 0)
    return false;

  // The magnitude of a representable product is at most max(T) + 1 when the
  // result is negative and max(T) when it is positive. Compare via division
  // so the check itself cannot overflow.
  const U MaxMagnitude = static_cast<U>(std::numeric_limits<T>::max());
  const U Limit = IsNegative ? MaxMagnitude + U(1) : MaxMagnitude;
  return AbsX > Limit / AbsY;
}
959
960} // End llvm namespace
961
962#endif