File: | llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp |
Warning: | line 205, column 5 The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- WebAssemblyFrameLowering.cpp - WebAssembly Frame Lowering ----------==// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | /// | ||||
9 | /// \file | ||||
10 | /// This file contains the WebAssembly implementation of | ||||
11 | /// TargetFrameLowering class. | ||||
12 | /// | ||||
13 | /// On WebAssembly, there aren't a lot of things to do here. There are no | ||||
14 | /// callee-saved registers to save, and no spill slots. | ||||
15 | /// | ||||
16 | /// The stack grows downward. | ||||
17 | /// | ||||
18 | //===----------------------------------------------------------------------===// | ||||
19 | |||||
20 | #include "WebAssemblyFrameLowering.h" | ||||
21 | #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" | ||||
22 | #include "WebAssembly.h" | ||||
23 | #include "WebAssemblyInstrInfo.h" | ||||
24 | #include "WebAssemblyMachineFunctionInfo.h" | ||||
25 | #include "WebAssemblySubtarget.h" | ||||
26 | #include "WebAssemblyTargetMachine.h" | ||||
27 | #include "WebAssemblyUtilities.h" | ||||
28 | #include "llvm/CodeGen/MachineFrameInfo.h" | ||||
29 | #include "llvm/CodeGen/MachineFunction.h" | ||||
30 | #include "llvm/CodeGen/MachineInstrBuilder.h" | ||||
31 | #include "llvm/CodeGen/MachineModuleInfoImpls.h" | ||||
32 | #include "llvm/CodeGen/MachineRegisterInfo.h" | ||||
33 | #include "llvm/MC/MCAsmInfo.h" | ||||
34 | #include "llvm/Support/Debug.h" | ||||
35 | using namespace llvm; | ||||
36 | |||||
37 | #define DEBUG_TYPE"wasm-frame-info" "wasm-frame-info" | ||||
38 | |||||
39 | // TODO: wasm64 | ||||
40 | // TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions | ||||
41 | |||||
42 | /// We need a base pointer in the case of having items on the stack that | ||||
43 | /// require stricter alignment than the stack pointer itself. Because we need | ||||
44 | /// to shift the stack pointer by some unknown amount to force the alignment, | ||||
45 | /// we need to record the value of the stack pointer on entry to the function. | ||||
46 | bool WebAssemblyFrameLowering::hasBP(const MachineFunction &MF) const { | ||||
47 | const auto *RegInfo = | ||||
48 | MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); | ||||
49 | return RegInfo->needsStackRealignment(MF); | ||||
50 | } | ||||
51 | |||||
52 | /// Return true if the specified function should have a dedicated frame pointer | ||||
53 | /// register. | ||||
54 | bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const { | ||||
55 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | ||||
56 | |||||
57 | // When we have var-sized objects, we move the stack pointer by an unknown | ||||
58 | // amount, and need to emit a frame pointer to restore the stack to where we | ||||
59 | // were on function entry. | ||||
60 | // If we already need a base pointer, we use that to fix up the stack pointer. | ||||
61 | // If there are no fixed-size objects, we would have no use of a frame | ||||
62 | // pointer, and thus should not emit one. | ||||
63 | bool HasFixedSizedObjects = MFI.getStackSize() > 0; | ||||
64 | bool NeedsFixedReference = !hasBP(MF) || HasFixedSizedObjects; | ||||
65 | |||||
66 | return MFI.isFrameAddressTaken() || | ||||
67 | (MFI.hasVarSizedObjects() && NeedsFixedReference) || | ||||
68 | MFI.hasStackMap() || MFI.hasPatchPoint(); | ||||
69 | } | ||||
70 | |||||
71 | /// Under normal circumstances, when a frame pointer is not required, we reserve | ||||
72 | /// argument space for call sites in the function immediately on entry to the | ||||
73 | /// current function. This eliminates the need for add/sub sp brackets around | ||||
74 | /// call sites. Returns true if the call frame is included as part of the stack | ||||
75 | /// frame. | ||||
76 | bool WebAssemblyFrameLowering::hasReservedCallFrame( | ||||
77 | const MachineFunction &MF) const { | ||||
78 | return !MF.getFrameInfo().hasVarSizedObjects(); | ||||
79 | } | ||||
80 | |||||
81 | // Returns true if this function needs a local user-space stack pointer for its | ||||
82 | // local frame (not for exception handling). | ||||
83 | bool WebAssemblyFrameLowering::needsSPForLocalFrame( | ||||
84 | const MachineFunction &MF) const { | ||||
85 | auto &MFI = MF.getFrameInfo(); | ||||
86 | return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF); | ||||
87 | } | ||||
88 | |||||
89 | // In function with EH pads, we need to make a copy of the value of | ||||
90 | // __stack_pointer global in SP32 register, in order to use it when restoring | ||||
91 | // __stack_pointer after an exception is caught. | ||||
92 | bool WebAssemblyFrameLowering::needsPrologForEH( | ||||
93 | const MachineFunction &MF) const { | ||||
94 | auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType(); | ||||
95 | return EHType == ExceptionHandling::Wasm && | ||||
96 | MF.getFunction().hasPersonalityFn() && MF.getFrameInfo().hasCalls(); | ||||
97 | } | ||||
98 | |||||
99 | /// Returns true if this function needs a local user-space stack pointer. | ||||
100 | /// Unlike a machine stack pointer, the wasm user stack pointer is a global | ||||
101 | /// variable, so it is loaded into a register in the prolog. | ||||
102 | bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF) const { | ||||
103 | return needsSPForLocalFrame(MF) || needsPrologForEH(MF); | ||||
104 | } | ||||
105 | |||||
106 | /// Returns true if the local user-space stack pointer needs to be written back | ||||
107 | /// to __stack_pointer global by this function (this is not meaningful if | ||||
108 | /// needsSP is false). If false, the stack red zone can be used and only a local | ||||
109 | /// SP is needed. | ||||
110 | bool WebAssemblyFrameLowering::needsSPWriteback( | ||||
111 | const MachineFunction &MF) const { | ||||
112 | auto &MFI = MF.getFrameInfo(); | ||||
113 | assert(needsSP(MF))((needsSP(MF)) ? static_cast<void> (0) : __assert_fail ( "needsSP(MF)", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp" , 113, __PRETTY_FUNCTION__)); | ||||
114 | // When we don't need a local stack pointer for its local frame but only to | ||||
115 | // support EH, we don't need to write SP back in the epilog, because we don't | ||||
116 | // bump down the stack pointer in the prolog. We need to write SP back in the | ||||
117 | // epilog only if | ||||
118 | // 1. We need SP not only for EH support but also because we actually use | ||||
119 | // stack or we have a frame address taken. | ||||
120 | // 2. We cannot use the red zone. | ||||
121 | bool CanUseRedZone = MFI.getStackSize() <= RedZoneSize && !MFI.hasCalls() && | ||||
122 | !MF.getFunction().hasFnAttribute(Attribute::NoRedZone); | ||||
123 | return needsSPForLocalFrame(MF) && !CanUseRedZone; | ||||
124 | } | ||||
125 | |||||
126 | void WebAssemblyFrameLowering::writeSPToGlobal( | ||||
127 | unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, | ||||
128 | MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const { | ||||
129 | const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); | ||||
130 | |||||
131 | const char *ES = "__stack_pointer"; | ||||
132 | auto *SPSymbol = MF.createExternalSymbolName(ES); | ||||
133 | BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32)) | ||||
134 | .addExternalSymbol(SPSymbol) | ||||
135 | .addReg(SrcReg); | ||||
136 | } | ||||
137 | |||||
138 | MachineBasicBlock::iterator | ||||
139 | WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( | ||||
140 | MachineFunction &MF, MachineBasicBlock &MBB, | ||||
141 | MachineBasicBlock::iterator I) const { | ||||
142 | assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) &&((!I->getOperand(0).getImm() && (hasFP(MF) || hasBP (MF)) && "Call frame pseudos should only be used for dynamic stack adjustment" ) ? static_cast<void> (0) : __assert_fail ("!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) && \"Call frame pseudos should only be used for dynamic stack adjustment\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp" , 143, __PRETTY_FUNCTION__)) | ||||
143 | "Call frame pseudos should only be used for dynamic stack adjustment")((!I->getOperand(0).getImm() && (hasFP(MF) || hasBP (MF)) && "Call frame pseudos should only be used for dynamic stack adjustment" ) ? static_cast<void> (0) : __assert_fail ("!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) && \"Call frame pseudos should only be used for dynamic stack adjustment\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp" , 143, __PRETTY_FUNCTION__)); | ||||
144 | const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); | ||||
145 | if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && | ||||
146 | needsSPWriteback(MF)) { | ||||
147 | DebugLoc DL = I->getDebugLoc(); | ||||
148 | writeSPToGlobal(WebAssembly::SP32, MF, MBB, I, DL); | ||||
149 | } | ||||
150 | return MBB.erase(I); | ||||
151 | } | ||||
152 | |||||
153 | void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, | ||||
154 | MachineBasicBlock &MBB) const { | ||||
155 | // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions | ||||
156 | auto &MFI = MF.getFrameInfo(); | ||||
157 | assert(MFI.getCalleeSavedInfo().empty() &&((MFI.getCalleeSavedInfo().empty() && "WebAssembly should not have callee-saved registers" ) ? static_cast<void> (0) : __assert_fail ("MFI.getCalleeSavedInfo().empty() && \"WebAssembly should not have callee-saved registers\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp" , 158, __PRETTY_FUNCTION__)) | ||||
| |||||
158 | "WebAssembly should not have callee-saved registers")((MFI.getCalleeSavedInfo().empty() && "WebAssembly should not have callee-saved registers" ) ? static_cast<void> (0) : __assert_fail ("MFI.getCalleeSavedInfo().empty() && \"WebAssembly should not have callee-saved registers\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp" , 158, __PRETTY_FUNCTION__)); | ||||
159 | |||||
160 | if (!needsSP(MF)) | ||||
161 | return; | ||||
162 | uint64_t StackSize = MFI.getStackSize(); | ||||
163 | |||||
164 | const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); | ||||
165 | auto &MRI = MF.getRegInfo(); | ||||
166 | |||||
167 | auto InsertPt = MBB.begin(); | ||||
168 | while (InsertPt != MBB.end() && | ||||
169 | WebAssembly::isArgument(InsertPt->getOpcode())) | ||||
170 | ++InsertPt; | ||||
171 | DebugLoc DL; | ||||
172 | |||||
173 | const TargetRegisterClass *PtrRC = | ||||
174 | MRI.getTargetRegisterInfo()->getPointerRegClass(MF); | ||||
175 | unsigned SPReg = WebAssembly::SP32; | ||||
176 | if (StackSize) | ||||
177 | SPReg = MRI.createVirtualRegister(PtrRC); | ||||
178 | |||||
179 | const char *ES = "__stack_pointer"; | ||||
180 | auto *SPSymbol = MF.createExternalSymbolName(ES); | ||||
181 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg) | ||||
182 | .addExternalSymbol(SPSymbol); | ||||
183 | |||||
184 | bool HasBP = hasBP(MF); | ||||
185 | if (HasBP) { | ||||
186 | auto FI = MF.getInfo<WebAssemblyFunctionInfo>(); | ||||
187 | Register BasePtr = MRI.createVirtualRegister(PtrRC); | ||||
188 | FI->setBasePointerVreg(BasePtr); | ||||
189 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr) | ||||
190 | .addReg(SPReg); | ||||
191 | } | ||||
192 | if (StackSize
| ||||
193 | // Subtract the frame size | ||||
194 | Register OffsetReg = MRI.createVirtualRegister(PtrRC); | ||||
195 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) | ||||
196 | .addImm(StackSize); | ||||
197 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), | ||||
198 | WebAssembly::SP32) | ||||
199 | .addReg(SPReg) | ||||
200 | .addReg(OffsetReg); | ||||
201 | } | ||||
202 | if (HasBP
| ||||
203 | Register BitmaskReg = MRI.createVirtualRegister(PtrRC); | ||||
204 | unsigned Alignment = MFI.getMaxAlignment(); | ||||
205 | assert((1u << countTrailingZeros(Alignment)) == Alignment &&(((1u << countTrailingZeros(Alignment)) == Alignment && "Alignment must be a power of 2") ? static_cast<void> ( 0) : __assert_fail ("(1u << countTrailingZeros(Alignment)) == Alignment && \"Alignment must be a power of 2\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp" , 206, __PRETTY_FUNCTION__)) | ||||
| |||||
206 | "Alignment must be a power of 2")(((1u << countTrailingZeros(Alignment)) == Alignment && "Alignment must be a power of 2") ? static_cast<void> ( 0) : __assert_fail ("(1u << countTrailingZeros(Alignment)) == Alignment && \"Alignment must be a power of 2\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp" , 206, __PRETTY_FUNCTION__)); | ||||
207 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg) | ||||
208 | .addImm((int)~(Alignment - 1)); | ||||
209 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32), | ||||
210 | WebAssembly::SP32) | ||||
211 | .addReg(WebAssembly::SP32) | ||||
212 | .addReg(BitmaskReg); | ||||
213 | } | ||||
214 | if (hasFP(MF)) { | ||||
215 | // Unlike most conventional targets (where FP points to the saved FP), | ||||
216 | // FP points to the bottom of the fixed-size locals, so we can use positive | ||||
217 | // offsets in load/store instructions. | ||||
218 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), WebAssembly::FP32) | ||||
219 | .addReg(WebAssembly::SP32); | ||||
220 | } | ||||
221 | if (StackSize && needsSPWriteback(MF)) { | ||||
222 | writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPt, DL); | ||||
223 | } | ||||
224 | } | ||||
225 | |||||
226 | void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, | ||||
227 | MachineBasicBlock &MBB) const { | ||||
228 | uint64_t StackSize = MF.getFrameInfo().getStackSize(); | ||||
229 | if (!needsSP(MF) || !needsSPWriteback(MF)) | ||||
230 | return; | ||||
231 | const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); | ||||
232 | auto &MRI = MF.getRegInfo(); | ||||
233 | auto InsertPt = MBB.getFirstTerminator(); | ||||
234 | DebugLoc DL; | ||||
235 | |||||
236 | if (InsertPt != MBB.end()) | ||||
237 | DL = InsertPt->getDebugLoc(); | ||||
238 | |||||
239 | // Restore the stack pointer. If we had fixed-size locals, add the offset | ||||
240 | // subtracted in the prolog. | ||||
241 | unsigned SPReg = 0; | ||||
242 | if (hasBP(MF)) { | ||||
243 | auto FI = MF.getInfo<WebAssemblyFunctionInfo>(); | ||||
244 | SPReg = FI->getBasePointerVreg(); | ||||
245 | } else if (StackSize) { | ||||
246 | const TargetRegisterClass *PtrRC = | ||||
247 | MRI.getTargetRegisterInfo()->getPointerRegClass(MF); | ||||
248 | Register OffsetReg = MRI.createVirtualRegister(PtrRC); | ||||
249 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) | ||||
250 | .addImm(StackSize); | ||||
251 | // In the epilog we don't need to write the result back to the SP32 physreg | ||||
252 | // because it won't be used again. We can use a stackified register instead. | ||||
253 | SPReg = MRI.createVirtualRegister(PtrRC); | ||||
254 | BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) | ||||
255 | .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) | ||||
256 | .addReg(OffsetReg); | ||||
257 | } else { | ||||
258 | SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32; | ||||
259 | } | ||||
260 | |||||
261 | writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL); | ||||
262 | } | ||||
263 | |||||
264 | TargetFrameLowering::DwarfFrameBase | ||||
265 | WebAssemblyFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { | ||||
266 | DwarfFrameBase Loc; | ||||
267 | Loc.Kind = DwarfFrameBase::WasmFrameBase; | ||||
268 | const WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); | ||||
269 | if (needsSP(MF) && MFI.isFrameBaseVirtual()) { | ||||
270 | unsigned LocalNum = MFI.getFrameBaseLocal(); | ||||
271 | Loc.Location.WasmLoc = {WebAssembly::TI_LOCAL_START, LocalNum}; | ||||
272 | } else { | ||||
273 | // TODO: This should work on a breakpoint at a function with no frame, | ||||
274 | // but probably won't work for traversing up the stack. | ||||
275 | // TODO: This needs a relocation for correct __stack_pointer | ||||
276 | Loc.Location.WasmLoc = {WebAssembly::TI_GLOBAL_START, 0}; | ||||
277 | } | ||||
278 | return Loc; | ||||
279 | } |
1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //===----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | // This file contains some functions that are useful for math stuff. | ||||
10 | // | ||||
11 | //===----------------------------------------------------------------------===// | ||||
12 | |||||
13 | #ifndef LLVM_SUPPORT_MATHEXTRAS_H | ||||
14 | #define LLVM_SUPPORT_MATHEXTRAS_H | ||||
15 | |||||
16 | #include "llvm/Support/Compiler.h" | ||||
17 | #include <algorithm> | ||||
18 | #include <cassert> | ||||
19 | #include <climits> | ||||
20 | #include <cmath> | ||||
21 | #include <cstdint> | ||||
22 | #include <cstring> | ||||
23 | #include <limits> | ||||
24 | #include <type_traits> | ||||
25 | |||||
26 | #ifdef __ANDROID_NDK__ | ||||
27 | #include <android/api-level.h> | ||||
28 | #endif | ||||
29 | |||||
30 | #ifdef _MSC_VER | ||||
31 | // Declare these intrinsics manually rather including intrin.h. It's very | ||||
32 | // expensive, and MathExtras.h is popular. | ||||
33 | // #include <intrin.h> | ||||
34 | extern "C" { | ||||
35 | unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); | ||||
36 | unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); | ||||
37 | unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); | ||||
38 | unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); | ||||
39 | } | ||||
40 | #endif | ||||
41 | |||||
42 | namespace llvm { | ||||
43 | |||||
44 | /// The behavior an operation has on an input of 0. | ||||
45 | enum ZeroBehavior { | ||||
46 | /// The returned value is undefined. | ||||
47 | ZB_Undefined, | ||||
48 | /// The returned value is numeric_limits<T>::max() | ||||
49 | ZB_Max, | ||||
50 | /// The returned value is numeric_limits<T>::digits | ||||
51 | ZB_Width | ||||
52 | }; | ||||
53 | |||||
54 | /// Mathematical constants. | ||||
55 | namespace numbers { | ||||
56 | // TODO: Track C++20 std::numbers. | ||||
57 | // TODO: Favor using the hexadecimal FP constants (requires C++17). | ||||
58 | constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 | ||||
59 | egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 | ||||
60 | ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 | ||||
61 | ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392 | ||||
62 | log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) | ||||
63 | log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) | ||||
64 | pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 | ||||
65 | inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 | ||||
66 | sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 | ||||
67 | inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 | ||||
68 | sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219 | ||||
69 | inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) | ||||
70 | sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 | ||||
71 | inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) | ||||
72 | phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 | ||||
73 | constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 | ||||
74 | egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 | ||||
75 | ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 | ||||
76 | ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 | ||||
77 | log2ef = 1.44269504F, // (0x1.715476P+0) | ||||
78 | log10ef = .434294482F, // (0x1.bcb7b2P-2) | ||||
79 | pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 | ||||
80 | inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 | ||||
81 | sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 | ||||
82 | inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 | ||||
83 | sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 | ||||
84 | inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) | ||||
85 | sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 | ||||
86 | inv_sqrt3f = .577350269F, // (0x1.279a74P-1) | ||||
87 | phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 | ||||
88 | } // namespace numbers | ||||
89 | |||||
90 | namespace detail { | ||||
91 | template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter { | ||||
92 | static unsigned count(T Val, ZeroBehavior) { | ||||
93 | if (!Val) | ||||
94 | return std::numeric_limits<T>::digits; | ||||
95 | if (Val & 0x1) | ||||
96 | return 0; | ||||
97 | |||||
98 | // Bisection method. | ||||
99 | unsigned ZeroBits = 0; | ||||
100 | T Shift = std::numeric_limits<T>::digits >> 1; | ||||
101 | T Mask = std::numeric_limits<T>::max() >> Shift; | ||||
102 | while (Shift) { | ||||
103 | if ((Val & Mask) == 0) { | ||||
104 | Val >>= Shift; | ||||
105 | ZeroBits |= Shift; | ||||
106 | } | ||||
107 | Shift >>= 1; | ||||
108 | Mask >>= Shift; | ||||
109 | } | ||||
110 | return ZeroBits; | ||||
111 | } | ||||
112 | }; | ||||
113 | |||||
114 | #if defined(__GNUC__4) || defined(_MSC_VER) | ||||
115 | template <typename T> struct TrailingZerosCounter<T, 4> { | ||||
116 | static unsigned count(T Val, ZeroBehavior ZB) { | ||||
117 | if (ZB
| ||||
118 | return 32; | ||||
119 | |||||
120 | #if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4) | ||||
121 | return __builtin_ctz(Val); | ||||
122 | #elif defined(_MSC_VER) | ||||
123 | unsigned long Index; | ||||
124 | _BitScanForward(&Index, Val); | ||||
125 | return Index; | ||||
126 | #endif | ||||
127 | } | ||||
128 | }; | ||||
129 | |||||
130 | #if !defined(_MSC_VER) || defined(_M_X64) | ||||
131 | template <typename T> struct TrailingZerosCounter<T, 8> { | ||||
132 | static unsigned count(T Val, ZeroBehavior ZB) { | ||||
133 | if (ZB != ZB_Undefined && Val == 0) | ||||
134 | return 64; | ||||
135 | |||||
136 | #if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4) | ||||
137 | return __builtin_ctzll(Val); | ||||
138 | #elif defined(_MSC_VER) | ||||
139 | unsigned long Index; | ||||
140 | _BitScanForward64(&Index, Val); | ||||
141 | return Index; | ||||
142 | #endif | ||||
143 | } | ||||
144 | }; | ||||
145 | #endif | ||||
146 | #endif | ||||
147 | } // namespace detail | ||||
148 | |||||
149 | /// Count number of 0's from the least significant bit to the most | ||||
150 | /// stopping at the first 1. | ||||
151 | /// | ||||
152 | /// Only unsigned integral types are allowed. | ||||
153 | /// | ||||
154 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are | ||||
155 | /// valid arguments. | ||||
156 | template <typename T> | ||||
157 | unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { | ||||
158 | static_assert(std::numeric_limits<T>::is_integer && | ||||
159 | !std::numeric_limits<T>::is_signed, | ||||
160 | "Only unsigned integral types are allowed."); | ||||
161 | return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB); | ||||
162 | } | ||||
163 | |||||
164 | namespace detail { | ||||
165 | template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter { | ||||
166 | static unsigned count(T Val, ZeroBehavior) { | ||||
167 | if (!Val) | ||||
168 | return std::numeric_limits<T>::digits; | ||||
169 | |||||
170 | // Bisection method. | ||||
171 | unsigned ZeroBits = 0; | ||||
172 | for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) { | ||||
173 | T Tmp = Val >> Shift; | ||||
174 | if (Tmp) | ||||
175 | Val = Tmp; | ||||
176 | else | ||||
177 | ZeroBits |= Shift; | ||||
178 | } | ||||
179 | return ZeroBits; | ||||
180 | } | ||||
181 | }; | ||||
182 | |||||
183 | #if defined(__GNUC__4) || defined(_MSC_VER) | ||||
184 | template <typename T> struct LeadingZerosCounter<T, 4> { | ||||
185 | static unsigned count(T Val, ZeroBehavior ZB) { | ||||
186 | if (ZB != ZB_Undefined && Val == 0) | ||||
187 | return 32; | ||||
188 | |||||
189 | #if __has_builtin(__builtin_clz)1 || defined(__GNUC__4) | ||||
190 | return __builtin_clz(Val); | ||||
191 | #elif defined(_MSC_VER) | ||||
192 | unsigned long Index; | ||||
193 | _BitScanReverse(&Index, Val); | ||||
194 | return Index ^ 31; | ||||
195 | #endif | ||||
196 | } | ||||
197 | }; | ||||
198 | |||||
199 | #if !defined(_MSC_VER) || defined(_M_X64) | ||||
200 | template <typename T> struct LeadingZerosCounter<T, 8> { | ||||
201 | static unsigned count(T Val, ZeroBehavior ZB) { | ||||
202 | if (ZB != ZB_Undefined && Val == 0) | ||||
203 | return 64; | ||||
204 | |||||
205 | #if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4) | ||||
206 | return __builtin_clzll(Val); | ||||
207 | #elif defined(_MSC_VER) | ||||
208 | unsigned long Index; | ||||
209 | _BitScanReverse64(&Index, Val); | ||||
210 | return Index ^ 63; | ||||
211 | #endif | ||||
212 | } | ||||
213 | }; | ||||
214 | #endif | ||||
215 | #endif | ||||
216 | } // namespace detail | ||||
217 | |||||
218 | /// Count number of 0's from the most significant bit to the least | ||||
219 | /// stopping at the first 1. | ||||
220 | /// | ||||
221 | /// Only unsigned integral types are allowed. | ||||
222 | /// | ||||
223 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are | ||||
224 | /// valid arguments. | ||||
225 | template <typename T> | ||||
226 | unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { | ||||
227 | static_assert(std::numeric_limits<T>::is_integer && | ||||
228 | !std::numeric_limits<T>::is_signed, | ||||
229 | "Only unsigned integral types are allowed."); | ||||
230 | return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB); | ||||
231 | } | ||||
232 | |||||
233 | /// Get the index of the first set bit starting from the least | ||||
234 | /// significant bit. | ||||
235 | /// | ||||
236 | /// Only unsigned integral types are allowed. | ||||
237 | /// | ||||
238 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are | ||||
239 | /// valid arguments. | ||||
240 | template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { | ||||
241 | if (ZB == ZB_Max && Val == 0) | ||||
242 | return std::numeric_limits<T>::max(); | ||||
243 | |||||
244 | return countTrailingZeros(Val, ZB_Undefined); | ||||
245 | } | ||||
246 | |||||
247 | /// Create a bitmask with the N right-most bits set to 1, and all other | ||||
248 | /// bits set to 0. Only unsigned types are allowed. | ||||
249 | template <typename T> T maskTrailingOnes(unsigned N) { | ||||
250 | static_assert(std::is_unsigned<T>::value, "Invalid type!"); | ||||
251 | const unsigned Bits = CHAR_BIT8 * sizeof(T); | ||||
252 | assert(N <= Bits && "Invalid bit index")((N <= Bits && "Invalid bit index") ? static_cast< void> (0) : __assert_fail ("N <= Bits && \"Invalid bit index\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 252, __PRETTY_FUNCTION__)); | ||||
253 | return N == 0 ? 0 : (T(-1) >> (Bits - N)); | ||||
254 | } | ||||
255 | |||||
256 | /// Create a bitmask with the N left-most bits set to 1, and all other | ||||
257 | /// bits set to 0. Only unsigned types are allowed. | ||||
258 | template <typename T> T maskLeadingOnes(unsigned N) { | ||||
259 | return ~maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N); | ||||
260 | } | ||||
261 | |||||
262 | /// Create a bitmask with the N right-most bits set to 0, and all other | ||||
263 | /// bits set to 1. Only unsigned types are allowed. | ||||
264 | template <typename T> T maskTrailingZeros(unsigned N) { | ||||
265 | return maskLeadingOnes<T>(CHAR_BIT8 * sizeof(T) - N); | ||||
266 | } | ||||
267 | |||||
268 | /// Create a bitmask with the N left-most bits set to 0, and all other | ||||
269 | /// bits set to 1. Only unsigned types are allowed. | ||||
270 | template <typename T> T maskLeadingZeros(unsigned N) { | ||||
271 | return maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N); | ||||
272 | } | ||||
273 | |||||
274 | /// Get the index of the last set bit starting from the least | ||||
275 | /// significant bit. | ||||
276 | /// | ||||
277 | /// Only unsigned integral types are allowed. | ||||
278 | /// | ||||
279 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are | ||||
280 | /// valid arguments. | ||||
281 | template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { | ||||
282 | if (ZB == ZB_Max && Val == 0) | ||||
283 | return std::numeric_limits<T>::max(); | ||||
284 | |||||
285 | // Use ^ instead of - because both gcc and llvm can remove the associated ^ | ||||
286 | // in the __builtin_clz intrinsic on x86. | ||||
287 | return countLeadingZeros(Val, ZB_Undefined) ^ | ||||
288 | (std::numeric_limits<T>::digits - 1); | ||||
289 | } | ||||
290 | |||||
291 | /// Macro compressed bit reversal table for 256 bits. | ||||
292 | /// | ||||
293 | /// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable | ||||
294 | static const unsigned char BitReverseTable256[256] = { | ||||
295 | #define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64 | ||||
296 | #define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16) | ||||
297 | #define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4) | ||||
298 | R6(0), R6(2), R6(1), R6(3) | ||||
299 | #undef R2 | ||||
300 | #undef R4 | ||||
301 | #undef R6 | ||||
302 | }; | ||||
303 | |||||
304 | /// Reverse the bits in \p Val. | ||||
305 | template <typename T> | ||||
306 | T reverseBits(T Val) { | ||||
307 | unsigned char in[sizeof(Val)]; | ||||
308 | unsigned char out[sizeof(Val)]; | ||||
309 | std::memcpy(in, &Val, sizeof(Val)); | ||||
310 | for (unsigned i = 0; i < sizeof(Val); ++i) | ||||
311 | out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; | ||||
312 | std::memcpy(&Val, out, sizeof(Val)); | ||||
313 | return Val; | ||||
314 | } | ||||
315 | |||||
316 | // NOTE: The following support functions use the _32/_64 extensions instead of | ||||
317 | // type overloading so that signed and unsigned integers can be used without | ||||
318 | // ambiguity. | ||||
319 | |||||
320 | /// Return the high 32 bits of a 64 bit value. | ||||
321 | constexpr inline uint32_t Hi_32(uint64_t Value) { | ||||
322 | return static_cast<uint32_t>(Value >> 32); | ||||
323 | } | ||||
324 | |||||
325 | /// Return the low 32 bits of a 64 bit value. | ||||
326 | constexpr inline uint32_t Lo_32(uint64_t Value) { | ||||
327 | return static_cast<uint32_t>(Value); | ||||
328 | } | ||||
329 | |||||
330 | /// Make a 64-bit integer from a high / low pair of 32-bit integers. | ||||
331 | constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { | ||||
332 | return ((uint64_t)High << 32) | (uint64_t)Low; | ||||
333 | } | ||||
334 | |||||
335 | /// Checks if an integer fits into the given bit width. | ||||
336 | template <unsigned N> constexpr inline bool isInt(int64_t x) { | ||||
337 | return N >= 64 || (-(INT64_C(1)1L<<(N-1)) <= x && x < (INT64_C(1)1L<<(N-1))); | ||||
338 | } | ||||
339 | // Template specializations to get better code for common cases. | ||||
340 | template <> constexpr inline bool isInt<8>(int64_t x) { | ||||
341 | return static_cast<int8_t>(x) == x; | ||||
342 | } | ||||
343 | template <> constexpr inline bool isInt<16>(int64_t x) { | ||||
344 | return static_cast<int16_t>(x) == x; | ||||
345 | } | ||||
346 | template <> constexpr inline bool isInt<32>(int64_t x) { | ||||
347 | return static_cast<int32_t>(x) == x; | ||||
348 | } | ||||
349 | |||||
350 | /// Checks if a signed integer is an N bit number shifted left by S. | ||||
351 | template <unsigned N, unsigned S> | ||||
352 | constexpr inline bool isShiftedInt(int64_t x) { | ||||
353 | static_assert( | ||||
354 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); | ||||
355 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); | ||||
356 | return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); | ||||
357 | } | ||||
358 | |||||
359 | /// Checks if an unsigned integer fits into the given bit width. | ||||
360 | /// | ||||
361 | /// This is written as two functions rather than as simply | ||||
362 | /// | ||||
363 | /// return N >= 64 || X < (UINT64_C(1) << N); | ||||
364 | /// | ||||
365 | /// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting | ||||
366 | /// left too many places. | ||||
367 | template <unsigned N> | ||||
368 | constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) { | ||||
369 | static_assert(N > 0, "isUInt<0> doesn't make sense"); | ||||
370 | return X < (UINT64_C(1)1UL << (N)); | ||||
371 | } | ||||
372 | template <unsigned N> | ||||
373 | constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t X) { | ||||
374 | return true; | ||||
375 | } | ||||
376 | |||||
377 | // Template specializations to get better code for common cases. | ||||
378 | template <> constexpr inline bool isUInt<8>(uint64_t x) { | ||||
379 | return static_cast<uint8_t>(x) == x; | ||||
380 | } | ||||
381 | template <> constexpr inline bool isUInt<16>(uint64_t x) { | ||||
382 | return static_cast<uint16_t>(x) == x; | ||||
383 | } | ||||
384 | template <> constexpr inline bool isUInt<32>(uint64_t x) { | ||||
385 | return static_cast<uint32_t>(x) == x; | ||||
386 | } | ||||
387 | |||||
388 | /// Checks if a unsigned integer is an N bit number shifted left by S. | ||||
389 | template <unsigned N, unsigned S> | ||||
390 | constexpr inline bool isShiftedUInt(uint64_t x) { | ||||
391 | static_assert( | ||||
392 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); | ||||
393 | static_assert(N + S <= 64, | ||||
394 | "isShiftedUInt<N, S> with N + S > 64 is too wide."); | ||||
395 | // Per the two static_asserts above, S must be strictly less than 64. So | ||||
396 | // 1 << S is not undefined behavior. | ||||
397 | return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0); | ||||
398 | } | ||||
399 | |||||
400 | /// Gets the maximum value for a N-bit unsigned integer. | ||||
401 | inline uint64_t maxUIntN(uint64_t N) { | ||||
402 | assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range" ) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 402, __PRETTY_FUNCTION__)); | ||||
403 | |||||
404 | // uint64_t(1) << 64 is undefined behavior, so we can't do | ||||
405 | // (uint64_t(1) << N) - 1 | ||||
406 | // without checking first that N != 64. But this works and doesn't have a | ||||
407 | // branch. | ||||
408 | return UINT64_MAX(18446744073709551615UL) >> (64 - N); | ||||
409 | } | ||||
410 | |||||
411 | /// Gets the minimum value for a N-bit signed integer. | ||||
412 | inline int64_t minIntN(int64_t N) { | ||||
413 | assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range" ) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 413, __PRETTY_FUNCTION__)); | ||||
414 | |||||
415 | return -(UINT64_C(1)1UL<<(N-1)); | ||||
416 | } | ||||
417 | |||||
418 | /// Gets the maximum value for a N-bit signed integer. | ||||
419 | inline int64_t maxIntN(int64_t N) { | ||||
420 | assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range" ) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 420, __PRETTY_FUNCTION__)); | ||||
421 | |||||
422 | // This relies on two's complement wraparound when N == 64, so we convert to | ||||
423 | // int64_t only at the very end to avoid UB. | ||||
424 | return (UINT64_C(1)1UL << (N - 1)) - 1; | ||||
425 | } | ||||
426 | |||||
427 | /// Checks if an unsigned integer fits into the given (dynamic) bit width. | ||||
428 | inline bool isUIntN(unsigned N, uint64_t x) { | ||||
429 | return N >= 64 || x <= maxUIntN(N); | ||||
430 | } | ||||
431 | |||||
432 | /// Checks if an signed integer fits into the given (dynamic) bit width. | ||||
433 | inline bool isIntN(unsigned N, int64_t x) { | ||||
434 | return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); | ||||
435 | } | ||||
436 | |||||
437 | /// Return true if the argument is a non-empty sequence of ones starting at the | ||||
438 | /// least significant bit with the remainder zero (32 bit version). | ||||
439 | /// Ex. isMask_32(0x0000FFFFU) == true. | ||||
440 | constexpr inline bool isMask_32(uint32_t Value) { | ||||
441 | return Value && ((Value + 1) & Value) == 0; | ||||
442 | } | ||||
443 | |||||
444 | /// Return true if the argument is a non-empty sequence of ones starting at the | ||||
445 | /// least significant bit with the remainder zero (64 bit version). | ||||
446 | constexpr inline bool isMask_64(uint64_t Value) { | ||||
447 | return Value && ((Value + 1) & Value) == 0; | ||||
448 | } | ||||
449 | |||||
450 | /// Return true if the argument contains a non-empty sequence of ones with the | ||||
451 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. | ||||
452 | constexpr inline bool isShiftedMask_32(uint32_t Value) { | ||||
453 | return Value && isMask_32((Value - 1) | Value); | ||||
454 | } | ||||
455 | |||||
456 | /// Return true if the argument contains a non-empty sequence of ones with the | ||||
457 | /// remainder zero (64 bit version.) | ||||
458 | constexpr inline bool isShiftedMask_64(uint64_t Value) { | ||||
459 | return Value && isMask_64((Value - 1) | Value); | ||||
460 | } | ||||
461 | |||||
462 | /// Return true if the argument is a power of two > 0. | ||||
463 | /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) | ||||
464 | constexpr inline bool isPowerOf2_32(uint32_t Value) { | ||||
465 | return Value && !(Value & (Value - 1)); | ||||
466 | } | ||||
467 | |||||
468 | /// Return true if the argument is a power of two > 0 (64 bit edition.) | ||||
469 | constexpr inline bool isPowerOf2_64(uint64_t Value) { | ||||
470 | return Value && !(Value & (Value - 1)); | ||||
471 | } | ||||
472 | |||||
473 | /// Count the number of ones from the most significant bit to the first | ||||
474 | /// zero bit. | ||||
475 | /// | ||||
476 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. | ||||
477 | /// Only unsigned integral types are allowed. | ||||
478 | /// | ||||
479 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and | ||||
480 | /// ZB_Undefined are valid arguments. | ||||
481 | template <typename T> | ||||
482 | unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { | ||||
483 | static_assert(std::numeric_limits<T>::is_integer && | ||||
484 | !std::numeric_limits<T>::is_signed, | ||||
485 | "Only unsigned integral types are allowed."); | ||||
486 | return countLeadingZeros<T>(~Value, ZB); | ||||
487 | } | ||||
488 | |||||
489 | /// Count the number of ones from the least significant bit to the first | ||||
490 | /// zero bit. | ||||
491 | /// | ||||
492 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. | ||||
493 | /// Only unsigned integral types are allowed. | ||||
494 | /// | ||||
495 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and | ||||
496 | /// ZB_Undefined are valid arguments. | ||||
497 | template <typename T> | ||||
498 | unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { | ||||
499 | static_assert(std::numeric_limits<T>::is_integer && | ||||
500 | !std::numeric_limits<T>::is_signed, | ||||
501 | "Only unsigned integral types are allowed."); | ||||
502 | return countTrailingZeros<T>(~Value, ZB); | ||||
503 | } | ||||
504 | |||||
505 | namespace detail { | ||||
506 | template <typename T, std::size_t SizeOfT> struct PopulationCounter { | ||||
507 | static unsigned count(T Value) { | ||||
508 | // Generic version, forward to 32 bits. | ||||
509 | static_assert(SizeOfT <= 4, "Not implemented!"); | ||||
510 | #if defined(__GNUC__4) | ||||
511 | return __builtin_popcount(Value); | ||||
512 | #else | ||||
513 | uint32_t v = Value; | ||||
514 | v = v - ((v >> 1) & 0x55555555); | ||||
515 | v = (v & 0x33333333) + ((v >> 2) & 0x33333333); | ||||
516 | return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; | ||||
517 | #endif | ||||
518 | } | ||||
519 | }; | ||||
520 | |||||
521 | template <typename T> struct PopulationCounter<T, 8> { | ||||
522 | static unsigned count(T Value) { | ||||
523 | #if defined(__GNUC__4) | ||||
524 | return __builtin_popcountll(Value); | ||||
525 | #else | ||||
526 | uint64_t v = Value; | ||||
527 | v = v - ((v >> 1) & 0x5555555555555555ULL); | ||||
528 | v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); | ||||
529 | v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; | ||||
530 | return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56); | ||||
531 | #endif | ||||
532 | } | ||||
533 | }; | ||||
534 | } // namespace detail | ||||
535 | |||||
536 | /// Count the number of set bits in a value. | ||||
537 | /// Ex. countPopulation(0xF000F000) = 8 | ||||
538 | /// Returns 0 if the word is zero. | ||||
539 | template <typename T> | ||||
540 | inline unsigned countPopulation(T Value) { | ||||
541 | static_assert(std::numeric_limits<T>::is_integer && | ||||
542 | !std::numeric_limits<T>::is_signed, | ||||
543 | "Only unsigned integral types are allowed."); | ||||
544 | return detail::PopulationCounter<T, sizeof(T)>::count(Value); | ||||
545 | } | ||||
546 | |||||
547 | /// Compile time Log2. | ||||
548 | /// Valid only for positive powers of two. | ||||
549 | template <size_t kValue> constexpr inline size_t CTLog2() { | ||||
550 | static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), | ||||
551 | "Value is not a valid power of 2"); | ||||
552 | return 1 + CTLog2<kValue / 2>(); | ||||
553 | } | ||||
554 | |||||
555 | template <> constexpr inline size_t CTLog2<1>() { return 0; } | ||||
556 | |||||
557 | /// Return the log base 2 of the specified value. | ||||
558 | inline double Log2(double Value) { | ||||
559 | #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 | ||||
560 | return __builtin_log(Value) / __builtin_log(2.0); | ||||
561 | #else | ||||
562 | return log2(Value); | ||||
563 | #endif | ||||
564 | } | ||||
565 | |||||
566 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. | ||||
567 | /// (32 bit edition.) | ||||
568 | /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 | ||||
569 | inline unsigned Log2_32(uint32_t Value) { | ||||
570 | return 31 - countLeadingZeros(Value); | ||||
571 | } | ||||
572 | |||||
573 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. | ||||
574 | /// (64 bit edition.) | ||||
575 | inline unsigned Log2_64(uint64_t Value) { | ||||
576 | return 63 - countLeadingZeros(Value); | ||||
577 | } | ||||
578 | |||||
579 | /// Return the ceil log base 2 of the specified value, 32 if the value is zero. | ||||
580 | /// (32 bit edition). | ||||
581 | /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 | ||||
582 | inline unsigned Log2_32_Ceil(uint32_t Value) { | ||||
583 | return 32 - countLeadingZeros(Value - 1); | ||||
584 | } | ||||
585 | |||||
586 | /// Return the ceil log base 2 of the specified value, 64 if the value is zero. | ||||
587 | /// (64 bit edition.) | ||||
588 | inline unsigned Log2_64_Ceil(uint64_t Value) { | ||||
589 | return 64 - countLeadingZeros(Value - 1); | ||||
590 | } | ||||
591 | |||||
592 | /// Return the greatest common divisor of the values using Euclid's algorithm. | ||||
593 | template <typename T> | ||||
594 | inline T greatestCommonDivisor(T A, T B) { | ||||
595 | while (B) { | ||||
596 | T Tmp = B; | ||||
597 | B = A % B; | ||||
598 | A = Tmp; | ||||
599 | } | ||||
600 | return A; | ||||
601 | } | ||||
602 | |||||
603 | inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { | ||||
604 | return greatestCommonDivisor<uint64_t>(A, B); | ||||
605 | } | ||||
606 | |||||
607 | /// This function takes a 64-bit integer and returns the bit equivalent double. | ||||
608 | inline double BitsToDouble(uint64_t Bits) { | ||||
609 | double D; | ||||
610 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); | ||||
611 | memcpy(&D, &Bits, sizeof(Bits)); | ||||
612 | return D; | ||||
613 | } | ||||
614 | |||||
615 | /// This function takes a 32-bit integer and returns the bit equivalent float. | ||||
616 | inline float BitsToFloat(uint32_t Bits) { | ||||
617 | float F; | ||||
618 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); | ||||
619 | memcpy(&F, &Bits, sizeof(Bits)); | ||||
620 | return F; | ||||
621 | } | ||||
622 | |||||
623 | /// This function takes a double and returns the bit equivalent 64-bit integer. | ||||
624 | /// Note that copying doubles around changes the bits of NaNs on some hosts, | ||||
625 | /// notably x86, so this routine cannot be used if these bits are needed. | ||||
626 | inline uint64_t DoubleToBits(double Double) { | ||||
627 | uint64_t Bits; | ||||
628 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); | ||||
629 | memcpy(&Bits, &Double, sizeof(Double)); | ||||
630 | return Bits; | ||||
631 | } | ||||
632 | |||||
633 | /// This function takes a float and returns the bit equivalent 32-bit integer. | ||||
634 | /// Note that copying floats around changes the bits of NaNs on some hosts, | ||||
635 | /// notably x86, so this routine cannot be used if these bits are needed. | ||||
636 | inline uint32_t FloatToBits(float Float) { | ||||
637 | uint32_t Bits; | ||||
638 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); | ||||
639 | memcpy(&Bits, &Float, sizeof(Float)); | ||||
640 | return Bits; | ||||
641 | } | ||||
642 | |||||
643 | /// A and B are either alignments or offsets. Return the minimum alignment that | ||||
644 | /// may be assumed after adding the two together. | ||||
645 | constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { | ||||
646 | // The largest power of 2 that divides both A and B. | ||||
647 | // | ||||
648 | // Replace "-Value" by "1+~Value" in the following commented code to avoid | ||||
649 | // MSVC warning C4146 | ||||
650 | // return (A | B) & -(A | B); | ||||
651 | return (A | B) & (1 + ~(A | B)); | ||||
652 | } | ||||
653 | |||||
654 | /// Returns the next power of two (in 64-bits) that is strictly greater than A. | ||||
655 | /// Returns zero on overflow. | ||||
656 | inline uint64_t NextPowerOf2(uint64_t A) { | ||||
657 | A |= (A >> 1); | ||||
658 | A |= (A >> 2); | ||||
659 | A |= (A >> 4); | ||||
660 | A |= (A >> 8); | ||||
661 | A |= (A >> 16); | ||||
662 | A |= (A >> 32); | ||||
663 | return A + 1; | ||||
664 | } | ||||
665 | |||||
666 | /// Returns the power of two which is less than or equal to the given value. | ||||
667 | /// Essentially, it is a floor operation across the domain of powers of two. | ||||
668 | inline uint64_t PowerOf2Floor(uint64_t A) { | ||||
669 | if (!A) return 0; | ||||
670 | return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); | ||||
671 | } | ||||
672 | |||||
673 | /// Returns the power of two which is greater than or equal to the given value. | ||||
674 | /// Essentially, it is a ceil operation across the domain of powers of two. | ||||
675 | inline uint64_t PowerOf2Ceil(uint64_t A) { | ||||
676 | if (!A) | ||||
677 | return 0; | ||||
678 | return NextPowerOf2(A - 1); | ||||
679 | } | ||||
680 | |||||
681 | /// Returns the next integer (mod 2**64) that is greater than or equal to | ||||
682 | /// \p Value and is a multiple of \p Align. \p Align must be non-zero. | ||||
683 | /// | ||||
684 | /// If non-zero \p Skew is specified, the return value will be a minimal | ||||
685 | /// integer that is greater than or equal to \p Value and equal to | ||||
686 | /// \p Align * N + \p Skew for some integer N. If \p Skew is larger than | ||||
687 | /// \p Align, its value is adjusted to '\p Skew mod \p Align'. | ||||
688 | /// | ||||
689 | /// Examples: | ||||
690 | /// \code | ||||
691 | /// alignTo(5, 8) = 8 | ||||
692 | /// alignTo(17, 8) = 24 | ||||
693 | /// alignTo(~0LL, 8) = 0 | ||||
694 | /// alignTo(321, 255) = 510 | ||||
695 | /// | ||||
696 | /// alignTo(5, 8, 7) = 7 | ||||
697 | /// alignTo(17, 8, 1) = 17 | ||||
698 | /// alignTo(~0LL, 8, 3) = 3 | ||||
699 | /// alignTo(321, 255, 42) = 552 | ||||
700 | /// \endcode | ||||
701 | inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { | ||||
702 | assert(Align != 0u && "Align can't be 0.")((Align != 0u && "Align can't be 0.") ? static_cast< void> (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 702, __PRETTY_FUNCTION__)); | ||||
703 | Skew %= Align; | ||||
704 | return (Value + Align - 1 - Skew) / Align * Align + Skew; | ||||
705 | } | ||||
706 | |||||
707 | /// Returns the next integer (mod 2**64) that is greater than or equal to | ||||
708 | /// \p Value and is a multiple of \c Align. \c Align must be non-zero. | ||||
709 | template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) { | ||||
710 | static_assert(Align != 0u, "Align must be non-zero"); | ||||
711 | return (Value + Align - 1) / Align * Align; | ||||
712 | } | ||||
713 | |||||
714 | /// Returns the integer ceil(Numerator / Denominator). | ||||
715 | inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { | ||||
716 | return alignTo(Numerator, Denominator) / Denominator; | ||||
717 | } | ||||
718 | |||||
719 | /// Returns the integer nearest(Numerator / Denominator). | ||||
720 | inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { | ||||
721 | return (Numerator + (Denominator / 2)) / Denominator; | ||||
722 | } | ||||
723 | |||||
724 | /// Returns the largest uint64_t less than or equal to \p Value and is | ||||
725 | /// \p Skew mod \p Align. \p Align must be non-zero | ||||
726 | inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { | ||||
727 | assert(Align != 0u && "Align can't be 0.")((Align != 0u && "Align can't be 0.") ? static_cast< void> (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 727, __PRETTY_FUNCTION__)); | ||||
728 | Skew %= Align; | ||||
729 | return (Value - Skew) / Align * Align + Skew; | ||||
730 | } | ||||
731 | |||||
732 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. | ||||
733 | /// Requires 0 < B <= 32. | ||||
734 | template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) { | ||||
735 | static_assert(B > 0, "Bit width can't be 0."); | ||||
736 | static_assert(B <= 32, "Bit width out of range."); | ||||
737 | return int32_t(X << (32 - B)) >> (32 - B); | ||||
738 | } | ||||
739 | |||||
740 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. | ||||
741 | /// Requires 0 < B < 32. | ||||
742 | inline int32_t SignExtend32(uint32_t X, unsigned B) { | ||||
743 | assert(B > 0 && "Bit width can't be 0.")((B > 0 && "Bit width can't be 0.") ? static_cast< void> (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 743, __PRETTY_FUNCTION__)); | ||||
744 | assert(B <= 32 && "Bit width out of range.")((B <= 32 && "Bit width out of range.") ? static_cast <void> (0) : __assert_fail ("B <= 32 && \"Bit width out of range.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 744, __PRETTY_FUNCTION__)); | ||||
745 | return int32_t(X << (32 - B)) >> (32 - B); | ||||
746 | } | ||||
747 | |||||
748 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. | ||||
749 | /// Requires 0 < B < 64. | ||||
750 | template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) { | ||||
751 | static_assert(B > 0, "Bit width can't be 0."); | ||||
752 | static_assert(B <= 64, "Bit width out of range."); | ||||
753 | return int64_t(x << (64 - B)) >> (64 - B); | ||||
754 | } | ||||
755 | |||||
756 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. | ||||
757 | /// Requires 0 < B < 64. | ||||
758 | inline int64_t SignExtend64(uint64_t X, unsigned B) { | ||||
759 | assert(B > 0 && "Bit width can't be 0.")((B > 0 && "Bit width can't be 0.") ? static_cast< void> (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 759, __PRETTY_FUNCTION__)); | ||||
760 | assert(B <= 64 && "Bit width out of range.")((B <= 64 && "Bit width out of range.") ? static_cast <void> (0) : __assert_fail ("B <= 64 && \"Bit width out of range.\"" , "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h" , 760, __PRETTY_FUNCTION__)); | ||||
761 | return int64_t(X << (64 - B)) >> (64 - B); | ||||
762 | } | ||||
763 | |||||
764 | /// Subtract two unsigned integers, X and Y, of type T and return the absolute | ||||
765 | /// value of the result. | ||||
766 | template <typename T> | ||||
767 | std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) { | ||||
768 | return std::max(X, Y) - std::min(X, Y); | ||||
769 | } | ||||
770 | |||||
771 | /// Add two unsigned integers, X and Y, of type T. Clamp the result to the | ||||
772 | /// maximum representable value of T on overflow. ResultOverflowed indicates if | ||||
773 | /// the result is larger than the maximum representable value of type T. | ||||
774 | template <typename T> | ||||
775 | std::enable_if_t<std::is_unsigned<T>::value, T> | ||||
776 | SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { | ||||
777 | bool Dummy; | ||||
778 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; | ||||
779 | // Hacker's Delight, p. 29 | ||||
780 | T Z = X + Y; | ||||
781 | Overflowed = (Z < X || Z < Y); | ||||
782 | if (Overflowed) | ||||
783 | return std::numeric_limits<T>::max(); | ||||
784 | else | ||||
785 | return Z; | ||||
786 | } | ||||
787 | |||||
788 | /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the | ||||
789 | /// maximum representable value of T on overflow. ResultOverflowed indicates if | ||||
790 | /// the result is larger than the maximum representable value of type T. | ||||
791 | template <typename T> | ||||
792 | std::enable_if_t<std::is_unsigned<T>::value, T> | ||||
793 | SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { | ||||
794 | bool Dummy; | ||||
795 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; | ||||
796 | |||||
797 | // Hacker's Delight, p. 30 has a different algorithm, but we don't use that | ||||
798 | // because it fails for uint16_t (where multiplication can have undefined | ||||
799 | // behavior due to promotion to int), and requires a division in addition | ||||
800 | // to the multiplication. | ||||
801 | |||||
802 | Overflowed = false; | ||||
803 | |||||
804 | // Log2(Z) would be either Log2Z or Log2Z + 1. | ||||
805 | // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z | ||||
806 | // will necessarily be less than Log2Max as desired. | ||||
807 | int Log2Z = Log2_64(X) + Log2_64(Y); | ||||
808 | const T Max = std::numeric_limits<T>::max(); | ||||
809 | int Log2Max = Log2_64(Max); | ||||
810 | if (Log2Z < Log2Max) { | ||||
811 | return X * Y; | ||||
812 | } | ||||
813 | if (Log2Z > Log2Max) { | ||||
814 | Overflowed = true; | ||||
815 | return Max; | ||||
816 | } | ||||
817 | |||||
818 | // We're going to use the top bit, and maybe overflow one | ||||
819 | // bit past it. Multiply all but the bottom bit then add | ||||
820 | // that on at the end. | ||||
821 | T Z = (X >> 1) * Y; | ||||
822 | if (Z & ~(Max >> 1)) { | ||||
823 | Overflowed = true; | ||||
824 | return Max; | ||||
825 | } | ||||
826 | Z <<= 1; | ||||
827 | if (X & 1) | ||||
828 | return SaturatingAdd(Z, Y, ResultOverflowed); | ||||
829 | |||||
830 | return Z; | ||||
831 | } | ||||
832 | |||||
833 | /// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to | ||||
834 | /// the product. Clamp the result to the maximum representable value of T on | ||||
835 | /// overflow. ResultOverflowed indicates if the result is larger than the | ||||
836 | /// maximum representable value of type T. | ||||
837 | template <typename T> | ||||
838 | std::enable_if_t<std::is_unsigned<T>::value, T> | ||||
839 | SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { | ||||
840 | bool Dummy; | ||||
841 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; | ||||
842 | |||||
843 | T Product = SaturatingMultiply(X, Y, &Overflowed); | ||||
844 | if (Overflowed) | ||||
845 | return Product; | ||||
846 | |||||
847 | return SaturatingAdd(A, Product, &Overflowed); | ||||
848 | } | ||||
849 | |||||
850 | /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. | ||||
851 | extern const float huge_valf; | ||||
852 | |||||
853 | |||||
854 | /// Add two signed integers, computing the two's complement truncated result, | ||||
855 | /// returning true if overflow occured. | ||||
856 | template <typename T> | ||||
857 | std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) { | ||||
858 | #if __has_builtin(__builtin_add_overflow)1 | ||||
859 | return __builtin_add_overflow(X, Y, &Result); | ||||
860 | #else | ||||
861 | // Perform the unsigned addition. | ||||
862 | using U = std::make_unsigned_t<T>; | ||||
863 | const U UX = static_cast<U>(X); | ||||
864 | const U UY = static_cast<U>(Y); | ||||
865 | const U UResult = UX + UY; | ||||
866 | |||||
867 | // Convert to signed. | ||||
868 | Result = static_cast<T>(UResult); | ||||
869 | |||||
870 | // Adding two positive numbers should result in a positive number. | ||||
871 | if (X > 0 && Y > 0) | ||||
872 | return Result <= 0; | ||||
873 | // Adding two negatives should result in a negative number. | ||||
874 | if (X < 0 && Y < 0) | ||||
875 | return Result >= 0; | ||||
876 | return false; | ||||
877 | #endif | ||||
878 | } | ||||
879 | |||||
880 | /// Subtract two signed integers, computing the two's complement truncated | ||||
881 | /// result, returning true if an overflow ocurred. | ||||
882 | template <typename T> | ||||
883 | std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) { | ||||
884 | #if __has_builtin(__builtin_sub_overflow)1 | ||||
885 | return __builtin_sub_overflow(X, Y, &Result); | ||||
886 | #else | ||||
887 | // Perform the unsigned addition. | ||||
888 | using U = std::make_unsigned_t<T>; | ||||
889 | const U UX = static_cast<U>(X); | ||||
890 | const U UY = static_cast<U>(Y); | ||||
891 | const U UResult = UX - UY; | ||||
892 | |||||
893 | // Convert to signed. | ||||
894 | Result = static_cast<T>(UResult); | ||||
895 | |||||
896 | // Subtracting a positive number from a negative results in a negative number. | ||||
897 | if (X <= 0 && Y > 0) | ||||
898 | return Result >= 0; | ||||
899 | // Subtracting a negative number from a positive results in a positive number. | ||||
900 | if (X >= 0 && Y < 0) | ||||
901 | return Result <= 0; | ||||
902 | return false; | ||||
903 | #endif | ||||
904 | } | ||||
905 | |||||
906 | /// Multiply two signed integers, computing the two's complement truncated | ||||
907 | /// result, returning true if an overflow ocurred. | ||||
908 | template <typename T> | ||||
909 | std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) { | ||||
910 | // Perform the unsigned multiplication on absolute values. | ||||
911 | using U = std::make_unsigned_t<T>; | ||||
912 | const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X); | ||||
913 | const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y); | ||||
914 | const U UResult = UX * UY; | ||||
915 | |||||
916 | // Convert to signed. | ||||
917 | const bool IsNegative = (X < 0) ^ (Y < 0); | ||||
918 | Result = IsNegative ? (0 - UResult) : UResult; | ||||
919 | |||||
920 | // If any of the args was 0, result is 0 and no overflow occurs. | ||||
921 | if (UX == 0 || UY == 0) | ||||
922 | return false; | ||||
923 | |||||
924 | // UX and UY are in [1, 2^n], where n is the number of digits. | ||||
925 | // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for | ||||
926 | // positive) divided by an argument compares to the other. | ||||
927 | if (IsNegative) | ||||
928 | return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY; | ||||
929 | else | ||||
930 | return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY; | ||||
931 | } | ||||
932 | |||||
933 | } // End llvm namespace | ||||
934 | |||||
935 | #endif |