Bug Summary

File:llvm/lib/Target/AMDGPU/SIModeRegister.cpp
Warning:line 197, column 55
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIModeRegister.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AMDGPU/SIModeRegister.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AMDGPU/SIModeRegister.cpp

1//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass inserts changes to the Mode register settings as required.
10/// Note that currently it only deals with the Double Precision Floating Point
11/// rounding mode setting, but is intended to be generic enough to be easily
12/// expanded.
13///
14//===----------------------------------------------------------------------===//
15//
16#include "AMDGPU.h"
17#include "AMDGPUInstrInfo.h"
18#include "AMDGPUSubtarget.h"
19#include "SIInstrInfo.h"
20#include "SIMachineFunctionInfo.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/LLVMContext.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/raw_ostream.h"
30#include "llvm/Target/TargetMachine.h"
31#include <queue>
32
33#define DEBUG_TYPE"si-mode-register" "si-mode-register"
34
35STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.")static llvm::Statistic NumSetregInserted = {"si-mode-register"
, "NumSetregInserted", "Number of setreg of mode register inserted."
}
;
36
37using namespace llvm;
38
39struct Status {
40 // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
41 // known value
42 unsigned Mask;
43 unsigned Mode;
44
45 Status() : Mask(0), Mode(0){};
46
47 Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {
48 Mode &= Mask;
49 };
50
51 // merge two status values such that only values that don't conflict are
52 // preserved
53 Status merge(const Status &S) const {
54 return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
55 }
56
57 // merge an unknown value by using the unknown value's mask to remove bits
58 // from the result
59 Status mergeUnknown(unsigned newMask) {
60 return Status(Mask & ~newMask, Mode & ~newMask);
61 }
62
63 // intersect two Status values to produce a mode and mask that is a subset
64 // of both values
65 Status intersect(const Status &S) const {
66 unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
67 unsigned NewMode = (Mode & NewMask);
68 return Status(NewMask, NewMode);
69 }
70
71 // produce the delta required to change the Mode to the required Mode
72 Status delta(const Status &S) const {
73 return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
74 }
75
76 bool operator==(const Status &S) const {
77 return (Mask == S.Mask) && (Mode == S.Mode);
78 }
79
80 bool operator!=(const Status &S) const { return !(*this == S); }
81
82 bool isCompatible(Status &S) {
83 return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
84 }
85
86 bool isCombinable(Status &S) {
87 return !(Mask & S.Mask) || isCompatible(S);
88 }
89};
90
91class BlockData {
92public:
93 // The Status that represents the mode register settings required by the
94 // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
95 Status Require;
96
97 // The Status that represents the net changes to the Mode register made by
98 // this block, Calculated in Phase 1.
99 Status Change;
100
101 // The Status that represents the mode register settings on exit from this
102 // block. Calculated in Phase 2.
103 Status Exit;
104
105 // The Status that represents the intersection of exit Mode register settings
106 // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
107 Status Pred;
108
109 // In Phase 1 we record the first instruction that has a mode requirement,
110 // which is used in Phase 3 if we need to insert a mode change.
111 MachineInstr *FirstInsertionPoint;
112
113 BlockData() : FirstInsertionPoint(nullptr) {};
114};
115
116namespace {
117
118class SIModeRegister : public MachineFunctionPass {
119public:
120 static char ID;
121
122 std::vector<std::unique_ptr<BlockData>> BlockInfo;
123 std::queue<MachineBasicBlock *> Phase2List;
124
125 // The default mode register setting currently only caters for the floating
126 // point double precision rounding mode.
127 // We currently assume the default rounding mode is Round to Nearest
128 // NOTE: this should come from a per function rounding mode setting once such
129 // a setting exists.
130 unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST0;
131 Status DefaultStatus =
132 Status(FP_ROUND_MODE_DP(0x3)(((0x3) & 0x3) << 2), FP_ROUND_MODE_DP(DefaultMode)(((DefaultMode) & 0x3) << 2));
133
134public:
135 SIModeRegister() : MachineFunctionPass(ID) {}
136
137 bool runOnMachineFunction(MachineFunction &MF) override;
138
139 void getAnalysisUsage(AnalysisUsage &AU) const override {
140 AU.setPreservesCFG();
141 MachineFunctionPass::getAnalysisUsage(AU);
142 }
143
144 void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
145
146 void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
147
148 void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
149
150 Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
151
152 void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
153 const SIInstrInfo *TII, Status InstrMode);
154};
155} // End anonymous namespace.
156
157INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,static void *initializeSIModeRegisterPassOnce(PassRegistry &
Registry) { PassInfo *PI = new PassInfo( "Insert required mode register values"
, "si-mode-register", &SIModeRegister::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SIModeRegister>), false, false); Registry
.registerPass(*PI, true); return PI; } static llvm::once_flag
InitializeSIModeRegisterPassFlag; void llvm::initializeSIModeRegisterPass
(PassRegistry &Registry) { llvm::call_once(InitializeSIModeRegisterPassFlag
, initializeSIModeRegisterPassOnce, std::ref(Registry)); }
158 "Insert required mode register values", false, false)static void *initializeSIModeRegisterPassOnce(PassRegistry &
Registry) { PassInfo *PI = new PassInfo( "Insert required mode register values"
, "si-mode-register", &SIModeRegister::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SIModeRegister>), false, false); Registry
.registerPass(*PI, true); return PI; } static llvm::once_flag
InitializeSIModeRegisterPassFlag; void llvm::initializeSIModeRegisterPass
(PassRegistry &Registry) { llvm::call_once(InitializeSIModeRegisterPassFlag
, initializeSIModeRegisterPassOnce, std::ref(Registry)); }
159
160char SIModeRegister::ID = 0;
161
162char &llvm::SIModeRegisterID = SIModeRegister::ID;
163
164FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
165
166// Determine the Mode register setting required for this instruction.
167// Instructions which don't use the Mode register return a null Status.
168// Note this currently only deals with instructions that use the floating point
169// double precision setting.
170Status SIModeRegister::getInstructionMode(MachineInstr &MI,
171 const SIInstrInfo *TII) {
172 if (TII->usesFPDPRounding(MI)) {
173 switch (MI.getOpcode()) {
174 case AMDGPU::V_INTERP_P1LL_F16:
175 case AMDGPU::V_INTERP_P1LV_F16:
176 case AMDGPU::V_INTERP_P2_F16:
177 // f16 interpolation instructions need double precision round to zero
178 return Status(FP_ROUND_MODE_DP(3)(((3) & 0x3) << 2),
179 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO)(((3) & 0x3) << 2));
180 default:
181 return DefaultStatus;
182 }
183 }
184 return Status();
185}
186
187// Insert a setreg instruction to update the Mode register.
188// It is possible (though unlikely) for an instruction to require a change to
189// the value of disjoint parts of the Mode register when we don't know the
190// value of the intervening bits. In that case we need to use more than one
191// setreg instruction.
192void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
193 const SIInstrInfo *TII, Status InstrMode) {
194 while (InstrMode.Mask) {
8
Loop condition is true. Entering loop body
195 unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
196 unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
9
Calling 'countTrailingOnes<unsigned int>'
19
Returning from 'countTrailingOnes<unsigned int>'
20
'Width' initialized to 32
197 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
198 BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
199 .addImm(Value)
200 .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
201 (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
202 (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
203 ++NumSetregInserted;
204 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
205 }
206}
207
208// In Phase 1 we iterate through the instructions of the block and for each
209// instruction we get its mode usage. If the instruction uses the Mode register
210// we:
211// - update the Change status, which tracks the changes to the Mode register
212// made by this block
213// - if this instruction's requirements are compatible with the current setting
214// of the Mode register we merge the modes
215// - if it isn't compatible and an InsertionPoint isn't set, then we set the
216// InsertionPoint to the current instruction, and we remember the current
217// mode
218// - if it isn't compatible and InsertionPoint is set we insert a seteg before
219// that instruction (unless this instruction forms part of the block's
220// entry requirements in which case the insertion is deferred until Phase 3
221// when predecessor exit values are known), and move the insertion point to
222// this instruction
223// - if this is a setreg instruction we treat it as an incompatible instruction.
224// This is sub-optimal but avoids some nasty corner cases, and is expected to
225// occur very rarely.
226// - on exit we have set the Require, Change, and initial Exit modes.
227void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
228 const SIInstrInfo *TII) {
229 auto NewInfo = std::make_unique<BlockData>();
230 MachineInstr *InsertionPoint = nullptr;
231 // RequirePending is used to indicate whether we are collecting the initial
232 // requirements for the block, and need to defer the first InsertionPoint to
233 // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
234 // we discover an explict setreg that means this block doesn't have any
235 // initial requirements.
236 bool RequirePending = true;
237 Status IPChange;
238 for (MachineInstr &MI : MBB) {
239 Status InstrMode = getInstructionMode(MI, TII);
240 if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) ||
241 (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
242 // We preserve any explicit mode register setreg instruction we encounter,
243 // as we assume it has been inserted by a higher authority (this is
244 // likely to be a very rare occurrence).
245 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
246 if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
247 AMDGPU::Hwreg::ID_MODE)
248 continue;
249
250 unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
251 AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
252 1;
253 unsigned Offset =
254 (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
255 unsigned Mask = ((1 << Width) - 1) << Offset;
256
257 // If an InsertionPoint is set we will insert a setreg there.
258 if (InsertionPoint) {
259 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
260 InsertionPoint = nullptr;
261 }
262 // If this is an immediate then we know the value being set, but if it is
263 // not an immediate then we treat the modified bits of the mode register
264 // as unknown.
265 if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
266 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
267 unsigned Mode = (Val << Offset) & Mask;
268 Status Setreg = Status(Mask, Mode);
269 // If we haven't already set the initial requirements for the block we
270 // don't need to as the requirements start from this explicit setreg.
271 RequirePending = false;
272 NewInfo->Change = NewInfo->Change.merge(Setreg);
273 } else {
274 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
275 }
276 } else if (!NewInfo->Change.isCompatible(InstrMode)) {
277 // This instruction uses the Mode register and its requirements aren't
278 // compatible with the current mode.
279 if (InsertionPoint) {
280 // If the required mode change cannot be included in the current
281 // InsertionPoint changes, we need a setreg and start a new
282 // InsertionPoint.
283 if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
284 if (RequirePending) {
285 // This is the first insertionPoint in the block so we will defer
286 // the insertion of the setreg to Phase 3 where we know whether or
287 // not it is actually needed.
288 NewInfo->FirstInsertionPoint = InsertionPoint;
289 NewInfo->Require = NewInfo->Change;
290 RequirePending = false;
291 } else {
292 insertSetreg(MBB, InsertionPoint, TII,
293 IPChange.delta(NewInfo->Change));
294 IPChange = NewInfo->Change;
295 }
296 // Set the new InsertionPoint
297 InsertionPoint = &MI;
298 }
299 NewInfo->Change = NewInfo->Change.merge(InstrMode);
300 } else {
301 // No InsertionPoint is currently set - this is either the first in
302 // the block or we have previously seen an explicit setreg.
303 InsertionPoint = &MI;
304 IPChange = NewInfo->Change;
305 NewInfo->Change = NewInfo->Change.merge(InstrMode);
306 }
307 }
308 }
309 if (RequirePending) {
310 // If we haven't yet set the initial requirements for the block we set them
311 // now.
312 NewInfo->FirstInsertionPoint = InsertionPoint;
313 NewInfo->Require = NewInfo->Change;
314 } else if (InsertionPoint) {
315 // We need to insert a setreg at the InsertionPoint
316 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
317 }
318 NewInfo->Exit = NewInfo->Change;
319 BlockInfo[MBB.getNumber()] = std::move(NewInfo);
320}
321
322// In Phase 2 we revisit each block and calculate the common Mode register
323// value provided by all predecessor blocks. If the Exit value for the block
324// is changed, then we add the successor blocks to the worklist so that the
325// exit value is propagated.
326void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
327 const SIInstrInfo *TII) {
328// BlockData *BI = BlockInfo[MBB.getNumber()];
329 unsigned ThisBlock = MBB.getNumber();
330 if (MBB.pred_empty()) {
331 // There are no predecessors, so use the default starting status.
332 BlockInfo[ThisBlock]->Pred = DefaultStatus;
333 } else {
334 // Build a status that is common to all the predecessors by intersecting
335 // all the predecessor exit status values.
336 MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
337 MachineBasicBlock &PB = *(*P);
338 BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
339
340 for (P = std::next(P); P != E; P = std::next(P)) {
341 MachineBasicBlock *Pred = *P;
342 BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit);
343 }
344 }
345 Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
346 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
347 BlockInfo[ThisBlock]->Exit = TmpStatus;
348 // Add the successors to the work list so we can propagate the changed exit
349 // status.
350 for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
351 E = MBB.succ_end();
352 S != E; S = std::next(S)) {
353 MachineBasicBlock &B = *(*S);
354 Phase2List.push(&B);
355 }
356 }
357}
358
359// In Phase 3 we revisit each block and if it has an insertion point defined we
360// check whether the predecessor mode meets the block's entry requirements. If
361// not we insert an appropriate setreg instruction to modify the Mode register.
362void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
363 const SIInstrInfo *TII) {
364// BlockData *BI = BlockInfo[MBB.getNumber()];
365 unsigned ThisBlock = MBB.getNumber();
366 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
4
Taking true branch
367 Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
368 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
5
Assuming field 'FirstInsertionPoint' is null
6
Taking false branch
369 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
370 else
371 insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
7
Calling 'SIModeRegister::insertSetreg'
372 }
373}
374
375bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
376 BlockInfo.resize(MF.getNumBlockIDs());
377 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
378 const SIInstrInfo *TII = ST.getInstrInfo();
379
380 // Processing is performed in a number of phases
381
382 // Phase 1 - determine the initial mode required by each block, and add setreg
383 // instructions for intra block requirements.
384 for (MachineBasicBlock &BB : MF)
385 processBlockPhase1(BB, TII);
386
387 // Phase 2 - determine the exit mode from each block. We add all blocks to the
388 // list here, but will also add any that need to be revisited during Phase 2
389 // processing.
390 for (MachineBasicBlock &BB : MF)
391 Phase2List.push(&BB);
392 while (!Phase2List.empty()) {
1
Assuming the condition is false
2
Loop condition is false. Execution continues on line 399
393 processBlockPhase2(*Phase2List.front(), TII);
394 Phase2List.pop();
395 }
396
397 // Phase 3 - add an initial setreg to each block where the required entry mode
398 // is not satisfied by the exit mode of all its predecessors.
399 for (MachineBasicBlock &BB : MF)
400 processBlockPhase3(BB, TII);
3
Calling 'SIModeRegister::processBlockPhase3'
401
402 BlockInfo.clear();
403
404 return NumSetregInserted > 0;
405}

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/SwapByteOrder.h"
18#include <algorithm>
19#include <cassert>
20#include <climits>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__4) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB
11.1
'ZB' is not equal to ZB_Undefined
11.1
'ZB' is not equal to ZB_Undefined
!= ZB_Undefined && Val == 0)
12
Assuming 'Val' is equal to 0
13
Taking true branch
117 return 32;
14
Returning the value 32
118
119#if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
133 return 64;
134
135#if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count number of 0's from the least significant bit to the most
149/// stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
11
Calling 'TrailingZerosCounter::count'
15
Returning from 'TrailingZerosCounter::count'
16
Returning the value 32
161}
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__4) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz)1 || defined(__GNUC__4)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count number of 0's from the most significant bit to the least
218/// stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250 const unsigned Bits = CHAR_BIT8 * sizeof(T);
251 assert(N <= Bits && "Invalid bit index")((N <= Bits && "Invalid bit index") ? static_cast<
void> (0) : __assert_fail ("N <= Bits && \"Invalid bit index\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 251, __PRETTY_FUNCTION__))
;
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
271}
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
314
315// NOTE: The following support functions use the _32/_64 extensions instead of
316// type overloading so that signed and unsigned integers can be used without
317// ambiguity.
318
319/// Return the high 32 bits of a 64 bit value.
320constexpr inline uint32_t Hi_32(uint64_t Value) {
321 return static_cast<uint32_t>(Value >> 32);
322}
323
324/// Return the low 32 bits of a 64 bit value.
325constexpr inline uint32_t Lo_32(uint64_t Value) {
326 return static_cast<uint32_t>(Value);
327}
328
329/// Make a 64-bit integer from a high / low pair of 32-bit integers.
330constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
331 return ((uint64_t)High << 32) | (uint64_t)Low;
332}
333
334/// Checks if an integer fits into the given bit width.
335template <unsigned N> constexpr inline bool isInt(int64_t x) {
336 return N >= 64 || (-(INT64_C(1)1L<<(N-1)) <= x && x < (INT64_C(1)1L<<(N-1)));
337}
338// Template specializations to get better code for common cases.
339template <> constexpr inline bool isInt<8>(int64_t x) {
340 return static_cast<int8_t>(x) == x;
341}
342template <> constexpr inline bool isInt<16>(int64_t x) {
343 return static_cast<int16_t>(x) == x;
344}
345template <> constexpr inline bool isInt<32>(int64_t x) {
346 return static_cast<int32_t>(x) == x;
347}
348
349/// Checks if a signed integer is an N bit number shifted left by S.
350template <unsigned N, unsigned S>
351constexpr inline bool isShiftedInt(int64_t x) {
352 static_assert(
353 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
354 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
355 return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0);
356}
357
358/// Checks if an unsigned integer fits into the given bit width.
359///
360/// This is written as two functions rather than as simply
361///
362/// return N >= 64 || X < (UINT64_C(1) << N);
363///
364/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
365/// left too many places.
366template <unsigned N>
367constexpr inline typename std::enable_if<(N < 64), bool>::type
368isUInt(uint64_t X) {
369 static_assert(N > 0, "isUInt<0> doesn't make sense");
370 return X < (UINT64_C(1)1UL << (N));
371}
372template <unsigned N>
373constexpr inline typename std::enable_if<N >= 64, bool>::type
374isUInt(uint64_t X) {
375 return true;
376}
377
378// Template specializations to get better code for common cases.
379template <> constexpr inline bool isUInt<8>(uint64_t x) {
380 return static_cast<uint8_t>(x) == x;
381}
382template <> constexpr inline bool isUInt<16>(uint64_t x) {
383 return static_cast<uint16_t>(x) == x;
384}
385template <> constexpr inline bool isUInt<32>(uint64_t x) {
386 return static_cast<uint32_t>(x) == x;
387}
388
389/// Checks if a unsigned integer is an N bit number shifted left by S.
390template <unsigned N, unsigned S>
391constexpr inline bool isShiftedUInt(uint64_t x) {
392 static_assert(
393 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
394 static_assert(N + S <= 64,
395 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
396 // Per the two static_asserts above, S must be strictly less than 64. So
397 // 1 << S is not undefined behavior.
398 return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0);
399}
400
401/// Gets the maximum value for a N-bit unsigned integer.
402inline uint64_t maxUIntN(uint64_t N) {
403 assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range"
) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 403, __PRETTY_FUNCTION__))
;
404
405 // uint64_t(1) << 64 is undefined behavior, so we can't do
406 // (uint64_t(1) << N) - 1
407 // without checking first that N != 64. But this works and doesn't have a
408 // branch.
409 return UINT64_MAX(18446744073709551615UL) >> (64 - N);
410}
411
412/// Gets the minimum value for a N-bit signed integer.
413inline int64_t minIntN(int64_t N) {
414 assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range"
) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 414, __PRETTY_FUNCTION__))
;
415
416 return -(UINT64_C(1)1UL<<(N-1));
417}
418
419/// Gets the maximum value for a N-bit signed integer.
420inline int64_t maxIntN(int64_t N) {
421 assert(N > 0 && N <= 64 && "integer width out of range")((N > 0 && N <= 64 && "integer width out of range"
) ? static_cast<void> (0) : __assert_fail ("N > 0 && N <= 64 && \"integer width out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 421, __PRETTY_FUNCTION__))
;
422
423 // This relies on two's complement wraparound when N == 64, so we convert to
424 // int64_t only at the very end to avoid UB.
425 return (UINT64_C(1)1UL << (N - 1)) - 1;
426}
427
428/// Checks if an unsigned integer fits into the given (dynamic) bit width.
429inline bool isUIntN(unsigned N, uint64_t x) {
430 return N >= 64 || x <= maxUIntN(N);
431}
432
433/// Checks if an signed integer fits into the given (dynamic) bit width.
434inline bool isIntN(unsigned N, int64_t x) {
435 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
436}
437
438/// Return true if the argument is a non-empty sequence of ones starting at the
439/// least significant bit with the remainder zero (32 bit version).
440/// Ex. isMask_32(0x0000FFFFU) == true.
441constexpr inline bool isMask_32(uint32_t Value) {
442 return Value && ((Value + 1) & Value) == 0;
443}
444
445/// Return true if the argument is a non-empty sequence of ones starting at the
446/// least significant bit with the remainder zero (64 bit version).
447constexpr inline bool isMask_64(uint64_t Value) {
448 return Value && ((Value + 1) & Value) == 0;
449}
450
451/// Return true if the argument contains a non-empty sequence of ones with the
452/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
453constexpr inline bool isShiftedMask_32(uint32_t Value) {
454 return Value && isMask_32((Value - 1) | Value);
455}
456
457/// Return true if the argument contains a non-empty sequence of ones with the
458/// remainder zero (64 bit version.)
459constexpr inline bool isShiftedMask_64(uint64_t Value) {
460 return Value && isMask_64((Value - 1) | Value);
461}
462
463/// Return true if the argument is a power of two > 0.
464/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
465constexpr inline bool isPowerOf2_32(uint32_t Value) {
466 return Value && !(Value & (Value - 1));
467}
468
469/// Return true if the argument is a power of two > 0 (64 bit edition.)
470constexpr inline bool isPowerOf2_64(uint64_t Value) {
471 return Value && !(Value & (Value - 1));
472}
473
474/// Return a byte-swapped representation of the 16-bit argument.
475inline uint16_t ByteSwap_16(uint16_t Value) {
476 return sys::SwapByteOrder_16(Value);
477}
478
479/// Return a byte-swapped representation of the 32-bit argument.
480inline uint32_t ByteSwap_32(uint32_t Value) {
481 return sys::SwapByteOrder_32(Value);
482}
483
484/// Return a byte-swapped representation of the 64-bit argument.
485inline uint64_t ByteSwap_64(uint64_t Value) {
486 return sys::SwapByteOrder_64(Value);
487}
488
489/// Count the number of ones from the most significant bit to the first
490/// zero bit.
491///
492/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
493/// Only unsigned integral types are allowed.
494///
495/// \param ZB the behavior on an input of all ones. Only ZB_Width and
496/// ZB_Undefined are valid arguments.
497template <typename T>
498unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
499 static_assert(std::numeric_limits<T>::is_integer &&
500 !std::numeric_limits<T>::is_signed,
501 "Only unsigned integral types are allowed.");
502 return countLeadingZeros<T>(~Value, ZB);
503}
504
505/// Count the number of ones from the least significant bit to the first
506/// zero bit.
507///
508/// Ex. countTrailingOnes(0x00FF00FF) == 8.
509/// Only unsigned integral types are allowed.
510///
511/// \param ZB the behavior on an input of all ones. Only ZB_Width and
512/// ZB_Undefined are valid arguments.
513template <typename T>
514unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
515 static_assert(std::numeric_limits<T>::is_integer &&
516 !std::numeric_limits<T>::is_signed,
517 "Only unsigned integral types are allowed.");
518 return countTrailingZeros<T>(~Value, ZB);
10
Calling 'countTrailingZeros<unsigned int>'
17
Returning from 'countTrailingZeros<unsigned int>'
18
Returning the value 32
519}
520
521namespace detail {
522template <typename T, std::size_t SizeOfT> struct PopulationCounter {
523 static unsigned count(T Value) {
524 // Generic version, forward to 32 bits.
525 static_assert(SizeOfT <= 4, "Not implemented!");
526#if defined(__GNUC__4)
527 return __builtin_popcount(Value);
528#else
529 uint32_t v = Value;
530 v = v - ((v >> 1) & 0x55555555);
531 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
532 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
533#endif
534 }
535};
536
537template <typename T> struct PopulationCounter<T, 8> {
538 static unsigned count(T Value) {
539#if defined(__GNUC__4)
540 return __builtin_popcountll(Value);
541#else
542 uint64_t v = Value;
543 v = v - ((v >> 1) & 0x5555555555555555ULL);
544 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
545 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
546 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
547#endif
548 }
549};
550} // namespace detail
551
552/// Count the number of set bits in a value.
553/// Ex. countPopulation(0xF000F000) = 8
554/// Returns 0 if the word is zero.
555template <typename T>
556inline unsigned countPopulation(T Value) {
557 static_assert(std::numeric_limits<T>::is_integer &&
558 !std::numeric_limits<T>::is_signed,
559 "Only unsigned integral types are allowed.");
560 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
561}
562
563/// Compile time Log2.
564/// Valid only for positive powers of two.
565template <size_t kValue> constexpr inline size_t CTLog2() {
566 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
567 "Value is not a valid power of 2");
568 return 1 + CTLog2<kValue / 2>();
569}
570
571template <> constexpr inline size_t CTLog2<1>() { return 0; }
572
573/// Return the log base 2 of the specified value.
574inline double Log2(double Value) {
575#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
576 return __builtin_log(Value) / __builtin_log(2.0);
577#else
578 return log2(Value);
579#endif
580}
581
582/// Return the floor log base 2 of the specified value, -1 if the value is zero.
583/// (32 bit edition.)
584/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
585inline unsigned Log2_32(uint32_t Value) {
586 return 31 - countLeadingZeros(Value);
587}
588
589/// Return the floor log base 2 of the specified value, -1 if the value is zero.
590/// (64 bit edition.)
591inline unsigned Log2_64(uint64_t Value) {
592 return 63 - countLeadingZeros(Value);
593}
594
595/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
596/// (32 bit edition).
597/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
598inline unsigned Log2_32_Ceil(uint32_t Value) {
599 return 32 - countLeadingZeros(Value - 1);
600}
601
602/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
603/// (64 bit edition.)
604inline unsigned Log2_64_Ceil(uint64_t Value) {
605 return 64 - countLeadingZeros(Value - 1);
606}
607
608/// Return the greatest common divisor of the values using Euclid's algorithm.
609template <typename T>
610inline T greatestCommonDivisor(T A, T B) {
611 while (B) {
612 T Tmp = B;
613 B = A % B;
614 A = Tmp;
615 }
616 return A;
617}
618
619inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
620 return greatestCommonDivisor<uint64_t>(A, B);
621}
622
623/// This function takes a 64-bit integer and returns the bit equivalent double.
624inline double BitsToDouble(uint64_t Bits) {
625 double D;
626 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
627 memcpy(&D, &Bits, sizeof(Bits));
628 return D;
629}
630
631/// This function takes a 32-bit integer and returns the bit equivalent float.
632inline float BitsToFloat(uint32_t Bits) {
633 float F;
634 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
635 memcpy(&F, &Bits, sizeof(Bits));
636 return F;
637}
638
639/// This function takes a double and returns the bit equivalent 64-bit integer.
640/// Note that copying doubles around changes the bits of NaNs on some hosts,
641/// notably x86, so this routine cannot be used if these bits are needed.
642inline uint64_t DoubleToBits(double Double) {
643 uint64_t Bits;
644 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
645 memcpy(&Bits, &Double, sizeof(Double));
646 return Bits;
647}
648
649/// This function takes a float and returns the bit equivalent 32-bit integer.
650/// Note that copying floats around changes the bits of NaNs on some hosts,
651/// notably x86, so this routine cannot be used if these bits are needed.
652inline uint32_t FloatToBits(float Float) {
653 uint32_t Bits;
654 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
655 memcpy(&Bits, &Float, sizeof(Float));
656 return Bits;
657}
658
659/// A and B are either alignments or offsets. Return the minimum alignment that
660/// may be assumed after adding the two together.
661constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
662 // The largest power of 2 that divides both A and B.
663 //
664 // Replace "-Value" by "1+~Value" in the following commented code to avoid
665 // MSVC warning C4146
666 // return (A | B) & -(A | B);
667 return (A | B) & (1 + ~(A | B));
668}
669
670/// Returns the next power of two (in 64-bits) that is strictly greater than A.
671/// Returns zero on overflow.
672inline uint64_t NextPowerOf2(uint64_t A) {
673 A |= (A >> 1);
674 A |= (A >> 2);
675 A |= (A >> 4);
676 A |= (A >> 8);
677 A |= (A >> 16);
678 A |= (A >> 32);
679 return A + 1;
680}
681
682/// Returns the power of two which is less than or equal to the given value.
683/// Essentially, it is a floor operation across the domain of powers of two.
684inline uint64_t PowerOf2Floor(uint64_t A) {
685 if (!A) return 0;
686 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
687}
688
689/// Returns the power of two which is greater than or equal to the given value.
690/// Essentially, it is a ceil operation across the domain of powers of two.
691inline uint64_t PowerOf2Ceil(uint64_t A) {
692 if (!A)
693 return 0;
694 return NextPowerOf2(A - 1);
695}
696
697/// Returns the next integer (mod 2**64) that is greater than or equal to
698/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
699///
700/// If non-zero \p Skew is specified, the return value will be a minimal
701/// integer that is greater than or equal to \p Value and equal to
702/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
703/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
704///
705/// Examples:
706/// \code
707/// alignTo(5, 8) = 8
708/// alignTo(17, 8) = 24
709/// alignTo(~0LL, 8) = 0
710/// alignTo(321, 255) = 510
711///
712/// alignTo(5, 8, 7) = 7
713/// alignTo(17, 8, 1) = 17
714/// alignTo(~0LL, 8, 3) = 3
715/// alignTo(321, 255, 42) = 552
716/// \endcode
717inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
718 assert(Align != 0u && "Align can't be 0.")((Align != 0u && "Align can't be 0.") ? static_cast<
void> (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 718, __PRETTY_FUNCTION__))
;
719 Skew %= Align;
720 return (Value + Align - 1 - Skew) / Align * Align + Skew;
721}
722
723/// Returns the next integer (mod 2**64) that is greater than or equal to
724/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
725template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
726 static_assert(Align != 0u, "Align must be non-zero");
727 return (Value + Align - 1) / Align * Align;
728}
729
730/// Returns the integer ceil(Numerator / Denominator).
731inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
732 return alignTo(Numerator, Denominator) / Denominator;
733}
734
735/// Returns the integer nearest(Numerator / Denominator).
736inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
737 return (Numerator + (Denominator / 2)) / Denominator;
738}
739
740/// Returns the largest uint64_t less than or equal to \p Value and is
741/// \p Skew mod \p Align. \p Align must be non-zero
742inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
743 assert(Align != 0u && "Align can't be 0.")((Align != 0u && "Align can't be 0.") ? static_cast<
void> (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 743, __PRETTY_FUNCTION__))
;
744 Skew %= Align;
745 return (Value - Skew) / Align * Align + Skew;
746}
747
748/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
749/// Requires 0 < B <= 32.
750template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
751 static_assert(B > 0, "Bit width can't be 0.");
752 static_assert(B <= 32, "Bit width out of range.");
753 return int32_t(X << (32 - B)) >> (32 - B);
754}
755
756/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
757/// Requires 0 < B < 32.
758inline int32_t SignExtend32(uint32_t X, unsigned B) {
759 assert(B > 0 && "Bit width can't be 0.")((B > 0 && "Bit width can't be 0.") ? static_cast<
void> (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 759, __PRETTY_FUNCTION__))
;
760 assert(B <= 32 && "Bit width out of range.")((B <= 32 && "Bit width out of range.") ? static_cast
<void> (0) : __assert_fail ("B <= 32 && \"Bit width out of range.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 760, __PRETTY_FUNCTION__))
;
761 return int32_t(X << (32 - B)) >> (32 - B);
762}
763
764/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
765/// Requires 0 < B < 64.
766template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
767 static_assert(B > 0, "Bit width can't be 0.");
768 static_assert(B <= 64, "Bit width out of range.");
769 return int64_t(x << (64 - B)) >> (64 - B);
770}
771
772/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
773/// Requires 0 < B < 64.
774inline int64_t SignExtend64(uint64_t X, unsigned B) {
775 assert(B > 0 && "Bit width can't be 0.")((B > 0 && "Bit width can't be 0.") ? static_cast<
void> (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 775, __PRETTY_FUNCTION__))
;
776 assert(B <= 64 && "Bit width out of range.")((B <= 64 && "Bit width out of range.") ? static_cast
<void> (0) : __assert_fail ("B <= 64 && \"Bit width out of range.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h"
, 776, __PRETTY_FUNCTION__))
;
777 return int64_t(X << (64 - B)) >> (64 - B);
778}
779
780/// Subtract two unsigned integers, X and Y, of type T and return the absolute
781/// value of the result.
782template <typename T>
783typename std::enable_if<std::is_unsigned<T>::value, T>::type
784AbsoluteDifference(T X, T Y) {
785 return std::max(X, Y) - std::min(X, Y);
786}
787
788/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
789/// maximum representable value of T on overflow. ResultOverflowed indicates if
790/// the result is larger than the maximum representable value of type T.
791template <typename T>
792typename std::enable_if<std::is_unsigned<T>::value, T>::type
793SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
794 bool Dummy;
795 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
796 // Hacker's Delight, p. 29
797 T Z = X + Y;
798 Overflowed = (Z < X || Z < Y);
799 if (Overflowed)
800 return std::numeric_limits<T>::max();
801 else
802 return Z;
803}
804
805/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
806/// maximum representable value of T on overflow. ResultOverflowed indicates if
807/// the result is larger than the maximum representable value of type T.
808template <typename T>
809typename std::enable_if<std::is_unsigned<T>::value, T>::type
810SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
811 bool Dummy;
812 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
813
814 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
815 // because it fails for uint16_t (where multiplication can have undefined
816 // behavior due to promotion to int), and requires a division in addition
817 // to the multiplication.
818
819 Overflowed = false;
820
821 // Log2(Z) would be either Log2Z or Log2Z + 1.
822 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
823 // will necessarily be less than Log2Max as desired.
824 int Log2Z = Log2_64(X) + Log2_64(Y);
825 const T Max = std::numeric_limits<T>::max();
826 int Log2Max = Log2_64(Max);
827 if (Log2Z < Log2Max) {
828 return X * Y;
829 }
830 if (Log2Z > Log2Max) {
831 Overflowed = true;
832 return Max;
833 }
834
835 // We're going to use the top bit, and maybe overflow one
836 // bit past it. Multiply all but the bottom bit then add
837 // that on at the end.
838 T Z = (X >> 1) * Y;
839 if (Z & ~(Max >> 1)) {
840 Overflowed = true;
841 return Max;
842 }
843 Z <<= 1;
844 if (X & 1)
845 return SaturatingAdd(Z, Y, ResultOverflowed);
846
847 return Z;
848}
849
850/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
851/// the product. Clamp the result to the maximum representable value of T on
852/// overflow. ResultOverflowed indicates if the result is larger than the
853/// maximum representable value of type T.
854template <typename T>
855typename std::enable_if<std::is_unsigned<T>::value, T>::type
856SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
857 bool Dummy;
858 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
859
860 T Product = SaturatingMultiply(X, Y, &Overflowed);
861 if (Overflowed)
862 return Product;
863
864 return SaturatingAdd(A, Product, &Overflowed);
865}
866
867/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
868extern const float huge_valf;
869
870
871/// Add two signed integers, computing the two's complement truncated result,
872/// returning true if overflow occured.
873template <typename T>
874typename std::enable_if<std::is_signed<T>::value, T>::type
875AddOverflow(T X, T Y, T &Result) {
876#if __has_builtin(__builtin_add_overflow)1
877 return __builtin_add_overflow(X, Y, &Result);
878#else
879 // Perform the unsigned addition.
880 using U = typename std::make_unsigned<T>::type;
881 const U UX = static_cast<U>(X);
882 const U UY = static_cast<U>(Y);
883 const U UResult = UX + UY;
884
885 // Convert to signed.
886 Result = static_cast<T>(UResult);
887
888 // Adding two positive numbers should result in a positive number.
889 if (X > 0 && Y > 0)
890 return Result <= 0;
891 // Adding two negatives should result in a negative number.
892 if (X < 0 && Y < 0)
893 return Result >= 0;
894 return false;
895#endif
896}
897
898/// Subtract two signed integers, computing the two's complement truncated
899/// result, returning true if an overflow ocurred.
900template <typename T>
901typename std::enable_if<std::is_signed<T>::value, T>::type
902SubOverflow(T X, T Y, T &Result) {
903#if __has_builtin(__builtin_sub_overflow)1
904 return __builtin_sub_overflow(X, Y, &Result);
905#else
906 // Perform the unsigned addition.
907 using U = typename std::make_unsigned<T>::type;
908 const U UX = static_cast<U>(X);
909 const U UY = static_cast<U>(Y);
910 const U UResult = UX - UY;
911
912 // Convert to signed.
913 Result = static_cast<T>(UResult);
914
915 // Subtracting a positive number from a negative results in a negative number.
916 if (X <= 0 && Y > 0)
917 return Result >= 0;
918 // Subtracting a negative number from a positive results in a positive number.
919 if (X >= 0 && Y < 0)
920 return Result <= 0;
921 return false;
922#endif
923}
924
925
926/// Multiply two signed integers, computing the two's complement truncated
927/// result, returning true if an overflow ocurred.
928template <typename T>
929typename std::enable_if<std::is_signed<T>::value, T>::type
930MulOverflow(T X, T Y, T &Result) {
931 // Perform the unsigned multiplication on absolute values.
932 using U = typename std::make_unsigned<T>::type;
933 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
934 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
935 const U UResult = UX * UY;
936
937 // Convert to signed.
938 const bool IsNegative = (X < 0) ^ (Y < 0);
939 Result = IsNegative ? (0 - UResult) : UResult;
940
941 // If any of the args was 0, result is 0 and no overflow occurs.
942 if (UX == 0 || UY == 0)
943 return false;
944
945 // UX and UY are in [1, 2^n], where n is the number of digits.
946 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
947 // positive) divided by an argument compares to the other.
948 if (IsNegative)
949 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
950 else
951 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
952}
953
954} // End llvm namespace
955
956#endif