Bug Summary

File: build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
Warning: line 184, column 30
The result of the right shift is undefined due to shifting by '64', which is greater than or equal to the width of type 'int64_t'
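
For illustration only (a minimal standalone sketch of my own, not part of the analyzer output), this is the class of undefined behavior being reported: a zero input makes the computed shift amount equal the full width of int64_t.

    #include <cstdint>

    int main() {
      int64_t Val = 0;
      unsigned TrailingZeros = 64; // what countTrailingZeros((uint64_t)0) returns
      int64_t ShiftedVal = Val >> TrailingZeros; // UB: shift count >= bit width
      return static_cast<int>(ShiftedVal);
    }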

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name RISCVMatInt.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/RISCV/MCTargetDesc -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/RISCV/MCTargetDesc -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/RISCV -I lib/Target/RISCV -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp 
-analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp

/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp

1//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "RISCVMatInt.h"
10#include "MCTargetDesc/RISCVMCTargetDesc.h"
11#include "llvm/ADT/APInt.h"
12#include "llvm/Support/MathExtras.h"
13using namespace llvm;
14
15static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
16 if (!HasRVC)
17 return Res.size();
18
19 int Cost = 0;
20 for (auto Instr : Res) {
21 // Assume instructions that aren't listed aren't compressible.
22 bool Compressed = false;
23 switch (Instr.Opc) {
24 case RISCV::SLLI:
25 case RISCV::SRLI:
26 Compressed = true;
27 break;
28 case RISCV::ADDI:
29 case RISCV::ADDIW:
30 case RISCV::LUI:
31 Compressed = isInt<6>(Instr.Imm);
32 break;
33 }
34 // Two RVC instructions take the same space as one RVI instruction, but
35 // can take longer to execute than the single RVI instruction. Thus, we
36 // consider that two RVC instructions are slightly more costly than one
37 // RVI instruction. For longer sequences of RVC instructions the space
38 // savings can be worth it, though. The costs below try to model that.
39 if (!Compressed)
40 Cost += 100; // Baseline cost of one RVI instruction: 100%.
41 else
42 Cost += 70; // 70% cost of baseline.
43 }
44 return Cost;
45}
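
To make the cost model concrete (a worked example of my own, not analyzer output): a LUI+ADDIW pair whose immediates both fit in 6 bits scores 70 + 70 = 140, slightly more than a single uncompressed instruction at 100 but well under two uncompressed instructions at 200, matching the comment's intent that RVC pairs trade a little speed for code size.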
46
47// Recursively generate a sequence for materializing an integer.
48static void generateInstSeqImpl(int64_t Val,
49 const FeatureBitset &ActiveFeatures,
50 RISCVMatInt::InstSeq &Res) {
51 bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
52
53 if (isInt<32>(Val)) {
54 // Depending on the active bits in the immediate Value v, the following
55 // instruction sequences are emitted:
56 //
57 // v == 0 : ADDI
58 // v[0,12) != 0 && v[12,32) == 0 : ADDI
59 // v[0,12) == 0 && v[12,32) != 0 : LUI
60 // v[0,32) != 0 : LUI+ADDI(W)
61 int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
62 int64_t Lo12 = SignExtend64<12>(Val);
63
64 if (Hi20)
65 Res.push_back(RISCVMatInt::Inst(RISCV::LUI, Hi20));
66
67 if (Lo12 || Hi20 == 0) {
68 unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
69 Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
70 }
71 return;
72 }
73
74 assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
75
76 // Use BSETI for a single bit.
77 if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val)) {
78 Res.push_back(RISCVMatInt::Inst(RISCV::BSETI, Log2_64(Val)));
79 return;
80 }
81
82 // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
83 // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
84 // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
85 // while the following ADDI instructions contribute up to 12 bits each.
86 //
87 // At first glance, implementing this seems to be possible by simply
88 // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
89 // shift (SLLI) and immediate additions (ADDI) as needed. However, due to the
90 // fact that ADDI performs a sign extended addition, doing it like that would
91 // only be possible when at most 11 bits of the ADDI instructions are used.
92 // Using all 12 bits of the ADDI instructions, like done by GAS, actually
93 // requires that the constant is processed starting with the least significant
94 // bit.
95 //
96 // In the following, constants are processed from LSB to MSB but instruction
97 // emission is performed from MSB to LSB by recursively calling
98 // generateInstSeq. In each recursion, first the lowest 12 bits are removed
99 // from the constant and the optimal shift amount, which can be greater than
100 // 12 bits if the constant is sparse, is determined. Then, the shifted
101 // remaining constant is processed recursively and gets emitted as soon as it
102 // fits into 32 bits. The emission of the shifts and additions is subsequently
103 // performed when the recursion returns.
104
105 int64_t Lo12 = SignExtend64<12>(Val);
106 Val = (uint64_t)Val - (uint64_t)Lo12;
107
108 int ShiftAmount = 0;
109 bool Unsigned = false;
110
111 // Val might now be valid for LUI without needing a shift.
112 if (!isInt<32>(Val)) {
113 ShiftAmount = findFirstSet((uint64_t)Val);
114 Val >>= ShiftAmount;
115
116 // If the remaining bits don't fit in 12 bits, we might be able to reduce the
117 // shift amount in order to use LUI which will zero the lower 12 bits.
118 if (ShiftAmount > 12 && !isInt<12>(Val)) {
119 if (isInt<32>((uint64_t)Val << 12)) {
120 // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
121 ShiftAmount -= 12;
122 Val = (uint64_t)Val << 12;
123 } else if (isUInt<32>((uint64_t)Val << 12) &&
124 ActiveFeatures[RISCV::FeatureStdExtZba]) {
125 // Reduce the shift amount and add zeros to the LSBs so it will match
126 // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
127 ShiftAmount -= 12;
128 Val = ((uint64_t)Val << 12) | (0xffffffffull << 32);
129 Unsigned = true;
130 }
131 }
132
133 // Try to use SLLI_UW for Val when it is uint32 but not int32.
134 if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
135 ActiveFeatures[RISCV::FeatureStdExtZba]) {
136 // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
137 // SLLI_UW.
138 Val = ((uint64_t)Val) | (0xffffffffull << 32);
139 Unsigned = true;
140 }
141 }
142
143 generateInstSeqImpl(Val, ActiveFeatures, Res);
144
145 // Skip shift if we were able to use LUI directly.
146 if (ShiftAmount) {
147 if (Unsigned)
148 Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount));
149 else
150 Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
151 }
152
153 if (Lo12)
154 Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
155}
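
As a worked illustration of the 32-bit base case above (my own numbers, not analyzer output): for Val = 0x12345678, Hi20 = ((0x12345678 + 0x800) >> 12) & 0xFFFFF = 0x12345 and Lo12 = SignExtend64<12>(0x678) = 0x678, so on RV64 the function emits LUI 0x12345 followed by ADDIW 0x678, reconstructing 0x12345000 + 0x678 = 0x12345678.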
156
157static unsigned extractRotateInfo(int64_t Val) {
158 // for case: 0b111..1..xxxxxx1..1..
159 unsigned LeadingOnes = countLeadingOnes((uint64_t)Val);
160 unsigned TrailingOnes = countTrailingOnes((uint64_t)Val);
161 if (TrailingOnes > 0 && TrailingOnes < 64 &&
162 (LeadingOnes + TrailingOnes) > (64 - 12))
163 return 64 - TrailingOnes;
164
165 // for case: 0bxxx1..1..1...xxx
166 unsigned UpperTrailingOnes = countTrailingOnes(Hi_32(Val));
167 unsigned LowerLeadingOnes = countLeadingOnes(Lo_32(Val));
168 if (UpperTrailingOnes < 32 &&
169 (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
170 return 32 - UpperTrailingOnes;
171
172 return 0;
173}
174
175namespace llvm::RISCVMatInt {
176InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
177 RISCVMatInt::InstSeq Res;
178 generateInstSeqImpl(Val, ActiveFeatures, Res);
179
180 // If there are trailing zeros, try generating a sign extended constant with
181 // no trailing zeros and use a final SLLI to restore them.
182 if ((Val & 1) == 0 && Res.size() > 2) {
  [1] Assuming the condition is true
  [2] Assuming the condition is true
  [3] Taking true branch
183 unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
  [4] Calling 'countTrailingZeros<unsigned long>'
  [11] Returning from 'countTrailingZeros<unsigned long>'
  [12] 'TrailingZeros' initialized to 64
184 int64_t ShiftedVal = Val >> TrailingZeros;
  [13] The result of the right shift is undefined due to shifting by '64', which is greater than or equal to the width of type 'int64_t'
185 RISCVMatInt::InstSeq TmpSeq;
186 generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
187 TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SLLI, TrailingZeros));
188
189 // Keep the new sequence if it is an improvement.
190 if (TmpSeq.size() < Res.size()) {
191 Res = TmpSeq;
192 // A 2 instruction sequence is the best we can do.
193 if (Res.size() <= 2)
194 return Res;
195 }
196 }
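
Note on the path above: the shift amount reaches 64 only when Val == 0, because countTrailingZeros returns the type width for a zero input (see the MathExtras.h excerpt below). In practice Val == 0 produces a single-ADDI sequence from generateInstSeqImpl, so Res.size() > 2 cannot hold and the path looks infeasible; even so, a guard along these lines (a minimal sketch, not the upstream fix) would make the bound explicit to the analyzer:

    if ((Val & 1) == 0 && Val != 0 && Res.size() > 2) {
      // Val is nonzero and even, so TrailingZeros is in [1, 63] and the
      // right shift below is well defined.
      unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
      int64_t ShiftedVal = Val >> TrailingZeros;
      ...
    }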
197
198 // If the constant is positive we might be able to generate a shifted constant
199 // with no leading zeros and use a final SRLI to restore them.
200 if (Val > 0 && Res.size() > 2) {
201 assert(ActiveFeatures[RISCV::Feature64Bit] &&
202 "Expected RV32 to only need 2 instructions");
203 unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
204 uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
205 // Fill in the bits that will be shifted out with 1s. An example where this
206 // helps is trailing one masks with 32 or more ones. This will generate
207 // ADDI -1 and an SRLI.
208 ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
209
210 RISCVMatInt::InstSeq TmpSeq;
211 generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
212 TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
213
214 // Keep the new sequence if it is an improvement.
215 if (TmpSeq.size() < Res.size()) {
216 Res = TmpSeq;
217 // A 2 instruction sequence is the best we can do.
218 if (Res.size() <= 2)
219 return Res;
220 }
221
222 // Some cases can benefit from filling the lower bits with zeros instead.
223 ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
224 TmpSeq.clear();
225 generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
226 TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));
227
228 // Keep the new sequence if it is an improvement.
229 if (TmpSeq.size() < Res.size()) {
230 Res = TmpSeq;
231 // A 2 instruction sequence is the best we can do.
232 if (Res.size() <= 2)
233 return Res;
234 }
235
236 // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
237 // the end of the sequence.
238 if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
239 // Try replacing upper bits with 1.
240 uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
241 TmpSeq.clear();
242 generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
243 TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADD_UW, 0));
244
245 // Keep the new sequence if it is an improvement.
246 if (TmpSeq.size() < Res.size()) {
247 Res = TmpSeq;
248 // A 2 instruction sequence is the best we can do.
249 if (Res.size() <= 2)
250 return Res;
251 }
252 }
253 }
254
255 // Perform optimization with BCLRI/BSETI in the Zbs extension.
256 if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
257 assert(ActiveFeatures[RISCV::Feature64Bit] &&
258 "Expected RV32 to only need 2 instructions");
259
260 // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
261 // call generateInstSeqImpl with Val|0x80000000 (which is expected to be
262 // an int32), then emit (BCLRI r, 31).
263 // 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
264 // with Val&~0x80000000 (which is expected to be an int32), then
265 // emit (BSETI r, 31).
266 int64_t NewVal;
267 unsigned Opc;
268 if (Val < 0) {
269 Opc = RISCV::BCLRI;
270 NewVal = Val | 0x80000000ll;
271 } else {
272 Opc = RISCV::BSETI;
273 NewVal = Val & ~0x80000000ll;
274 }
275 if (isInt<32>(NewVal)) {
276 RISCVMatInt::InstSeq TmpSeq;
277 generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
278 TmpSeq.push_back(RISCVMatInt::Inst(Opc, 31));
279 if (TmpSeq.size() < Res.size())
280 Res = TmpSeq;
281 }
282
283 // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
284 // negative int32, or use BSETI for upper 32 bits if the original lower
285 // 32 bits are positive int32.
286 int32_t Lo = Val;
287 uint32_t Hi = Val >> 32;
288 Opc = 0;
289 RISCVMatInt::InstSeq TmpSeq;
290 generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
291 // Check if it is profitable to use BCLRI/BSETI.
292 if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
293 Opc = RISCV::BSETI;
294 } else if (Lo < 0 && TmpSeq.size() + countPopulation(~Hi) < Res.size()) {
295 Opc = RISCV::BCLRI;
296 Hi = ~Hi;
297 }
298 // Search for each bit and build corresponding BCLRI/BSETI.
299 if (Opc > 0) {
300 while (Hi != 0) {
301 unsigned Bit = countTrailingZeros(Hi);
302 TmpSeq.push_back(RISCVMatInt::Inst(Opc, Bit + 32));
303 Hi &= ~(1 << Bit);
304 }
305 if (TmpSeq.size() < Res.size())
306 Res = TmpSeq;
307 }
308 }
309
310 // Perform optimization with SH*ADD in the Zba extension.
311 if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
312 assert(ActiveFeatures[RISCV::Feature64Bit] &&
313 "Expected RV32 to only need 2 instructions");
314 int64_t Div = 0;
315 unsigned Opc = 0;
316 RISCVMatInt::InstSeq TmpSeq;
317 // Select the opcode and divisor.
318 if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
319 Div = 3;
320 Opc = RISCV::SH1ADD;
321 } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
322 Div = 5;
323 Opc = RISCV::SH2ADD;
324 } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
325 Div = 9;
326 Opc = RISCV::SH3ADD;
327 }
328 // Build the new instruction sequence.
329 if (Div > 0) {
330 generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
331 TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
332 if (TmpSeq.size() < Res.size())
333 Res = TmpSeq;
334 } else {
335 // Try to use LUI+SH*ADD+ADDI.
336 int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
337 int64_t Lo12 = SignExtend64<12>(Val);
338 Div = 0;
339 if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
340 Div = 3;
341 Opc = RISCV::SH1ADD;
342 } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
343 Div = 5;
344 Opc = RISCV::SH2ADD;
345 } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
346 Div = 9;
347 Opc = RISCV::SH3ADD;
348 }
349 // Build the new instruction sequence.
350 if (Div > 0) {
351 // A Val with zero Lo12 (which implies Val equals Hi52) should already
352 // have been processed into LUI+SH*ADD by the previous optimization.
353 assert(Lo12 != 0 &&
354 "unexpected instruction sequence for immediate materialisation");
355 assert(TmpSeq.empty() && "Expected empty TmpSeq");
356 generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
357 TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
358 TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
359 if (TmpSeq.size() < Res.size())
360 Res = TmpSeq;
361 }
362 }
363 }
364
365 // Perform optimization with rori in the Zbb extension.
366 if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) {
367 if (unsigned Rotate = extractRotateInfo(Val)) {
368 RISCVMatInt::InstSeq TmpSeq;
369 uint64_t NegImm12 =
370 ((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate);
371 assert(isInt<12>(NegImm12));
372 TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, NegImm12));
373 TmpSeq.push_back(RISCVMatInt::Inst(RISCV::RORI, Rotate));
374 Res = TmpSeq;
375 }
376 }
377 return Res;
378}
379
380int getIntMatCost(const APInt &Val, unsigned Size,
381 const FeatureBitset &ActiveFeatures, bool CompressionCost) {
382 bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
383 bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
384 int PlatRegSize = IsRV64 ? 64 : 32;
385
386 // Split the constant into platform register sized chunks, and calculate cost
387 // of each chunk.
388 int Cost = 0;
389 for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
390 APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
391 InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
392 Cost += getInstSeqCost(MatSeq, HasRVC);
393 }
394 return std::max(1, Cost);
395}
396
397OpndKind Inst::getOpndKind() const {
398 switch (Opc) {
399 default:
400 llvm_unreachable("Unexpected opcode!");
401 case RISCV::LUI:
402 return RISCVMatInt::Imm;
403 case RISCV::ADD_UW:
404 return RISCVMatInt::RegX0;
405 case RISCV::SH1ADD:
406 case RISCV::SH2ADD:
407 case RISCV::SH3ADD:
408 return RISCVMatInt::RegReg;
409 case RISCV::ADDI:
410 case RISCV::ADDIW:
411 case RISCV::SLLI:
412 case RISCV::SRLI:
413 case RISCV::SLLI_UW:
414 case RISCV::RORI:
415 case RISCV::BSETI:
416 case RISCV::BCLRI:
417 return RISCVMatInt::RegImm;
418 }
419}
420
421} // namespace llvm::RISCVMatInt

/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/ADT/bit.h"
17#include "llvm/Support/Compiler.h"
18#include <cassert>
19#include <climits>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef _MSC_VER
26// Declare these intrinsics manually rather including intrin.h. It's very
27// expensive, and MathExtras.h is popular.
28// #include <intrin.h>
29extern "C" {
30unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
31unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
32unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
33unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
34}
35#endif
36
37namespace llvm {
38
39/// The behavior an operation has on an input of 0.
40enum ZeroBehavior {
41 /// The returned value is undefined.
42 ZB_Undefined,
43 /// The returned value is numeric_limits<T>::max()
44 ZB_Max,
45 /// The returned value is numeric_limits<T>::digits
46 ZB_Width
47};
48
49/// Mathematical constants.
50namespace numbers {
51// TODO: Track C++20 std::numbers.
52// TODO: Favor using the hexadecimal FP constants (requires C++17).
53constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
54 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
55 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
56 ln10 = 2.3025850929940456840, // (0x1.26bb1bbb55516P+1) https://oeis.org/A002392
57 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
58 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
59 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
60 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
61 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
62 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
63 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
64 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
65 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
66 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
67 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
68constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
69 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
70 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
71 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
72 log2ef = 1.44269504F, // (0x1.715476P+0)
73 log10ef = .434294482F, // (0x1.bcb7b2P-2)
74 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
75 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
76 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
77 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
78 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
79 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
80 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
81 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
82 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
83} // namespace numbers
84
85namespace detail {
86template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
87 static unsigned count(T Val, ZeroBehavior) {
88 if (!Val)
89 return std::numeric_limits<T>::digits;
90 if (Val & 0x1)
91 return 0;
92
93 // Bisection method.
94 unsigned ZeroBits = 0;
95 T Shift = std::numeric_limits<T>::digits >> 1;
96 T Mask = std::numeric_limits<T>::max() >> Shift;
97 while (Shift) {
98 if ((Val & Mask) == 0) {
99 Val >>= Shift;
100 ZeroBits |= Shift;
101 }
102 Shift >>= 1;
103 Mask >>= Shift;
104 }
105 return ZeroBits;
106 }
107};
108
109#if defined(__GNUC__) || defined(_MSC_VER)
110template <typename T> struct TrailingZerosCounter<T, 4> {
111 static unsigned count(T Val, ZeroBehavior ZB) {
112 if (ZB != ZB_Undefined && Val == 0)
113 return 32;
114
115#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
116 return __builtin_ctz(Val);
117#elif defined(_MSC_VER)
118 unsigned long Index;
119 _BitScanForward(&Index, Val);
120 return Index;
121#endif
122 }
123};
124
125#if !defined(_MSC_VER) || defined(_M_X64)
126template <typename T> struct TrailingZerosCounter<T, 8> {
127 static unsigned count(T Val, ZeroBehavior ZB) {
128 if (ZB != ZB_Undefined && Val == 0)
  [5.1] 'ZB' is not equal to ZB_Undefined
  [6] Assuming 'Val' is equal to 0
  [7] Taking true branch
129 return 64;
  [8] Returning the value 64
130
131#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
132 return __builtin_ctzll(Val);
133#elif defined(_MSC_VER)
134 unsigned long Index;
135 _BitScanForward64(&Index, Val);
136 return Index;
137#endif
138 }
139};
140#endif
141#endif
142} // namespace detail
143
144/// Count number of 0's from the least significant bit to the most
145/// stopping at the first 1.
146///
147/// Only unsigned integral types are allowed.
148///
149/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
150/// valid arguments.
151template <typename T>
152unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
153 static_assert(std::is_unsigned_v<T>,
154 "Only unsigned integral types are allowed.");
155 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
  [5] Calling 'TrailingZerosCounter::count'
  [9] Returning from 'TrailingZerosCounter::count'
  [10] Returning the value 64
156}
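
For concreteness (an illustrative snippet of my own, consistent with the implementation above), the zero-input behavior that drives the trace:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    #include <cstdint>

    void zeroInputDemo() {
      // With the default ZeroBehavior of ZB_Width, a zero input returns the
      // bit width of the type rather than a valid bit index.
      assert(llvm::countTrailingZeros<uint64_t>(0) == 64);
      assert(llvm::countTrailingZeros<uint64_t>(8) == 3); // 0b1000
    }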
157
158namespace detail {
159template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
160 static unsigned count(T Val, ZeroBehavior) {
161 if (!Val)
162 return std::numeric_limits<T>::digits;
163
164 // Bisection method.
165 unsigned ZeroBits = 0;
166 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
167 T Tmp = Val >> Shift;
168 if (Tmp)
169 Val = Tmp;
170 else
171 ZeroBits |= Shift;
172 }
173 return ZeroBits;
174 }
175};
176
177#if defined(__GNUC__) || defined(_MSC_VER)
178template <typename T> struct LeadingZerosCounter<T, 4> {
179 static unsigned count(T Val, ZeroBehavior ZB) {
180 if (ZB != ZB_Undefined && Val == 0)
181 return 32;
182
183#if __has_builtin(__builtin_clz) || defined(__GNUC__)
184 return __builtin_clz(Val);
185#elif defined(_MSC_VER)
186 unsigned long Index;
187 _BitScanReverse(&Index, Val);
188 return Index ^ 31;
189#endif
190 }
191};
192
193#if !defined(_MSC_VER) || defined(_M_X64)
194template <typename T> struct LeadingZerosCounter<T, 8> {
195 static unsigned count(T Val, ZeroBehavior ZB) {
196 if (ZB != ZB_Undefined && Val == 0)
197 return 64;
198
199#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
200 return __builtin_clzll(Val);
201#elif defined(_MSC_VER)
202 unsigned long Index;
203 _BitScanReverse64(&Index, Val);
204 return Index ^ 63;
205#endif
206 }
207};
208#endif
209#endif
210} // namespace detail
211
212/// Count number of 0's from the most significant bit to the least
213/// stopping at the first 1.
214///
215/// Only unsigned integral types are allowed.
216///
217/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
218/// valid arguments.
219template <typename T>
220unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
221 static_assert(std::is_unsigned_v<T>,
222 "Only unsigned integral types are allowed.");
223 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
224}
225
226/// Get the index of the first set bit starting from the least
227/// significant bit.
228///
229/// Only unsigned integral types are allowed.
230///
231/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
232/// valid arguments.
233template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
234 if (ZB == ZB_Max && Val == 0)
235 return std::numeric_limits<T>::max();
236
237 return countTrailingZeros(Val, ZB_Undefined);
238}
239
240/// Create a bitmask with the N right-most bits set to 1, and all other
241/// bits set to 0. Only unsigned types are allowed.
242template <typename T> T maskTrailingOnes(unsigned N) {
243 static_assert(std::is_unsigned<T>::value, "Invalid type!");
244 const unsigned Bits = CHAR_BIT * sizeof(T);
245 assert(N <= Bits && "Invalid bit index");
246 return N == 0 ? 0 : (T(-1) >> (Bits - N));
247}
248
249/// Create a bitmask with the N left-most bits set to 1, and all other
250/// bits set to 0. Only unsigned types are allowed.
251template <typename T> T maskLeadingOnes(unsigned N) {
252 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
253}
254
255/// Create a bitmask with the N right-most bits set to 0, and all other
256/// bits set to 1. Only unsigned types are allowed.
257template <typename T> T maskTrailingZeros(unsigned N) {
258 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N left-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskLeadingZeros(unsigned N) {
264 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Get the index of the last set bit starting from the least
268/// significant bit.
269///
270/// Only unsigned integral types are allowed.
271///
272/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
273/// valid arguments.
274template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
275 if (ZB == ZB_Max && Val == 0)
276 return std::numeric_limits<T>::max();
277
278 // Use ^ instead of - because both gcc and llvm can remove the associated ^
279 // in the __builtin_clz intrinsic on x86.
280 return countLeadingZeros(Val, ZB_Undefined) ^
281 (std::numeric_limits<T>::digits - 1);
282}
283
284/// Macro compressed bit reversal table for 256 bits.
285///
286/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
287static const unsigned char BitReverseTable256[256] = {
288#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
289#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
290#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
291 R6(0), R6(2), R6(1), R6(3)
292#undef R2
293#undef R4
294#undef R6
295};
296
297/// Reverse the bits in \p Val.
298template <typename T> T reverseBits(T Val) {
299#if __has_builtin(__builtin_bitreverse8)
300 if constexpr (std::is_same_v<T, uint8_t>)
301 return __builtin_bitreverse8(Val);
302#endif
303#if __has_builtin(__builtin_bitreverse16)
304 if constexpr (std::is_same_v<T, uint16_t>)
305 return __builtin_bitreverse16(Val);
306#endif
307#if __has_builtin(__builtin_bitreverse32)
308 if constexpr (std::is_same_v<T, uint32_t>)
309 return __builtin_bitreverse32(Val);
310#endif
311#if __has_builtin(__builtin_bitreverse64)
312 if constexpr (std::is_same_v<T, uint64_t>)
313 return __builtin_bitreverse64(Val);
314#endif
315
316 unsigned char in[sizeof(Val)];
317 unsigned char out[sizeof(Val)];
318 std::memcpy(in, &Val, sizeof(Val));
319 for (unsigned i = 0; i < sizeof(Val); ++i)
320 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
321 std::memcpy(&Val, out, sizeof(Val));
322 return Val;
323}
324
325// NOTE: The following support functions use the _32/_64 extensions instead of
326// type overloading so that signed and unsigned integers can be used without
327// ambiguity.
328
329/// Return the high 32 bits of a 64 bit value.
330constexpr inline uint32_t Hi_32(uint64_t Value) {
331 return static_cast<uint32_t>(Value >> 32);
332}
333
334/// Return the low 32 bits of a 64 bit value.
335constexpr inline uint32_t Lo_32(uint64_t Value) {
336 return static_cast<uint32_t>(Value);
337}
338
339/// Make a 64-bit integer from a high / low pair of 32-bit integers.
340constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
341 return ((uint64_t)High << 32) | (uint64_t)Low;
342}
343
344/// Checks if an integer fits into the given bit width.
345template <unsigned N> constexpr inline bool isInt(int64_t x) {
346 if constexpr (N == 8)
347 return static_cast<int8_t>(x) == x;
348 if constexpr (N == 16)
349 return static_cast<int16_t>(x) == x;
350 if constexpr (N == 32)
351 return static_cast<int32_t>(x) == x;
352 if constexpr (N < 64)
353 return -(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1));
354 (void)x; // MSVC v19.25 warns that x is unused.
355 return true;
356}
357
358/// Checks if a signed integer is an N bit number shifted left by S.
359template <unsigned N, unsigned S>
360constexpr inline bool isShiftedInt(int64_t x) {
361 static_assert(
362 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
363 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
364 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
365}
366
367/// Checks if an unsigned integer fits into the given bit width.
368template <unsigned N> constexpr inline bool isUInt(uint64_t x) {
369 static_assert(N > 0, "isUInt<0> doesn't make sense");
370 if constexpr (N == 8)
371 return static_cast<uint8_t>(x) == x;
372 if constexpr (N == 16)
373 return static_cast<uint16_t>(x) == x;
374 if constexpr (N == 32)
375 return static_cast<uint32_t>(x) == x;
376 if constexpr (N < 64)
377 return x < (UINT64_C(1) << (N));
378 (void)x; // MSVC v19.25 warns that x is unused.
379 return true;
380}
381
382/// Checks if an unsigned integer is an N bit number shifted left by S.
383template <unsigned N, unsigned S>
384constexpr inline bool isShiftedUInt(uint64_t x) {
385 static_assert(
386 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
387 static_assert(N + S <= 64,
388 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
389 // Per the two static_asserts above, S must be strictly less than 64. So
390 // 1 << S is not undefined behavior.
391 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
392}
393
394/// Gets the maximum value for a N-bit unsigned integer.
395inline uint64_t maxUIntN(uint64_t N) {
396 assert(N > 0 && N <= 64 && "integer width out of range");
397
398 // uint64_t(1) << 64 is undefined behavior, so we can't do
399 // (uint64_t(1) << N) - 1
400 // without checking first that N != 64. But this works and doesn't have a
401 // branch.
402 return UINT64_MAX >> (64 - N);
403}
404
405/// Gets the minimum value for a N-bit signed integer.
406inline int64_t minIntN(int64_t N) {
407 assert(N > 0 && N <= 64 && "integer width out of range");
408
409 return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
410}
411
412/// Gets the maximum value for a N-bit signed integer.
413inline int64_t maxIntN(int64_t N) {
414 assert(N > 0 && N <= 64 && "integer width out of range");
415
416 // This relies on two's complement wraparound when N == 64, so we convert to
417 // int64_t only at the very end to avoid UB.
418 return (UINT64_C(1) << (N - 1)) - 1;
419}
420
421/// Checks if an unsigned integer fits into the given (dynamic) bit width.
422inline bool isUIntN(unsigned N, uint64_t x) {
423 return N >= 64 || x <= maxUIntN(N);
424}
425
426/// Checks if a signed integer fits into the given (dynamic) bit width.
427inline bool isIntN(unsigned N, int64_t x) {
428 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
429}
430
431/// Return true if the argument is a non-empty sequence of ones starting at the
432/// least significant bit with the remainder zero (32 bit version).
433/// Ex. isMask_32(0x0000FFFFU) == true.
434constexpr inline bool isMask_32(uint32_t Value) {
435 return Value && ((Value + 1) & Value) == 0;
436}
437
438/// Return true if the argument is a non-empty sequence of ones starting at the
439/// least significant bit with the remainder zero (64 bit version).
440constexpr inline bool isMask_64(uint64_t Value) {
441 return Value && ((Value + 1) & Value) == 0;
442}
443
444/// Return true if the argument contains a non-empty sequence of ones with the
445/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
446constexpr inline bool isShiftedMask_32(uint32_t Value) {
447 return Value && isMask_32((Value - 1) | Value);
448}
449
450/// Return true if the argument contains a non-empty sequence of ones with the
451/// remainder zero (64 bit version.)
452constexpr inline bool isShiftedMask_64(uint64_t Value) {
453 return Value && isMask_64((Value - 1) | Value);
454}
455
456/// Return true if the argument is a power of two > 0.
457/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
458constexpr inline bool isPowerOf2_32(uint32_t Value) {
459 return llvm::has_single_bit(Value);
460}
461
462/// Return true if the argument is a power of two > 0 (64 bit edition.)
463constexpr inline bool isPowerOf2_64(uint64_t Value) {
464 return llvm::has_single_bit(Value);
465}
466
467/// Count the number of ones from the most significant bit to the first
468/// zero bit.
469///
470/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
471/// Only unsigned integral types are allowed.
472///
473/// \param ZB the behavior on an input of all ones. Only ZB_Width and
474/// ZB_Undefined are valid arguments.
475template <typename T>
476unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
477 static_assert(std::is_unsigned_v<T>,
478 "Only unsigned integral types are allowed.");
479 return countLeadingZeros<T>(~Value, ZB);
480}
481
482/// Count the number of ones from the least significant bit to the first
483/// zero bit.
484///
485/// Ex. countTrailingOnes(0x00FF00FF) == 8.
486/// Only unsigned integral types are allowed.
487///
488/// \param ZB the behavior on an input of all ones. Only ZB_Width and
489/// ZB_Undefined are valid arguments.
490template <typename T>
491unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
492 static_assert(std::is_unsigned_v<T>,
493 "Only unsigned integral types are allowed.");
494 return countTrailingZeros<T>(~Value, ZB);
495}
496
497/// Count the number of set bits in a value.
498/// Ex. countPopulation(0xF000F000) = 8
499/// Returns 0 if the word is zero.
500template <typename T>
501inline unsigned countPopulation(T Value) {
502 static_assert(std::is_unsigned_v<T>,
503 "Only unsigned integral types are allowed.");
504 return (unsigned)llvm::popcount(Value);
505}
506
507/// Return true if the argument contains a non-empty sequence of ones with the
508/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
509/// If true, \p MaskIdx will specify the index of the lowest set bit and \p
510/// MaskLen is updated to specify the length of the mask, else neither are
511/// updated.
512inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx,
513 unsigned &MaskLen) {
514 if (!isShiftedMask_32(Value))
515 return false;
516 MaskIdx = countTrailingZeros(Value);
517 MaskLen = countPopulation(Value);
518 return true;
519}
520
521/// Return true if the argument contains a non-empty sequence of ones with the
522/// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index
523/// of the lowest set bit and \p MaskLen is updated to specify the length of the
524/// mask, else neither are updated.
525inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
526 unsigned &MaskLen) {
527 if (!isShiftedMask_64(Value))
528 return false;
529 MaskIdx = countTrailingZeros(Value);
530 MaskLen = countPopulation(Value);
531 return true;
532}
533
534/// Compile time Log2.
535/// Valid only for positive powers of two.
536template <size_t kValue> constexpr inline size_t CTLog2() {
537 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
538 "Value is not a valid power of 2");
539 return 1 + CTLog2<kValue / 2>();
540}
541
542template <> constexpr inline size_t CTLog2<1>() { return 0; }
543
544/// Return the floor log base 2 of the specified value, -1 if the value is zero.
545/// (32 bit edition.)
546/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
547inline unsigned Log2_32(uint32_t Value) {
548 return 31 - countLeadingZeros(Value);
549}
550
551/// Return the floor log base 2 of the specified value, -1 if the value is zero.
552/// (64 bit edition.)
553inline unsigned Log2_64(uint64_t Value) {
554 return 63 - countLeadingZeros(Value);
555}
556
557/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
558/// (32 bit edition).
559/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
560inline unsigned Log2_32_Ceil(uint32_t Value) {
561 return 32 - countLeadingZeros(Value - 1);
562}
563
564/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
565/// (64 bit edition.)
566inline unsigned Log2_64_Ceil(uint64_t Value) {
567 return 64 - countLeadingZeros(Value - 1);
568}
569
570/// This function takes a 64-bit integer and returns the bit equivalent double.
571inline double BitsToDouble(uint64_t Bits) {
572 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
573 return llvm::bit_cast<double>(Bits);
574}
575
576/// This function takes a 32-bit integer and returns the bit equivalent float.
577inline float BitsToFloat(uint32_t Bits) {
578 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
579 return llvm::bit_cast<float>(Bits);
580}
581
582/// This function takes a double and returns the bit equivalent 64-bit integer.
583/// Note that copying doubles around changes the bits of NaNs on some hosts,
584/// notably x86, so this routine cannot be used if these bits are needed.
585inline uint64_t DoubleToBits(double Double) {
586 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
587 return llvm::bit_cast<uint64_t>(Double);
588}
589
590/// This function takes a float and returns the bit equivalent 32-bit integer.
591/// Note that copying floats around changes the bits of NaNs on some hosts,
592/// notably x86, so this routine cannot be used if these bits are needed.
593inline uint32_t FloatToBits(float Float) {
594 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
595 return llvm::bit_cast<uint32_t>(Float);
596}
597
598/// A and B are either alignments or offsets. Return the minimum alignment that
599/// may be assumed after adding the two together.
600constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
601 // The largest power of 2 that divides both A and B.
602 //
603 // Replace "-Value" by "1+~Value" in the following commented code to avoid
604 // MSVC warning C4146
605 // return (A | B) & -(A | B);
606 return (A | B) & (1 + ~(A | B));
607}
608
609/// Returns the next power of two (in 64-bits) that is strictly greater than A.
610/// Returns zero on overflow.
611constexpr inline uint64_t NextPowerOf2(uint64_t A) {
612 A |= (A >> 1);
613 A |= (A >> 2);
614 A |= (A >> 4);
615 A |= (A >> 8);
616 A |= (A >> 16);
617 A |= (A >> 32);
618 return A + 1;
619}
620
621/// Returns the power of two which is less than or equal to the given value.
622/// Essentially, it is a floor operation across the domain of powers of two.
623inline uint64_t PowerOf2Floor(uint64_t A) {
624 if (!A) return 0;
625 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
626}
627
628/// Returns the power of two which is greater than or equal to the given value.
629/// Essentially, it is a ceil operation across the domain of powers of two.
630inline uint64_t PowerOf2Ceil(uint64_t A) {
631 if (!A)
632 return 0;
633 return NextPowerOf2(A - 1);
634}
635
636/// Returns the next integer (mod 2**64) that is greater than or equal to
637/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
638///
639/// Examples:
640/// \code
641/// alignTo(5, 8) = 8
642/// alignTo(17, 8) = 24
643/// alignTo(~0LL, 8) = 0
644/// alignTo(321, 255) = 510
645/// \endcode
646inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
647 assert(Align != 0u && "Align can't be 0.");
648 return (Value + Align - 1) / Align * Align;
649}
650
651inline uint64_t alignToPowerOf2(uint64_t Value, uint64_t Align) {
652 assert(Align != 0 && (Align & (Align - 1)) == 0 &&
653 "Align must be a power of 2");
654 return (Value + Align - 1) & -Align;
655}
656
657/// If non-zero \p Skew is specified, the return value will be a minimal integer
658/// that is greater than or equal to \p Value and equal to \p Align * N + \p Skew
659/// for some integer N. If \p Skew is larger than \p Align, its value is adjusted
660/// to '\p Skew mod \p Align'. \p Align must be non-zero.
661///
662/// Examples:
663/// \code
664/// alignTo(5, 8, 7) = 7
665/// alignTo(17, 8, 1) = 17
666/// alignTo(~0LL, 8, 3) = 3
667/// alignTo(321, 255, 42) = 552
668/// \endcode
669inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) {
670 assert(Align != 0u && "Align can't be 0.");
671 Skew %= Align;
672 return alignTo(Value - Skew, Align) + Skew;
673}
674
675/// Returns the next integer (mod 2**64) that is greater than or equal to
676/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
677template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
678 static_assert(Align != 0u, "Align must be non-zero");
679 return (Value + Align - 1) / Align * Align;
680}
681
682/// Returns the integer ceil(Numerator / Denominator).
683inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
684 return alignTo(Numerator, Denominator) / Denominator;
685}
686
687/// Returns the integer nearest(Numerator / Denominator).
688inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
689 return (Numerator + (Denominator / 2)) / Denominator;
690}
691
692/// Returns the largest uint64_t less than or equal to \p Value and is
693/// \p Skew mod \p Align. \p Align must be non-zero
694inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
695 assert(Align != 0u && "Align can't be 0.");
696 Skew %= Align;
697 return (Value - Skew) / Align * Align + Skew;
698}
699
700/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
701/// Requires 0 < B <= 32.
702template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
703 static_assert(B > 0, "Bit width can't be 0.");
704 static_assert(B <= 32, "Bit width out of range.");
705 return int32_t(X << (32 - B)) >> (32 - B);
706}
707
708/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
709/// Requires 0 < B <= 32.
710inline int32_t SignExtend32(uint32_t X, unsigned B) {
711 assert(B > 0 && "Bit width can't be 0.");
712 assert(B <= 32 && "Bit width out of range.");
713 return int32_t(X << (32 - B)) >> (32 - B);
714}
715
716/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
717/// Requires 0 < B <= 64.
718template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
719 static_assert(B > 0, "Bit width can't be 0.");
720 static_assert(B <= 64, "Bit width out of range.");
721 return int64_t(x << (64 - B)) >> (64 - B);
722}
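
As a worked check of this sign extension (my own example): SignExtend64<12>(0xFFF) computes int64_t(0xFFF << 52) >> 52; the left shift places bit 11 in the sign position and the arithmetic right shift then yields -1, which is exactly how the Lo12 values in RISCVMatInt.cpp above are produced.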
723
724/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
725/// Requires 0 < B <= 64.
726inline int64_t SignExtend64(uint64_t X, unsigned B) {
727 assert(B > 0 && "Bit width can't be 0.");
728 assert(B <= 64 && "Bit width out of range.");
729 return int64_t(X << (64 - B)) >> (64 - B);
730}
731
732/// Subtract two unsigned integers, X and Y, of type T and return the absolute
733/// value of the result.
734template <typename T>
735std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
736 return X > Y ? (X - Y) : (Y - X);
737}
738
739/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
740/// maximum representable value of T on overflow. ResultOverflowed indicates if
741/// the result is larger than the maximum representable value of type T.
742template <typename T>
743std::enable_if_t<std::is_unsigned<T>::value, T>
744SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
745 bool Dummy;
746 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
747 // Hacker's Delight, p. 29
748 T Z = X + Y;
749 Overflowed = (Z < X || Z < Y);
750 if (Overflowed)
751 return std::numeric_limits<T>::max();
752 else
753 return Z;
754}
755
756/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
757/// maximum representable value of T on overflow. ResultOverflowed indicates if
758/// the result is larger than the maximum representable value of type T.
759template <typename T>
760std::enable_if_t<std::is_unsigned<T>::value, T>
761SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
762 bool Dummy;
763 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
764
765 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
766 // because it fails for uint16_t (where multiplication can have undefined
767 // behavior due to promotion to int), and requires a division in addition
768 // to the multiplication.
769
770 Overflowed = false;
771
772 // Log2(Z) would be either Log2Z or Log2Z + 1.
773 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
774 // will necessarily be less than Log2Max as desired.
775 int Log2Z = Log2_64(X) + Log2_64(Y);
776 const T Max = std::numeric_limits<T>::max();
777 int Log2Max = Log2_64(Max);
778 if (Log2Z < Log2Max) {
779 return X * Y;
780 }
781 if (Log2Z > Log2Max) {
782 Overflowed = true;
783 return Max;
784 }
785
786 // We're going to use the top bit, and maybe overflow one
787 // bit past it. Multiply all but the bottom bit then add
788 // that on at the end.
789 T Z = (X >> 1) * Y;
790 if (Z & ~(Max >> 1)) {
791 Overflowed = true;
792 return Max;
793 }
794 Z <<= 1;
795 if (X & 1)
796 return SaturatingAdd(Z, Y, ResultOverflowed);
797
798 return Z;
799}
800
801/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
802/// the product. Clamp the result to the maximum representable value of T on
803/// overflow. ResultOverflowed indicates if the result is larger than the
804/// maximum representable value of type T.
805template <typename T>
806std::enable_if_t<std::is_unsigned<T>::value, T>
807SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
808 bool Dummy;
809 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
810
811 T Product = SaturatingMultiply(X, Y, &Overflowed);
812 if (Overflowed)
813 return Product;
814
815 return SaturatingAdd(A, Product, &Overflowed);
816}
817
818/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
819extern const float huge_valf;
820
821
822/// Add two signed integers, computing the two's complement truncated result,
823/// returning true if overflow occurred.
824template <typename T>
825std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
826#if __has_builtin(__builtin_add_overflow)
827 return __builtin_add_overflow(X, Y, &Result);
828#else
829 // Perform the unsigned addition.
830 using U = std::make_unsigned_t<T>;
831 const U UX = static_cast<U>(X);
832 const U UY = static_cast<U>(Y);
833 const U UResult = UX + UY;
834
835 // Convert to signed.
836 Result = static_cast<T>(UResult);
837
838 // Adding two positive numbers should result in a positive number.
839 if (X > 0 && Y > 0)
840 return Result <= 0;
841 // Adding two negatives should result in a negative number.
842 if (X < 0 && Y < 0)
843 return Result >= 0;
844 return false;
845#endif
846}
847
848/// Subtract two signed integers, computing the two's complement truncated
849/// result, returning true if an overflow occurred.
850template <typename T>
851std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
852#if __has_builtin(__builtin_sub_overflow)
853 return __builtin_sub_overflow(X, Y, &Result);
854#else
855 // Perform the unsigned addition.
856 using U = std::make_unsigned_t<T>;
857 const U UX = static_cast<U>(X);
858 const U UY = static_cast<U>(Y);
859 const U UResult = UX - UY;
860
861 // Convert to signed.
862 Result = static_cast<T>(UResult);
863
864 // Subtracting a positive number from a negative results in a negative number.
865 if (X <= 0 && Y > 0)
866 return Result >= 0;
867 // Subtracting a negative number from a positive results in a positive number.
868 if (X >= 0 && Y < 0)
869 return Result <= 0;
870 return false;
871#endif
872}
873
874/// Multiply two signed integers, computing the two's complement truncated
875/// result, returning true if an overflow occurred.
876template <typename T>
877std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
878 // Perform the unsigned multiplication on absolute values.
879 using U = std::make_unsigned_t<T>;
880 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
881 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
882 const U UResult = UX * UY;
883
884 // Convert to signed.
885 const bool IsNegative = (X < 0) ^ (Y < 0);
886 Result = IsNegative ? (0 - UResult) : UResult;
887
888 // If any of the args was 0, result is 0 and no overflow occurs.
889 if (UX == 0 || UY == 0)
890 return false;
891
892 // UX and UY are in [1, 2^n], where n is the number of digits.
893 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
894 // positive) divided by an argument compares to the other.
895 if (IsNegative)
896 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
897 else
898 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
899}
900
901} // End llvm namespace
902
903#endif