Bug Summary

File: build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
Warning: line 360, column 25
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name MipsLegalizerInfo.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/Mips -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/Mips -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include 
-internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp

/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp

1//===- MipsLegalizerInfo.cpp ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the Machinelegalizer class for Mips.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
#include "MipsLegalizerInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/Support/MathExtras.h"
18
19using namespace llvm;
20
21struct TypesAndMemOps {
22 LLT ValTy;
23 LLT PtrTy;
24 unsigned MemSize;
25 bool SystemSupportsUnalignedAccess;
26};
27
28// Assumes power of 2 memory size. Subtargets that have only naturally-aligned
29// memory access need to perform additional legalization here.
30static bool isUnalignedMemmoryAccess(uint64_t MemSize, uint64_t AlignInBits) {
31 assert(isPowerOf2_64(MemSize) && "Expected power of 2 memory size")(static_cast <bool> (isPowerOf2_64(MemSize) && "Expected power of 2 memory size"
) ? void (0) : __assert_fail ("isPowerOf2_64(MemSize) && \"Expected power of 2 memory size\""
, "llvm/lib/Target/Mips/MipsLegalizerInfo.cpp", 31, __extension__
__PRETTY_FUNCTION__))
;
32 assert(isPowerOf2_64(AlignInBits) && "Expected power of 2 align")(static_cast <bool> (isPowerOf2_64(AlignInBits) &&
"Expected power of 2 align") ? void (0) : __assert_fail ("isPowerOf2_64(AlignInBits) && \"Expected power of 2 align\""
, "llvm/lib/Target/Mips/MipsLegalizerInfo.cpp", 32, __extension__
__PRETTY_FUNCTION__))
;
33 if (MemSize > AlignInBits)
34 return true;
35 return false;
36}
37
38static bool
39CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query,
40 std::initializer_list<TypesAndMemOps> SupportedValues) {
41 unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
42
43 // Non power of two memory access is never legal.
44 if (!isPowerOf2_64(QueryMemSize))
45 return false;
46
47 for (auto &Val : SupportedValues) {
48 if (Val.ValTy != Query.Types[0])
49 continue;
50 if (Val.PtrTy != Query.Types[1])
51 continue;
52 if (Val.MemSize != QueryMemSize)
53 continue;
54 if (!Val.SystemSupportsUnalignedAccess &&
55 isUnalignedMemmoryAccess(QueryMemSize, Query.MMODescrs[0].AlignInBits))
56 return false;
57 return true;
58 }
59 return false;
60}
61
62static bool CheckTyN(unsigned N, const LegalityQuery &Query,
63 std::initializer_list<LLT> SupportedValues) {
64 return llvm::is_contained(SupportedValues, Query.Types[N]);
65}
66
// Builds the GlobalISel legalization rule table for the MIPS subtarget.
// Scalar rules target 32-bit GPRs (s32) with s64 handled as pairs; the
// 128-bit vector rules are only enabled when the subtarget has MSA.
MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
  using namespace TargetOpcode;

  // Shorthand LLTs used throughout the rule definitions below.
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT v16s8 = LLT::fixed_vector(16, 8);
  const LLT v8s16 = LLT::fixed_vector(8, 16);
  const LLT v4s32 = LLT::fixed_vector(4, 32);
  const LLT v2s64 = LLT::fixed_vector(2, 64);
  const LLT p0 = LLT::pointer(0, 32); // 32-bit pointers, address space 0

  // Integer arithmetic: s32 always; 128-bit vectors only with MSA.
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(0, Query, {s32}))
          return true;
        if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      })
      .clampScalar(0, s32, s32);

  // Overflow ops are expanded into plain arithmetic plus compares.
  getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
      .lowerFor({{s32, s1}});

  getActionDefinitionsBuilder(G_UMULH)
      .legalFor({s32})
      .maxScalar(0, s32);

  // MIPS32r6 does not have alignment restrictions for memory access.
  // For MIPS32r5 and older memory access must be naturally-aligned i.e. aligned
  // to at least a multiple of its own size. There is however a two instruction
  // combination that performs 4 byte unaligned access (lwr/lwl and swl/swr)
  // therefore 4 byte load and store are legal and will use NoAlignRequirements.
  bool NoAlignRequirements = true;

  getActionDefinitionsBuilder({G_LOAD, G_STORE})
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (CheckTy0Ty1MemSizeAlign(
                Query, {{s32, p0, 8, NoAlignRequirements},
                        {s32, p0, 16, ST.systemSupportsUnalignedAccess()},
                        {s32, p0, 32, NoAlignRequirements},
                        {p0, p0, 32, NoAlignRequirements},
                        {s64, p0, 64, ST.systemSupportsUnalignedAccess()}}))
          return true;
        if (ST.hasMSA() && CheckTy0Ty1MemSizeAlign(
                               Query, {{v16s8, p0, 128, NoAlignRequirements},
                                       {v8s16, p0, 128, NoAlignRequirements},
                                       {v4s32, p0, 128, NoAlignRequirements},
                                       {v2s64, p0, 128, NoAlignRequirements}}))
          return true;
        return false;
      })
      // Custom lower scalar memory access, up to 8 bytes, for:
      // - non-power-of-2 MemSizes
      // - unaligned 2 or 8 byte MemSizes for MIPS32r5 and older
      .customIf([=, &ST](const LegalityQuery &Query) {
        if (!Query.Types[0].isScalar() || Query.Types[1] != p0 ||
            Query.Types[0] == s1)
          return false;

        unsigned Size = Query.Types[0].getSizeInBits();
        unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
        assert(QueryMemSize <= Size && "Scalar can't hold MemSize");

        if (Size > 64 || QueryMemSize > 64)
          return false;

        if (!isPowerOf2_64(Query.MMODescrs[0].MemoryTy.getSizeInBits()))
          return true;

        if (!ST.systemSupportsUnalignedAccess() &&
            isUnalignedMemmoryAccess(QueryMemSize,
                                     Query.MMODescrs[0].AlignInBits)) {
          assert(QueryMemSize != 32 && "4 byte load and store are legal");
          return true;
        }

        return false;
      })
      .minScalar(0, s32)
      .lower();

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({s32, s64});

  getActionDefinitionsBuilder(G_UNMERGE_VALUES)
      .legalFor({{s32, s64}});

  getActionDefinitionsBuilder(G_MERGE_VALUES)
      .legalFor({{s64, s32}});

  getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                 {s32, p0, s16, 8}})
      .clampScalar(0, s32, s32);

  // Extensions/truncations are never directly legal; narrow/widen the scalar
  // side to s32 instead (s64 values live in register pairs).
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf([](const LegalityQuery &Query) { return false; })
      .maxScalar(0, s32);

  getActionDefinitionsBuilder(G_TRUNC)
      .legalIf([](const LegalityQuery &Query) { return false; })
      .maxScalar(1, s32);

  getActionDefinitionsBuilder(G_SELECT)
      .legalForCartesianProduct({p0, s32, s64}, {s32})
      .minScalar(0, s32)
      .minScalar(1, s32);

  getActionDefinitionsBuilder(G_BRCOND)
      .legalFor({s32})
      .minScalar(0, s32);

  getActionDefinitionsBuilder(G_BRJT)
      .legalFor({{p0, s32}});

  getActionDefinitionsBuilder(G_BRINDIRECT)
      .legalFor({p0});

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s32, s64})
      .minScalar(0, s32);

  getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
      .legalFor({s32})
      .clampScalar(0, s32, s32);

  // Division/remainder: s32 in hardware (vectors with MSA); s64 via libcall.
  getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(0, Query, {s32}))
          return true;
        if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      })
      .minScalar(0, s32)
      .libcallFor({s64});

  getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
      .legalFor({{s32, s32}})
      .clampScalar(1, s32, s32)
      .clampScalar(0, s32, s32);

  getActionDefinitionsBuilder(G_ICMP)
      .legalForCartesianProduct({s32}, {s32, p0})
      .clampScalar(1, s32, s32)
      .minScalar(0, s32);

  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({s32})
      .clampScalar(0, s32, s32);

  getActionDefinitionsBuilder({G_PTR_ADD, G_INTTOPTR})
      .legalFor({{p0, s32}});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{s32, p0}});

  getActionDefinitionsBuilder(G_FRAME_INDEX)
      .legalFor({p0});

  getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE})
      .legalFor({p0});

  getActionDefinitionsBuilder(G_DYN_STACKALLOC)
      .lowerFor({{p0, s32}});

  getActionDefinitionsBuilder(G_VASTART)
      .legalFor({p0});

  // Byte swap has a native instruction only from MIPS32r2 on; older cores
  // lower it to shifts and masks.
  getActionDefinitionsBuilder(G_BSWAP)
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (ST.hasMips32r2() && CheckTyN(0, Query, {s32}))
          return true;
        return false;
      })
      .lowerIf([=, &ST](const LegalityQuery &Query) {
        if (!ST.hasMips32r2() && CheckTyN(0, Query, {s32}))
          return true;
        return false;
      })
      .maxScalar(0, s32);

  getActionDefinitionsBuilder(G_BITREVERSE)
      .lowerFor({s32})
      .maxScalar(0, s32);

  getActionDefinitionsBuilder(G_CTLZ)
      .legalFor({{s32, s32}})
      .maxScalar(0, s32)
      .maxScalar(1, s32);
  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
      .lowerFor({{s32, s32}});

  getActionDefinitionsBuilder(G_CTTZ)
      .lowerFor({{s32, s32}})
      .maxScalar(0, s32)
      .maxScalar(1, s32);
  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
      .lowerFor({{s32, s32}, {s64, s64}});

  getActionDefinitionsBuilder(G_CTPOP)
      .lowerFor({{s32, s32}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s32);

  // FP instructions
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
      .legalIf([=, &ST](const LegalityQuery &Query) {
        if (CheckTyN(0, Query, {s32, s64}))
          return true;
        if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
          return true;
        return false;
      });

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .minScalar(0, s32);

  getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
      .libcallFor({s32, s64});

  getActionDefinitionsBuilder(G_FPEXT)
      .legalFor({{s64, s32}});

  getActionDefinitionsBuilder(G_FPTRUNC)
      .legalFor({{s32, s64}});

  // FP to int conversion instructions
  getActionDefinitionsBuilder(G_FPTOSI)
      .legalForCartesianProduct({s32}, {s64, s32})
      .libcallForCartesianProduct({s64}, {s64, s32})
      .minScalar(0, s32);

  getActionDefinitionsBuilder(G_FPTOUI)
      .libcallForCartesianProduct({s64}, {s64, s32})
      .lowerForCartesianProduct({s32}, {s64, s32})
      .minScalar(0, s32);

  // Int to FP conversion instructions
  getActionDefinitionsBuilder(G_SITOFP)
      .legalForCartesianProduct({s64, s32}, {s32})
      .libcallForCartesianProduct({s64, s32}, {s64})
      .minScalar(1, s32);

  // Unsigned-to-FP from s32 uses the custom bit-trick in legalizeCustom.
  getActionDefinitionsBuilder(G_UITOFP)
      .libcallForCartesianProduct({s64, s32}, {s64})
      .customForCartesianProduct({s64, s32}, {s32})
      .minScalar(1, s32);

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getLegacyLegalizerInfo().computeTables();
  verify(*ST.getInstrInfo());
}
331
332bool MipsLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
333 MachineInstr &MI) const {
334 using namespace TargetOpcode;
335
336 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
337 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
338
339 const LLT s32 = LLT::scalar(32);
340 const LLT s64 = LLT::scalar(64);
341
342 switch (MI.getOpcode()) {
1
Control jumps to 'case G_STORE:' at line 344
343 case G_LOAD:
344 case G_STORE: {
345 unsigned MemSize = (**MI.memoperands_begin()).getSize();
346 Register Val = MI.getOperand(0).getReg();
347 unsigned Size = MRI.getType(Val).getSizeInBits();
348
349 MachineMemOperand *MMOBase = *MI.memoperands_begin();
350
351 assert(MemSize <= 8 && "MemSize is too large")(static_cast <bool> (MemSize <= 8 && "MemSize is too large"
) ? void (0) : __assert_fail ("MemSize <= 8 && \"MemSize is too large\""
, "llvm/lib/Target/Mips/MipsLegalizerInfo.cpp", 351, __extension__
__PRETTY_FUNCTION__))
;
2
Assuming 'MemSize' is <= 8
3
'?' condition is true
352 assert(Size <= 64 && "Scalar size is too large")(static_cast <bool> (Size <= 64 && "Scalar size is too large"
) ? void (0) : __assert_fail ("Size <= 64 && \"Scalar size is too large\""
, "llvm/lib/Target/Mips/MipsLegalizerInfo.cpp", 352, __extension__
__PRETTY_FUNCTION__))
;
4
Assuming 'Size' is <= 64
5
'?' condition is true
353
354 // Split MemSize into two, P2HalfMemSize is largest power of two smaller
355 // then MemSize. e.g. 8 = 4 + 4 , 6 = 4 + 2, 3 = 2 + 1.
356 unsigned P2HalfMemSize, RemMemSize;
357 if (isPowerOf2_64(MemSize)) {
6
Taking false branch
358 P2HalfMemSize = RemMemSize = MemSize / 2;
359 } else {
360 P2HalfMemSize = 1 << Log2_32(MemSize);
7
Calling 'Log2_32'
9
Returning from 'Log2_32'
10
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
361 RemMemSize = MemSize - P2HalfMemSize;
362 }
363
364 Register BaseAddr = MI.getOperand(1).getReg();
365 LLT PtrTy = MRI.getType(BaseAddr);
366 MachineFunction &MF = MIRBuilder.getMF();
367
368 auto P2HalfMemOp = MF.getMachineMemOperand(MMOBase, 0, P2HalfMemSize);
369 auto RemMemOp = MF.getMachineMemOperand(MMOBase, P2HalfMemSize, RemMemSize);
370
371 if (MI.getOpcode() == G_STORE) {
372 // Widen Val to s32 or s64 in order to create legal G_LSHR or G_UNMERGE.
373 if (Size < 32)
374 Val = MIRBuilder.buildAnyExt(s32, Val).getReg(0);
375 if (Size > 32 && Size < 64)
376 Val = MIRBuilder.buildAnyExt(s64, Val).getReg(0);
377
378 auto C_P2HalfMemSize = MIRBuilder.buildConstant(s32, P2HalfMemSize);
379 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, BaseAddr, C_P2HalfMemSize);
380
381 if (MI.getOpcode() == G_STORE && MemSize <= 4) {
382 MIRBuilder.buildStore(Val, BaseAddr, *P2HalfMemOp);
383 auto C_P2Half_InBits = MIRBuilder.buildConstant(s32, P2HalfMemSize * 8);
384 auto Shift = MIRBuilder.buildLShr(s32, Val, C_P2Half_InBits);
385 MIRBuilder.buildStore(Shift, Addr, *RemMemOp);
386 } else {
387 auto Unmerge = MIRBuilder.buildUnmerge(s32, Val);
388 MIRBuilder.buildStore(Unmerge.getReg(0), BaseAddr, *P2HalfMemOp);
389 MIRBuilder.buildStore(Unmerge.getReg(1), Addr, *RemMemOp);
390 }
391 }
392
393 if (MI.getOpcode() == G_LOAD) {
394
395 if (MemSize <= 4) {
396 // This is anyextending load, use 4 byte lwr/lwl.
397 auto *Load4MMO = MF.getMachineMemOperand(MMOBase, 0, 4);
398
399 if (Size == 32)
400 MIRBuilder.buildLoad(Val, BaseAddr, *Load4MMO);
401 else {
402 auto Load = MIRBuilder.buildLoad(s32, BaseAddr, *Load4MMO);
403 MIRBuilder.buildTrunc(Val, Load.getReg(0));
404 }
405
406 } else {
407 auto C_P2HalfMemSize = MIRBuilder.buildConstant(s32, P2HalfMemSize);
408 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, BaseAddr, C_P2HalfMemSize);
409
410 auto Load_P2Half = MIRBuilder.buildLoad(s32, BaseAddr, *P2HalfMemOp);
411 auto Load_Rem = MIRBuilder.buildLoad(s32, Addr, *RemMemOp);
412
413 if (Size == 64)
414 MIRBuilder.buildMerge(Val, {Load_P2Half, Load_Rem});
415 else {
416 auto Merge = MIRBuilder.buildMerge(s64, {Load_P2Half, Load_Rem});
417 MIRBuilder.buildTrunc(Val, Merge);
418 }
419 }
420 }
421 MI.eraseFromParent();
422 break;
423 }
424 case G_UITOFP: {
425 Register Dst = MI.getOperand(0).getReg();
426 Register Src = MI.getOperand(1).getReg();
427 LLT DstTy = MRI.getType(Dst);
428 LLT SrcTy = MRI.getType(Src);
429
430 if (SrcTy != s32)
431 return false;
432 if (DstTy != s32 && DstTy != s64)
433 return false;
434
435 // Let 0xABCDEFGH be given unsigned in MI.getOperand(1). First let's convert
436 // unsigned to double. Mantissa has 52 bits so we use following trick:
437 // First make floating point bit mask 0x43300000ABCDEFGH.
438 // Mask represents 2^52 * 0x1.00000ABCDEFGH i.e. 0x100000ABCDEFGH.0 .
439 // Next, subtract 2^52 * 0x1.0000000000000 i.e. 0x10000000000000.0 from it.
440 // Done. Trunc double to float if needed.
441
442 auto C_HiMask = MIRBuilder.buildConstant(s32, UINT32_C(0x43300000)0x43300000U);
443 auto Bitcast = MIRBuilder.buildMerge(s64, {Src, C_HiMask.getReg(0)});
444
445 MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant(
446 s64, BitsToDouble(UINT64_C(0x4330000000000000)0x4330000000000000UL));
447
448 if (DstTy == s64)
449 MIRBuilder.buildFSub(Dst, Bitcast, TwoP52FP);
450 else {
451 MachineInstrBuilder ResF64 = MIRBuilder.buildFSub(s64, Bitcast, TwoP52FP);
452 MIRBuilder.buildFPTrunc(Dst, ResF64);
453 }
454
455 MI.eraseFromParent();
456 break;
457 }
458 default:
459 return false;
460 }
461
462 return true;
463}
464
465static bool SelectMSA3OpIntrinsic(MachineInstr &MI, unsigned Opcode,
466 MachineIRBuilder &MIRBuilder,
467 const MipsSubtarget &ST) {
468 assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.")(static_cast <bool> (ST.hasMSA() && "MSA intrinsic not supported on target without MSA."
) ? void (0) : __assert_fail ("ST.hasMSA() && \"MSA intrinsic not supported on target without MSA.\""
, "llvm/lib/Target/Mips/MipsLegalizerInfo.cpp", 468, __extension__
__PRETTY_FUNCTION__))
;
469 if (!MIRBuilder.buildInstr(Opcode)
470 .add(MI.getOperand(0))
471 .add(MI.getOperand(2))
472 .add(MI.getOperand(3))
473 .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
474 *ST.getRegBankInfo()))
475 return false;
476 MI.eraseFromParent();
477 return true;
478}
479
480static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
481 MachineIRBuilder &MIRBuilder,
482 const MipsSubtarget &ST) {
483 assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.")(static_cast <bool> (ST.hasMSA() && "MSA intrinsic not supported on target without MSA."
) ? void (0) : __assert_fail ("ST.hasMSA() && \"MSA intrinsic not supported on target without MSA.\""
, "llvm/lib/Target/Mips/MipsLegalizerInfo.cpp", 483, __extension__
__PRETTY_FUNCTION__))
;
484 MIRBuilder.buildInstr(Opcode)
485 .add(MI.getOperand(0))
486 .add(MI.getOperand(2))
487 .add(MI.getOperand(3));
488 MI.eraseFromParent();
489 return true;
490}
491
492static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
493 MachineIRBuilder &MIRBuilder,
494 const MipsSubtarget &ST) {
495 assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.")(static_cast <bool> (ST.hasMSA() && "MSA intrinsic not supported on target without MSA."
) ? void (0) : __assert_fail ("ST.hasMSA() && \"MSA intrinsic not supported on target without MSA.\""
, "llvm/lib/Target/Mips/MipsLegalizerInfo.cpp", 495, __extension__
__PRETTY_FUNCTION__))
;
496 MIRBuilder.buildInstr(Opcode)
497 .add(MI.getOperand(0))
498 .add(MI.getOperand(2));
499 MI.eraseFromParent();
500 return true;
501}
502
// Legalizes target intrinsics: llvm.trap, llvm.va_copy, and the MSA vector
// intrinsics, which are mapped either to generic opcodes (so later combines
// and selection handle them uniformly) or directly to MSA instructions when
// the immediate-operand form has no generic equivalent. Returns false only
// when instruction selection of a directly-selected MSA instruction fails;
// unrecognized intrinsics fall through and return true.
bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  const MipsSubtarget &ST = MI.getMF()->getSubtarget<MipsSubtarget>();
  const MipsInstrInfo &TII = *ST.getInstrInfo();
  const MipsRegisterInfo &TRI = *ST.getRegisterInfo();
  const RegisterBankInfo &RBI = *ST.getRegBankInfo();

  switch (MI.getIntrinsicID()) {
  case Intrinsic::trap: {
    // llvm.trap becomes the MIPS TRAP instruction directly.
    MachineInstr *Trap = MIRBuilder.buildInstr(Mips::TRAP);
    MI.eraseFromParent();
    return constrainSelectedInstRegOperands(*Trap, TII, TRI, RBI);
  }
  case Intrinsic::vacopy: {
    // va_copy is a 4-byte pointer load from the source va_list followed by a
    // store to the destination va_list.
    MachinePointerInfo MPO;
    LLT PtrTy = LLT::pointer(0, 32);
    auto Tmp =
        MIRBuilder.buildLoad(PtrTy, MI.getOperand(2),
                             *MI.getMF()->getMachineMemOperand(
                                 MPO, MachineMemOperand::MOLoad, PtrTy, Align(4)));
    MIRBuilder.buildStore(Tmp, MI.getOperand(1),
                          *MI.getMF()->getMachineMemOperand(
                              MPO, MachineMemOperand::MOStore, PtrTy, Align(4)));
    MI.eraseFromParent();
    return true;
  }
  // Vector add: map to generic G_ADD; the immediate forms (addvi.*) select
  // the MSA instruction directly.
  case Intrinsic::mips_addv_b:
  case Intrinsic::mips_addv_h:
  case Intrinsic::mips_addv_w:
  case Intrinsic::mips_addv_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_ADD, MIRBuilder, ST);
  case Intrinsic::mips_addvi_b:
    return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_B, MIRBuilder, ST);
  case Intrinsic::mips_addvi_h:
    return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_H, MIRBuilder, ST);
  case Intrinsic::mips_addvi_w:
    return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_W, MIRBuilder, ST);
  case Intrinsic::mips_addvi_d:
    return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_D, MIRBuilder, ST);
  case Intrinsic::mips_subv_b:
  case Intrinsic::mips_subv_h:
  case Intrinsic::mips_subv_w:
  case Intrinsic::mips_subv_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SUB, MIRBuilder, ST);
  case Intrinsic::mips_subvi_b:
    return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_B, MIRBuilder, ST);
  case Intrinsic::mips_subvi_h:
    return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_H, MIRBuilder, ST);
  case Intrinsic::mips_subvi_w:
    return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_W, MIRBuilder, ST);
  case Intrinsic::mips_subvi_d:
    return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_D, MIRBuilder, ST);
  case Intrinsic::mips_mulv_b:
  case Intrinsic::mips_mulv_h:
  case Intrinsic::mips_mulv_w:
  case Intrinsic::mips_mulv_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_MUL, MIRBuilder, ST);
  case Intrinsic::mips_div_s_b:
  case Intrinsic::mips_div_s_h:
  case Intrinsic::mips_div_s_w:
  case Intrinsic::mips_div_s_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SDIV, MIRBuilder, ST);
  case Intrinsic::mips_mod_s_b:
  case Intrinsic::mips_mod_s_h:
  case Intrinsic::mips_mod_s_w:
  case Intrinsic::mips_mod_s_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SREM, MIRBuilder, ST);
  case Intrinsic::mips_div_u_b:
  case Intrinsic::mips_div_u_h:
  case Intrinsic::mips_div_u_w:
  case Intrinsic::mips_div_u_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UDIV, MIRBuilder, ST);
  case Intrinsic::mips_mod_u_b:
  case Intrinsic::mips_mod_u_h:
  case Intrinsic::mips_mod_u_w:
  case Intrinsic::mips_mod_u_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UREM, MIRBuilder, ST);
  case Intrinsic::mips_fadd_w:
  case Intrinsic::mips_fadd_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FADD, MIRBuilder, ST);
  case Intrinsic::mips_fsub_w:
  case Intrinsic::mips_fsub_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FSUB, MIRBuilder, ST);
  case Intrinsic::mips_fmul_w:
  case Intrinsic::mips_fmul_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FMUL, MIRBuilder, ST);
  case Intrinsic::mips_fdiv_w:
  case Intrinsic::mips_fdiv_d:
    return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FDIV, MIRBuilder, ST);
  case Intrinsic::mips_fmax_a_w:
    return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_W, MIRBuilder, ST);
  case Intrinsic::mips_fmax_a_d:
    return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_D, MIRBuilder, ST);
  case Intrinsic::mips_fsqrt_w:
    return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
  case Intrinsic::mips_fsqrt_d:
    return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
  default:
    break;
  }
  return true;
}

/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/ADT/bit.h"
17#include "llvm/Support/Compiler.h"
18#include <cassert>
19#include <climits>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef _MSC_VER
26// Declare these intrinsics manually rather including intrin.h. It's very
27// expensive, and MathExtras.h is popular.
28// #include <intrin.h>
29extern "C" {
30unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
31unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
32unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
33unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
34}
35#endif
36
37namespace llvm {
38
/// The behavior an operation has on an input of 0.
/// Passed to the bit-counting helpers below to pick the zero-input policy.
enum ZeroBehavior {
  /// The returned value is undefined.
  ZB_Undefined,
  /// The returned value is numeric_limits<T>::max()
  ZB_Max,
  /// The returned value is numeric_limits<T>::digits
  ZB_Width
};
48
/// Mathematical constants.
/// Each comment gives the exact hexadecimal FP literal and, where available,
/// the OEIS sequence for the constant's digits.
namespace numbers {
// TODO: Track C++20 std::numbers.
// TODO: Favor using the hexadecimal FP constants (requires C++17).
constexpr double e          = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
                 egamma     = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
                 ln2        = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
                 ln10       = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
                 log2e      = 1.4426950408889634074, // (0x1.71547652b82feP+0)
                 log10e     = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
                 pi         = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
                 inv_pi     = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
                 sqrtpi     = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
                 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
                 sqrt2      = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219
                 inv_sqrt2  = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
                 sqrt3      = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
                 inv_sqrt3  = .57735026918962576451, // (0x1.279a74590331cP-1)
                 phi        = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
constexpr float ef          = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
                egammaf     = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
                ln2f        = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
                ln10f       = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
                log2ef      = 1.44269504F, // (0x1.715476P+0)
                log10ef     = .434294482F, // (0x1.bcb7b2P-2)
                pif         = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
                inv_pif     = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
                sqrtpif     = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
                inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
                sqrt2f      = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
                inv_sqrt2f  = .707106781F, // (0x1.6a09e6P-1)
                sqrt3f      = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
                inv_sqrt3f  = .577350269F, // (0x1.279a74P-1)
                phif        = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
} // namespace numbers
84
85namespace detail {
// Generic (portable) trailing-zero counter, used when no compiler builtin
// specialization below applies. Counts via binary search over the bit width.
template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
  static unsigned count(T Val, ZeroBehavior) {
    // All-zero input: every bit position is a trailing zero.
    if (!Val)
      return std::numeric_limits<T>::digits;
    // Lowest bit set: no trailing zeros.
    if (Val & 0x1)
      return 0;

    // Bisection method.
    unsigned ZeroBits = 0;
    T Shift = std::numeric_limits<T>::digits >> 1; // start with half the width
    T Mask = std::numeric_limits<T>::max() >> Shift;
    while (Shift) {
      // If the low half is all zero, those Shift bits are trailing zeros;
      // discard them and continue on the high half.
      if ((Val & Mask) == 0) {
        Val >>= Shift;
        ZeroBits |= Shift;
      }
      Shift >>= 1;
      Mask >>= Shift;
    }
    return ZeroBits;
  }
};
108
109#if defined(__GNUC__4) || defined(_MSC_VER)
110template <typename T> struct TrailingZerosCounter<T, 4> {
111 static unsigned count(T Val, ZeroBehavior ZB) {
112 if (ZB != ZB_Undefined && Val == 0)
113 return 32;
114
115#if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4)
116 return __builtin_ctz(Val);
117#elif defined(_MSC_VER)
118 unsigned long Index;
119 _BitScanForward(&Index, Val);
120 return Index;
121#endif
122 }
123};
124
125#if !defined(_MSC_VER) || defined(_M_X64)
126template <typename T> struct TrailingZerosCounter<T, 8> {
127 static unsigned count(T Val, ZeroBehavior ZB) {
128 if (ZB != ZB_Undefined && Val == 0)
129 return 64;
130
131#if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4)
132 return __builtin_ctzll(Val);
133#elif defined(_MSC_VER)
134 unsigned long Index;
135 _BitScanForward64(&Index, Val);
136 return Index;
137#endif
138 }
139};
140#endif
141#endif
142} // namespace detail
143
144/// Count number of 0's from the least significant bit to the most
145/// stopping at the first 1.
146///
147/// Only unsigned integral types are allowed.
148///
149/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
150/// valid arguments.
151template <typename T>
152unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
153 static_assert(std::is_unsigned_v<T>,
154 "Only unsigned integral types are allowed.");
155 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
156}
157
158namespace detail {
/// Generic fallback that counts leading zero bits by binary search.
/// The unnamed ZeroBehavior parameter only mirrors the intrinsic
/// specializations below; a zero input always yields the full bit width.
template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
  static unsigned count(T Val, ZeroBehavior) {
    if (!Val)
      return std::numeric_limits<T>::digits;

    // Bisection method: if the top Shift bits are all zero, record them and
    // keep the low half; otherwise keep the (shifted-down) high half.
    unsigned ZeroBits = 0;
    for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
      T Tmp = Val >> Shift;
      if (Tmp)
        Val = Tmp;
      else
        ZeroBits |= Shift;
    }
    return ZeroBits;
  }
};
175};
176
177#if defined(__GNUC__4) || defined(_MSC_VER)
178template <typename T> struct LeadingZerosCounter<T, 4> {
179 static unsigned count(T Val, ZeroBehavior ZB) {
180 if (ZB != ZB_Undefined && Val == 0)
181 return 32;
182
183#if __has_builtin(__builtin_clz)1 || defined(__GNUC__4)
184 return __builtin_clz(Val);
185#elif defined(_MSC_VER)
186 unsigned long Index;
187 _BitScanReverse(&Index, Val);
188 return Index ^ 31;
189#endif
190 }
191};
192
193#if !defined(_MSC_VER) || defined(_M_X64)
194template <typename T> struct LeadingZerosCounter<T, 8> {
195 static unsigned count(T Val, ZeroBehavior ZB) {
196 if (ZB != ZB_Undefined && Val == 0)
197 return 64;
198
199#if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4)
200 return __builtin_clzll(Val);
201#elif defined(_MSC_VER)
202 unsigned long Index;
203 _BitScanReverse64(&Index, Val);
204 return Index ^ 63;
205#endif
206 }
207};
208#endif
209#endif
210} // namespace detail
211
212/// Count number of 0's from the most significant bit to the least
213/// stopping at the first 1.
214///
215/// Only unsigned integral types are allowed.
216///
217/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
218/// valid arguments.
219template <typename T>
220unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
221 static_assert(std::is_unsigned_v<T>,
222 "Only unsigned integral types are allowed.");
223 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
224}
225
226/// Get the index of the first set bit starting from the least
227/// significant bit.
228///
229/// Only unsigned integral types are allowed.
230///
231/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
232/// valid arguments.
233template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
234 if (ZB == ZB_Max && Val == 0)
235 return std::numeric_limits<T>::max();
236
237 return countTrailingZeros(Val, ZB_Undefined);
238}
239
240/// Create a bitmask with the N right-most bits set to 1, and all other
241/// bits set to 0. Only unsigned types are allowed.
template <typename T> T maskTrailingOnes(unsigned N) {
  static_assert(std::is_unsigned<T>::value, "Invalid type!");
  const unsigned Bits = CHAR_BIT * sizeof(T);
  assert(N <= Bits && "Invalid bit index");
  // N == 0 must be special-cased: shifting T(-1) right by Bits would be
  // undefined behavior.
  return N == 0 ? 0 : (T(-1) >> (Bits - N));
}
248
249/// Create a bitmask with the N left-most bits set to 1, and all other
250/// bits set to 0. Only unsigned types are allowed.
251template <typename T> T maskLeadingOnes(unsigned N) {
252 return ~maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
253}
254
255/// Create a bitmask with the N right-most bits set to 0, and all other
256/// bits set to 1. Only unsigned types are allowed.
257template <typename T> T maskTrailingZeros(unsigned N) {
258 return maskLeadingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N left-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskLeadingZeros(unsigned N) {
264 return maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
265}
266
267/// Get the index of the last set bit starting from the least
268/// significant bit.
269///
270/// Only unsigned integral types are allowed.
271///
272/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
273/// valid arguments.
274template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
275 if (ZB == ZB_Max && Val == 0)
276 return std::numeric_limits<T>::max();
277
278 // Use ^ instead of - because both gcc and llvm can remove the associated ^
279 // in the __builtin_clz intrinsic on x86.
280 return countLeadingZeros(Val, ZB_Undefined) ^
281 (std::numeric_limits<T>::digits - 1);
282}
283
284/// Macro compressed bit reversal table for 256 bits.
285///
286/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
static const unsigned char BitReverseTable256[256] = {
  // R2/R4/R6 expand recursively so that entry i is the bit-reversal of the
  // byte i (see the Stanford bithacks link above).
#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
  R6(0), R6(2), R6(1), R6(3)
#undef R2
#undef R4
#undef R6
};
296
297/// Reverse the bits in \p Val.
298template <typename T> T reverseBits(T Val) {
299#if __has_builtin(__builtin_bitreverse8)1
300 if constexpr (std::is_same_v<T, uint8_t>)
301 return __builtin_bitreverse8(Val);
302#endif
303#if __has_builtin(__builtin_bitreverse16)1
304 if constexpr (std::is_same_v<T, uint16_t>)
305 return __builtin_bitreverse16(Val);
306#endif
307#if __has_builtin(__builtin_bitreverse32)1
308 if constexpr (std::is_same_v<T, uint32_t>)
309 return __builtin_bitreverse32(Val);
310#endif
311#if __has_builtin(__builtin_bitreverse64)1
312 if constexpr (std::is_same_v<T, uint64_t>)
313 return __builtin_bitreverse64(Val);
314#endif
315
316 unsigned char in[sizeof(Val)];
317 unsigned char out[sizeof(Val)];
318 std::memcpy(in, &Val, sizeof(Val));
319 for (unsigned i = 0; i < sizeof(Val); ++i)
320 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
321 std::memcpy(&Val, out, sizeof(Val));
322 return Val;
323}
324
325// NOTE: The following support functions use the _32/_64 extensions instead of
326// type overloading so that signed and unsigned integers can be used without
327// ambiguity.
328
329/// Return the high 32 bits of a 64 bit value.
constexpr inline uint32_t Hi_32(uint64_t Value) {
  // Shift the high word down; the truncating cast keeps bits [63:32].
  const uint64_t HighBits = Value >> 32;
  return static_cast<uint32_t>(HighBits);
}
333
334/// Return the low 32 bits of a 64 bit value.
constexpr inline uint32_t Lo_32(uint64_t Value) {
  // Truncation keeps bits [31:0]; the mask makes that explicit.
  return static_cast<uint32_t>(Value & 0xFFFFFFFFULL);
}
338
339/// Make a 64-bit integer from a high / low pair of 32-bit integers.
constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
  // Widen both halves first so the shift happens in 64 bits.
  return (static_cast<uint64_t>(High) << 32) | static_cast<uint64_t>(Low);
}
343
344/// Checks if an integer fits into the given bit width.
template <unsigned N> constexpr inline bool isInt(int64_t x) {
  // Common widths round-trip through the exact-width type.
  if constexpr (N == 8)
    return static_cast<int8_t>(x) == x;
  if constexpr (N == 16)
    return static_cast<int16_t>(x) == x;
  if constexpr (N == 32)
    return static_cast<int32_t>(x) == x;
  // Other widths below 64: range check against [-2^(N-1), 2^(N-1)).
  if constexpr (N < 64)
    return -(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1));
  (void)x; // MSVC v19.25 warns that x is unused.
  return true; // N >= 64: every int64_t fits.
}
357
358/// Checks if a signed integer is an N bit number shifted left by S.
359template <unsigned N, unsigned S>
360constexpr inline bool isShiftedInt(int64_t x) {
361 static_assert(
362 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
363 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
364 return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0);
365}
366
367/// Checks if an unsigned integer fits into the given bit width.
template <unsigned N> constexpr inline bool isUInt(uint64_t x) {
  static_assert(N > 0, "isUInt<0> doesn't make sense");
  // Common widths round-trip through the exact-width type.
  if constexpr (N == 8)
    return static_cast<uint8_t>(x) == x;
  if constexpr (N == 16)
    return static_cast<uint16_t>(x) == x;
  if constexpr (N == 32)
    return static_cast<uint32_t>(x) == x;
  // Other widths below 64: range check against [0, 2^N).
  if constexpr (N < 64)
    return x < (UINT64_C(1) << (N));
  (void)x; // MSVC v19.25 warns that x is unused.
  return true; // N >= 64: every uint64_t fits.
}
381
382/// Checks if a unsigned integer is an N bit number shifted left by S.
383template <unsigned N, unsigned S>
384constexpr inline bool isShiftedUInt(uint64_t x) {
385 static_assert(
386 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
387 static_assert(N + S <= 64,
388 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
389 // Per the two static_asserts above, S must be strictly less than 64. So
390 // 1 << S is not undefined behavior.
391 return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0);
392}
393
394/// Gets the maximum value for a N-bit unsigned integer.
inline uint64_t maxUIntN(uint64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // uint64_t(1) << 64 is undefined behavior, so we can't do
  //   (uint64_t(1) << N) - 1
  // without checking first that N != 64. But this works and doesn't have a
  // branch.
  return UINT64_MAX >> (64 - N);
}
404
405/// Gets the minimum value for a N-bit signed integer.
inline int64_t minIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // "1 + ~X" is "-X" written without MSVC's unary-minus-on-unsigned warning;
  // the arithmetic is done in uint64_t so N == 64 wraps instead of invoking
  // signed-overflow UB, and only the final result converts to int64_t.
  return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
}
411
412/// Gets the maximum value for a N-bit signed integer.
inline int64_t maxIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // This relies on two's complement wraparound when N == 64, so we convert to
  // int64_t only at the very end to avoid UB.
  return (UINT64_C(1) << (N - 1)) - 1;
}
420
421/// Checks if an unsigned integer fits into the given (dynamic) bit width.
422inline bool isUIntN(unsigned N, uint64_t x) {
423 return N >= 64 || x <= maxUIntN(N);
424}
425
426/// Checks if an signed integer fits into the given (dynamic) bit width.
427inline bool isIntN(unsigned N, int64_t x) {
428 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
429}
430
431/// Return true if the argument is a non-empty sequence of ones starting at the
432/// least significant bit with the remainder zero (32 bit version).
433/// Ex. isMask_32(0x0000FFFFU) == true.
constexpr inline bool isMask_32(uint32_t Value) {
  // Adding 1 to a value of the form 0b0..01..1 carries through every
  // trailing one, so ANDing with the original is zero exactly for such
  // masks; the first test rejects the empty mask.
  return Value != 0 && ((Value + 1) & Value) == 0;
}
437
438/// Return true if the argument is a non-empty sequence of ones starting at the
439/// least significant bit with the remainder zero (64 bit version).
constexpr inline bool isMask_64(uint64_t Value) {
  // Same carry trick as isMask_32, in 64 bits.
  return Value != 0 && ((Value + 1) & Value) == 0;
}
443
444/// Return true if the argument contains a non-empty sequence of ones with the
445/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
446constexpr inline bool isShiftedMask_32(uint32_t Value) {
447 return Value && isMask_32((Value - 1) | Value);
448}
449
450/// Return true if the argument contains a non-empty sequence of ones with the
451/// remainder zero (64 bit version.)
452constexpr inline bool isShiftedMask_64(uint64_t Value) {
453 return Value && isMask_64((Value - 1) | Value);
454}
455
456/// Return true if the argument is a power of two > 0.
457/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
constexpr inline bool isPowerOf2_32(uint32_t Value) {
  // A power of two (> 0) has exactly one bit set.
  return llvm::has_single_bit(Value);
}
461
462/// Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr inline bool isPowerOf2_64(uint64_t Value) {
  // A power of two (> 0) has exactly one bit set.
  return llvm::has_single_bit(Value);
}
466
467/// Count the number of ones from the most significant bit to the first
468/// zero bit.
469///
470/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
471/// Only unsigned integral types are allowed.
472///
473/// \param ZB the behavior on an input of all ones. Only ZB_Width and
474/// ZB_Undefined are valid arguments.
475template <typename T>
476unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
477 static_assert(std::is_unsigned_v<T>,
478 "Only unsigned integral types are allowed.");
479 return countLeadingZeros<T>(~Value, ZB);
480}
481
482/// Count the number of ones from the least significant bit to the first
483/// zero bit.
484///
485/// Ex. countTrailingOnes(0x00FF00FF) == 8.
486/// Only unsigned integral types are allowed.
487///
488/// \param ZB the behavior on an input of all ones. Only ZB_Width and
489/// ZB_Undefined are valid arguments.
490template <typename T>
491unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
492 static_assert(std::is_unsigned_v<T>,
493 "Only unsigned integral types are allowed.");
494 return countTrailingZeros<T>(~Value, ZB);
495}
496
497/// Count the number of set bits in a value.
498/// Ex. countPopulation(0xF000F000) = 8
499/// Returns 0 if the word is zero.
template <typename T>
inline unsigned countPopulation(T Value) {
  static_assert(std::is_unsigned_v<T>,
                "Only unsigned integral types are allowed.");
  // Delegates to llvm::popcount; the cast narrows its int result to unsigned.
  return (unsigned)llvm::popcount(Value);
}
506
507/// Return true if the argument contains a non-empty sequence of ones with the
508/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
509/// If true, \p MaskIdx will specify the index of the lowest set bit and \p
510/// MaskLen is updated to specify the length of the mask, else neither are
511/// updated.
512inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx,
513 unsigned &MaskLen) {
514 if (!isShiftedMask_32(Value))
515 return false;
516 MaskIdx = countTrailingZeros(Value);
517 MaskLen = countPopulation(Value);
518 return true;
519}
520
521/// Return true if the argument contains a non-empty sequence of ones with the
522/// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index
523/// of the lowest set bit and \p MaskLen is updated to specify the length of the
524/// mask, else neither are updated.
525inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
526 unsigned &MaskLen) {
527 if (!isShiftedMask_64(Value))
528 return false;
529 MaskIdx = countTrailingZeros(Value);
530 MaskLen = countPopulation(Value);
531 return true;
532}
533
534/// Compile time Log2.
535/// Valid only for positive powers of two.
template <size_t kValue> constexpr inline size_t CTLog2() {
  static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
                "Value is not a valid power of 2");
  // Recurse: log2(k) == 1 + log2(k / 2); terminates at the <1> base case.
  return 1 + CTLog2<kValue / 2>();
}

// Base case of the recursion above: log2(1) == 0.
template <> constexpr inline size_t CTLog2<1>() { return 0; }
543
544/// Return the floor log base 2 of the specified value, -1 if the value is zero.
545/// (32 bit edition.)
546/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
547inline unsigned Log2_32(uint32_t Value) {
548 return 31 - countLeadingZeros(Value);
8
Returning the value 4294967295
549}
550
551/// Return the floor log base 2 of the specified value, -1 if the value is zero.
552/// (64 bit edition.)
inline unsigned Log2_64(uint64_t Value) {
  // For Value == 0 this yields 63u - 64u == UINT_MAX (the documented "-1");
  // callers must not use that result as a shift amount.
  return 63 - countLeadingZeros(Value);
}
556
557/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
558/// (32 bit edition).
559/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
inline unsigned Log2_32_Ceil(uint32_t Value) {
  // Value == 0 wraps Value - 1 to UINT32_MAX, whose clz is 0, giving the
  // documented result of 32; Value == 1 gives 32 - clz(0) == 32 - 32 == 0.
  return 32 - countLeadingZeros(Value - 1);
}
563
564/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
565/// (64 bit edition.)
inline unsigned Log2_64_Ceil(uint64_t Value) {
  // Value == 0 wraps Value - 1 to UINT64_MAX, whose clz is 0, giving the
  // documented result of 64; Value == 1 gives 64 - clz(0) == 0.
  return 64 - countLeadingZeros(Value - 1);
}
569
570/// This function takes a 64-bit integer and returns the bit equivalent double.
inline double BitsToDouble(uint64_t Bits) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // Pure bit-pattern reinterpretation; no numeric conversion takes place.
  return llvm::bit_cast<double>(Bits);
}
575
576/// This function takes a 32-bit integer and returns the bit equivalent float.
inline float BitsToFloat(uint32_t Bits) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // Pure bit-pattern reinterpretation; no numeric conversion takes place.
  return llvm::bit_cast<float>(Bits);
}
581
582/// This function takes a double and returns the bit equivalent 64-bit integer.
583/// Note that copying doubles around changes the bits of NaNs on some hosts,
584/// notably x86, so this routine cannot be used if these bits are needed.
inline uint64_t DoubleToBits(double Double) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  // Pure bit-pattern reinterpretation; see the NaN caveat in the doc above.
  return llvm::bit_cast<uint64_t>(Double);
}
589
590/// This function takes a float and returns the bit equivalent 32-bit integer.
591/// Note that copying floats around changes the bits of NaNs on some hosts,
592/// notably x86, so this routine cannot be used if these bits are needed.
inline uint32_t FloatToBits(float Float) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  // Pure bit-pattern reinterpretation; see the NaN caveat in the doc above.
  return llvm::bit_cast<uint32_t>(Float);
}
597
598/// A and B are either alignments or offsets. Return the minimum alignment that
599/// may be assumed after adding the two together.
constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
  // The largest power of 2 dividing both A and B is the lowest set bit of
  // (A | B), i.e. X & -X. "-X" is spelled "1 + ~X" to avoid MSVC warning
  // C4146 about negating an unsigned value.
  const uint64_t Combined = A | B;
  return Combined & (1 + ~Combined);
}
608
609/// Returns the next power of two (in 64-bits) that is strictly greater than A.
610/// Returns zero on overflow.
constexpr inline uint64_t NextPowerOf2(uint64_t A) {
  // Smear the highest set bit into every lower position (shifts of 1, 2, 4,
  // ..., 32 cover all 64 bits), then add one to reach the next power of two.
  for (unsigned ShiftAmt = 1; ShiftAmt < 64; ShiftAmt <<= 1)
    A |= (A >> ShiftAmt);
  return A + 1;
}
620
621/// Returns the power of two which is less than or equal to the given value.
622/// Essentially, it is a floor operation across the domain of powers of two.
623inline uint64_t PowerOf2Floor(uint64_t A) {
624 if (!A) return 0;
625 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
626}
627
628/// Returns the power of two which is greater than or equal to the given value.
629/// Essentially, it is a ceil operation across the domain of powers of two.
630inline uint64_t PowerOf2Ceil(uint64_t A) {
631 if (!A)
632 return 0;
633 return NextPowerOf2(A - 1);
634}
635
636/// Returns the next integer (mod 2**64) that is greater than or equal to
637/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
638///
639/// Examples:
640/// \code
641/// alignTo(5, 8) = 8
642/// alignTo(17, 8) = 24
643/// alignTo(~0LL, 8) = 0
644/// alignTo(321, 255) = 510
645/// \endcode
inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
  assert(Align != 0u && "Align can't be 0.");
  // Round up by bumping to the last value of the block, then truncating to a
  // multiple of Align. Overflow intentionally wraps mod 2**64 (see examples).
  return (Value + Align - 1) / Align * Align;
}
650
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align, which must be a power of two.
inline uint64_t alignToPowerOf2(uint64_t Value, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 &&
         "Align must be a power of 2");
  // Bump into the target block, then clear the low bits with the mask -Align.
  return (Value + Align - 1) & -Align;
}
656
657/// If non-zero \p Skew is specified, the return value will be a minimal integer
658/// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for
659/// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p
660/// Skew mod \p A'. \p Align must be non-zero.
661///
662/// Examples:
663/// \code
664/// alignTo(5, 8, 7) = 7
665/// alignTo(17, 8, 1) = 17
666/// alignTo(~0LL, 8, 3) = 3
667/// alignTo(321, 255, 42) = 552
668/// \endcode
669inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) {
670 assert(Align != 0u && "Align can't be 0.")(static_cast <bool> (Align != 0u && "Align can't be 0."
) ? void (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\""
, "llvm/include/llvm/Support/MathExtras.h", 670, __extension__
__PRETTY_FUNCTION__))
;
671 Skew %= Align;
672 return alignTo(Value - Skew, Align) + Skew;
673}
674
675/// Returns the next integer (mod 2**64) that is greater than or equal to
676/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
  static_assert(Align != 0u, "Align must be non-zero");
  // Bump into the target block, then drop the remainder; equivalent to
  // (Bumped / Align) * Align.
  const uint64_t Bumped = Value + Align - 1;
  return Bumped - Bumped % Align;
}
681
682/// Returns the integer ceil(Numerator / Denominator).
inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
  // Identical to alignTo(Numerator, Denominator) / Denominator, folded into
  // one expression; the bump wraps mod 2**64 exactly as alignTo does.
  return (Numerator + Denominator - 1) / Denominator;
}
686
687/// Returns the integer nearest(Numerator / Denominator).
inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
  // Round half up: add half the denominator before truncating division.
  const uint64_t Half = Denominator / 2;
  return (Numerator + Half) / Denominator;
}
691
692/// Returns the largest uint64_t less than or equal to \p Value and is
693/// \p Skew mod \p Align. \p Align must be non-zero
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  // Normalize the skew into [0, Align), then round down on the skewed grid.
  Skew %= Align;
  return (Value - Skew) / Align * Align + Skew;
}
699
700/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
701/// Requires 0 < B <= 32.
template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 32, "Bit width out of range.");
  // Move the B-bit field to the top, then arithmetic-shift it back down so
  // its sign bit is replicated through the upper bits.
  constexpr unsigned Shift = 32 - B;
  return int32_t(X << Shift) >> Shift;
}
707
708/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
709/// Requires 0 < B <= 32.
inline int32_t SignExtend32(uint32_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 32 && "Bit width out of range.");
  // Move the B-bit field to the top, then arithmetic-shift it back down so
  // its sign bit is replicated through the upper bits.
  return int32_t(X << (32 - B)) >> (32 - B);
}
715
716/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
717/// Requires 0 < B <= 64.
template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 64, "Bit width out of range.");
  // Move the B-bit field to the top, then arithmetic-shift it back down so
  // its sign bit is replicated through the upper bits.
  constexpr unsigned Shift = 64 - B;
  return int64_t(x << Shift) >> Shift;
}
723
724/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
725/// Requires 0 < B <= 64.
inline int64_t SignExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 64 && "Bit width out of range.");
  // Move the B-bit field to the top, then arithmetic-shift it back down so
  // its sign bit is replicated through the upper bits.
  return int64_t(X << (64 - B)) >> (64 - B);
}
731
732/// Subtract two unsigned integers, X and Y, of type T and return the absolute
733/// value of the result.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
  // Always subtract the smaller from the larger so the unsigned result
  // cannot wrap.
  if (X > Y)
    return X - Y;
  return Y - X;
}
738
739/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
740/// maximum representable value of T on overflow. ResultOverflowed indicates if
741/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
  // Hacker's Delight, p. 29: unsigned addition overflowed iff the wrapped
  // sum is smaller than either operand.
  T Z = X + Y;
  Overflowed = (Z < X || Z < Y);
  return Overflowed ? std::numeric_limits<T>::max() : Z;
}
755
756/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
757/// maximum representable value of T on overflow. ResultOverflowed indicates if
758/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
  // because it fails for uint16_t (where multiplication can have undefined
  // behavior due to promotion to int), and requires a division in addition
  // to the multiplication.

  Overflowed = false;

  // Log2(Z) would be either Log2Z or Log2Z + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
  // will necessarily be less than Log2Max as desired.
  int Log2Z = Log2_64(X) + Log2_64(Y);
  const T Max = std::numeric_limits<T>::max();
  int Log2Max = Log2_64(Max);
  // Strictly fewer bits than Max: the product cannot overflow.
  if (Log2Z < Log2Max) {
    return X * Y;
  }
  // Strictly more bits than Max: the product must overflow.
  if (Log2Z > Log2Max) {
    Overflowed = true;
    return Max;
  }

  // Borderline case (Log2Z == Log2Max): we're going to use the top bit, and
  // maybe overflow one bit past it. Multiply all but the bottom bit then add
  // that on at the end.
  T Z = (X >> 1) * Y;
  if (Z & ~(Max >> 1)) {
    Overflowed = true;
    return Max;
  }
  Z <<= 1;
  // If X was odd, the dropped low bit contributes one more Y; SaturatingAdd
  // propagates any overflow from that final addition.
  if (X & 1)
    return SaturatingAdd(Z, Y, ResultOverflowed);

  return Z;
}
800
801/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
802/// the product. Clamp the result to the maximum representable value of T on
803/// overflow. ResultOverflowed indicates if the result is larger than the
804/// maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // If the multiply already saturated, the add cannot change the answer.
  const T Product = SaturatingMultiply(X, Y, &Overflowed);
  if (Overflowed)
    return Product;
  return SaturatingAdd(A, Product, &Overflowed);
}
817
818/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
819extern const float huge_valf;
820
821
822/// Add two signed integers, computing the two's complement truncated result,
823/// returning true if overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_add_overflow)
  return __builtin_add_overflow(X, Y, &Result);
#else
  // Perform the addition in the unsigned domain, where wraparound is defined.
  using U = std::make_unsigned_t<T>;
  const U UX = static_cast<U>(X);
  const U UY = static_cast<U>(Y);
  const U UResult = UX + UY;

  // Convert to signed.
  Result = static_cast<T>(UResult);

  // Adding two positive numbers should result in a positive number.
  if (X > 0 && Y > 0)
    return Result <= 0;
  // Adding two negatives should result in a negative number.
  if (X < 0 && Y < 0)
    return Result >= 0;
  // Mixed signs can never overflow.
  return false;
#endif
}
847
848/// Subtract two signed integers, computing the two's complement truncated
849/// result, returning true if an overflow ocurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_sub_overflow)
  return __builtin_sub_overflow(X, Y, &Result);
#else
  // Perform the subtraction in the unsigned domain, where wraparound is
  // defined.
  using U = std::make_unsigned_t<T>;
  const U UX = static_cast<U>(X);
  const U UY = static_cast<U>(Y);
  const U UResult = UX - UY;

  // Convert to signed.
  Result = static_cast<T>(UResult);

  // Subtracting a positive number from a negative results in a negative number.
  if (X <= 0 && Y > 0)
    return Result >= 0;
  // Subtracting a negative number from a positive results in a positive number.
  if (X >= 0 && Y < 0)
    return Result <= 0;
  // Same-sign operands can never overflow.
  return false;
#endif
}
873
874/// Multiply two signed integers, computing the two's complement truncated
875/// result, returning true if an overflow ocurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_mul_overflow)
  // Use the compiler intrinsic when available, for consistency with
  // AddOverflow/SubOverflow above.
  return __builtin_mul_overflow(X, Y, &Result);
#else
  // Perform the unsigned multiplication on absolute values.
  using U = std::make_unsigned_t<T>;
  const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
  const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
  const U UResult = UX * UY;

  // Convert to signed.
  const bool IsNegative = (X < 0) ^ (Y < 0);
  Result = IsNegative ? (0 - UResult) : UResult;

  // If any of the args was 0, result is 0 and no overflow occurs.
  if (UX == 0 || UY == 0)
    return false;

  // UX and UY are in [1, 2^n], where n is the number of digits.
  // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
  // positive) divided by an argument compares to the other.
  if (IsNegative)
    return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
  else
    return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
#endif
}
900
901} // End llvm namespace
902
903#endif