Bug Summary

File: llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
Warning: line 356, column 25
The result of the left shift is undefined due to shifting by '4294967295', which is greater than or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name MipsLegalizerInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/Mips -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/Mips -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem 
/usr/lib/llvm-13/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/build-llvm/lib/Target/Mips -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-04-14-063029-18377-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp

1//===- MipsLegalizerInfo.cpp ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the Machinelegalizer class for Mips.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "MipsLegalizerInfo.h"
14#include "MipsTargetMachine.h"
15#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16#include "llvm/IR/IntrinsicsMips.h"
17
18using namespace llvm;
19
20struct TypesAndMemOps {
21 LLT ValTy;
22 LLT PtrTy;
23 unsigned MemSize;
24 bool SystemSupportsUnalignedAccess;
25};
26
27// Assumes power of 2 memory size. Subtargets that have only naturally-aligned
28// memory access need to perform additional legalization here.
29static bool isUnalignedMemmoryAccess(uint64_t MemSize, uint64_t AlignInBits) {
30 assert(isPowerOf2_64(MemSize) && "Expected power of 2 memory size")((isPowerOf2_64(MemSize) && "Expected power of 2 memory size"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_64(MemSize) && \"Expected power of 2 memory size\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 30, __PRETTY_FUNCTION__))
;
31 assert(isPowerOf2_64(AlignInBits) && "Expected power of 2 align")((isPowerOf2_64(AlignInBits) && "Expected power of 2 align"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_64(AlignInBits) && \"Expected power of 2 align\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 31, __PRETTY_FUNCTION__))
;
32 if (MemSize > AlignInBits)
33 return true;
34 return false;
35}
36
37static bool
38CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query,
39 std::initializer_list<TypesAndMemOps> SupportedValues) {
40 unsigned QueryMemSize = Query.MMODescrs[0].SizeInBits;
41
42 // Non power of two memory access is never legal.
43 if (!isPowerOf2_64(QueryMemSize))
44 return false;
45
46 for (auto &Val : SupportedValues) {
47 if (Val.ValTy != Query.Types[0])
48 continue;
49 if (Val.PtrTy != Query.Types[1])
50 continue;
51 if (Val.MemSize != QueryMemSize)
52 continue;
53 if (!Val.SystemSupportsUnalignedAccess &&
54 isUnalignedMemmoryAccess(QueryMemSize, Query.MMODescrs[0].AlignInBits))
55 return false;
56 return true;
57 }
58 return false;
59}
60
61static bool CheckTyN(unsigned N, const LegalityQuery &Query,
62 std::initializer_list<LLT> SupportedValues) {
63 return llvm::is_contained(SupportedValues, Query.Types[N]);
64}
65
66MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
67 using namespace TargetOpcode;
68
69 const LLT s1 = LLT::scalar(1);
70 const LLT s32 = LLT::scalar(32);
71 const LLT s64 = LLT::scalar(64);
72 const LLT v16s8 = LLT::vector(16, 8);
73 const LLT v8s16 = LLT::vector(8, 16);
74 const LLT v4s32 = LLT::vector(4, 32);
75 const LLT v2s64 = LLT::vector(2, 64);
76 const LLT p0 = LLT::pointer(0, 32);
77
78 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
79 .legalIf([=, &ST](const LegalityQuery &Query) {
80 if (CheckTyN(0, Query, {s32}))
81 return true;
82 if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
83 return true;
84 return false;
85 })
86 .clampScalar(0, s32, s32);
87
88 getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
89 .lowerFor({{s32, s1}});
90
91 getActionDefinitionsBuilder(G_UMULH)
92 .legalFor({s32})
93 .maxScalar(0, s32);
94
95 // MIPS32r6 does not have alignment restrictions for memory access.
96 // For MIPS32r5 and older memory access must be naturally-aligned i.e. aligned
97 // to at least a multiple of its own size. There is however a two instruction
98 // combination that performs 4 byte unaligned access (lwr/lwl and swl/swr)
99 // therefore 4 byte load and store are legal and will use NoAlignRequirements.
100 bool NoAlignRequirements = true;
101
102 getActionDefinitionsBuilder({G_LOAD, G_STORE})
103 .legalIf([=, &ST](const LegalityQuery &Query) {
104 if (CheckTy0Ty1MemSizeAlign(
105 Query, {{s32, p0, 8, NoAlignRequirements},
106 {s32, p0, 16, ST.systemSupportsUnalignedAccess()},
107 {s32, p0, 32, NoAlignRequirements},
108 {p0, p0, 32, NoAlignRequirements},
109 {s64, p0, 64, ST.systemSupportsUnalignedAccess()}}))
110 return true;
111 if (ST.hasMSA() && CheckTy0Ty1MemSizeAlign(
112 Query, {{v16s8, p0, 128, NoAlignRequirements},
113 {v8s16, p0, 128, NoAlignRequirements},
114 {v4s32, p0, 128, NoAlignRequirements},
115 {v2s64, p0, 128, NoAlignRequirements}}))
116 return true;
117 return false;
118 })
119 // Custom lower scalar memory access, up to 8 bytes, for:
120 // - non-power-of-2 MemSizes
121 // - unaligned 2 or 8 byte MemSizes for MIPS32r5 and older
122 .customIf([=, &ST](const LegalityQuery &Query) {
123 if (!Query.Types[0].isScalar() || Query.Types[1] != p0 ||
124 Query.Types[0] == s1)
125 return false;
126
127 unsigned Size = Query.Types[0].getSizeInBits();
128 unsigned QueryMemSize = Query.MMODescrs[0].SizeInBits;
129 assert(QueryMemSize <= Size && "Scalar can't hold MemSize")((QueryMemSize <= Size && "Scalar can't hold MemSize"
) ? static_cast<void> (0) : __assert_fail ("QueryMemSize <= Size && \"Scalar can't hold MemSize\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 129, __PRETTY_FUNCTION__))
;
130
131 if (Size > 64 || QueryMemSize > 64)
132 return false;
133
134 if (!isPowerOf2_64(Query.MMODescrs[0].SizeInBits))
135 return true;
136
137 if (!ST.systemSupportsUnalignedAccess() &&
138 isUnalignedMemmoryAccess(QueryMemSize,
139 Query.MMODescrs[0].AlignInBits)) {
140 assert(QueryMemSize != 32 && "4 byte load and store are legal")((QueryMemSize != 32 && "4 byte load and store are legal"
) ? static_cast<void> (0) : __assert_fail ("QueryMemSize != 32 && \"4 byte load and store are legal\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 140, __PRETTY_FUNCTION__))
;
141 return true;
142 }
143
144 return false;
145 })
146 .minScalar(0, s32);
147
148 getActionDefinitionsBuilder(G_IMPLICIT_DEF)
149 .legalFor({s32, s64});
150
151 getActionDefinitionsBuilder(G_UNMERGE_VALUES)
152 .legalFor({{s32, s64}});
153
154 getActionDefinitionsBuilder(G_MERGE_VALUES)
155 .legalFor({{s64, s32}});
156
157 getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
158 .legalForTypesWithMemDesc({{s32, p0, 8, 8},
159 {s32, p0, 16, 8}})
160 .clampScalar(0, s32, s32);
161
162 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
163 .legalIf([](const LegalityQuery &Query) { return false; })
164 .maxScalar(0, s32);
165
166 getActionDefinitionsBuilder(G_TRUNC)
167 .legalIf([](const LegalityQuery &Query) { return false; })
168 .maxScalar(1, s32);
169
170 getActionDefinitionsBuilder(G_SELECT)
171 .legalForCartesianProduct({p0, s32, s64}, {s32})
172 .minScalar(0, s32)
173 .minScalar(1, s32);
174
175 getActionDefinitionsBuilder(G_BRCOND)
176 .legalFor({s32})
177 .minScalar(0, s32);
178
179 getActionDefinitionsBuilder(G_BRJT)
180 .legalFor({{p0, s32}});
181
182 getActionDefinitionsBuilder(G_BRINDIRECT)
183 .legalFor({p0});
184
185 getActionDefinitionsBuilder(G_PHI)
186 .legalFor({p0, s32, s64})
187 .minScalar(0, s32);
188
189 getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
190 .legalFor({s32})
191 .clampScalar(0, s32, s32);
192
193 getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
194 .legalIf([=, &ST](const LegalityQuery &Query) {
195 if (CheckTyN(0, Query, {s32}))
196 return true;
197 if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
198 return true;
199 return false;
200 })
201 .minScalar(0, s32)
202 .libcallFor({s64});
203
204 getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
205 .legalFor({{s32, s32}})
206 .clampScalar(1, s32, s32)
207 .clampScalar(0, s32, s32);
208
209 getActionDefinitionsBuilder(G_ICMP)
210 .legalForCartesianProduct({s32}, {s32, p0})
211 .clampScalar(1, s32, s32)
212 .minScalar(0, s32);
213
214 getActionDefinitionsBuilder(G_CONSTANT)
215 .legalFor({s32})
216 .clampScalar(0, s32, s32);
217
218 getActionDefinitionsBuilder({G_PTR_ADD, G_INTTOPTR})
219 .legalFor({{p0, s32}});
220
221 getActionDefinitionsBuilder(G_PTRTOINT)
222 .legalFor({{s32, p0}});
223
224 getActionDefinitionsBuilder(G_FRAME_INDEX)
225 .legalFor({p0});
226
227 getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE})
228 .legalFor({p0});
229
230 getActionDefinitionsBuilder(G_DYN_STACKALLOC)
231 .lowerFor({{p0, s32}});
232
233 getActionDefinitionsBuilder(G_VASTART)
234 .legalFor({p0});
235
236 getActionDefinitionsBuilder(G_BSWAP)
237 .legalIf([=, &ST](const LegalityQuery &Query) {
238 if (ST.hasMips32r2() && CheckTyN(0, Query, {s32}))
239 return true;
240 return false;
241 })
242 .lowerIf([=, &ST](const LegalityQuery &Query) {
243 if (!ST.hasMips32r2() && CheckTyN(0, Query, {s32}))
244 return true;
245 return false;
246 })
247 .maxScalar(0, s32);
248
249 getActionDefinitionsBuilder(G_BITREVERSE)
250 .lowerFor({s32})
251 .maxScalar(0, s32);
252
253 getActionDefinitionsBuilder(G_CTLZ)
254 .legalFor({{s32, s32}})
255 .maxScalar(0, s32)
256 .maxScalar(1, s32);
257 getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
258 .lowerFor({{s32, s32}});
259
260 getActionDefinitionsBuilder(G_CTTZ)
261 .lowerFor({{s32, s32}})
262 .maxScalar(0, s32)
263 .maxScalar(1, s32);
264 getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
265 .lowerFor({{s32, s32}, {s64, s64}});
266
267 getActionDefinitionsBuilder(G_CTPOP)
268 .lowerFor({{s32, s32}})
269 .clampScalar(0, s32, s32)
270 .clampScalar(1, s32, s32);
271
272 // FP instructions
273 getActionDefinitionsBuilder(G_FCONSTANT)
274 .legalFor({s32, s64});
275
276 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
277 .legalIf([=, &ST](const LegalityQuery &Query) {
278 if (CheckTyN(0, Query, {s32, s64}))
279 return true;
280 if (ST.hasMSA() && CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}))
281 return true;
282 return false;
283 });
284
285 getActionDefinitionsBuilder(G_FCMP)
286 .legalFor({{s32, s32}, {s32, s64}})
287 .minScalar(0, s32);
288
289 getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
290 .libcallFor({s32, s64});
291
292 getActionDefinitionsBuilder(G_FPEXT)
293 .legalFor({{s64, s32}});
294
295 getActionDefinitionsBuilder(G_FPTRUNC)
296 .legalFor({{s32, s64}});
297
298 // FP to int conversion instructions
299 getActionDefinitionsBuilder(G_FPTOSI)
300 .legalForCartesianProduct({s32}, {s64, s32})
301 .libcallForCartesianProduct({s64}, {s64, s32})
302 .minScalar(0, s32);
303
304 getActionDefinitionsBuilder(G_FPTOUI)
305 .libcallForCartesianProduct({s64}, {s64, s32})
306 .lowerForCartesianProduct({s32}, {s64, s32})
307 .minScalar(0, s32);
308
309 // Int to FP conversion instructions
310 getActionDefinitionsBuilder(G_SITOFP)
311 .legalForCartesianProduct({s64, s32}, {s32})
312 .libcallForCartesianProduct({s64, s32}, {s64})
313 .minScalar(1, s32);
314
315 getActionDefinitionsBuilder(G_UITOFP)
316 .libcallForCartesianProduct({s64, s32}, {s64})
317 .customForCartesianProduct({s64, s32}, {s32})
318 .minScalar(1, s32);
319
320 getActionDefinitionsBuilder(G_SEXT_INREG).lower();
321
322 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
323
324 computeTables();
325 verify(*ST.getInstrInfo());
326}
327
328bool MipsLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
329 MachineInstr &MI) const {
330 using namespace TargetOpcode;
331
332 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
333 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
334
335 const LLT s32 = LLT::scalar(32);
336 const LLT s64 = LLT::scalar(64);
337
338 switch (MI.getOpcode()) {
1
Control jumps to 'case G_LOAD:' at line 339
339 case G_LOAD:
340 case G_STORE: {
341 unsigned MemSize = (**MI.memoperands_begin()).getSize();
342 Register Val = MI.getOperand(0).getReg();
343 unsigned Size = MRI.getType(Val).getSizeInBits();
344
345 MachineMemOperand *MMOBase = *MI.memoperands_begin();
346
347 assert(MemSize <= 8 && "MemSize is too large")((MemSize <= 8 && "MemSize is too large") ? static_cast
<void> (0) : __assert_fail ("MemSize <= 8 && \"MemSize is too large\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 347, __PRETTY_FUNCTION__))
;
2
Assuming 'MemSize' is <= 8
3
'?' condition is true
348 assert(Size <= 64 && "Scalar size is too large")((Size <= 64 && "Scalar size is too large") ? static_cast
<void> (0) : __assert_fail ("Size <= 64 && \"Scalar size is too large\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 348, __PRETTY_FUNCTION__))
;
4
Assuming 'Size' is <= 64
5
'?' condition is true
349
350 // Split MemSize into two, P2HalfMemSize is largest power of two smaller
351 // then MemSize. e.g. 8 = 4 + 4 , 6 = 4 + 2, 3 = 2 + 1.
352 unsigned P2HalfMemSize, RemMemSize;
353 if (isPowerOf2_64(MemSize)) {
6
Calling 'isPowerOf2_64'
9
Returning from 'isPowerOf2_64'
10
Taking false branch
354 P2HalfMemSize = RemMemSize = MemSize / 2;
355 } else {
356 P2HalfMemSize = 1 << Log2_32(MemSize);
11
Calling 'Log2_32'
13
Returning from 'Log2_32'
14
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
357 RemMemSize = MemSize - P2HalfMemSize;
358 }
359
360 Register BaseAddr = MI.getOperand(1).getReg();
361 LLT PtrTy = MRI.getType(BaseAddr);
362 MachineFunction &MF = MIRBuilder.getMF();
363
364 auto P2HalfMemOp = MF.getMachineMemOperand(MMOBase, 0, P2HalfMemSize);
365 auto RemMemOp = MF.getMachineMemOperand(MMOBase, P2HalfMemSize, RemMemSize);
366
367 if (MI.getOpcode() == G_STORE) {
368 // Widen Val to s32 or s64 in order to create legal G_LSHR or G_UNMERGE.
369 if (Size < 32)
370 Val = MIRBuilder.buildAnyExt(s32, Val).getReg(0);
371 if (Size > 32 && Size < 64)
372 Val = MIRBuilder.buildAnyExt(s64, Val).getReg(0);
373
374 auto C_P2HalfMemSize = MIRBuilder.buildConstant(s32, P2HalfMemSize);
375 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, BaseAddr, C_P2HalfMemSize);
376
377 if (MI.getOpcode() == G_STORE && MemSize <= 4) {
378 MIRBuilder.buildStore(Val, BaseAddr, *P2HalfMemOp);
379 auto C_P2Half_InBits = MIRBuilder.buildConstant(s32, P2HalfMemSize * 8);
380 auto Shift = MIRBuilder.buildLShr(s32, Val, C_P2Half_InBits);
381 MIRBuilder.buildStore(Shift, Addr, *RemMemOp);
382 } else {
383 auto Unmerge = MIRBuilder.buildUnmerge(s32, Val);
384 MIRBuilder.buildStore(Unmerge.getReg(0), BaseAddr, *P2HalfMemOp);
385 MIRBuilder.buildStore(Unmerge.getReg(1), Addr, *RemMemOp);
386 }
387 }
388
389 if (MI.getOpcode() == G_LOAD) {
390
391 if (MemSize <= 4) {
392 // This is anyextending load, use 4 byte lwr/lwl.
393 auto *Load4MMO = MF.getMachineMemOperand(MMOBase, 0, 4);
394
395 if (Size == 32)
396 MIRBuilder.buildLoad(Val, BaseAddr, *Load4MMO);
397 else {
398 auto Load = MIRBuilder.buildLoad(s32, BaseAddr, *Load4MMO);
399 MIRBuilder.buildTrunc(Val, Load.getReg(0));
400 }
401
402 } else {
403 auto C_P2HalfMemSize = MIRBuilder.buildConstant(s32, P2HalfMemSize);
404 auto Addr = MIRBuilder.buildPtrAdd(PtrTy, BaseAddr, C_P2HalfMemSize);
405
406 auto Load_P2Half = MIRBuilder.buildLoad(s32, BaseAddr, *P2HalfMemOp);
407 auto Load_Rem = MIRBuilder.buildLoad(s32, Addr, *RemMemOp);
408
409 if (Size == 64)
410 MIRBuilder.buildMerge(Val, {Load_P2Half, Load_Rem});
411 else {
412 auto Merge = MIRBuilder.buildMerge(s64, {Load_P2Half, Load_Rem});
413 MIRBuilder.buildTrunc(Val, Merge);
414 }
415 }
416 }
417 MI.eraseFromParent();
418 break;
419 }
420 case G_UITOFP: {
421 Register Dst = MI.getOperand(0).getReg();
422 Register Src = MI.getOperand(1).getReg();
423 LLT DstTy = MRI.getType(Dst);
424 LLT SrcTy = MRI.getType(Src);
425
426 if (SrcTy != s32)
427 return false;
428 if (DstTy != s32 && DstTy != s64)
429 return false;
430
431 // Let 0xABCDEFGH be given unsigned in MI.getOperand(1). First let's convert
432 // unsigned to double. Mantissa has 52 bits so we use following trick:
433 // First make floating point bit mask 0x43300000ABCDEFGH.
434 // Mask represents 2^52 * 0x1.00000ABCDEFGH i.e. 0x100000ABCDEFGH.0 .
435 // Next, subtract 2^52 * 0x1.0000000000000 i.e. 0x10000000000000.0 from it.
436 // Done. Trunc double to float if needed.
437
438 auto C_HiMask = MIRBuilder.buildConstant(s32, UINT32_C(0x43300000)0x43300000U);
439 auto Bitcast = MIRBuilder.buildMerge(s64, {Src, C_HiMask.getReg(0)});
440
441 MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant(
442 s64, BitsToDouble(UINT64_C(0x4330000000000000)0x4330000000000000UL));
443
444 if (DstTy == s64)
445 MIRBuilder.buildFSub(Dst, Bitcast, TwoP52FP);
446 else {
447 MachineInstrBuilder ResF64 = MIRBuilder.buildFSub(s64, Bitcast, TwoP52FP);
448 MIRBuilder.buildFPTrunc(Dst, ResF64);
449 }
450
451 MI.eraseFromParent();
452 break;
453 }
454 default:
455 return false;
456 }
457
458 return true;
459}
460
461static bool SelectMSA3OpIntrinsic(MachineInstr &MI, unsigned Opcode,
462 MachineIRBuilder &MIRBuilder,
463 const MipsSubtarget &ST) {
464 assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.")((ST.hasMSA() && "MSA intrinsic not supported on target without MSA."
) ? static_cast<void> (0) : __assert_fail ("ST.hasMSA() && \"MSA intrinsic not supported on target without MSA.\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 464, __PRETTY_FUNCTION__))
;
465 if (!MIRBuilder.buildInstr(Opcode)
466 .add(MI.getOperand(0))
467 .add(MI.getOperand(2))
468 .add(MI.getOperand(3))
469 .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(),
470 *ST.getRegBankInfo()))
471 return false;
472 MI.eraseFromParent();
473 return true;
474}
475
476static bool MSA3OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
477 MachineIRBuilder &MIRBuilder,
478 const MipsSubtarget &ST) {
479 assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.")((ST.hasMSA() && "MSA intrinsic not supported on target without MSA."
) ? static_cast<void> (0) : __assert_fail ("ST.hasMSA() && \"MSA intrinsic not supported on target without MSA.\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 479, __PRETTY_FUNCTION__))
;
480 MIRBuilder.buildInstr(Opcode)
481 .add(MI.getOperand(0))
482 .add(MI.getOperand(2))
483 .add(MI.getOperand(3));
484 MI.eraseFromParent();
485 return true;
486}
487
488static bool MSA2OpIntrinsicToGeneric(MachineInstr &MI, unsigned Opcode,
489 MachineIRBuilder &MIRBuilder,
490 const MipsSubtarget &ST) {
491 assert(ST.hasMSA() && "MSA intrinsic not supported on target without MSA.")((ST.hasMSA() && "MSA intrinsic not supported on target without MSA."
) ? static_cast<void> (0) : __assert_fail ("ST.hasMSA() && \"MSA intrinsic not supported on target without MSA.\""
, "/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp"
, 491, __PRETTY_FUNCTION__))
;
492 MIRBuilder.buildInstr(Opcode)
493 .add(MI.getOperand(0))
494 .add(MI.getOperand(2));
495 MI.eraseFromParent();
496 return true;
497}
498
499bool MipsLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
500 MachineInstr &MI) const {
501 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
502 const MipsSubtarget &ST =
503 static_cast<const MipsSubtarget &>(MI.getMF()->getSubtarget());
504 const MipsInstrInfo &TII = *ST.getInstrInfo();
505 const MipsRegisterInfo &TRI = *ST.getRegisterInfo();
506 const RegisterBankInfo &RBI = *ST.getRegBankInfo();
507
508 switch (MI.getIntrinsicID()) {
509 case Intrinsic::trap: {
510 MachineInstr *Trap = MIRBuilder.buildInstr(Mips::TRAP);
511 MI.eraseFromParent();
512 return constrainSelectedInstRegOperands(*Trap, TII, TRI, RBI);
513 }
514 case Intrinsic::vacopy: {
515 MachinePointerInfo MPO;
516 auto Tmp =
517 MIRBuilder.buildLoad(LLT::pointer(0, 32), MI.getOperand(2),
518 *MI.getMF()->getMachineMemOperand(
519 MPO, MachineMemOperand::MOLoad, 4, Align(4)));
520 MIRBuilder.buildStore(Tmp, MI.getOperand(1),
521 *MI.getMF()->getMachineMemOperand(
522 MPO, MachineMemOperand::MOStore, 4, Align(4)));
523 MI.eraseFromParent();
524 return true;
525 }
526 case Intrinsic::mips_addv_b:
527 case Intrinsic::mips_addv_h:
528 case Intrinsic::mips_addv_w:
529 case Intrinsic::mips_addv_d:
530 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_ADD, MIRBuilder, ST);
531 case Intrinsic::mips_addvi_b:
532 return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_B, MIRBuilder, ST);
533 case Intrinsic::mips_addvi_h:
534 return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_H, MIRBuilder, ST);
535 case Intrinsic::mips_addvi_w:
536 return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_W, MIRBuilder, ST);
537 case Intrinsic::mips_addvi_d:
538 return SelectMSA3OpIntrinsic(MI, Mips::ADDVI_D, MIRBuilder, ST);
539 case Intrinsic::mips_subv_b:
540 case Intrinsic::mips_subv_h:
541 case Intrinsic::mips_subv_w:
542 case Intrinsic::mips_subv_d:
543 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SUB, MIRBuilder, ST);
544 case Intrinsic::mips_subvi_b:
545 return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_B, MIRBuilder, ST);
546 case Intrinsic::mips_subvi_h:
547 return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_H, MIRBuilder, ST);
548 case Intrinsic::mips_subvi_w:
549 return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_W, MIRBuilder, ST);
550 case Intrinsic::mips_subvi_d:
551 return SelectMSA3OpIntrinsic(MI, Mips::SUBVI_D, MIRBuilder, ST);
552 case Intrinsic::mips_mulv_b:
553 case Intrinsic::mips_mulv_h:
554 case Intrinsic::mips_mulv_w:
555 case Intrinsic::mips_mulv_d:
556 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_MUL, MIRBuilder, ST);
557 case Intrinsic::mips_div_s_b:
558 case Intrinsic::mips_div_s_h:
559 case Intrinsic::mips_div_s_w:
560 case Intrinsic::mips_div_s_d:
561 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SDIV, MIRBuilder, ST);
562 case Intrinsic::mips_mod_s_b:
563 case Intrinsic::mips_mod_s_h:
564 case Intrinsic::mips_mod_s_w:
565 case Intrinsic::mips_mod_s_d:
566 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_SREM, MIRBuilder, ST);
567 case Intrinsic::mips_div_u_b:
568 case Intrinsic::mips_div_u_h:
569 case Intrinsic::mips_div_u_w:
570 case Intrinsic::mips_div_u_d:
571 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UDIV, MIRBuilder, ST);
572 case Intrinsic::mips_mod_u_b:
573 case Intrinsic::mips_mod_u_h:
574 case Intrinsic::mips_mod_u_w:
575 case Intrinsic::mips_mod_u_d:
576 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_UREM, MIRBuilder, ST);
577 case Intrinsic::mips_fadd_w:
578 case Intrinsic::mips_fadd_d:
579 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FADD, MIRBuilder, ST);
580 case Intrinsic::mips_fsub_w:
581 case Intrinsic::mips_fsub_d:
582 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FSUB, MIRBuilder, ST);
583 case Intrinsic::mips_fmul_w:
584 case Intrinsic::mips_fmul_d:
585 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FMUL, MIRBuilder, ST);
586 case Intrinsic::mips_fdiv_w:
587 case Intrinsic::mips_fdiv_d:
588 return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FDIV, MIRBuilder, ST);
589 case Intrinsic::mips_fmax_a_w:
590 return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_W, MIRBuilder, ST);
591 case Intrinsic::mips_fmax_a_d:
592 return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_D, MIRBuilder, ST);
593 case Intrinsic::mips_fsqrt_w:
594 return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
595 case Intrinsic::mips_fsqrt_d:
596 return MSA2OpIntrinsicToGeneric(MI, TargetOpcode::G_FSQRT, MIRBuilder, ST);
597 default:
598 break;
599 }
600 return true;
601}

/build/llvm-toolchain-snapshot-13~++20210413100635+64c24f493e5f/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include <algorithm>
18#include <cassert>
19#include <climits>
20#include <cmath>
21#include <cstdint>
22#include <cstring>
23#include <limits>
24#include <type_traits>
25
26#ifdef __ANDROID_NDK__
27#include <android/api-level.h>
28#endif
29
30#ifdef _MSC_VER
31// Declare these intrinsics manually rather including intrin.h. It's very
32// expensive, and MathExtras.h is popular.
33// #include <intrin.h>
34extern "C" {
35unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
36unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
37unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
38unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
39}
40#endif
41
42namespace llvm {
43
44/// The behavior an operation has on an input of 0.
45enum ZeroBehavior {
46 /// The returned value is undefined.
47 ZB_Undefined,
48 /// The returned value is numeric_limits<T>::max()
49 ZB_Max,
50 /// The returned value is numeric_limits<T>::digits
51 ZB_Width
52};
53
54/// Mathematical constants.
55namespace numbers {
56// TODO: Track C++20 std::numbers.
57// TODO: Favor using the hexadecimal FP constants (requires C++17).
58constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
59 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
60 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
61 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
62 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
63 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
64 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
65 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
66 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
67 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
68 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219
69 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
70 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
71 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
72 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
73constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
74 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
75 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
76 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
77 log2ef = 1.44269504F, // (0x1.715476P+0)
78 log10ef = .434294482F, // (0x1.bcb7b2P-2)
79 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
80 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
81 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
82 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
83 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
84 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
85 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
86 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
87 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
88} // namespace numbers
89
90namespace detail {
91template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
92 static unsigned count(T Val, ZeroBehavior) {
93 if (!Val)
94 return std::numeric_limits<T>::digits;
95 if (Val & 0x1)
96 return 0;
97
98 // Bisection method.
99 unsigned ZeroBits = 0;
100 T Shift = std::numeric_limits<T>::digits >> 1;
101 T Mask = std::numeric_limits<T>::max() >> Shift;
102 while (Shift) {
103 if ((Val & Mask) == 0) {
104 Val >>= Shift;
105 ZeroBits |= Shift;
106 }
107 Shift >>= 1;
108 Mask >>= Shift;
109 }
110 return ZeroBits;
111 }
112};
113
114#if defined(__GNUC__4) || defined(_MSC_VER)
115template <typename T> struct TrailingZerosCounter<T, 4> {
116 static unsigned count(T Val, ZeroBehavior ZB) {
117 if (ZB != ZB_Undefined && Val == 0)
118 return 32;
119
120#if __has_builtin(__builtin_ctz)1 || defined(__GNUC__4)
121 return __builtin_ctz(Val);
122#elif defined(_MSC_VER)
123 unsigned long Index;
124 _BitScanForward(&Index, Val);
125 return Index;
126#endif
127 }
128};
129
130#if !defined(_MSC_VER) || defined(_M_X64)
131template <typename T> struct TrailingZerosCounter<T, 8> {
132 static unsigned count(T Val, ZeroBehavior ZB) {
133 if (ZB != ZB_Undefined && Val == 0)
134 return 64;
135
136#if __has_builtin(__builtin_ctzll)1 || defined(__GNUC__4)
137 return __builtin_ctzll(Val);
138#elif defined(_MSC_VER)
139 unsigned long Index;
140 _BitScanForward64(&Index, Val);
141 return Index;
142#endif
143 }
144};
145#endif
146#endif
147} // namespace detail
148
149/// Count number of 0's from the least significant bit to the most
150/// stopping at the first 1.
151///
152/// Only unsigned integral types are allowed.
153///
154/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
155/// valid arguments.
156template <typename T>
157unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
158 static_assert(std::numeric_limits<T>::is_integer &&
159 !std::numeric_limits<T>::is_signed,
160 "Only unsigned integral types are allowed.");
161 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
162}
163
164namespace detail {
165template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
166 static unsigned count(T Val, ZeroBehavior) {
167 if (!Val)
168 return std::numeric_limits<T>::digits;
169
170 // Bisection method.
171 unsigned ZeroBits = 0;
172 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
173 T Tmp = Val >> Shift;
174 if (Tmp)
175 Val = Tmp;
176 else
177 ZeroBits |= Shift;
178 }
179 return ZeroBits;
180 }
181};
182
183#if defined(__GNUC__4) || defined(_MSC_VER)
184template <typename T> struct LeadingZerosCounter<T, 4> {
185 static unsigned count(T Val, ZeroBehavior ZB) {
186 if (ZB != ZB_Undefined && Val == 0)
187 return 32;
188
189#if __has_builtin(__builtin_clz)1 || defined(__GNUC__4)
190 return __builtin_clz(Val);
191#elif defined(_MSC_VER)
192 unsigned long Index;
193 _BitScanReverse(&Index, Val);
194 return Index ^ 31;
195#endif
196 }
197};
198
199#if !defined(_MSC_VER) || defined(_M_X64)
200template <typename T> struct LeadingZerosCounter<T, 8> {
201 static unsigned count(T Val, ZeroBehavior ZB) {
202 if (ZB != ZB_Undefined && Val == 0)
203 return 64;
204
205#if __has_builtin(__builtin_clzll)1 || defined(__GNUC__4)
206 return __builtin_clzll(Val);
207#elif defined(_MSC_VER)
208 unsigned long Index;
209 _BitScanReverse64(&Index, Val);
210 return Index ^ 63;
211#endif
212 }
213};
214#endif
215#endif
216} // namespace detail
217
218/// Count number of 0's from the most significant bit to the least
219/// stopping at the first 1.
220///
221/// Only unsigned integral types are allowed.
222///
223/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
224/// valid arguments.
225template <typename T>
226unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
227 static_assert(std::numeric_limits<T>::is_integer &&
228 !std::numeric_limits<T>::is_signed,
229 "Only unsigned integral types are allowed.");
230 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
231}
232
233/// Get the index of the first set bit starting from the least
234/// significant bit.
235///
236/// Only unsigned integral types are allowed.
237///
238/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
239/// valid arguments.
240template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
241 if (ZB == ZB_Max && Val == 0)
242 return std::numeric_limits<T>::max();
243
244 return countTrailingZeros(Val, ZB_Undefined);
245}
246
/// Create a bitmask with the N right-most bits set to 1, and all other
/// bits set to 0.  Only unsigned types are allowed.
template <typename T> T maskTrailingOnes(unsigned N) {
  static_assert(std::is_unsigned<T>::value, "Invalid type!");
  const unsigned Bits = CHAR_BIT * sizeof(T);
  assert(N <= Bits && "Invalid bit index");
  // Shifting by the full width would be UB, so N == 0 is special-cased.
  return N == 0 ? 0 : (T(-1) >> (Bits - N));
}
255
256/// Create a bitmask with the N left-most bits set to 1, and all other
257/// bits set to 0. Only unsigned types are allowed.
258template <typename T> T maskLeadingOnes(unsigned N) {
259 return ~maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
260}
261
262/// Create a bitmask with the N right-most bits set to 0, and all other
263/// bits set to 1. Only unsigned types are allowed.
264template <typename T> T maskTrailingZeros(unsigned N) {
265 return maskLeadingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
266}
267
268/// Create a bitmask with the N left-most bits set to 0, and all other
269/// bits set to 1. Only unsigned types are allowed.
270template <typename T> T maskLeadingZeros(unsigned N) {
271 return maskTrailingOnes<T>(CHAR_BIT8 * sizeof(T) - N);
272}
273
274/// Get the index of the last set bit starting from the least
275/// significant bit.
276///
277/// Only unsigned integral types are allowed.
278///
279/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
280/// valid arguments.
281template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
282 if (ZB == ZB_Max && Val == 0)
283 return std::numeric_limits<T>::max();
284
285 // Use ^ instead of - because both gcc and llvm can remove the associated ^
286 // in the __builtin_clz intrinsic on x86.
287 return countLeadingZeros(Val, ZB_Undefined) ^
288 (std::numeric_limits<T>::digits - 1);
289}
290
/// Macro compressed bit reversal table for 256 bits.
///
/// Each entry is the bit-reversed value of its index; the R2/R4/R6 macros
/// expand the classic recursive construction.
/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
static const unsigned char BitReverseTable256[256] = {
#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
    R6(0), R6(2), R6(1), R6(3)
#undef R2
#undef R4
#undef R6
};
303
304/// Reverse the bits in \p Val.
305template <typename T>
306T reverseBits(T Val) {
307 unsigned char in[sizeof(Val)];
308 unsigned char out[sizeof(Val)];
309 std::memcpy(in, &Val, sizeof(Val));
310 for (unsigned i = 0; i < sizeof(Val); ++i)
311 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
312 std::memcpy(&Val, out, sizeof(Val));
313 return Val;
314}
315
// Specializations that map directly onto the compiler's bit-reverse builtins
// when they are available.
#if __has_builtin(__builtin_bitreverse8)
template <>
inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
  return __builtin_bitreverse8(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse16)
template <>
inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
  return __builtin_bitreverse16(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse32)
template <>
inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
  return __builtin_bitreverse32(Val);
}
#endif

#if __has_builtin(__builtin_bitreverse64)
template <>
inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
  return __builtin_bitreverse64(Val);
}
#endif
343
// NOTE: The following support functions use the _32/_64 extensions instead of
// type overloading so that signed and unsigned integers can be used without
// ambiguity.

/// Return the high 32 bits of a 64 bit value.
constexpr inline uint32_t Hi_32(uint64_t Value) {
  return static_cast<uint32_t>(Value >> 32);
}
352
/// Return the low 32 bits of a 64 bit value.
constexpr inline uint32_t Lo_32(uint64_t Value) {
  // Truncating conversion keeps only the low word.
  return static_cast<uint32_t>(Value);
}
357
358/// Make a 64-bit integer from a high / low pair of 32-bit integers.
359constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
360 return ((uint64_t)High << 32) | (uint64_t)Low;
361}
362
363/// Checks if an integer fits into the given bit width.
364template <unsigned N> constexpr inline bool isInt(int64_t x) {
365 return N >= 64 || (-(INT64_C(1)1L<<(N-1)) <= x && x < (INT64_C(1)1L<<(N-1)));
366}
367// Template specializations to get better code for common cases.
368template <> constexpr inline bool isInt<8>(int64_t x) {
369 return static_cast<int8_t>(x) == x;
370}
371template <> constexpr inline bool isInt<16>(int64_t x) {
372 return static_cast<int16_t>(x) == x;
373}
374template <> constexpr inline bool isInt<32>(int64_t x) {
375 return static_cast<int32_t>(x) == x;
376}
377
378/// Checks if a signed integer is an N bit number shifted left by S.
379template <unsigned N, unsigned S>
380constexpr inline bool isShiftedInt(int64_t x) {
381 static_assert(
382 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
383 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
384 return isInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0);
385}
386
387/// Checks if an unsigned integer fits into the given bit width.
388///
389/// This is written as two functions rather than as simply
390///
391/// return N >= 64 || X < (UINT64_C(1) << N);
392///
393/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
394/// left too many places.
395template <unsigned N>
396constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
397 static_assert(N > 0, "isUInt<0> doesn't make sense");
398 return X < (UINT64_C(1)1UL << (N));
399}
400template <unsigned N>
401constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) {
402 return true;
403}
404
405// Template specializations to get better code for common cases.
406template <> constexpr inline bool isUInt<8>(uint64_t x) {
407 return static_cast<uint8_t>(x) == x;
408}
409template <> constexpr inline bool isUInt<16>(uint64_t x) {
410 return static_cast<uint16_t>(x) == x;
411}
412template <> constexpr inline bool isUInt<32>(uint64_t x) {
413 return static_cast<uint32_t>(x) == x;
414}
415
416/// Checks if a unsigned integer is an N bit number shifted left by S.
417template <unsigned N, unsigned S>
418constexpr inline bool isShiftedUInt(uint64_t x) {
419 static_assert(
420 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
421 static_assert(N + S <= 64,
422 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
423 // Per the two static_asserts above, S must be strictly less than 64. So
424 // 1 << S is not undefined behavior.
425 return isUInt<N + S>(x) && (x % (UINT64_C(1)1UL << S) == 0);
426}
427
/// Gets the maximum value for a N-bit unsigned integer.
inline uint64_t maxUIntN(uint64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // uint64_t(1) << 64 is undefined behavior, so we can't do
  // (uint64_t(1) << N) - 1 without checking first that N != 64. Shifting the
  // all-ones pattern down instead works for every N and is branch-free.
  return UINT64_MAX >> (64 - N);
}

/// Gets the minimum value for a N-bit signed integer.
inline int64_t minIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // -(2^(N-1)) spelled without negating an unsigned quantity (MSVC C4146).
  return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
}

/// Gets the maximum value for a N-bit signed integer.
inline int64_t maxIntN(int64_t N) {
  assert(N > 0 && N <= 64 && "integer width out of range");

  // This relies on two's complement wraparound when N == 64, so we convert to
  // int64_t only at the very end to avoid UB.
  return (UINT64_C(1) << (N - 1)) - 1;
}

/// Checks if an unsigned integer fits into the given (dynamic) bit width.
inline bool isUIntN(unsigned N, uint64_t x) {
  return N >= 64 || x <= maxUIntN(N);
}

/// Checks if an signed integer fits into the given (dynamic) bit width.
inline bool isIntN(unsigned N, int64_t x) {
  return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
}
464
/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (32 bit version).
/// Ex. isMask_32(0x0000FFFFU) == true.
constexpr inline bool isMask_32(uint32_t Value) {
  // A trailing mask plus one is a power of two (or wraps to zero for ~0).
  return Value != 0 && ((Value + 1) & Value) == 0;
}
471
/// Return true if the argument is a non-empty sequence of ones starting at the
/// least significant bit with the remainder zero (64 bit version).
constexpr inline bool isMask_64(uint64_t Value) {
  // A trailing mask plus one is a power of two (or wraps to zero for ~0).
  return Value != 0 && ((Value + 1) & Value) == 0;
}
477
/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
constexpr inline bool isShiftedMask_32(uint32_t Value) {
  if (Value == 0)
    return false;
  // Smearing the lowest set bit down to bit 0 turns a shifted mask into a
  // trailing mask, which is recognized by the (V + 1) & V trick.
  const uint32_t Smeared = (Value - 1) | Value;
  return ((Smeared + 1) & Smeared) == 0;
}
483
/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (64 bit version.)
constexpr inline bool isShiftedMask_64(uint64_t Value) {
  if (Value == 0)
    return false;
  // Smearing the lowest set bit down to bit 0 turns a shifted mask into a
  // trailing mask, which is recognized by the (V + 1) & V trick.
  const uint64_t Smeared = (Value - 1) | Value;
  return ((Smeared + 1) & Smeared) == 0;
}
489
/// Return true if the argument is a power of two > 0.
/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
constexpr inline bool isPowerOf2_32(uint32_t Value) {
  // Exactly one bit set: clearing the lowest set bit leaves zero.
  return Value != 0 && (Value & (Value - 1)) == 0;
}
495
/// Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr inline bool isPowerOf2_64(uint64_t Value) {
  // Exactly one bit set: clearing the lowest set bit leaves zero.
  return Value != 0 && (Value & (Value - 1)) == 0;
}
500
501/// Count the number of ones from the most significant bit to the first
502/// zero bit.
503///
504/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
505/// Only unsigned integral types are allowed.
506///
507/// \param ZB the behavior on an input of all ones. Only ZB_Width and
508/// ZB_Undefined are valid arguments.
509template <typename T>
510unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
511 static_assert(std::numeric_limits<T>::is_integer &&
512 !std::numeric_limits<T>::is_signed,
513 "Only unsigned integral types are allowed.");
514 return countLeadingZeros<T>(~Value, ZB);
515}
516
517/// Count the number of ones from the least significant bit to the first
518/// zero bit.
519///
520/// Ex. countTrailingOnes(0x00FF00FF) == 8.
521/// Only unsigned integral types are allowed.
522///
523/// \param ZB the behavior on an input of all ones. Only ZB_Width and
524/// ZB_Undefined are valid arguments.
525template <typename T>
526unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
527 static_assert(std::numeric_limits<T>::is_integer &&
528 !std::numeric_limits<T>::is_signed,
529 "Only unsigned integral types are allowed.");
530 return countTrailingZeros<T>(~Value, ZB);
531}
532
namespace detail {
/// Popcount dispatcher: generic (<= 32-bit) implementation, forwarded to the
/// compiler builtin when available, else the classic SWAR bit-counting hack.
template <typename T, std::size_t SizeOfT> struct PopulationCounter {
  static unsigned count(T Value) {
    // Generic version, forward to 32 bits.
    static_assert(SizeOfT <= 4, "Not implemented!");
#if defined(__GNUC__)
    return __builtin_popcount(Value);
#else
    uint32_t v = Value;
    v = v - ((v >> 1) & 0x55555555);
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
#endif
  }
};

/// 64-bit popcount specialization.
template <typename T> struct PopulationCounter<T, 8> {
  static unsigned count(T Value) {
#if defined(__GNUC__)
    return __builtin_popcountll(Value);
#else
    uint64_t v = Value;
    v = v - ((v >> 1) & 0x5555555555555555ULL);
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
    return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
#endif
  }
};
} // namespace detail

/// Count the number of set bits in a value.
/// Ex. countPopulation(0xF000F000) = 8
/// Returns 0 if the word is zero.
template <typename T>
inline unsigned countPopulation(T Value) {
  static_assert(std::numeric_limits<T>::is_integer &&
                    !std::numeric_limits<T>::is_signed,
                "Only unsigned integral types are allowed.");
  return detail::PopulationCounter<T, sizeof(T)>::count(Value);
}
574
575/// Compile time Log2.
576/// Valid only for positive powers of two.
577template <size_t kValue> constexpr inline size_t CTLog2() {
578 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
579 "Value is not a valid power of 2");
580 return 1 + CTLog2<kValue / 2>();
581}
582
583template <> constexpr inline size_t CTLog2<1>() { return 0; }
584
/// Return the log base 2 of the specified value.
inline double Log2(double Value) {
#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
  // Old Android NDKs lack log2(); derive it from natural logarithms.
  return __builtin_log(Value) / __builtin_log(2.0);
#else
  return log2(Value);
#endif
}
593
594/// Return the floor log base 2 of the specified value, -1 if the value is zero.
595/// (32 bit edition.)
596/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
597inline unsigned Log2_32(uint32_t Value) {
598 return 31 - countLeadingZeros(Value);
12
Returning the value 4294967295
599}
600
601/// Return the floor log base 2 of the specified value, -1 if the value is zero.
602/// (64 bit edition.)
603inline unsigned Log2_64(uint64_t Value) {
604 return 63 - countLeadingZeros(Value);
605}
606
607/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
608/// (32 bit edition).
609/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
610inline unsigned Log2_32_Ceil(uint32_t Value) {
611 return 32 - countLeadingZeros(Value - 1);
612}
613
614/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
615/// (64 bit edition.)
616inline unsigned Log2_64_Ceil(uint64_t Value) {
617 return 64 - countLeadingZeros(Value - 1);
618}
619
/// Return the greatest common divisor of the values using Euclid's algorithm.
template <typename T>
inline T greatestCommonDivisor(T A, T B) {
  while (B) {
    T Remainder = A % B;
    A = B;
    B = Remainder;
  }
  return A;
}

/// Convenience wrapper fixing T = uint64_t.
inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
  return greatestCommonDivisor<uint64_t>(A, B);
}
634
/// This function takes a 64-bit integer and returns the bit equivalent double.
inline double BitsToDouble(uint64_t Bits) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  double D;
  std::memcpy(&D, &Bits, sizeof(Bits));
  return D;
}

/// This function takes a 32-bit integer and returns the bit equivalent float.
inline float BitsToFloat(uint32_t Bits) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  float F;
  std::memcpy(&F, &Bits, sizeof(Bits));
  return F;
}
650
/// This function takes a double and returns the bit equivalent 64-bit integer.
/// Note that copying doubles around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint64_t DoubleToBits(double Double) {
  static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
  uint64_t Bits;
  std::memcpy(&Bits, &Double, sizeof(Double));
  return Bits;
}

/// This function takes a float and returns the bit equivalent 32-bit integer.
/// Note that copying floats around changes the bits of NaNs on some hosts,
/// notably x86, so this routine cannot be used if these bits are needed.
inline uint32_t FloatToBits(float Float) {
  static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
  uint32_t Bits;
  std::memcpy(&Bits, &Float, sizeof(Float));
  return Bits;
}
670
/// A and B are either alignments or offsets. Return the minimum alignment that
/// may be assumed after adding the two together.
constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
  // The largest power of 2 that divides both A and B.
  //
  // Replace "-Value" by "1+~Value" in the following commented code to avoid
  // MSVC warning C4146
  //   return (A | B) & -(A | B);
  return (A | B) & (1 + ~(A | B));
}
681
/// Returns the next power of two (in 64-bits) that is strictly greater than A.
/// Returns zero on overflow.
inline uint64_t NextPowerOf2(uint64_t A) {
  // Smear the highest set bit into every lower position, then add one.
  for (unsigned Shift = 1; Shift != 64; Shift <<= 1)
    A |= (A >> Shift);
  return A + 1;
}
693
694/// Returns the power of two which is less than or equal to the given value.
695/// Essentially, it is a floor operation across the domain of powers of two.
696inline uint64_t PowerOf2Floor(uint64_t A) {
697 if (!A) return 0;
698 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
699}
700
701/// Returns the power of two which is greater than or equal to the given value.
702/// Essentially, it is a ceil operation across the domain of powers of two.
703inline uint64_t PowerOf2Ceil(uint64_t A) {
704 if (!A)
705 return 0;
706 return NextPowerOf2(A - 1);
707}
708
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
///
/// If non-zero \p Skew is specified, the return value will be a minimal
/// integer that is greater than or equal to \p Value and equal to
/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
///
/// Examples:
/// \code
///   alignTo(5, 8) = 8
///   alignTo(17, 8) = 24
///   alignTo(~0LL, 8) = 0
///   alignTo(321, 255) = 510
///
///   alignTo(5, 8, 7) = 7
///   alignTo(17, 8, 1) = 17
///   alignTo(~0LL, 8, 3) = 3
///   alignTo(321, 255, 42) = 552
/// \endcode
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}

/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
  static_assert(Align != 0u, "Align must be non-zero");
  return (Value + Align - 1) / Align * Align;
}

/// Returns the integer ceil(Numerator / Denominator).
inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
  return alignTo(Numerator, Denominator) / Denominator;
}

/// Returns the integer nearest(Numerator / Denominator).
inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
  // Adding half the denominator before truncating rounds to nearest.
  return (Numerator + (Denominator / 2)) / Denominator;
}
751
/// Returns the largest uint64_t less than or equal to \p Value and is
/// \p Skew mod \p Align. \p Align must be non-zero
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0u && "Align can't be 0.");
  Skew %= Align;
  return (Value - Skew) / Align * Align + Skew;
}
759
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 32, "Bit width out of range.");
  // Shift the B-bit field to the top, then arithmetic-shift back down so the
  // sign bit is replicated.
  return int32_t(X << (32 - B)) >> (32 - B);
}

/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
inline int32_t SignExtend32(uint32_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 32 && "Bit width out of range.");
  return int32_t(X << (32 - B)) >> (32 - B);
}
775
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
  static_assert(B > 0, "Bit width can't be 0.");
  static_assert(B <= 64, "Bit width out of range.");
  // Shift the B-bit field to the top, then arithmetic-shift back down so the
  // sign bit is replicated.
  return int64_t(x << (64 - B)) >> (64 - B);
}

/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B <= 64.
inline int64_t SignExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && "Bit width can't be 0.");
  assert(B <= 64 && "Bit width out of range.");
  return int64_t(X << (64 - B)) >> (64 - B);
}
791
/// Subtract two unsigned integers, X and Y, of type T and return the absolute
/// value of the result.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
  // Subtract the smaller from the larger so unsigned wraparound never occurs.
  return X > Y ? X - Y : Y - X;
}
798
/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
  // Hacker's Delight, p. 29: an unsigned sum wrapped iff it is smaller than
  // either operand.
  T Z = X + Y;
  Overflowed = (Z < X || Z < Y);
  return Overflowed ? std::numeric_limits<T>::max() : Z;
}
815
/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
  // because it fails for uint16_t (where multiplication can have undefined
  // behavior due to promotion to int), and requires a division in addition
  // to the multiplication.

  Overflowed = false;

  // Log2(Z) would be either Log2Z or Log2Z + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
  // will necessarily be less than Log2Max as desired.
  int Log2Z = Log2_64(X) + Log2_64(Y);
  const T Max = std::numeric_limits<T>::max();
  int Log2Max = Log2_64(Max);
  if (Log2Z < Log2Max) {
    // The product definitely fits.
    return X * Y;
  }
  if (Log2Z > Log2Max) {
    // The product definitely overflows.
    Overflowed = true;
    return Max;
  }

  // Borderline case: we're going to use the top bit, and maybe overflow one
  // bit past it. Multiply all but the bottom bit then add that on at the end.
  T Z = (X >> 1) * Y;
  if (Z & ~(Max >> 1)) {
    Overflowed = true;
    return Max;
  }
  Z <<= 1;
  if (X & 1)
    return SaturatingAdd(Z, Y, ResultOverflowed);

  return Z;
}
860
/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
/// the product. Clamp the result to the maximum representable value of T on
/// overflow. ResultOverflowed indicates if the result is larger than the
/// maximum representable value of type T.
template <typename T>
std::enable_if_t<std::is_unsigned<T>::value, T>
SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
  bool Dummy;
  bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;

  // If the multiply already saturated, the add cannot improve the result.
  T Product = SaturatingMultiply(X, Y, &Overflowed);
  if (Overflowed)
    return Product;

  return SaturatingAdd(A, Product, &Overflowed);
}
877
/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
extern const float huge_valf;

/// Add two signed integers, computing the two's complement truncated result,
/// returning true if overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_add_overflow)
  return __builtin_add_overflow(X, Y, &Result);
#else
  // Perform the addition in the unsigned domain, where wraparound is defined.
  using U = std::make_unsigned_t<T>;
  const U UX = static_cast<U>(X);
  const U UY = static_cast<U>(Y);
  const U UResult = UX + UY;

  // Convert to signed.
  Result = static_cast<T>(UResult);

  // Adding two positive numbers should result in a positive number.
  if (X > 0 && Y > 0)
    return Result <= 0;
  // Adding two negatives should result in a negative number.
  if (X < 0 && Y < 0)
    return Result >= 0;
  // Mixed signs can never overflow.
  return false;
#endif
}
907
/// Subtract two signed integers, computing the two's complement truncated
/// result, returning true if an overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_sub_overflow)
  return __builtin_sub_overflow(X, Y, &Result);
#else
  // Perform the subtraction in the unsigned domain, where wraparound is
  // defined.
  using U = std::make_unsigned_t<T>;
  const U UX = static_cast<U>(X);
  const U UY = static_cast<U>(Y);
  const U UResult = UX - UY;

  // Convert to signed.
  Result = static_cast<T>(UResult);

  // Subtracting a positive number from a negative results in a negative number.
  if (X <= 0 && Y > 0)
    return Result >= 0;
  // Subtracting a negative number from a positive results in a positive number.
  if (X >= 0 && Y < 0)
    return Result <= 0;
  // Same-sign operands can never overflow.
  return false;
#endif
}
933
/// Multiply two signed integers, computing the two's complement truncated
/// result, returning true if an overflow occurred.
template <typename T>
std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
  // Perform the multiplication on absolute values in the unsigned domain,
  // where wraparound is defined.
  using U = std::make_unsigned_t<T>;
  const U AbsX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
  const U AbsY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
  const U UResult = AbsX * AbsY;

  // Negate the product if exactly one operand was negative, then truncate.
  const bool IsNegative = (X < 0) ^ (Y < 0);
  Result = IsNegative ? (0 - UResult) : UResult;

  // If any of the args was 0, the result is 0 and no overflow occurs.
  if (AbsX == 0 || AbsY == 0)
    return false;

  // AbsX and AbsY are in [1, 2^n], where n is the number of digits.
  // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
  // positive) divided by one argument compares to the other.
  if (IsNegative)
    return AbsX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / AbsY;
  return AbsX > (static_cast<U>(std::numeric_limits<T>::max())) / AbsY;
}
960
961} // End llvm namespace
962
963#endif