Bug Summary

File: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Warning: line 770, column 21
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
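
Note (illustration, not analyzer output): the shift count of 4294967295 arises when MaxStridedLoads / StridedLoads evaluates to 0 and Log2_32 is applied to that 0. The standalone sketch below uses hypothetical values and a stand-in for llvm::Log2_32 (assumed to compute 31 - countLeadingZeros(Value)) purely to reproduce the arithmetic the analyzer is warning about.

#include <climits>
#include <cstdio>

// Stand-in for llvm::Log2_32 (assumption: 31 - countLeadingZeros(Value); for an
// input of 0 the unsigned subtraction wraps around to UINT_MAX).
static unsigned log2_32_sketch(unsigned Value) {
  unsigned Zeros = Value ? static_cast<unsigned>(__builtin_clz(Value)) : 32u;
  return 31u - Zeros; // 31 - 32 wraps to 4294967295 when Value == 0
}

int main() {
  const unsigned MaxStridedLoads = 7; // same constant as in getFalkorUnrollingPreferences
  unsigned StridedLoads = 9;          // hypothetical: more strided loads than MaxStridedLoads
  unsigned Shift = log2_32_sketch(MaxStridedLoads / StridedLoads); // 7 / 9 == 0 -> 4294967295
  std::printf("shift count = %u, width of int = %zu bits\n", Shift, sizeof(int) * CHAR_BIT);
  // Evaluating `1 << Shift` here would be undefined behaviour, which is what line 770 triggers.
  return 0;
}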

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExpandImm.h"
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/IntrinsicsAArch64.h"
19#include "llvm/Support/Debug.h"
20#include <algorithm>
21using namespace llvm;
22
23#define DEBUG_TYPE "aarch64tti"
24
25static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
26 cl::init(true), cl::Hidden);
27
28bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
29 const Function *Callee) const {
30 const TargetMachine &TM = getTLI()->getTargetMachine();
31
32 const FeatureBitset &CallerBits =
33 TM.getSubtargetImpl(*Caller)->getFeatureBits();
34 const FeatureBitset &CalleeBits =
35 TM.getSubtargetImpl(*Callee)->getFeatureBits();
36
37 // Inline a callee if its target-features are a subset of the caller's
38 // target-features.
39 return (CallerBits & CalleeBits) == CalleeBits;
40}
41
42/// Calculate the cost of materializing a 64-bit value. This helper
43/// method might only calculate a fraction of a larger immediate. Therefore it
44/// is valid to return a cost of ZERO.
45int AArch64TTIImpl::getIntImmCost(int64_t Val) {
46 // Check if the immediate can be encoded within an instruction.
47 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
48 return 0;
49
50 if (Val < 0)
51 Val = ~Val;
52
53 // Calculate how many moves we will need to materialize this constant.
54 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
55 AArch64_IMM::expandMOVImm(Val, 64, Insn);
56 return Insn.size();
57}
58
59/// Calculate the cost of materializing the given constant.
60int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
61 assert(Ty->isIntegerTy());
62
63 unsigned BitSize = Ty->getPrimitiveSizeInBits();
64 if (BitSize == 0)
65 return ~0U;
66
67 // Sign-extend all constants to a multiple of 64-bit.
68 APInt ImmVal = Imm;
69 if (BitSize & 0x3f)
70 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
71
72 // Split the constant into 64-bit chunks and calculate the cost for each
73 // chunk.
74 int Cost = 0;
75 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
76 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
77 int64_t Val = Tmp.getSExtValue();
78 Cost += getIntImmCost(Val);
79 }
80 // We need at least one instruction to materialize the constant.
81 return std::max(1, Cost);
82}
83
84int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
85 const APInt &Imm, Type *Ty) {
86 assert(Ty->isIntegerTy());
87
88 unsigned BitSize = Ty->getPrimitiveSizeInBits();
89 // There is no cost model for constants with a bit size of 0. Return TCC_Free
90 // here, so that constant hoisting will ignore this constant.
91 if (BitSize == 0)
92 return TTI::TCC_Free;
93
94 unsigned ImmIdx = ~0U;
95 switch (Opcode) {
96 default:
97 return TTI::TCC_Free;
98 case Instruction::GetElementPtr:
99 // Always hoist the base address of a GetElementPtr.
100 if (Idx == 0)
101 return 2 * TTI::TCC_Basic;
102 return TTI::TCC_Free;
103 case Instruction::Store:
104 ImmIdx = 0;
105 break;
106 case Instruction::Add:
107 case Instruction::Sub:
108 case Instruction::Mul:
109 case Instruction::UDiv:
110 case Instruction::SDiv:
111 case Instruction::URem:
112 case Instruction::SRem:
113 case Instruction::And:
114 case Instruction::Or:
115 case Instruction::Xor:
116 case Instruction::ICmp:
117 ImmIdx = 1;
118 break;
119 // Always return TCC_Free for the shift value of a shift instruction.
120 case Instruction::Shl:
121 case Instruction::LShr:
122 case Instruction::AShr:
123 if (Idx == 1)
124 return TTI::TCC_Free;
125 break;
126 case Instruction::Trunc:
127 case Instruction::ZExt:
128 case Instruction::SExt:
129 case Instruction::IntToPtr:
130 case Instruction::PtrToInt:
131 case Instruction::BitCast:
132 case Instruction::PHI:
133 case Instruction::Call:
134 case Instruction::Select:
135 case Instruction::Ret:
136 case Instruction::Load:
137 break;
138 }
139
140 if (Idx == ImmIdx) {
141 int NumConstants = (BitSize + 63) / 64;
142 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
143 return (Cost <= NumConstants * TTI::TCC_Basic)
144 ? static_cast<int>(TTI::TCC_Free)
145 : Cost;
146 }
147 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
148}
149
150int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
151 const APInt &Imm, Type *Ty) {
152 assert(Ty->isIntegerTy());
153
154 unsigned BitSize = Ty->getPrimitiveSizeInBits();
155 // There is no cost model for constants with a bit size of 0. Return TCC_Free
156 // here, so that constant hoisting will ignore this constant.
157 if (BitSize == 0)
158 return TTI::TCC_Free;
159
160 // Most (all?) AArch64 intrinsics do not support folding immediates into the
161 // selected instruction, so we compute the materialization cost for the
162 // immediate directly.
163 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
164 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
165
166 switch (IID) {
167 default:
168 return TTI::TCC_Free;
169 case Intrinsic::sadd_with_overflow:
170 case Intrinsic::uadd_with_overflow:
171 case Intrinsic::ssub_with_overflow:
172 case Intrinsic::usub_with_overflow:
173 case Intrinsic::smul_with_overflow:
174 case Intrinsic::umul_with_overflow:
175 if (Idx == 1) {
176 int NumConstants = (BitSize + 63) / 64;
177 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
178 return (Cost <= NumConstants * TTI::TCC_Basic)
179 ? static_cast<int>(TTI::TCC_Free)
180 : Cost;
181 }
182 break;
183 case Intrinsic::experimental_stackmap:
184 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
185 return TTI::TCC_Free;
186 break;
187 case Intrinsic::experimental_patchpoint_void:
188 case Intrinsic::experimental_patchpoint_i64:
189 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
190 return TTI::TCC_Free;
191 break;
192 }
193 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
194}
195
196TargetTransformInfo::PopcntSupportKind
197AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
198 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
199 if (TyWidth == 32 || TyWidth == 64)
200 return TTI::PSK_FastHardware;
201 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
202 return TTI::PSK_Software;
203}
204
205bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
206 ArrayRef<const Value *> Args) {
207
208 // A helper that returns a vector type from the given type. The number of
209 // elements in type Ty determines the vector width.
210 auto toVectorTy = [&](Type *ArgTy) {
211 return VectorType::get(ArgTy->getScalarType(),
212 DstTy->getVectorNumElements());
213 };
214
215 // Exit early if DstTy is not a vector type whose elements are at least
216 // 16-bits wide.
217 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
218 return false;
219
220 // Determine if the operation has a widening variant. We consider both the
221 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
222 // instructions.
223 //
224 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
225 // verify that their extending operands are eliminated during code
226 // generation.
227 switch (Opcode) {
228 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
229 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
230 break;
231 default:
232 return false;
233 }
234
235 // To be a widening instruction (either the "wide" or "long" versions), the
236 // second operand must be a sign- or zero extend having a single user. We
237 // only consider extends having a single user because they may otherwise not
238 // be eliminated.
239 if (Args.size() != 2 ||
240 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
241 !Args[1]->hasOneUse())
242 return false;
243 auto *Extend = cast<CastInst>(Args[1]);
244
245 // Legalize the destination type and ensure it can be used in a widening
246 // operation.
247 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
248 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
249 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
250 return false;
251
252 // Legalize the source type and ensure it can be used in a widening
253 // operation.
254 Type *SrcTy = toVectorTy(Extend->getSrcTy());
255 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
256 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
257 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
258 return false;
259
260 // Get the total number of vector elements in the legalized types.
261 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
262 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
263
264 // Return true if the legalized types have the same number of vector elements
265 // and the destination element type size is twice that of the source type.
266 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
267}
268
269int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
270 const Instruction *I) {
271 int ISD = TLI->InstructionOpcodeToISD(Opcode);
272 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp"
, 272, __PRETTY_FUNCTION__))
;
273
274 // If the cast is observable, and it is used by a widening instruction (e.g.,
275 // uaddl, saddw, etc.), it may be free.
276 if (I && I->hasOneUse()) {
277 auto *SingleUser = cast<Instruction>(*I->user_begin());
278 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
279 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
280 // If the cast is the second operand, it is free. We will generate either
281 // a "wide" or "long" version of the widening instruction.
282 if (I == SingleUser->getOperand(1))
283 return 0;
284 // If the cast is not the second operand, it will be free if it looks the
285 // same as the second operand. In this case, we will generate a "long"
286 // version of the widening instruction.
287 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
288 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
289 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
290 return 0;
291 }
292 }
293
294 EVT SrcTy = TLI->getValueType(DL, Src);
295 EVT DstTy = TLI->getValueType(DL, Dst);
296
297 if (!SrcTy.isSimple() || !DstTy.isSimple())
298 return BaseT::getCastInstrCost(Opcode, Dst, Src);
299
300 static const TypeConversionCostTblEntry
301 ConversionTbl[] = {
302 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
303 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
304 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
305 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
306
307 // The number of shll instructions for the extension.
308 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
309 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
310 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
311 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
312 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
313 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
314 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
315 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
316 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
317 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
318 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
319 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
320 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
321 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
322 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
323 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
324
325 // LowerVectorINT_TO_FP:
326 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
327 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
328 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
329 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
330 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
331 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
332
333 // Complex: to v2f32
334 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
335 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
336 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
337 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
338 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
339 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
340
341 // Complex: to v4f32
342 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
343 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
344 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
345 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
346
347 // Complex: to v8f32
348 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
349 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
350 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
351 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
352
353 // Complex: to v16f32
354 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
355 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
356
357 // Complex: to v2f64
358 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
359 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
360 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
361 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
362 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
363 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
364
365
366 // LowerVectorFP_TO_INT
367 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
368 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
369 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
370 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
371 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
372 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
373
374 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
375 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
376 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
377 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
378 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
379 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
380 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
381
382 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
383 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
384 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
385 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
386 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
387
388 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
389 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
390 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
391 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
392 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
393 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
394 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
395 };
396
397 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
398 DstTy.getSimpleVT(),
399 SrcTy.getSimpleVT()))
400 return Entry->Cost;
401
402 return BaseT::getCastInstrCost(Opcode, Dst, Src);
403}
404
405int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
406 VectorType *VecTy,
407 unsigned Index) {
408
409 // Make sure we were given a valid extend opcode.
410 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
411 "Invalid opcode");
412
413 // We are extending an element we extract from a vector, so the source type
414 // of the extend is the element type of the vector.
415 auto *Src = VecTy->getElementType();
416
417 // Sign- and zero-extends are for integer types only.
418 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
419
420 // Get the cost for the extract. We compute the cost (if any) for the extend
421 // below.
422 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
423
424 // Legalize the types.
425 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
426 auto DstVT = TLI->getValueType(DL, Dst);
427 auto SrcVT = TLI->getValueType(DL, Src);
428
429 // If the resulting type is still a vector and the destination type is legal,
430 // we may get the extension for free. If not, get the default cost for the
431 // extend.
432 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
433 return Cost + getCastInstrCost(Opcode, Dst, Src);
434
435 // The destination type should be larger than the element type. If not, get
436 // the default cost for the extend.
437 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
438 return Cost + getCastInstrCost(Opcode, Dst, Src);
439
440 switch (Opcode) {
441 default:
442 llvm_unreachable("Opcode should be either SExt or ZExt")::llvm::llvm_unreachable_internal("Opcode should be either SExt or ZExt"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp"
, 442)
;
443
444 // For sign-extends, we only need a smov, which performs the extension
445 // automatically.
446 case Instruction::SExt:
447 return Cost;
448
449 // For zero-extends, the extend is performed automatically by a umov unless
450 // the destination type is i64 and the element type is i8 or i16.
451 case Instruction::ZExt:
452 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
453 return Cost;
454 }
455
456 // If we are unable to perform the extend for free, get the default cost.
457 return Cost + getCastInstrCost(Opcode, Dst, Src);
458}
459
460int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
461 unsigned Index) {
462 assert(Val->isVectorTy() && "This must be a vector type");
463
464 if (Index != -1U) {
465 // Legalize the type.
466 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
467
468 // This type is legalized to a scalar type.
469 if (!LT.second.isVector())
470 return 0;
471
472 // The type may be split. Normalize the index to the new type.
473 unsigned Width = LT.second.getVectorNumElements();
474 Index = Index % Width;
475
476 // The element at index zero is already inside the vector.
477 if (Index == 0)
478 return 0;
479 }
480
481 // All other insert/extracts cost this much.
482 return ST->getVectorInsertExtractBaseCost();
483}
484
485int AArch64TTIImpl::getArithmeticInstrCost(
486 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
487 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
488 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
489 const Instruction *CxtI) {
490 // Legalize the type.
491 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
492
493 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
494 // add in the widening overhead specified by the sub-target. Since the
495 // extends feeding widening instructions are performed automatically, they
496 // aren't present in the generated code and have a zero cost. By adding a
497 // widening overhead here, we attach the total cost of the combined operation
498 // to the widening instruction.
499 int Cost = 0;
500 if (isWideningInstruction(Ty, Opcode, Args))
501 Cost += ST->getWideningBaseCost();
502
503 int ISD = TLI->InstructionOpcodeToISD(Opcode);
504
505 switch (ISD) {
506 default:
507 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
508 Opd1PropInfo, Opd2PropInfo);
509 case ISD::SDIV:
510 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
511 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
512 // On AArch64, scalar signed division by power-of-two constants is
513 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
514 // The OperandValue properties may not be the same as those of the previous
515 // operation; conservatively assume OP_None.
516 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
517 TargetTransformInfo::OP_None,
518 TargetTransformInfo::OP_None);
519 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
520 TargetTransformInfo::OP_None,
521 TargetTransformInfo::OP_None);
522 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
523 TargetTransformInfo::OP_None,
524 TargetTransformInfo::OP_None);
525 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
526 TargetTransformInfo::OP_None,
527 TargetTransformInfo::OP_None);
528 return Cost;
529 }
530 LLVM_FALLTHROUGH;
531 case ISD::UDIV:
532 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
533 auto VT = TLI->getValueType(DL, Ty);
534 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
535 // Vector signed division by constant is expanded to the
536 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
537 // to MULHS + SUB + SRL + ADD + SRL.
538 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
539 Opd2Info,
540 TargetTransformInfo::OP_None,
541 TargetTransformInfo::OP_None);
542 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
543 Opd2Info,
544 TargetTransformInfo::OP_None,
545 TargetTransformInfo::OP_None);
546 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
547 Opd2Info,
548 TargetTransformInfo::OP_None,
549 TargetTransformInfo::OP_None);
550 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
551 }
552 }
553
554 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
555 Opd1PropInfo, Opd2PropInfo);
556 if (Ty->isVectorTy()) {
557 // On AArch64, vector divisions are not supported natively and are
558 // expanded into scalar divisions of each pair of elements.
559 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
560 Opd2Info, Opd1PropInfo, Opd2PropInfo);
561 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
562 Opd2Info, Opd1PropInfo, Opd2PropInfo);
563 // TODO: if one of the arguments is scalar, then it's not necessary to
564 // double the cost of handling the vector elements.
565 Cost += Cost;
566 }
567 return Cost;
568
569 case ISD::ADD:
570 case ISD::MUL:
571 case ISD::XOR:
572 case ISD::OR:
573 case ISD::AND:
574 // These nodes are marked as 'custom' for combining purposes only.
575 // We know that they are legal. See LowerAdd in ISelLowering.
576 return (Cost + 1) * LT.first;
577 }
578}
579
580int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
581 const SCEV *Ptr) {
582 // Address computations in vectorized code with non-consecutive addresses will
583 // likely result in more instructions compared to scalar code where the
584 // computation can more often be merged into the index mode. The resulting
585 // extra micro-ops can significantly decrease throughput.
586 unsigned NumVectorInstToHideOverhead = 10;
587 int MaxMergeDistance = 64;
588
589 if (Ty->isVectorTy() && SE &&
590 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
591 return NumVectorInstToHideOverhead;
592
593 // In many cases the address computation is not merged into the instruction
594 // addressing mode.
595 return 1;
596}
597
598int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
599 Type *CondTy, const Instruction *I) {
600
601 int ISD = TLI->InstructionOpcodeToISD(Opcode);
602 // We don't lower some vector selects well when they are wider than the
603 // register width.
604 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
605 // We would need this many instructions to hide the scalarization happening.
606 const int AmortizationCost = 20;
607 static const TypeConversionCostTblEntry
608 VectorSelectTbl[] = {
609 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
610 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
611 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
612 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
613 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
614 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
615 };
616
617 EVT SelCondTy = TLI->getValueType(DL, CondTy);
618 EVT SelValTy = TLI->getValueType(DL, ValTy);
619 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
620 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
621 SelCondTy.getSimpleVT(),
622 SelValTy.getSimpleVT()))
623 return Entry->Cost;
624 }
625 }
626 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
627}
628
629AArch64TTIImpl::TTI::MemCmpExpansionOptions
630AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
631 TTI::MemCmpExpansionOptions Options;
632 Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
633 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
634 Options.NumLoadsPerBlock = Options.MaxNumLoads;
635 // TODO: Though vector loads usually perform well on AArch64, in some targets
636 // they may wake up the FP unit, which raises the power consumption. Perhaps
637 // they could be used with no holds barred (-O3).
638 Options.LoadSizes = {8, 4, 2, 1};
639 return Options;
640}
641
642int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
643 MaybeAlign Alignment, unsigned AddressSpace,
644 const Instruction *I) {
645 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
646
647 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
648 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
649 // Unaligned stores are extremely inefficient. We don't split all
650 // unaligned 128-bit stores because of the negative impact that has been shown in
651 // practice on inlined block copy code.
652 // We make such stores expensive so that we will only vectorize if there
653 // are 6 other instructions getting vectorized.
654 const int AmortizationCost = 6;
655
656 return LT.first * 2 * AmortizationCost;
657 }
658
659 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
660 unsigned ProfitableNumElements;
661 if (Opcode == Instruction::Store)
662 // We use a custom trunc store lowering so v.4b should be profitable.
663 ProfitableNumElements = 4;
664 else
665 // We scalarize the loads because there is no v.4b register and we
666 // have to promote the elements to v.2.
667 ProfitableNumElements = 8;
668
669 if (Ty->getVectorNumElements() < ProfitableNumElements) {
670 unsigned NumVecElts = Ty->getVectorNumElements();
671 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
672 // We generate 2 instructions per vector element.
673 return NumVectorizableInstsToAmortize * NumVecElts * 2;
674 }
675 }
676
677 return LT.first;
678}
679
680int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
681 unsigned Factor,
682 ArrayRef<unsigned> Indices,
683 unsigned Alignment,
684 unsigned AddressSpace,
685 bool UseMaskForCond,
686 bool UseMaskForGaps) {
687 assert(Factor >= 2 && "Invalid interleave factor");
688 assert(isa<VectorType>(VecTy) && "Expect a vector type");
689
690 if (!UseMaskForCond && !UseMaskForGaps &&
691 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
692 unsigned NumElts = VecTy->getVectorNumElements();
693 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
694
695 // ldN/stN only support legal vector types of size 64 or 128 in bits.
696 // Accesses having vector types that are a multiple of 128 bits can be
697 // matched to more than one ldN/stN instruction.
698 if (NumElts % Factor == 0 &&
699 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
700 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
701 }
702
703 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
704 Alignment, AddressSpace,
705 UseMaskForCond, UseMaskForGaps);
706}
707
708int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
709 int Cost = 0;
710 for (auto *I : Tys) {
711 if (!I->isVectorTy())
712 continue;
713 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
714 Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
715 getMemoryOpCost(Instruction::Load, I, Align(128), 0);
716 }
717 return Cost;
718}
719
720unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
721 return ST->getMaxInterleaveFactor();
722}
723
724// For Falkor, we want to avoid having too many strided loads in a loop since
725// that can exhaust the HW prefetcher resources. We adjust the unroller
726// MaxCount preference below to attempt to ensure unrolling doesn't create too
727// many strided loads.
728static void
729getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
730 TargetTransformInfo::UnrollingPreferences &UP) {
731 enum { MaxStridedLoads = 7 };
732 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
733 int StridedLoads = 0;
734 // FIXME? We could make this more precise by looking at the CFG and
735 // e.g. not counting loads in each side of an if-then-else diamond.
736 for (const auto BB : L->blocks()) {
737 for (auto &I : *BB) {
738 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
739 if (!LMemI)
740 continue;
741
742 Value *PtrValue = LMemI->getPointerOperand();
743 if (L->isLoopInvariant(PtrValue))
744 continue;
745
746 const SCEV *LSCEV = SE.getSCEV(PtrValue);
747 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
748 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
749 continue;
750
751 // FIXME? We could take pairing of unrolled load copies into account
752 // by looking at the AddRec, but we would probably have to limit this
753 // to loops with no stores or other memory optimization barriers.
754 ++StridedLoads;
755 // We've seen enough strided loads that seeing more won't make a
756 // difference.
757 if (StridedLoads > MaxStridedLoads / 2)
758 return StridedLoads;
759 }
760 }
761 return StridedLoads;
762 };
763
764 int StridedLoads = countStridedLoads(L, SE);
765 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
7. Assuming 'DebugFlag' is false
8. Loop condition is false. Exiting loop
766 << " strided loads\n");
767 // Pick the largest power of 2 unroll count that won't result in too many
768 // strided loads.
769 if (StridedLoads) {
9. Assuming 'StridedLoads' is not equal to 0
10. Taking true branch
770 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
11. Calling 'Log2_32'
13. Returning from 'Log2_32'
14. The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
771 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
772 << UP.MaxCount << '\n');
773 }
774}
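
Note (illustration only, not analyzer output and not necessarily the fix adopted upstream): the undefined shift at line 770 goes away if the quotient passed to Log2_32 can never be 0, for example by clamping it to at least 1. A minimal sketch, assuming llvm::Log2_32 from llvm/Support/MathExtras.h; the helper name is hypothetical.

#include <algorithm>
#include "llvm/Support/MathExtras.h"

// Hypothetical helper: compute the unroll MaxCount with the quotient clamped,
// so Log2_32 never sees 0 and the shift count stays below the width of int.
// The caller must still guarantee StridedLoads != 0, as the original
// `if (StridedLoads)` check already does.
static unsigned clampedMaxCount(unsigned MaxStridedLoads, unsigned StridedLoads) {
  unsigned Quotient = std::max(1u, MaxStridedLoads / StridedLoads);
  return 1u << llvm::Log2_32(Quotient); // Quotient >= 1, so the shift count is in [0, 31]
}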
775
776void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
777 TTI::UnrollingPreferences &UP) {
778 // Enable partial unrolling and runtime unrolling.
779 BaseT::getUnrollingPreferences(L, SE, UP);
780
781 // For inner loop, it is more likely to be a hot one, and the runtime check
782 // can be promoted out from LICM pass, so the overhead is less, let's try
783 // a larger threshold to unroll more loops.
784 if (L->getLoopDepth() > 1)
1. Assuming the condition is false
2. Taking false branch
785 UP.PartialThreshold *= 2;
786
787 // Disable partial & runtime unrolling on -Os.
788 UP.PartialOptSizeThreshold = 0;
789
790 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
3. Assuming the condition is true
5. Taking true branch
791 EnableFalkorHWPFUnrollFix)
4. Assuming the condition is true
792 getFalkorUnrollingPreferences(L, SE, UP);
6. Calling 'getFalkorUnrollingPreferences'
793}
794
795Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
796 Type *ExpectedType) {
797 switch (Inst->getIntrinsicID()) {
798 default:
799 return nullptr;
800 case Intrinsic::aarch64_neon_st2:
801 case Intrinsic::aarch64_neon_st3:
802 case Intrinsic::aarch64_neon_st4: {
803 // Create a struct type
804 StructType *ST = dyn_cast<StructType>(ExpectedType);
805 if (!ST)
806 return nullptr;
807 unsigned NumElts = Inst->getNumArgOperands() - 1;
808 if (ST->getNumElements() != NumElts)
809 return nullptr;
810 for (unsigned i = 0, e = NumElts; i != e; ++i) {
811 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
812 return nullptr;
813 }
814 Value *Res = UndefValue::get(ExpectedType);
815 IRBuilder<> Builder(Inst);
816 for (unsigned i = 0, e = NumElts; i != e; ++i) {
817 Value *L = Inst->getArgOperand(i);
818 Res = Builder.CreateInsertValue(Res, L, i);
819 }
820 return Res;
821 }
822 case Intrinsic::aarch64_neon_ld2:
823 case Intrinsic::aarch64_neon_ld3:
824 case Intrinsic::aarch64_neon_ld4:
825 if (Inst->getType() == ExpectedType)
826 return Inst;
827 return nullptr;
828 }
829}
830
831bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
832 MemIntrinsicInfo &Info) {
833 switch (Inst->getIntrinsicID()) {
834 default:
835 break;
836 case Intrinsic::aarch64_neon_ld2:
837 case Intrinsic::aarch64_neon_ld3:
838 case Intrinsic::aarch64_neon_ld4:
839 Info.ReadMem = true;
840 Info.WriteMem = false;
841 Info.PtrVal = Inst->getArgOperand(0);
842 break;
843 case Intrinsic::aarch64_neon_st2:
844 case Intrinsic::aarch64_neon_st3:
845 case Intrinsic::aarch64_neon_st4:
846 Info.ReadMem = false;
847 Info.WriteMem = true;
848 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
849 break;
850 }
851
852 switch (Inst->getIntrinsicID()) {
853 default:
854 return false;
855 case Intrinsic::aarch64_neon_ld2:
856 case Intrinsic::aarch64_neon_st2:
857 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
858 break;
859 case Intrinsic::aarch64_neon_ld3:
860 case Intrinsic::aarch64_neon_st3:
861 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
862 break;
863 case Intrinsic::aarch64_neon_ld4:
864 case Intrinsic::aarch64_neon_st4:
865 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
866 break;
867 }
868 return true;
869}
870
871/// See if \p I should be considered for address type promotion. We check if \p
872 /// I is a sext with the right type and used in memory accesses. If it is used in a
873/// "complex" getelementptr, we allow it to be promoted without finding other
874/// sext instructions that sign extended the same initial value. A getelementptr
875/// is considered as "complex" if it has more than 2 operands.
876bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
877 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
878 bool Considerable = false;
879 AllowPromotionWithoutCommonHeader = false;
880 if (!isa<SExtInst>(&I))
881 return false;
882 Type *ConsideredSExtType =
883 Type::getInt64Ty(I.getParent()->getParent()->getContext());
884 if (I.getType() != ConsideredSExtType)
885 return false;
886 // See if the sext is the one with the right type and used in at least one
887 // GetElementPtrInst.
888 for (const User *U : I.users()) {
889 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
890 Considerable = true;
891 // A getelementptr is considered as "complex" if it has more than 2
892 // operands. We will promote a SExt used in such complex GEP as we
893 // expect some computation to be merged if they are done on 64 bits.
894 if (GEPInst->getNumOperands() > 2) {
895 AllowPromotionWithoutCommonHeader = true;
896 break;
897 }
898 }
899 }
900 return Considerable;
901}
902
903bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
904 TTI::ReductionFlags Flags) const {
905 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
906 unsigned ScalarBits = Ty->getScalarSizeInBits();
907 switch (Opcode) {
908 case Instruction::FAdd:
909 case Instruction::FMul:
910 case Instruction::And:
911 case Instruction::Or:
912 case Instruction::Xor:
913 case Instruction::Mul:
914 return false;
915 case Instruction::Add:
916 return ScalarBits * Ty->getVectorNumElements() >= 128;
917 case Instruction::ICmp:
918 return (ScalarBits < 64) &&
919 (ScalarBits * Ty->getVectorNumElements() >= 128);
920 case Instruction::FCmp:
921 return Flags.NoNaN;
922 default:
923 llvm_unreachable("Unhandled reduction opcode")::llvm::llvm_unreachable_internal("Unhandled reduction opcode"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp"
, 923)
;
924 }
925 return false;
926}
927
928int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
929 bool IsPairwiseForm) {
930
931 if (IsPairwiseForm)
932 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
933
934 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
935 MVT MTy = LT.second;
936 int ISD = TLI->InstructionOpcodeToISD(Opcode);
937 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp"
, 937, __PRETTY_FUNCTION__))
;
938
939 // Horizontal adds can use the 'addv' instruction. We model the cost of these
940 // instructions as normal vector adds. This is the only arithmetic vector
941 // reduction operation for which we have an instruction.
942 static const CostTblEntry CostTblNoPairwise[]{
943 {ISD::ADD, MVT::v8i8, 1},
944 {ISD::ADD, MVT::v16i8, 1},
945 {ISD::ADD, MVT::v4i16, 1},
946 {ISD::ADD, MVT::v8i16, 1},
947 {ISD::ADD, MVT::v4i32, 1},
948 };
949
950 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
951 return LT.first * Entry->Cost;
952
953 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
954}
955
956int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
957 Type *SubTp) {
958 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
959 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
960 static const CostTblEntry ShuffleTbl[] = {
961 // Broadcast shuffle kinds can be performed with 'dup'.
962 { TTI::SK_Broadcast, MVT::v8i8, 1 },
963 { TTI::SK_Broadcast, MVT::v16i8, 1 },
964 { TTI::SK_Broadcast, MVT::v4i16, 1 },
965 { TTI::SK_Broadcast, MVT::v8i16, 1 },
966 { TTI::SK_Broadcast, MVT::v2i32, 1 },
967 { TTI::SK_Broadcast, MVT::v4i32, 1 },
968 { TTI::SK_Broadcast, MVT::v2i64, 1 },
969 { TTI::SK_Broadcast, MVT::v2f32, 1 },
970 { TTI::SK_Broadcast, MVT::v4f32, 1 },
971 { TTI::SK_Broadcast, MVT::v2f64, 1 },
972 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
973 // 'zip1/zip2' instructions.
974 { TTI::SK_Transpose, MVT::v8i8, 1 },
975 { TTI::SK_Transpose, MVT::v16i8, 1 },
976 { TTI::SK_Transpose, MVT::v4i16, 1 },
977 { TTI::SK_Transpose, MVT::v8i16, 1 },
978 { TTI::SK_Transpose, MVT::v2i32, 1 },
979 { TTI::SK_Transpose, MVT::v4i32, 1 },
980 { TTI::SK_Transpose, MVT::v2i64, 1 },
981 { TTI::SK_Transpose, MVT::v2f32, 1 },
982 { TTI::SK_Transpose, MVT::v4f32, 1 },
983 { TTI::SK_Transpose, MVT::v2f64, 1 },
984 // Select shuffle kinds.
985 // TODO: handle vXi8/vXi16.
986 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
987 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
988 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
989 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
990 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
991 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
992 // PermuteSingleSrc shuffle kinds.
993 // TODO: handle vXi8/vXi16.
994 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
995 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
996 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
997 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
998 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
999 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
1000 };
1001 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
1002 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
1003 return LT.first * Entry->Cost;
1004 }
1005
1006 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
1007}

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include <algorithm>
18#include <cassert>
19#include <climits>
20#include <cmath>
21#include <cstdint>
22#include <cstring>
23#include <limits>
24#include <type_traits>
25
26#ifdef __ANDROID_NDK__
27#include <android/api-level.h>
28#endif
29
30#ifdef _MSC_VER
31// Declare these intrinsics manually rather than including intrin.h. It's very
32// expensive, and MathExtras.h is popular.
33// #include <intrin.h>
34extern "C" {
35unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
36unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
37unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
38unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
39}
40#endif
41
42namespace llvm {
43
44/// The behavior an operation has on an input of 0.
45enum ZeroBehavior {
46 /// The returned value is undefined.
47 ZB_Undefined,
48 /// The returned value is numeric_limits<T>::max()
49 ZB_Max,
50 /// The returned value is numeric_limits<T>::digits
51 ZB_Width
52};
53
54/// Mathematical constants.
55namespace numbers {
56// TODO: Track C++20 std::numbers.
57// TODO: Favor using the hexadecimal FP constants (requires C++17).
58constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
59 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
60 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
61 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
62 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
63 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
64 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
65 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
66 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
67 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
68 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219
69 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
70 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
71 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
72 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
73constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
74 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
75 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
76 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
77 log2ef = 1.44269504F, // (0x1.715476P+0)
78 log10ef = .434294482F, // (0x1.bcb7b2P-2)
79 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
80 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
81 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
82 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
83 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
84 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
85 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
86 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
87 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
88} // namespace numbers
89
90namespace detail {
91template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
92 static unsigned count(T Val, ZeroBehavior) {
93 if (!Val)
94 return std::numeric_limits<T>::digits;
95 if (Val & 0x1)
96 return 0;
97
98 // Bisection method.
99 unsigned ZeroBits = 0;
100 T Shift = std::numeric_limits<T>::digits >> 1;
101 T Mask = std::numeric_limits<T>::max() >> Shift;
102 while (Shift) {
103 if ((Val & Mask) == 0) {
104 Val >>= Shift;
105 ZeroBits |= Shift;
106 }
107 Shift >>= 1;
108 Mask >>= Shift;
109 }
110 return ZeroBits;
111 }
112};
113
114#if defined(__GNUC__) || defined(_MSC_VER)
115template <typename T> struct TrailingZerosCounter<T, 4> {
116 static unsigned count(T Val, ZeroBehavior ZB) {
117 if (ZB != ZB_Undefined && Val == 0)
118 return 32;
119
120#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
121 return __builtin_ctz(Val);
122#elif defined(_MSC_VER)
123 unsigned long Index;
124 _BitScanForward(&Index, Val);
125 return Index;
126#endif
127 }
128};
129
130#if !defined(_MSC_VER) || defined(_M_X64)
131template <typename T> struct TrailingZerosCounter<T, 8> {
132 static unsigned count(T Val, ZeroBehavior ZB) {
133 if (ZB != ZB_Undefined && Val == 0)
134 return 64;
135
136#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
137 return __builtin_ctzll(Val);
138#elif defined(_MSC_VER)
139 unsigned long Index;
140 _BitScanForward64(&Index, Val);
141 return Index;
142#endif
143 }
144};
145#endif
146#endif
147} // namespace detail
148
149/// Count number of 0's from the least significant bit to the most
150/// stopping at the first 1.
151///
152/// Only unsigned integral types are allowed.
153///
154/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
155/// valid arguments.
156template <typename T>
157unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
158 static_assert(std::numeric_limits<T>::is_integer &&
159 !std::numeric_limits<T>::is_signed,
160 "Only unsigned integral types are allowed.");
161 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
162}
163
164namespace detail {
165template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
166 static unsigned count(T Val, ZeroBehavior) {
167 if (!Val)
168 return std::numeric_limits<T>::digits;
169
170 // Bisection method.
171 unsigned ZeroBits = 0;
172 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
173 T Tmp = Val >> Shift;
174 if (Tmp)
175 Val = Tmp;
176 else
177 ZeroBits |= Shift;
178 }
179 return ZeroBits;
180 }
181};
182
183#if defined(__GNUC__) || defined(_MSC_VER)
184template <typename T> struct LeadingZerosCounter<T, 4> {
185 static unsigned count(T Val, ZeroBehavior ZB) {
186 if (ZB != ZB_Undefined && Val == 0)
187 return 32;
188
189#if __has_builtin(__builtin_clz) || defined(__GNUC__)
190 return __builtin_clz(Val);
191#elif defined(_MSC_VER)
192 unsigned long Index;
193 _BitScanReverse(&Index, Val);
194 return Index ^ 31;
195#endif
196 }
197};
198
199#if !defined(_MSC_VER) || defined(_M_X64)
200template <typename T> struct LeadingZerosCounter<T, 8> {
201 static unsigned count(T Val, ZeroBehavior ZB) {
202 if (ZB != ZB_Undefined && Val == 0)
203 return 64;
204
205#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
206 return __builtin_clzll(Val);
207#elif defined(_MSC_VER)
208 unsigned long Index;
209 _BitScanReverse64(&Index, Val);
210 return Index ^ 63;
211#endif
212 }
213};
214#endif
215#endif
216} // namespace detail
217
218/// Count number of 0's from the most significant bit to the least
219/// stopping at the first 1.
220///
221/// Only unsigned integral types are allowed.
222///
223/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
224/// valid arguments.
225template <typename T>
226unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
227 static_assert(std::numeric_limits<T>::is_integer &&
228 !std::numeric_limits<T>::is_signed,
229 "Only unsigned integral types are allowed.");
230 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
231}
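
Note (not part of this excerpt): the path above reports "Calling 'Log2_32'" at line 770. Log2_32 is defined further down in this same header, past the end of the excerpt; a sketch of it, assuming it is built on countLeadingZeros as in mainline LLVM, shows where the wrapped-around shift count comes from:

// Sketch of llvm::Log2_32 (assumption; its actual definition is outside this excerpt).
inline unsigned Log2_32(uint32_t Value) {
  // For Value == 0, countLeadingZeros returns 32, and 31 - 32 wraps to UINT_MAX
  // (4294967295), the shift count reported at line 770.
  return 31 - countLeadingZeros(Value);
}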
232
233/// Get the index of the first set bit starting from the least
234/// significant bit.
235///
236/// Only unsigned integral types are allowed.
237///
238/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
239/// valid arguments.
240template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
241 if (ZB == ZB_Max && Val == 0)
242 return std::numeric_limits<T>::max();
243
244 return countTrailingZeros(Val, ZB_Undefined);
245}
246
247/// Create a bitmask with the N right-most bits set to 1, and all other
248/// bits set to 0. Only unsigned types are allowed.
249template <typename T> T maskTrailingOnes(unsigned N) {
250 static_assert(std::is_unsigned<T>::value, "Invalid type!");
251 const unsigned Bits = CHAR_BIT * sizeof(T);
252 assert(N <= Bits && "Invalid bit index");
253 return N == 0 ? 0 : (T(-1) >> (Bits - N));
254}
255
256/// Create a bitmask with the N left-most bits set to 1, and all other
257/// bits set to 0. Only unsigned types are allowed.
258template <typename T> T maskLeadingOnes(unsigned N) {
259 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
260}
261
262/// Create a bitmask with the N right-most bits set to 0, and all other
263/// bits set to 1. Only unsigned types are allowed.
264template <typename T> T maskTrailingZeros(unsigned N) {
265 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
266}
267
268/// Create a bitmask with the N left-most bits set to 0, and all other
269/// bits set to 1. Only unsigned types are allowed.
270template <typename T> T maskLeadingZeros(unsigned N) {
271 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
272}
273
274/// Get the index of the last set bit starting from the least
275/// significant bit.
276///
277/// Only unsigned integral types are allowed.
278///
279/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
280/// valid arguments.
281template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
282 if (ZB == ZB_Max && Val == 0)
283 return std::numeric_limits<T>::max();
284
285 // Use ^ instead of - because both gcc and llvm can remove the associated ^
286 // in the __builtin_clz intrinsic on x86.
287 return countLeadingZeros(Val, ZB_Undefined) ^
288 (std::numeric_limits<T>::digits - 1);
289}
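A hedged aside on the XOR trick: for the power-of-two bit widths used here, countLeadingZeros of a non-zero value lies in [0, digits - 1], so XOR with digits - 1 computes the same result as the subtraction while letting the compiler fold it into the bsr/lzcnt lowering. For a 32-bit value 0x10 the leading-zero count is 27 and 27 ^ 31 == 31 - 27 == 4, the index of the highest set bit. A standalone check (GCC/Clang builtin assumed available):

#include <cassert>
int main() {
  unsigned Val = 0x10;
  assert(__builtin_clz(Val) == 27);
  assert((__builtin_clz(Val) ^ 31) == 4);
}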
290
291/// Macro compressed bit reversal table for 256 bits.
292///
293/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
294static const unsigned char BitReverseTable256[256] = {
295#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
296#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
297#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
298 R6(0), R6(2), R6(1), R6(3)
299#undef R2
300#undef R4
301#undef R6
302};
303
304/// Reverse the bits in \p Val.
305template <typename T>
306T reverseBits(T Val) {
307 unsigned char in[sizeof(Val)];
308 unsigned char out[sizeof(Val)];
309 std::memcpy(in, &Val, sizeof(Val));
310 for (unsigned i = 0; i < sizeof(Val); ++i)
311 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
312 std::memcpy(&Val, out, sizeof(Val));
313 return Val;
314}
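The loop above reverses the bits within each byte via the table and reverses the byte order by writing to the mirrored index. A minimal standalone sketch of the same idea for uint32_t, computing the per-byte reversal by hand rather than from the table:

#include <cassert>
#include <cstdint>

static uint8_t revByte(uint8_t B) {
  B = (B & 0xF0) >> 4 | (B & 0x0F) << 4;   // swap nibbles
  B = (B & 0xCC) >> 2 | (B & 0x33) << 2;   // swap bit pairs
  B = (B & 0xAA) >> 1 | (B & 0x55) << 1;   // swap adjacent bits
  return B;
}

int main() {
  uint32_t In = 0x00000001u, Out = 0;
  for (int I = 0; I < 4; ++I)
    Out |= uint32_t(revByte(uint8_t(In >> (8 * I)))) << (8 * (3 - I));
  assert(Out == 0x80000000u);   // bit 0 ends up as bit 31
}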
315
316// NOTE: The following support functions use the _32/_64 extensions instead of
317// type overloading so that signed and unsigned integers can be used without
318// ambiguity.
319
320/// Return the high 32 bits of a 64 bit value.
321constexpr inline uint32_t Hi_32(uint64_t Value) {
322 return static_cast<uint32_t>(Value >> 32);
323}
324
325/// Return the low 32 bits of a 64 bit value.
326constexpr inline uint32_t Lo_32(uint64_t Value) {
327 return static_cast<uint32_t>(Value);
328}
329
330/// Make a 64-bit integer from a high / low pair of 32-bit integers.
331constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
332 return ((uint64_t)High << 32) | (uint64_t)Low;
333}
334
335/// Checks if an integer fits into the given bit width.
336template <unsigned N> constexpr inline bool isInt(int64_t x) {
337 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
338}
339// Template specializations to get better code for common cases.
340template <> constexpr inline bool isInt<8>(int64_t x) {
341 return static_cast<int8_t>(x) == x;
342}
343template <> constexpr inline bool isInt<16>(int64_t x) {
344 return static_cast<int16_t>(x) == x;
345}
346template <> constexpr inline bool isInt<32>(int64_t x) {
347 return static_cast<int32_t>(x) == x;
348}
349
350/// Checks if a signed integer is an N bit number shifted left by S.
351template <unsigned N, unsigned S>
352constexpr inline bool isShiftedInt(int64_t x) {
353 static_assert(
354 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number).");
355 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
356 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
357}
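To make the <N, S> contract concrete, here is a small standalone check (a hand-rolled runtime equivalent, assuming N + S < 64, not the template above): the value must fit in N + S signed bits and be a multiple of 2^S.

#include <cassert>
#include <cstdint>

static bool isShiftedIntNS(int64_t X, unsigned N, unsigned S) {
  int64_t Lo = -(int64_t(1) << (N + S - 1));
  int64_t Hi = int64_t(1) << (N + S - 1);
  return Lo <= X && X < Hi && X % (int64_t(1) << S) == 0;
}

int main() {
  assert(isShiftedIntNS(508, 8, 2));    // 508 == 127 << 2
  assert(!isShiftedIntNS(510, 8, 2));   // not a multiple of 4
  assert(!isShiftedIntNS(1024, 8, 2));  // 256 << 2 needs 9 value bits
}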
358
359/// Checks if an unsigned integer fits into the given bit width.
360///
361/// This is written as two functions rather than as simply
362///
363/// return N >= 64 || X < (UINT64_C(1) << N);
364///
365/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
366/// left too many places.
367template <unsigned N>
368constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
369 static_assert(N > 0, "isUInt<0> doesn't make sense");
370 return X < (UINT64_C(1) << (N));
371}
372template <unsigned N>
373constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t X) {
374 return true;
375}
376
377// Template specializations to get better code for common cases.
378template <> constexpr inline bool isUInt<8>(uint64_t x) {
379 return static_cast<uint8_t>(x) == x;
380}
381template <> constexpr inline bool isUInt<16>(uint64_t x) {
382 return static_cast<uint16_t>(x) == x;
383}
384template <> constexpr inline bool isUInt<32>(uint64_t x) {
385 return static_cast<uint32_t>(x) == x;
386}
387
388/// Checks if an unsigned integer is an N bit number shifted left by S.
389template <unsigned N, unsigned S>
390constexpr inline bool isShiftedUInt(uint64_t x) {
391 static_assert(
392 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
393 static_assert(N + S <= 64,
394 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
395 // Per the two static_asserts above, S must be strictly less than 64. So
396 // 1 << S is not undefined behavior.
397 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
398}
399
400/// Gets the maximum value for a N-bit unsigned integer.
401inline uint64_t maxUIntN(uint64_t N) {
402 assert(N > 0 && N <= 64 && "integer width out of range");
403
404 // uint64_t(1) << 64 is undefined behavior, so we can't do
405 // (uint64_t(1) << N) - 1
406 // without checking first that N != 64. But this works and doesn't have a
407 // branch.
408 return UINT64_MAX >> (64 - N);
409}
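A quick sanity check of the branch-free trick (standalone, not calling the header): shifting the all-ones pattern right by 64 - N leaves exactly N low bits set, and N == 64 degenerates to a shift by zero rather than the undefined 1 << 64.

#include <cassert>
#include <cstdint>
int main() {
  unsigned N = 16;
  assert((UINT64_MAX >> (64 - N)) == 0xFFFFu);      // 2^16 - 1
  N = 64;
  assert((UINT64_MAX >> (64 - N)) == UINT64_MAX);   // shift by 0 is well defined
}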
410
411/// Gets the minimum value for a N-bit signed integer.
412inline int64_t minIntN(int64_t N) {
413 assert(N > 0 && N <= 64 && "integer width out of range");
414
415 return -(UINT64_C(1) << (N - 1));
416}
417
418/// Gets the maximum value for a N-bit signed integer.
419inline int64_t maxIntN(int64_t N) {
420 assert(N > 0 && N <= 64 && "integer width out of range");
421
422 // This relies on two's complement wraparound when N == 64, so we convert to
423 // int64_t only at the very end to avoid UB.
424 return (UINT64_C(1) << (N - 1)) - 1;
425}
426
427/// Checks if an unsigned integer fits into the given (dynamic) bit width.
428inline bool isUIntN(unsigned N, uint64_t x) {
429 return N >= 64 || x <= maxUIntN(N);
430}
431
432/// Checks if a signed integer fits into the given (dynamic) bit width.
433inline bool isIntN(unsigned N, int64_t x) {
434 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
435}
436
437/// Return true if the argument is a non-empty sequence of ones starting at the
438/// least significant bit with the remainder zero (32 bit version).
439/// Ex. isMask_32(0x0000FFFFU) == true.
440constexpr inline bool isMask_32(uint32_t Value) {
441 return Value && ((Value + 1) & Value) == 0;
442}
443
444/// Return true if the argument is a non-empty sequence of ones starting at the
445/// least significant bit with the remainder zero (64 bit version).
446constexpr inline bool isMask_64(uint64_t Value) {
447 return Value && ((Value + 1) & Value) == 0;
448}
449
450/// Return true if the argument contains a non-empty sequence of ones with the
451/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
452constexpr inline bool isShiftedMask_32(uint32_t Value) {
453 return Value && isMask_32((Value - 1) | Value);
454}
455
456/// Return true if the argument contains a non-empty sequence of ones with the
457/// remainder zero (64 bit version.)
458constexpr inline bool isShiftedMask_64(uint64_t Value) {
459 return Value && isMask_64((Value - 1) | Value);
460}
461
462/// Return true if the argument is a power of two > 0.
463/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
464constexpr inline bool isPowerOf2_32(uint32_t Value) {
465 return Value && !(Value & (Value - 1));
466}
467
468/// Return true if the argument is a power of two > 0 (64 bit edition.)
469constexpr inline bool isPowerOf2_64(uint64_t Value) {
470 return Value && !(Value & (Value - 1));
471}
472
473/// Count the number of ones from the most significant bit to the first
474/// zero bit.
475///
476/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
477/// Only unsigned integral types are allowed.
478///
479/// \param ZB the behavior on an input of all ones. Only ZB_Width and
480/// ZB_Undefined are valid arguments.
481template <typename T>
482unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
483 static_assert(std::numeric_limits<T>::is_integer &&
484 !std::numeric_limits<T>::is_signed,
485 "Only unsigned integral types are allowed.");
486 return countLeadingZeros<T>(~Value, ZB);
487}
488
489/// Count the number of ones from the least significant bit to the first
490/// zero bit.
491///
492/// Ex. countTrailingOnes(0x00FF00FF) == 8.
493/// Only unsigned integral types are allowed.
494///
495/// \param ZB the behavior on an input of all ones. Only ZB_Width and
496/// ZB_Undefined are valid arguments.
497template <typename T>
498unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
499 static_assert(std::numeric_limits<T>::is_integer &&
500 !std::numeric_limits<T>::is_signed,
501 "Only unsigned integral types are allowed.");
502 return countTrailingZeros<T>(~Value, ZB);
503}
504
505namespace detail {
506template <typename T, std::size_t SizeOfT> struct PopulationCounter {
507 static unsigned count(T Value) {
508 // Generic version, forward to 32 bits.
509 static_assert(SizeOfT <= 4, "Not implemented!");
510#if defined(__GNUC__)
511 return __builtin_popcount(Value);
512#else
513 uint32_t v = Value;
514 v = v - ((v >> 1) & 0x55555555);
515 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
516 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
517#endif
518 }
519};
520
521template <typename T> struct PopulationCounter<T, 8> {
522 static unsigned count(T Value) {
523#if defined(__GNUC__)
524 return __builtin_popcountll(Value);
525#else
526 uint64_t v = Value;
527 v = v - ((v >> 1) & 0x5555555555555555ULL);
528 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
529 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
530 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
531#endif
532 }
533};
534} // namespace detail
535
536/// Count the number of set bits in a value.
537/// Ex. countPopulation(0xF000F000) = 8
538/// Returns 0 if the word is zero.
539template <typename T>
540inline unsigned countPopulation(T Value) {
541 static_assert(std::numeric_limits<T>::is_integer &&
542 !std::numeric_limits<T>::is_signed,
543 "Only unsigned integral types are allowed.");
544 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
545}
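Where the builtin is unavailable, the 32-bit fallback above is the classic SWAR reduction: sum bits within 2-bit and then 4-bit groups, then use a multiply by 0x1010101 to add all byte counts into the top byte. A standalone check against the GCC/Clang builtin for the documented example value:

#include <cassert>
#include <cstdint>
int main() {
  uint32_t V = 0xF000F000u;
  V = V - ((V >> 1) & 0x55555555);
  V = (V & 0x33333333) + ((V >> 2) & 0x33333333);
  unsigned Count = (((V + (V >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
  assert(Count == 8);
  assert(Count == unsigned(__builtin_popcount(0xF000F000u)));   // GCC/Clang only
}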
546
547/// Compile time Log2.
548/// Valid only for positive powers of two.
549template <size_t kValue> constexpr inline size_t CTLog2() {
550 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
551 "Value is not a valid power of 2");
552 return 1 + CTLog2<kValue / 2>();
553}
554
555template <> constexpr inline size_t CTLog2<1>() { return 0; }
556
557/// Return the log base 2 of the specified value.
558inline double Log2(double Value) {
559#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
560 return __builtin_log(Value) / __builtin_log(2.0);
561#else
562 return log2(Value);
563#endif
564}
565
566/// Return the floor log base 2 of the specified value, -1 if the value is zero.
567/// (32 bit edition.)
568/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
569inline unsigned Log2_32(uint32_t Value) {
570 return 31 - countLeadingZeros(Value);
    [analyzer path step 12] Returning the value 4294967295
571}
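This is the value the analyzer path is tracking: under ZB_Width a zero input makes countLeadingZeros return 32, so 31 - 32 wraps to 4294967295 in the unsigned return type, and feeding that result into a left shift is undefined because the shift amount equals or exceeds the bit width. A minimal standalone illustration (hypothetical caller, not the code under analysis):

#include <cassert>
#include <cstdint>
int main() {
  uint32_t Value = 0;
  unsigned CLZ = Value ? unsigned(__builtin_clz(Value)) : 32;  // ZB_Width behavior
  unsigned Log2 = 31 - CLZ;                                    // wraps modulo 2^32
  assert(Log2 == 4294967295u);
  // int Bit = 1 << Log2;   // would be undefined: shift amount >= width of int
}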
572
573/// Return the floor log base 2 of the specified value, -1 if the value is zero.
574/// (64 bit edition.)
575inline unsigned Log2_64(uint64_t Value) {
576 return 63 - countLeadingZeros(Value);
577}
578
579/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
580/// (32 bit edition).
581/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
582inline unsigned Log2_32_Ceil(uint32_t Value) {
583 return 32 - countLeadingZeros(Value - 1);
584}
585
586/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
587/// (64 bit edition.)
588inline unsigned Log2_64_Ceil(uint64_t Value) {
589 return 64 - countLeadingZeros(Value - 1);
590}
591
592/// Return the greatest common divisor of the values using Euclid's algorithm.
593template <typename T>
594inline T greatestCommonDivisor(T A, T B) {
595 while (B) {
596 T Tmp = B;
597 B = A % B;
598 A = Tmp;
599 }
600 return A;
601}
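For reference, the loop is plain Euclid's algorithm; a standalone trace for 48 and 18 runs gcd(48, 18) -> gcd(18, 12) -> gcd(12, 6) -> gcd(6, 0) = 6:

#include <cassert>
#include <cstdint>
int main() {
  uint64_t A = 48, B = 18;
  while (B) {
    uint64_t Tmp = B;
    B = A % B;
    A = Tmp;
  }
  assert(A == 6);
}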
602
603inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
604 return greatestCommonDivisor<uint64_t>(A, B);
605}
606
607/// This function takes a 64-bit integer and returns the bit equivalent double.
608inline double BitsToDouble(uint64_t Bits) {
609 double D;
610 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
611 memcpy(&D, &Bits, sizeof(Bits));
612 return D;
613}
614
615/// This function takes a 32-bit integer and returns the bit equivalent float.
616inline float BitsToFloat(uint32_t Bits) {
617 float F;
618 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
619 memcpy(&F, &Bits, sizeof(Bits));
620 return F;
621}
622
623/// This function takes a double and returns the bit equivalent 64-bit integer.
624/// Note that copying doubles around changes the bits of NaNs on some hosts,
625/// notably x86, so this routine cannot be used if these bits are needed.
626inline uint64_t DoubleToBits(double Double) {
627 uint64_t Bits;
628 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
629 memcpy(&Bits, &Double, sizeof(Double));
630 return Bits;
631}
632
633/// This function takes a float and returns the bit equivalent 32-bit integer.
634/// Note that copying floats around changes the bits of NaNs on some hosts,
635/// notably x86, so this routine cannot be used if these bits are needed.
636inline uint32_t FloatToBits(float Float) {
637 uint32_t Bits;
638 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
639 memcpy(&Bits, &Float, sizeof(Float));
640 return Bits;
641}
642
643/// A and B are either alignments or offsets. Return the minimum alignment that
644/// may be assumed after adding the two together.
645constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
646 // The largest power of 2 that divides both A and B.
647 //
648 // Replace "-Value" by "1+~Value" in the following commented code to avoid
649 // MSVC warning C4146
650 // return (A | B) & -(A | B);
651 return (A | B) & (1 + ~(A | B));
652}
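The expression isolates the lowest set bit of A | B, which is the largest power of two dividing both arguments; 1 + ~X is simply -X written in a form that avoids the MSVC unary-minus-on-unsigned warning. A quick standalone check with hypothetical values:

#include <cassert>
#include <cstdint>
int main() {
  uint64_t A = 8, B = 12;                    // e.g. an 8-byte and a 12-byte offset
  assert(((A | B) & (1 + ~(A | B))) == 4u);  // largest power of two dividing both
}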
653
654/// Returns the next power of two (in 64-bits) that is strictly greater than A.
655/// Returns zero on overflow.
656inline uint64_t NextPowerOf2(uint64_t A) {
657 A |= (A >> 1);
658 A |= (A >> 2);
659 A |= (A >> 4);
660 A |= (A >> 8);
661 A |= (A >> 16);
662 A |= (A >> 32);
663 return A + 1;
664}
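The chain of ORs smears the highest set bit into every lower position, so after the final OR the value is one less than a power of two and A + 1 is the next power of two strictly above the input. A standalone sketch of the same routine:

#include <cassert>
#include <cstdint>

static uint64_t nextPow2(uint64_t A) {
  A |= (A >> 1);  A |= (A >> 2);  A |= (A >> 4);
  A |= (A >> 8);  A |= (A >> 16); A |= (A >> 32);
  return A + 1;
}

int main() {
  assert(nextPow2(5) == 8);
  assert(nextPow2(8) == 16);   // strictly greater, as documented above
  assert(nextPow2(0) == 1);
}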
665
666/// Returns the power of two which is less than or equal to the given value.
667/// Essentially, it is a floor operation across the domain of powers of two.
668inline uint64_t PowerOf2Floor(uint64_t A) {
669 if (!A) return 0;
670 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
671}
672
673/// Returns the power of two which is greater than or equal to the given value.
674/// Essentially, it is a ceil operation across the domain of powers of two.
675inline uint64_t PowerOf2Ceil(uint64_t A) {
676 if (!A)
677 return 0;
678 return NextPowerOf2(A - 1);
679}
680
681/// Returns the next integer (mod 2**64) that is greater than or equal to
682/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
683///
684/// If non-zero \p Skew is specified, the return value will be a minimal
685/// integer that is greater than or equal to \p Value and equal to
686/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
687/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
688///
689/// Examples:
690/// \code
691/// alignTo(5, 8) = 8
692/// alignTo(17, 8) = 24
693/// alignTo(~0LL, 8) = 0
694/// alignTo(321, 255) = 510
695///
696/// alignTo(5, 8, 7) = 7
697/// alignTo(17, 8, 1) = 17
698/// alignTo(~0LL, 8, 3) = 3
699/// alignTo(321, 255, 42) = 552
700/// \endcode
701inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
702 assert(Align != 0u && "Align can't be 0.");
703 Skew %= Align;
704 return (Value + Align - 1 - Skew) / Align * Align + Skew;
705}
706
707/// Returns the next integer (mod 2**64) that is greater than or equal to
708/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
709template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
710 static_assert(Align != 0u, "Align must be non-zero");
711 return (Value + Align - 1) / Align * Align;
712}
713
714/// Returns the integer ceil(Numerator / Denominator).
715inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
716 return alignTo(Numerator, Denominator) / Denominator;
717}
718
719/// Returns the integer nearest(Numerator / Denominator).
720inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
721 return (Numerator + (Denominator / 2)) / Denominator;
722}
723
724/// Returns the largest uint64_t less than or equal to \p Value and is
725/// \p Skew mod \p Align. \p Align must be non-zero
726inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
727 assert(Align != 0u && "Align can't be 0.");
728 Skew %= Align;
729 return (Value - Skew) / Align * Align + Skew;
730}
731
732/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
733/// Requires 0 < B <= 32.
734template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
735 static_assert(B > 0, "Bit width can't be 0.");
736 static_assert(B <= 32, "Bit width out of range.");
737 return int32_t(X << (32 - B)) >> (32 - B);
738}
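The shift pair first moves bit B - 1 into the sign position and then relies on arithmetic right shift (as the rest of this header does) to replicate it downward; for B = 4, the low nibble 0xF sign-extends to -1 and 0x7 stays 7. A standalone sketch:

#include <cassert>
#include <cstdint>

static int32_t signExtend4(uint32_t X) {
  return int32_t(X << 28) >> 28;   // same form as above, with B = 4
}

int main() {
  assert(signExtend4(0xF) == -1);
  assert(signExtend4(0x7) == 7);
}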
739
740/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
741/// Requires 0 < B <= 32.
742inline int32_t SignExtend32(uint32_t X, unsigned B) {
743 assert(B > 0 && "Bit width can't be 0.");
744 assert(B <= 32 && "Bit width out of range.");
745 return int32_t(X << (32 - B)) >> (32 - B);
746}
747
748/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
749/// Requires 0 < B <= 64.
750template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
751 static_assert(B > 0, "Bit width can't be 0.");
752 static_assert(B <= 64, "Bit width out of range.");
753 return int64_t(x << (64 - B)) >> (64 - B);
754}
755
756/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
757/// Requires 0 < B <= 64.
758inline int64_t SignExtend64(uint64_t X, unsigned B) {
759 assert(B > 0 && "Bit width can't be 0.");
760 assert(B <= 64 && "Bit width out of range.");
761 return int64_t(X << (64 - B)) >> (64 - B);
762}
763
764/// Subtract two unsigned integers, X and Y, of type T and return the absolute
765/// value of the result.
766template <typename T>
767std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
768 return std::max(X, Y) - std::min(X, Y);
769}
770
771/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
772/// maximum representable value of T on overflow. ResultOverflowed indicates if
773/// the result is larger than the maximum representable value of type T.
774template <typename T>
775std::enable_if_t<std::is_unsigned<T>::value, T>
776SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
777 bool Dummy;
778 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
779 // Hacker's Delight, p. 29
780 T Z = X + Y;
781 Overflowed = (Z < X || Z < Y);
782 if (Overflowed)
783 return std::numeric_limits<T>::max();
784 else
785 return Z;
786}
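The wraparound test comes from Hacker's Delight: an unsigned sum overflowed exactly when the truncated result is smaller than either operand. A worked standalone example for uint8_t, where 200 + 100 wraps to 44:

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  uint8_t X = 200, Y = 100;
  uint8_t Z = uint8_t(X + Y);                 // 300 truncates to 44
  bool Overflowed = (Z < X || Z < Y);
  uint8_t R = Overflowed ? std::numeric_limits<uint8_t>::max() : Z;
  assert(Overflowed && R == 255);
}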
787
788/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
789/// maximum representable value of T on overflow. ResultOverflowed indicates if
790/// the result is larger than the maximum representable value of type T.
791template <typename T>
792std::enable_if_t<std::is_unsigned<T>::value, T>
793SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
794 bool Dummy;
795 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
796
797 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
798 // because it fails for uint16_t (where multiplication can have undefined
799 // behavior due to promotion to int), and requires a division in addition
800 // to the multiplication.
801
802 Overflowed = false;
803
804 // Log2(Z) would be either Log2Z or Log2Z + 1.
805 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
806 // will necessarily be less than Log2Max as desired.
807 int Log2Z = Log2_64(X) + Log2_64(Y);
808 const T Max = std::numeric_limits<T>::max();
809 int Log2Max = Log2_64(Max);
810 if (Log2Z < Log2Max) {
811 return X * Y;
812 }
813 if (Log2Z > Log2Max) {
814 Overflowed = true;
815 return Max;
816 }
817
818 // We're going to use the top bit, and maybe overflow one
819 // bit past it. Multiply all but the bottom bit then add
820 // that on at the end.
821 T Z = (X >> 1) * Y;
822 if (Z & ~(Max >> 1)) {
823 Overflowed = true;
824 return Max;
825 }
826 Z <<= 1;
827 if (X & 1)
828 return SaturatingAdd(Z, Y, ResultOverflowed);
829
830 return Z;
831}
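A hedged usage sketch (assumes an LLVM build with llvm/Support/MathExtras.h on the include path): when the two Log2 estimates land exactly on Log2Max the pre-check cannot decide, so the top-bit path runs; 17 * 15 == 255 fits uint8_t exactly, while 16 * 16 clamps.

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  bool Ov = false;
  assert(llvm::SaturatingMultiply<uint8_t>(17, 15, &Ov) == 255 && !Ov);  // exact fit
  assert(llvm::SaturatingMultiply<uint8_t>(16, 16, &Ov) == 255 && Ov);   // clamped
}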
832
833/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
834/// the product. Clamp the result to the maximum representable value of T on
835/// overflow. ResultOverflowed indicates if the result is larger than the
836/// maximum representable value of type T.
837template <typename T>
838std::enable_if_t<std::is_unsigned<T>::value, T>
839SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
840 bool Dummy;
841 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
842
843 T Product = SaturatingMultiply(X, Y, &Overflowed);
844 if (Overflowed)
845 return Product;
846
847 return SaturatingAdd(A, Product, &Overflowed);
848}
849
850/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
851extern const float huge_valf;
852
853
854/// Add two signed integers, computing the two's complement truncated result,
855/// returning true if overflow occurred.
856template <typename T>
857std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
858#if __has_builtin(__builtin_add_overflow)
859 return __builtin_add_overflow(X, Y, &Result);
860#else
861 // Perform the unsigned addition.
862 using U = std::make_unsigned_t<T>;
863 const U UX = static_cast<U>(X);
864 const U UY = static_cast<U>(Y);
865 const U UResult = UX + UY;
866
867 // Convert to signed.
868 Result = static_cast<T>(UResult);
869
870 // Adding two positive numbers should result in a positive number.
871 if (X > 0 && Y > 0)
872 return Result <= 0;
873 // Adding two negatives should result in a negative number.
874 if (X < 0 && Y < 0)
875 return Result >= 0;
876 return false;
877#endif
878}
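A quick check of the behaviour either path should produce (using the GCC/Clang builtin directly as a stand-in): adding 1 to INT32_MAX reports overflow and stores the two's complement wrapped value, which is also what the sign test in the fallback detects.

#include <cassert>
#include <cstdint>
int main() {
  int32_t X = INT32_MAX, Y = 1, Result = 0;
  bool Ov = __builtin_add_overflow(X, Y, &Result);   // GCC/Clang builtin
  assert(Ov && Result == INT32_MIN);                 // positive + positive wrapped to <= 0
}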
879
880/// Subtract two signed integers, computing the two's complement truncated
881/// result, returning true if an overflow occurred.
882template <typename T>
883std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
884#if __has_builtin(__builtin_sub_overflow)
885 return __builtin_sub_overflow(X, Y, &Result);
886#else
887 // Perform the unsigned subtraction.
888 using U = std::make_unsigned_t<T>;
889 const U UX = static_cast<U>(X);
890 const U UY = static_cast<U>(Y);
891 const U UResult = UX - UY;
892
893 // Convert to signed.
894 Result = static_cast<T>(UResult);
895
896 // Subtracting a positive number from a negative results in a negative number.
897 if (X <= 0 && Y > 0)
898 return Result >= 0;
899 // Subtracting a negative number from a positive results in a positive number.
900 if (X >= 0 && Y < 0)
901 return Result <= 0;
902 return false;
903#endif
904}
905
906/// Multiply two signed integers, computing the two's complement truncated
907/// result, returning true if an overflow occurred.
908template <typename T>
909std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
910 // Perform the unsigned multiplication on absolute values.
911 using U = std::make_unsigned_t<T>;
912 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
913 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
914 const U UResult = UX * UY;
915
916 // Convert to signed.
917 const bool IsNegative = (X < 0) ^ (Y < 0);
918 Result = IsNegative ? (0 - UResult) : UResult;
919
920 // If any of the args was 0, result is 0 and no overflow occurs.
921 if (UX == 0 || UY == 0)
922 return false;
923
924 // UX and UY are in [1, 2^n], where n is the number of digits.
925 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
926 // positive) divided by an argument compares to the other.
927 if (IsNegative)
928 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
929 else
930 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
931}
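The final division bound is the interesting part: for int8_t, the positive limit is 127 and the negative limit is 128, so 16 * 8 == 128 overflows a positive product while (-16) * 8 == -128 just fits. A standalone check of those two inequalities:

#include <cassert>
int main() {
  unsigned UX = 16, UY = 8;
  assert(UX > 127u / UY);            // positive product: 16 > 15, so it overflows
  assert(!(UX > (127u + 1) / UY));   // negative product: 16 > 16 is false, so -128 fits
}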
932
933} // End llvm namespace
934
935#endif