Bug Summary

File: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Warning: line 770, column 21
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
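This diagnostic refers to UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads) at line 770 below. The bug path only assumes StridedLoads is non-zero, which suggests the analyzer treats the value returned by countStridedLoads as otherwise unconstrained; if it could exceed MaxStridedLoads (7), the integer division would yield 0, and the path shows Log2_32 returning 4294967295 (-1 converted to unsigned) for that argument, making the shift count out of range. A minimal standalone sketch of that arithmetic, assuming Log2_32 behaves like "31 minus the leading-zero count" (its definition in MathExtras.h lies outside this excerpt):

// Sketch only; plain C++ stand-ins, not LLVM's code.
#include <cstdint>
#include <cstdio>

// Stand-in for countLeadingZeros with ZB_Width behavior: returns 32 for an input of 0.
static unsigned countLeadingZeros32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Mask = 0x80000000u; Mask && !(V & Mask); Mask >>= 1)
    ++N;
  return N;
}

int main() {
  const unsigned MaxStridedLoads = 7;
  unsigned StridedLoads = 8;                            // hypothetical: larger than MaxStridedLoads
  uint32_t Quotient = MaxStridedLoads / StridedLoads;   // integer division gives 0
  unsigned Shift = 31 - countLeadingZeros32(Quotient);  // 31 - 32 wraps to 4294967295
  std::printf("shift amount = %u\n", Shift);            // 1 << Shift would be undefined behavior
  return 0;
}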

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExpandImm.h"
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/IntrinsicsAArch64.h"
19#include "llvm/Support/Debug.h"
20#include <algorithm>
21using namespace llvm;
22
23#define DEBUG_TYPE "aarch64tti"
24
25static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
26 cl::init(true), cl::Hidden);
27
28bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
29 const Function *Callee) const {
30 const TargetMachine &TM = getTLI()->getTargetMachine();
31
32 const FeatureBitset &CallerBits =
33 TM.getSubtargetImpl(*Caller)->getFeatureBits();
34 const FeatureBitset &CalleeBits =
35 TM.getSubtargetImpl(*Callee)->getFeatureBits();
36
37 // Inline a callee if its target-features are a subset of the caller's
38 // target-features.
39 return (CallerBits & CalleeBits) == CalleeBits;
40}
41
42/// Calculate the cost of materializing a 64-bit value. This helper
43/// method might only calculate a fraction of a larger immediate. Therefore it
44/// is valid to return a cost of ZERO.
45int AArch64TTIImpl::getIntImmCost(int64_t Val) {
46 // Check if the immediate can be encoded within an instruction.
47 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
48 return 0;
49
50 if (Val < 0)
51 Val = ~Val;
52
53 // Calculate how many moves we will need to materialize this constant.
54 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
55 AArch64_IMM::expandMOVImm(Val, 64, Insn);
56 return Insn.size();
57}
58
59/// Calculate the cost of materializing the given constant.
60int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
61 assert(Ty->isIntegerTy());
62
63 unsigned BitSize = Ty->getPrimitiveSizeInBits();
64 if (BitSize == 0)
65 return ~0U;
66
67 // Sign-extend all constants to a multiple of 64-bit.
68 APInt ImmVal = Imm;
69 if (BitSize & 0x3f)
70 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
71
72 // Split the constant into 64-bit chunks and calculate the cost for each
73 // chunk.
74 int Cost = 0;
75 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
76 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
77 int64_t Val = Tmp.getSExtValue();
78 Cost += getIntImmCost(Val);
79 }
80 // We need at least one instruction to materialize the constant.
81 return std::max(1, Cost);
82}
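As a hedged illustration of the chunking loop above (plain integers instead of APInt, with a hypothetical per-chunk cost standing in for AArch64_IMM::expandMOVImm):

// Sketch only; mirrors the shape of the loop above, not its exact cost model.
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Hypothetical per-chunk cost: a zero chunk is free, anything else costs one instruction.
static int chunkCostSketch(uint64_t Chunk) { return Chunk == 0 ? 0 : 1; }

int main() {
  // A hypothetical 128-bit constant, already sign-extended, as two 64-bit chunks.
  const uint64_t Chunks[2] = {0x0000000000001234ull, 0x0000000000000000ull}; // low, high
  int Cost = 0;
  for (uint64_t Chunk : Chunks)
    Cost += chunkCostSketch(Chunk);
  std::printf("materialization cost = %d\n", std::max(1, Cost)); // clamped to at least 1
  return 0;
}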
83
84int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
85 const APInt &Imm, Type *Ty) {
86 assert(Ty->isIntegerTy());
87
88 unsigned BitSize = Ty->getPrimitiveSizeInBits();
89 // There is no cost model for constants with a bit size of 0. Return TCC_Free
90 // here, so that constant hoisting will ignore this constant.
91 if (BitSize == 0)
92 return TTI::TCC_Free;
93
94 unsigned ImmIdx = ~0U;
95 switch (Opcode) {
96 default:
97 return TTI::TCC_Free;
98 case Instruction::GetElementPtr:
99 // Always hoist the base address of a GetElementPtr.
100 if (Idx == 0)
101 return 2 * TTI::TCC_Basic;
102 return TTI::TCC_Free;
103 case Instruction::Store:
104 ImmIdx = 0;
105 break;
106 case Instruction::Add:
107 case Instruction::Sub:
108 case Instruction::Mul:
109 case Instruction::UDiv:
110 case Instruction::SDiv:
111 case Instruction::URem:
112 case Instruction::SRem:
113 case Instruction::And:
114 case Instruction::Or:
115 case Instruction::Xor:
116 case Instruction::ICmp:
117 ImmIdx = 1;
118 break;
119 // Always return TCC_Free for the shift value of a shift instruction.
120 case Instruction::Shl:
121 case Instruction::LShr:
122 case Instruction::AShr:
123 if (Idx == 1)
124 return TTI::TCC_Free;
125 break;
126 case Instruction::Trunc:
127 case Instruction::ZExt:
128 case Instruction::SExt:
129 case Instruction::IntToPtr:
130 case Instruction::PtrToInt:
131 case Instruction::BitCast:
132 case Instruction::PHI:
133 case Instruction::Call:
134 case Instruction::Select:
135 case Instruction::Ret:
136 case Instruction::Load:
137 break;
138 }
139
140 if (Idx == ImmIdx) {
141 int NumConstants = (BitSize + 63) / 64;
142 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
143 return (Cost <= NumConstants * TTI::TCC_Basic)
144 ? static_cast<int>(TTI::TCC_Free)
145 : Cost;
146 }
147 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
148}
149
150int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
151 const APInt &Imm, Type *Ty) {
152 assert(Ty->isIntegerTy());
153
154 unsigned BitSize = Ty->getPrimitiveSizeInBits();
155 // There is no cost model for constants with a bit size of 0. Return TCC_Free
156 // here, so that constant hoisting will ignore this constant.
157 if (BitSize == 0)
158 return TTI::TCC_Free;
159
160 // Most (all?) AArch64 intrinsics do not support folding immediates into the
161 // selected instruction, so we compute the materialization cost for the
162 // immediate directly.
163 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
164 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
165
166 switch (IID) {
167 default:
168 return TTI::TCC_Free;
169 case Intrinsic::sadd_with_overflow:
170 case Intrinsic::uadd_with_overflow:
171 case Intrinsic::ssub_with_overflow:
172 case Intrinsic::usub_with_overflow:
173 case Intrinsic::smul_with_overflow:
174 case Intrinsic::umul_with_overflow:
175 if (Idx == 1) {
176 int NumConstants = (BitSize + 63) / 64;
177 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
178 return (Cost <= NumConstants * TTI::TCC_Basic)
179 ? static_cast<int>(TTI::TCC_Free)
180 : Cost;
181 }
182 break;
183 case Intrinsic::experimental_stackmap:
184 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
185 return TTI::TCC_Free;
186 break;
187 case Intrinsic::experimental_patchpoint_void:
188 case Intrinsic::experimental_patchpoint_i64:
189 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
190 return TTI::TCC_Free;
191 break;
192 }
193 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
194}
195
196TargetTransformInfo::PopcntSupportKind
197AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
198 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
199 if (TyWidth == 32 || TyWidth == 64)
200 return TTI::PSK_FastHardware;
201 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
202 return TTI::PSK_Software;
203}
204
205bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
206 ArrayRef<const Value *> Args) {
207
208 // A helper that returns a vector type from the given type. The number of
209 // elements in type Ty determines the vector width.
210 auto toVectorTy = [&](Type *ArgTy) {
211 return VectorType::get(ArgTy->getScalarType(),
212 DstTy->getVectorNumElements());
213 };
214
215 // Exit early if DstTy is not a vector type whose elements are at least
216 // 16-bits wide.
217 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
218 return false;
219
220 // Determine if the operation has a widening variant. We consider both the
221 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
222 // instructions.
223 //
224 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
225 // verify that their extending operands are eliminated during code
226 // generation.
227 switch (Opcode) {
228 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
229 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
230 break;
231 default:
232 return false;
233 }
234
235 // To be a widening instruction (either the "wide" or "long" versions), the
236 // second operand must be a sign- or zero extend having a single user. We
237 // only consider extends having a single user because they may otherwise not
238 // be eliminated.
239 if (Args.size() != 2 ||
240 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
241 !Args[1]->hasOneUse())
242 return false;
243 auto *Extend = cast<CastInst>(Args[1]);
244
245 // Legalize the destination type and ensure it can be used in a widening
246 // operation.
247 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
248 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
249 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
250 return false;
251
252 // Legalize the source type and ensure it can be used in a widening
253 // operation.
254 Type *SrcTy = toVectorTy(Extend->getSrcTy());
255 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
256 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
257 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
258 return false;
259
260 // Get the total number of vector elements in the legalized types.
261 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
262 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
263
264 // Return true if the legalized types have the same number of vector elements
265 // and the destination element type size is twice that of the source type.
266 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
267}
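A numeric sketch of the final check above, under the assumption of an add on <4 x i32> whose second operand is a zext from <4 x i16> (both types already legal, so each legalizes to a single register):

// Sketch only; assumed element counts and sizes for the example types.
#include <cstdio>

int main() {
  unsigned NumDstEls = 4, NumSrcEls = 4;        // one v4i32 register vs. one v4i16 register
  unsigned SrcElTySize = 16, DstElTySize = 32;  // destination lanes are twice as wide
  bool Widening = (NumDstEls == NumSrcEls) && (2 * SrcElTySize == DstElTySize);
  std::printf("widening candidate: %s\n", Widening ? "yes" : "no"); // yes -> e.g. uaddl/uaddw
  return 0;
}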
268
269int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
270 const Instruction *I) {
271 int ISD = TLI->InstructionOpcodeToISD(Opcode);
272 assert(ISD && "Invalid opcode");
273
274 // If the cast is observable, and it is used by a widening instruction (e.g.,
275 // uaddl, saddw, etc.), it may be free.
276 if (I && I->hasOneUse()) {
277 auto *SingleUser = cast<Instruction>(*I->user_begin());
278 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
279 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
280 // If the cast is the second operand, it is free. We will generate either
281 // a "wide" or "long" version of the widening instruction.
282 if (I == SingleUser->getOperand(1))
283 return 0;
284 // If the cast is not the second operand, it will be free if it looks the
285 // same as the second operand. In this case, we will generate a "long"
286 // version of the widening instruction.
287 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
288 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
289 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
290 return 0;
291 }
292 }
293
294 EVT SrcTy = TLI->getValueType(DL, Src);
295 EVT DstTy = TLI->getValueType(DL, Dst);
296
297 if (!SrcTy.isSimple() || !DstTy.isSimple())
298 return BaseT::getCastInstrCost(Opcode, Dst, Src);
299
300 static const TypeConversionCostTblEntry
301 ConversionTbl[] = {
302 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
303 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
304 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
305 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
306
307 // The number of shll instructions for the extension.
308 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
309 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
310 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
311 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
312 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
313 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
314 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
315 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
316 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
317 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
318 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
319 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
320 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
321 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
322 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
323 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
324
325 // LowerVectorINT_TO_FP:
326 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
327 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
328 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
329 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
330 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
331 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
332
333 // Complex: to v2f32
334 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
335 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
336 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
337 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
338 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
339 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
340
341 // Complex: to v4f32
342 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
343 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
344 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
345 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
346
347 // Complex: to v8f32
348 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
349 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
350 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
351 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
352
353 // Complex: to v16f32
354 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
355 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
356
357 // Complex: to v2f64
358 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
359 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
360 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
361 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
362 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
363 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
364
365
366 // LowerVectorFP_TO_INT
367 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
368 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
369 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
370 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
371 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
372 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
373
374 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
375 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
376 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
377 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
378 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
379 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
380 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
381
382 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
383 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
384 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
385 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
386 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
387
388 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
389 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
390 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
391 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
392 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
393 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
394 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
395 };
396
397 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
398 DstTy.getSimpleVT(),
399 SrcTy.getSimpleVT()))
400 return Entry->Cost;
401
402 return BaseT::getCastInstrCost(Opcode, Dst, Src);
403}
404
405int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
406 VectorType *VecTy,
407 unsigned Index) {
408
409 // Make sure we were given a valid extend opcode.
410 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
411 "Invalid opcode");
412
413 // We are extending an element we extract from a vector, so the source type
414 // of the extend is the element type of the vector.
415 auto *Src = VecTy->getElementType();
416
417 // Sign- and zero-extends are for integer types only.
418 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
419
420 // Get the cost for the extract. We compute the cost (if any) for the extend
421 // below.
422 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
423
424 // Legalize the types.
425 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
426 auto DstVT = TLI->getValueType(DL, Dst);
427 auto SrcVT = TLI->getValueType(DL, Src);
428
429 // If the resulting type is still a vector and the destination type is legal,
430 // we may get the extension for free. If not, get the default cost for the
431 // extend.
432 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
433 return Cost + getCastInstrCost(Opcode, Dst, Src);
434
435 // The destination type should be larger than the element type. If not, get
436 // the default cost for the extend.
437 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
438 return Cost + getCastInstrCost(Opcode, Dst, Src);
439
440 switch (Opcode) {
441 default:
442 llvm_unreachable("Opcode should be either SExt or ZExt");
443
444 // For sign-extends, we only need a smov, which performs the extension
445 // automatically.
446 case Instruction::SExt:
447 return Cost;
448
449 // For zero-extends, the extend is performed automatically by a umov unless
450 // the destination type is i64 and the element type is i8 or i16.
451 case Instruction::ZExt:
452 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
453 return Cost;
454 }
455
456 // If we are unable to perform the extend for free, get the default cost.
457 return Cost + getCastInstrCost(Opcode, Dst, Src);
458}
459
460int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
461 unsigned Index) {
462 assert(Val->isVectorTy() && "This must be a vector type");
463
464 if (Index != -1U) {
465 // Legalize the type.
466 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
467
468 // This type is legalized to a scalar type.
469 if (!LT.second.isVector())
470 return 0;
471
472 // The type may be split. Normalize the index to the new type.
473 unsigned Width = LT.second.getVectorNumElements();
474 Index = Index % Width;
475
476 // The element at index zero is already inside the vector.
477 if (Index == 0)
478 return 0;
479 }
480
481 // All other insert/extracts cost this much.
482 return ST->getVectorInsertExtractBaseCost();
483}
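A small sketch of the index normalization above, assuming a v8i32 value that legalization splits into two v4i32 registers:

// Sketch only; the split width is an assumption for illustration.
#include <cstdio>

int main() {
  unsigned LegalizedWidth = 4;  // assumed: v8i32 splits into two v4i32 registers
  unsigned Index = 5;           // element requested from the original v8i32 value
  unsigned Normalized = Index % LegalizedWidth;  // -> 1, i.e. lane 1 of the second half
  // Lane 0 would be free; any other lane pays ST->getVectorInsertExtractBaseCost().
  std::printf("normalized index = %u\n", Normalized);
  return 0;
}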
484
485int AArch64TTIImpl::getArithmeticInstrCost(
486 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
487 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
488 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
489 const Instruction *CxtI) {
490 // Legalize the type.
491 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
492
493 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
494 // add in the widening overhead specified by the sub-target. Since the
495 // extends feeding widening instructions are performed automatically, they
496 // aren't present in the generated code and have a zero cost. By adding a
497 // widening overhead here, we attach the total cost of the combined operation
498 // to the widening instruction.
499 int Cost = 0;
500 if (isWideningInstruction(Ty, Opcode, Args))
501 Cost += ST->getWideningBaseCost();
502
503 int ISD = TLI->InstructionOpcodeToISD(Opcode);
504
505 switch (ISD) {
506 default:
507 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
508 Opd1PropInfo, Opd2PropInfo);
509 case ISD::SDIV:
510 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
511 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
512 // On AArch64, scalar signed division by a power-of-two constant is
513 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
514 // The OperandValue properties may not be the same as those of the previous
515 // operation; conservatively assume OP_None.
516 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
517 TargetTransformInfo::OP_None,
518 TargetTransformInfo::OP_None);
519 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
520 TargetTransformInfo::OP_None,
521 TargetTransformInfo::OP_None);
522 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
523 TargetTransformInfo::OP_None,
524 TargetTransformInfo::OP_None);
525 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
526 TargetTransformInfo::OP_None,
527 TargetTransformInfo::OP_None);
528 return Cost;
529 }
530 LLVM_FALLTHROUGH;
531 case ISD::UDIV:
532 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
533 auto VT = TLI->getValueType(DL, Ty);
534 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
535 // Vector signed division by a constant is expanded to the
536 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
537 // to MULHS + SUB + SRL + ADD + SRL.
538 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
539 Opd2Info,
540 TargetTransformInfo::OP_None,
541 TargetTransformInfo::OP_None);
542 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
543 Opd2Info,
544 TargetTransformInfo::OP_None,
545 TargetTransformInfo::OP_None);
546 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
547 Opd2Info,
548 TargetTransformInfo::OP_None,
549 TargetTransformInfo::OP_None);
550 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
551 }
552 }
553
554 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
555 Opd1PropInfo, Opd2PropInfo);
556 if (Ty->isVectorTy()) {
557 // On AArch64, vector divisions are not supported natively and are
558 // expanded into scalar divisions of each pair of elements.
559 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
560 Opd2Info, Opd1PropInfo, Opd2PropInfo);
561 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
562 Opd2Info, Opd1PropInfo, Opd2PropInfo);
563 // TODO: if one of the arguments is scalar, then it's not necessary to
564 // double the cost of handling the vector elements.
565 Cost += Cost;
566 }
567 return Cost;
568
569 case ISD::ADD:
570 case ISD::MUL:
571 case ISD::XOR:
572 case ISD::OR:
573 case ISD::AND:
574 // These nodes are marked as 'custom' for combining purposes only.
575 // We know that they are legal. See LowerAdd in ISelLowering.
576 return (Cost + 1) * LT.first;
577 }
578}
579
580int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
581 const SCEV *Ptr) {
582 // Address computations in vectorized code with non-consecutive addresses will
583 // likely result in more instructions compared to scalar code where the
584 // computation can more often be merged into the index mode. The resulting
585 // extra micro-ops can significantly decrease throughput.
586 unsigned NumVectorInstToHideOverhead = 10;
587 int MaxMergeDistance = 64;
588
589 if (Ty->isVectorTy() && SE &&
590 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
591 return NumVectorInstToHideOverhead;
592
593 // In many cases the address computation is not merged into the instruction
594 // addressing mode.
595 return 1;
596}
597
598int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
599 Type *CondTy, const Instruction *I) {
600
601 int ISD = TLI->InstructionOpcodeToISD(Opcode);
602 // We don't lower some vector selects well that are wider than the register
603 // width.
604 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
605 // We would need this many instructions to hide the scalarization happening.
606 const int AmortizationCost = 20;
607 static const TypeConversionCostTblEntry
608 VectorSelectTbl[] = {
609 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
610 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
611 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
612 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
613 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
614 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
615 };
616
617 EVT SelCondTy = TLI->getValueType(DL, CondTy);
618 EVT SelValTy = TLI->getValueType(DL, ValTy);
619 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
620 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
621 SelCondTy.getSimpleVT(),
622 SelValTy.getSimpleVT()))
623 return Entry->Cost;
624 }
625 }
626 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
627}
628
629AArch64TTIImpl::TTI::MemCmpExpansionOptions
630AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
631 TTI::MemCmpExpansionOptions Options;
632 Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
633 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
634 Options.NumLoadsPerBlock = Options.MaxNumLoads;
635 // TODO: Though vector loads usually perform well on AArch64, in some targets
636 // they may wake up the FP unit, which raises the power consumption. Perhaps
637 // they could be used with no holds barred (-O3).
638 Options.LoadSizes = {8, 4, 2, 1};
639 return Options;
640}
641
642int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
643 MaybeAlign Alignment, unsigned AddressSpace,
644 const Instruction *I) {
645 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
646
647 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
648 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
649 // Unaligned stores are extremely inefficient. We don't split all
650 // unaligned 128-bit stores because of the negative impact that has been shown in
651 // practice on inlined block copy code.
652 // We make such stores expensive so that we will only vectorize if there
653 // are 6 other instructions getting vectorized.
654 const int AmortizationCost = 6;
655
656 return LT.first * 2 * AmortizationCost;
657 }
658
659 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
660 unsigned ProfitableNumElements;
661 if (Opcode == Instruction::Store)
662 // We use a custom trunc store lowering so v.4b should be profitable.
663 ProfitableNumElements = 4;
664 else
665 // We scalarize the loads because there is no v.4b register and we
666 // have to promote the elements to v.2.
667 ProfitableNumElements = 8;
668
669 if (Ty->getVectorNumElements() < ProfitableNumElements) {
670 unsigned NumVecElts = Ty->getVectorNumElements();
671 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
672 // We generate 2 instructions per vector element.
673 return NumVectorizableInstsToAmortize * NumVecElts * 2;
674 }
675 }
676
677 return LT.first;
678}
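A worked sketch of the two special cases above, using assumed inputs (one legal 128-bit register for the misaligned store, and a v4i8 load that falls below the eight-element threshold):

// Sketch only; the numbers follow the formulas above, not a measured cost model.
#include <cstdio>

int main() {
  // Case 1: slow misaligned 128-bit store with one legal register (LT.first == 1).
  int AmortizationCost = 6;
  int MisalignedStoreCost = 1 * 2 * AmortizationCost;           // 12
  // Case 2: load of a v4i8 vector, below the eight-element threshold for loads.
  unsigned NumVecElts = 4;
  unsigned InstsToAmortize = NumVecElts * 2;                    // 8
  unsigned SmallVecLoadCost = InstsToAmortize * NumVecElts * 2; // 64
  std::printf("misaligned store = %d, v4i8 load = %u\n", MisalignedStoreCost, SmallVecLoadCost);
  return 0;
}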
679
680int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
681 unsigned Factor,
682 ArrayRef<unsigned> Indices,
683 unsigned Alignment,
684 unsigned AddressSpace,
685 bool UseMaskForCond,
686 bool UseMaskForGaps) {
687 assert(Factor >= 2 && "Invalid interleave factor");
688 assert(isa<VectorType>(VecTy) && "Expect a vector type");
689
690 if (!UseMaskForCond && !UseMaskForGaps &&
691 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
692 unsigned NumElts = VecTy->getVectorNumElements();
693 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
694
695 // ldN/stN only support legal vector types of size 64 or 128 in bits.
696 // Accesses having vector types that are a multiple of 128 bits can be
697 // matched to more than one ldN/stN instruction.
698 if (NumElts % Factor == 0 &&
699 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
700 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
701 }
702
703 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
704 Alignment, AddressSpace,
705 UseMaskForCond, UseMaskForGaps);
706}
707
708int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
709 int Cost = 0;
710 for (auto *I : Tys) {
711 if (!I->isVectorTy())
712 continue;
713 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
714 Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
715 getMemoryOpCost(Instruction::Load, I, Align(128), 0);
716 }
717 return Cost;
718}
719
720unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
721 return ST->getMaxInterleaveFactor();
722}
723
724// For Falkor, we want to avoid having too many strided loads in a loop since
725// that can exhaust the HW prefetcher resources. We adjust the unroller
726// MaxCount preference below to attempt to ensure unrolling doesn't create too
727// many strided loads.
728static void
729getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
730 TargetTransformInfo::UnrollingPreferences &UP) {
731 enum { MaxStridedLoads = 7 };
732 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
733 int StridedLoads = 0;
734 // FIXME? We could make this more precise by looking at the CFG and
735 // e.g. not counting loads in each side of an if-then-else diamond.
736 for (const auto BB : L->blocks()) {
737 for (auto &I : *BB) {
738 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
739 if (!LMemI)
740 continue;
741
742 Value *PtrValue = LMemI->getPointerOperand();
743 if (L->isLoopInvariant(PtrValue))
744 continue;
745
746 const SCEV *LSCEV = SE.getSCEV(PtrValue);
747 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
748 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
749 continue;
750
751 // FIXME? We could take pairing of unrolled load copies into account
752 // by looking at the AddRec, but we would probably have to limit this
753 // to loops with no stores or other memory optimization barriers.
754 ++StridedLoads;
755 // We've seen enough strided loads that seeing more won't make a
756 // difference.
757 if (StridedLoads > MaxStridedLoads / 2)
758 return StridedLoads;
759 }
760 }
761 return StridedLoads;
762 };
763
764 int StridedLoads = countStridedLoads(L, SE);
765 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
7. Assuming 'DebugFlag' is false
8. Loop condition is false. Exiting loop
766 << " strided loads\n");
767 // Pick the largest power of 2 unroll count that won't result in too many
768 // strided loads.
769 if (StridedLoads) {
9. Assuming 'StridedLoads' is not equal to 0
10. Taking true branch
770 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
11. Calling 'Log2_32'
13. Returning from 'Log2_32'
14. The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
771 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
772 << UP.MaxCount << '\n');
773 }
774}
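A hedged sketch of one way the shift could be kept in range (not necessarily how upstream addressed the report): clamp the strided-load count so the quotient passed to Log2_32 is always at least 1. Modeled below as a standalone program with a stand-in log2:

// Sketch only; the clamp and the log2 stand-in are assumptions for illustration.
#include <algorithm>
#include <cstdio>

// Stand-in for Log2_32, valid for V >= 1 (floor log base 2).
static unsigned log2Sketch(unsigned V) {
  unsigned L = 0;
  while (V >>= 1)
    ++L;
  return L;
}

int main() {
  const unsigned MaxStridedLoads = 7;
  unsigned StridedLoads = 8;                                       // hypothetical oversized count
  unsigned Clamped = std::min(StridedLoads, MaxStridedLoads);      // 7, so the quotient is >= 1
  unsigned MaxCount = 1u << log2Sketch(MaxStridedLoads / Clamped); // shift amount stays in [0, 31]
  std::printf("unroll MaxCount = %u\n", MaxCount);                 // 1
  return 0;
}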
775
776void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
777 TTI::UnrollingPreferences &UP) {
778 // Enable partial unrolling and runtime unrolling.
779 BaseT::getUnrollingPreferences(L, SE, UP);
780
781 // For inner loop, it is more likely to be a hot one, and the runtime check
782 // can be promoted out from LICM pass, so the overhead is less, let's try
783 // a larger threshold to unroll more loops.
784 if (L->getLoopDepth() > 1)
1. Assuming the condition is false
2. Taking false branch
785 UP.PartialThreshold *= 2;
786
787 // Disable partial & runtime unrolling on -Os.
788 UP.PartialOptSizeThreshold = 0;
789
790 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
3. Assuming the condition is true
5. Taking true branch
791 EnableFalkorHWPFUnrollFix)
4. Assuming the condition is true
792 getFalkorUnrollingPreferences(L, SE, UP);
6. Calling 'getFalkorUnrollingPreferences'
793}
794
795Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
796 Type *ExpectedType) {
797 switch (Inst->getIntrinsicID()) {
798 default:
799 return nullptr;
800 case Intrinsic::aarch64_neon_st2:
801 case Intrinsic::aarch64_neon_st3:
802 case Intrinsic::aarch64_neon_st4: {
803 // Create a struct type
804 StructType *ST = dyn_cast<StructType>(ExpectedType);
805 if (!ST)
806 return nullptr;
807 unsigned NumElts = Inst->getNumArgOperands() - 1;
808 if (ST->getNumElements() != NumElts)
809 return nullptr;
810 for (unsigned i = 0, e = NumElts; i != e; ++i) {
811 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
812 return nullptr;
813 }
814 Value *Res = UndefValue::get(ExpectedType);
815 IRBuilder<> Builder(Inst);
816 for (unsigned i = 0, e = NumElts; i != e; ++i) {
817 Value *L = Inst->getArgOperand(i);
818 Res = Builder.CreateInsertValue(Res, L, i);
819 }
820 return Res;
821 }
822 case Intrinsic::aarch64_neon_ld2:
823 case Intrinsic::aarch64_neon_ld3:
824 case Intrinsic::aarch64_neon_ld4:
825 if (Inst->getType() == ExpectedType)
826 return Inst;
827 return nullptr;
828 }
829}
830
831bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
832 MemIntrinsicInfo &Info) {
833 switch (Inst->getIntrinsicID()) {
834 default:
835 break;
836 case Intrinsic::aarch64_neon_ld2:
837 case Intrinsic::aarch64_neon_ld3:
838 case Intrinsic::aarch64_neon_ld4:
839 Info.ReadMem = true;
840 Info.WriteMem = false;
841 Info.PtrVal = Inst->getArgOperand(0);
842 break;
843 case Intrinsic::aarch64_neon_st2:
844 case Intrinsic::aarch64_neon_st3:
845 case Intrinsic::aarch64_neon_st4:
846 Info.ReadMem = false;
847 Info.WriteMem = true;
848 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
849 break;
850 }
851
852 switch (Inst->getIntrinsicID()) {
853 default:
854 return false;
855 case Intrinsic::aarch64_neon_ld2:
856 case Intrinsic::aarch64_neon_st2:
857 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
858 break;
859 case Intrinsic::aarch64_neon_ld3:
860 case Intrinsic::aarch64_neon_st3:
861 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
862 break;
863 case Intrinsic::aarch64_neon_ld4:
864 case Intrinsic::aarch64_neon_st4:
865 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
866 break;
867 }
868 return true;
869}
870
871/// See if \p I should be considered for address type promotion. We check if \p
872 /// I is a sext with the right type and used in memory accesses. If it is used in a
873/// "complex" getelementptr, we allow it to be promoted without finding other
874/// sext instructions that sign extended the same initial value. A getelementptr
875/// is considered as "complex" if it has more than 2 operands.
876bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
877 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
878 bool Considerable = false;
879 AllowPromotionWithoutCommonHeader = false;
880 if (!isa<SExtInst>(&I))
881 return false;
882 Type *ConsideredSExtType =
883 Type::getInt64Ty(I.getParent()->getParent()->getContext());
884 if (I.getType() != ConsideredSExtType)
885 return false;
886 // See if the sext is the one with the right type and used in at least one
887 // GetElementPtrInst.
888 for (const User *U : I.users()) {
889 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
890 Considerable = true;
891 // A getelementptr is considered as "complex" if it has more than 2
892 // operands. We will promote a SExt used in such complex GEP as we
893 // expect some computation to be merged if they are done on 64 bits.
894 if (GEPInst->getNumOperands() > 2) {
895 AllowPromotionWithoutCommonHeader = true;
896 break;
897 }
898 }
899 }
900 return Considerable;
901}
902
903bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
904 TTI::ReductionFlags Flags) const {
905 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
906 unsigned ScalarBits = Ty->getScalarSizeInBits();
907 switch (Opcode) {
908 case Instruction::FAdd:
909 case Instruction::FMul:
910 case Instruction::And:
911 case Instruction::Or:
912 case Instruction::Xor:
913 case Instruction::Mul:
914 return false;
915 case Instruction::Add:
916 return ScalarBits * Ty->getVectorNumElements() >= 128;
917 case Instruction::ICmp:
918 return (ScalarBits < 64) &&
919 (ScalarBits * Ty->getVectorNumElements() >= 128);
920 case Instruction::FCmp:
921 return Flags.NoNaN;
922 default:
923 llvm_unreachable("Unhandled reduction opcode");
924 }
925 return false;
926}
927
928int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
929 bool IsPairwiseForm) {
930
931 if (IsPairwiseForm)
932 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
933
934 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
935 MVT MTy = LT.second;
936 int ISD = TLI->InstructionOpcodeToISD(Opcode);
937 assert(ISD && "Invalid opcode");
938
939 // Horizontal adds can use the 'addv' instruction. We model the cost of these
940 // instructions as normal vector adds. This is the only arithmetic vector
941 // reduction operation for which we have an instruction.
942 static const CostTblEntry CostTblNoPairwise[]{
943 {ISD::ADD, MVT::v8i8, 1},
944 {ISD::ADD, MVT::v16i8, 1},
945 {ISD::ADD, MVT::v4i16, 1},
946 {ISD::ADD, MVT::v8i16, 1},
947 {ISD::ADD, MVT::v4i32, 1},
948 };
949
950 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
951 return LT.first * Entry->Cost;
952
953 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
954}
955
956int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
957 Type *SubTp) {
958 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
959 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
960 static const CostTblEntry ShuffleTbl[] = {
961 // Broadcast shuffle kinds can be performed with 'dup'.
962 { TTI::SK_Broadcast, MVT::v8i8, 1 },
963 { TTI::SK_Broadcast, MVT::v16i8, 1 },
964 { TTI::SK_Broadcast, MVT::v4i16, 1 },
965 { TTI::SK_Broadcast, MVT::v8i16, 1 },
966 { TTI::SK_Broadcast, MVT::v2i32, 1 },
967 { TTI::SK_Broadcast, MVT::v4i32, 1 },
968 { TTI::SK_Broadcast, MVT::v2i64, 1 },
969 { TTI::SK_Broadcast, MVT::v2f32, 1 },
970 { TTI::SK_Broadcast, MVT::v4f32, 1 },
971 { TTI::SK_Broadcast, MVT::v2f64, 1 },
972 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
973 // 'zip1/zip2' instructions.
974 { TTI::SK_Transpose, MVT::v8i8, 1 },
975 { TTI::SK_Transpose, MVT::v16i8, 1 },
976 { TTI::SK_Transpose, MVT::v4i16, 1 },
977 { TTI::SK_Transpose, MVT::v8i16, 1 },
978 { TTI::SK_Transpose, MVT::v2i32, 1 },
979 { TTI::SK_Transpose, MVT::v4i32, 1 },
980 { TTI::SK_Transpose, MVT::v2i64, 1 },
981 { TTI::SK_Transpose, MVT::v2f32, 1 },
982 { TTI::SK_Transpose, MVT::v4f32, 1 },
983 { TTI::SK_Transpose, MVT::v2f64, 1 },
984 // Select shuffle kinds.
985 // TODO: handle vXi8/vXi16.
986 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
987 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
988 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
989 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
990 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
991 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
992 // PermuteSingleSrc shuffle kinds.
993 // TODO: handle vXi8/vXi16.
994 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
995 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
996 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
997 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
998 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
999 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
1000 };
1001 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
1002 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
1003 return LT.first * Entry->Cost;
1004 }
1005
1006 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
1007}

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/SwapByteOrder.h"
18#include <algorithm>
19#include <cassert>
20#include <climits>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather than including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB != ZB_Undefined && Val == 0)
117 return 32;
118
119#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
133 return 64;
134
135#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count number of 0's from the least significant bit to the most
149/// stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
161}
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz) || defined(__GNUC__)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count number of 0's from the most significant bit to the least
218/// stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250 const unsigned Bits = CHAR_BIT * sizeof(T);
251 assert(N <= Bits && "Invalid bit index");
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
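A brief usage sketch of the helper above (a standalone equivalent for illustration, not the LLVM template itself): maskTrailingOnes<uint32_t>(8) yields 0x000000FF, and N == 0 yields 0.

// Sketch only; mirrors the helper's behavior in a self-contained program.
#include <climits>
#include <cstdint>
#include <cstdio>

template <typename T> static T maskTrailingOnesSketch(unsigned N) {
  const unsigned Bits = CHAR_BIT * sizeof(T);
  return N == 0 ? T(0) : T(T(-1) >> (Bits - N));
}

int main() {
  std::printf("0x%08X\n", (unsigned)maskTrailingOnesSketch<uint32_t>(8)); // 0x000000FF
  std::printf("0x%08X\n", (unsigned)maskTrailingOnesSketch<uint32_t>(0)); // 0x00000000
  return 0;
}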
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
271}
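// Illustrative sketch, not part of the original header (hypothetical function
// name): the four mask helpers above for a 32-bit type and N == 8.
inline void exampleMasks() {
  assert(maskTrailingOnes<uint32_t>(8)  == 0x000000FFu);
  assert(maskLeadingOnes<uint32_t>(8)   == 0xFF000000u);
  assert(maskTrailingZeros<uint32_t>(8) == 0xFFFFFF00u);
  assert(maskLeadingZeros<uint32_t>(8)  == 0x00FFFFFFu);
}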
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
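// Illustrative sketch, not part of the original header (hypothetical function
// name): findFirstSet/findLastSet on a value with bits 3 and 4 set, plus the
// default ZB_Max behavior on zero.
inline void exampleFindSet() {
  assert(findFirstSet(uint32_t(0x18)) == 3);   // 0b11000: lowest set bit at index 3
  assert(findLastSet(uint32_t(0x18)) == 4);    // highest set bit at index 4
  assert(findFirstSet(uint32_t(0)) == std::numeric_limits<uint32_t>::max());
}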
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
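// Illustrative sketch, not part of the original header (hypothetical function
// name): the byte loop above reverses bytes and bit-reverses each byte via the
// table, which amounts to a full bit reversal regardless of host endianness.
inline void exampleReverseBits() {
  assert(reverseBits(uint8_t(0x01)) == 0x80);              // bit 0 moves to bit 7
  assert(reverseBits(uint32_t(0x00000001u)) == 0x80000000u);
  assert(reverseBits(uint16_t(0x00FFu)) == 0xFF00u);
}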
314
315// NOTE: The following support functions use the _32/_64 extensions instead of
316// type overloading so that signed and unsigned integers can be used without
317// ambiguity.
318
319/// Return the high 32 bits of a 64 bit value.
320constexpr inline uint32_t Hi_32(uint64_t Value) {
321 return static_cast<uint32_t>(Value >> 32);
322}
323
324/// Return the low 32 bits of a 64 bit value.
325constexpr inline uint32_t Lo_32(uint64_t Value) {
326 return static_cast<uint32_t>(Value);
327}
328
329/// Make a 64-bit integer from a high / low pair of 32-bit integers.
330constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
331 return ((uint64_t)High << 32) | (uint64_t)Low;
332}
333
334/// Checks if an integer fits into the given bit width.
335template <unsigned N> constexpr inline bool isInt(int64_t x) {
336 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
337}
338// Template specializations to get better code for common cases.
339template <> constexpr inline bool isInt<8>(int64_t x) {
340 return static_cast<int8_t>(x) == x;
341}
342template <> constexpr inline bool isInt<16>(int64_t x) {
343 return static_cast<int16_t>(x) == x;
344}
345template <> constexpr inline bool isInt<32>(int64_t x) {
346 return static_cast<int32_t>(x) == x;
347}
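// Illustrative sketch, not part of the original header: the signed range
// checks are constexpr, so they can be exercised with static_assert.
static_assert(isInt<8>(127) && isInt<8>(-128), "int8 range endpoints");
static_assert(!isInt<8>(128), "128 needs 9 bits when signed");
static_assert(isInt<16>(-32768) && !isInt<16>(32768), "int16 range endpoints");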
348
349/// Checks if a signed integer is an N bit number shifted left by S.
350template <unsigned N, unsigned S>
351constexpr inline bool isShiftedInt(int64_t x) {
352 static_assert(
353 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number).");
354 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
355 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
356}
357
358/// Checks if an unsigned integer fits into the given bit width.
359///
360/// This is written as two functions rather than as simply
361///
362/// return N >= 64 || X < (UINT64_C(1) << N);
363///
364/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
365/// left too many places.
366template <unsigned N>
367constexpr inline typename std::enable_if<(N < 64), bool>::type
368isUInt(uint64_t X) {
369 static_assert(N > 0, "isUInt<0> doesn't make sense");
370 return X < (UINT64_C(1) << (N));
371}
372template <unsigned N>
373constexpr inline typename std::enable_if<N >= 64, bool>::type
374isUInt(uint64_t X) {
375 return true;
376}
377
378// Template specializations to get better code for common cases.
379template <> constexpr inline bool isUInt<8>(uint64_t x) {
380 return static_cast<uint8_t>(x) == x;
381}
382template <> constexpr inline bool isUInt<16>(uint64_t x) {
383 return static_cast<uint16_t>(x) == x;
384}
385template <> constexpr inline bool isUInt<32>(uint64_t x) {
386 return static_cast<uint32_t>(x) == x;
387}
388
389/// Checks if an unsigned integer is an N bit number shifted left by S.
390template <unsigned N, unsigned S>
391constexpr inline bool isShiftedUInt(uint64_t x) {
392 static_assert(
393 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
394 static_assert(N + S <= 64,
395 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
396 // Per the two static_asserts above, S must be strictly less than 64. So
397 // 1 << S is not undefined behavior.
398 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
399}
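// Illustrative sketch, not part of the original header: the analogous
// compile-time checks for the unsigned predicates above.
static_assert(isUInt<8>(255) && !isUInt<8>(256), "uint8 range endpoint");
static_assert(isShiftedUInt<8, 4>(0xFF0), "8-bit value shifted left by 4");
static_assert(!isShiftedUInt<8, 4>(0xFF1), "low S bits must be zero");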
400
401/// Gets the maximum value for a N-bit unsigned integer.
402inline uint64_t maxUIntN(uint64_t N) {
403 assert(N > 0 && N <= 64 && "integer width out of range");
404
405 // uint64_t(1) << 64 is undefined behavior, so we can't do
406 // (uint64_t(1) << N) - 1
407 // without checking first that N != 64. But this works and doesn't have a
408 // branch.
409 return UINT64_MAX >> (64 - N);
410}
411
412/// Gets the minimum value for a N-bit signed integer.
413inline int64_t minIntN(int64_t N) {
414 assert(N > 0 && N <= 64 && "integer width out of range");
415
416 return -(UINT64_C(1)<<(N-1));
417}
418
419/// Gets the maximum value for a N-bit signed integer.
420inline int64_t maxIntN(int64_t N) {
421 assert(N > 0 && N <= 64 && "integer width out of range");
422
423 // This relies on two's complement wraparound when N == 64, so we convert to
424 // int64_t only at the very end to avoid UB.
425 return (UINT64_C(1) << (N - 1)) - 1;
426}
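// Illustrative sketch, not part of the original header (hypothetical function
// name): the dynamic N-bit range helpers, including the N == 64 case that the
// shift-free formulation of maxUIntN handles without UB.
inline void exampleIntRanges() {
  assert(maxUIntN(8) == 255);
  assert(minIntN(8) == -128 && maxIntN(8) == 127);
  assert(maxUIntN(64) == UINT64_MAX);
}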
427
428/// Checks if an unsigned integer fits into the given (dynamic) bit width.
429inline bool isUIntN(unsigned N, uint64_t x) {
430 return N >= 64 || x <= maxUIntN(N);
431}
432
434/// Checks if a signed integer fits into the given (dynamic) bit width.
434inline bool isIntN(unsigned N, int64_t x) {
435 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
436}
437
438/// Return true if the argument is a non-empty sequence of ones starting at the
439/// least significant bit with the remainder zero (32 bit version).
440/// Ex. isMask_32(0x0000FFFFU) == true.
441constexpr inline bool isMask_32(uint32_t Value) {
442 return Value && ((Value + 1) & Value) == 0;
443}
444
445/// Return true if the argument is a non-empty sequence of ones starting at the
446/// least significant bit with the remainder zero (64 bit version).
447constexpr inline bool isMask_64(uint64_t Value) {
448 return Value && ((Value + 1) & Value) == 0;
449}
450
451/// Return true if the argument contains a non-empty sequence of ones with the
452/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
453constexpr inline bool isShiftedMask_32(uint32_t Value) {
454 return Value && isMask_32((Value - 1) | Value);
455}
456
457/// Return true if the argument contains a non-empty sequence of ones with the
458/// remainder zero (64 bit version.)
459constexpr inline bool isShiftedMask_64(uint64_t Value) {
460 return Value && isMask_64((Value - 1) | Value);
461}
462
463/// Return true if the argument is a power of two > 0.
464/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
465constexpr inline bool isPowerOf2_32(uint32_t Value) {
466 return Value && !(Value & (Value - 1));
467}
468
469/// Return true if the argument is a power of two > 0 (64 bit edition.)
470constexpr inline bool isPowerOf2_64(uint64_t Value) {
471 return Value && !(Value & (Value - 1));
472}
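// Illustrative sketch, not part of the original header: compile-time examples
// of the mask and power-of-two predicates above.
static_assert(isMask_32(0x0000FFFFu), "contiguous ones starting at bit 0");
static_assert(!isMask_32(0x0000FF00u) && isShiftedMask_32(0x0000FF00u),
              "a shifted mask need not start at bit 0");
static_assert(isPowerOf2_64(1u) && !isPowerOf2_32(0u),
              "power-of-two check excludes zero");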
473
474/// Return a byte-swapped representation of the 16-bit argument.
475inline uint16_t ByteSwap_16(uint16_t Value) {
476 return sys::SwapByteOrder_16(Value);
477}
478
479/// Return a byte-swapped representation of the 32-bit argument.
480inline uint32_t ByteSwap_32(uint32_t Value) {
481 return sys::SwapByteOrder_32(Value);
482}
483
484/// Return a byte-swapped representation of the 64-bit argument.
485inline uint64_t ByteSwap_64(uint64_t Value) {
486 return sys::SwapByteOrder_64(Value);
487}
488
489/// Count the number of ones from the most significant bit to the first
490/// zero bit.
491///
492/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
493/// Only unsigned integral types are allowed.
494///
495/// \param ZB the behavior on an input of all ones. Only ZB_Width and
496/// ZB_Undefined are valid arguments.
497template <typename T>
498unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
499 static_assert(std::numeric_limits<T>::is_integer &&
500 !std::numeric_limits<T>::is_signed,
501 "Only unsigned integral types are allowed.");
502 return countLeadingZeros<T>(~Value, ZB);
503}
504
505/// Count the number of ones from the least significant bit to the first
506/// zero bit.
507///
508/// Ex. countTrailingOnes(0x00FF00FF) == 8.
509/// Only unsigned integral types are allowed.
510///
511/// \param ZB the behavior on an input of all ones. Only ZB_Width and
512/// ZB_Undefined are valid arguments.
513template <typename T>
514unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
515 static_assert(std::numeric_limits<T>::is_integer &&
516 !std::numeric_limits<T>::is_signed,
517 "Only unsigned integral types are allowed.");
518 return countTrailingZeros<T>(~Value, ZB);
519}
520
521namespace detail {
522template <typename T, std::size_t SizeOfT> struct PopulationCounter {
523 static unsigned count(T Value) {
524 // Generic version, forward to 32 bits.
525 static_assert(SizeOfT <= 4, "Not implemented!");
526#if defined(__GNUC__)
527 return __builtin_popcount(Value);
528#else
529 uint32_t v = Value;
530 v = v - ((v >> 1) & 0x55555555);
531 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
532 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
533#endif
534 }
535};
536
537template <typename T> struct PopulationCounter<T, 8> {
538 static unsigned count(T Value) {
539#if defined(__GNUC__)
540 return __builtin_popcountll(Value);
541#else
542 uint64_t v = Value;
543 v = v - ((v >> 1) & 0x5555555555555555ULL);
544 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
545 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
546 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
547#endif
548 }
549};
550} // namespace detail
551
552/// Count the number of set bits in a value.
553/// Ex. countPopulation(0xF000F000) = 8
554/// Returns 0 if the word is zero.
555template <typename T>
556inline unsigned countPopulation(T Value) {
557 static_assert(std::numeric_limits<T>::is_integer &&
558 !std::numeric_limits<T>::is_signed,
559 "Only unsigned integral types are allowed.");
560 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
561}
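// Illustrative sketch, not part of the original header (hypothetical function
// name): the values used in the doc comments above for popcount and the
// leading/trailing ones counters.
inline void examplePopcountAndOnes() {
  assert(countPopulation(0xF000F000u) == 8);
  assert(countLeadingOnes(0xFF0FFF00u) == 8);
  assert(countTrailingOnes(0x00FF00FFu) == 8);
}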
562
563/// Compile time Log2.
564/// Valid only for positive powers of two.
565template <size_t kValue> constexpr inline size_t CTLog2() {
566 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
567 "Value is not a valid power of 2");
568 return 1 + CTLog2<kValue / 2>();
569}
570
571template <> constexpr inline size_t CTLog2<1>() { return 0; }
572
573/// Return the log base 2 of the specified value.
574inline double Log2(double Value) {
575#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
576 return __builtin_log(Value) / __builtin_log(2.0);
577#else
578 return log2(Value);
579#endif
580}
581
582/// Return the floor log base 2 of the specified value, -1 if the value is zero.
583/// (32 bit edition.)
584/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
585inline unsigned Log2_32(uint32_t Value) {
586 return 31 - countLeadingZeros(Value);
12) Returning the value 4294967295
587}
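// Illustrative sketch, not part of the original header (hypothetical function
// name): with the default ZB_Width, countLeadingZeros(uint32_t(0)) returns 32,
// so Log2_32(0) computes 31 - 32 in unsigned arithmetic and wraps to
// 4294967295, matching analyzer step 12 above. The call itself is well
// defined; using the wrapped result as a shift amount is the undefined
// behavior flagged at the call site in AArch64TargetTransformInfo.cpp.
inline void exampleLog2OfZero() {
  assert(Log2_32(32) == 5);
  assert(Log2_32(0) == 4294967295u);  // wrapped "-1"; do not use as a shift amount
}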
588
589/// Return the floor log base 2 of the specified value, -1 if the value is zero.
590/// (64 bit edition.)
591inline unsigned Log2_64(uint64_t Value) {
592 return 63 - countLeadingZeros(Value);
593}
594
595/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
596/// (32 bit edition).
597/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
598inline unsigned Log2_32_Ceil(uint32_t Value) {
599 return 32 - countLeadingZeros(Value - 1);
600}
601
602/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
603/// (64 bit edition.)
604inline unsigned Log2_64_Ceil(uint64_t Value) {
605 return 64 - countLeadingZeros(Value - 1);
606}
607
608/// Return the greatest common divisor of the values using Euclid's algorithm.
609template <typename T>
610inline T greatestCommonDivisor(T A, T B) {
611 while (B) {
612 T Tmp = B;
613 B = A % B;
614 A = Tmp;
615 }
616 return A;
617}
618
619inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
620 return greatestCommonDivisor<uint64_t>(A, B);
621}
622
623/// This function takes a 64-bit integer and returns the bit equivalent double.
624inline double BitsToDouble(uint64_t Bits) {
625 double D;
626 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
627 memcpy(&D, &Bits, sizeof(Bits));
628 return D;
629}
630
631/// This function takes a 32-bit integer and returns the bit equivalent float.
632inline float BitsToFloat(uint32_t Bits) {
633 float F;
634 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
635 memcpy(&F, &Bits, sizeof(Bits));
636 return F;
637}
638
639/// This function takes a double and returns the bit equivalent 64-bit integer.
640/// Note that copying doubles around changes the bits of NaNs on some hosts,
641/// notably x86, so this routine cannot be used if these bits are needed.
642inline uint64_t DoubleToBits(double Double) {
643 uint64_t Bits;
644 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
645 memcpy(&Bits, &Double, sizeof(Double));
646 return Bits;
647}
648
649/// This function takes a float and returns the bit equivalent 32-bit integer.
650/// Note that copying floats around changes the bits of NaNs on some hosts,
651/// notably x86, so this routine cannot be used if these bits are needed.
652inline uint32_t FloatToBits(float Float) {
653 uint32_t Bits;
654 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
655 memcpy(&Bits, &Float, sizeof(Float));
656 return Bits;
657}
658
659/// A and B are either alignments or offsets. Return the minimum alignment that
660/// may be assumed after adding the two together.
661constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
662 // The largest power of 2 that divides both A and B.
663 //
664 // Replace "-Value" by "1+~Value" in the following commented code to avoid
665 // MSVC warning C4146
666 // return (A | B) & -(A | B);
667 return (A | B) & (1 + ~(A | B));
668}
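// Illustrative sketch, not part of the original header: MinAlign isolates the
// lowest set bit of A | B, i.e. the largest power of two dividing both.
static_assert(MinAlign(8, 12) == 4, "largest power of two dividing 8 and 12");
static_assert(MinAlign(16, 16) == 16, "equal alignments are preserved");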
669
670/// Returns the next power of two (in 64-bits) that is strictly greater than A.
671/// Returns zero on overflow.
672inline uint64_t NextPowerOf2(uint64_t A) {
673 A |= (A >> 1);
674 A |= (A >> 2);
675 A |= (A >> 4);
676 A |= (A >> 8);
677 A |= (A >> 16);
678 A |= (A >> 32);
679 return A + 1;
680}
681
682/// Returns the power of two which is less than or equal to the given value.
683/// Essentially, it is a floor operation across the domain of powers of two.
684inline uint64_t PowerOf2Floor(uint64_t A) {
685 if (!A) return 0;
686 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
687}
688
689/// Returns the power of two which is greater than or equal to the given value.
690/// Essentially, it is a ceil operation across the domain of powers of two.
691inline uint64_t PowerOf2Ceil(uint64_t A) {
692 if (!A)
693 return 0;
694 return NextPowerOf2(A - 1);
695}
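// Illustrative sketch, not part of the original header (hypothetical function
// name): contrasting the three power-of-two roundings above.
inline void examplePowerOf2Rounding() {
  assert(NextPowerOf2(8) == 16);   // strictly greater than the argument
  assert(PowerOf2Floor(5) == 4);
  assert(PowerOf2Ceil(8) == 8);    // greater than or equal to the argument
}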
696
697/// Returns the next integer (mod 2**64) that is greater than or equal to
698/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
699///
700/// If non-zero \p Skew is specified, the return value will be a minimal
701/// integer that is greater than or equal to \p Value and equal to
702/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
703/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
704///
705/// Examples:
706/// \code
707/// alignTo(5, 8) = 8
708/// alignTo(17, 8) = 24
709/// alignTo(~0LL, 8) = 0
710/// alignTo(321, 255) = 510
711///
712/// alignTo(5, 8, 7) = 7
713/// alignTo(17, 8, 1) = 17
714/// alignTo(~0LL, 8, 3) = 3
715/// alignTo(321, 255, 42) = 552
716/// \endcode
717inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
718 assert(Align != 0u && "Align can't be 0.");
719 Skew %= Align;
720 return (Value + Align - 1 - Skew) / Align * Align + Skew;
721}
722
723/// Returns the next integer (mod 2**64) that is greater than or equal to
724/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
725template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
726 static_assert(Align != 0u, "Align must be non-zero");
727 return (Value + Align - 1) / Align * Align;
728}
729
730/// Returns the integer ceil(Numerator / Denominator).
731inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
732 return alignTo(Numerator, Denominator) / Denominator;
733}
734
735/// Returns the integer nearest(Numerator / Denominator).
736inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
737 return (Numerator + (Denominator / 2)) / Denominator;
738}
739
740/// Returns the largest uint64_t less than or equal to \p Value that is
741/// \p Skew mod \p Align. \p Align must be non-zero.
742inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
743 assert(Align != 0u && "Align can't be 0.");
744 Skew %= Align;
745 return (Value - Skew) / Align * Align + Skew;
746}
747
748/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
749/// Requires 0 < B <= 32.
750template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
751 static_assert(B > 0, "Bit width can't be 0.");
752 static_assert(B <= 32, "Bit width out of range.");
753 return int32_t(X << (32 - B)) >> (32 - B);
754}
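// Illustrative sketch, not part of the original header: SignExtend32 is
// constexpr, so these can be checked with static_assert (assuming the usual
// two's-complement, arithmetic-right-shift behavior of the host compiler).
static_assert(SignExtend32<8>(0xFF) == -1, "all-ones 8-bit pattern is -1");
static_assert(SignExtend32<8>(0x7F) == 127, "positive values pass through");
static_assert(SignExtend32<4>(0x8) == -8, "bit 3 is the sign bit when B == 4");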
755
756/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
757/// Requires 0 < B <= 32.
758inline int32_t SignExtend32(uint32_t X, unsigned B) {
759 assert(B > 0 && "Bit width can't be 0.");
760 assert(B <= 32 && "Bit width out of range.");
761 return int32_t(X << (32 - B)) >> (32 - B);
762}
763
764/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
765/// Requires 0 < B <= 64.
766template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
767 static_assert(B > 0, "Bit width can't be 0.");
768 static_assert(B <= 64, "Bit width out of range.");
769 return int64_t(x << (64 - B)) >> (64 - B);
770}
771
772/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
773/// Requires 0 < B <= 64.
774inline int64_t SignExtend64(uint64_t X, unsigned B) {
775 assert(B > 0 && "Bit width can't be 0.");
776 assert(B <= 64 && "Bit width out of range.");
777 return int64_t(X << (64 - B)) >> (64 - B);
778}
779
780/// Subtract two unsigned integers, X and Y, of type T and return the absolute
781/// value of the result.
782template <typename T>
783typename std::enable_if<std::is_unsigned<T>::value, T>::type
784AbsoluteDifference(T X, T Y) {
785 return std::max(X, Y) - std::min(X, Y);
786}
787
788/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
789/// maximum representable value of T on overflow. ResultOverflowed indicates if
790/// the result is larger than the maximum representable value of type T.
791template <typename T>
792typename std::enable_if<std::is_unsigned<T>::value, T>::type
793SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
794 bool Dummy;
795 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
796 // Hacker's Delight, p. 29
797 T Z = X + Y;
798 Overflowed = (Z < X || Z < Y);
799 if (Overflowed)
800 return std::numeric_limits<T>::max();
801 else
802 return Z;
803}
804
805/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
806/// maximum representable value of T on overflow. ResultOverflowed indicates if
807/// the result is larger than the maximum representable value of type T.
808template <typename T>
809typename std::enable_if<std::is_unsigned<T>::value, T>::type
810SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
811 bool Dummy;
812 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
813
814 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
815 // because it fails for uint16_t (where multiplication can have undefined
816 // behavior due to promotion to int), and requires a division in addition
817 // to the multiplication.
818
819 Overflowed = false;
820
821 // Log2(Z) would be either Log2Z or Log2Z + 1.
822 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
823 // will necessarily be less than Log2Max as desired.
824 int Log2Z = Log2_64(X) + Log2_64(Y);
825 const T Max = std::numeric_limits<T>::max();
826 int Log2Max = Log2_64(Max);
827 if (Log2Z < Log2Max) {
828 return X * Y;
829 }
830 if (Log2Z > Log2Max) {
831 Overflowed = true;
832 return Max;
833 }
834
835 // We're going to use the top bit, and maybe overflow one
836 // bit past it. Multiply all but the bottom bit then add
837 // that on at the end.
838 T Z = (X >> 1) * Y;
839 if (Z & ~(Max >> 1)) {
840 Overflowed = true;
841 return Max;
842 }
843 Z <<= 1;
844 if (X & 1)
845 return SaturatingAdd(Z, Y, ResultOverflowed);
846
847 return Z;
848}
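// Illustrative sketch, not part of the original header (hypothetical function
// name): the saturating helpers with an 8-bit type; the exact product 15 * 17
// equals 255 without overflowing.
inline void exampleSaturating() {
  bool Overflowed = false;
  assert(SaturatingAdd(uint8_t(200), uint8_t(100), &Overflowed) == 255 && Overflowed);
  assert(SaturatingMultiply(uint8_t(16), uint8_t(16), &Overflowed) == 255 && Overflowed);
  assert(SaturatingMultiply(uint8_t(15), uint8_t(17), &Overflowed) == 255 && !Overflowed);
}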
849
850/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
851/// the product. Clamp the result to the maximum representable value of T on
852/// overflow. ResultOverflowed indicates if the result is larger than the
853/// maximum representable value of type T.
854template <typename T>
855typename std::enable_if<std::is_unsigned<T>::value, T>::type
856SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
857 bool Dummy;
858 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
859
860 T Product = SaturatingMultiply(X, Y, &Overflowed);
861 if (Overflowed)
862 return Product;
863
864 return SaturatingAdd(A, Product, &Overflowed);
865}
866
867/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
868extern const float huge_valf;
869
870
871/// Add two signed integers, computing the two's complement truncated result,
872/// returning true if overflow occurred.
873template <typename T>
874typename std::enable_if<std::is_signed<T>::value, T>::type
875AddOverflow(T X, T Y, T &Result) {
876#if __has_builtin(__builtin_add_overflow)
877 return __builtin_add_overflow(X, Y, &Result);
878#else
879 // Perform the unsigned addition.
880 using U = typename std::make_unsigned<T>::type;
881 const U UX = static_cast<U>(X);
882 const U UY = static_cast<U>(Y);
883 const U UResult = UX + UY;
884
885 // Convert to signed.
886 Result = static_cast<T>(UResult);
887
888 // Adding two positive numbers should result in a positive number.
889 if (X > 0 && Y > 0)
890 return Result <= 0;
891 // Adding two negatives should result in a negative number.
892 if (X < 0 && Y < 0)
893 return Result >= 0;
894 return false;
895#endif
896}
897
898/// Subtract two signed integers, computing the two's complement truncated
899/// result, returning true if an overflow occurred.
900template <typename T>
901typename std::enable_if<std::is_signed<T>::value, T>::type
902SubOverflow(T X, T Y, T &Result) {
903#if __has_builtin(__builtin_sub_overflow)
904 return __builtin_sub_overflow(X, Y, &Result);
905#else
906 // Perform the unsigned subtraction.
907 using U = typename std::make_unsigned<T>::type;
908 const U UX = static_cast<U>(X);
909 const U UY = static_cast<U>(Y);
910 const U UResult = UX - UY;
911
912 // Convert to signed.
913 Result = static_cast<T>(UResult);
914
915 // Subtracting a positive number from a negative results in a negative number.
916 if (X <= 0 && Y > 0)
917 return Result >= 0;
918 // Subtracting a negative number from a positive results in a positive number.
919 if (X >= 0 && Y < 0)
920 return Result <= 0;
921 return false;
922#endif
923}
924
925
926/// Multiply two signed integers, computing the two's complement truncated
927/// result, returning true if an overflow occurred.
928template <typename T>
929typename std::enable_if<std::is_signed<T>::value, T>::type
930MulOverflow(T X, T Y, T &Result) {
931 // Perform the unsigned multiplication on absolute values.
932 using U = typename std::make_unsigned<T>::type;
933 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
934 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
935 const U UResult = UX * UY;
936
937 // Convert to signed.
938 const bool IsNegative = (X < 0) ^ (Y < 0);
939 Result = IsNegative ? (0 - UResult) : UResult;
940
941 // If any of the args was 0, result is 0 and no overflow occurs.
942 if (UX == 0 || UY == 0)
943 return false;
944
945 // UX and UY are in [1, 2^n], where n is the number of digits.
946 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
947 // positive) divided by an argument compares to the other.
948 if (IsNegative)
949 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
950 else
951 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
952}
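// Illustrative sketch, not part of the original header (hypothetical function
// name): the signed overflow checkers report overflow via a nonzero return
// value while writing the truncated two's-complement result to Result.
inline void exampleOverflowChecks() {
  int32_t Result;
  assert(!AddOverflow(int32_t(2), int32_t(3), Result) && Result == 5);
  assert(AddOverflow(std::numeric_limits<int32_t>::max(), int32_t(1), Result));
  assert(SubOverflow(std::numeric_limits<int32_t>::min(), int32_t(1), Result));
  assert(MulOverflow(int32_t(65536), int32_t(65536), Result));
  assert(!MulOverflow(int32_t(-4), int32_t(5), Result) && Result == -20);
}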
953
954} // End llvm namespace
955
956#endif