Bug Summary

File: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Warning: line 745, column 21
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
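
For context: the statement flagged above is "UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads)" in getFalkorUnrollingPreferences (line 745 of the annotated source below). Log2_32 is implemented in MathExtras.h as 31 - countLeadingZeros(Value); countLeadingZeros(0) returns 32 under its default ZB_Width behavior, so Log2_32(0) wraps to 4294967295 in unsigned arithmetic. A minimal illustration of the resulting undefined shift (a hypothetical standalone snippet, not LLVM code):

    int main() {
      unsigned Shift = 4294967295u; // what Log2_32(0) yields
      int MaxCount = 1 << Shift;    // UB: shift amount >= width of 'int'
      (void)MaxCount;
    }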

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/AArch64 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64TargetTransformInfo.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/Support/Debug.h"
19#include <algorithm>
20using namespace llvm;
21
22#define DEBUG_TYPE "aarch64tti"
23
24static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
25 cl::init(true), cl::Hidden);
26
27bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
28 const Function *Callee) const {
29 const TargetMachine &TM = getTLI()->getTargetMachine();
30
31 const FeatureBitset &CallerBits =
32 TM.getSubtargetImpl(*Caller)->getFeatureBits();
33 const FeatureBitset &CalleeBits =
34 TM.getSubtargetImpl(*Callee)->getFeatureBits();
35
36 // Inline a callee if its target-features are a subset of the caller's
37 // target-features.
38 return (CallerBits & CalleeBits) == CalleeBits;
39}
40
41/// Calculate the cost of materializing a 64-bit value. This helper
42/// method might only calculate a fraction of a larger immediate. Therefore it
43/// is valid to return a cost of ZERO.
44int AArch64TTIImpl::getIntImmCost(int64_t Val) {
45 // Check if the immediate can be encoded within an instruction.
46 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
47 return 0;
48
49 if (Val < 0)
50 Val = ~Val;
51
52 // Calculate how many moves we will need to materialize this constant.
53 unsigned LZ = countLeadingZeros((uint64_t)Val);
54 return (64 - LZ + 15) / 16;
55}
56
57/// Calculate the cost of materializing the given constant.
58int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
59 assert(Ty->isIntegerTy());
60
61 unsigned BitSize = Ty->getPrimitiveSizeInBits();
62 if (BitSize == 0)
63 return ~0U;
64
65 // Sign-extend all constants to a multiple of 64-bit.
66 APInt ImmVal = Imm;
67 if (BitSize & 0x3f)
68 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
69
70 // Split the constant into 64-bit chunks and calculate the cost for each
71 // chunk.
72 int Cost = 0;
73 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
74 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
75 int64_t Val = Tmp.getSExtValue();
76 Cost += getIntImmCost(Val);
77 }
78 // We need at least one instruction to materialize the constant.
79 return std::max(1, Cost);
80}
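
The two getIntImmCost routines above count one MOVZ/MOVK instruction per 16-bit chunk between bit 0 and the highest set bit, for each 64-bit piece of the constant. A worked sketch of the same formula on plain integers (a hypothetical standalone snippet that ignores the logical-immediate fast path):

    #include <cassert>
    #include <cstdint>

    // Mirrors getIntImmCost(int64_t): one move per 16-bit chunk up to the
    // highest set bit; negative values are counted via their complement.
    static int movCount(int64_t Val) {
      if (Val == 0)
        return 0;
      if (Val < 0)
        Val = ~Val;
      unsigned LZ = Val ? __builtin_clzll((uint64_t)Val) : 64;
      return (64 - LZ + 15) / 16;
    }

    int main() {
      assert(movCount(0xFFFF) == 1);         // one MOVZ
      assert(movCount(0x123456789ABC) == 3); // bits span three 16-bit chunks
    }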
81
82int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
83 const APInt &Imm, Type *Ty) {
84 assert(Ty->isIntegerTy());
85
86 unsigned BitSize = Ty->getPrimitiveSizeInBits();
87 // There is no cost model for constants with a bit size of 0. Return TCC_Free
88 // here, so that constant hoisting will ignore this constant.
89 if (BitSize == 0)
90 return TTI::TCC_Free;
91
92 unsigned ImmIdx = ~0U;
93 switch (Opcode) {
94 default:
95 return TTI::TCC_Free;
96 case Instruction::GetElementPtr:
97 // Always hoist the base address of a GetElementPtr.
98 if (Idx == 0)
99 return 2 * TTI::TCC_Basic;
100 return TTI::TCC_Free;
101 case Instruction::Store:
102 ImmIdx = 0;
103 break;
104 case Instruction::Add:
105 case Instruction::Sub:
106 case Instruction::Mul:
107 case Instruction::UDiv:
108 case Instruction::SDiv:
109 case Instruction::URem:
110 case Instruction::SRem:
111 case Instruction::And:
112 case Instruction::Or:
113 case Instruction::Xor:
114 case Instruction::ICmp:
115 ImmIdx = 1;
116 break;
117 // Always return TCC_Free for the shift value of a shift instruction.
118 case Instruction::Shl:
119 case Instruction::LShr:
120 case Instruction::AShr:
121 if (Idx == 1)
122 return TTI::TCC_Free;
123 break;
124 case Instruction::Trunc:
125 case Instruction::ZExt:
126 case Instruction::SExt:
127 case Instruction::IntToPtr:
128 case Instruction::PtrToInt:
129 case Instruction::BitCast:
130 case Instruction::PHI:
131 case Instruction::Call:
132 case Instruction::Select:
133 case Instruction::Ret:
134 case Instruction::Load:
135 break;
136 }
137
138 if (Idx == ImmIdx) {
139 int NumConstants = (BitSize + 63) / 64;
140 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
141 return (Cost <= NumConstants * TTI::TCC_Basic)
142 ? static_cast<int>(TTI::TCC_Free)
143 : Cost;
144 }
145 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
146}
147
148int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
149 const APInt &Imm, Type *Ty) {
150 assert(Ty->isIntegerTy());
151
152 unsigned BitSize = Ty->getPrimitiveSizeInBits();
153 // There is no cost model for constants with a bit size of 0. Return TCC_Free
154 // here, so that constant hoisting will ignore this constant.
155 if (BitSize == 0)
156 return TTI::TCC_Free;
157
158 switch (IID) {
159 default:
160 return TTI::TCC_Free;
161 case Intrinsic::sadd_with_overflow:
162 case Intrinsic::uadd_with_overflow:
163 case Intrinsic::ssub_with_overflow:
164 case Intrinsic::usub_with_overflow:
165 case Intrinsic::smul_with_overflow:
166 case Intrinsic::umul_with_overflow:
167 if (Idx == 1) {
168 int NumConstants = (BitSize + 63) / 64;
169 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
170 return (Cost <= NumConstants * TTI::TCC_Basic)
171 ? static_cast<int>(TTI::TCC_Free)
172 : Cost;
173 }
174 break;
175 case Intrinsic::experimental_stackmap:
176 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
177 return TTI::TCC_Free;
178 break;
179 case Intrinsic::experimental_patchpoint_void:
180 case Intrinsic::experimental_patchpoint_i64:
181 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
182 return TTI::TCC_Free;
183 break;
184 }
185 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
186}
187
188TargetTransformInfo::PopcntSupportKind
189AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
190 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
191 if (TyWidth == 32 || TyWidth == 64)
192 return TTI::PSK_FastHardware;
193 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
194 return TTI::PSK_Software;
195}
196
197bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
198 ArrayRef<const Value *> Args) {
199
200 // A helper that returns a vector type from the given type. The number of
201 // elements in DstTy determines the vector width.
202 auto toVectorTy = [&](Type *ArgTy) {
203 return VectorType::get(ArgTy->getScalarType(),
204 DstTy->getVectorNumElements());
205 };
206
207 // Exit early if DstTy is not a vector type whose elements are at least
208 // 16-bits wide.
209 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
210 return false;
211
212 // Determine if the operation has a widening variant. We consider both the
213 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
214 // instructions.
215 //
216 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
217 // verify that their extending operands are eliminated during code
218 // generation.
219 switch (Opcode) {
220 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
221 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
222 break;
223 default:
224 return false;
225 }
226
227 // To be a widening instruction (either the "wide" or "long" versions), the
228 // second operand must be a sign- or zero-extend having a single user. We
229 // only consider extends having a single user because they may otherwise not
230 // be eliminated.
231 if (Args.size() != 2 ||
232 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
233 !Args[1]->hasOneUse())
234 return false;
235 auto *Extend = cast<CastInst>(Args[1]);
236
237 // Legalize the destination type and ensure it can be used in a widening
238 // operation.
239 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
240 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
241 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
242 return false;
243
244 // Legalize the source type and ensure it can be used in a widening
245 // operation.
246 Type *SrcTy = toVectorTy(Extend->getSrcTy());
247 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
248 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
249 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
250 return false;
251
252 // Get the total number of vector elements in the legalized types.
253 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
254 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
255
256 // Return true if the legalized types have the same number of vector elements
257 // and the destination element type size is twice that of the source type.
258 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
259}
260
261int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
262 const Instruction *I) {
263 int ISD = TLI->InstructionOpcodeToISD(Opcode);
264 assert(ISD && "Invalid opcode");
265
266 // If the cast is observable, and it is used by a widening instruction (e.g.,
267 // uaddl, saddw, etc.), it may be free.
268 if (I && I->hasOneUse()) {
269 auto *SingleUser = cast<Instruction>(*I->user_begin());
270 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
271 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
272 // If the cast is the second operand, it is free. We will generate either
273 // a "wide" or "long" version of the widening instruction.
274 if (I == SingleUser->getOperand(1))
275 return 0;
276 // If the cast is not the second operand, it will be free if it looks the
277 // same as the second operand. In this case, we will generate a "long"
278 // version of the widening instruction.
279 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
280 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
281 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
282 return 0;
283 }
284 }
285
286 EVT SrcTy = TLI->getValueType(DL, Src);
287 EVT DstTy = TLI->getValueType(DL, Dst);
288
289 if (!SrcTy.isSimple() || !DstTy.isSimple())
290 return BaseT::getCastInstrCost(Opcode, Dst, Src);
291
292 static const TypeConversionCostTblEntry
293 ConversionTbl[] = {
294 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
295 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
296 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
297 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
298
299 // The number of shll instructions for the extension.
300 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
301 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
302 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
303 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
304 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
305 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
306 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
307 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
308 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
309 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
310 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
311 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
312 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
313 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
314 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
315 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
316
317 // LowerVectorINT_TO_FP:
318 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
319 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
320 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
321 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
322 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
323 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
324
325 // Complex: to v2f32
326 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
327 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
328 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
329 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
330 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
331 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
332
333 // Complex: to v4f32
334 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
335 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
336 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
337 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
338
339 // Complex: to v8f32
340 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
341 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
342 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
343 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
344
345 // Complex: to v16f32
346 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
347 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
348
349 // Complex: to v2f64
350 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
351 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
352 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
353 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
354 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
355 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
356
357
358 // LowerVectorFP_TO_INT
359 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
360 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
361 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
362 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
363 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
364 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
365
366 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
367 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
368 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
369 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
370 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
371 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
372 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
373
374 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
375 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
376 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
377 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
378 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
379
380 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
381 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
382 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
383 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
384 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
385 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
386 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
387 };
388
389 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
390 DstTy.getSimpleVT(),
391 SrcTy.getSimpleVT()))
392 return Entry->Cost;
393
394 return BaseT::getCastInstrCost(Opcode, Dst, Src);
395}
396
397int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
398 VectorType *VecTy,
399 unsigned Index) {
400
401 // Make sure we were given a valid extend opcode.
402 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
403 "Invalid opcode");
404
405 // We are extending an element we extract from a vector, so the source type
406 // of the extend is the element type of the vector.
407 auto *Src = VecTy->getElementType();
408
409 // Sign- and zero-extends are for integer types only.
410 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
411
412 // Get the cost for the extract. We compute the cost (if any) for the extend
413 // below.
414 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
415
416 // Legalize the types.
417 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
418 auto DstVT = TLI->getValueType(DL, Dst);
419 auto SrcVT = TLI->getValueType(DL, Src);
420
421 // If the resulting type is still a vector and the destination type is legal,
422 // we may get the extension for free. If not, get the default cost for the
423 // extend.
424 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
425 return Cost + getCastInstrCost(Opcode, Dst, Src);
426
427 // The destination type should be larger than the element type. If not, get
428 // the default cost for the extend.
429 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
430 return Cost + getCastInstrCost(Opcode, Dst, Src);
431
432 switch (Opcode) {
433 default:
434 llvm_unreachable("Opcode should be either SExt or ZExt");
435
436 // For sign-extends, we only need a smov, which performs the extension
437 // automatically.
438 case Instruction::SExt:
439 return Cost;
440
441 // For zero-extends, the extend is performed automatically by a umov unless
442 // the destination type is i64 and the element type is i8 or i16.
443 case Instruction::ZExt:
444 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
445 return Cost;
446 }
447
448 // If we are unable to perform the extend for free, get the default cost.
449 return Cost + getCastInstrCost(Opcode, Dst, Src);
450}
451
452int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
453 unsigned Index) {
454 assert(Val->isVectorTy() && "This must be a vector type");
455
456 if (Index != -1U) {
457 // Legalize the type.
458 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
459
460 // This type is legalized to a scalar type.
461 if (!LT.second.isVector())
462 return 0;
463
464 // The type may be split. Normalize the index to the new type.
465 unsigned Width = LT.second.getVectorNumElements();
466 Index = Index % Width;
467
468 // The element at index zero is already inside the vector.
469 if (Index == 0)
470 return 0;
471 }
472
473 // All other insert/extracts cost this much.
474 return ST->getVectorInsertExtractBaseCost();
475}
476
477int AArch64TTIImpl::getArithmeticInstrCost(
478 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
479 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
480 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
481 // Legalize the type.
482 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
483
484 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
485 // add in the widening overhead specified by the sub-target. Since the
486 // extends feeding widening instructions are performed automatically, they
487 // aren't present in the generated code and have a zero cost. By adding a
488 // widening overhead here, we attach the total cost of the combined operation
489 // to the widening instruction.
490 int Cost = 0;
491 if (isWideningInstruction(Ty, Opcode, Args))
492 Cost += ST->getWideningBaseCost();
493
494 int ISD = TLI->InstructionOpcodeToISD(Opcode);
495
496 switch (ISD) {
497 default:
498 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
499 Opd1PropInfo, Opd2PropInfo);
500 case ISD::SDIV:
501 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
502 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
503 // On AArch64, scalar signed division by a power-of-two constant is
504 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
505 // The OperandValue properties may not be the same as those of the
506 // previous operation; conservatively assume OP_None.
507 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
508 TargetTransformInfo::OP_None,
509 TargetTransformInfo::OP_None);
510 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
511 TargetTransformInfo::OP_None,
512 TargetTransformInfo::OP_None);
513 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
514 TargetTransformInfo::OP_None,
515 TargetTransformInfo::OP_None);
516 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
517 TargetTransformInfo::OP_None,
518 TargetTransformInfo::OP_None);
519 return Cost;
520 }
521 LLVM_FALLTHROUGH;
522 case ISD::UDIV:
523 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
524 auto VT = TLI->getValueType(DL, Ty);
525 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
526 // Vector signed division by a constant is expanded to the
527 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
528 // to MULHS + SUB + SRL + ADD + SRL.
529 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
530 Opd2Info,
531 TargetTransformInfo::OP_None,
532 TargetTransformInfo::OP_None);
533 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
534 Opd2Info,
535 TargetTransformInfo::OP_None,
536 TargetTransformInfo::OP_None);
537 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
538 Opd2Info,
539 TargetTransformInfo::OP_None,
540 TargetTransformInfo::OP_None);
541 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
542 }
543 }
544
545 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
546 Opd1PropInfo, Opd2PropInfo);
547 if (Ty->isVectorTy()) {
548 // On AArch64, vector divisions are not supported natively and are
549 // expanded into scalar divisions of each pair of elements.
550 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
551 Opd2Info, Opd1PropInfo, Opd2PropInfo);
552 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
553 Opd2Info, Opd1PropInfo, Opd2PropInfo);
554 // TODO: if one of the arguments is scalar, then it's not necessary to
555 // double the cost of handling the vector elements.
556 Cost += Cost;
557 }
558 return Cost;
559
560 case ISD::ADD:
561 case ISD::MUL:
562 case ISD::XOR:
563 case ISD::OR:
564 case ISD::AND:
565 // These nodes are marked as 'custom' for combining purposes only.
566 // We know that they are legal. See LowerAdd in ISelLowering.
567 return (Cost + 1) * LT.first;
568 }
569}
570
571int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
572 const SCEV *Ptr) {
573 // Address computations in vectorized code with non-consecutive addresses will
574 // likely result in more instructions compared to scalar code where the
575 // computation can more often be merged into the index mode. The resulting
576 // extra micro-ops can significantly decrease throughput.
577 unsigned NumVectorInstToHideOverhead = 10;
578 int MaxMergeDistance = 64;
579
580 if (Ty->isVectorTy() && SE &&
581 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
582 return NumVectorInstToHideOverhead;
583
584 // In many cases the address computation is not merged into the instruction
585 // addressing mode.
586 return 1;
587}
588
589int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
590 Type *CondTy, const Instruction *I) {
591
592 int ISD = TLI->InstructionOpcodeToISD(Opcode);
593 // We don't lower vector selects well when they are wider than the
594 // register width.
595 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
596 // We would need this many instructions to hide the scalarization happening.
597 const int AmortizationCost = 20;
598 static const TypeConversionCostTblEntry
599 VectorSelectTbl[] = {
600 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
601 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
602 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
603 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
604 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
605 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
606 };
607
608 EVT SelCondTy = TLI->getValueType(DL, CondTy);
609 EVT SelValTy = TLI->getValueType(DL, ValTy);
610 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
611 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
612 SelCondTy.getSimpleVT(),
613 SelValTy.getSimpleVT()))
614 return Entry->Cost;
615 }
616 }
617 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
618}
619
620int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
621 unsigned Alignment, unsigned AddressSpace,
622 const Instruction *I) {
623 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
624
625 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
626 LT.second.is128BitVector() && Alignment < 16) {
627 // Unaligned stores are extremely inefficient. We don't split all
628 // unaligned 128-bit stores because of the negative impact that has been
629 // shown in practice on inlined block copy code.
630 // We make such stores expensive so that we will only vectorize if there
631 // are 6 other instructions getting vectorized.
632 const int AmortizationCost = 6;
633
634 return LT.first * 2 * AmortizationCost;
635 }
636
637 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
638 unsigned ProfitableNumElements;
639 if (Opcode == Instruction::Store)
640 // We use a custom trunc store lowering so v.4b should be profitable.
641 ProfitableNumElements = 4;
642 else
643 // We scalarize the loads because there is no v.4b register and we
644 // have to promote the elements to v.2.
645 ProfitableNumElements = 8;
646
647 if (Ty->getVectorNumElements() < ProfitableNumElements) {
648 unsigned NumVecElts = Ty->getVectorNumElements();
649 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
650 // We generate 2 instructions per vector element.
651 return NumVectorizableInstsToAmortize * NumVecElts * 2;
652 }
653 }
654
655 return LT.first;
656}
657
658int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
659 unsigned Factor,
660 ArrayRef<unsigned> Indices,
661 unsigned Alignment,
662 unsigned AddressSpace,
663 bool IsMasked) {
664 assert(Factor >= 2 && "Invalid interleave factor");
665 assert(isa<VectorType>(VecTy) && "Expect a vector type");
666
667 if (!IsMasked && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
668 unsigned NumElts = VecTy->getVectorNumElements();
669 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
670
671 // ldN/stN only support legal vector types of size 64 or 128 in bits.
672 // Accesses having vector types that are a multiple of 128 bits can be
673 // matched to more than one ldN/stN instruction.
674 if (NumElts % Factor == 0 &&
675 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
676 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
677 }
678
679 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
680 Alignment, AddressSpace, IsMasked);
681}
682
683int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
684 int Cost = 0;
685 for (auto *I : Tys) {
686 if (!I->isVectorTy())
687 continue;
688 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
689 Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
690 getMemoryOpCost(Instruction::Load, I, 128, 0);
691 }
692 return Cost;
693}
694
695unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
696 return ST->getMaxInterleaveFactor();
697}
698
699// For Falkor, we want to avoid having too many strided loads in a loop since
700// that can exhaust the HW prefetcher resources. We adjust the unroller
701// MaxCount preference below to attempt to ensure unrolling doesn't create too
702// many strided loads.
703static void
704getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
705 TargetTransformInfo::UnrollingPreferences &UP) {
706 enum { MaxStridedLoads = 7 };
707 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
708 int StridedLoads = 0;
709 // FIXME? We could make this more precise by looking at the CFG and
710 // e.g. not counting loads in each side of an if-then-else diamond.
711 for (const auto BB : L->blocks()) {
712 for (auto &I : *BB) {
713 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
714 if (!LMemI)
715 continue;
716
717 Value *PtrValue = LMemI->getPointerOperand();
718 if (L->isLoopInvariant(PtrValue))
719 continue;
720
721 const SCEV *LSCEV = SE.getSCEV(PtrValue);
722 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
723 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
724 continue;
725
726 // FIXME? We could take pairing of unrolled load copies into account
727 // by looking at the AddRec, but we would probably have to limit this
728 // to loops with no stores or other memory optimization barriers.
729 ++StridedLoads;
730 // We've seen enough strided loads that seeing more won't make a
731 // difference.
732 if (StridedLoads > MaxStridedLoads / 2)
733 return StridedLoads;
734 }
735 }
736 return StridedLoads;
737 };
738
739 int StridedLoads = countStridedLoads(L, SE);
740 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
741 << " strided loads\n");
742 // Pick the largest power of 2 unroll count that won't result in too many
743 // strided loads.
744 if (StridedLoads) {
7. Assuming 'StridedLoads' is not equal to 0
8. Taking true branch
745 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
9. Calling 'Log2_32'
11. Returning from 'Log2_32'
12. The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
746 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
747 << UP.MaxCount << '\n');
748 }
749}
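
Note on the report: in this caller, countStridedLoads returns as soon as StridedLoads exceeds MaxStridedLoads / 2, so StridedLoads is at most 4 and the quotient MaxStridedLoads / StridedLoads is at least 1. The analyzer does not track that invariant and assumes the quotient can be 0, in which case Log2_32(0) wraps to 4294967295 and the shift at line 745 is undefined. A defensive rewrite that makes the invariant explicit might look like this (a sketch, not the upstream fix):

    if (StridedLoads) {
      unsigned Quotient = MaxStridedLoads / StridedLoads;
      // Log2_32(0) would wrap to 4294967295; clamp so the shift stays defined.
      UP.MaxCount = 1u << Log2_32(Quotient ? Quotient : 1);
    }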
750
751void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
752 TTI::UnrollingPreferences &UP) {
753 // Enable partial unrolling and runtime unrolling.
754 BaseT::getUnrollingPreferences(L, SE, UP);
755
756 // An inner loop is more likely to be hot, and its runtime checks can be
757 // hoisted out by the LICM pass, so the overhead is lower; try a larger
758 // threshold to unroll more loops.
759 if (L->getLoopDepth() > 1)
1. Assuming the condition is false
2. Taking false branch
760 UP.PartialThreshold *= 2;
761
762 // Disable partial & runtime unrolling on -Os.
763 UP.PartialOptSizeThreshold = 0;
764
765 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
3. Assuming the condition is true
5. Taking true branch
766 EnableFalkorHWPFUnrollFix)
4. Assuming the condition is true
767 getFalkorUnrollingPreferences(L, SE, UP);
6. Calling 'getFalkorUnrollingPreferences'
768}
769
770Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
771 Type *ExpectedType) {
772 switch (Inst->getIntrinsicID()) {
773 default:
774 return nullptr;
775 case Intrinsic::aarch64_neon_st2:
776 case Intrinsic::aarch64_neon_st3:
777 case Intrinsic::aarch64_neon_st4: {
778 // Create a struct type
779 StructType *ST = dyn_cast<StructType>(ExpectedType);
780 if (!ST)
781 return nullptr;
782 unsigned NumElts = Inst->getNumArgOperands() - 1;
783 if (ST->getNumElements() != NumElts)
784 return nullptr;
785 for (unsigned i = 0, e = NumElts; i != e; ++i) {
786 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
787 return nullptr;
788 }
789 Value *Res = UndefValue::get(ExpectedType);
790 IRBuilder<> Builder(Inst);
791 for (unsigned i = 0, e = NumElts; i != e; ++i) {
792 Value *L = Inst->getArgOperand(i);
793 Res = Builder.CreateInsertValue(Res, L, i);
794 }
795 return Res;
796 }
797 case Intrinsic::aarch64_neon_ld2:
798 case Intrinsic::aarch64_neon_ld3:
799 case Intrinsic::aarch64_neon_ld4:
800 if (Inst->getType() == ExpectedType)
801 return Inst;
802 return nullptr;
803 }
804}
805
806bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
807 MemIntrinsicInfo &Info) {
808 switch (Inst->getIntrinsicID()) {
809 default:
810 break;
811 case Intrinsic::aarch64_neon_ld2:
812 case Intrinsic::aarch64_neon_ld3:
813 case Intrinsic::aarch64_neon_ld4:
814 Info.ReadMem = true;
815 Info.WriteMem = false;
816 Info.PtrVal = Inst->getArgOperand(0);
817 break;
818 case Intrinsic::aarch64_neon_st2:
819 case Intrinsic::aarch64_neon_st3:
820 case Intrinsic::aarch64_neon_st4:
821 Info.ReadMem = false;
822 Info.WriteMem = true;
823 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
824 break;
825 }
826
827 switch (Inst->getIntrinsicID()) {
828 default:
829 return false;
830 case Intrinsic::aarch64_neon_ld2:
831 case Intrinsic::aarch64_neon_st2:
832 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
833 break;
834 case Intrinsic::aarch64_neon_ld3:
835 case Intrinsic::aarch64_neon_st3:
836 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
837 break;
838 case Intrinsic::aarch64_neon_ld4:
839 case Intrinsic::aarch64_neon_st4:
840 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
841 break;
842 }
843 return true;
844}
845
846/// See if \p I should be considered for address type promotion. We check if
847/// \p I is a sext with the right type that is used in memory accesses. If it
848/// is used in a "complex" getelementptr, we allow it to be promoted without
849/// finding other sext instructions that sign extended the same initial value.
850/// A getelementptr is considered "complex" if it has more than 2 operands.
851bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
852 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
853 bool Considerable = false;
854 AllowPromotionWithoutCommonHeader = false;
855 if (!isa<SExtInst>(&I))
856 return false;
857 Type *ConsideredSExtType =
858 Type::getInt64Ty(I.getParent()->getParent()->getContext());
859 if (I.getType() != ConsideredSExtType)
860 return false;
861 // See if the sext is the one with the right type and used in at least one
862 // GetElementPtrInst.
863 for (const User *U : I.users()) {
864 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
865 Considerable = true;
866 // A getelementptr is considered as "complex" if it has more than 2
867 // operands. We will promote a SExt used in such complex GEP as we
868 // expect some computation to be merged if they are done on 64 bits.
869 if (GEPInst->getNumOperands() > 2) {
870 AllowPromotionWithoutCommonHeader = true;
871 break;
872 }
873 }
874 }
875 return Considerable;
876}
877
878unsigned AArch64TTIImpl::getCacheLineSize() {
879 return ST->getCacheLineSize();
880}
881
882unsigned AArch64TTIImpl::getPrefetchDistance() {
883 return ST->getPrefetchDistance();
884}
885
886unsigned AArch64TTIImpl::getMinPrefetchStride() {
887 return ST->getMinPrefetchStride();
888}
889
890unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
891 return ST->getMaxPrefetchIterationsAhead();
892}
893
894bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
895 TTI::ReductionFlags Flags) const {
896 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
897 unsigned ScalarBits = Ty->getScalarSizeInBits();
898 switch (Opcode) {
899 case Instruction::FAdd:
900 case Instruction::FMul:
901 case Instruction::And:
902 case Instruction::Or:
903 case Instruction::Xor:
904 case Instruction::Mul:
905 return false;
906 case Instruction::Add:
907 return ScalarBits * Ty->getVectorNumElements() >= 128;
908 case Instruction::ICmp:
909 return (ScalarBits < 64) &&
910 (ScalarBits * Ty->getVectorNumElements() >= 128);
911 case Instruction::FCmp:
912 return Flags.NoNaN;
913 default:
914 llvm_unreachable("Unhandled reduction opcode");
915 }
916 return false;
917}
918
919int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
920 bool IsPairwiseForm) {
921
922 if (IsPairwiseForm)
923 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
924
925 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
926 MVT MTy = LT.second;
927 int ISD = TLI->InstructionOpcodeToISD(Opcode);
928 assert(ISD && "Invalid opcode");
929
930 // Horizontal adds can use the 'addv' instruction. We model the cost of these
931 // instructions as normal vector adds. This is the only arithmetic vector
932 // reduction operation for which we have an instruction.
933 static const CostTblEntry CostTblNoPairwise[]{
934 {ISD::ADD, MVT::v8i8, 1},
935 {ISD::ADD, MVT::v16i8, 1},
936 {ISD::ADD, MVT::v4i16, 1},
937 {ISD::ADD, MVT::v8i16, 1},
938 {ISD::ADD, MVT::v4i32, 1},
939 };
940
941 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
942 return LT.first * Entry->Cost;
943
944 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
945}
946
947int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
948 Type *SubTp) {
949 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
950 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
951 static const CostTblEntry ShuffleTbl[] = {
952 // Broadcast shuffle kinds can be performed with 'dup'.
953 { TTI::SK_Broadcast, MVT::v8i8, 1 },
954 { TTI::SK_Broadcast, MVT::v16i8, 1 },
955 { TTI::SK_Broadcast, MVT::v4i16, 1 },
956 { TTI::SK_Broadcast, MVT::v8i16, 1 },
957 { TTI::SK_Broadcast, MVT::v2i32, 1 },
958 { TTI::SK_Broadcast, MVT::v4i32, 1 },
959 { TTI::SK_Broadcast, MVT::v2i64, 1 },
960 { TTI::SK_Broadcast, MVT::v2f32, 1 },
961 { TTI::SK_Broadcast, MVT::v4f32, 1 },
962 { TTI::SK_Broadcast, MVT::v2f64, 1 },
963 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
964 // 'zip1/zip2' instructions.
965 { TTI::SK_Transpose, MVT::v8i8, 1 },
966 { TTI::SK_Transpose, MVT::v16i8, 1 },
967 { TTI::SK_Transpose, MVT::v4i16, 1 },
968 { TTI::SK_Transpose, MVT::v8i16, 1 },
969 { TTI::SK_Transpose, MVT::v2i32, 1 },
970 { TTI::SK_Transpose, MVT::v4i32, 1 },
971 { TTI::SK_Transpose, MVT::v2i64, 1 },
972 { TTI::SK_Transpose, MVT::v2f32, 1 },
973 { TTI::SK_Transpose, MVT::v4f32, 1 },
974 { TTI::SK_Transpose, MVT::v2f64, 1 },
975 // Select shuffle kinds.
976 // TODO: handle vXi8/vXi16.
977 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
978 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
979 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
980 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
981 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
982 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
983 // PermuteSingleSrc shuffle kinds.
984 // TODO: handle vXi8/vXi16.
985 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
986 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
987 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
988 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
989 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
990 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
991 };
992 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
993 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
994 return LT.first * Entry->Cost;
995 }
996
997 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
998}

/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains some functions that are useful for math stuff.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_MATHEXTRAS_H
15#define LLVM_SUPPORT_MATHEXTRAS_H
16
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/SwapByteOrder.h"
19#include <algorithm>
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <limits>
24#include <type_traits>
25
26#ifdef __ANDROID_NDK__
27#include <android/api-level.h>
28#endif
29
30#ifdef _MSC_VER
31// Declare these intrinsics manually rather than including intrin.h. It's very
32// expensive, and MathExtras.h is popular.
33// #include <intrin.h>
34extern "C" {
35unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
36unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
37unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
38unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
39}
40#endif
41
42namespace llvm {
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53namespace detail {
54template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
55 static std::size_t count(T Val, ZeroBehavior) {
56 if (!Val)
57 return std::numeric_limits<T>::digits;
58 if (Val & 0x1)
59 return 0;
60
61 // Bisection method.
62 std::size_t ZeroBits = 0;
63 T Shift = std::numeric_limits<T>::digits >> 1;
64 T Mask = std::numeric_limits<T>::max() >> Shift;
65 while (Shift) {
66 if ((Val & Mask) == 0) {
67 Val >>= Shift;
68 ZeroBits |= Shift;
69 }
70 Shift >>= 1;
71 Mask >>= Shift;
72 }
73 return ZeroBits;
74 }
75};
76
77#if __GNUC__ >= 4 || defined(_MSC_VER)
78template <typename T> struct TrailingZerosCounter<T, 4> {
79 static std::size_t count(T Val, ZeroBehavior ZB) {
80 if (ZB != ZB_Undefined && Val == 0)
81 return 32;
82
83#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
84 return __builtin_ctz(Val);
85#elif defined(_MSC_VER)
86 unsigned long Index;
87 _BitScanForward(&Index, Val);
88 return Index;
89#endif
90 }
91};
92
93#if !defined(_MSC_VER) || defined(_M_X64)
94template <typename T> struct TrailingZerosCounter<T, 8> {
95 static std::size_t count(T Val, ZeroBehavior ZB) {
96 if (ZB != ZB_Undefined && Val == 0)
97 return 64;
98
99#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
100 return __builtin_ctzll(Val);
101#elif defined(_MSC_VER)
102 unsigned long Index;
103 _BitScanForward64(&Index, Val);
104 return Index;
105#endif
106 }
107};
108#endif
109#endif
110} // namespace detail
111
112/// Count the number of 0's from the least significant bit to the most,
113/// stopping at the first 1.
114///
115/// Only unsigned integral types are allowed.
116///
117/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
118/// valid arguments.
119template <typename T>
120std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
121 static_assert(std::numeric_limits<T>::is_integer &&
122 !std::numeric_limits<T>::is_signed,
123 "Only unsigned integral types are allowed.");
124 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
125}
126
127namespace detail {
128template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
129 static std::size_t count(T Val, ZeroBehavior) {
130 if (!Val)
131 return std::numeric_limits<T>::digits;
132
133 // Bisection method.
134 std::size_t ZeroBits = 0;
135 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
136 T Tmp = Val >> Shift;
137 if (Tmp)
138 Val = Tmp;
139 else
140 ZeroBits |= Shift;
141 }
142 return ZeroBits;
143 }
144};
145
146#if __GNUC__ >= 4 || defined(_MSC_VER)
147template <typename T> struct LeadingZerosCounter<T, 4> {
148 static std::size_t count(T Val, ZeroBehavior ZB) {
149 if (ZB != ZB_Undefined && Val == 0)
150 return 32;
151
152#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
153 return __builtin_clz(Val);
154#elif defined(_MSC_VER)
155 unsigned long Index;
156 _BitScanReverse(&Index, Val);
157 return Index ^ 31;
158#endif
159 }
160};
161
162#if !defined(_MSC_VER) || defined(_M_X64)
163template <typename T> struct LeadingZerosCounter<T, 8> {
164 static std::size_t count(T Val, ZeroBehavior ZB) {
165 if (ZB != ZB_Undefined && Val == 0)
166 return 64;
167
168#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
169 return __builtin_clzll(Val);
170#elif defined(_MSC_VER)
171 unsigned long Index;
172 _BitScanReverse64(&Index, Val);
173 return Index ^ 63;
174#endif
175 }
176};
177#endif
178#endif
179} // namespace detail
180
181/// Count the number of 0's from the most significant bit to the least,
182/// stopping at the first 1.
183///
184/// Only unsigned integral types are allowed.
185///
186/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
187/// valid arguments.
188template <typename T>
189std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
190 static_assert(std::numeric_limits<T>::is_integer &&
191 !std::numeric_limits<T>::is_signed,
192 "Only unsigned integral types are allowed.");
193 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
194}
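
Log2_32, which appears later in this header (past the excerpt shown), is built on countLeadingZeros as 31 - countLeadingZeros(Value). With the default ZB_Width behavior, countLeadingZeros(0u) returns 32, so Log2_32(0) evaluates to 31u - 32u, which wraps to 4294967295: the shift amount reported at line 745 above. A minimal standalone check of that wraparound (a hypothetical snippet, not part of this header):

    #include <cassert>
    #include <cstdint>

    static unsigned log2_32(uint32_t Value) {
      unsigned LZ = Value ? __builtin_clz(Value) : 32; // ZB_Width semantics
      return 31 - LZ;                                  // wraps when Value == 0
    }

    int main() {
      assert(log2_32(1) == 0);
      assert(log2_32(7) == 2);
      assert(log2_32(0) == 4294967295u); // shifting an 'int' by this is UB
    }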
195
196/// Get the index of the first set bit starting from the least
197/// significant bit.
198///
199/// Only unsigned integral types are allowed.
200///
201/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
202/// valid arguments.
203template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
204 if (ZB == ZB_Max && Val == 0)
205 return std::numeric_limits<T>::max();
206
207 return countTrailingZeros(Val, ZB_Undefined);
208}
209
210/// Create a bitmask with the N right-most bits set to 1, and all other
211/// bits set to 0. Only unsigned types are allowed.
212template <typename T> T maskTrailingOnes(unsigned N) {
213 static_assert(std::is_unsigned<T>::value, "Invalid type!");
214 const unsigned Bits = CHAR_BIT * sizeof(T);
215 assert(N <= Bits && "Invalid bit index");
216 return N == 0 ? 0 : (T(-1) >> (Bits - N));
217}
218
219/// Create a bitmask with the N left-most bits set to 1, and all other
220/// bits set to 0. Only unsigned types are allowed.
221template <typename T> T maskLeadingOnes(unsigned N) {
222 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
223}
224
225/// Create a bitmask with the N right-most bits set to 0, and all other
226/// bits set to 1. Only unsigned types are allowed.
227template <typename T> T maskTrailingZeros(unsigned N) {
228 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
229}
230
231/// Create a bitmask with the N left-most bits set to 0, and all other
232/// bits set to 1. Only unsigned types are allowed.
233template <typename T> T maskLeadingZeros(unsigned N) {
234 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
235}
236
237/// Get the index of the last set bit starting from the least
238/// significant bit.
239///
240/// Only unsigned integral types are allowed.
241///
242/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
243/// valid arguments.
244template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
245 if (ZB == ZB_Max && Val == 0)
246 return std::numeric_limits<T>::max();
247
248 // Use ^ instead of - because both gcc and llvm can remove the associated ^
249 // in the __builtin_clz intrinsic on x86.
250 return countLeadingZeros(Val, ZB_Undefined) ^
251 (std::numeric_limits<T>::digits - 1);
252}
253
254/// Macro compressed bit reversal table for 256 bits.
255///
256/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
257static const unsigned char BitReverseTable256[256] = {
258#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
259#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
260#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
261 R6(0), R6(2), R6(1), R6(3)
262#undef R2
263#undef R4
264#undef R6
265};
266
267/// Reverse the bits in \p Val.
268template <typename T>
269T reverseBits(T Val) {
270 unsigned char in[sizeof(Val)];
271 unsigned char out[sizeof(Val)];
272 std::memcpy(in, &Val, sizeof(Val));
273 for (unsigned i = 0; i < sizeof(Val); ++i)
274 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
275 std::memcpy(&Val, out, sizeof(Val));
276 return Val;
277}
278
279// NOTE: The following support functions use the _32/_64 extensions instead of
280// type overloading so that signed and unsigned integers can be used without
281// ambiguity.
282
283/// Return the high 32 bits of a 64 bit value.
284constexpr inline uint32_t Hi_32(uint64_t Value) {
285 return static_cast<uint32_t>(Value >> 32);
286}
287
288/// Return the low 32 bits of a 64 bit value.
289constexpr inline uint32_t Lo_32(uint64_t Value) {
290 return static_cast<uint32_t>(Value);
291}
292
293/// Make a 64-bit integer from a high / low pair of 32-bit integers.
294constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
295 return ((uint64_t)High << 32) | (uint64_t)Low;
296}
297
298/// Checks if an integer fits into the given bit width.
299template <unsigned N> constexpr inline bool isInt(int64_t x) {
300 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
301}
302// Template specializations to get better code for common cases.
303template <> constexpr inline bool isInt<8>(int64_t x) {
304 return static_cast<int8_t>(x) == x;
305}
306template <> constexpr inline bool isInt<16>(int64_t x) {
307 return static_cast<int16_t>(x) == x;
308}
309template <> constexpr inline bool isInt<32>(int64_t x) {
310 return static_cast<int32_t>(x) == x;
311}
312
313/// Checks if a signed integer is an N bit number shifted left by S.
314template <unsigned N, unsigned S>
315constexpr inline bool isShiftedInt(int64_t x) {
316 static_assert(
317 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number).");
318 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
319 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
320}
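// [Editor's sketch, not part of the original header] Boundary behavior of the
// two predicates above:
static_assert(isInt<8>(127) && isInt<8>(-128), "int8 endpoints fit");
static_assert(!isInt<8>(128) && !isInt<8>(-129), "one past either end fails");
static_assert(isShiftedInt<8, 2>(-512) && !isShiftedInt<8, 2>(2),
              "value must also be a multiple of 1 << S");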
321
322/// Checks if an unsigned integer fits into the given bit width.
323///
324/// This is written as two functions rather than as simply
325///
326/// return N >= 64 || X < (UINT64_C(1) << N);
327///
328/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
329/// left too many places.
330template <unsigned N>
331constexpr inline typename std::enable_if<(N < 64), bool>::type
332isUInt(uint64_t X) {
333 static_assert(N > 0, "isUInt<0> doesn't make sense");
334 return X < (UINT64_C(1) << (N));
335}
336template <unsigned N>
337constexpr inline typename std::enable_if<N >= 64, bool>::type
338isUInt(uint64_t X) {
339 return true;
340}
341
342// Template specializations to get better code for common cases.
343template <> constexpr inline bool isUInt<8>(uint64_t x) {
344 return static_cast<uint8_t>(x) == x;
345}
346template <> constexpr inline bool isUInt<16>(uint64_t x) {
347 return static_cast<uint16_t>(x) == x;
348}
349template <> constexpr inline bool isUInt<32>(uint64_t x) {
350 return static_cast<uint32_t>(x) == x;
351}
352
353/// Checks if an unsigned integer is an N bit number shifted left by S.
354template <unsigned N, unsigned S>
355constexpr inline bool isShiftedUInt(uint64_t x) {
356 static_assert(
357 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
358 static_assert(N + S <= 64,
359 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
360 // Per the two static_asserts above, S must be strictly less than 64. So
361 // 1 << S is not undefined behavior.
362 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
363}
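// [Editor's sketch, not part of the original header] The unsigned
// counterparts behave analogously:
static_assert(isUInt<8>(255) && !isUInt<8>(256), "uint8 endpoint");
static_assert(isShiftedUInt<8, 8>(0xFF00) && !isShiftedUInt<8, 8>(0xFF01),
              "the low S bits must be zero");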
364
365/// Gets the maximum value for a N-bit unsigned integer.
366inline uint64_t maxUIntN(uint64_t N) {
367 assert(N > 0 && N <= 64 && "integer width out of range");
368
369 // uint64_t(1) << 64 is undefined behavior, so we can't do
370 // (uint64_t(1) << N) - 1
371 // without checking first that N != 64. But this works and doesn't have a
372 // branch.
373 return UINT64_MAX >> (64 - N);
374}
375
376/// Gets the minimum value for a N-bit signed integer.
377inline int64_t minIntN(int64_t N) {
378 assert(N > 0 && N <= 64 && "integer width out of range");
379
380 return -(UINT64_C(1)<<(N-1));
381}
382
383/// Gets the maximum value for a N-bit signed integer.
384inline int64_t maxIntN(int64_t N) {
385 assert(N > 0 && N <= 64 && "integer width out of range");
386
387 // This relies on two's complement wraparound when N == 64, so we convert to
388 // int64_t only at the very end to avoid UB.
389 return (UINT64_C(1) << (N - 1)) - 1;
390}
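// [Editor's sketch, not part of the original header] These three are not
// constexpr, but evaluate to the expected endpoints:
//   maxUIntN(8) == 255,   maxUIntN(64) == UINT64_MAX
//   minIntN(8)  == -128,  maxIntN(8)   == 127,  maxIntN(64) == INT64_MAX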
391
392/// Checks if an unsigned integer fits into the given (dynamic) bit width.
393inline bool isUIntN(unsigned N, uint64_t x) {
394 return N >= 64 || x <= maxUIntN(N);
395}
396
397/// Checks if a signed integer fits into the given (dynamic) bit width.
398inline bool isIntN(unsigned N, int64_t x) {
399 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
400}
401
402/// Return true if the argument is a non-empty sequence of ones starting at the
403/// least significant bit with the remainder zero (32 bit version).
404/// Ex. isMask_32(0x0000FFFFU) == true.
405constexpr inline bool isMask_32(uint32_t Value) {
406 return Value && ((Value + 1) & Value) == 0;
407}
408
409/// Return true if the argument is a non-empty sequence of ones starting at the
410/// least significant bit with the remainder zero (64 bit version).
411constexpr inline bool isMask_64(uint64_t Value) {
412 return Value && ((Value + 1) & Value) == 0;
413}
414
415/// Return true if the argument contains a non-empty sequence of ones with the
416/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
417constexpr inline bool isShiftedMask_32(uint32_t Value) {
418 return Value && isMask_32((Value - 1) | Value);
419}
420
421/// Return true if the argument contains a non-empty sequence of ones with the
422/// remainder zero (64 bit version.)
423constexpr inline bool isShiftedMask_64(uint64_t Value) {
424 return Value && isMask_64((Value - 1) | Value);
425}
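// [Editor's sketch, not part of the original header] A mask is a run of ones
// anchored at bit 0; a shifted mask may start at any bit:
static_assert(isMask_32(0x0000FFFF) && !isMask_32(0x00000FF0),
              "masks are anchored at the LSB");
static_assert(isShiftedMask_32(0x00000FF0) && !isShiftedMask_32(0x00000FF1),
              "exactly one contiguous run of ones");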
426
427/// Return true if the argument is a power of two > 0.
428/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
429constexpr inline bool isPowerOf2_32(uint32_t Value) {
430 return Value && !(Value & (Value - 1));
431}
432
433/// Return true if the argument is a power of two > 0 (64 bit edition.)
434constexpr inline bool isPowerOf2_64(uint64_t Value) {
435 return Value && !(Value & (Value - 1));
436}
437
438/// Return a byte-swapped representation of the 16-bit argument.
439inline uint16_t ByteSwap_16(uint16_t Value) {
440 return sys::SwapByteOrder_16(Value);
441}
442
443/// Return a byte-swapped representation of the 32-bit argument.
444inline uint32_t ByteSwap_32(uint32_t Value) {
445 return sys::SwapByteOrder_32(Value);
446}
447
448/// Return a byte-swapped representation of the 64-bit argument.
449inline uint64_t ByteSwap_64(uint64_t Value) {
450 return sys::SwapByteOrder_64(Value);
451}
452
453/// Count the number of ones from the most significant bit to the first
454/// zero bit.
455///
456/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
457/// Only unsigned integral types are allowed.
458///
459/// \param ZB the behavior on an input of all ones. Only ZB_Width and
460/// ZB_Undefined are valid arguments.
461template <typename T>
462std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
463 static_assert(std::numeric_limits<T>::is_integer &&
464 !std::numeric_limits<T>::is_signed,
465 "Only unsigned integral types are allowed.");
466 return countLeadingZeros<T>(~Value, ZB);
467}
468
469/// Count the number of ones from the least significant bit to the first
470/// zero bit.
471///
472/// Ex. countTrailingOnes(0x00FF00FF) == 8.
473/// Only unsigned integral types are allowed.
474///
475/// \param ZB the behavior on an input of all ones. Only ZB_Width and
476/// ZB_Undefined are valid arguments.
477template <typename T>
478std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
479 static_assert(std::numeric_limits<T>::is_integer &&
480 !std::numeric_limits<T>::is_signed,
481 "Only unsigned integral types are allowed.");
482 return countTrailingZeros<T>(~Value, ZB);
483}
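// [Editor's sketch, not part of the original header] Both reduce to zero
// counts of the complement:
//   countLeadingOnes(0xFF0FFF00u)  == 8   // ~v = 0x00F000FF, 8 leading zeros
//   countTrailingOnes(0x00FF00FFu) == 8   // ~v = 0xFF00FF00, 8 trailing zeros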
484
485namespace detail {
486template <typename T, std::size_t SizeOfT> struct PopulationCounter {
487 static unsigned count(T Value) {
488 // Generic version, forward to 32 bits.
489 static_assert(SizeOfT <= 4, "Not implemented!");
490#if __GNUC__ >= 4
491 return __builtin_popcount(Value);
492#else
493 uint32_t v = Value;
494 v = v - ((v >> 1) & 0x55555555);
495 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
496 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
497#endif
498 }
499};
500
501template <typename T> struct PopulationCounter<T, 8> {
502 static unsigned count(T Value) {
503#if __GNUC__ >= 4
504 return __builtin_popcountll(Value);
505#else
506 uint64_t v = Value;
507 v = v - ((v >> 1) & 0x5555555555555555ULL);
508 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
509 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
510 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
511#endif
512 }
513};
514} // namespace detail
515
516/// Count the number of set bits in a value.
517/// Ex. countPopulation(0xF000F000) = 8
518/// Returns 0 if the word is zero.
519template <typename T>
520inline unsigned countPopulation(T Value) {
521 static_assert(std::numeric_limits<T>::is_integer &&
522 !std::numeric_limits<T>::is_signed,
523 "Only unsigned integral types are allowed.");
524 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
525}
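// [Editor's sketch, not part of the original header]
// countPopulation(0xF000F000u) == 8. The fallback path is the classic
// parallel bit count; the builtin path can lower to a single hardware
// popcount instruction when the target provides one.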
526
527/// Return the log base 2 of the specified value.
528inline double Log2(double Value) {
529#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
530 return __builtin_log(Value) / __builtin_log(2.0);
531#else
532 return log2(Value);
533#endif
534}
535
536/// Return the floor log base 2 of the specified value, -1 if the value is zero.
537/// (32 bit edition.)
538/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
539inline unsigned Log2_32(uint32_t Value) {
540 return 31 - countLeadingZeros(Value);
(10) Returning the value 4294967295
541}
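// [Editor's note] This is where analyzer event 10 above originates: for
// Value == 0, countLeadingZeros returns 32 (ZB_Width), and 31 - 32 wraps to
// 4294967295 in the unsigned return type (the documented "-1"). If that
// result is later used as a shift amount, the shift is undefined behavior,
// which is the defect this report tracks. E.g. Log2_32(32) == 5, but
// Log2_32(0) yields 0xFFFFFFFF.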
542
543/// Return the floor log base 2 of the specified value, -1 if the value is zero.
544/// (64 bit edition.)
545inline unsigned Log2_64(uint64_t Value) {
546 return 63 - countLeadingZeros(Value);
547}
548
549/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
550/// (32 bit edition).
551/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
552inline unsigned Log2_32_Ceil(uint32_t Value) {
553 return 32 - countLeadingZeros(Value - 1);
554}
555
556/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
557/// (64 bit edition.)
558inline unsigned Log2_64_Ceil(uint64_t Value) {
559 return 64 - countLeadingZeros(Value - 1);
560}
561
562/// Return the greatest common divisor of the values using Euclid's algorithm.
563inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
564 while (B) {
565 uint64_t T = B;
566 B = A % B;
567 A = T;
568 }
569 return A;
570}
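// [Editor's sketch, not part of the original header] Plain Euclid:
//   GreatestCommonDivisor64(48, 36) == 12
//   GreatestCommonDivisor64(7, 0)   == 7   // loop body never runs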
571
572/// This function takes a 64-bit integer and returns the bit equivalent double.
573inline double BitsToDouble(uint64_t Bits) {
574 double D;
575 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
576 memcpy(&D, &Bits, sizeof(Bits));
577 return D;
578}
579
580/// This function takes a 32-bit integer and returns the bit equivalent float.
581inline float BitsToFloat(uint32_t Bits) {
582 float F;
583 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
584 memcpy(&F, &Bits, sizeof(Bits));
585 return F;
586}
587
588/// This function takes a double and returns the bit equivalent 64-bit integer.
589/// Note that copying doubles around changes the bits of NaNs on some hosts,
590/// notably x86, so this routine cannot be used if these bits are needed.
591inline uint64_t DoubleToBits(double Double) {
592 uint64_t Bits;
593 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
594 memcpy(&Bits, &Double, sizeof(Double));
595 return Bits;
596}
597
598/// This function takes a float and returns the bit equivalent 32-bit integer.
599/// Note that copying floats around changes the bits of NaNs on some hosts,
600/// notably x86, so this routine cannot be used if these bits are needed.
601inline uint32_t FloatToBits(float Float) {
602 uint32_t Bits;
603 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
604 memcpy(&Bits, &Float, sizeof(Float));
605 return Bits;
606}
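// [Editor's sketch, not part of the original header] These four are
// memcpy-based bit casts (well-defined, unlike a pointer pun):
//   FloatToBits(1.0f)                   == 0x3F800000
//   BitsToDouble(0x3FF0000000000000ULL) == 1.0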
607
608/// A and B are either alignments or offsets. Return the minimum alignment that
609/// may be assumed after adding the two together.
610constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
611 // The largest power of 2 that divides both A and B.
612 //
613 // Replace "-Value" by "1+~Value" in the following commented code to avoid
614 // MSVC warning C4146
615 // return (A | B) & -(A | B);
616 return (A | B) & (1 + ~(A | B));
617}
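// [Editor's sketch, not part of the original header] (A | B) & -(A | B)
// isolates the lowest set bit of A | B, the largest power of two dividing
// both arguments:
static_assert(MinAlign(8, 12) == 4, "lowest common power-of-two factor");
static_assert(MinAlign(16, 16) == 16, "equal alignments are preserved");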
618
619/// Aligns \c Addr to \c Alignment bytes, rounding up.
620///
621/// Alignment should be a power of two. This method rounds up, so
622/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8.
623inline uintptr_t alignAddr(const void *Addr, size_t Alignment) {
624 assert(Alignment && isPowerOf2_64((uint64_t)Alignment) &&
625        "Alignment is not a power of two!");
626
627 assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr);
628
629 return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1));
630}
631
632/// Returns the necessary adjustment for aligning \c Ptr to \c Alignment
633/// bytes, rounding up.
634inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) {
635 return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr;
636}
637
638/// Returns the next power of two (in 64-bits) that is strictly greater than A.
639/// Returns zero on overflow.
640inline uint64_t NextPowerOf2(uint64_t A) {
641 A |= (A >> 1);
642 A |= (A >> 2);
643 A |= (A >> 4);
644 A |= (A >> 8);
645 A |= (A >> 16);
646 A |= (A >> 32);
647 return A + 1;
648}
649
650/// Returns the power of two which is less than or equal to the given value.
651/// Essentially, it is a floor operation across the domain of powers of two.
652inline uint64_t PowerOf2Floor(uint64_t A) {
653 if (!A) return 0;
654 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
655}
656
657/// Returns the power of two which is greater than or equal to the given value.
658/// Essentially, it is a ceil operation across the domain of powers of two.
659inline uint64_t PowerOf2Ceil(uint64_t A) {
660 if (!A)
661 return 0;
662 return NextPowerOf2(A - 1);
663}
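// [Editor's sketch, not part of the original header] NextPowerOf2 is strict;
// the Floor/Ceil pair brackets a value:
//   NextPowerOf2(8)  == 16   // strictly greater
//   PowerOf2Floor(5) == 4,  PowerOf2Ceil(5) == 8,  PowerOf2Ceil(8) == 8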
664
665/// Returns the next integer (mod 2**64) that is greater than or equal to
666/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
667///
668/// If non-zero \p Skew is specified, the return value will be a minimal
669/// integer that is greater than or equal to \p Value and equal to
670/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
671/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
672///
673/// Examples:
674/// \code
675/// alignTo(5, 8) = 8
676/// alignTo(17, 8) = 24
677/// alignTo(~0LL, 8) = 0
678/// alignTo(321, 255) = 510
679///
680/// alignTo(5, 8, 7) = 7
681/// alignTo(17, 8, 1) = 17
682/// alignTo(~0LL, 8, 3) = 3
683/// alignTo(321, 255, 42) = 552
684/// \endcode
685inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
686 assert(Align != 0u && "Align can't be 0.");
687 Skew %= Align;
688 return (Value + Align - 1 - Skew) / Align * Align + Skew;
689}
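// [Editor's sketch, not part of the original header] Without a skew this is
// plain round-up to a multiple; a skew shifts the whole grid:
//   alignTo(17, 8)    == 24   // (17 + 7) / 8 * 8
//   alignTo(17, 8, 1) == 17   // 17 is already 8*2 + 1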
690
691/// Returns the next integer (mod 2**64) that is greater than or equal to
692/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
693template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
694 static_assert(Align != 0u, "Align must be non-zero");
695 return (Value + Align - 1) / Align * Align;
696}
697
698/// Returns the integer ceil(Numerator / Denominator).
699inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
700 return alignTo(Numerator, Denominator) / Denominator;
701}
702
703/// \c alignTo for contexts where a constant expression is required.
704/// \sa alignTo
705///
706/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
707template <uint64_t Align>
708struct AlignTo {
709 static_assert(Align != 0u, "Align must be non-zero");
710 template <uint64_t Value>
711 struct from_value {
712 static const uint64_t value = (Value + Align - 1) / Align * Align;
713 };
714};
715
716/// Returns the largest uint64_t less than or equal to \p Value and is
717/// \p Skew mod \p Align. \p Align must be non-zero
718inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
719 assert(Align != 0u && "Align can't be 0.");
720 Skew %= Align;
721 return (Value - Skew) / Align * Align + Skew;
722}
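// [Editor's sketch, not part of the original header] alignDown is the
// rounding-down counterpart of alignTo:
//   alignDown(17, 8)    == 16
//   alignDown(17, 8, 1) == 17   // 8*2 + 1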
723
724/// Returns the offset to the next integer (mod 2**64) that is greater than
725/// or equal to \p Value and is a multiple of \p Align. \p Align must be
726/// non-zero.
727inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
728 return alignTo(Value, Align) - Value;
729}
730
731/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
732/// Requires 0 < B <= 32.
733template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
734 static_assert(B > 0, "Bit width can't be 0.");
735 static_assert(B <= 32, "Bit width out of range.");
736 return int32_t(X << (32 - B)) >> (32 - B);
737}
738
739/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
740/// Requires 0 < B <= 32.
741inline int32_t SignExtend32(uint32_t X, unsigned B) {
742 assert(B > 0 && "Bit width can't be 0.");
743 assert(B <= 32 && "Bit width out of range.");
744 return int32_t(X << (32 - B)) >> (32 - B);
745}
746
747/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
748/// Requires 0 < B <= 64.
749template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
750 static_assert(B > 0, "Bit width can't be 0.");
751 static_assert(B <= 64, "Bit width out of range.");
752 return int64_t(x << (64 - B)) >> (64 - B);
753}
754
755/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
756/// Requires 0 < B <= 64.
757inline int64_t SignExtend64(uint64_t X, unsigned B) {
758 assert(B > 0 && "Bit width can't be 0.");
759 assert(B <= 64 && "Bit width out of range.");
760 return int64_t(X << (64 - B)) >> (64 - B);
761}
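// [Editor's sketch, not part of the original header] Sign extension works by
// shifting the field's sign bit up to bit 31/63, then arithmetic-shifting
// back down:
static_assert(SignExtend32<4>(0xF) == -1, "0b1111 is -1 as a 4-bit value");
static_assert(SignExtend64<32>(0x80000000ULL) == -0x80000000LL,
              "bit 31 is the sign bit of a 32-bit field");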
762
763/// Subtract two unsigned integers, X and Y, of type T and return the absolute
764/// value of the result.
765template <typename T>
766typename std::enable_if<std::is_unsigned<T>::value, T>::type
767AbsoluteDifference(T X, T Y) {
768 return std::max(X, Y) - std::min(X, Y);
769}
770
771/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
772/// maximum representable value of T on overflow. ResultOverflowed indicates if
773/// the result is larger than the maximum representable value of type T.
774template <typename T>
775typename std::enable_if<std::is_unsigned<T>::value, T>::type
776SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
777 bool Dummy;
778 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
779 // Hacker's Delight, p. 29
780 T Z = X + Y;
781 Overflowed = (Z < X || Z < Y);
782 if (Overflowed)
783 return std::numeric_limits<T>::max();
784 else
785 return Z;
786}
787
788/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
789/// maximum representable value of T on overflow. ResultOverflowed indicates if
790/// the result is larger than the maximum representable value of type T.
791template <typename T>
792typename std::enable_if<std::is_unsigned<T>::value, T>::type
793SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
794 bool Dummy;
795 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
796
797 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
798 // because it fails for uint16_t (where multiplication can have undefined
799 // behavior due to promotion to int), and requires a division in addition
800 // to the multiplication.
801
802 Overflowed = false;
803
804 // Log2(Z) would be either Log2Z or Log2Z + 1.
805 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
806 // will necessarily be less than Log2Max as desired.
807 int Log2Z = Log2_64(X) + Log2_64(Y);
808 const T Max = std::numeric_limits<T>::max();
809 int Log2Max = Log2_64(Max);
810 if (Log2Z < Log2Max) {
811 return X * Y;
812 }
813 if (Log2Z > Log2Max) {
814 Overflowed = true;
815 return Max;
816 }
817
818 // We're going to use the top bit, and maybe overflow one
819 // bit past it. Multiply all but the bottom bit then add
820 // that on at the end.
821 T Z = (X >> 1) * Y;
822 if (Z & ~(Max >> 1)) {
823 Overflowed = true;
824 return Max;
825 }
826 Z <<= 1;
827 if (X & 1)
828 return SaturatingAdd(Z, Y, ResultOverflowed);
829
830 return Z;
831}
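// [Editor's sketch, not part of the original header] With uint8_t:
//   SaturatingAdd<uint8_t>(200, 100)    -> 255, overflow flag set
//   SaturatingMultiply<uint8_t>(16, 16) -> 255, overflow flag set
//   SaturatingMultiply<uint8_t>(15, 15) -> 225, no overflow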
832
833/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
834/// the product. Clamp the result to the maximum representable value of T on
835/// overflow. ResultOverflowed indicates if the result is larger than the
836/// maximum representable value of type T.
837template <typename T>
838typename std::enable_if<std::is_unsigned<T>::value, T>::type
839SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
840 bool Dummy;
841 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
842
843 T Product = SaturatingMultiply(X, Y, &Overflowed);
844 if (Overflowed)
845 return Product;
846
847 return SaturatingAdd(A, Product, &Overflowed);
848}
849
850/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
851extern const float huge_valf;
852} // End llvm namespace
853
854#endif