Bug Summary

File: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Warning: line 744, column 21
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
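
A minimal standalone sketch of how the shift amount becomes 4294967295 (this mirrors the common definition of llvm::Log2_32 as 31 - countLeadingZeros(Value), where countLeadingZeros(0) returns the full bit width; the constant 7 is the MaxStridedLoads cap from the code below, everything else is illustrative):

#include <cstdint>
#include <iostream>

// Portable stand-in for llvm::countLeadingZeros; returns 32 for V == 0.
static unsigned clz32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Mask = 0x80000000u; Mask && !(V & Mask); Mask >>= 1)
    ++N;
  return N;
}

// Same shape as llvm::Log2_32: wraps around to 4294967295 when V == 0.
static unsigned log2_32(uint32_t V) { return 31 - clz32(V); }

int main() {
  const unsigned MaxStridedLoads = 7; // cap used by the Falkor heuristic below
  unsigned StridedLoads = 8;          // any count above the cap makes the quotient 0
  unsigned Shift = log2_32(MaxStridedLoads / StridedLoads);
  std::cout << Shift << '\n';         // prints 4294967295
  // UP.MaxCount = 1 << Shift;        // the undefined shift reported at line 744
  return 0;
}
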

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn336939/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-7~svn336939/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-7~svn336939/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn336939/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn336939/build-llvm/lib/Target/AArch64 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-07-13-043813-3945-1 -x c++ /build/llvm-toolchain-snapshot-7~svn336939/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/Support/Debug.h"
19#include <algorithm>
20using namespace llvm;
21
22#define DEBUG_TYPE "aarch64tti"
23
24static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
25 cl::init(true), cl::Hidden);
26
27bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
28 const Function *Callee) const {
29 const TargetMachine &TM = getTLI()->getTargetMachine();
30
31 const FeatureBitset &CallerBits =
32 TM.getSubtargetImpl(*Caller)->getFeatureBits();
33 const FeatureBitset &CalleeBits =
34 TM.getSubtargetImpl(*Callee)->getFeatureBits();
35
36 // Inline a callee if its target-features are a subset of the caller's
37 // target-features.
38 return (CallerBits & CalleeBits) == CalleeBits;
39}
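
As a quick illustration of the subset test above (a sketch only: std::bitset stands in for FeatureBitset, and the feature patterns are invented), (CallerBits & CalleeBits) == CalleeBits holds exactly when every feature the callee requires is also enabled in the caller:

#include <bitset>
#include <cassert>

int main() {
  std::bitset<8> Caller("01101110");      // hypothetical caller target-features
  std::bitset<8> Callee("00101100");      // a subset of the caller's features
  assert((Caller & Callee) == Callee);    // inlining is considered compatible
  Callee.set(0);                          // callee now needs a feature the caller lacks
  assert(!((Caller & Callee) == Callee)); // inlining is rejected
  return 0;
}
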
40
41/// Calculate the cost of materializing a 64-bit value. This helper
42/// method might only calculate a fraction of a larger immediate. Therefore it
43/// is valid to return a cost of ZERO.
44int AArch64TTIImpl::getIntImmCost(int64_t Val) {
45 // Check if the immediate can be encoded within an instruction.
46 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
47 return 0;
48
49 if (Val < 0)
50 Val = ~Val;
51
52 // Calculate how many moves we will need to materialize this constant.
53 unsigned LZ = countLeadingZeros((uint64_t)Val);
54 return (64 - LZ + 15) / 16;
55}
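
A rough worked example of the move-count formula above (the clz helper is hand-rolled and the constants are arbitrary illustrations): each MOVZ/MOVK covers one 16-bit chunk, so the cost is the number of 16-bit chunks remaining after the leading zeros, i.e. (64 - LZ + 15) / 16.

#include <cstdint>
#include <iostream>

// Portable stand-in for llvm::countLeadingZeros on 64-bit values.
static unsigned clz64(uint64_t V) {
  unsigned N = 0;
  for (uint64_t Mask = 1ull << 63; Mask && !(V & Mask); Mask >>= 1)
    ++N;
  return N;
}

int main() {
  const uint64_t Vals[] = {0x1234ull, 0x12345678ull, 0x123456789abcdef0ull};
  for (uint64_t V : Vals) {
    unsigned LZ = clz64(V);
    std::cout << std::hex << V << std::dec << " -> "
              << (64 - LZ + 15) / 16 << " move(s)\n"; // 1, 2 and 4 respectively
  }
  return 0;
}
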
56
57/// Calculate the cost of materializing the given constant.
58int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
59 assert(Ty->isIntegerTy());
60
61 unsigned BitSize = Ty->getPrimitiveSizeInBits();
62 if (BitSize == 0)
63 return ~0U;
64
65 // Sign-extend all constants to a multiple of 64-bit.
66 APInt ImmVal = Imm;
67 if (BitSize & 0x3f)
68 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
69
70 // Split the constant into 64-bit chunks and calculate the cost for each
71 // chunk.
72 int Cost = 0;
73 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
74 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
75 int64_t Val = Tmp.getSExtValue();
76 Cost += getIntImmCost(Val);
77 }
78 // We need at least one instruction to materialize the constant.
79 return std::max(1, Cost);
80}
81
82int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
83 const APInt &Imm, Type *Ty) {
84 assert(Ty->isIntegerTy());
85
86 unsigned BitSize = Ty->getPrimitiveSizeInBits();
87 // There is no cost model for constants with a bit size of 0. Return TCC_Free
88 // here, so that constant hoisting will ignore this constant.
89 if (BitSize == 0)
90 return TTI::TCC_Free;
91
92 unsigned ImmIdx = ~0U;
93 switch (Opcode) {
94 default:
95 return TTI::TCC_Free;
96 case Instruction::GetElementPtr:
97 // Always hoist the base address of a GetElementPtr.
98 if (Idx == 0)
99 return 2 * TTI::TCC_Basic;
100 return TTI::TCC_Free;
101 case Instruction::Store:
102 ImmIdx = 0;
103 break;
104 case Instruction::Add:
105 case Instruction::Sub:
106 case Instruction::Mul:
107 case Instruction::UDiv:
108 case Instruction::SDiv:
109 case Instruction::URem:
110 case Instruction::SRem:
111 case Instruction::And:
112 case Instruction::Or:
113 case Instruction::Xor:
114 case Instruction::ICmp:
115 ImmIdx = 1;
116 break;
117 // Always return TCC_Free for the shift value of a shift instruction.
118 case Instruction::Shl:
119 case Instruction::LShr:
120 case Instruction::AShr:
121 if (Idx == 1)
122 return TTI::TCC_Free;
123 break;
124 case Instruction::Trunc:
125 case Instruction::ZExt:
126 case Instruction::SExt:
127 case Instruction::IntToPtr:
128 case Instruction::PtrToInt:
129 case Instruction::BitCast:
130 case Instruction::PHI:
131 case Instruction::Call:
132 case Instruction::Select:
133 case Instruction::Ret:
134 case Instruction::Load:
135 break;
136 }
137
138 if (Idx == ImmIdx) {
139 int NumConstants = (BitSize + 63) / 64;
140 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
141 return (Cost <= NumConstants * TTI::TCC_Basic)
142 ? static_cast<int>(TTI::TCC_Free)
143 : Cost;
144 }
145 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
146}
147
148int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
149 const APInt &Imm, Type *Ty) {
150 assert(Ty->isIntegerTy());
151
152 unsigned BitSize = Ty->getPrimitiveSizeInBits();
153 // There is no cost model for constants with a bit size of 0. Return TCC_Free
154 // here, so that constant hoisting will ignore this constant.
155 if (BitSize == 0)
156 return TTI::TCC_Free;
157
158 switch (IID) {
159 default:
160 return TTI::TCC_Free;
161 case Intrinsic::sadd_with_overflow:
162 case Intrinsic::uadd_with_overflow:
163 case Intrinsic::ssub_with_overflow:
164 case Intrinsic::usub_with_overflow:
165 case Intrinsic::smul_with_overflow:
166 case Intrinsic::umul_with_overflow:
167 if (Idx == 1) {
168 int NumConstants = (BitSize + 63) / 64;
169 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
170 return (Cost <= NumConstants * TTI::TCC_Basic)
171 ? static_cast<int>(TTI::TCC_Free)
172 : Cost;
173 }
174 break;
175 case Intrinsic::experimental_stackmap:
176 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
177 return TTI::TCC_Free;
178 break;
179 case Intrinsic::experimental_patchpoint_void:
180 case Intrinsic::experimental_patchpoint_i64:
181 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
182 return TTI::TCC_Free;
183 break;
184 }
185 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
186}
187
188TargetTransformInfo::PopcntSupportKind
189AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
190 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
191 if (TyWidth == 32 || TyWidth == 64)
192 return TTI::PSK_FastHardware;
193 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
194 return TTI::PSK_Software;
195}
196
197bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
198 ArrayRef<const Value *> Args) {
199
200 // A helper that returns a vector type from the given type. The number of
201 // elements in type Ty determines the vector width.
202 auto toVectorTy = [&](Type *ArgTy) {
203 return VectorType::get(ArgTy->getScalarType(),
204 DstTy->getVectorNumElements());
205 };
206
207 // Exit early if DstTy is not a vector type whose elements are at least
208 // 16-bits wide.
209 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
210 return false;
211
212 // Determine if the operation has a widening variant. We consider both the
213 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
214 // instructions.
215 //
216 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
217 // verify that their extending operands are eliminated during code
218 // generation.
219 switch (Opcode) {
220 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
221 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
222 break;
223 default:
224 return false;
225 }
226
227 // To be a widening instruction (either the "wide" or "long" versions), the
228 // second operand must be a sign- or zero extend having a single user. We
229 // only consider extends having a single user because they may otherwise not
230 // be eliminated.
231 if (Args.size() != 2 ||
232 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
233 !Args[1]->hasOneUse())
234 return false;
235 auto *Extend = cast<CastInst>(Args[1]);
236
237 // Legalize the destination type and ensure it can be used in a widening
238 // operation.
239 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
240 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
241 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
242 return false;
243
244 // Legalize the source type and ensure it can be used in a widening
245 // operation.
246 Type *SrcTy = toVectorTy(Extend->getSrcTy());
247 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
248 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
249 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
250 return false;
251
252 // Get the total number of vector elements in the legalized types.
253 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
254 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
255
256 // Return true if the legalized types have the same number of vector elements
257 // and the destination element type size is twice that of the source type.
258 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
259}
260
261int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
262 const Instruction *I) {
263 int ISD = TLI->InstructionOpcodeToISD(Opcode);
264 assert(ISD && "Invalid opcode");
265
266 // If the cast is observable, and it is used by a widening instruction (e.g.,
267 // uaddl, saddw, etc.), it may be free.
268 if (I && I->hasOneUse()) {
269 auto *SingleUser = cast<Instruction>(*I->user_begin());
270 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
271 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
272 // If the cast is the second operand, it is free. We will generate either
273 // a "wide" or "long" version of the widening instruction.
274 if (I == SingleUser->getOperand(1))
275 return 0;
276 // If the cast is not the second operand, it will be free if it looks the
277 // same as the second operand. In this case, we will generate a "long"
278 // version of the widening instruction.
279 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
280 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
281 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
282 return 0;
283 }
284 }
285
286 EVT SrcTy = TLI->getValueType(DL, Src);
287 EVT DstTy = TLI->getValueType(DL, Dst);
288
289 if (!SrcTy.isSimple() || !DstTy.isSimple())
290 return BaseT::getCastInstrCost(Opcode, Dst, Src);
291
292 static const TypeConversionCostTblEntry
293 ConversionTbl[] = {
294 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
295 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
296 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
297 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
298
299 // The number of shll instructions for the extension.
300 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
301 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
302 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
303 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
304 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
305 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
306 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
307 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
308 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
309 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
310 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
311 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
312 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
313 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
314 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
315 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
316
317 // LowerVectorINT_TO_FP:
318 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
319 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
320 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
321 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
322 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
323 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
324
325 // Complex: to v2f32
326 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
327 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
328 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
329 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
330 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
331 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
332
333 // Complex: to v4f32
334 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
335 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
336 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
337 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
338
339 // Complex: to v8f32
340 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
341 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
342 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
343 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
344
345 // Complex: to v16f32
346 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
347 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
348
349 // Complex: to v2f64
350 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
351 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
352 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
353 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
354 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
355 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
356
357
358 // LowerVectorFP_TO_INT
359 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
360 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
361 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
362 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
363 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
364 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
365
366 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
367 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
368 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
369 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
370 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
371 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
372 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
373
374 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
375 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
376 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
377 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
378 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
379
380 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
381 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
382 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
383 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
384 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
385 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
386 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
387 };
388
389 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
390 DstTy.getSimpleVT(),
391 SrcTy.getSimpleVT()))
392 return Entry->Cost;
393
394 return BaseT::getCastInstrCost(Opcode, Dst, Src);
395}
396
397int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
398 VectorType *VecTy,
399 unsigned Index) {
400
401 // Make sure we were given a valid extend opcode.
402 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
403        "Invalid opcode");
404
405 // We are extending an element we extract from a vector, so the source type
406 // of the extend is the element type of the vector.
407 auto *Src = VecTy->getElementType();
408
409 // Sign- and zero-extends are for integer types only.
410 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
411
412 // Get the cost for the extract. We compute the cost (if any) for the extend
413 // below.
414 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
415
416 // Legalize the types.
417 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
418 auto DstVT = TLI->getValueType(DL, Dst);
419 auto SrcVT = TLI->getValueType(DL, Src);
420
421 // If the resulting type is still a vector and the destination type is legal,
422 // we may get the extension for free. If not, get the default cost for the
423 // extend.
424 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
425 return Cost + getCastInstrCost(Opcode, Dst, Src);
426
427 // The destination type should be larger than the element type. If not, get
428 // the default cost for the extend.
429 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
430 return Cost + getCastInstrCost(Opcode, Dst, Src);
431
432 switch (Opcode) {
433 default:
434 llvm_unreachable("Opcode should be either SExt or ZExt");
435
436 // For sign-extends, we only need a smov, which performs the extension
437 // automatically.
438 case Instruction::SExt:
439 return Cost;
440
441 // For zero-extends, the extend is performed automatically by a umov unless
442 // the destination type is i64 and the element type is i8 or i16.
443 case Instruction::ZExt:
444 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
445 return Cost;
446 }
447
448 // If we are unable to perform the extend for free, get the default cost.
449 return Cost + getCastInstrCost(Opcode, Dst, Src);
450}
451
452int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
453 unsigned Index) {
454 assert(Val->isVectorTy() && "This must be a vector type");
455
456 if (Index != -1U) {
457 // Legalize the type.
458 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
459
460 // This type is legalized to a scalar type.
461 if (!LT.second.isVector())
462 return 0;
463
464 // The type may be split. Normalize the index to the new type.
465 unsigned Width = LT.second.getVectorNumElements();
466 Index = Index % Width;
467
468 // The element at index zero is already inside the vector.
469 if (Index == 0)
470 return 0;
471 }
472
473 // All other insert/extracts cost this much.
474 return ST->getVectorInsertExtractBaseCost();
475}
476
477int AArch64TTIImpl::getArithmeticInstrCost(
478 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
479 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
480 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
481 // Legalize the type.
482 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
483
484 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
485 // add in the widening overhead specified by the sub-target. Since the
486 // extends feeding widening instructions are performed automatically, they
487 // aren't present in the generated code and have a zero cost. By adding a
488 // widening overhead here, we attach the total cost of the combined operation
489 // to the widening instruction.
490 int Cost = 0;
491 if (isWideningInstruction(Ty, Opcode, Args))
492 Cost += ST->getWideningBaseCost();
493
494 int ISD = TLI->InstructionOpcodeToISD(Opcode);
495
496 switch (ISD) {
497 default:
498 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
499 Opd1PropInfo, Opd2PropInfo);
500 case ISD::SDIV:
501 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
502 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
503 // On AArch64, scalar signed division by a power-of-two constant is
504 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
505 // The OperandValue properties may not be the same as those of the previous
506 // operation; conservatively assume OP_None.
507 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
508 TargetTransformInfo::OP_None,
509 TargetTransformInfo::OP_None);
510 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
511 TargetTransformInfo::OP_None,
512 TargetTransformInfo::OP_None);
513 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
514 TargetTransformInfo::OP_None,
515 TargetTransformInfo::OP_None);
516 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
517 TargetTransformInfo::OP_None,
518 TargetTransformInfo::OP_None);
519 return Cost;
520 }
521 LLVM_FALLTHROUGH;
522 case ISD::UDIV:
523 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
524 auto VT = TLI->getValueType(DL, Ty);
525 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
526 // Vector signed division by a constant is expanded to the
527 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
528 // to MULHS + SUB + SRL + ADD + SRL.
529 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
530 Opd2Info,
531 TargetTransformInfo::OP_None,
532 TargetTransformInfo::OP_None);
533 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
534 Opd2Info,
535 TargetTransformInfo::OP_None,
536 TargetTransformInfo::OP_None);
537 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
538 Opd2Info,
539 TargetTransformInfo::OP_None,
540 TargetTransformInfo::OP_None);
541 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
542 }
543 }
544
545 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
546 Opd1PropInfo, Opd2PropInfo);
547 if (Ty->isVectorTy()) {
548 // On AArch64, vector divisions are not supported natively and are
549 // expanded into scalar divisions of each pair of elements.
550 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
551 Opd2Info, Opd1PropInfo, Opd2PropInfo);
552 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
553 Opd2Info, Opd1PropInfo, Opd2PropInfo);
554 // TODO: if one of the arguments is scalar, then it's not necessary to
555 // double the cost of handling the vector elements.
556 Cost += Cost;
557 }
558 return Cost;
559
560 case ISD::ADD:
561 case ISD::MUL:
562 case ISD::XOR:
563 case ISD::OR:
564 case ISD::AND:
565 // These nodes are marked as 'custom' for combining purposes only.
566 // We know that they are legal. See LowerAdd in ISelLowering.
567 return (Cost + 1) * LT.first;
568 }
569}
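
For the SDIV-by-power-of-two case priced above, a sketch of the kind of branch-free expansion being modeled (illustrative only: the exact instruction sequence is chosen by the backend, and this assumes arithmetic right shift of negative signed values, which is what AArch64 provides):

#include <cassert>
#include <cstdint>

// Signed divide by 8 without a divide instruction: bias negative inputs so the
// arithmetic shift rounds toward zero, matching C semantics for X / 8.
static int32_t sdiv_by_8(int32_t X) {
  int32_t Bias = (X >> 31) & 7; // 7 for negative X, 0 otherwise (the select-like step)
  return (X + Bias) >> 3;       // add the bias, then arithmetic shift right
}

int main() {
  const int32_t Tests[] = {40, -40, 7, -7, 0};
  for (int32_t X : Tests)
    assert(sdiv_by_8(X) == X / 8);
  return 0;
}
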
570
571int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
572 const SCEV *Ptr) {
573 // Address computations in vectorized code with non-consecutive addresses will
574 // likely result in more instructions compared to scalar code where the
575 // computation can more often be merged into the index mode. The resulting
576 // extra micro-ops can significantly decrease throughput.
577 unsigned NumVectorInstToHideOverhead = 10;
578 int MaxMergeDistance = 64;
579
580 if (Ty->isVectorTy() && SE &&
581 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
582 return NumVectorInstToHideOverhead;
583
584 // In many cases the address computation is not merged into the instruction
585 // addressing mode.
586 return 1;
587}
588
589int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
590 Type *CondTy, const Instruction *I) {
591
592 int ISD = TLI->InstructionOpcodeToISD(Opcode);
593 // We don't lower some vector selects well that are wider than the register
594 // width.
595 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
596 // We would need this many instructions to hide the scalarization happening.
597 const int AmortizationCost = 20;
598 static const TypeConversionCostTblEntry
599 VectorSelectTbl[] = {
600 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
601 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
602 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
603 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
604 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
605 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
606 };
607
608 EVT SelCondTy = TLI->getValueType(DL, CondTy);
609 EVT SelValTy = TLI->getValueType(DL, ValTy);
610 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
611 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
612 SelCondTy.getSimpleVT(),
613 SelValTy.getSimpleVT()))
614 return Entry->Cost;
615 }
616 }
617 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
618}
619
620int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
621 unsigned Alignment, unsigned AddressSpace,
622 const Instruction *I) {
623 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
624
625 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
626 LT.second.is128BitVector() && Alignment < 16) {
627 // Unaligned stores are extremely inefficient. We don't split all
628 // unaligned 128-bit stores because of the negative impact that has been
629 // shown in practice on inlined block copy code.
630 // We make such stores expensive so that we will only vectorize if there
631 // are 6 other instructions getting vectorized.
632 const int AmortizationCost = 6;
633
634 return LT.first * 2 * AmortizationCost;
635 }
636
637 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
638 unsigned ProfitableNumElements;
639 if (Opcode == Instruction::Store)
640 // We use a custom trunc store lowering so v.4b should be profitable.
641 ProfitableNumElements = 4;
642 else
643 // We scalarize the loads because there is no v.4b register and we
644 // have to promote the elements to v.2.
645 ProfitableNumElements = 8;
646
647 if (Ty->getVectorNumElements() < ProfitableNumElements) {
648 unsigned NumVecElts = Ty->getVectorNumElements();
649 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
650 // We generate 2 instructions per vector element.
651 return NumVectorizableInstsToAmortize * NumVecElts * 2;
652 }
653 }
654
655 return LT.first;
656}
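
To make the small-i8-vector penalty above concrete (a sketch that just re-evaluates the same arithmetic; the v4i8 load is a made-up example): with NumVecElts = 4 the hook charges (NumVecElts * 2) * NumVecElts * 2 = 64, large enough that such an access is only vectorized when plenty of other work is vectorized alongside it.

#include <cassert>

int main() {
  unsigned NumVecElts = 4;                                  // hypothetical v4i8 load
  unsigned NumVectorizableInstsToAmortize = NumVecElts * 2; // as in the code above
  unsigned Cost = NumVectorizableInstsToAmortize * NumVecElts * 2; // 2 insts per element
  assert(Cost == 64);
  return 0;
}
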
657
658int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
659 unsigned Factor,
660 ArrayRef<unsigned> Indices,
661 unsigned Alignment,
662 unsigned AddressSpace) {
663 assert(Factor >= 2 && "Invalid interleave factor");
664 assert(isa<VectorType>(VecTy) && "Expect a vector type");
665
666 if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
667 unsigned NumElts = VecTy->getVectorNumElements();
668 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
669
670 // ldN/stN only support legal vector types of size 64 or 128 in bits.
671 // Accesses having vector types that are a multiple of 128 bits can be
672 // matched to more than one ldN/stN instruction.
673 if (NumElts % Factor == 0 &&
674 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
675 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
676 }
677
678 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
679 Alignment, AddressSpace);
680}
681
682int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
683 int Cost = 0;
684 for (auto *I : Tys) {
685 if (!I->isVectorTy())
686 continue;
687 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
688 Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
689 getMemoryOpCost(Instruction::Load, I, 128, 0);
690 }
691 return Cost;
692}
693
694unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
695 return ST->getMaxInterleaveFactor();
696}
697
698// For Falkor, we want to avoid having too many strided loads in a loop since
699// that can exhaust the HW prefetcher resources. We adjust the unroller
700// MaxCount preference below to attempt to ensure unrolling doesn't create too
701// many strided loads.
702static void
703getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
704 TargetTransformInfo::UnrollingPreferences &UP) {
705 enum { MaxStridedLoads = 7 };
706 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
707 int StridedLoads = 0;
708 // FIXME? We could make this more precise by looking at the CFG and
709 // e.g. not counting loads in each side of an if-then-else diamond.
710 for (const auto BB : L->blocks()) {
711 for (auto &I : *BB) {
712 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
713 if (!LMemI)
714 continue;
715
716 Value *PtrValue = LMemI->getPointerOperand();
717 if (L->isLoopInvariant(PtrValue))
718 continue;
719
720 const SCEV *LSCEV = SE.getSCEV(PtrValue);
721 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
722 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
723 continue;
724
725 // FIXME? We could take pairing of unrolled load copies into account
726 // by looking at the AddRec, but we would probably have to limit this
727 // to loops with no stores or other memory optimization barriers.
728 ++StridedLoads;
729 // We've seen enough strided loads that seeing more won't make a
730 // difference.
731 if (StridedLoads > MaxStridedLoads / 2)
732 return StridedLoads;
733 }
734 }
735 return StridedLoads;
736 };
737
738 int StridedLoads = countStridedLoads(L, SE);
739 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
740                   << " strided loads\n");
741 // Pick the largest power of 2 unroll count that won't result in too many
742 // strided loads.
743 if (StridedLoads) {
7. Assuming 'StridedLoads' is not equal to 0
8. Taking true branch
744 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
9. The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'int'
745 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
746                   << UP.MaxCount << '\n');
747 }
748}
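
One possible way to avoid the undefined shift flagged at line 744 (a hedged sketch, not the actual upstream fix; the helper name and the fallback policy are invented for illustration): only scale MaxCount when the quotient MaxStridedLoads / StridedLoads is non-zero, so the shift amount stays within [0, 31].

static void setFalkorMaxCount(unsigned MaxStridedLoads, unsigned StridedLoads,
                              unsigned &MaxCount) {
  if (StridedLoads == 0)
    return;                          // nothing to limit
  unsigned Quotient = MaxStridedLoads / StridedLoads;
  if (Quotient == 0) {               // more strided loads than the cap
    MaxCount = 1;                    // effectively disable extra unrolling
    return;
  }
  // Largest power of two not exceeding the quotient; Quotient >= 1 here, so
  // Shift ends up in [0, 31] and the left shift is well defined.
  unsigned Shift = 0;
  while ((Quotient >>= 1) != 0)
    ++Shift;
  MaxCount = 1u << Shift;
}
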
749
750void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
751 TTI::UnrollingPreferences &UP) {
752 // Enable partial unrolling and runtime unrolling.
753 BaseT::getUnrollingPreferences(L, SE, UP);
754
755 // For inner loop, it is more likely to be a hot one, and the runtime check
756 // can be promoted out from LICM pass, so the overhead is less, let's try
757 // a larger threshold to unroll more loops.
758 if (L->getLoopDepth() > 1)
1. Assuming the condition is false
2. Taking false branch
759 UP.PartialThreshold *= 2;
760
761 // Disable partial & runtime unrolling on -Os.
762 UP.PartialOptSizeThreshold = 0;
763
764 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
3. Assuming the condition is true
5. Taking true branch
765 EnableFalkorHWPFUnrollFix)
4. Assuming the condition is true
766 getFalkorUnrollingPreferences(L, SE, UP);
6. Calling 'getFalkorUnrollingPreferences'
767}
768
769Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
770 Type *ExpectedType) {
771 switch (Inst->getIntrinsicID()) {
772 default:
773 return nullptr;
774 case Intrinsic::aarch64_neon_st2:
775 case Intrinsic::aarch64_neon_st3:
776 case Intrinsic::aarch64_neon_st4: {
777 // Create a struct type
778 StructType *ST = dyn_cast<StructType>(ExpectedType);
779 if (!ST)
780 return nullptr;
781 unsigned NumElts = Inst->getNumArgOperands() - 1;
782 if (ST->getNumElements() != NumElts)
783 return nullptr;
784 for (unsigned i = 0, e = NumElts; i != e; ++i) {
785 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
786 return nullptr;
787 }
788 Value *Res = UndefValue::get(ExpectedType);
789 IRBuilder<> Builder(Inst);
790 for (unsigned i = 0, e = NumElts; i != e; ++i) {
791 Value *L = Inst->getArgOperand(i);
792 Res = Builder.CreateInsertValue(Res, L, i);
793 }
794 return Res;
795 }
796 case Intrinsic::aarch64_neon_ld2:
797 case Intrinsic::aarch64_neon_ld3:
798 case Intrinsic::aarch64_neon_ld4:
799 if (Inst->getType() == ExpectedType)
800 return Inst;
801 return nullptr;
802 }
803}
804
805bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
806 MemIntrinsicInfo &Info) {
807 switch (Inst->getIntrinsicID()) {
808 default:
809 break;
810 case Intrinsic::aarch64_neon_ld2:
811 case Intrinsic::aarch64_neon_ld3:
812 case Intrinsic::aarch64_neon_ld4:
813 Info.ReadMem = true;
814 Info.WriteMem = false;
815 Info.PtrVal = Inst->getArgOperand(0);
816 break;
817 case Intrinsic::aarch64_neon_st2:
818 case Intrinsic::aarch64_neon_st3:
819 case Intrinsic::aarch64_neon_st4:
820 Info.ReadMem = false;
821 Info.WriteMem = true;
822 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
823 break;
824 }
825
826 switch (Inst->getIntrinsicID()) {
827 default:
828 return false;
829 case Intrinsic::aarch64_neon_ld2:
830 case Intrinsic::aarch64_neon_st2:
831 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
832 break;
833 case Intrinsic::aarch64_neon_ld3:
834 case Intrinsic::aarch64_neon_st3:
835 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
836 break;
837 case Intrinsic::aarch64_neon_ld4:
838 case Intrinsic::aarch64_neon_st4:
839 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
840 break;
841 }
842 return true;
843}
844
845/// See if \p I should be considered for address type promotion. We check if \p
846/// I is a sext with the right type and used in memory accesses. If it is used in a
847/// "complex" getelementptr, we allow it to be promoted without finding other
848/// sext instructions that sign extended the same initial value. A getelementptr
849/// is considered as "complex" if it has more than 2 operands.
850bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
851 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
852 bool Considerable = false;
853 AllowPromotionWithoutCommonHeader = false;
854 if (!isa<SExtInst>(&I))
855 return false;
856 Type *ConsideredSExtType =
857 Type::getInt64Ty(I.getParent()->getParent()->getContext());
858 if (I.getType() != ConsideredSExtType)
859 return false;
860 // See if the sext is the one with the right type and used in at least one
861 // GetElementPtrInst.
862 for (const User *U : I.users()) {
863 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
864 Considerable = true;
865 // A getelementptr is considered as "complex" if it has more than 2
866 // operands. We will promote a SExt used in such complex GEP as we
867 // expect some computation to be merged if they are done on 64 bits.
868 if (GEPInst->getNumOperands() > 2) {
869 AllowPromotionWithoutCommonHeader = true;
870 break;
871 }
872 }
873 }
874 return Considerable;
875}
876
877unsigned AArch64TTIImpl::getCacheLineSize() {
878 return ST->getCacheLineSize();
879}
880
881unsigned AArch64TTIImpl::getPrefetchDistance() {
882 return ST->getPrefetchDistance();
883}
884
885unsigned AArch64TTIImpl::getMinPrefetchStride() {
886 return ST->getMinPrefetchStride();
887}
888
889unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
890 return ST->getMaxPrefetchIterationsAhead();
891}
892
893bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
894 TTI::ReductionFlags Flags) const {
895 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
896 unsigned ScalarBits = Ty->getScalarSizeInBits();
897 switch (Opcode) {
898 case Instruction::FAdd:
899 case Instruction::FMul:
900 case Instruction::And:
901 case Instruction::Or:
902 case Instruction::Xor:
903 case Instruction::Mul:
904 return false;
905 case Instruction::Add:
906 return ScalarBits * Ty->getVectorNumElements() >= 128;
907 case Instruction::ICmp:
908 return (ScalarBits < 64) &&
909 (ScalarBits * Ty->getVectorNumElements() >= 128);
910 case Instruction::FCmp:
911 return Flags.NoNaN;
912 default:
913 llvm_unreachable("Unhandled reduction opcode");
914 }
915 return false;
916}
917
918int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
919 bool IsPairwiseForm) {
920
921 if (IsPairwiseForm)
922 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
923
924 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
925 MVT MTy = LT.second;
926 int ISD = TLI->InstructionOpcodeToISD(Opcode);
927 assert(ISD && "Invalid opcode");
928
929 // Horizontal adds can use the 'addv' instruction. We model the cost of these
930 // instructions as normal vector adds. This is the only arithmetic vector
931 // reduction operation for which we have an instruction.
932 static const CostTblEntry CostTblNoPairwise[]{
933 {ISD::ADD, MVT::v8i8, 1},
934 {ISD::ADD, MVT::v16i8, 1},
935 {ISD::ADD, MVT::v4i16, 1},
936 {ISD::ADD, MVT::v8i16, 1},
937 {ISD::ADD, MVT::v4i32, 1},
938 };
939
940 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
941 return LT.first * Entry->Cost;
942
943 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
944}
945
946int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
947 Type *SubTp) {
948 if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Select ||
949 Kind == TTI::SK_PermuteSingleSrc) {
950 static const CostTblEntry ShuffleTbl[] = {
951 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
952 // 'zip1/zip2' instructions.
953 { TTI::SK_Transpose, MVT::v8i8, 1 },
954 { TTI::SK_Transpose, MVT::v16i8, 1 },
955 { TTI::SK_Transpose, MVT::v4i16, 1 },
956 { TTI::SK_Transpose, MVT::v8i16, 1 },
957 { TTI::SK_Transpose, MVT::v2i32, 1 },
958 { TTI::SK_Transpose, MVT::v4i32, 1 },
959 { TTI::SK_Transpose, MVT::v2i64, 1 },
960 { TTI::SK_Transpose, MVT::v2f32, 1 },
961 { TTI::SK_Transpose, MVT::v4f32, 1 },
962 { TTI::SK_Transpose, MVT::v2f64, 1 },
963 // Select shuffle kinds.
964 // TODO: handle vXi8/vXi16.
965 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
966 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
967 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
968 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
969 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
970 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
971 // PermuteSingleSrc shuffle kinds.
972 // TODO: handle vXi8/vXi16.
973 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
974 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
975 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
976 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
977 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
978 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
979 };
980 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
981 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
982 return LT.first * Entry->Cost;
983 }
984
985 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
986}