Bug Summary

File: include/llvm/CodeGen/TargetLowering.h
Warning: line 1247, column 41
Called C++ object pointer is null

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~svn374877/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn374877/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn374877=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-10-15-233810-7101-1 -x c++ /build/llvm-toolchain-snapshot-10~svn374877/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-10~svn374877/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExpandImm.h"
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/Support/Debug.h"
19#include <algorithm>
20using namespace llvm;
21
22#define DEBUG_TYPE "aarch64tti"
23
24static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
25 cl::init(true), cl::Hidden);
26
27bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
28 const Function *Callee) const {
29 const TargetMachine &TM = getTLI()->getTargetMachine();
30
31 const FeatureBitset &CallerBits =
32 TM.getSubtargetImpl(*Caller)->getFeatureBits();
33 const FeatureBitset &CalleeBits =
34 TM.getSubtargetImpl(*Callee)->getFeatureBits();
35
36 // Inline a callee if its target-features are a subset of the caller's
37 // target-features.
38 return (CallerBits & CalleeBits) == CalleeBits;
39}
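
A quick reading of the subset test above: the callee is inline-compatible when every feature bit it requires is also set for the caller. A minimal standalone sketch, using plain 64-bit masks instead of llvm::FeatureBitset (the helper and masks are hypothetical, for illustration only):

#include <cstdint>

// The callee's features must be a subset of the caller's features.
static bool isFeatureSubset(uint64_t CallerBits, uint64_t CalleeBits) {
  return (CallerBits & CalleeBits) == CalleeBits;
}

// e.g. isFeatureSubset(0b0111, 0b0011) == true  (callee needs less)
//      isFeatureSubset(0b0111, 0b1001) == false (callee needs an extra feature)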
40
41/// Calculate the cost of materializing a 64-bit value. This helper
42/// method might only calculate a fraction of a larger immediate. Therefore it
43/// is valid to return a cost of ZERO.
44int AArch64TTIImpl::getIntImmCost(int64_t Val) {
45 // Check if the immediate can be encoded within an instruction.
46 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
47 return 0;
48
49 if (Val < 0)
50 Val = ~Val;
51
52 // Calculate how many moves we will need to materialize this constant.
53 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
54 AArch64_IMM::expandMOVImm(Val, 64, Insn);
55 return Insn.size();
56}
57
58/// Calculate the cost of materializing the given constant.
59int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
60 assert(Ty->isIntegerTy());
61
62 unsigned BitSize = Ty->getPrimitiveSizeInBits();
63 if (BitSize == 0)
64 return ~0U;
65
66 // Sign-extend all constants to a multiple of 64-bit.
67 APInt ImmVal = Imm;
68 if (BitSize & 0x3f)
69 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
70
71 // Split the constant into 64-bit chunks and calculate the cost for each
72 // chunk.
73 int Cost = 0;
74 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
75 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
76 int64_t Val = Tmp.getSExtValue();
77 Cost += getIntImmCost(Val);
78 }
79 // We need at least one instruction to materialize the constant.
80 return std::max(1, Cost);
81}
82
83int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
84 const APInt &Imm, Type *Ty) {
85 assert(Ty->isIntegerTy());
86
87 unsigned BitSize = Ty->getPrimitiveSizeInBits();
88 // There is no cost model for constants with a bit size of 0. Return TCC_Free
89 // here, so that constant hoisting will ignore this constant.
90 if (BitSize == 0)
91 return TTI::TCC_Free;
92
93 unsigned ImmIdx = ~0U;
94 switch (Opcode) {
95 default:
96 return TTI::TCC_Free;
97 case Instruction::GetElementPtr:
98 // Always hoist the base address of a GetElementPtr.
99 if (Idx == 0)
100 return 2 * TTI::TCC_Basic;
101 return TTI::TCC_Free;
102 case Instruction::Store:
103 ImmIdx = 0;
104 break;
105 case Instruction::Add:
106 case Instruction::Sub:
107 case Instruction::Mul:
108 case Instruction::UDiv:
109 case Instruction::SDiv:
110 case Instruction::URem:
111 case Instruction::SRem:
112 case Instruction::And:
113 case Instruction::Or:
114 case Instruction::Xor:
115 case Instruction::ICmp:
116 ImmIdx = 1;
117 break;
118 // Always return TCC_Free for the shift value of a shift instruction.
119 case Instruction::Shl:
120 case Instruction::LShr:
121 case Instruction::AShr:
122 if (Idx == 1)
123 return TTI::TCC_Free;
124 break;
125 case Instruction::Trunc:
126 case Instruction::ZExt:
127 case Instruction::SExt:
128 case Instruction::IntToPtr:
129 case Instruction::PtrToInt:
130 case Instruction::BitCast:
131 case Instruction::PHI:
132 case Instruction::Call:
133 case Instruction::Select:
134 case Instruction::Ret:
135 case Instruction::Load:
136 break;
137 }
138
139 if (Idx == ImmIdx) {
140 int NumConstants = (BitSize + 63) / 64;
141 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
142 return (Cost <= NumConstants * TTI::TCC_Basic)
143 ? static_cast<int>(TTI::TCC_Free)
144 : Cost;
145 }
146 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
147}
148
149int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
150 const APInt &Imm, Type *Ty) {
151 assert(Ty->isIntegerTy());
152
153 unsigned BitSize = Ty->getPrimitiveSizeInBits();
154 // There is no cost model for constants with a bit size of 0. Return TCC_Free
155 // here, so that constant hoisting will ignore this constant.
156 if (BitSize == 0)
157 return TTI::TCC_Free;
158
159 switch (IID) {
160 default:
161 return TTI::TCC_Free;
162 case Intrinsic::sadd_with_overflow:
163 case Intrinsic::uadd_with_overflow:
164 case Intrinsic::ssub_with_overflow:
165 case Intrinsic::usub_with_overflow:
166 case Intrinsic::smul_with_overflow:
167 case Intrinsic::umul_with_overflow:
168 if (Idx == 1) {
169 int NumConstants = (BitSize + 63) / 64;
170 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
171 return (Cost <= NumConstants * TTI::TCC_Basic)
172 ? static_cast<int>(TTI::TCC_Free)
173 : Cost;
174 }
175 break;
176 case Intrinsic::experimental_stackmap:
177 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
178 return TTI::TCC_Free;
179 break;
180 case Intrinsic::experimental_patchpoint_void:
181 case Intrinsic::experimental_patchpoint_i64:
182 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
183 return TTI::TCC_Free;
184 break;
185 }
186 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
187}
188
189TargetTransformInfo::PopcntSupportKind
190AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
191 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
192 if (TyWidth == 32 || TyWidth == 64)
193 return TTI::PSK_FastHardware;
194 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
195 return TTI::PSK_Software;
196}
197
198bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
199 ArrayRef<const Value *> Args) {
200
201 // A helper that returns a vector type from the given type. The number of
202 // elements in type Ty determines the vector width.
203 auto toVectorTy = [&](Type *ArgTy) {
204 return VectorType::get(ArgTy->getScalarType(),
205 DstTy->getVectorNumElements());
206 };
207
208 // Exit early if DstTy is not a vector type whose elements are at least
209 // 16-bits wide.
210 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
211 return false;
212
213 // Determine if the operation has a widening variant. We consider both the
214 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
215 // instructions.
216 //
217 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
218 // verify that their extending operands are eliminated during code
219 // generation.
220 switch (Opcode) {
221 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
222 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
223 break;
224 default:
225 return false;
226 }
227
228 // To be a widening instruction (either the "wide" or "long" versions), the
229 // second operand must be a sign- or zero extend having a single user. We
230 // only consider extends having a single user because they may otherwise not
231 // be eliminated.
232 if (Args.size() != 2 ||
233 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
234 !Args[1]->hasOneUse())
235 return false;
236 auto *Extend = cast<CastInst>(Args[1]);
237
238 // Legalize the destination type and ensure it can be used in a widening
239 // operation.
240 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
241 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
242 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
243 return false;
244
245 // Legalize the source type and ensure it can be used in a widening
246 // operation.
247 Type *SrcTy = toVectorTy(Extend->getSrcTy());
248 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
249 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
250 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
251 return false;
252
253 // Get the total number of vector elements in the legalized types.
254 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
255 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
256
257 // Return true if the legalized types have the same number of vector elements
258 // and the destination element type size is twice that of the source type.
259 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
260}
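
The final check above is a pure shape test on the legalized types. A minimal sketch (hypothetical helper; sizes are in bits):

// Widening applies when both legalized types have the same element count and
// the destination element is exactly twice as wide as the source element,
// e.g. a v4i32 add fed by an extend from v4i16 can become saddw/uaddw.
static bool hasWideningShape(unsigned NumDstEls, unsigned DstElTySize,
                             unsigned NumSrcEls, unsigned SrcElTySize) {
  return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
}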
261
262int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
263 const Instruction *I) {
264 int ISD = TLI->InstructionOpcodeToISD(Opcode);
265 assert(ISD && "Invalid opcode");
266
267 // If the cast is observable, and it is used by a widening instruction (e.g.,
268 // uaddl, saddw, etc.), it may be free.
269 if (I && I->hasOneUse()) {
270 auto *SingleUser = cast<Instruction>(*I->user_begin());
271 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
272 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
273 // If the cast is the second operand, it is free. We will generate either
274 // a "wide" or "long" version of the widening instruction.
275 if (I == SingleUser->getOperand(1))
276 return 0;
277 // If the cast is not the second operand, it will be free if it looks the
278 // same as the second operand. In this case, we will generate a "long"
279 // version of the widening instruction.
280 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
281 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
282 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
283 return 0;
284 }
285 }
286
287 EVT SrcTy = TLI->getValueType(DL, Src);
288 EVT DstTy = TLI->getValueType(DL, Dst);
289
290 if (!SrcTy.isSimple() || !DstTy.isSimple())
291 return BaseT::getCastInstrCost(Opcode, Dst, Src);
292
293 static const TypeConversionCostTblEntry
294 ConversionTbl[] = {
295 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
296 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
297 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
298 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
299
300 // The number of shll instructions for the extension.
301 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
302 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
303 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
304 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
305 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
306 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
307 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
308 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
309 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
310 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
311 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
312 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
313 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
314 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
315 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
316 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
317
318 // LowerVectorINT_TO_FP:
319 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
320 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
321 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
322 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
323 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
324 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
325
326 // Complex: to v2f32
327 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
328 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
329 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
330 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
331 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
332 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
333
334 // Complex: to v4f32
335 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
336 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
337 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
338 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
339
340 // Complex: to v8f32
341 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
342 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
343 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
344 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
345
346 // Complex: to v16f32
347 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
348 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
349
350 // Complex: to v2f64
351 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
352 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
353 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
354 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
355 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
356 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
357
358
359 // LowerVectorFP_TO_INT
360 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
361 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
362 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
363 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
364 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
365 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
366
367 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
368 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
369 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
370 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
371 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
372 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
373 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
374
375 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
376 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
377 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
378 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
379 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
380
381 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
382 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
383 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
384 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
385 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
386 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
387 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
388 };
389
390 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
391 DstTy.getSimpleVT(),
392 SrcTy.getSimpleVT()))
393 return Entry->Cost;
394
395 return BaseT::getCastInstrCost(Opcode, Dst, Src);
396}
397
398int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
399 VectorType *VecTy,
400 unsigned Index) {
401
402 // Make sure we were given a valid extend opcode.
403 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
404 "Invalid opcode");
405
406 // We are extending an element we extract from a vector, so the source type
407 // of the extend is the element type of the vector.
408 auto *Src = VecTy->getElementType();
409
410 // Sign- and zero-extends are for integer types only.
411 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
412
413 // Get the cost for the extract. We compute the cost (if any) for the extend
414 // below.
415 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
416
417 // Legalize the types.
418 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
419 auto DstVT = TLI->getValueType(DL, Dst);
420 auto SrcVT = TLI->getValueType(DL, Src);
421
422 // If the resulting type is still a vector and the destination type is legal,
423 // we may get the extension for free. If not, get the default cost for the
424 // extend.
425 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
426 return Cost + getCastInstrCost(Opcode, Dst, Src);
427
428 // The destination type should be larger than the element type. If not, get
429 // the default cost for the extend.
430 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
431 return Cost + getCastInstrCost(Opcode, Dst, Src);
432
433 switch (Opcode) {
434 default:
435 llvm_unreachable("Opcode should be either SExt or ZExt");
436
437 // For sign-extends, we only need a smov, which performs the extension
438 // automatically.
439 case Instruction::SExt:
440 return Cost;
441
442 // For zero-extends, the extend is performed automatically by a umov unless
443 // the destination type is i64 and the element type is i8 or i16.
444 case Instruction::ZExt:
445 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
446 return Cost;
447 }
448
449 // If we are unable to perform the extend for free, get the default cost.
450 return Cost + getCastInstrCost(Opcode, Dst, Src);
451}
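
The zero-extend case above can be summarized as a small predicate. A sketch under the same assumptions (hypothetical helper; sizes in bits):

// A zext folded into the lane move (umov) is free unless an i8/i16 lane is
// widened all the way to i64; the 32-bit umov already clears the upper bits,
// so i8/i16 -> i32 and i32 -> i64 extractions stay free.
static bool zextOfExtractIsFree(unsigned DstBits, unsigned SrcElemBits) {
  return DstBits != 64u || SrcElemBits == 32u;
}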
452
453int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
454 unsigned Index) {
455 assert(Val->isVectorTy() && "This must be a vector type");
456
457 if (Index != -1U) {
458 // Legalize the type.
459 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
460
461 // This type is legalized to a scalar type.
462 if (!LT.second.isVector())
463 return 0;
464
465 // The type may be split. Normalize the index to the new type.
466 unsigned Width = LT.second.getVectorNumElements();
467 Index = Index % Width;
468
469 // The element at index zero is already inside the vector.
470 if (Index == 0)
471 return 0;
472 }
473
474 // All other insert/extracts cost this much.
475 return ST->getVectorInsertExtractBaseCost();
476}
477
478int AArch64TTIImpl::getArithmeticInstrCost(
479 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
480 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
481 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
482 // Legalize the type.
483 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
484
485 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
486 // add in the widening overhead specified by the sub-target. Since the
487 // extends feeding widening instructions are performed automatically, they
488 // aren't present in the generated code and have a zero cost. By adding a
489 // widening overhead here, we attach the total cost of the combined operation
490 // to the widening instruction.
491 int Cost = 0;
492 if (isWideningInstruction(Ty, Opcode, Args))
493 Cost += ST->getWideningBaseCost();
494
495 int ISD = TLI->InstructionOpcodeToISD(Opcode);
496
497 switch (ISD) {
498 default:
499 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
500 Opd1PropInfo, Opd2PropInfo);
501 case ISD::SDIV:
502 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
503 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
504 // On AArch64, scalar signed division by a power-of-two constant is
505 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
506 // The OperandValue properties may not be the same as those of the
507 // previous operation; conservatively assume OP_None.
508 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
509 TargetTransformInfo::OP_None,
510 TargetTransformInfo::OP_None);
511 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
512 TargetTransformInfo::OP_None,
513 TargetTransformInfo::OP_None);
514 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
515 TargetTransformInfo::OP_None,
516 TargetTransformInfo::OP_None);
517 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
518 TargetTransformInfo::OP_None,
519 TargetTransformInfo::OP_None);
520 return Cost;
521 }
522 LLVM_FALLTHROUGH;
523 case ISD::UDIV:
524 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
525 auto VT = TLI->getValueType(DL, Ty);
526 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
527 // Vector signed division by a constant is expanded to the
528 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
529 // to MULHS + SUB + SRL + ADD + SRL.
530 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
531 Opd2Info,
532 TargetTransformInfo::OP_None,
533 TargetTransformInfo::OP_None);
534 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
535 Opd2Info,
536 TargetTransformInfo::OP_None,
537 TargetTransformInfo::OP_None);
538 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
539 Opd2Info,
540 TargetTransformInfo::OP_None,
541 TargetTransformInfo::OP_None);
542 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
543 }
544 }
545
546 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
547 Opd1PropInfo, Opd2PropInfo);
548 if (Ty->isVectorTy()) {
549 // On AArch64, vector divisions are not supported natively and are
550 // expanded into scalar divisions of each pair of elements.
551 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
552 Opd2Info, Opd1PropInfo, Opd2PropInfo);
553 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
554 Opd2Info, Opd1PropInfo, Opd2PropInfo);
555 // TODO: if one of the arguments is scalar, then it's not necessary to
556 // double the cost of handling the vector elements.
557 Cost += Cost;
558 }
559 return Cost;
560
561 case ISD::ADD:
562 case ISD::MUL:
563 case ISD::XOR:
564 case ISD::OR:
565 case ISD::AND:
566 // These nodes are marked as 'custom' for combining purposes only.
567 // We know that they are legal. See LowerAdd in ISelLowering.
568 return (Cost + 1) * LT.first;
569 }
570}
571
572int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
573 const SCEV *Ptr) {
574 // Address computations in vectorized code with non-consecutive addresses will
575 // likely result in more instructions compared to scalar code where the
576 // computation can more often be merged into the index mode. The resulting
577 // extra micro-ops can significantly decrease throughput.
578 unsigned NumVectorInstToHideOverhead = 10;
579 int MaxMergeDistance = 64;
580
581 if (Ty->isVectorTy() && SE &&
582 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
583 return NumVectorInstToHideOverhead;
584
585 // In many cases the address computation is not merged into the instruction
586 // addressing mode.
587 return 1;
588}
589
590int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
591 Type *CondTy, const Instruction *I) {
592
593 int ISD = TLI->InstructionOpcodeToISD(Opcode);
594 // We don't lower some vector selects well when they are wider than the
595 // register width.
596 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
1. Calling 'Type::isVectorTy'
4. Returning from 'Type::isVectorTy'
20. Calling 'Type::isVectorTy'
23. Returning from 'Type::isVectorTy'
24. Assuming 'ISD' is equal to SELECT
25. Taking true branch
597 // We would need this many instructions to hide the scalarization happening.
598 const int AmortizationCost = 20;
599 static const TypeConversionCostTblEntry
600 VectorSelectTbl[] = {
601 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
602 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
603 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
604 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
605 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
606 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
607 };
608
609 EVT SelCondTy = TLI->getValueType(DL, CondTy);
26. Passing null pointer value via 2nd parameter 'Ty'
27. Calling 'TargetLoweringBase::getValueType'
610 EVT SelValTy = TLI->getValueType(DL, ValTy);
611 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
612 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
613 SelCondTy.getSimpleVT(),
614 SelValTy.getSimpleVT()))
615 return Entry->Cost;
616 }
617 }
618 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
5. Passing value via 3rd parameter 'CondTy'
6. Calling 'BasicTTIImplBase::getCmpSelInstrCost'
619}
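
The path markers above show CondTy being passed along (steps 5-6) and eventually reaching TargetLoweringBase::getValueType as a null 'Ty' (steps 26-27), which is where the reported dereference happens. A minimal defensive sketch, assuming a null CondTy is a legitimate input on this path (illustration only, not the upstream fix):

// Hypothetical guard: skip the vector-select cost table when CondTy is null
// so it never reaches TargetLoweringBase::getValueType(), which dereferences
// its Ty argument.
static bool canUseVectorSelectTable(Type *ValTy, int ISDOpcode, Type *CondTy) {
  return ValTy->isVectorTy() && ISDOpcode == ISD::SELECT && CondTy != nullptr;
}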
620
621AArch64TTIImpl::TTI::MemCmpExpansionOptions
622AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
623 TTI::MemCmpExpansionOptions Options;
624 Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
625 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
626 Options.NumLoadsPerBlock = Options.MaxNumLoads;
627 // TODO: Though vector loads usually perform well on AArch64, in some targets
628 // they may wake up the FP unit, which raises the power consumption. Perhaps
629 // they could be used with no holds barred (-O3).
630 Options.LoadSizes = {8, 4, 2, 1};
631 return Options;
632}
633
634int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
635 unsigned Alignment, unsigned AddressSpace,
636 const Instruction *I) {
637 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
638
639 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
640 LT.second.is128BitVector() && Alignment < 16) {
641 // Unaligned stores are extremely inefficient. We don't split all
642 // unaligned 128-bit stores because of the negative impact that has shown
643 // up in practice on inlined block copy code.
644 // We make such stores expensive so that we will only vectorize if there
645 // are 6 other instructions getting vectorized.
646 const int AmortizationCost = 6;
647
648 return LT.first * 2 * AmortizationCost;
649 }
650
651 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
652 unsigned ProfitableNumElements;
653 if (Opcode == Instruction::Store)
654 // We use a custom trunc store lowering so v.4b should be profitable.
655 ProfitableNumElements = 4;
656 else
657 // We scalarize the loads because there is no v.4b register and we
658 // have to promote the elements to v.2.
659 ProfitableNumElements = 8;
660
661 if (Ty->getVectorNumElements() < ProfitableNumElements) {
662 unsigned NumVecElts = Ty->getVectorNumElements();
663 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
664 // We generate 2 instructions per vector element.
665 return NumVectorizableInstsToAmortize * NumVecElts * 2;
666 }
667 }
668
669 return LT.first;
670}
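
The small-i8-vector branch above charges a quadratic penalty; a worked sketch of the same arithmetic (hypothetical helper, not part of the TTI interface):

// 2 instructions are generated per element, amortized over NumVecElts * 2
// vectorizable operations; e.g. a <4 x i8> load costs (4 * 2) * 4 * 2 == 64.
static int smallByteVectorCost(unsigned NumVecElts) {
  unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
  return NumVectorizableInstsToAmortize * NumVecElts * 2;
}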
671
672int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
673 unsigned Factor,
674 ArrayRef<unsigned> Indices,
675 unsigned Alignment,
676 unsigned AddressSpace,
677 bool UseMaskForCond,
678 bool UseMaskForGaps) {
679 assert(Factor >= 2 && "Invalid interleave factor");
680 assert(isa<VectorType>(VecTy) && "Expect a vector type");
681
682 if (!UseMaskForCond && !UseMaskForGaps &&
683 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
684 unsigned NumElts = VecTy->getVectorNumElements();
685 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
686
687 // ldN/stN only support legal vector types of size 64 or 128 in bits.
688 // Accesses having vector types that are a multiple of 128 bits can be
689 // matched to more than one ldN/stN instruction.
690 if (NumElts % Factor == 0 &&
691 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
692 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
693 }
694
695 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
696 Alignment, AddressSpace,
697 UseMaskForCond, UseMaskForGaps);
698}
699
700int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
701 int Cost = 0;
702 for (auto *I : Tys) {
703 if (!I->isVectorTy())
704 continue;
705 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
706 Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
707 getMemoryOpCost(Instruction::Load, I, 128, 0);
708 }
709 return Cost;
710}
711
712unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
713 return ST->getMaxInterleaveFactor();
714}
715
716// For Falkor, we want to avoid having too many strided loads in a loop since
717// that can exhaust the HW prefetcher resources. We adjust the unroller
718// MaxCount preference below to attempt to ensure unrolling doesn't create too
719// many strided loads.
720static void
721getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
722 TargetTransformInfo::UnrollingPreferences &UP) {
723 enum { MaxStridedLoads = 7 };
724 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
725 int StridedLoads = 0;
726 // FIXME? We could make this more precise by looking at the CFG and
727 // e.g. not counting loads in each side of an if-then-else diamond.
728 for (const auto BB : L->blocks()) {
729 for (auto &I : *BB) {
730 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
731 if (!LMemI)
732 continue;
733
734 Value *PtrValue = LMemI->getPointerOperand();
735 if (L->isLoopInvariant(PtrValue))
736 continue;
737
738 const SCEV *LSCEV = SE.getSCEV(PtrValue);
739 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
740 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
741 continue;
742
743 // FIXME? We could take pairing of unrolled load copies into account
744 // by looking at the AddRec, but we would probably have to limit this
745 // to loops with no stores or other memory optimization barriers.
746 ++StridedLoads;
747 // We've seen enough strided loads that seeing more won't make a
748 // difference.
749 if (StridedLoads > MaxStridedLoads / 2)
750 return StridedLoads;
751 }
752 }
753 return StridedLoads;
754 };
755
756 int StridedLoads = countStridedLoads(L, SE);
757 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
758 << " strided loads\n");
759 // Pick the largest power of 2 unroll count that won't result in too many
760 // strided loads.
761 if (StridedLoads) {
762 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
763 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
764 << UP.MaxCount << '\n');
765 }
766}
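
The clamp above picks the largest power-of-two unroll count whose expected strided-load total stays within MaxStridedLoads. A standalone sketch of the same computation without the Log2_32 utility (hypothetical helper; callers must pass StridedLoads > 0, matching the guard above):

// e.g. with MaxStridedLoads == 7: 1 strided load -> MaxCount 4, 2 -> 2,
// 3 -> 2, and 4 or more -> 1 (effectively no extra unrolling).
static unsigned falkorMaxUnroll(unsigned StridedLoads,
                                unsigned MaxStridedLoads = 7) {
  unsigned Budget = MaxStridedLoads / StridedLoads; // integer division
  unsigned MaxCount = 1;
  while (MaxCount * 2 <= Budget) // largest power of two <= Budget
    MaxCount *= 2;
  return MaxCount;
}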
767
768void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
769 TTI::UnrollingPreferences &UP) {
770 // Enable partial unrolling and runtime unrolling.
771 BaseT::getUnrollingPreferences(L, SE, UP);
772
773 // An inner loop is more likely to be hot, and its runtime check can be
774 // hoisted out by the LICM pass, so the overhead is lower; try a larger
775 // threshold to unroll more loops.
776 if (L->getLoopDepth() > 1)
777 UP.PartialThreshold *= 2;
778
779 // Disable partial & runtime unrolling on -Os.
780 UP.PartialOptSizeThreshold = 0;
781
782 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
783 EnableFalkorHWPFUnrollFix)
784 getFalkorUnrollingPreferences(L, SE, UP);
785}
786
787Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
788 Type *ExpectedType) {
789 switch (Inst->getIntrinsicID()) {
790 default:
791 return nullptr;
792 case Intrinsic::aarch64_neon_st2:
793 case Intrinsic::aarch64_neon_st3:
794 case Intrinsic::aarch64_neon_st4: {
795 // Create a struct type
796 StructType *ST = dyn_cast<StructType>(ExpectedType);
797 if (!ST)
798 return nullptr;
799 unsigned NumElts = Inst->getNumArgOperands() - 1;
800 if (ST->getNumElements() != NumElts)
801 return nullptr;
802 for (unsigned i = 0, e = NumElts; i != e; ++i) {
803 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
804 return nullptr;
805 }
806 Value *Res = UndefValue::get(ExpectedType);
807 IRBuilder<> Builder(Inst);
808 for (unsigned i = 0, e = NumElts; i != e; ++i) {
809 Value *L = Inst->getArgOperand(i);
810 Res = Builder.CreateInsertValue(Res, L, i);
811 }
812 return Res;
813 }
814 case Intrinsic::aarch64_neon_ld2:
815 case Intrinsic::aarch64_neon_ld3:
816 case Intrinsic::aarch64_neon_ld4:
817 if (Inst->getType() == ExpectedType)
818 return Inst;
819 return nullptr;
820 }
821}
822
823bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
824 MemIntrinsicInfo &Info) {
825 switch (Inst->getIntrinsicID()) {
826 default:
827 break;
828 case Intrinsic::aarch64_neon_ld2:
829 case Intrinsic::aarch64_neon_ld3:
830 case Intrinsic::aarch64_neon_ld4:
831 Info.ReadMem = true;
832 Info.WriteMem = false;
833 Info.PtrVal = Inst->getArgOperand(0);
834 break;
835 case Intrinsic::aarch64_neon_st2:
836 case Intrinsic::aarch64_neon_st3:
837 case Intrinsic::aarch64_neon_st4:
838 Info.ReadMem = false;
839 Info.WriteMem = true;
840 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
841 break;
842 }
843
844 switch (Inst->getIntrinsicID()) {
845 default:
846 return false;
847 case Intrinsic::aarch64_neon_ld2:
848 case Intrinsic::aarch64_neon_st2:
849 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
850 break;
851 case Intrinsic::aarch64_neon_ld3:
852 case Intrinsic::aarch64_neon_st3:
853 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
854 break;
855 case Intrinsic::aarch64_neon_ld4:
856 case Intrinsic::aarch64_neon_st4:
857 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
858 break;
859 }
860 return true;
861}
862
863/// See if \p I should be considered for address type promotion. We check if \p
864/// I is a sext with the right type, used in memory accesses. If it is used in
865/// a "complex" getelementptr, we allow it to be promoted without finding other
866/// sext instructions that sign-extended the same initial value. A getelementptr
867/// is considered "complex" if it has more than 2 operands.
868bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
869 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
870 bool Considerable = false;
871 AllowPromotionWithoutCommonHeader = false;
872 if (!isa<SExtInst>(&I))
873 return false;
874 Type *ConsideredSExtType =
875 Type::getInt64Ty(I.getParent()->getParent()->getContext());
876 if (I.getType() != ConsideredSExtType)
877 return false;
878 // See if the sext is the one with the right type and used in at least one
879 // GetElementPtrInst.
880 for (const User *U : I.users()) {
881 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
882 Considerable = true;
883 // A getelementptr is considered as "complex" if it has more than 2
884 // operands. We will promote a SExt used in such complex GEP as we
885 // expect some computation to be merged if they are done on 64 bits.
886 if (GEPInst->getNumOperands() > 2) {
887 AllowPromotionWithoutCommonHeader = true;
888 break;
889 }
890 }
891 }
892 return Considerable;
893}
894
895bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
896 TTI::ReductionFlags Flags) const {
897 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
898 unsigned ScalarBits = Ty->getScalarSizeInBits();
899 switch (Opcode) {
900 case Instruction::FAdd:
901 case Instruction::FMul:
902 case Instruction::And:
903 case Instruction::Or:
904 case Instruction::Xor:
905 case Instruction::Mul:
906 return false;
907 case Instruction::Add:
908 return ScalarBits * Ty->getVectorNumElements() >= 128;
909 case Instruction::ICmp:
910 return (ScalarBits < 64) &&
911 (ScalarBits * Ty->getVectorNumElements() >= 128);
912 case Instruction::FCmp:
913 return Flags.NoNaN;
914 default:
915 llvm_unreachable("Unhandled reduction opcode");
916 }
917 return false;
918}
919
920int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
921 bool IsPairwiseForm) {
922
923 if (IsPairwiseForm)
924 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
925
926 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
927 MVT MTy = LT.second;
928 int ISD = TLI->InstructionOpcodeToISD(Opcode);
929 assert(ISD && "Invalid opcode");
930
931 // Horizontal adds can use the 'addv' instruction. We model the cost of these
932 // instructions as normal vector adds. This is the only arithmetic vector
933 // reduction operation for which we have an instruction.
934 static const CostTblEntry CostTblNoPairwise[]{
935 {ISD::ADD, MVT::v8i8, 1},
936 {ISD::ADD, MVT::v16i8, 1},
937 {ISD::ADD, MVT::v4i16, 1},
938 {ISD::ADD, MVT::v8i16, 1},
939 {ISD::ADD, MVT::v4i32, 1},
940 };
941
942 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
943 return LT.first * Entry->Cost;
944
945 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
946}
947
948int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
949 Type *SubTp) {
950 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
951 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
952 static const CostTblEntry ShuffleTbl[] = {
953 // Broadcast shuffle kinds can be performed with 'dup'.
954 { TTI::SK_Broadcast, MVT::v8i8, 1 },
955 { TTI::SK_Broadcast, MVT::v16i8, 1 },
956 { TTI::SK_Broadcast, MVT::v4i16, 1 },
957 { TTI::SK_Broadcast, MVT::v8i16, 1 },
958 { TTI::SK_Broadcast, MVT::v2i32, 1 },
959 { TTI::SK_Broadcast, MVT::v4i32, 1 },
960 { TTI::SK_Broadcast, MVT::v2i64, 1 },
961 { TTI::SK_Broadcast, MVT::v2f32, 1 },
962 { TTI::SK_Broadcast, MVT::v4f32, 1 },
963 { TTI::SK_Broadcast, MVT::v2f64, 1 },
964 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
965 // 'zip1/zip2' instructions.
966 { TTI::SK_Transpose, MVT::v8i8, 1 },
967 { TTI::SK_Transpose, MVT::v16i8, 1 },
968 { TTI::SK_Transpose, MVT::v4i16, 1 },
969 { TTI::SK_Transpose, MVT::v8i16, 1 },
970 { TTI::SK_Transpose, MVT::v2i32, 1 },
971 { TTI::SK_Transpose, MVT::v4i32, 1 },
972 { TTI::SK_Transpose, MVT::v2i64, 1 },
973 { TTI::SK_Transpose, MVT::v2f32, 1 },
974 { TTI::SK_Transpose, MVT::v4f32, 1 },
975 { TTI::SK_Transpose, MVT::v2f64, 1 },
976 // Select shuffle kinds.
977 // TODO: handle vXi8/vXi16.
978 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
979 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
980 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
981 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
982 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
983 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
984 // PermuteSingleSrc shuffle kinds.
985 // TODO: handle vXi8/vXi16.
986 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
987 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
988 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
989 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
990 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
991 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
992 };
993 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
994 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
995 return LT.first * Entry->Cost;
996 }
997
998 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
999}

/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/IR/Type.h

1//===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the Type class. For more "Type"
10// stuff, look in DerivedTypes.h.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_IR_TYPE_H
15#define LLVM_IR_TYPE_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/Support/CBindingWrapping.h"
21#include "llvm/Support/Casting.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/TypeSize.h"
25#include <cassert>
26#include <cstdint>
27#include <iterator>
28
29namespace llvm {
30
31template<class GraphType> struct GraphTraits;
32class IntegerType;
33class LLVMContext;
34class PointerType;
35class raw_ostream;
36class StringRef;
37
38/// The instances of the Type class are immutable: once they are created,
39/// they are never changed. Also note that only one instance of a particular
40/// type is ever created. Thus seeing if two types are equal is a matter of
41/// doing a trivial pointer comparison. To enforce that no two equal instances
42/// are created, Type instances can only be created via static factory methods
43/// in class Type and in derived classes. Once allocated, Types are never
44/// free'd.
45///
46class Type {
47public:
48 //===--------------------------------------------------------------------===//
49 /// Definitions of all of the base types for the Type system. Based on this
50 /// value, you can cast to a class defined in DerivedTypes.h.
51 /// Note: If you add an element to this, you need to add an element to the
52 /// Type::getPrimitiveType function, or else things will break!
53 /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding.
54 ///
55 enum TypeID {
56 // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
57 VoidTyID = 0, ///< 0: type with no size
58 HalfTyID, ///< 1: 16-bit floating point type
59 FloatTyID, ///< 2: 32-bit floating point type
60 DoubleTyID, ///< 3: 64-bit floating point type
61 X86_FP80TyID, ///< 4: 80-bit floating point type (X87)
62 FP128TyID, ///< 5: 128-bit floating point type (112-bit mantissa)
63 PPC_FP128TyID, ///< 6: 128-bit floating point type (two 64-bits, PowerPC)
64 LabelTyID, ///< 7: Labels
65 MetadataTyID, ///< 8: Metadata
66 X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific)
67 TokenTyID, ///< 10: Tokens
68
69 // Derived types... see DerivedTypes.h file.
70 // Make sure FirstDerivedTyID stays up to date!
71 IntegerTyID, ///< 11: Arbitrary bit width integers
72 FunctionTyID, ///< 12: Functions
73 StructTyID, ///< 13: Structures
74 ArrayTyID, ///< 14: Arrays
75 PointerTyID, ///< 15: Pointers
76 VectorTyID ///< 16: SIMD 'packed' format, or other vector type
77 };
78
79private:
80 /// This refers to the LLVMContext in which this type was uniqued.
81 LLVMContext &Context;
82
83 TypeID ID : 8; // The current base type of this type.
84 unsigned SubclassData : 24; // Space for subclasses to store data.
85 // Note that this should be synchronized with
86 // MAX_INT_BITS value in IntegerType class.
87
88protected:
89 friend class LLVMContextImpl;
90
91 explicit Type(LLVMContext &C, TypeID tid)
92 : Context(C), ID(tid), SubclassData(0) {}
93 ~Type() = default;
94
95 unsigned getSubclassData() const { return SubclassData; }
96
97 void setSubclassData(unsigned val) {
98 SubclassData = val;
99 // Ensure we don't have any accidental truncation.
100 assert(getSubclassData() == val && "Subclass data too large for field");
101 }
102
103 /// Keeps track of how many Type*'s there are in the ContainedTys list.
104 unsigned NumContainedTys = 0;
105
106 /// A pointer to the array of Types contained by this Type. For example, this
107 /// includes the arguments of a function type, the elements of a structure,
108 /// the pointee of a pointer, the element type of an array, etc. This pointer
109 /// may be 0 for types that don't contain other types (Integer, Double,
110 /// Float).
111 Type * const *ContainedTys = nullptr;
112
113 static bool isSequentialType(TypeID TyID) {
114 return TyID == ArrayTyID || TyID == VectorTyID;
115 }
116
117public:
118 /// Print the current type.
119 /// Omit the type details if \p NoDetails == true.
120 /// E.g., let %st = type { i32, i16 }
121 /// When \p NoDetails is true, we only print %st.
122 /// Put differently, \p NoDetails prints the type as if
123 /// inlined with the operands when printing an instruction.
124 void print(raw_ostream &O, bool IsForDebug = false,
125 bool NoDetails = false) const;
126
127 void dump() const;
128
129 /// Return the LLVMContext in which this type was uniqued.
130 LLVMContext &getContext() const { return Context; }
131
132 //===--------------------------------------------------------------------===//
133 // Accessors for working with types.
134 //
135
136 /// Return the type id for the type. This will return one of the TypeID enum
137 /// elements defined above.
138 TypeID getTypeID() const { return ID; }
139
140 /// Return true if this is 'void'.
141 bool isVoidTy() const { return getTypeID() == VoidTyID; }
142
143 /// Return true if this is 'half', a 16-bit IEEE fp type.
144 bool isHalfTy() const { return getTypeID() == HalfTyID; }
145
146 /// Return true if this is 'float', a 32-bit IEEE fp type.
147 bool isFloatTy() const { return getTypeID() == FloatTyID; }
148
149 /// Return true if this is 'double', a 64-bit IEEE fp type.
150 bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
151
152 /// Return true if this is x86 long double.
153 bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; }
154
155 /// Return true if this is 'fp128'.
156 bool isFP128Ty() const { return getTypeID() == FP128TyID; }
157
158 /// Return true if this is powerpc long double.
159 bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; }
160
161 /// Return true if this is one of the six floating-point types
162 bool isFloatingPointTy() const {
163 return getTypeID() == HalfTyID || getTypeID() == FloatTyID ||
164 getTypeID() == DoubleTyID ||
165 getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID ||
166 getTypeID() == PPC_FP128TyID;
167 }
168
169 const fltSemantics &getFltSemantics() const {
170 switch (getTypeID()) {
171 case HalfTyID: return APFloat::IEEEhalf();
172 case FloatTyID: return APFloat::IEEEsingle();
173 case DoubleTyID: return APFloat::IEEEdouble();
174 case X86_FP80TyID: return APFloat::x87DoubleExtended();
175 case FP128TyID: return APFloat::IEEEquad();
176 case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
177 default: llvm_unreachable("Invalid floating type");
178 }
179 }
180
181 /// Return true if this is X86 MMX.
182 bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
183
184 /// Return true if this is a FP type or a vector of FP.
185 bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
186
187 /// Return true if this is 'label'.
188 bool isLabelTy() const { return getTypeID() == LabelTyID; }
189
190 /// Return true if this is 'metadata'.
191 bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
192
193 /// Return true if this is 'token'.
194 bool isTokenTy() const { return getTypeID() == TokenTyID; }
195
196 /// True if this is an instance of IntegerType.
197 bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
198
199 /// Return true if this is an IntegerType of the given width.
200 bool isIntegerTy(unsigned Bitwidth) const;
201
202 /// Return true if this is an integer type or a vector of integer types.
203 bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); }
204
205 /// Return true if this is an integer type or a vector of integer types of
206 /// the given width.
207 bool isIntOrIntVectorTy(unsigned BitWidth) const {
208 return getScalarType()->isIntegerTy(BitWidth);
209 }
210
211 /// Return true if this is an integer type or a pointer type.
212 bool isIntOrPtrTy() const { return isIntegerTy() || isPointerTy(); }
213
214 /// True if this is an instance of FunctionType.
215 bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
216
217 /// True if this is an instance of StructType.
218 bool isStructTy() const { return getTypeID() == StructTyID; }
219
220 /// True if this is an instance of ArrayType.
221 bool isArrayTy() const { return getTypeID() == ArrayTyID; }
222
223 /// True if this is an instance of PointerType.
224 bool isPointerTy() const { return getTypeID() == PointerTyID; }
225
226 /// Return true if this is a pointer type or a vector of pointer types.
227 bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
228
229 /// True if this is an instance of VectorType.
230 bool isVectorTy() const { return getTypeID() == VectorTyID; }
2. Assuming the condition is false
3. Returning zero, which participates in a condition later
13. Returning the value 1, which participates in a condition later
21. Assuming the condition is true
22. Returning the value 1, which participates in a condition later
231
232 /// Return true if this type could be converted with a lossless BitCast to
233 /// type 'Ty'. For example, i8* to i32*. BitCasts are valid for types of the
234 /// same size only where no re-interpretation of the bits is done.
235 /// Determine if this type could be losslessly bitcast to Ty
236 bool canLosslesslyBitCastTo(Type *Ty) const;
237
238 /// Return true if this type is empty, that is, it has no elements or all of
239 /// its elements are empty.
240 bool isEmptyTy() const;
241
242 /// Return true if the type is "first class", meaning it is a valid type for a
243 /// Value.
244 bool isFirstClassType() const {
245 return getTypeID() != FunctionTyID && getTypeID() != VoidTyID;
246 }
247
248 /// Return true if the type is a valid type for a register in codegen. This
249 /// includes all first-class types except struct and array types.
250 bool isSingleValueType() const {
251 return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() ||
252 isPointerTy() || isVectorTy();
253 }
254
255 /// Return true if the type is an aggregate type. This means it is valid as
256 /// the first operand of an insertvalue or extractvalue instruction. This
257 /// includes struct and array types, but does not include vector types.
258 bool isAggregateType() const {
259 return getTypeID() == StructTyID || getTypeID() == ArrayTyID;
260 }
261
262 /// Return true if it makes sense to take the size of this type. To get the
263 /// actual size for a particular target, it is reasonable to use the
264 /// DataLayout subsystem to do this.
265 bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const {
266 // If it's a primitive, it is always sized.
267 if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
268 getTypeID() == PointerTyID ||
269 getTypeID() == X86_MMXTyID)
270 return true;
271 // If it is not something that can have a size (e.g. a function or label),
272 // it doesn't have a size.
273 if (getTypeID() != StructTyID && getTypeID() != ArrayTyID &&
274 getTypeID() != VectorTyID)
275 return false;
276 // Otherwise we have to try harder to decide.
277 return isSizedDerivedType(Visited);
278 }
279
280 /// Return the basic size of this type if it is a primitive type. These are
281 /// fixed by LLVM and are not target-dependent.
282 /// This will return zero if the type does not have a size or is not a
283 /// primitive type.
284 ///
285 /// If this is a scalable vector type, the scalable property will be set and
286 /// the runtime size will be a positive integer multiple of the base size.
287 ///
288 /// Note that this may not reflect the size of memory allocated for an
289 /// instance of the type or the number of bytes that are written when an
290 /// instance of the type is stored to memory. The DataLayout class provides
291 /// additional query functions to provide this information.
292 ///
293  TypeSize getPrimitiveSizeInBits() const LLVM_READONLY;
294
295 /// If this is a vector type, return the getPrimitiveSizeInBits value for the
296 /// element type. Otherwise return the getPrimitiveSizeInBits value for this
297 /// type.
298  unsigned getScalarSizeInBits() const LLVM_READONLY;
299
300 /// Return the width of the mantissa of this type. This is only valid on
301 /// floating-point types. If the FP type does not have a stable mantissa (e.g.
302 /// ppc long double), this method returns -1.
303 int getFPMantissaWidth() const;
304
305 /// If this is a vector type, return the element type, otherwise return
306 /// 'this'.
307 Type *getScalarType() const {
308 if (isVectorTy())
309 return getVectorElementType();
310 return const_cast<Type*>(this);
311 }
312
313 //===--------------------------------------------------------------------===//
314 // Type Iteration support.
315 //
316 using subtype_iterator = Type * const *;
317
318 subtype_iterator subtype_begin() const { return ContainedTys; }
319 subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
320 ArrayRef<Type*> subtypes() const {
321 return makeArrayRef(subtype_begin(), subtype_end());
322 }
323
324 using subtype_reverse_iterator = std::reverse_iterator<subtype_iterator>;
325
326 subtype_reverse_iterator subtype_rbegin() const {
327 return subtype_reverse_iterator(subtype_end());
328 }
329 subtype_reverse_iterator subtype_rend() const {
330 return subtype_reverse_iterator(subtype_begin());
331 }
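As a quick orientation for the iteration interface above, here is a minimal usage sketch (the function and variable names are illustrative, not part of this report); it relies only on subtypes() returning an ArrayRef over the contained types and on the stream operator defined further down in this header:

    // Print every type directly contained in a derived type (empty for
    // primitive types). Assumes llvm/Support/raw_ostream.h is available.
    void dumpContainedTypes(const llvm::Type *Ty, llvm::raw_ostream &OS) {
      for (llvm::Type *Sub : Ty->subtypes())
        OS << *Sub << '\n';
    }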
332
333 /// This method is used to implement the type iterator (defined at the end of
334 /// the file). For derived types, this returns the types 'contained' in the
335 /// derived type.
336 Type *getContainedType(unsigned i) const {
337    assert(i < NumContainedTys && "Index out of range!");
338 return ContainedTys[i];
339 }
340
341 /// Return the number of types in the derived type.
342 unsigned getNumContainedTypes() const { return NumContainedTys; }
343
344 //===--------------------------------------------------------------------===//
345 // Helper methods corresponding to subclass methods. This forces a cast to
346 // the specified subclass and calls its accessor. "getVectorNumElements" (for
347 // example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
348 // only intended to cover the core methods that are frequently used, helper
349 // methods should not be added here.
350
351 inline unsigned getIntegerBitWidth() const;
352
353 inline Type *getFunctionParamType(unsigned i) const;
354 inline unsigned getFunctionNumParams() const;
355 inline bool isFunctionVarArg() const;
356
357 inline StringRef getStructName() const;
358 inline unsigned getStructNumElements() const;
359 inline Type *getStructElementType(unsigned N) const;
360
361 inline Type *getSequentialElementType() const {
362    assert(isSequentialType(getTypeID()) && "Not a sequential type!");
363 return ContainedTys[0];
364 }
365
366 inline uint64_t getArrayNumElements() const;
367
368 Type *getArrayElementType() const {
369    assert(getTypeID() == ArrayTyID);
370 return ContainedTys[0];
371 }
372
373 inline bool getVectorIsScalable() const;
374 inline unsigned getVectorNumElements() const;
375 Type *getVectorElementType() const {
376    assert(getTypeID() == VectorTyID);
377 return ContainedTys[0];
378 }
379
380 Type *getPointerElementType() const {
381    assert(getTypeID() == PointerTyID);
382 return ContainedTys[0];
383 }
384
385 /// Given scalar/vector integer type, returns a type with elements twice as
386 /// wide as in the original type. For vectors, preserves element count.
387 inline Type *getExtendedType() const;
388
389 /// Get the address space of this pointer or pointer vector type.
390 inline unsigned getPointerAddressSpace() const;
391
392 //===--------------------------------------------------------------------===//
393 // Static members exported by the Type class itself. Useful for getting
394 // instances of Type.
395 //
396
397 /// Return a type based on an identifier.
398 static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
399
400 //===--------------------------------------------------------------------===//
401 // These are the builtin types that are always available.
402 //
403 static Type *getVoidTy(LLVMContext &C);
404 static Type *getLabelTy(LLVMContext &C);
405 static Type *getHalfTy(LLVMContext &C);
406 static Type *getFloatTy(LLVMContext &C);
407 static Type *getDoubleTy(LLVMContext &C);
408 static Type *getMetadataTy(LLVMContext &C);
409 static Type *getX86_FP80Ty(LLVMContext &C);
410 static Type *getFP128Ty(LLVMContext &C);
411 static Type *getPPC_FP128Ty(LLVMContext &C);
412 static Type *getX86_MMXTy(LLVMContext &C);
413 static Type *getTokenTy(LLVMContext &C);
414 static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
415 static IntegerType *getInt1Ty(LLVMContext &C);
416 static IntegerType *getInt8Ty(LLVMContext &C);
417 static IntegerType *getInt16Ty(LLVMContext &C);
418 static IntegerType *getInt32Ty(LLVMContext &C);
419 static IntegerType *getInt64Ty(LLVMContext &C);
420 static IntegerType *getInt128Ty(LLVMContext &C);
421 template <typename ScalarTy> static Type *getScalarTy(LLVMContext &C) {
422    int noOfBits = sizeof(ScalarTy) * CHAR_BIT;
423 if (std::is_integral<ScalarTy>::value) {
424 return (Type*) Type::getIntNTy(C, noOfBits);
425 } else if (std::is_floating_point<ScalarTy>::value) {
426 switch (noOfBits) {
427 case 32:
428 return Type::getFloatTy(C);
429 case 64:
430 return Type::getDoubleTy(C);
431 }
432 }
433 llvm_unreachable("Unsupported type in Type::getScalarTy")::llvm::llvm_unreachable_internal("Unsupported type in Type::getScalarTy"
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/IR/Type.h"
, 433)
;
434 }
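A hedged usage sketch of getScalarTy above (the context variable is illustrative, and this assumes the usual host sizes of a 32-bit float and 64-bit double):

    llvm::LLVMContext Ctx;
    llvm::Type *I32 = llvm::Type::getScalarTy<int32_t>(Ctx); // iN via getIntNTy, here i32
    llvm::Type *F32 = llvm::Type::getScalarTy<float>(Ctx);   // same as getFloatTy(Ctx)
    llvm::Type *F64 = llvm::Type::getScalarTy<double>(Ctx);  // same as getDoubleTy(Ctx)

Any other floating-point width falls through to the llvm_unreachable at the end of the function.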
435
436 //===--------------------------------------------------------------------===//
437 // Convenience methods for getting pointer types with one of the above builtin
438 // types as pointee.
439 //
440 static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0);
441 static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
442 static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
443 static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
444 static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
445 static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
446 static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
447 static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
448 static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
449 static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
450 static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
451 static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
452 static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
453
454 /// Return a pointer to the current type. This is equivalent to
455 /// PointerType::get(Foo, AddrSpace).
456 PointerType *getPointerTo(unsigned AddrSpace = 0) const;
457
458private:
459 /// Derived types like structures and arrays are sized iff all of the members
460 /// of the type are sized as well. Since asking for their size is relatively
461 /// uncommon, move this operation out-of-line.
462 bool isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited = nullptr) const;
463};
464
465// Printing of types.
466inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
467 T.print(OS);
468 return OS;
469}
470
471// allow isa<PointerType>(x) to work without DerivedTypes.h included.
472template <> struct isa_impl<PointerType, Type> {
473 static inline bool doit(const Type &Ty) {
474 return Ty.getTypeID() == Type::PointerTyID;
475 }
476};
477
478// Create wrappers for C Binding types (see CBindingWrapping.h).
479DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
480
481/* Specialized opaque type conversions.
482 */
483inline Type **unwrap(LLVMTypeRef* Tys) {
484 return reinterpret_cast<Type**>(Tys);
485}
486
487inline LLVMTypeRef *wrap(Type **Tys) {
488 return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
489}
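For context, DEFINE_ISA_CONVERSION_FUNCTIONS above expands to plain wrap/unwrap conversions between Type* and LLVMTypeRef plus a cast<>-checked unwrap<T>; together with the array overloads just above, a typical round trip looks like this (variable names are illustrative):

    // Round-trip through the C API handle; wrap/unwrap are reinterpret_casts,
    // so no ownership changes hands.
    LLVMTypeRef Handle = llvm::wrap(SomeTypePtr);      // Type*       -> LLVMTypeRef
    llvm::Type *Back   = llvm::unwrap(Handle);         // LLVMTypeRef -> Type*
    llvm::IntegerType *IntBack =
        llvm::unwrap<llvm::IntegerType>(Handle);       // cast<>-checked variant; only
                                                       // valid if Handle is an IntegerType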
490
491} // end namespace llvm
492
493#endif // LLVM_IR_TYPE_H

/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/BitVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/Analysis/TargetTransformInfoImpl.h"
27#include "llvm/CodeGen/ISDOpcodes.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/CodeGen/ValueTypes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallSite.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/Value.h"
44#include "llvm/MC/MCSchedule.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/MachineValueType.h"
49#include "llvm/Support/MathExtras.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <limits>
54#include <utility>
55
56namespace llvm {
57
58class Function;
59class GlobalValue;
60class LLVMContext;
61class ScalarEvolution;
62class SCEV;
63class TargetMachine;
64
65extern cl::opt<unsigned> PartialUnrollingThreshold;
66
67/// Base class which can be used to help build a TTI implementation.
68///
69/// This class provides as much implementation of the TTI interface as is
70/// possible using the target independent parts of the code generator.
71///
72/// In order to subclass it, your class must implement a getST() method to
73/// return the subtarget, and a getTLI() method to return the target lowering.
74/// We need these methods implemented in the derived class so that this class
75/// doesn't have to duplicate storage for them.
76template <typename T>
77class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78private:
79 using BaseT = TargetTransformInfoImplCRTPBase<T>;
80 using TTI = TargetTransformInfo;
81
82 /// Estimate a cost of Broadcast as an extract and sequence of insert
83 /// operations.
84 unsigned getBroadcastShuffleOverhead(Type *Ty) {
85    assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 unsigned Cost = 0;
87 // Broadcast cost is equal to the cost of extracting the zero'th element
88 // plus the cost of inserting it into every element of the result vector.
89 Cost += static_cast<T *>(this)->getVectorInstrCost(
90 Instruction::ExtractElement, Ty, 0);
91
92 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93 Cost += static_cast<T *>(this)->getVectorInstrCost(
94 Instruction::InsertElement, Ty, i);
95 }
96 return Cost;
97 }
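To make the accounting above concrete, a small worked sketch under the simplifying assumption that every getVectorInstrCost call returns 1 (real per-element costs are target dependent):

    // Broadcast of a <4 x float>: one extract of lane 0 plus four inserts,
    // so the modeled overhead is 1 + 4 = 5 under the unit-cost assumption.
    unsigned approxBroadcastOverhead(unsigned NumElts) {
      unsigned ExtractCost = 1;        // extract lane 0 of the source once
      unsigned InsertCost  = NumElts;  // insert it into each lane of the result
      return ExtractCost + InsertCost;
    }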
98
99 /// Estimate a cost of shuffle as a sequence of extract and insert
100 /// operations.
101 unsigned getPermuteShuffleOverhead(Type *Ty) {
102    assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 unsigned Cost = 0;
104    // Shuffle cost is equal to the cost of extracting each element from its
105    // source argument plus the cost of inserting it into the result vector.
106
107    // e.g. a <4 x float> shuffle with mask <0,5,2,7> extracts index 0 of the
108    // first vector, index 1 of the second vector, index 2 of the first vector
109    // and index 3 of the second vector, and inserts them at indices <0,1,2,3>
110    // of the result vector.
111 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112 Cost += static_cast<T *>(this)
113 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114 Cost += static_cast<T *>(this)
115 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116 }
117 return Cost;
118 }
119
120 /// Estimate a cost of subvector extraction as a sequence of extract and
121 /// insert operations.
122 unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124           "Can only extract subvectors from vectors");
125 int NumSubElts = SubTy->getVectorNumElements();
126    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127           "SK_ExtractSubvector index out of range");
128
129 unsigned Cost = 0;
130    // Subvector extraction cost is equal to the cost of extracting each element
131    // from the source type plus the cost of inserting it into the result vector
132    // type.
133 for (int i = 0; i != NumSubElts; ++i) {
134 Cost += static_cast<T *>(this)->getVectorInstrCost(
135 Instruction::ExtractElement, Ty, i + Index);
136 Cost += static_cast<T *>(this)->getVectorInstrCost(
137 Instruction::InsertElement, SubTy, i);
138 }
139 return Cost;
140 }
141
142 /// Estimate a cost of subvector insertion as a sequence of extract and
143 /// insert operations.
144 unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146           "Can only insert subvectors into vectors");
147 int NumSubElts = SubTy->getVectorNumElements();
148    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149           "SK_InsertSubvector index out of range");
150
151 unsigned Cost = 0;
152    // Subvector insertion cost is equal to the cost of extracting each element
153    // from the source type plus the cost of inserting it into the result vector
154    // type.
155 for (int i = 0; i != NumSubElts; ++i) {
156 Cost += static_cast<T *>(this)->getVectorInstrCost(
157 Instruction::ExtractElement, SubTy, i);
158 Cost += static_cast<T *>(this)->getVectorInstrCost(
159 Instruction::InsertElement, Ty, i + Index);
160 }
161 return Cost;
162 }
163
164 /// Local query method delegates up to T which *must* implement this!
165 const TargetSubtargetInfo *getST() const {
166 return static_cast<const T *>(this)->getST();
167 }
168
169 /// Local query method delegates up to T which *must* implement this!
170 const TargetLoweringBase *getTLI() const {
171 return static_cast<const T *>(this)->getTLI();
172 }
173
174 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175 switch (M) {
176 case TTI::MIM_Unindexed:
177 return ISD::UNINDEXED;
178 case TTI::MIM_PreInc:
179 return ISD::PRE_INC;
180 case TTI::MIM_PreDec:
181 return ISD::PRE_DEC;
182 case TTI::MIM_PostInc:
183 return ISD::POST_INC;
184 case TTI::MIM_PostDec:
185 return ISD::POST_DEC;
186 }
187 llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode"
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h"
, 187)
;
188 }
189
190protected:
191 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192 : BaseT(DL) {}
193 virtual ~BasicTTIImplBase() = default;
194
195 using TargetTransformInfoImplBase::DL;
196
197public:
198 /// \name Scalar TTI Implementations
199 /// @{
200 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
201 unsigned AddressSpace, unsigned Alignment,
202 bool *Fast) const {
203 EVT E = EVT::getIntegerVT(Context, BitWidth);
204 return getTLI()->allowsMisalignedMemoryAccesses(
205 E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
206 }
207
208 bool hasBranchDivergence() { return false; }
209
210 bool isSourceOfDivergence(const Value *V) { return false; }
211
212 bool isAlwaysUniform(const Value *V) { return false; }
213
214 unsigned getFlatAddressSpace() {
215 // Return an invalid address space.
216 return -1;
217 }
218
219 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
220 Intrinsic::ID IID) const {
221 return false;
222 }
223
224 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
225 Value *OldV, Value *NewV) const {
226 return false;
227 }
228
229 bool isLegalAddImmediate(int64_t imm) {
230 return getTLI()->isLegalAddImmediate(imm);
231 }
232
233 bool isLegalICmpImmediate(int64_t imm) {
234 return getTLI()->isLegalICmpImmediate(imm);
235 }
236
237 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
238 bool HasBaseReg, int64_t Scale,
239 unsigned AddrSpace, Instruction *I = nullptr) {
240 TargetLoweringBase::AddrMode AM;
241 AM.BaseGV = BaseGV;
242 AM.BaseOffs = BaseOffset;
243 AM.HasBaseReg = HasBaseReg;
244 AM.Scale = Scale;
245 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
246 }
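A hedged usage sketch of the query above (the concrete values are illustrative, and TTIImpl stands for some concrete BasicTTIImplBase subclass): asking whether an i32 access of the form [base + 4*index + 16] in address space 0 is a legal addressing mode.

    bool Legal = TTIImpl.isLegalAddressingMode(llvm::Type::getInt32Ty(Ctx),
                                               /*BaseGV=*/nullptr,
                                               /*BaseOffset=*/16,
                                               /*HasBaseReg=*/true,
                                               /*Scale=*/4,
                                               /*AddrSpace=*/0);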
247
248 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
249 const DataLayout &DL) const {
250 EVT VT = getTLI()->getValueType(DL, Ty);
251 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
252 }
253
254 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
255 const DataLayout &DL) const {
256 EVT VT = getTLI()->getValueType(DL, Ty);
257 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
258 }
259
260 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
261 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
262 }
263
264 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
265 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
266 TargetLoweringBase::AddrMode AM;
267 AM.BaseGV = BaseGV;
268 AM.BaseOffs = BaseOffset;
269 AM.HasBaseReg = HasBaseReg;
270 AM.Scale = Scale;
271 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
272 }
273
274 bool isTruncateFree(Type *Ty1, Type *Ty2) {
275 return getTLI()->isTruncateFree(Ty1, Ty2);
276 }
277
278 bool isProfitableToHoist(Instruction *I) {
279 return getTLI()->isProfitableToHoist(I);
280 }
281
282 bool useAA() const { return getST()->useAA(); }
283
284 bool isTypeLegal(Type *Ty) {
285 EVT VT = getTLI()->getValueType(DL, Ty);
286 return getTLI()->isTypeLegal(VT);
287 }
288
289 int getGEPCost(Type *PointeeType, const Value *Ptr,
290 ArrayRef<const Value *> Operands) {
291 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
292 }
293
294 int getExtCost(const Instruction *I, const Value *Src) {
295 if (getTLI()->isExtFree(I))
296 return TargetTransformInfo::TCC_Free;
297
298 if (isa<ZExtInst>(I) || isa<SExtInst>(I))
299 if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
300 if (getTLI()->isExtLoad(LI, I, DL))
301 return TargetTransformInfo::TCC_Free;
302
303 return TargetTransformInfo::TCC_Basic;
304 }
305
306 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
307 ArrayRef<const Value *> Arguments, const User *U) {
308 return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
309 }
310
311 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
312 ArrayRef<Type *> ParamTys, const User *U) {
313 if (IID == Intrinsic::cttz) {
314 if (getTLI()->isCheapToSpeculateCttz())
315 return TargetTransformInfo::TCC_Basic;
316 return TargetTransformInfo::TCC_Expensive;
317 }
318
319 if (IID == Intrinsic::ctlz) {
320 if (getTLI()->isCheapToSpeculateCtlz())
321 return TargetTransformInfo::TCC_Basic;
322 return TargetTransformInfo::TCC_Expensive;
323 }
324
325 return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
326 }
327
328 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
329 unsigned &JumpTableSize) {
330 /// Try to find the estimated number of clusters. Note that the number of
331 /// clusters identified in this function could be different from the actual
332    /// numbers found in lowering. This function ignores switches that are
333    /// lowered with a mix of jump table / bit test / BTree. This function was
334    /// initially intended to be used when estimating the cost of a switch in
335    /// the inline cost heuristic, but it is a generic cost model to be used in
336    /// other places (e.g., in loop unrolling).
337 unsigned N = SI.getNumCases();
338 const TargetLoweringBase *TLI = getTLI();
339 const DataLayout &DL = this->getDataLayout();
340
341 JumpTableSize = 0;
342 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
343
344 // Early exit if both a jump table and bit test are not allowed.
345 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
346 return N;
347
348 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
349 APInt MinCaseVal = MaxCaseVal;
350 for (auto CI : SI.cases()) {
351 const APInt &CaseVal = CI.getCaseValue()->getValue();
352 if (CaseVal.sgt(MaxCaseVal))
353 MaxCaseVal = CaseVal;
354 if (CaseVal.slt(MinCaseVal))
355 MinCaseVal = CaseVal;
356 }
357
358 // Check if suitable for a bit test
359 if (N <= DL.getIndexSizeInBits(0u)) {
360 SmallPtrSet<const BasicBlock *, 4> Dests;
361 for (auto I : SI.cases())
362 Dests.insert(I.getCaseSuccessor());
363
364 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
365 DL))
366 return 1;
367 }
368
369 // Check if suitable for a jump table.
370 if (IsJTAllowed) {
371 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
372 return N;
373 uint64_t Range =
374 (MaxCaseVal - MinCaseVal)
375 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
376 // Check whether a range of clusters is dense enough for a jump table
377 if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
378 JumpTableSize = Range;
379 return 1;
380 }
381 }
382 return N;
383 }
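A short worked example of the jump-table branch above (the case values are made up for illustration, and this assumes the earlier bit-test check did not already succeed): a switch with N = 5 cases valued {10, 11, 12, 40, 100} gives MinCaseVal = 10 and MaxCaseVal = 100, so Range = (100 - 10) + 1 = 91. If jump tables are allowed and TLI->isSuitableForJumpTable(&SI, 5, 91) considers that range dense enough, the function reports a single cluster (returns 1 with JumpTableSize = 91); otherwise it falls through and returns N = 5.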
384
385 bool shouldBuildLookupTables() {
386 const TargetLoweringBase *TLI = getTLI();
387 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
388 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
389 }
390
391 bool haveFastSqrt(Type *Ty) {
392 const TargetLoweringBase *TLI = getTLI();
393 EVT VT = TLI->getValueType(DL, Ty);
394 return TLI->isTypeLegal(VT) &&
395 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
396 }
397
398 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
399 return true;
400 }
401
402 unsigned getFPOpCost(Type *Ty) {
403 // Check whether FADD is available, as a proxy for floating-point in
404 // general.
405 const TargetLoweringBase *TLI = getTLI();
406 EVT VT = TLI->getValueType(DL, Ty);
407 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
408 return TargetTransformInfo::TCC_Basic;
409 return TargetTransformInfo::TCC_Expensive;
410 }
411
412 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
413 const TargetLoweringBase *TLI = getTLI();
414 switch (Opcode) {
415 default: break;
416 case Instruction::Trunc:
417 if (TLI->isTruncateFree(OpTy, Ty))
418 return TargetTransformInfo::TCC_Free;
419 return TargetTransformInfo::TCC_Basic;
420 case Instruction::ZExt:
421 if (TLI->isZExtFree(OpTy, Ty))
422 return TargetTransformInfo::TCC_Free;
423 return TargetTransformInfo::TCC_Basic;
424
425 case Instruction::AddrSpaceCast:
426 if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
427 Ty->getPointerAddressSpace()))
428 return TargetTransformInfo::TCC_Free;
429 return TargetTransformInfo::TCC_Basic;
430 }
431
432 return BaseT::getOperationCost(Opcode, Ty, OpTy);
433 }
434
435 unsigned getInliningThresholdMultiplier() { return 1; }
436
437 int getInlinerVectorBonusPercent() { return 150; }
438
439 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
440 TTI::UnrollingPreferences &UP) {
441 // This unrolling functionality is target independent, but to provide some
442 // motivation for its intended use, for x86:
443
444 // According to the Intel 64 and IA-32 Architectures Optimization Reference
445 // Manual, Intel Core models and later have a loop stream detector (and
446 // associated uop queue) that can benefit from partial unrolling.
447 // The relevant requirements are:
448 // - The loop must have no more than 4 (8 for Nehalem and later) branches
449 // taken, and none of them may be calls.
450 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
451
452 // According to the Software Optimization Guide for AMD Family 15h
453 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
454 // and loop buffer which can benefit from partial unrolling.
455 // The relevant requirements are:
456 // - The loop must have fewer than 16 branches
457 // - The loop must have less than 40 uops in all executed loop branches
458
459 // The number of taken branches in a loop is hard to estimate here, and
460 // benchmarking has revealed that it is better not to be conservative when
461 // estimating the branch count. As a result, we'll ignore the branch limits
462 // until someone finds a case where it matters in practice.
463
464 unsigned MaxOps;
465 const TargetSubtargetInfo *ST = getST();
466 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
467 MaxOps = PartialUnrollingThreshold;
468 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
469 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
470 else
471 return;
472
473 // Scan the loop: don't unroll loops with calls.
474 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
475 ++I) {
476 BasicBlock *BB = *I;
477
478 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
479 if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
480 ImmutableCallSite CS(&*J);
481 if (const Function *F = CS.getCalledFunction()) {
482 if (!static_cast<T *>(this)->isLoweredToCall(F))
483 continue;
484 }
485
486 return;
487 }
488 }
489
490 // Enable runtime and partial unrolling up to the specified size.
491 // Enable using trip count upper bound to unroll loops.
492 UP.Partial = UP.Runtime = UP.UpperBound = true;
493 UP.PartialThreshold = MaxOps;
494
495 // Avoid unrolling when optimizing for size.
496 UP.OptSizeThreshold = 0;
497 UP.PartialOptSizeThreshold = 0;
498
499 // Set number of instructions optimized when "back edge"
500 // becomes "fall through" to default value of 2.
501 UP.BEInsns = 2;
502 }
503
504 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
505 AssumptionCache &AC,
506 TargetLibraryInfo *LibInfo,
507 HardwareLoopInfo &HWLoopInfo) {
508 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
509 }
510
511 int getInstructionLatency(const Instruction *I) {
512 if (isa<LoadInst>(I))
513 return getST()->getSchedModel().DefaultLoadLatency;
514
515 return BaseT::getInstructionLatency(I);
516 }
517
518 virtual Optional<unsigned>
519 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
520 return Optional<unsigned>(
521 getST()->getCacheSize(static_cast<unsigned>(Level)));
522 }
523
524 virtual Optional<unsigned>
525 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
526 Optional<unsigned> TargetResult =
527 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
528
529 if (TargetResult)
530 return TargetResult;
531
532 return BaseT::getCacheAssociativity(Level);
533 }
534
535 virtual unsigned getCacheLineSize() const {
536 return getST()->getCacheLineSize();
537 }
538
539 virtual unsigned getPrefetchDistance() const {
540 return getST()->getPrefetchDistance();
541 }
542
543 virtual unsigned getMinPrefetchStride() const {
544 return getST()->getMinPrefetchStride();
545 }
546
547 virtual unsigned getMaxPrefetchIterationsAhead() const {
548 return getST()->getMaxPrefetchIterationsAhead();
549 }
550
551 /// @}
552
553 /// \name Vector TTI Implementations
554 /// @{
555
556 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
557
558 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
559 /// are set if the result needs to be inserted and/or extracted from vectors.
560 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
561    assert(Ty->isVectorTy() && "Can only scalarize vectors");
562 unsigned Cost = 0;
563
564 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
565 if (Insert)
566 Cost += static_cast<T *>(this)
567 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
568 if (Extract)
569 Cost += static_cast<T *>(this)
570 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
571 }
572
573 return Cost;
574 }
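A worked example under a unit per-element cost assumption (the real costs come from getVectorInstrCost and are target dependent):

    // getScalarizationOverhead(<4 x i32>, /*Insert=*/true, /*Extract=*/true)
    //   = 4 inserts + 4 extracts = 8
    // getScalarizationOverhead(<4 x i32>, /*Insert=*/true, /*Extract=*/false)
    //   = 4 inserts              = 4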
575
576  /// Estimate the overhead of scalarizing an instruction's unique
577  /// non-constant operands. The types of the arguments are ordinarily
578  /// scalar, in which case the costs are multiplied by VF.
579 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
580 unsigned VF) {
581 unsigned Cost = 0;
582 SmallPtrSet<const Value*, 4> UniqueOperands;
583 for (const Value *A : Args) {
584 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
585 Type *VecTy = nullptr;
586 if (A->getType()->isVectorTy()) {
587 VecTy = A->getType();
588 // If A is a vector operand, VF should be 1 or correspond to A.
589          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
590                 "Vector argument does not match VF");
591 }
592 else
593 VecTy = VectorType::get(A->getType(), VF);
594
595 Cost += getScalarizationOverhead(VecTy, false, true);
596 }
597 }
598
599 return Cost;
600 }
601
602 unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
603    assert(VecTy->isVectorTy());
604
605 unsigned Cost = 0;
606
607 Cost += getScalarizationOverhead(VecTy, true, false);
608 if (!Args.empty())
609 Cost += getOperandsScalarizationOverhead(Args,
610 VecTy->getVectorNumElements());
611 else
612 // When no information on arguments is provided, we add the cost
613 // associated with one argument as a heuristic.
614 Cost += getScalarizationOverhead(VecTy, false, true);
615
616 return Cost;
617 }
618
619 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
620
621 unsigned getArithmeticInstrCost(
622 unsigned Opcode, Type *Ty,
623 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
624 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
625 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
626 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
627 ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
628 // Check if any of the operands are vector operands.
629 const TargetLoweringBase *TLI = getTLI();
630 int ISD = TLI->InstructionOpcodeToISD(Opcode);
631 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h"
, 631, __PRETTY_FUNCTION__))
;
632
633 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
634
635 bool IsFloat = Ty->isFPOrFPVectorTy();
636 // Assume that floating point arithmetic operations cost twice as much as
637 // integer operations.
638 unsigned OpCost = (IsFloat ? 2 : 1);
639
640 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
641 // The operation is legal. Assume it costs 1.
642 // TODO: Once we have extract/insert subvector cost we need to use them.
643 return LT.first * OpCost;
644 }
645
646 if (!TLI->isOperationExpand(ISD, LT.second)) {
647 // If the operation is custom lowered, then assume that the code is twice
648 // as expensive.
649 return LT.first * 2 * OpCost;
650 }
651
652 // Else, assume that we need to scalarize this op.
653 // TODO: If one of the types get legalized by splitting, handle this
654 // similarly to what getCastInstrCost() does.
655 if (Ty->isVectorTy()) {
656 unsigned Num = Ty->getVectorNumElements();
657 unsigned Cost = static_cast<T *>(this)
658 ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
659 // Return the cost of multiple scalar invocation plus the cost of
660 // inserting and extracting the values.
661 return getScalarizationOverhead(Ty, Args) + Num * Cost;
662 }
663
664 // We don't know anything about this scalar instruction.
665 return OpCost;
666 }
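To illustrate the three outcomes above with made-up numbers (the legalization results here are hypothetical, not taken from any particular target): suppose Ty = <8 x i32> legalizes to two v4i32 operations (LT.first == 2) and the opcode is an integer add, so OpCost == 1. Then:

    // legal or promoted on v4i32 -> cost = LT.first * OpCost     = 2 * 1     = 2
    // custom lowered             -> cost = LT.first * 2 * OpCost = 2 * 2 * 1 = 4
    // expanded                   -> scalarized: 8 * (scalar add cost)
    //                               + getScalarizationOverhead(Ty, Args)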
667
668 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
669 Type *SubTp) {
670 switch (Kind) {
671 case TTI::SK_Broadcast:
672 return getBroadcastShuffleOverhead(Tp);
673 case TTI::SK_Select:
674 case TTI::SK_Reverse:
675 case TTI::SK_Transpose:
676 case TTI::SK_PermuteSingleSrc:
677 case TTI::SK_PermuteTwoSrc:
678 return getPermuteShuffleOverhead(Tp);
679 case TTI::SK_ExtractSubvector:
680 return getExtractSubvectorOverhead(Tp, Index, SubTp);
681 case TTI::SK_InsertSubvector:
682 return getInsertSubvectorOverhead(Tp, Index, SubTp);
683 }
684 llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind",
"/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h"
, 684)
;
685 }
686
687 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
688 const Instruction *I = nullptr) {
689 const TargetLoweringBase *TLI = getTLI();
690 int ISD = TLI->InstructionOpcodeToISD(Opcode);
691 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h"
, 691, __PRETTY_FUNCTION__))
;
692 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
693 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
694
695 // Check for NOOP conversions.
696 if (SrcLT.first == DstLT.first &&
697 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
698
699 // Bitcast between types that are legalized to the same type are free.
700 if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
701 return 0;
702 }
703
704 if (Opcode == Instruction::Trunc &&
705 TLI->isTruncateFree(SrcLT.second, DstLT.second))
706 return 0;
707
708 if (Opcode == Instruction::ZExt &&
709 TLI->isZExtFree(SrcLT.second, DstLT.second))
710 return 0;
711
712 if (Opcode == Instruction::AddrSpaceCast &&
713 TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
714 Dst->getPointerAddressSpace()))
715 return 0;
716
717 // If this is a zext/sext of a load, return 0 if the corresponding
718 // extending load exists on target.
719 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
720 I && isa<LoadInst>(I->getOperand(0))) {
721 EVT ExtVT = EVT::getEVT(Dst);
722 EVT LoadVT = EVT::getEVT(Src);
723 unsigned LType =
724 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
725 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
726 return 0;
727 }
728
729 // If the cast is marked as legal (or promote) then assume low cost.
730 if (SrcLT.first == DstLT.first &&
731 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
732 return 1;
733
734 // Handle scalar conversions.
735 if (!Src->isVectorTy() && !Dst->isVectorTy()) {
736 // Scalar bitcasts are usually free.
737 if (Opcode == Instruction::BitCast)
738 return 0;
739
740 // Just check the op cost. If the operation is legal then assume it costs
741 // 1.
742 if (!TLI->isOperationExpand(ISD, DstLT.second))
743 return 1;
744
745 // Assume that illegal scalar instruction are expensive.
746 return 4;
747 }
748
749 // Check vector-to-vector casts.
750 if (Dst->isVectorTy() && Src->isVectorTy()) {
751 // If the cast is between same-sized registers, then the check is simple.
752 if (SrcLT.first == DstLT.first &&
753 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
754
755 // Assume that Zext is done using AND.
756 if (Opcode == Instruction::ZExt)
757 return 1;
758
759 // Assume that sext is done using SHL and SRA.
760 if (Opcode == Instruction::SExt)
761 return 2;
762
763 // Just check the op cost. If the operation is legal then assume it
764 // costs
765 // 1 and multiply by the type-legalization overhead.
766 if (!TLI->isOperationExpand(ISD, DstLT.second))
767 return SrcLT.first * 1;
768 }
769
770 // If we are legalizing by splitting, query the concrete TTI for the cost
771 // of casting the original vector twice. We also need to factor in the
772 // cost of the split itself. Count that as 1, to be consistent with
773 // TLI->getTypeLegalizationCost().
774 if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
775 TargetLowering::TypeSplitVector) ||
776 (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
777 TargetLowering::TypeSplitVector)) {
778 Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
779 Dst->getVectorNumElements() / 2);
780 Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
781 Src->getVectorNumElements() / 2);
782 T *TTI = static_cast<T *>(this);
783 return TTI->getVectorSplitCost() +
784 (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
785 }
786
787 // In other cases where the source or destination are illegal, assume
788 // the operation will get scalarized.
789 unsigned Num = Dst->getVectorNumElements();
790 unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
791 Opcode, Dst->getScalarType(), Src->getScalarType(), I);
792
793 // Return the cost of multiple scalar invocation plus the cost of
794 // inserting and extracting the values.
795 return getScalarizationOverhead(Dst, true, true) + Num * Cost;
796 }
797
798 // We already handled vector-to-vector and scalar-to-scalar conversions.
799 // This
800 // is where we handle bitcast between vectors and scalars. We need to assume
801 // that the conversion is scalarized in one way or another.
802 if (Opcode == Instruction::BitCast)
803 // Illegal bitcasts are done by storing and loading from a stack slot.
804 return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
805 : 0) +
806 (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
807 : 0);
808
809 llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h"
, 809)
;
810 }
811
812 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
813 VectorType *VecTy, unsigned Index) {
814 return static_cast<T *>(this)->getVectorInstrCost(
815 Instruction::ExtractElement, VecTy, Index) +
816 static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
817 VecTy->getElementType());
818 }
819
820 unsigned getCFInstrCost(unsigned Opcode) {
821 // Branches are assumed to be predicted.
822 return 0;
823 }
824
825 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
826 const Instruction *I) {
827 const TargetLoweringBase *TLI = getTLI();
828 int ISD = TLI->InstructionOpcodeToISD(Opcode);
829 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h"
, 829, __PRETTY_FUNCTION__))
;
Step 7: Assuming 'ISD' is not equal to 0
Step 8: '?' condition is true
830
831 // Selects on vectors are actually vector selects.
832 if (ISD == ISD::SELECT) {
Step 9: Assuming 'ISD' is not equal to SELECT
Step 10: Taking false branch
833 assert(CondTy && "CondTy must exist")((CondTy && "CondTy must exist") ? static_cast<void
> (0) : __assert_fail ("CondTy && \"CondTy must exist\""
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h"
, 833, __PRETTY_FUNCTION__))
;
834 if (CondTy->isVectorTy())
835 ISD = ISD::VSELECT;
836 }
837 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
838
839 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
Step 11: Taking false branch
840 !TLI->isOperationExpand(ISD, LT.second)) {
841 // The operation is legal. Assume it costs 1. Multiply
842 // by the type-legalization overhead.
843 return LT.first * 1;
844 }
845
846 // Otherwise, assume that the cast is scalarized.
847 // TODO: If one of the types get legalized by splitting, handle this
848 // similarly to what getCastInstrCost() does.
849 if (ValTy->isVectorTy()) {
Step 12: Calling 'Type::isVectorTy'
Step 14: Returning from 'Type::isVectorTy'
Step 15: Taking true branch
850 unsigned Num = ValTy->getVectorNumElements();
851 if (CondTy)
Step 16: Assuming 'CondTy' is null
Step 17: Taking false branch
852 CondTy = CondTy->getScalarType();
853 unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
Step 19: Calling 'AArch64TTIImpl::getCmpSelInstrCost'
854 Opcode, ValTy->getScalarType(), CondTy, I);
Step 18: Passing null pointer value via 3rd parameter 'CondTy'
855
856 // Return the cost of multiple scalar invocation plus the cost of
857 // inserting and extracting the values.
858 return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
859 }
860
861 // Unknown scalar opcode.
862 return 1;
863 }
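Tying the numbered path notes above together, a minimal sketch of the call chain the analyzer reports (the opcode and value type are illustrative; only the null CondTy is taken from the report):

    // getCmpSelInstrCost(/*Opcode=*/Instruction::FCmp, /*ValTy=*/a vector type,
    //                    /*CondTy=*/nullptr, I)
    //   - ISD is not SELECT (steps 9-10), so the "CondTy must exist" assert is
    //     never reached
    //   - the early return for a directly legal operation is not taken (step 11)
    //   - ValTy->isVectorTy() is true (steps 12-15), so the scalarization path runs
    //   - 'if (CondTy)' does nothing because CondTy is null (steps 16-17), so it
    //     stays null
    //   - the per-element recursion then passes the null CondTy as the third
    //     parameter into AArch64TTIImpl::getCmpSelInstrCost (steps 18-19), where
    //     the remaining steps of this report continue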
864
865 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
866 std::pair<unsigned, MVT> LT =
867 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
868
869 return LT.first;
870 }
871
872 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
873 unsigned AddressSpace, const Instruction *I = nullptr) {
874    assert(!Src->isVoidTy() && "Invalid type");
875 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
876
877 // Assuming that all loads of legal types cost 1.
878 unsigned Cost = LT.first;
879
880 if (Src->isVectorTy() &&
881 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
882 // This is a vector load that legalizes to a larger type than the vector
883 // itself. Unless the corresponding extending load or truncating store is
884 // legal, then this will scalarize.
885 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
886 EVT MemVT = getTLI()->getValueType(DL, Src);
887 if (Opcode == Instruction::Store)
888 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
889 else
890 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
891
892 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
893 // This is a vector load/store for some illegal type that is scalarized.
894 // We must account for the cost of building or decomposing the vector.
895 Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
896 Opcode == Instruction::Store);
897 }
898 }
899
900 return Cost;
901 }
902
903 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
904 unsigned Factor,
905 ArrayRef<unsigned> Indices,
906 unsigned Alignment, unsigned AddressSpace,
907 bool UseMaskForCond = false,
908 bool UseMaskForGaps = false) {
909 VectorType *VT = dyn_cast<VectorType>(VecTy);
910 assert(VT && "Expect a vector type for interleaved memory op")((VT && "Expect a vector type for interleaved memory op"
) ? static_cast<void> (0) : __assert_fail ("VT && \"Expect a vector type for interleaved memory op\""
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/BasicTTIImpl.h"
, 910, __PRETTY_FUNCTION__))
;
911
912 unsigned NumElts = VT->getNumElements();
913    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
914
915 unsigned NumSubElts = NumElts / Factor;
916 VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
917
918 // Firstly, the cost of load/store operation.
919 unsigned Cost;
920 if (UseMaskForCond || UseMaskForGaps)
921 Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
922 Opcode, VecTy, Alignment, AddressSpace);
923 else
924 Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
925 AddressSpace);
926
927 // Legalize the vector type, and get the legalized and unlegalized type
928 // sizes.
929 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
930 unsigned VecTySize =
931 static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
932 unsigned VecTyLTSize = VecTyLT.getStoreSize();
933
934 // Return the ceiling of dividing A by B.
935 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
936
937 // Scale the cost of the memory operation by the fraction of legalized
938 // instructions that will actually be used. We shouldn't account for the
939 // cost of dead instructions since they will be removed.
940 //
941 // E.g., An interleaved load of factor 8:
942 // %vec = load <16 x i64>, <16 x i64>* %ptr
943 // %v0 = shufflevector %vec, undef, <0, 8>
944 //
945 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
946 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
947 // type). The other loads are unused.
948 //
949 // We only scale the cost of loads since interleaved store groups aren't
950 // allowed to have gaps.
951 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
952 // The number of loads of a legal type it will take to represent a load
953 // of the unlegalized vector type.
954 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
955
956 // The number of elements of the unlegalized type that correspond to a
957 // single legal instruction.
958 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
959
960 // Determine which legal instructions will be used.
961 BitVector UsedInsts(NumLegalInsts, false);
962 for (unsigned Index : Indices)
963 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
964 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
965
966 // Scale the cost of the load by the fraction of legal instructions that
967 // will be used.
968 Cost *= UsedInsts.count() / NumLegalInsts;
969 }
970
971 // Then plus the cost of interleave operation.
972 if (Opcode == Instruction::Load) {
973 // The interleave cost is similar to extract sub vectors' elements
974 // from the wide vector, and insert them into sub vectors.
975 //
976 // E.g. An interleaved load of factor 2 (with one member of index 0):
977 // %vec = load <8 x i32>, <8 x i32>* %ptr
978 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
979 // The cost is estimated as extract elements at 0, 2, 4, 6 from the
980 // <8 x i32> vector and insert them into a <4 x i32> vector.
981
982      assert(Indices.size() <= Factor &&
983             "Interleaved memory op has too many members");
984
985 for (unsigned Index : Indices) {
986        assert(Index < Factor && "Invalid index for interleaved memory op");
987
988 // Extract elements from loaded vector for each sub vector.
989 for (unsigned i = 0; i < NumSubElts; i++)
990 Cost += static_cast<T *>(this)->getVectorInstrCost(
991 Instruction::ExtractElement, VT, Index + i * Factor);
992 }
993
994 unsigned InsSubCost = 0;
995 for (unsigned i = 0; i < NumSubElts; i++)
996 InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
997 Instruction::InsertElement, SubVT, i);
998
999 Cost += Indices.size() * InsSubCost;
1000 } else {
1001 // The interleave cost is extract all elements from sub vectors, and
1002 // insert them into the wide vector.
1003 //
1004 // E.g. An interleaved store of factor 2:
1005 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1006 // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
1007 // The cost is estimated as extract all elements from both <4 x i32>
1008 // vectors and insert into the <8 x i32> vector.
1009
1010 unsigned ExtSubCost = 0;
1011 for (unsigned i = 0; i < NumSubElts; i++)
1012 ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
1013 Instruction::ExtractElement, SubVT, i);
1014 Cost += ExtSubCost * Factor;
1015
1016 for (unsigned i = 0; i < NumElts; i++)
1017 Cost += static_cast<T *>(this)
1018 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1019 }
1020
1021 if (!UseMaskForCond)
1022 return Cost;
1023
1024 Type *I8Type = Type::getInt8Ty(VT->getContext());
1025 VectorType *MaskVT = VectorType::get(I8Type, NumElts);
1026 SubVT = VectorType::get(I8Type, NumSubElts);
1027
1028 // The Mask shuffling cost is to extract all the elements of the Mask
1029 // and insert each of them Factor times into the wide vector:
1030 //
1031 // E.g. an interleaved group with factor 3:
1032 // %mask = icmp ult <8 x i32> %vec1, %vec2
1033 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1034 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1035 // The cost is estimated as extracting all mask elements from the <8xi1>
1036 // mask vector and inserting them Factor times into the <24xi1> shuffled
1037 // mask vector.
1038 for (unsigned i = 0; i < NumSubElts; i++)
1039 Cost += static_cast<T *>(this)->getVectorInstrCost(
1040 Instruction::ExtractElement, SubVT, i);
1041
1042 for (unsigned i = 0; i < NumElts; i++)
1043 Cost += static_cast<T *>(this)->getVectorInstrCost(
1044 Instruction::InsertElement, MaskVT, i);
1045
1046 // The Gaps mask is invariant and created outside the loop, therefore the
1047 // cost of creating it is not accounted for here. However if we have both
1048 // a MaskForGaps and some other mask that guards the execution of the
1049 // memory access, we need to account for the cost of And-ing the two masks
1050 // inside the loop.
1051 if (UseMaskForGaps)
1052 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1053 BinaryOperator::And, MaskVT);
1054
1055 return Cost;
1056 }
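// Rough illustration of the composition above, assuming unit costs for each
// extract/insert and no masking: for the factor-2 load example (one member at
// index 0, NumSubElts = 4), the interleave part adds
//   4 extracts from the <8 x i32> wide vector (indices 0, 2, 4, 6)
// + 1 member * 4 inserts into a <4 x i32> sub vector
// = 8, on top of whatever getMemoryOpCost / getMaskedMemoryOpCost returned
// for the wide memory access itself.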
1057
1058 /// Get intrinsic cost based on arguments.
1059 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1060 ArrayRef<Value *> Args, FastMathFlags FMF,
1061 unsigned VF = 1) {
1062 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1063 assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1064 auto *ConcreteTTI = static_cast<T *>(this);
1065
1066 switch (IID) {
1067 default: {
1068 // Assume that we need to scalarize this intrinsic.
1069 SmallVector<Type *, 4> Types;
1070 for (Value *Op : Args) {
1071 Type *OpTy = Op->getType();
1072 assert(VF == 1 || !OpTy->isVectorTy());
1073 Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1074 }
1075
1076 if (VF > 1 && !RetTy->isVoidTy())
1077 RetTy = VectorType::get(RetTy, VF);
1078
1079 // Compute the scalarization overhead based on Args for a vector
1080 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1081 // CostModel will pass a vector RetTy and VF is 1.
1082 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1083 if (RetVF > 1 || VF > 1) {
1084 ScalarizationCost = 0;
1085 if (!RetTy->isVoidTy())
1086 ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1087 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1088 }
1089
1090 return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1091 ScalarizationCost);
1092 }
1093 case Intrinsic::masked_scatter: {
1094 assert(VF == 1 && "Can't vectorize types here.");
1095 Value *Mask = Args[3];
1096 bool VarMask = !isa<Constant>(Mask);
1097 unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1098 return ConcreteTTI->getGatherScatterOpCost(
1099 Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1100 }
1101 case Intrinsic::masked_gather: {
1102 assert(VF == 1 && "Can't vectorize types here.");
1103 Value *Mask = Args[2];
1104 bool VarMask = !isa<Constant>(Mask);
1105 unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1106 return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1107 Args[0], VarMask, Alignment);
1108 }
1109 case Intrinsic::experimental_vector_reduce_add:
1110 case Intrinsic::experimental_vector_reduce_mul:
1111 case Intrinsic::experimental_vector_reduce_and:
1112 case Intrinsic::experimental_vector_reduce_or:
1113 case Intrinsic::experimental_vector_reduce_xor:
1114 case Intrinsic::experimental_vector_reduce_v2_fadd:
1115 case Intrinsic::experimental_vector_reduce_v2_fmul:
1116 case Intrinsic::experimental_vector_reduce_smax:
1117 case Intrinsic::experimental_vector_reduce_smin:
1118 case Intrinsic::experimental_vector_reduce_fmax:
1119 case Intrinsic::experimental_vector_reduce_fmin:
1120 case Intrinsic::experimental_vector_reduce_umax:
1121 case Intrinsic::experimental_vector_reduce_umin:
1122 return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1123 case Intrinsic::fshl:
1124 case Intrinsic::fshr: {
1125 Value *X = Args[0];
1126 Value *Y = Args[1];
1127 Value *Z = Args[2];
1128 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1129 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1130 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1131 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1132 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1133 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1134 : TTI::OP_None;
1135 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1136 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1137 unsigned Cost = 0;
1138 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1139 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1140 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1141 OpKindX, OpKindZ, OpPropsX);
1142 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1143 OpKindY, OpKindZ, OpPropsY);
1144 // Non-constant shift amounts require a modulo.
1145 if (OpKindZ != TTI::OK_UniformConstantValue &&
1146 OpKindZ != TTI::OK_NonUniformConstantValue)
1147 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1148 OpKindZ, OpKindBW, OpPropsZ,
1149 OpPropsBW);
1150 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1151 if (X != Y) {
1152 Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1153 if (RetVF > 1)
1154 CondTy = VectorType::get(CondTy, RetVF);
1155 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1156 CondTy, nullptr);
1157 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1158 CondTy, nullptr);
1159 }
1160 return Cost;
1161 }
1162 }
1163 }
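// Rough illustration for the funnel-shift case above, assuming every
// arithmetic instruction, compare and select costs 1: a non-rotate fshl
// (X != Y) with a variable shift amount Z is modeled as
//   or + sub + shl + lshr + urem + icmp + select  ~= 7,
// while a rotate (X == Y) by a constant amount drops the urem, icmp and
// select and is modeled as ~4.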
1164
1165 /// Get intrinsic cost based on argument types.
1166 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1167 /// cost of scalarizing the arguments and the return value will be computed
1168 /// based on types.
1169 unsigned getIntrinsicInstrCost(
1170 Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1171 unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1172 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1173 auto *ConcreteTTI = static_cast<T *>(this);
1174
1175 SmallVector<unsigned, 2> ISDs;
1176 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1177 switch (IID) {
1178 default: {
1179 // Assume that we need to scalarize this intrinsic.
1180 unsigned ScalarizationCost = ScalarizationCostPassed;
1181 unsigned ScalarCalls = 1;
1182 Type *ScalarRetTy = RetTy;
1183 if (RetTy->isVectorTy()) {
1184 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1185 ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1186 ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1187 ScalarRetTy = RetTy->getScalarType();
1188 }
1189 SmallVector<Type *, 4> ScalarTys;
1190 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1191 Type *Ty = Tys[i];
1192 if (Ty->isVectorTy()) {
1193 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1194 ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1195 ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1196 Ty = Ty->getScalarType();
1197 }
1198 ScalarTys.push_back(Ty);
1199 }
1200 if (ScalarCalls == 1)
1201 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1202
1203 unsigned ScalarCost =
1204 ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1205
1206 return ScalarCalls * ScalarCost + ScalarizationCost;
1207 }
1208 // Look for intrinsics that can be lowered directly or turned into a scalar
1209 // intrinsic call.
1210 case Intrinsic::sqrt:
1211 ISDs.push_back(ISD::FSQRT);
1212 break;
1213 case Intrinsic::sin:
1214 ISDs.push_back(ISD::FSIN);
1215 break;
1216 case Intrinsic::cos:
1217 ISDs.push_back(ISD::FCOS);
1218 break;
1219 case Intrinsic::exp:
1220 ISDs.push_back(ISD::FEXP);
1221 break;
1222 case Intrinsic::exp2:
1223 ISDs.push_back(ISD::FEXP2);
1224 break;
1225 case Intrinsic::log:
1226 ISDs.push_back(ISD::FLOG);
1227 break;
1228 case Intrinsic::log10:
1229 ISDs.push_back(ISD::FLOG10);
1230 break;
1231 case Intrinsic::log2:
1232 ISDs.push_back(ISD::FLOG2);
1233 break;
1234 case Intrinsic::fabs:
1235 ISDs.push_back(ISD::FABS);
1236 break;
1237 case Intrinsic::canonicalize:
1238 ISDs.push_back(ISD::FCANONICALIZE);
1239 break;
1240 case Intrinsic::minnum:
1241 ISDs.push_back(ISD::FMINNUM);
1242 if (FMF.noNaNs())
1243 ISDs.push_back(ISD::FMINIMUM);
1244 break;
1245 case Intrinsic::maxnum:
1246 ISDs.push_back(ISD::FMAXNUM);
1247 if (FMF.noNaNs())
1248 ISDs.push_back(ISD::FMAXIMUM);
1249 break;
1250 case Intrinsic::copysign:
1251 ISDs.push_back(ISD::FCOPYSIGN);
1252 break;
1253 case Intrinsic::floor:
1254 ISDs.push_back(ISD::FFLOOR);
1255 break;
1256 case Intrinsic::ceil:
1257 ISDs.push_back(ISD::FCEIL);
1258 break;
1259 case Intrinsic::trunc:
1260 ISDs.push_back(ISD::FTRUNC);
1261 break;
1262 case Intrinsic::nearbyint:
1263 ISDs.push_back(ISD::FNEARBYINT);
1264 break;
1265 case Intrinsic::rint:
1266 ISDs.push_back(ISD::FRINT);
1267 break;
1268 case Intrinsic::round:
1269 ISDs.push_back(ISD::FROUND);
1270 break;
1271 case Intrinsic::pow:
1272 ISDs.push_back(ISD::FPOW);
1273 break;
1274 case Intrinsic::fma:
1275 ISDs.push_back(ISD::FMA);
1276 break;
1277 case Intrinsic::fmuladd:
1278 ISDs.push_back(ISD::FMA);
1279 break;
1280 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1281 case Intrinsic::lifetime_start:
1282 case Intrinsic::lifetime_end:
1283 case Intrinsic::sideeffect:
1284 return 0;
1285 case Intrinsic::masked_store:
1286 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1287 0);
1288 case Intrinsic::masked_load:
1289 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1290 case Intrinsic::experimental_vector_reduce_add:
1291 return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1292 /*IsPairwiseForm=*/false);
1293 case Intrinsic::experimental_vector_reduce_mul:
1294 return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1295 /*IsPairwiseForm=*/false);
1296 case Intrinsic::experimental_vector_reduce_and:
1297 return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1298 /*IsPairwiseForm=*/false);
1299 case Intrinsic::experimental_vector_reduce_or:
1300 return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1301 /*IsPairwiseForm=*/false);
1302 case Intrinsic::experimental_vector_reduce_xor:
1303 return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1304 /*IsPairwiseForm=*/false);
1305 case Intrinsic::experimental_vector_reduce_v2_fadd:
1306 return ConcreteTTI->getArithmeticReductionCost(
1307 Instruction::FAdd, Tys[0],
1308 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1309 // reductions.
1310 case Intrinsic::experimental_vector_reduce_v2_fmul:
1311 return ConcreteTTI->getArithmeticReductionCost(
1312 Instruction::FMul, Tys[0],
1313 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1314 // reductions.
1315 case Intrinsic::experimental_vector_reduce_smax:
1316 case Intrinsic::experimental_vector_reduce_smin:
1317 case Intrinsic::experimental_vector_reduce_fmax:
1318 case Intrinsic::experimental_vector_reduce_fmin:
1319 return ConcreteTTI->getMinMaxReductionCost(
1320 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1321 /*IsUnsigned=*/false);
1322 case Intrinsic::experimental_vector_reduce_umax:
1323 case Intrinsic::experimental_vector_reduce_umin:
1324 return ConcreteTTI->getMinMaxReductionCost(
1325 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1326 /*IsUnsigned=*/true);
1327 case Intrinsic::sadd_sat:
1328 case Intrinsic::ssub_sat: {
1329 Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1330 if (RetVF > 1)
1331 CondTy = VectorType::get(CondTy, RetVF);
1332
1333 Type *OpTy = StructType::create({RetTy, CondTy});
1334 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1335 ? Intrinsic::sadd_with_overflow
1336 : Intrinsic::ssub_with_overflow;
1337
1338 // SatMax -> Overflow && SumDiff < 0
1339 // SatMin -> Overflow && SumDiff >= 0
1340 unsigned Cost = 0;
1341 Cost += ConcreteTTI->getIntrinsicInstrCost(
1342 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1343 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1344 CondTy, nullptr);
1345 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1346 CondTy, nullptr);
1347 return Cost;
1348 }
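// Rough illustration, assuming unit costs throughout: the scalar sadd.sat
// estimate is Cost(sadd.with.overflow) + 1 icmp + 2 selects. With the
// overflow-op estimate from the sadd_with_overflow case below
// (add + 5 compares + and ~= 7), that comes to roughly 10.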
1349 case Intrinsic::uadd_sat:
1350 case Intrinsic::usub_sat: {
1351 Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1352 if (RetVF > 1)
1353 CondTy = VectorType::get(CondTy, RetVF);
1354
1355 Type *OpTy = StructType::create({RetTy, CondTy});
1356 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1357 ? Intrinsic::uadd_with_overflow
1358 : Intrinsic::usub_with_overflow;
1359
1360 unsigned Cost = 0;
1361 Cost += ConcreteTTI->getIntrinsicInstrCost(
1362 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1363 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1364 CondTy, nullptr);
1365 return Cost;
1366 }
1367 case Intrinsic::smul_fix:
1368 case Intrinsic::umul_fix: {
1369 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1370 Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1371 if (RetVF > 1)
1372 ExtTy = VectorType::get(ExtTy, RetVF);
1373
1374 unsigned ExtOp =
1375 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1376
1377 unsigned Cost = 0;
1378 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1379 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1380 Cost +=
1381 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1382 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1383 TTI::OK_AnyValue,
1384 TTI::OK_UniformConstantValue);
1385 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1386 TTI::OK_AnyValue,
1387 TTI::OK_UniformConstantValue);
1388 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1389 return Cost;
1390 }
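// Rough illustration, assuming unit costs: a scalar i32 smul.fix is modeled
// as 2 sign extends to i64, a mul, 2 truncs back to i32, an lshr, a shl and
// an or, i.e. roughly 2 + 1 + 2 + 1 + 1 + 1 = 8.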
1391 case Intrinsic::sadd_with_overflow:
1392 case Intrinsic::ssub_with_overflow: {
1393 Type *SumTy = RetTy->getContainedType(0);
1394 Type *OverflowTy = RetTy->getContainedType(1);
1395 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1396 ? BinaryOperator::Add
1397 : BinaryOperator::Sub;
1398
1399 // LHSSign -> LHS >= 0
1400 // RHSSign -> RHS >= 0
1401 // SumSign -> Sum >= 0
1402 //
1403 // Add:
1404 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1405 // Sub:
1406 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1407 unsigned Cost = 0;
1408 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1409 Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1410 OverflowTy, nullptr);
1411 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1412 BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1413 Cost +=
1414 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1415 return Cost;
1416 }
1417 case Intrinsic::uadd_with_overflow:
1418 case Intrinsic::usub_with_overflow: {
1419 Type *SumTy = RetTy->getContainedType(0);
1420 Type *OverflowTy = RetTy->getContainedType(1);
1421 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1422 ? BinaryOperator::Add
1423 : BinaryOperator::Sub;
1424
1425 unsigned Cost = 0;
1426 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1427 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1428 OverflowTy, nullptr);
1429 return Cost;
1430 }
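// Rough illustration, assuming unit costs: uadd.with.overflow is just the
// add plus one compare of the result against an operand, so roughly 2.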
1431 case Intrinsic::smul_with_overflow:
1432 case Intrinsic::umul_with_overflow: {
1433 Type *MulTy = RetTy->getContainedType(0);
1434 Type *OverflowTy = RetTy->getContainedType(1);
1435 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1436 Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1437 if (MulTy->isVectorTy())
1438 ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements());
1439
1440 unsigned ExtOp =
1441 IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
1442
1443 unsigned Cost = 0;
1444 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1445 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1446 Cost +=
1447 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1448 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1449 TTI::OK_AnyValue,
1450 TTI::OK_UniformConstantValue);
1451
1452 if (IID == Intrinsic::smul_with_overflow)
1453 Cost += ConcreteTTI->getArithmeticInstrCost(
1454 Instruction::AShr, MulTy, TTI::OK_AnyValue,
1455 TTI::OK_UniformConstantValue);
1456
1457 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1458 OverflowTy, nullptr);
1459 return Cost;
1460 }
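// Rough illustration, assuming unit costs: umul.with.overflow on i32 is
// modeled as 2 extends to i64 + mul + 2 truncs + lshr + icmp ~= 7; the
// signed variant adds an extra ashr for the sign check, ~8.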
1461 case Intrinsic::ctpop:
1462 ISDs.push_back(ISD::CTPOP);
1463 // In case of legalization use TCC_Expensive. This is cheaper than a
1464 // library call but still not a cheap instruction.
1465 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1466 break;
1467 // FIXME: ctlz, cttz, ...
1468 }
1469
1470 const TargetLoweringBase *TLI = getTLI();
1471 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1472
1473 SmallVector<unsigned, 2> LegalCost;
1474 SmallVector<unsigned, 2> CustomCost;
1475 for (unsigned ISD : ISDs) {
1476 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1477 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1478 TLI->isFAbsFree(LT.second)) {
1479 return 0;
1480 }
1481
1482 // The operation is legal. Assume it costs 1.
1483 // If the type is split to multiple registers, assume that there is some
1484 // overhead to this.
1485 // TODO: Once we have extract/insert subvector cost we need to use them.
1486 if (LT.first > 1)
1487 LegalCost.push_back(LT.first * 2);
1488 else
1489 LegalCost.push_back(LT.first * 1);
1490 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1491 // If the operation is custom lowered then assume
1492 // that the code is twice as expensive.
1493 CustomCost.push_back(LT.first * 2);
1494 }
1495 }
1496
1497 auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1498 if (MinLegalCostI != LegalCost.end())
1499 return *MinLegalCostI;
1500
1501 auto MinCustomCostI =
1502 std::min_element(CustomCost.begin(), CustomCost.end());
1503 if (MinCustomCostI != CustomCost.end())
1504 return *MinCustomCostI;
1505
1506 // If we can't lower fmuladd into an FMA, estimate the cost as a floating
1507 // point mul followed by an add.
1508 if (IID == Intrinsic::fmuladd)
1509 return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1510 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1511
1512 // Else, assume that we need to scalarize this intrinsic. For math builtins
1513 // this will emit a costly libcall, adding call overhead and spills. Make it
1514 // very expensive.
1515 if (RetTy->isVectorTy()) {
1516 unsigned ScalarizationCost =
1517 ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1518 ? ScalarizationCostPassed
1519 : getScalarizationOverhead(RetTy, true, false));
1520 unsigned ScalarCalls = RetTy->getVectorNumElements();
1521 SmallVector<Type *, 4> ScalarTys;
1522 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1523 Type *Ty = Tys[i];
1524 if (Ty->isVectorTy())
1525 Ty = Ty->getScalarType();
1526 ScalarTys.push_back(Ty);
1527 }
1528 unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1529 IID, RetTy->getScalarType(), ScalarTys, FMF);
1530 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1531 if (Tys[i]->isVectorTy()) {
1532 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1533 ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1534 ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1535 }
1536 }
1537
1538 return ScalarCalls * ScalarCost + ScalarizationCost;
1539 }
1540
1541 // This is going to be turned into a library call, make it expensive.
1542 return SingleCallCost;
1543 }
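// Rough illustration of the legalization-based path above: for an intrinsic
// such as llvm.sqrt.v4f32 on a target where ISD::FSQRT is legal for v4f32,
// LT.first is 1 and the returned cost is 1; if the vector type had to be
// split across two registers (LT.first == 2) the "legal" estimate becomes
// 2 * 2 = 4, and a custom-lowered operation is likewise charged LT.first * 2.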
1544
1545 /// Compute a cost of the given call instruction.
1546 ///
1547 /// Compute the cost of calling function F with return type RetTy and
1548 /// argument types Tys. F might be nullptr, in this case the cost of an
1549 /// arbitrary call with the specified signature will be returned.
1550 /// This is used, for instance, when we estimate call of a vector
1551 /// counterpart of the given function.
1552 /// \param F Called function, might be nullptr.
1553 /// \param RetTy Return value types.
1554 /// \param Tys Argument types.
1555 /// \returns The cost of Call instruction.
1556 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1557 return 10;
1558 }
1559
1560 unsigned getNumberOfParts(Type *Tp) {
1561 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1562 return LT.first;
1563 }
1564
1565 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1566 const SCEV *) {
1567 return 0;
1568 }
1569
1570 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1571 /// We're assuming that reduction operation are performing the following way:
1572 /// 1. Non-pairwise reduction
1573 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1574 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1575 /// \----------------v-------------/ \----------v------------/
1576 /// n/2 elements n/2 elements
1577 /// %red1 = op <n x t> %val, <n x t> val1
1578 /// After this operation we have a vector %red1 where only the first n/2
1579 /// elements are meaningful, the second n/2 elements are undefined and can be
1580 /// dropped. All other operations are actually working with the vector of
1581 /// length n/2, not n, though the real vector length is still n.
1582 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1583 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1584 /// \----------------v-------------/ \----------v------------/
1585 /// n/4 elements 3*n/4 elements
1586 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1587 /// length n/2, the resulting vector has length n/4 etc.
1588 /// 2. Pairwise reduction:
1589 /// Everything is the same except for an additional shuffle operation which
1590 /// is used to produce operands for pairwise kind of reductions.
1591 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1592 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1593 /// \-------------v----------/ \----------v------------/
1594 /// n/2 elements n/2 elements
1595 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1596 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1597 /// \-------------v----------/ \----------v------------/
1598 /// n/2 elements n/2 elements
1599 /// %red1 = op <n x t> %val1, <n x t> val2
1600 /// Again, the operation is performed on <n x t> vector, but the resulting
1601 /// vector %red1 is <n/2 x t> vector.
1602 ///
1603 /// The cost model should take into account that the actual length of the
1604 /// vector is reduced on each iteration.
1605 unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1606 bool IsPairwise) {
1607 assert(Ty->isVectorTy() && "Expect a vector type");
1608 Type *ScalarTy = Ty->getVectorElementType();
1609 unsigned NumVecElts = Ty->getVectorNumElements();
1610 unsigned NumReduxLevels = Log2_32(NumVecElts);
1611 unsigned ArithCost = 0;
1612 unsigned ShuffleCost = 0;
1613 auto *ConcreteTTI = static_cast<T *>(this);
1614 std::pair<unsigned, MVT> LT =
1615 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1616 unsigned LongVectorCount = 0;
1617 unsigned MVTLen =
1618 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1619 while (NumVecElts > MVTLen) {
1620 NumVecElts /= 2;
1621 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1622 // Assume the pairwise shuffles add a cost.
1623 ShuffleCost += (IsPairwise + 1) *
1624 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1625 NumVecElts, SubTy);
1626 ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1627 Ty = SubTy;
1628 ++LongVectorCount;
1629 }
1630
1631 NumReduxLevels -= LongVectorCount;
1632
1633 // The minimal length of the vector is limited by the real length of vector
1634 // operations performed on the current platform. That's why several final
1635 // reduction operations are performed on the vectors with the same
1636 // architecture-dependent length.
1637
1638 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1639 // reductions need two shuffles on every level but the last one; on that
1640 // level one of the shuffles is <0, u, u, ...>, which is the identity.
1641 unsigned NumShuffles = NumReduxLevels;
1642 if (IsPairwise && NumReduxLevels >= 1)
1643 NumShuffles += NumReduxLevels - 1;
1644 ShuffleCost += NumShuffles *
1645 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1646 0, Ty);
1647 ArithCost += NumReduxLevels *
1648 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1649 return ShuffleCost + ArithCost +
1650 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1651 }
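// Rough illustration, assuming unit shuffle and arithmetic costs: a
// non-pairwise add reduction of <8 x i32> on a target whose widest legal
// vector is <4 x i32> first splits once (1 extract-subvector shuffle +
// 1 add on <4 x i32>), then performs the remaining log2(8) - 1 = 2 levels
// as 2 permute shuffles + 2 adds, and finally extracts element 0:
// roughly 3 + 3 + 1 = 7.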
1652
1653 /// Try to calculate op costs for min/max reduction operations.
1654 /// \param CondTy Conditional type for the Select instruction.
1655 unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1656 bool) {
1657 assert(Ty->isVectorTy() && "Expect a vector type");
1658 Type *ScalarTy = Ty->getVectorElementType();
1659 Type *ScalarCondTy = CondTy->getVectorElementType();
1660 unsigned NumVecElts = Ty->getVectorNumElements();
1661 unsigned NumReduxLevels = Log2_32(NumVecElts);
1662 unsigned CmpOpcode;
1663 if (Ty->isFPOrFPVectorTy()) {
1664 CmpOpcode = Instruction::FCmp;
1665 } else {
1666 assert(Ty->isIntOrIntVectorTy() &&
1667 "expecting floating point or integer type for min/max reduction");
1668 CmpOpcode = Instruction::ICmp;
1669 }
1670 unsigned MinMaxCost = 0;
1671 unsigned ShuffleCost = 0;
1672 auto *ConcreteTTI = static_cast<T *>(this);
1673 std::pair<unsigned, MVT> LT =
1674 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1675 unsigned LongVectorCount = 0;
1676 unsigned MVTLen =
1677 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1678 while (NumVecElts > MVTLen) {
1679 NumVecElts /= 2;
1680 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1681 CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1682
1683 // Assume the pairwise shuffles add a cost.
1684 ShuffleCost += (IsPairwise + 1) *
1685 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1686 NumVecElts, SubTy);
1687 MinMaxCost +=
1688 ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1689 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1690 nullptr);
1691 Ty = SubTy;
1692 ++LongVectorCount;
1693 }
1694
1695 NumReduxLevels -= LongVectorCount;
1696
1697 // The minimal length of the vector is limited by the real length of vector
1698 // operations performed on the current platform. That's why several final
1699 // reduction operations are performed on the vectors with the same
1700 // architecture-dependent length.
1701
1702 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1703 // reductions need two shuffles on every level but the last one; on that
1704 // level one of the shuffles is <0, u, u, ...>, which is the identity.
1705 unsigned NumShuffles = NumReduxLevels;
1706 if (IsPairwise && NumReduxLevels >= 1)
1707 NumShuffles += NumReduxLevels - 1;
1708 ShuffleCost += NumShuffles *
1709 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1710 0, Ty);
1711 MinMaxCost +=
1712 NumReduxLevels *
1713 (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1714 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1715 nullptr));
1716 // The last min/max should be in vector registers and we counted it above.
1717 // So just need a single extractelement.
1718 return ShuffleCost + MinMaxCost +
1719 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1720 }
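// The min/max reduction above follows the same shape, except each reduction
// level is charged a compare plus a select instead of a single arithmetic
// op; for the <8 x i32> example that would be roughly 3 shuffles +
// 3 * (cmp + select) + 1 extract = 10 under unit costs.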
1721
1722 unsigned getVectorSplitCost() { return 1; }
1723
1724 /// @}
1725};
1726
1727/// Concrete BasicTTIImpl that can be used if no further customization
1728/// is needed.
1729class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1730 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1731
1732 friend class BasicTTIImplBase<BasicTTIImpl>;
1733
1734 const TargetSubtargetInfo *ST;
1735 const TargetLoweringBase *TLI;
1736
1737 const TargetSubtargetInfo *getST() const { return ST; }
1738 const TargetLoweringBase *getTLI() const { return TLI; }
1739
1740public:
1741 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1742};
1743
1744} // end namespace llvm
1745
1746#endif // LLVM_CODEGEN_BASICTTIIMPL_H

/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/TargetLowering.h

1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10 /// This file describes how to lower LLVM code to machine code. This has three
11/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
32#include "llvm/CodeGen/DAGCombine.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/RuntimeLibcalls.h"
35#include "llvm/CodeGen/SelectionDAG.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetCallingConv.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/IR/Attributes.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/IRBuilder.h"
46#include "llvm/IR/InlineAsm.h"
47#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Type.h"
50#include "llvm/MC/MCRegisterInfo.h"
51#include "llvm/Support/Alignment.h"
52#include "llvm/Support/AtomicOrdering.h"
53#include "llvm/Support/Casting.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MachineValueType.h"
56#include "llvm/Target/TargetMachine.h"
57#include <algorithm>
58#include <cassert>
59#include <climits>
60#include <cstdint>
61#include <iterator>
62#include <map>
63#include <string>
64#include <utility>
65#include <vector>
66
67namespace llvm {
68
69class BranchProbability;
70class CCState;
71class CCValAssign;
72class Constant;
73class FastISel;
74class FunctionLoweringInfo;
75class GlobalValue;
76class GISelKnownBits;
77class IntrinsicInst;
78struct KnownBits;
79class LLVMContext;
80class MachineBasicBlock;
81class MachineFunction;
82class MachineInstr;
83class MachineJumpTableInfo;
84class MachineLoop;
85class MachineRegisterInfo;
86class MCContext;
87class MCExpr;
88class Module;
89class TargetRegisterClass;
90class TargetLibraryInfo;
91class TargetRegisterInfo;
92class Value;
93
94namespace Sched {
95
96 enum Preference {
97 None, // No preference
98 Source, // Follow source order.
99 RegPressure, // Scheduling for lowest register pressure.
100 Hybrid, // Scheduling for both latency and register pressure.
101 ILP, // Scheduling for ILP in low register pressure mode.
102 VLIW // Scheduling for VLIW targets.
103 };
104
105} // end namespace Sched
106
107/// This base class for TargetLowering contains the SelectionDAG-independent
108/// parts that can be used from the rest of CodeGen.
109class TargetLoweringBase {
110public:
111 /// This enum indicates whether operations are valid for a target, and if not,
112 /// what action should be used to make them valid.
113 enum LegalizeAction : uint8_t {
114 Legal, // The target natively supports this operation.
115 Promote, // This operation should be executed in a larger type.
116 Expand, // Try to expand this to other ops, otherwise use a libcall.
117 LibCall, // Don't try to expand this to other ops, always use a libcall.
118 Custom // Use the LowerOperation hook to implement custom lowering.
119 };
120
121 /// This enum indicates whether types are legal for a target, and if not,
122 /// what action should be used to make them valid.
123 enum LegalizeTypeAction : uint8_t {
124 TypeLegal, // The target natively supports this type.
125 TypePromoteInteger, // Replace this integer with a larger one.
126 TypeExpandInteger, // Split this integer into two of half the size.
127 TypeSoftenFloat, // Convert this float to a same size integer type.
128 TypeExpandFloat, // Split this float into two of half the size.
129 TypeScalarizeVector, // Replace this one-element vector with its element.
130 TypeSplitVector, // Split this vector into two of half the size.
131 TypeWidenVector, // This vector should be widened into a larger vector.
132 TypePromoteFloat // Replace this float with a larger one.
133 };
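// For example (target-dependent, so only indicative): a one-element vector
// such as <1 x i32> is typically scalarized (TypeScalarizeVector), an odd
// width like <3 x i32> is widened to <4 x i32> (TypeWidenVector), and an
// integer wider than the largest legal type is split in half
// (TypeExpandInteger); see getPreferredVectorAction below for the default
// vector policy.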
134
135 /// LegalizeKind holds the legalization kind that needs to happen to EVT
136 /// in order to type-legalize it.
137 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
138
139 /// Enum that describes how the target represents true/false values.
140 enum BooleanContent {
141 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
142 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
143 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
144 };
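// For example, a ZeroOrNegativeOneBooleanContent target widens its booleans
// with a sign extend (an all-ones mask stays all-ones), whereas a
// ZeroOrOneBooleanContent target uses a zero extend; see
// getExtendForContent() below.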
145
146 /// Enum that describes what type of support for selects the target has.
147 enum SelectSupportKind {
148 ScalarValSelect, // The target supports scalar selects (ex: cmov).
149 ScalarCondVectorVal, // The target supports selects with a scalar condition
150 // and vector values (ex: cmov).
151 VectorMaskSelect // The target supports vector selects with a vector
152 // mask (ex: x86 blends).
153 };
154
155 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
156 /// to, if at all. Exists because different targets have different levels of
157 /// support for these atomic instructions, and also have different options
158 /// w.r.t. what they should expand to.
159 enum class AtomicExpansionKind {
160 None, // Don't expand the instruction.
161 LLSC, // Expand the instruction into loadlinked/storeconditional; used
162 // by ARM/AArch64.
163 LLOnly, // Expand the (load) instruction into just a load-linked, which has
164 // greater atomic guarantees than a normal load.
165 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
166 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
167 };
168
169 /// Enum that specifies when a multiplication should be expanded.
170 enum class MulExpansionKind {
171 Always, // Always expand the instruction.
172 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
173 // or custom.
174 };
175
176 class ArgListEntry {
177 public:
178 Value *Val = nullptr;
179 SDValue Node = SDValue();
180 Type *Ty = nullptr;
181 bool IsSExt : 1;
182 bool IsZExt : 1;
183 bool IsInReg : 1;
184 bool IsSRet : 1;
185 bool IsNest : 1;
186 bool IsByVal : 1;
187 bool IsInAlloca : 1;
188 bool IsReturned : 1;
189 bool IsSwiftSelf : 1;
190 bool IsSwiftError : 1;
191 uint16_t Alignment = 0;
192 Type *ByValType = nullptr;
193
194 ArgListEntry()
195 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
196 IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
197 IsSwiftSelf(false), IsSwiftError(false) {}
198
199 void setAttributes(const CallBase *Call, unsigned ArgIdx);
200
201 void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
202 return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
203 }
204 };
205 using ArgListTy = std::vector<ArgListEntry>;
206
207 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
208 ArgListTy &Args) const {};
209
210 static ISD::NodeType getExtendForContent(BooleanContent Content) {
211 switch (Content) {
212 case UndefinedBooleanContent:
213 // Extend by adding rubbish bits.
214 return ISD::ANY_EXTEND;
215 case ZeroOrOneBooleanContent:
216 // Extend by adding zero bits.
217 return ISD::ZERO_EXTEND;
218 case ZeroOrNegativeOneBooleanContent:
219 // Extend by copying the sign bit.
220 return ISD::SIGN_EXTEND;
221 }
222 llvm_unreachable("Invalid content kind");
223 }
224
225 /// NOTE: The TargetMachine owns TLOF.
226 explicit TargetLoweringBase(const TargetMachine &TM);
227 TargetLoweringBase(const TargetLoweringBase &) = delete;
228 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
229 virtual ~TargetLoweringBase() = default;
230
231protected:
232 /// Initialize all of the actions to default values.
233 void initActions();
234
235public:
236 const TargetMachine &getTargetMachine() const { return TM; }
237
238 virtual bool useSoftFloat() const { return false; }
239
240 /// Return the pointer type for the given address space, defaults to
241 /// the pointer type from the data layout.
242 /// FIXME: The default needs to be removed once all the code is updated.
243 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
244 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
245 }
246
247 /// Return the in-memory pointer type for the given address space, defaults to
248 /// the pointer type from the data layout. FIXME: The default needs to be
249 /// removed once all the code is updated.
250 MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
251 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
252 }
253
254 /// Return the type for frame index, which is determined by
255 /// the alloca address space specified through the data layout.
256 MVT getFrameIndexTy(const DataLayout &DL) const {
257 return getPointerTy(DL, DL.getAllocaAddrSpace());
258 }
259
260 /// Return the type for operands of fence.
261 /// TODO: Let fence operands be of i32 type and remove this.
262 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
263 return getPointerTy(DL);
264 }
265
266 /// EVT is not used in-tree, but is used by out-of-tree targets.
267 /// Documentation for this function would be nice...
268 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
269
270 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
271 bool LegalTypes = true) const;
272
273 /// Returns the type to be used for the index operand of:
274 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
275 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
276 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
277 return getPointerTy(DL);
278 }
279
280 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
281 return true;
282 }
283
284 /// Return true if it is profitable to convert a select of FP constants into
285 /// a constant pool load whose address depends on the select condition. The
286 /// parameter may be used to differentiate a select with FP compare from
287 /// integer compare.
288 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
289 return true;
290 }
291
292 /// Return true if multiple condition registers are available.
293 bool hasMultipleConditionRegisters() const {
294 return HasMultipleConditionRegisters;
295 }
296
297 /// Return true if the target has BitExtract instructions.
298 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
299
300 /// Return the preferred vector type legalization action.
301 virtual TargetLoweringBase::LegalizeTypeAction
302 getPreferredVectorAction(MVT VT) const {
303 // The default action for one element vectors is to scalarize
304 if (VT.getVectorNumElements() == 1)
305 return TypeScalarizeVector;
306 // The default action for an odd-width vector is to widen.
307 if (!VT.isPow2VectorType())
308 return TypeWidenVector;
309 // The default action for other vectors is to promote
310 return TypePromoteInteger;
311 }
312
313 // There are two general methods for expanding a BUILD_VECTOR node:
314 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
315 // them together.
316 // 2. Build the vector on the stack and then load it.
317 // If this function returns true, then method (1) will be used, subject to
318 // the constraint that all of the necessary shuffles are legal (as determined
319 // by isShuffleMaskLegal). If this function returns false, then method (2) is
320 // always used. The vector type, and the number of defined values, are
321 // provided.
322 virtual bool
323 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
324 unsigned DefinedValues) const {
325 return DefinedValues < 3;
326 }
327
328 /// Return true if integer divide is usually cheaper than a sequence of
329 /// several shifts, adds, and multiplies for this target.
330 /// The definition of "cheaper" may depend on whether we're optimizing
331 /// for speed or for size.
332 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
333
334 /// Return true if the target can handle a standalone remainder operation.
335 virtual bool hasStandaloneRem(EVT VT) const {
336 return true;
337 }
338
339 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
340 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
341 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
342 return false;
343 }
344
345 /// Reciprocal estimate status values used by the functions below.
346 enum ReciprocalEstimate : int {
347 Unspecified = -1,
348 Disabled = 0,
349 Enabled = 1
350 };
351
352 /// Return a ReciprocalEstimate enum value for a square root of the given type
353 /// based on the function's attributes. If the operation is not overridden by
354 /// the function's attributes, "Unspecified" is returned and target defaults
355 /// are expected to be used for instruction selection.
356 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
357
358 /// Return a ReciprocalEstimate enum value for a division of the given type
359 /// based on the function's attributes. If the operation is not overridden by
360 /// the function's attributes, "Unspecified" is returned and target defaults
361 /// are expected to be used for instruction selection.
362 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
363
364 /// Return the refinement step count for a square root of the given type based
365 /// on the function's attributes. If the operation is not overridden by
366 /// the function's attributes, "Unspecified" is returned and target defaults
367 /// are expected to be used for instruction selection.
368 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
369
370 /// Return the refinement step count for a division of the given type based
371 /// on the function's attributes. If the operation is not overridden by
372 /// the function's attributes, "Unspecified" is returned and target defaults
373 /// are expected to be used for instruction selection.
374 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
375
376 /// Returns true if target has indicated at least one type should be bypassed.
377 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
378
379 /// Returns map of slow types for division or remainder with corresponding
380 /// fast types
381 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
382 return BypassSlowDivWidths;
383 }
384
385 /// Return true if Flow Control is an expensive operation that should be
386 /// avoided.
387 bool isJumpExpensive() const { return JumpIsExpensive; }
388
389 /// Return true if selects are only cheaper than branches if the branch is
390 /// unlikely to be predicted right.
391 bool isPredictableSelectExpensive() const {
392 return PredictableSelectIsExpensive;
393 }
394
395 /// If a branch or a select condition is skewed in one direction by more than
396 /// this factor, it is very likely to be predicted correctly.
397 virtual BranchProbability getPredictableBranchThreshold() const;
398
399 /// Return true if the following transform is beneficial:
400 /// fold (conv (load x)) -> (load (conv*)x)
401 /// On architectures that don't natively support some vector loads
402 /// efficiently, casting the load to a smaller vector of larger types and
403 /// loading is more efficient, however, this can be undone by optimizations in
404 /// dag combiner.
405 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
406 const SelectionDAG &DAG,
407 const MachineMemOperand &MMO) const {
408 // Don't do this if we could do an indexed load on the original type, but not on
409 // the new one.
410 if (!LoadVT.isSimple() || !BitcastVT.isSimple())
411 return true;
412
413 MVT LoadMVT = LoadVT.getSimpleVT();
414
415 // Don't bother doing this if it's just going to be promoted again later, as
416 // doing so might interfere with other combines.
417 if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
418 getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
419 return false;
420
421 bool Fast = false;
422 return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
423 MMO, &Fast) && Fast;
424 }
425
426 /// Return true if the following transform is beneficial:
427 /// (store (y (conv x)), y*) -> (store x, (x*))
428 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
429 const SelectionDAG &DAG,
430 const MachineMemOperand &MMO) const {
431 // Default to the same logic as loads.
432 return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
433 }
434
435 /// Return true if it is expected to be cheaper to do a store of a non-zero
436 /// vector constant with the given size and type for the address space than to
437 /// store the individual scalar element constants.
438 virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
439 unsigned NumElem,
440 unsigned AddrSpace) const {
441 return false;
442 }
443
444 /// Allow store merging for the specified type after legalization in addition
445 /// to before legalization. This may transform stores that do not exist
446 /// earlier (for example, stores created from intrinsics).
447 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
448 return true;
449 }
450
451 /// Returns if it's reasonable to merge stores to MemVT size.
452 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
453 const SelectionDAG &DAG) const {
454 return true;
455 }
456
457 /// Return true if it is cheap to speculate a call to intrinsic cttz.
458 virtual bool isCheapToSpeculateCttz() const {
459 return false;
460 }
461
462 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
463 virtual bool isCheapToSpeculateCtlz() const {
464 return false;
465 }
466
467 /// Return true if ctlz instruction is fast.
468 virtual bool isCtlzFast() const {
469 return false;
470 }
471
472 /// Return true if it is safe to transform an integer-domain bitwise operation
473 /// into the equivalent floating-point operation. This should be set to true
474 /// if the target has IEEE-754-compliant fabs/fneg operations for the input
475 /// type.
476 virtual bool hasBitPreservingFPLogic(EVT VT) const {
477 return false;
478 }
479
480 /// Return true if it is cheaper to split the store of a merged int val
481 /// from a pair of smaller values into multiple stores.
482 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
483 return false;
484 }
485
486 /// Return if the target supports combining a
487 /// chain like:
488 /// \code
489 /// %andResult = and %val1, #mask
490 /// %icmpResult = icmp %andResult, 0
491 /// \endcode
492 /// into a single machine instruction of a form like:
493 /// \code
494 /// cc = test %register, #mask
495 /// \endcode
496 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
497 return false;
498 }
499
500 /// Use bitwise logic to make pairs of compares more efficient. For example:
501 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
502 /// This should be true when it takes more than one instruction to lower
503 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
504 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
505 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
506 return false;
507 }
508
509 /// Return the preferred operand type if the target has a quick way to compare
510 /// integer values of the given size. Assume that any legal integer type can
511 /// be compared efficiently. Targets may override this to allow illegal wide
512 /// types to return a vector type if there is support to compare that type.
513 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
514 MVT VT = MVT::getIntegerVT(NumBits);
515 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
516 }
517
518 /// Return true if the target should transform:
519 /// (X & Y) == Y ---> (~X & Y) == 0
520 /// (X & Y) != Y ---> (~X & Y) != 0
521 ///
522 /// This may be profitable if the target has a bitwise and-not operation that
523 /// sets comparison flags. A target may want to limit the transformation based
524 /// on the type of Y or if Y is a constant.
525 ///
526 /// Note that the transform will not occur if Y is known to be a power-of-2
527 /// because a mask and compare of a single bit can be handled by inverting the
528 /// predicate, for example:
529 /// (X & 8) == 8 ---> (X & 8) != 0
530 virtual bool hasAndNotCompare(SDValue Y) const {
531 return false;
532 }
533
534 /// Return true if the target has a bitwise and-not operation:
535 /// X = ~A & B
536 /// This can be used to simplify select or other instructions.
537 virtual bool hasAndNot(SDValue X) const {
538 // If the target has the more complex version of this operation, assume that
539 // it has this operation too.
540 return hasAndNotCompare(X);
541 }
542
543 /// Return true if the target has a bit-test instruction:
544 /// (X & (1 << Y)) ==/!= 0
545 /// This knowledge can be used to prevent breaking the pattern,
546 /// or creating it if it could be recognized.
547 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
548
549 /// There are two ways to clear extreme bits (either low or high):
550 /// Mask: x & (-1 << y) (the instcombine canonical form)
551 /// Shifts: x >> y << y
552 /// Return true if the variant with 2 variable shifts is preferred.
553 /// Return false if there is no preference.
554 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
555 // By default, let's assume that no one prefers shifts.
556 return false;
557 }
558
559 /// Return true if it is profitable to fold a pair of shifts into a mask.
560 /// This is usually true on most targets. But some targets, like Thumb1,
561 /// have immediate shift instructions, but no immediate "and" instruction;
562 /// this makes the fold unprofitable.
563 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
564 CombineLevel Level) const {
565 return true;
566 }
567
568 /// Should we transform the IR-optimal check for whether the given truncation
569 /// down into KeptBits would be truncating or not:
570 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
571 /// Into its more traditional form:
572 /// ((%x << C) a>> C) dstcond %x
573 /// Return true if we should transform.
574 /// Return false if there is no preference.
575 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
576 unsigned KeptBits) const {
577 // By default, let's assume that no one prefers shifts.
578 return false;
579 }
580
581 /// Given the pattern
582 /// (X & (C l>>/<< Y)) ==/!= 0
583 /// return true if it should be transformed into:
584 /// ((X <</l>> Y) & C) ==/!= 0
585 /// WARNING: if 'X' is a constant, the fold may deadlock!
586 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
587 /// here because it can end up being not linked in.
588 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
589 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
590 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
591 SelectionDAG &DAG) const {
592 if (hasBitTest(X, Y)) {
593 // One interesting pattern that we'd want to form is 'bit test':
594 // ((1 << Y) & C) ==/!= 0
595 // But we also need to be careful not to try to reverse that fold.
596
597 // Is this '1 << Y' ?
598 if (OldShiftOpcode == ISD::SHL && CC->isOne())
599 return false; // Keep the 'bit test' pattern.
600
601 // Will it be '1 << Y' after the transform ?
602 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
603 return true; // Do form the 'bit test' pattern.
604 }
605
606 // If 'X' is a constant, and we transform, then we will immediately
607 // try to undo the fold, thus causing endless combine loop.
608 // So by default, let's assume everyone prefers the fold
609 // iff 'X' is not a constant.
610 return !XC;
611 }
612
613 /// These two forms are equivalent:
614 /// sub %y, (xor %x, -1)
615 /// add (add %x, 1), %y
616 /// The variant with two add's is IR-canonical.
617 /// Some targets may prefer one to the other.
618 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
619 // By default, let's assume that everyone prefers the form with two add's.
620 return true;
621 }
622
623 /// Return true if the target wants to use the optimization that
624 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
625 /// promotedInst1(...(promotedInstN(ext(load)))).
626 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
627
628 /// Return true if the target can combine store(extractelement VectorTy,
629 /// Idx).
630 /// \p Cost[out] gives the cost of that transformation when this is true.
631 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
632 unsigned &Cost) const {
633 return false;
634 }
635
636 /// Return true if inserting a scalar into a variable element of an undef
637 /// vector is more efficiently handled by splatting the scalar instead.
638 virtual bool shouldSplatInsEltVarIndex(EVT) const {
639 return false;
640 }
641
642 /// Return true if the target always benefits from combining into FMA for a
643 /// given value type. This must typically return false on targets where FMA
644 /// takes more cycles to execute than FADD.
645 virtual bool enableAggressiveFMAFusion(EVT VT) const {
646 return false;
647 }
648
649 /// Return the ValueType of the result of SETCC operations.
650 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
651 EVT VT) const;
652
653 /// Return the ValueType for comparison libcalls. Comparison libcalls include
654 /// floating point comparison calls, and Ordered/Unordered check calls on
655 /// floating point numbers.
656 virtual
657 MVT::SimpleValueType getCmpLibcallReturnType() const;
658
659 /// For targets without i1 registers, this gives the nature of the high-bits
660 /// of boolean values held in types wider than i1.
661 ///
662 /// "Boolean values" are special true/false values produced by nodes like
663 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
664 /// Not to be confused with general values promoted from i1. Some cpus
665 /// distinguish between vectors of boolean and scalars; the isVec parameter
666 /// selects between the two kinds. For example on X86 a scalar boolean should
667 /// be zero extended from i1, while the elements of a vector of booleans
668 /// should be sign extended from i1.
669 ///
670 /// Some cpus also treat floating point types the same way as they treat
671 /// vectors instead of the way they treat scalars.
672 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
673 if (isVec)
674 return BooleanVectorContents;
675 return isFloat ? BooleanFloatContents : BooleanContents;
676 }
677
678 BooleanContent getBooleanContents(EVT Type) const {
679 return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
680 }
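// A minimal usage sketch; TLI is an assumed reference to the target's
// TargetLowering. Per the comment above, a vector SETCC result may encode
// 'true' as all-ones while a scalar encodes it as 0/1, so combines check the
// content kind before reusing such a value.
TargetLowering::BooleanContent BC = TLI.getBooleanContents(MVT::v4i32);
bool TrueIsAllOnes = BC == TargetLowering::ZeroOrNegativeOneBooleanContent;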
681
682 /// Return target scheduling preference.
683 Sched::Preference getSchedulingPreference() const {
684 return SchedPreferenceInfo;
685 }
686
687 /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics
688 /// for different nodes. This function returns the preference (or none) for
689 /// the given node.
690 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
691 return Sched::None;
692 }
693
694 /// Return the register class that should be used for the specified value
695 /// type.
696 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
697 (void)isDivergent;
698 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
699 assert(RC && "This value type is not natively supported!");
700 return RC;
701 }
702
703 /// Allows target to decide about the register class of the
704 /// specific value that is live outside the defining block.
705 /// Returns true if the value needs uniform register class.
706 virtual bool requiresUniformRegister(MachineFunction &MF,
707 const Value *) const {
708 return false;
709 }
710
711 /// Return the 'representative' register class for the specified value
712 /// type.
713 ///
714 /// The 'representative' register class is the largest legal super-reg
715 /// register class for the register class of the value type. For example, on
716 /// i386 the rep register class for i8, i16, and i32 is GR32; while the rep
717 /// register class is GR64 on x86_64.
718 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
719 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
720 return RC;
721 }
722
723 /// Return the cost of the 'representative' register class for the specified
724 /// value type.
725 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
726 return RepRegClassCostForVT[VT.SimpleTy];
727 }
728
729 /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
730 /// instructions, and false if a library call is preferred (e.g. for code-size
731 /// reasons).
732 virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
733 return true;
734 }
735
736 /// Return true if the target has native support for the specified value type.
737 /// This means that it has a register that directly holds it without
738 /// promotions or expansions.
739 bool isTypeLegal(EVT VT) const {
740 assert(!VT.isSimple() ||
741 (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
742 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
743 }
744
745 class ValueTypeActionImpl {
746 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
747 /// that indicates how instruction selection should deal with the type.
748 LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];
749
750 public:
751 ValueTypeActionImpl() {
752 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
753 TypeLegal);
754 }
755
756 LegalizeTypeAction getTypeAction(MVT VT) const {
757 return ValueTypeActions[VT.SimpleTy];
758 }
759
760 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
761 ValueTypeActions[VT.SimpleTy] = Action;
762 }
763 };
764
765 const ValueTypeActionImpl &getValueTypeActions() const {
766 return ValueTypeActions;
767 }
768
769 /// Return how we should legalize values of this type, either it is already
770 /// legal (return 'Legal') or we need to promote it to a larger type (return
771 /// 'Promote'), or we need to expand it into multiple registers of smaller
772 /// integer type (return 'Expand'). 'Custom' is not an option.
773 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
774 return getTypeConversion(Context, VT).first;
775 }
776 LegalizeTypeAction getTypeAction(MVT VT) const {
777 return ValueTypeActions.getTypeAction(VT);
778 }
779
780 /// For types supported by the target, this is an identity function. For
781 /// types that must be promoted to larger types, this returns the larger type
782 /// to promote to. For integer types that are larger than the largest integer
783 /// register, this contains one step in the expansion to get to the smaller
784 /// register. For illegal floating point types, this returns the integer type
785 /// to transform to.
786 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
787 return getTypeConversion(Context, VT).second;
788 }
789
790 /// For types supported by the target, this is an identity function. For
791 /// types that must be expanded (i.e. integer types that are larger than the
792 /// largest integer register or illegal floating point types), this returns
793 /// the largest legal type it will be expanded to.
794 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
795 assert(!VT.isVector());
796 while (true) {
797 switch (getTypeAction(Context, VT)) {
798 case TypeLegal:
799 return VT;
800 case TypeExpandInteger:
801 VT = getTypeToTransformTo(Context, VT);
802 break;
803 default:
804 llvm_unreachable("Type is not legal nor is it to be expanded!");
805 }
806 }
807 }
808
809 /// Vector types are broken down into some number of legal first class types.
810 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
811 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
812 /// turns into 4 EVT::i32 values with both PPC and X86.
813 ///
814 /// This method returns the number of registers needed, and the VT for each
815 /// register. It also returns the VT and quantity of the intermediate values
816 /// before they are promoted/expanded.
817 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
818 EVT &IntermediateVT,
819 unsigned &NumIntermediates,
820 MVT &RegisterVT) const;
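// A minimal usage sketch (TLI and Ctx are assumed to be in scope), mirroring
// the v8f32 example in the comment above: on a target whose widest legal FP
// vector is v4f32, this would be expected to report two v4f32 registers.
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs = TLI.getVectorTypeBreakdown(
    Ctx, EVT::getVectorVT(Ctx, MVT::f32, 8), IntermediateVT,
    NumIntermediates, RegisterVT);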
821
822 /// Certain targets such as MIPS require that some types such as vectors are
823 /// always broken down into scalars in some contexts. This occurs even if the
824 /// vector type is legal.
825 virtual unsigned getVectorTypeBreakdownForCallingConv(
826 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
827 unsigned &NumIntermediates, MVT &RegisterVT) const {
828 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
829 RegisterVT);
830 }
831
832 struct IntrinsicInfo {
833 unsigned opc = 0; // target opcode
834 EVT memVT; // memory VT
835
836 // value representing memory location
837 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
838
839 int offset = 0; // offset off of ptrVal
840 uint64_t size = 0; // the size of the memory location
841 // (taken from memVT if zero)
842 MaybeAlign align = Align::None(); // alignment
843
844 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
845 IntrinsicInfo() = default;
846 };
847
848 /// Given an intrinsic, checks if on the target the intrinsic will need to map
849 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
850 /// true and stores the intrinsic information into the IntrinsicInfo that was
851 /// passed to the function.
852 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
853 MachineFunction &,
854 unsigned /*Intrinsic*/) const {
855 return false;
856 }
857
858 /// Returns true if the target can instruction select the specified FP
859 /// immediate natively. If false, the legalizer will materialize the FP
860 /// immediate as a load from a constant pool.
861 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
862 bool ForCodeSize = false) const {
863 return false;
864 }
865
866 /// Targets can use this to indicate that they only support *some*
867 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
868 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
869 /// legal.
870 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
871 return true;
872 }
873
874 /// Returns true if the operation can trap for the value type.
875 ///
876 /// VT must be a legal type. By default, we optimistically assume most
877 /// operations don't trap except for integer divide and remainder.
878 virtual bool canOpTrap(unsigned Op, EVT VT) const;
879
880 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
881 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
882 /// constant pool entry.
883 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
884 EVT /*VT*/) const {
885 return false;
886 }
887
888 /// Return how this operation should be treated: either it is legal, needs to
889 /// be promoted to a larger size, needs to be expanded to some other code
890 /// sequence, or the target has a custom expander for it.
891 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
892 if (VT.isExtended()) return Expand;
893 // If a target-specific SDNode requires legalization, require the target
894 // to provide custom legalization for it.
895 if (Op >= array_lengthof(OpActions[0])) return Custom;
896 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
897 }
898
899 /// Custom method defined by each target to indicate if an operation which
900 /// may require a scale is supported natively by the target.
901 /// If not, the operation is illegal.
902 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
903 unsigned Scale) const {
904 return false;
905 }
906
907 /// Some fixed point operations may be natively supported by the target but
908 /// only for specific scales. This method allows for checking
909 /// if the width is supported by the target for a given operation that may
910 /// depend on scale.
911 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
912 unsigned Scale) const {
913 auto Action = getOperationAction(Op, VT);
914 if (Action != Legal)
915 return Action;
916
917 // This operation is supported in this type but may only work on specific
918 // scales.
919 bool Supported;
920 switch (Op) {
921 default:
922 llvm_unreachable("Unexpected fixed point operation.");
923 case ISD::SMULFIX:
924 case ISD::SMULFIXSAT:
925 case ISD::UMULFIX:
926 case ISD::UMULFIXSAT:
927 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
928 break;
929 }
930
931 return Supported ? Action : Expand;
932 }
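// A minimal usage sketch (TLI assumed in scope): a fixed-point multiply is
// reported Legal only if the plain operation is Legal for the type *and* the
// target accepts this particular scale via isSupportedFixedPointOperation;
// otherwise the query degrades to Expand.
TargetLowering::LegalizeAction A =
    TLI.getFixedPointOperationAction(ISD::SMULFIX, MVT::i32, /*Scale=*/15);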
933
934 // If Op is a strict floating-point operation, return the result
935 // of getOperationAction for the equivalent non-strict operation.
936 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
937 unsigned EqOpc;
938 switch (Op) {
939 default: llvm_unreachable("Unexpected FP pseudo-opcode");
940 case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
941 case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
942 case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
943 case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
944 case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
945 case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
946 case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
947 case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
948 case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
949 case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
950 case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
951 case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
952 case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
953 case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
954 case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
955 case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
956 case ISD::STRICT_LRINT: EqOpc = ISD::LRINT; break;
957 case ISD::STRICT_LLRINT: EqOpc = ISD::LLRINT; break;
958 case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
959 case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
960 case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
961 case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
962 case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
963 case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
964 case ISD::STRICT_LROUND: EqOpc = ISD::LROUND; break;
965 case ISD::STRICT_LLROUND: EqOpc = ISD::LLROUND; break;
966 case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
967 case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
968 case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break;
969 case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break;
970 case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
971 case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
972 }
973
974 return getOperationAction(EqOpc, VT);
975 }
976
977 /// Return true if the specified operation is legal on this target or can be
978 /// made legal with custom lowering. This is used to help guide high-level
979 /// lowering decisions.
980 bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
981 return (VT == MVT::Other || isTypeLegal(VT)) &&
982 (getOperationAction(Op, VT) == Legal ||
983 getOperationAction(Op, VT) == Custom);
984 }
985
986 /// Return true if the specified operation is legal on this target or can be
987 /// made legal using promotion. This is used to help guide high-level lowering
988 /// decisions.
989 bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
990 return (VT == MVT::Other || isTypeLegal(VT)) &&
991 (getOperationAction(Op, VT) == Legal ||
992 getOperationAction(Op, VT) == Promote);
993 }
994
995 /// Return true if the specified operation is legal on this target or can be
996 /// made legal with custom lowering or using promotion. This is used to help
997 /// guide high-level lowering decisions.
998 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
999 return (VT == MVT::Other || isTypeLegal(VT)) &&
1000 (getOperationAction(Op, VT) == Legal ||
1001 getOperationAction(Op, VT) == Custom ||
1002 getOperationAction(Op, VT) == Promote);
1003 }
1004
1005 /// Return true if the operation uses custom lowering, regardless of whether
1006 /// the type is legal or not.
1007 bool isOperationCustom(unsigned Op, EVT VT) const {
1008 return getOperationAction(Op, VT) == Custom;
1009 }
1010
1011 /// Return true if lowering to a jump table is allowed.
1012 virtual bool areJTsAllowed(const Function *Fn) const {
1013 if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
1014 return false;
1015
1016 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1017 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1018 }
1019
1020 /// Check whether the range [Low,High] fits in a machine word.
1021 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1022 const DataLayout &DL) const {
1023 // FIXME: Using the pointer type doesn't seem ideal.
1024 uint64_t BW = DL.getIndexSizeInBits(0u);
1025 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1026 return Range <= BW;
1027 }
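// A worked instance (TLI and DL assumed in scope, 64-bit index type): the
// range [0, 63] spans 64 values and fits in a machine word, while [0, 64]
// spans 65 values and does not.
APInt Low(64, 0), HighFits(64, 63), HighTooWide(64, 64);
bool Fits = TLI.rangeFitsInWord(Low, HighFits, DL);        // expected: true
bool TooWide = TLI.rangeFitsInWord(Low, HighTooWide, DL);  // expected: false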
1028
1029 /// Return true if lowering to a jump table is suitable for a set of case
1030 /// clusters which may contain \p NumCases cases spanning a range of \p Range values.
1031 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1032 uint64_t Range) const {
1033 // FIXME: This function checks the maximum table size and density, but the
1034 // minimum size is not checked. It would be nice if the minimum size were
1035 // also checked within this function. Currently, the minimum size check is
1036 // performed in findJumpTable() in SelectionDAGBuilder and
1037 // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
1038 const bool OptForSize = SI->getParent()->getParent()->hasOptSize();
1039 const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
1040 const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
1041
1042 // Check whether the number of cases is small enough and
1043 // the range is dense enough for a jump table.
1044 if ((OptForSize || Range <= MaxJumpTableSize) &&
1045 (NumCases * 100 >= Range * MinDensity)) {
1046 return true;
1047 }
1048 return false;
1049 }
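// A worked instance of the density check above, using an illustrative
// MinDensity of 10 (the real threshold comes from getMinimumJumpTableDensity
// and varies by target and optimization level):
static_assert(40 * 100 >= 100 * 10, "40 cases over a range of 100 is dense enough");
static_assert(4 * 100 < 1000 * 10, "4 cases over a range of 1000 is too sparse");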
1050
1051 /// Return true if lowering to a bit test is suitable for a set of case
1052 /// clusters which contains \p NumDests unique destinations, \p Low and
1053 /// \p High as its lowest and highest case values, and expects \p NumCmps
1054 /// case value comparisons. Check if the number of destinations, comparison
1055 /// metric, and range are all suitable.
1056 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1057 const APInt &Low, const APInt &High,
1058 const DataLayout &DL) const {
1059 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1060 // range of cases both require only one branch to lower. Just looking at the
1061 // number of clusters and destinations should be enough to decide whether to
1062 // build bit tests.
1063
1064 // To lower a range with bit tests, the range must fit the bitwidth of a
1065 // machine word.
1066 if (!rangeFitsInWord(Low, High, DL))
1067 return false;
1068
1069 // Decide whether it's profitable to lower this range with bit tests. Each
1070 // destination requires a bit test and branch, and there is an overall range
1071 // check branch. For a small number of clusters, separate comparisons might
1072 // be cheaper, and for many destinations, splitting the range might be
1073 // better.
1074 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1075 (NumDests == 3 && NumCmps >= 6);
1076 }
1077
1078 /// Return true if the specified operation is illegal on this target or
1079 /// unlikely to be made legal with custom lowering. This is used to help guide
1080 /// high-level lowering decisions.
1081 bool isOperationExpand(unsigned Op, EVT VT) const {
1082 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1083 }
1084
1085 /// Return true if the specified operation is legal on this target.
1086 bool isOperationLegal(unsigned Op, EVT VT) const {
1087 return (VT == MVT::Other || isTypeLegal(VT)) &&
1088 getOperationAction(Op, VT) == Legal;
1089 }
1090
1091 /// Return how this load with extension should be treated: either it is legal,
1092 /// needs to be promoted to a larger size, needs to be expanded to some other
1093 /// code sequence, or the target has a custom expander for it.
1094 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1095 EVT MemVT) const {
1096 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1097 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1098 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1099 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
1100 MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
1101 unsigned Shift = 4 * ExtType;
1102 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1103 }
1104
1105 /// Return true if the specified load with extension is legal on this target.
1106 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1107 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1108 }
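// A minimal usage sketch (TLI assumed in scope): ask whether a sign-extending
// load of an i8 in memory into an i32 value is natively supported, which is
// what lets a DAG combine fold 'load + sext' into one node.
bool SExtI8Legal = TLI.isLoadExtLegal(ISD::SEXTLOAD, MVT::i32, MVT::i8);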
1109
1110 /// Return true if the specified load with extension is legal or custom
1111 /// on this target.
1112 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1113 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1114 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1115 }
1116
1117 /// Return how this store with truncation should be treated: either it is
1118 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1119 /// other code sequence, or the target has a custom expander for it.
1120 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1121 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1122 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1123 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1124 assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
1125 "Table isn't big enough!");
1126 return TruncStoreActions[ValI][MemI];
1127 }
1128
1129 /// Return true if the specified store with truncation is legal on this
1130 /// target.
1131 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1132 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1133 }
1134
1135 /// Return true if the specified store with truncation has a solution on this
1136 /// target.
1137 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1138 return isTypeLegal(ValVT) &&
1139 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1140 getTruncStoreAction(ValVT, MemVT) == Custom);
1141 }
1142
1143 /// Return how the indexed load should be treated: either it is legal, needs
1144 /// to be promoted to a larger size, needs to be expanded to some other code
1145 /// sequence, or the target has a custom expander for it.
1146 LegalizeAction
1147 getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1148 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
1149 "Table isn't big enough!");
1150 unsigned Ty = (unsigned)VT.SimpleTy;
1151 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
1152 }
1153
1154 /// Return true if the specified indexed load is legal on this target.
1155 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1156 return VT.isSimple() &&
1157 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1158 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1159 }
1160
1161 /// Return how the indexed store should be treated: either it is legal, needs
1162 /// to be promoted to a larger size, needs to be expanded to some other code
1163 /// sequence, or the target has a custom expander for it.
1164 LegalizeAction
1165 getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1166 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
1167 "Table isn't big enough!");
1168 unsigned Ty = (unsigned)VT.SimpleTy;
1169 return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
1170 }
1171
1172 /// Return true if the specified indexed store is legal on this target.
1173 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1174 return VT.isSimple() &&
1175 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1176 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1177 }
1178
1179 /// Return how the condition code should be treated: either it is legal, needs
1180 /// to be expanded to some other code sequence, or the target has a custom
1181 /// expander for it.
1182 LegalizeAction
1183 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1184 assert((unsigned)CC < array_lengthof(CondCodeActions) &&
1185 ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
1186 "Table isn't big enough!");
1187 // See setCondCodeAction for how this is encoded.
1188 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1189 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1190 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1191 assert(Action != Promote && "Can't promote condition code!");
1192 return Action;
1193 }
1194
1195 /// Return true if the specified condition code is legal on this target.
1196 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1197 return getCondCodeAction(CC, VT) == Legal;
1198 }
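// A minimal usage sketch (TLI assumed in scope): query whether an unsigned
// less-than compare of i64 values is directly selectable, or whether the
// legalizer must rewrite it in terms of another condition code.
bool ULTLegal = TLI.isCondCodeLegal(ISD::SETULT, MVT::i64);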
1199
1200 /// Return true if the specified condition code is legal or custom on this
1201 /// target.
1202 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1203 return getCondCodeAction(CC, VT) == Legal ||
1204 getCondCodeAction(CC, VT) == Custom;
1205 }
1206
1207 /// If the action for this operation is to promote, this method returns the
1208 /// ValueType to promote to.
1209 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1210 assert(getOperationAction(Op, VT) == Promote &&
1211 "This operation isn't promoted!");
1212
1213 // See if this has an explicit type specified.
1214 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1215 MVT::SimpleValueType>::const_iterator PTTI =
1216 PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1217 if (PTTI != PromoteToType.end()) return PTTI->second;
1218
1219 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1220 "Cannot autopromote this type, add it with AddPromotedToType.");
1221
1222 MVT NVT = VT;
1223 do {
1224 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1225 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1226 "Didn't find type to promote to!");
1227 } while (!isTypeLegal(NVT) ||
1228 getOperationAction(Op, NVT) == Promote);
1229 return NVT;
1230 }
1231
1232 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1233 /// operations except for the pointer size. If AllowUnknown is true, this
1234 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1235 /// otherwise it will assert.
1236 EVT getValueType(const DataLayout &DL, Type *Ty,
1237 bool AllowUnknown = false) const {
1238 // Lower scalar pointers to native pointer types.
1239 if (auto *PTy = dyn_cast<PointerType>(Ty))
  28: Assuming 'PTy' is null
  29: Taking false branch
1240 return getPointerTy(DL, PTy->getAddressSpace());
1241
1242 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
  30: Assuming 'VTy' is non-null
  31: Taking true branch
1243 Type *EltTy = VTy->getElementType();
1244 // Lower vectors of pointers to native pointer types.
1245 if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
  32: Assuming 'EltTy' is a 'PointerType'
  32.1: 'PTy' is non-null
  33: Taking true branch
1246 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
1247 EltTy = PointerTy.getTypeForEVT(Ty->getContext());
  34: Called C++ object pointer is null
1248 }
1249 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
1250 VTy->getElementCount());
1251 }
1252
1253 return EVT::getEVT(Ty, AllowUnknown);
1254 }
1255
1256 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1257 bool AllowUnknown = false) const {
1258 // Lower scalar pointers to native pointer types.
1259 if (PointerType *PTy = dyn_cast<PointerType>(Ty))
1260 return getPointerMemTy(DL, PTy->getAddressSpace());
1261 else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1262 Type *Elm = VTy->getElementType();
1263 if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
1264 EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
1265 Elm = PointerTy.getTypeForEVT(Ty->getContext());
1266 }
1267 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
1268 VTy->getNumElements());
1269 }
1270
1271 return getValueType(DL, Ty, AllowUnknown);
1272 }
1273
1274
1275 /// Return the MVT corresponding to this LLVM type. See getValueType.
1276 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1277 bool AllowUnknown = false) const {
1278 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1279 }
1280
1281 /// Return the desired alignment for ByVal or InAlloca aggregate function
1282 /// arguments in the caller parameter area. This is the actual alignment, not
1283 /// its logarithm.
1284 virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1285
1286 /// Return the type of registers that this ValueType will eventually require.
1287 MVT getRegisterType(MVT VT) const {
1288 assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
1289 return RegisterTypeForVT[VT.SimpleTy];
1290 }
1291
1292 /// Return the type of registers that this ValueType will eventually require.
1293 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1294 if (VT.isSimple()) {
1295 assert((unsigned)VT.getSimpleVT().SimpleTy <
1296 array_lengthof(RegisterTypeForVT));
1297 return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
1298 }
1299 if (VT.isVector()) {
1300 EVT VT1;
1301 MVT RegisterVT;
1302 unsigned NumIntermediates;
1303 (void)getVectorTypeBreakdown(Context, VT, VT1,
1304 NumIntermediates, RegisterVT);
1305 return RegisterVT;
1306 }
1307 if (VT.isInteger()) {
1308 return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1309 }
1310 llvm_unreachable("Unsupported extended type!");
1311 }
1312
1313 /// Return the number of registers that this ValueType will eventually
1314 /// require.
1315 ///
1316 /// This is one for any types promoted to live in larger registers, but may be
1317 /// more than one for types (like i64) that are split into pieces. For types
1318 /// like i140, which are first promoted then expanded, it is the number of
1319 /// registers needed to hold all the bits of the original type. For an i140
1320 /// on a 32 bit machine this means 5 registers.
1321 unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
1322 if (VT.isSimple()) {
1323 assert((unsigned)VT.getSimpleVT().SimpleTy <
1324 array_lengthof(NumRegistersForVT));
1325 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1326 }
1327 if (VT.isVector()) {
1328 EVT VT1;
1329 MVT VT2;
1330 unsigned NumIntermediates;
1331 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1332 }
1333 if (VT.isInteger()) {
1334 unsigned BitWidth = VT.getSizeInBits();
1335 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1336 return (BitWidth + RegWidth - 1) / RegWidth;
1337 }
1338 llvm_unreachable("Unsupported extended type!");
1339 }
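// A worked instance of the i140 example in the comment above: on a 32-bit
// machine the promoted-then-expanded type needs ceil(140 / 32) registers.
static_assert((140 + 32 - 1) / 32 == 5, "i140 needs five 32-bit registers");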
1340
1341 /// Certain combinations of ABIs, Targets and features require that types
1342 /// are legal for some operations and not for other operations.
1343 /// For MIPS all vector types must be passed through the integer register set.
1344 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1345 CallingConv::ID CC, EVT VT) const {
1346 return getRegisterType(Context, VT);
1347 }
1348
1349 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1350 /// this occurs when a vector type is used, as vectors are passed through the
1351 /// integer register set.
1352 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1353 CallingConv::ID CC,
1354 EVT VT) const {
1355 return getNumRegisters(Context, VT);
1356 }
1357
1358 /// Certain targets have context-sensitive alignment requirements, where one
1359 /// type has the alignment requirement of another type.
1360 virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
1361 DataLayout DL) const {
1362 return DL.getABITypeAlignment(ArgTy);
1363 }
1364
1365 /// If true, then instruction selection should seek to shrink the FP constant
1366 /// of the specified type to a smaller type in order to save space and / or
1367 /// reduce runtime.
1368 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1369
1370 /// Return true if it is profitable to reduce a load to a smaller type.
1371 /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
1372 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1373 EVT NewVT) const {
1374 // By default, assume that it is cheaper to extract a subvector from a wide
1375 // vector load rather than creating multiple narrow vector loads.
1376 if (NewVT.isVector() && !Load->hasOneUse())
1377 return false;
1378
1379 return true;
1380 }
1381
1382 /// When splitting a value of the specified type into parts, does the Lo
1383 /// or Hi part come first? This usually follows the endianness, except
1384 /// for ppcf128, where the Hi part always comes first.
1385 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1386 return DL.isBigEndian() || VT == MVT::ppcf128;
1387 }
1388
1389 /// If true, the target has custom DAG combine transformations that it can
1390 /// perform for the specified node.
1391 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1392 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
1393 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1394 }
1395
1396 unsigned getGatherAllAliasesMaxDepth() const {
1397 return GatherAllAliasesMaxDepth;
1398 }
1399
1400 /// Returns the size of the platform's va_list object.
1401 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1402 return getPointerTy(DL).getSizeInBits();
1403 }
1404
1405 /// Get maximum # of store operations permitted for llvm.memset
1406 ///
1407 /// This function returns the maximum number of store operations permitted
1408 /// to replace a call to llvm.memset. The value is set by the target at the
1409 /// performance threshold for such a replacement. If OptSize is true,
1410 /// return the limit for functions that have OptSize attribute.
1411 unsigned getMaxStoresPerMemset(bool OptSize) const {
1412 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1413 }
1414
1415 /// Get maximum # of store operations permitted for llvm.memcpy
1416 ///
1417 /// This function returns the maximum number of store operations permitted
1418 /// to replace a call to llvm.memcpy. The value is set by the target at the
1419 /// performance threshold for such a replacement. If OptSize is true,
1420 /// return the limit for functions that have OptSize attribute.
1421 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1422 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1423 }
1424
1425 /// \brief Get maximum # of store operations to be glued together
1426 ///
1427 /// This function returns the maximum number of store operations permitted
1428 /// to glue together during lowering of llvm.memcpy. The value is set by
1429 /// the target at the performance threshold for such a replacement.
1430 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1431 return MaxGluedStoresPerMemcpy;
1432 }
1433
1434 /// Get maximum # of load operations permitted for memcmp
1435 ///
1436 /// This function returns the maximum number of load operations permitted
1437 /// to replace a call to memcmp. The value is set by the target at the
1438 /// performance threshold for such a replacement. If OptSize is true,
1439 /// return the limit for functions that have OptSize attribute.
1440 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1441 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1442 }
1443
1444 /// Get maximum # of store operations permitted for llvm.memmove
1445 ///
1446 /// This function returns the maximum number of store operations permitted
1447 /// to replace a call to llvm.memmove. The value is set by the target at the
1448 /// performance threshold for such a replacement. If OptSize is true,
1449 /// return the limit for functions that have OptSize attribute.
1450 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1451 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1452 }
1453
1454 /// Determine if the target supports unaligned memory accesses.
1455 ///
1456 /// This function returns true if the target allows unaligned memory accesses
1457 /// of the specified type in the given address space. If true, it also returns
1458 /// whether the unaligned memory access is "fast" in the last argument by
1459 /// reference. This is used, for example, in situations where an array
1460 /// copy/move/set is converted to a sequence of store operations. Its use
1461 /// helps to ensure that such replacements don't generate code that causes an
1462 /// alignment error (trap) on the target machine.
1463 virtual bool allowsMisalignedMemoryAccesses(
1464 EVT, unsigned AddrSpace = 0, unsigned Align = 1,
1465 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1466 bool * /*Fast*/ = nullptr) const {
1467 return false;
1468 }
1469
1470 /// LLT handling variant.
1471 virtual bool allowsMisalignedMemoryAccesses(
1472 LLT, unsigned AddrSpace = 0, unsigned Align = 1,
1473 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1474 bool * /*Fast*/ = nullptr) const {
1475 return false;
1476 }
1477
1478 /// This function returns true if the memory access is aligned or if the
1479 /// target allows this specific unaligned memory access. If the access is
1480 /// allowed, the optional final parameter returns whether the access is also fast
1481 /// (as defined by the target).
1482 bool allowsMemoryAccessForAlignment(
1483 LLVMContext &Context, const DataLayout &DL, EVT VT,
1484 unsigned AddrSpace = 0, unsigned Alignment = 1,
1485 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1486 bool *Fast = nullptr) const;
1487
1488 /// Return true if the memory access of this type is aligned or if the target
1489 /// allows this specific unaligned access for the given MachineMemOperand.
1490 /// If the access is allowed, the optional final parameter returns whether the
1491 /// access is also fast (as defined by the target).
1492 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1493 const DataLayout &DL, EVT VT,
1494 const MachineMemOperand &MMO,
1495 bool *Fast = nullptr) const;
1496
1497 /// Return true if the target supports a memory access of this type for the
1498 /// given address space and alignment. If the access is allowed, the optional
1499 /// final parameter returns whether the access is also fast (as defined by the
1500 /// target).
1501 virtual bool
1502 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1503 unsigned AddrSpace = 0, unsigned Alignment = 1,
1504 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1505 bool *Fast = nullptr) const;
1506
1507 /// Return true if the target supports a memory access of this type for the
1508 /// given MachineMemOperand. If the access is allowed, the optional
1509 /// final parameter returns whether the access is also fast (as defined by the
1510 /// target).
1511 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1512 const MachineMemOperand &MMO,
1513 bool *Fast = nullptr) const;
1514
1515 /// Returns the target specific optimal type for load and store operations as
1516 /// a result of memset, memcpy, and memmove lowering.
1517 ///
1518 /// If DstAlign is zero, that means the destination alignment can satisfy any
1519 /// constraint. Similarly, if SrcAlign is zero it means there is no need to
1520 /// check it against the alignment requirement, probably because the
1521 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
1522 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
1523 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
1524 /// does not need to be loaded. It returns EVT::Other if the type should be
1525 /// determined using generic target-independent logic.
1526 virtual EVT
1527 getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
1528 unsigned /*SrcAlign*/, bool /*IsMemset*/,
1529 bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
1530 const AttributeList & /*FuncAttributes*/) const {
1531 return MVT::Other;
1532 }
1533
1534
1535 /// LLT returning variant.
1536 virtual LLT
1537 getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/,
1538 unsigned /*SrcAlign*/, bool /*IsMemset*/,
1539 bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
1540 const AttributeList & /*FuncAttributes*/) const {
1541 return LLT();
1542 }
1543
1544 /// Returns true if it's safe to use load / store of the specified type to
1545 /// expand memcpy / memset inline.
1546 ///
1547 /// This is mostly true for all types except for some special cases. For
1548 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
1549 /// fstpl which also does type conversion. Note the specified type doesn't
1550 /// have to be legal as the hook is used before type legalization.
1551 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1552
1553 /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp.
1554 bool usesUnderscoreSetJmp() const {
1555 return UseUnderscoreSetJmp;
1556 }
1557
1558 /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp.
1559 bool usesUnderscoreLongJmp() const {
1560 return UseUnderscoreLongJmp;
1561 }
1562
1563 /// Return lower limit for number of blocks in a jump table.
1564 virtual unsigned getMinimumJumpTableEntries() const;
1565
1566 /// Return lower limit of the density in a jump table.
1567 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1568
1569 /// Return upper limit for number of entries in a jump table.
1570 /// Zero if no limit.
1571 unsigned getMaximumJumpTableSize() const;
1572
1573 virtual bool isJumpTableRelative() const {
1574 return TM.isPositionIndependent();
1575 }
1576
1577 /// If a physical register, this specifies the register that
1578 /// llvm.stacksave/llvm.stackrestore should save and restore.
1579 unsigned getStackPointerRegisterToSaveRestore() const {
1580 return StackPointerRegisterToSaveRestore;
1581 }
1582
1583 /// If a physical register, this returns the register that receives the
1584 /// exception address on entry to an EH pad.
1585 virtual unsigned
1586 getExceptionPointerRegister(const Constant *PersonalityFn) const {
1587 // 0 is guaranteed to be the NoRegister value on all targets
1588 return 0;
1589 }
1590
1591 /// If a physical register, this returns the register that receives the
1592 /// exception typeid on entry to a landing pad.
1593 virtual unsigned
1594 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
1595 // 0 is guaranteed to be the NoRegister value on all targets
1596 return 0;
1597 }
1598
1599 virtual bool needsFixedCatchObjects() const {
1600 report_fatal_error("Funclet EH is not implemented for this target");
1601 }
1602
1603 /// Return the minimum stack alignment of an argument.
1604 Align getMinStackArgumentAlignment() const {
1605 return MinStackArgumentAlignment;
1606 }
1607
1608 /// Return the minimum function alignment.
1609 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
1610
1611 /// Return the preferred function alignment.
1612 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
1613
1614 /// Return the preferred loop alignment.
1615 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
1616 return PrefLoopAlignment;
1617 }
1618
1619 /// Should loops be aligned even when the function is marked OptSize (but not
1620 /// MinSize).
1621 virtual bool alignLoopsWithOptSize() const {
1622 return false;
1623 }
1624
1625 /// If the target has a standard location for the stack protector guard,
1626 /// returns the address of that location. Otherwise, returns nullptr.
1627 /// DEPRECATED: please override useLoadStackGuardNode and customize
1628 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
1629 virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
1630
1631 /// Inserts necessary declarations for SSP (stack protection) purpose.
1632 /// Should be used only when getIRStackGuard returns nullptr.
1633 virtual void insertSSPDeclarations(Module &M) const;
1634
1635 /// Return the variable that's previously inserted by insertSSPDeclarations,
1636 /// if any, otherwise return nullptr. Should be used only when
1637 /// getIRStackGuard returns nullptr.
1638 virtual Value *getSDagStackGuard(const Module &M) const;
1639
1640 /// If this function returns true, stack protection checks should XOR the
1641 /// frame pointer (or whichever pointer is used to address locals) into the
1642 /// stack guard value before checking it. getIRStackGuard must return nullptr
1643 /// if this returns true.
1644 virtual bool useStackGuardXorFP() const { return false; }
1645
1646 /// If the target has a standard stack protection check function that
1647 /// performs validation and error handling, returns the function. Otherwise,
1648 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
1649 /// Should be used only when getIRStackGuard returns nullptr.
1650 virtual Function *getSSPStackGuardCheck(const Module &M) const;
1651
1652protected:
1653 Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
1654 bool UseTLS) const;
1655
1656public:
1657 /// Returns the target-specific address of the unsafe stack pointer.
1658 virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
1659
1660 /// Returns the name of the symbol used to emit stack probes or the empty
1661 /// string if not applicable.
1662 virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
1663 return "";
1664 }
1665
1666 /// Returns true if a cast between SrcAS and DestAS is a noop.
1667 virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1668 return false;
1669 }
1670
1671 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
1672 /// are happy to sink it into basic blocks. A cast may be free but still not
1673 /// be a no-op, e.g. a free truncate from a 64-bit to a 32-bit pointer.
1674 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1675 return isNoopAddrSpaceCast(SrcAS, DestAS);
1676 }
1677
1678 /// Return true if the pointer arguments to CI should be aligned by aligning
1679 /// the object whose address is being passed. If so then MinSize is set to the
1680 /// minimum size the object must be to be aligned and PrefAlign is set to the
1681 /// preferred alignment.
1682 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
1683 unsigned & /*PrefAlign*/) const {
1684 return false;
1685 }
1686
1687 //===--------------------------------------------------------------------===//
1688 /// \name Helpers for TargetTransformInfo implementations
1689 /// @{
1690
1691 /// Get the ISD node that corresponds to the Instruction class opcode.
1692 int InstructionOpcodeToISD(unsigned Opcode) const;
1693
1694 /// Estimate the cost of type-legalization and the legalized type.
1695 std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
1696 Type *Ty) const;
1697
1698 /// @}
1699
1700 //===--------------------------------------------------------------------===//
1701 /// \name Helpers for atomic expansion.
1702 /// @{
1703
1704 /// Returns the maximum atomic operation size (in bits) supported by
1705 /// the backend. Atomic operations greater than this size (as well
1706 /// as ones that are not naturally aligned), will be expanded by
1707 /// AtomicExpandPass into an __atomic_* library call.
1708 unsigned getMaxAtomicSizeInBitsSupported() const {
1709 return MaxAtomicSizeInBitsSupported;
1710 }
1711
1712 /// Returns the size of the smallest cmpxchg or ll/sc instruction
1713 /// the backend supports. Any smaller operations are widened in
1714 /// AtomicExpandPass.
1715 ///
1716 /// Note that *unlike* operations above the maximum size, atomic ops
1717 /// are still natively supported below the minimum; they just
1718 /// require a more complex expansion.
1719 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
1720
1721 /// Whether the target supports unaligned atomic operations.
1722 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
1723
1724 /// Whether AtomicExpandPass should automatically insert fences and reduce
1725 /// ordering for this atomic. This should be true for most architectures with
1726 /// weak memory ordering. Defaults to false.
1727 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
1728 return false;
1729 }
1730
1731 /// Perform a load-linked operation on Addr, returning a "Value *" with the
1732 /// corresponding pointee type. This may entail some non-trivial operations to
1733 /// truncate or reconstruct types that will be illegal in the backend. See
1734 /// ARMISelLowering for an example implementation.
1735 virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
1736 AtomicOrdering Ord) const {
1737 llvm_unreachable("Load linked unimplemented on this target")::llvm::llvm_unreachable_internal("Load linked unimplemented on this target"
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/TargetLowering.h"
, 1737)
;
1738 }
1739
1740 /// Perform a store-conditional operation to Addr. Return the status of the
1741 /// store. This should be 0 if the store succeeded, non-zero otherwise.
1742 virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1743 Value *Addr, AtomicOrdering Ord) const {
1744 llvm_unreachable("Store conditional unimplemented on this target")::llvm::llvm_unreachable_internal("Store conditional unimplemented on this target"
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/TargetLowering.h"
, 1744)
;
1745 }
1746
1747 /// Perform a masked atomicrmw using a target-specific intrinsic. This
1748 /// represents the core LL/SC loop which will be lowered at a late stage by
1749 /// the backend.
1750 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
1751 AtomicRMWInst *AI,
1752 Value *AlignedAddr, Value *Incr,
1753 Value *Mask, Value *ShiftAmt,
1754 AtomicOrdering Ord) const {
1755 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked atomicrmw expansion unimplemented on this target"
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/TargetLowering.h"
, 1755)
;
1756 }
1757
1758 /// Perform a masked cmpxchg using a target-specific intrinsic. This
1759 /// represents the core LL/SC loop which will be lowered at a late stage by
1760 /// the backend.
1761 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
1762 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1763 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1764 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked cmpxchg expansion unimplemented on this target"
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/TargetLowering.h"
, 1764)
;
1765 }
1766
1767 /// Inserts in the IR a target-specific intrinsic specifying a fence.
1768 /// It is called by AtomicExpandPass before expanding an
1769 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
1770 /// if shouldInsertFencesForAtomic returns true.
1771 ///
1772 /// Inst is the original atomic instruction, prior to other expansions that
1773 /// may be performed.
1774 ///
1775 /// This function should either return a nullptr, or a pointer to an IR-level
1776 /// Instruction*. Even complex fence sequences can be represented by a
1777 /// single Instruction* through an intrinsic to be lowered later.
1778 /// Backends should override this method to produce target-specific intrinsic
1779 /// for their fences.
1780 /// FIXME: Please note that the default implementation here in terms of
1781 /// IR-level fences exists for historical/compatibility reasons and is
1782 /// *unsound*! Fences cannot, in general, be used to restore sequential
1783 /// consistency. For example, consider the following program:
1784 /// atomic<int> x = y = 0;
1785 /// int r1, r2, r3, r4;
1786 /// Thread 0:
1787 /// x.store(1);
1788 /// Thread 1:
1789 /// y.store(1);
1790 /// Thread 2:
1791 /// r1 = x.load();
1792 /// r2 = y.load();
1793 /// Thread 3:
1794 /// r3 = y.load();
1795 /// r4 = x.load();
1796 /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
1797 /// seq_cst. But if they are lowered to monotonic accesses, no amount of
1798 /// IR-level fences can prevent it.
1799 /// @{
1800 virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
1801 AtomicOrdering Ord) const {
1802 if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
1803 return Builder.CreateFence(Ord);
1804 else
1805 return nullptr;
1806 }
1807
1808 virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
1809 Instruction *Inst,
1810 AtomicOrdering Ord) const {
1811 if (isAcquireOrStronger(Ord))
1812 return Builder.CreateFence(Ord);
1813 else
1814 return nullptr;
1815 }
1816 /// @}
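  // A minimal sketch (hypothetical target "Foo", not an in-tree backend) of how
  // a weakly-ordered target opts into this fence-based lowering; real backends
  // typically also override emitLeadingFence/emitTrailingFence to emit their
  // own target fence intrinsics instead of keeping the defaults above:
  //
  //   class FooTargetLowering : public TargetLowering {
  //     bool shouldInsertFencesForAtomic(const Instruction *) const override {
  //       return true;
  //     }
  //   };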
1817
1818 // Emits code that executes when the comparison result in the ll/sc
1819 // expansion of a cmpxchg instruction is such that the store-conditional will
1820 // not execute. This makes it possible to balance out the load-linked with
1821 // a dedicated instruction, if desired.
1822 // E.g., on ARM, if an ldrex is not followed by a strex, the exclusive monitor
1823 // is held unnecessarily unless a clrex, which this hook can insert, is executed.
1824 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
1825
1826 /// Returns true if the given (atomic) store should be expanded by the
1827 /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
1828 virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
1829 return false;
1830 }
1831
1832 /// Returns true if arguments should be sign-extended in lib calls.
1833 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
1834 return IsSigned;
1835 }
1836
1837 /// Returns true if arguments should be extended in lib calls.
1838 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
1839 return true;
1840 }
1841
1842 /// Returns how the given (atomic) load should be expanded by the
1843 /// IR-level AtomicExpand pass.
1844 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
1845 return AtomicExpansionKind::None;
1846 }
1847
1848 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
1849 /// AtomicExpand pass.
1850 virtual AtomicExpansionKind
1851 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1852 return AtomicExpansionKind::None;
1853 }
1854
1855 /// Returns how the IR-level AtomicExpand pass should expand the given
1856 /// AtomicRMW, if at all. Default is to never expand.
1857 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1858 return RMW->isFloatingPointOperation() ?
1859 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
1860 }
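  // A minimal sketch of an override for a hypothetical LL/SC target that wants
  // the generic LL/SC loop for word-sized (or smaller) integer RMW operations:
  //
  //   AtomicExpansionKind
  //   FooTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  //     unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  //     return Size <= 32 ? AtomicExpansionKind::LLSC
  //                       : AtomicExpansionKind::None;
  //   }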
1861
1862 /// On some platforms, an AtomicRMW that never actually modifies the value
1863 /// (such as fetch_add of 0) can be turned into a fence followed by an
1864 /// atomic load. This may sound useless, but it makes it possible for the
1865 /// processor to keep the cacheline shared, dramatically improving
1866 /// performance. And such idempotent RMWs are useful for implementing some
1867 /// kinds of locks, see for example (justification + benchmarks):
1868 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
1869 /// This method tries doing that transformation, returning the atomic load if
1870 /// it succeeds, and nullptr otherwise.
1871 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
1872 /// another round of expansion.
1873 virtual LoadInst *
1874 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
1875 return nullptr;
1876 }
1877
1878 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
1879 /// SIGN_EXTEND, or ANY_EXTEND).
1880 virtual ISD::NodeType getExtendForAtomicOps() const {
1881 return ISD::ZERO_EXTEND;
1882 }
1883
1884 /// @}
1885
1886 /// Returns true if we should normalize
1887 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
1888 /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
1889 /// that it saves us from materializing N0 and N1 in an integer register.
1890 /// Targets that are able to perform and/or on flags should return false here.
1891 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
1892 EVT VT) const {
1893 // If a target has multiple condition registers, then it likely has logical
1894 // operations on those registers.
1895 if (hasMultipleConditionRegisters())
1896 return false;
1897 // Only do the transform if the value won't be split into multiple
1898 // registers.
1899 LegalizeTypeAction Action = getTypeAction(Context, VT);
1900 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
1901 Action != TypeSplitVector;
1902 }
1903
1904 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
1905
1906 /// Return true if a select of constants (select Cond, C1, C2) should be
1907 /// transformed into simple math ops with the condition value. For example:
1908 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
1909 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
1910 return false;
1911 }
1912
1913 /// Return true if it is profitable to transform an integer
1914 /// multiplication-by-constant into simpler operations like shifts and adds.
1915 /// This may be true if the target does not directly support the
1916 /// multiplication operation for the specified type or the sequence of simpler
1917 /// ops is faster than the multiply.
1918 virtual bool decomposeMulByConstant(LLVMContext &Context,
1919 EVT VT, SDValue C) const {
1920 return false;
1921 }
1922
1923 /// Return true if it is more correct/profitable to use strict FP_TO_INT
1924 /// conversion operations - canonicalizing the FP source value instead of
1925 /// converting all cases and then selecting based on value.
1926 /// This may be true if the target throws exceptions for out of bounds
1927 /// conversions or has fast FP CMOV.
1928 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1929 bool IsSigned) const {
1930 return false;
1931 }
1932
1933 //===--------------------------------------------------------------------===//
1934 // TargetLowering Configuration Methods - These methods should be invoked by
1935 // the derived class constructor to configure this object for the target.
1936 //
1937protected:
1938 /// Specify how the target extends the result of integer and floating point
1939 /// boolean values from i1 to a wider type. See getBooleanContents.
1940 void setBooleanContents(BooleanContent Ty) {
1941 BooleanContents = Ty;
1942 BooleanFloatContents = Ty;
1943 }
1944
1945 /// Specify how the target extends the result of integer and floating point
1946 /// boolean values from i1 to a wider type. See getBooleanContents.
1947 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
1948 BooleanContents = IntTy;
1949 BooleanFloatContents = FloatTy;
1950 }
1951
1952 /// Specify how the target extends the result of a vector boolean value from a
1953 /// vector of i1 to a wider type. See getBooleanContents.
1954 void setBooleanVectorContents(BooleanContent Ty) {
1955 BooleanVectorContents = Ty;
1956 }
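  // In a target constructor these are typically among the first configuration
  // calls; which contents are correct depends on how the ISA's compare and
  // select instructions materialize booleans (values below are illustrative):
  //
  //   setBooleanContents(ZeroOrOneBooleanContent);
  //   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);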
1957
1958 /// Specify the target scheduling preference.
1959 void setSchedulingPreference(Sched::Preference Pref) {
1960 SchedPreferenceInfo = Pref;
1961 }
1962
1963 /// Indicate whether this target prefers to use _setjmp to implement
1964 /// llvm.setjmp or the version without _. Defaults to false.
1965 void setUseUnderscoreSetJmp(bool Val) {
1966 UseUnderscoreSetJmp = Val;
1967 }
1968
1969 /// Indicate whether this target prefers to use _longjmp to implement
1970 /// llvm.longjmp or the version without _. Defaults to false.
1971 void setUseUnderscoreLongJmp(bool Val) {
1972 UseUnderscoreLongJmp = Val;
1973 }
1974
1975 /// Indicate the minimum number of blocks to generate jump tables.
1976 void setMinimumJumpTableEntries(unsigned Val);
1977
1978 /// Indicate the maximum number of entries in jump tables.
1979 /// Set to zero to generate unlimited jump tables.
1980 void setMaximumJumpTableSize(unsigned);
1981
1982 /// If set to a physical register, this specifies the register that
1983 /// llvm.savestack/llvm.restorestack should save and restore.
1984 void setStackPointerRegisterToSaveRestore(unsigned R) {
1985 StackPointerRegisterToSaveRestore = R;
1986 }
1987
1988 /// Tells the code generator that the target has multiple (allocatable)
1989 /// condition registers that can be used to store the results of comparisons
1990 /// for use by selects and conditional branches. With multiple condition
1991 /// registers, the code generator will not aggressively sink comparisons into
1992 /// the blocks of their users.
1993 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
1994 HasMultipleConditionRegisters = hasManyRegs;
1995 }
1996
1997 /// Tells the code generator that the target has BitExtract instructions.
1998 /// The code generator will aggressively sink "shift"s into the blocks of
1999 /// their users if the users will generate "and" instructions which can be
2000 /// combined with "shift" to BitExtract instructions.
2001 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
2002 HasExtractBitsInsn = hasExtractInsn;
2003 }
2004
2005 /// Tells the code generator not to expand logic operations on comparison
2006 /// predicates into separate sequences that increase the amount of flow
2007 /// control.
2008 void setJumpIsExpensive(bool isExpensive = true);
2009
2010 /// Tells the code generator which bitwidths to bypass.
2011 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
2012 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
2013 }
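  // For example, a target whose 64-bit divide is microcoded but whose 32-bit
  // divide is fast could request a runtime bypass with:
  //
  //   addBypassSlowDiv(64, 32);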
2014
2015 /// Add the specified register class as an available regclass for the
2016 /// specified value type. This indicates the selector can handle values of
2017 /// that class natively.
2018 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
2019 assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
2020 RegClassForVT[VT.SimpleTy] = RC;
2021 }
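  // Typical use in a target constructor, with TableGen-generated register
  // classes (the "Foo" names below are hypothetical):
  //
  //   addRegisterClass(MVT::i32, &Foo::GPR32RegClass);
  //   addRegisterClass(MVT::f64, &Foo::FPR64RegClass);
  //
  // followed by a single computeRegisterProperties(TRI) call once every class
  // has been registered.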
2022
2023 /// Return the largest legal super-reg register class of the register class
2024 /// for the specified type and its associated "cost".
2025 virtual std::pair<const TargetRegisterClass *, uint8_t>
2026 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
2027
2028 /// Once all of the register classes are added, this allows us to compute
2029 /// derived properties we expose.
2030 void computeRegisterProperties(const TargetRegisterInfo *TRI);
2031
2032 /// Indicate that the specified operation does not work with the specified
2033 /// type and indicate what to do about it. Note that VT may refer to either
2034 /// the type of a result or that of an operand of Op.
2035 void setOperationAction(unsigned Op, MVT VT,
2036 LegalizeAction Action) {
2037 assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
2038 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2039 }
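  // Typical configuration in a target constructor (the choices below are
  // illustrative, not taken from any particular backend):
  //
  //   setOperationAction(ISD::SDIV,   MVT::i64, Expand);  // no native divide
  //   setOperationAction(ISD::FSIN,   MVT::f64, Expand);  // expand to a libcall
  //   setOperationAction(ISD::SELECT, MVT::i32, Custom);  // handled in LowerOperation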
2040
2041 /// Indicate that the specified load with extension does not work with the
2042 /// specified type and indicate what to do about it.
2043 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2044 LegalizeAction Action) {
2045 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2046 MemVT.isValid() && "Table isn't big enough!");
2047 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2048 unsigned Shift = 4 * ExtType;
2049 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2050 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2051 }
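  // For example, a target without an f32 -> f64 extending load marks it Expand
  // so that it is legalized into a plain load plus fp_extend (illustrative):
  //
  //   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);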
2052
2053 /// Indicate that the specified truncating store does not work with the
2054 /// specified type and indicate what to do about it.
2055 void setTruncStoreAction(MVT ValVT, MVT MemVT,
2056 LegalizeAction Action) {
2057 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2058 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2059 }
2060
2061 /// Indicate that the specified indexed load does or does not work with the
2062 /// specified type and indicate what to do about it.
2063 ///
2064 /// NOTE: All indexed mode loads are initialized to Expand in
2065 /// TargetLowering.cpp
2066 void setIndexedLoadAction(unsigned IdxMode, MVT VT,
2067 LegalizeAction Action) {
2068 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
2069 (unsigned)Action < 0xf && "Table isn't big enough!");
2070 // Load actions are kept in the upper half.
2071 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
2072 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
2073 }
2074
2075 /// Indicate that the specified indexed store does or does not work with the
2076 /// specified type and indicate what to do about it.
2077 ///
2078 /// NOTE: All indexed mode stores are initialized to Expand in
2079 /// TargetLowering.cpp
2080 void setIndexedStoreAction(unsigned IdxMode, MVT VT,
2081 LegalizeAction Action) {
2082 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
2083 (unsigned)Action < 0xf && "Table isn't big enough!");
2084 // Store actions are kept in the lower half.
2085 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
2086 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
2087 }
2088
2089 /// Indicate that the specified condition code is or isn't supported on the
2090 /// target and indicate what to do about it.
2091 void setCondCodeAction(ISD::CondCode CC, MVT VT,
2092 LegalizeAction Action) {
2093 assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
2094 "Table isn't big enough!");
2095 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2096 /// The lower 3 bits of the SimpleTy select the Nth 4-bit chunk of the 32-bit
2097 /// value, and the remaining upper bits index into the second dimension of the
2098 /// array to select which 32-bit value to use.
2099 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2100 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2101 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2102 }
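  // Worked example of the packing above: for SimpleTy == 13 the action lives
  // in CondCodeActions[CC][13 >> 3], i.e. word 1, at bit offset
  // 4 * (13 & 0x7) == 20, i.e. bits [23:20] of that 32-bit word.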
2103
2104 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2105 /// to trying a larger integer/fp until it can find one that works. If that
2106 /// default is insufficient, this method can be used by the target to override
2107 /// the default.
2108 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2109 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2110 }
2111
2112 /// Convenience method to set an operation to Promote and specify the type
2113 /// in a single call.
2114 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2115 setOperationAction(Opc, OrigVT, Promote);
2116 AddPromotedToType(Opc, OrigVT, DestVT);
2117 }
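  // For example, a target that only implements vector AND on 64-bit-element
  // vectors could promote the 32-bit-element form to it (illustrative):
  //
  //   setOperationPromotedToType(ISD::AND, MVT::v4i32, MVT::v2i64);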
2118
2119 /// Targets should invoke this method for each target independent node that
2120 /// they want to provide a custom DAG combiner for by implementing the
2121 /// PerformDAGCombine virtual method.
2122 void setTargetDAGCombine(ISD::NodeType NT) {
2123 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
2124 TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
2125 }
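  // A backend whose PerformDAGCombine handles, say, ISD::ADD and ISD::STORE
  // registers its interest in the constructor:
  //
  //   setTargetDAGCombine(ISD::ADD);
  //   setTargetDAGCombine(ISD::STORE);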
2126
2127 /// Set the target's minimum function alignment.
2128 void setMinFunctionAlignment(Align Alignment) {
2129 MinFunctionAlignment = Alignment;
2130 }
2131
2132 /// Set the target's preferred function alignment. This should be set if
2133 /// there is a performance benefit to higher-than-minimum alignment.
2134 void setPrefFunctionAlignment(Align Alignment) {
2135 PrefFunctionAlignment = Alignment;
2136 }
2137
2138 /// Set the target's preferred loop alignment. The default alignment of one
2139 /// means the target does not care about loop alignment. The target may also
2140 /// override getPrefLoopAlignment to provide per-loop values.
2141 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2142
2143 /// Set the minimum stack alignment of an argument.
2144 void setMinStackArgumentAlignment(Align Alignment) {
2145 MinStackArgumentAlignment = Alignment;
2146 }
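  // Illustrative alignment configuration for a hypothetical target with 4-byte
  // instructions and 16-byte-aligned stack arguments:
  //
  //   setMinFunctionAlignment(Align(4));
  //   setPrefFunctionAlignment(Align(16));
  //   setPrefLoopAlignment(Align(16));
  //   setMinStackArgumentAlignment(Align(16));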
2147
2148 /// Set the maximum atomic operation size supported by the
2149 /// backend. Atomic operations greater than this size (as well as
2150 /// ones that are not naturally aligned), will be expanded by
2151 /// AtomicExpandPass into an __atomic_* library call.
2152 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2153 MaxAtomicSizeInBitsSupported = SizeInBits;
2154 }
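  // For example, a target whose widest native atomic instruction is a 64-bit
  // cmpxchg would call (illustrative):
  //
  //   setMaxAtomicSizeInBitsSupported(64);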
2155
2156 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2157 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2158 MinCmpXchgSizeInBits = SizeInBits;
2159 }
2160
2161 /// Sets whether unaligned atomic operations are supported.
2162 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2163 SupportsUnalignedAtomics = UnalignedSupported;
2164 }
2165
2166public:
2167 //===--------------------------------------------------------------------===//
2168 // Addressing mode description hooks (used by LSR etc).
2169 //
2170
2171 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2172 /// instructions reading the address. This allows as much computation as
2173 /// possible to be done in the address mode for that operand. This hook lets
2174 /// targets also pass back when this should be done on intrinsics which
2175 /// load/store.
2176 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2177 SmallVectorImpl<Value*> &/*Ops*/,
2178 Type *&/*AccessTy*/) const {
2179 return false;
2180 }
2181
2182 /// This represents an addressing mode of:
2183 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
2184 /// If BaseGV is null, there is no BaseGV.
2185 /// If BaseOffs is zero, there is no base offset.
2186 /// If HasBaseReg is false, there is no base register.
2187 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2188 /// no scale.
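  /// For example, an address of the form GV + 24 + BaseReg + 4 * ScaleReg is
  /// described by BaseGV = GV, BaseOffs = 24, HasBaseReg = true and Scale = 4,
  /// while a simple register-indirect access sets only HasBaseReg = true.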
2189 struct AddrMode {
2190 GlobalValue *BaseGV = nullptr;
2191 int64_t BaseOffs = 0;
2192 bool HasBaseReg = false;
2193 int64_t Scale = 0;
2194 AddrMode() = default;
2195 };
2196
2197 /// Return true if the addressing mode represented by AM is legal for this
2198 /// target, for a load/store of the specified type.
2199 ///
2200 /// The type may be VoidTy, in which case only return true if the addressing
2201 /// mode is legal for a load/store of any legal type. TODO: Handle
2202 /// pre/postinc as well.
2203 ///
2204 /// If the address space cannot be determined, it will be -1.
2205 ///
2206 /// TODO: Remove default argument
2207 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2208 Type *Ty, unsigned AddrSpace,
2209 Instruction *I = nullptr) const;
2210
2211 /// Return the cost of the scaling factor used in the addressing mode
2212 /// represented by AM for this target, for a load/store of the specified type.
2213 ///
2214 /// If the AM is supported, the return value must be >= 0.
2215 /// If the AM is not supported, it returns a negative value.
2216 /// TODO: Handle pre/postinc as well.
2217 /// TODO: Remove default argument
2218 virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
2219 Type *Ty, unsigned AS = 0) const {
2220 // Default: assume that any scaling factor used in a legal AM is free.
2221 if (isLegalAddressingMode(DL, AM, Ty, AS))
2222 return 0;
2223 return -1;
2224 }
2225
2226 /// Return true if the specified immediate is a legal icmp immediate, that is,
2227 /// the target has icmp instructions which can compare a register against the
2228 /// immediate without having to materialize the immediate into a register.
2229 virtual bool isLegalICmpImmediate(int64_t) const {
2230 return true;
2231 }
2232
2233 /// Return true if the specified immediate is a legal add immediate, that is, the
2234 /// target has add instructions which can add a register with the immediate
2235 /// without having to materialize the immediate into a register.
2236 virtual bool isLegalAddImmediate(int64_t) const {
2237 return true;
2238 }
2239
2240 /// Return true if the specified immediate is legal for the value input of a
2241 /// store instruction.
2242 virtual bool isLegalStoreImmediate(int64_t Value) const {
2243 // Default implementation assumes that at least 0 works since it is likely
2244 // that a zero register exists or a zero immediate is allowed.
2245 return Value == 0;
2246 }
2247
2248 /// Return true if it's significantly cheaper to shift a vector by a uniform
2249 /// scalar than by an amount which will vary across each lane. On x86, for
2250 /// example, there is a "psllw" instruction for the former case, but no simple
2251 /// instruction for a general "a << b" operation on vectors.
2252 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2253 return false;
2254 }
2255
2256 /// Returns true if the opcode is a commutative binary operation.
2257 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2258 // FIXME: This should get its info from the td file.
2259 switch (Opcode) {
2260 case ISD::ADD:
2261 case ISD::SMIN:
2262 case ISD::SMAX:
2263 case ISD::UMIN:
2264 case ISD::UMAX:
2265 case ISD::MUL:
2266 case ISD::MULHU:
2267 case ISD::MULHS:
2268 case ISD::SMUL_LOHI:
2269 case ISD::UMUL_LOHI:
2270 case ISD::FADD:
2271 case ISD::FMUL:
2272 case ISD::AND:
2273 case ISD::OR:
2274 case ISD::XOR:
2275 case ISD::SADDO:
2276 case ISD::UADDO:
2277 case ISD::ADDC:
2278 case ISD::ADDE:
2279 case ISD::SADDSAT:
2280 case ISD::UADDSAT:
2281 case ISD::FMINNUM:
2282 case ISD::FMAXNUM:
2283 case ISD::FMINNUM_IEEE:
2284 case ISD::FMAXNUM_IEEE:
2285 case ISD::FMINIMUM:
2286 case ISD::FMAXIMUM:
2287 return true;
2288 default: return false;
2289 }
2290 }
2291
2292 /// Return true if the node is a math/logic binary operator.
2293 virtual bool isBinOp(unsigned Opcode) const {
2294 // A commutative binop must be a binop.
2295 if (isCommutativeBinOp(Opcode))
2296 return true;
2297 // These are non-commutative binops.
2298 switch (Opcode) {
2299 case ISD::SUB:
2300 case ISD::SHL:
2301 case ISD::SRL:
2302 case ISD::SRA:
2303 case ISD::SDIV:
2304 case ISD::UDIV:
2305 case ISD::SREM:
2306 case ISD::UREM:
2307 case ISD::FSUB:
2308 case ISD::FDIV:
2309 case ISD::FREM:
2310 return true;
2311 default:
2312 return false;
2313 }
2314 }
2315
2316 /// Return true if it's free to truncate a value of type FromTy to type
2317 /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
2318 /// by referencing its sub-register AX.
2319 /// Targets must return false when FromTy <= ToTy.
2320 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
2321 return false;
2322 }
2323
2324 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
2325 /// whether a call is in tail position. Typically this means that both results
2326 /// would be assigned to the same register or stack slot, but it could mean
2327 /// the target performs adequate checks of its own before proceeding with the
2328 /// tail call. Targets must return false when FromTy <= ToTy.
2329 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
2330 return false;
2331 }
2332
2333 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
2334 return false;
2335 }
2336
2337 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2338
2339 /// Return true if the extension represented by \p I is free.
2340 /// Unlike the is[Z|FP]ExtFree family, which is based on types,
2341 /// this method can use the context provided by \p I to decide
2342 /// whether or not \p I is free.
2343 /// This method extends the behavior of the is[Z|FP]ExtFree family.
2344 /// In other words, if is[Z|FP]ExtFree returns true, then this method
2345 /// returns true as well. The converse is not true.
2346 /// The target can perform the adequate checks by overriding isExtFreeImpl.
2347 /// \pre \p I must be a sign, zero, or fp extension.
2348 bool isExtFree(const Instruction *I) const {
2349 switch (I->getOpcode()) {
2350 case Instruction::FPExt:
2351 if (isFPExtFree(EVT::getEVT(I->getType()),
2352 EVT::getEVT(I->getOperand(0)->getType())))
2353 return true;
2354 break;
2355 case Instruction::ZExt:
2356 if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
2357 return true;
2358 break;
2359 case Instruction::SExt:
2360 break;
2361 default:
2362 llvm_unreachable("Instruction is not an extension")::llvm::llvm_unreachable_internal("Instruction is not an extension"
, "/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/CodeGen/TargetLowering.h"
, 2362)
;
2363 }
2364 return isExtFreeImpl(I);
2365 }
2366
2367 /// Return true if \p Load and \p Ext can form an ExtLoad.
2368 /// For example, in AArch64
2369 /// %L = load i8, i8* %ptr
2370 /// %E = zext i8 %L to i32
2371 /// can be lowered into one load instruction
2372 /// ldrb w0, [x0]
2373 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
2374 const DataLayout &DL) const {
2375 EVT VT = getValueType(DL, Ext->getType());
2376 EVT LoadVT = getValueType(DL, Load->getType());
2377
2378 // If the load has other users and the truncate is not free, the ext
2379 // probably isn't free.
2380 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
2381 !isTruncateFree(Ext->getType(), Load->getType()))
2382 return false;
2383
2384 // Check whether the target supports casts folded into loads.
2385 unsigned LType;
2386 if (isa<ZExtInst>(Ext))
2387 LType = ISD::ZEXTLOAD;
2388 else {
2389 assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
2390 LType = ISD::SEXTLOAD;
2391 }
2392
2393 return isLoadExtLegal(LType, VT, LoadVT);
2394 }
2395
2396 /// Return true if any actual instruction that defines a value of type FromTy
2397 /// implicitly zero-extends the value to ToTy in the result register.