Bug Summary

File: include/llvm/CodeGen/TargetLowering.h
Warning: line 1146, column 9
Called C++ object pointer is null
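The path the analyzer reports (events 1-2 and 13-16 in the annotated listing below) first falls through AArch64TTIImpl::getCmpSelInstrCost to the forwarding call into BasicTTIImplBase::getCmpSelInstrCost; the base implementation (its body lies past the end of this excerpt) then re-enters the AArch64 override with a null CondTy, the override takes the vector-select branch, and the null pointer is passed as the 'Ty' parameter of TargetLoweringBase::getValueType, where a member function is called on it at TargetLowering.h:1146. The reduction below is only a sketch of that shape, with invented names and bodies (it is not the LLVM code); running the analyzer over it should flag the same defect class.

// Invented, self-contained reduction of the reported defect class.
#include <cstdio>

struct FakeType {
  bool isVectorTy() const { return true; } // member call requires a non-null 'this'
};

// Stand-in for TargetLoweringBase::getValueType: uses Ty unconditionally.
static int getValueTypeLike(const FakeType *Ty) {
  return Ty->isVectorTy() ? 1 : 0; // "Called C++ object pointer is null" when Ty == nullptr
}

// Stand-in for the AArch64 override: forwards CondTy with no null check.
static int getCmpSelInstrCostLike(const FakeType *ValTy, const FakeType *CondTy) {
  if (ValTy->isVectorTy())           // the vector-select branch (events 13/14)
    return getValueTypeLike(CondTy); // null passed as 'Ty' (events 15/16)
  return 0;
}

int main() {
  FakeType Val;
  // A caller that supplies no condition type, as the analyzer assumes for the
  // BasicTTIImplBase re-entry, drives the null pointer into the dereference.
  std::printf("%d\n", getCmpSelInstrCostLike(&Val, nullptr));
  return 0;
}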

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn350071/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-8~svn350071=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-12-27-042839-1215-1 -x c++ /build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64TargetTransformInfo.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/Support/Debug.h"
19#include <algorithm>
20using namespace llvm;
21
22#define DEBUG_TYPE "aarch64tti"
23
24static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
25 cl::init(true), cl::Hidden);
26
27bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
28 const Function *Callee) const {
29 const TargetMachine &TM = getTLI()->getTargetMachine();
30
31 const FeatureBitset &CallerBits =
32 TM.getSubtargetImpl(*Caller)->getFeatureBits();
33 const FeatureBitset &CalleeBits =
34 TM.getSubtargetImpl(*Callee)->getFeatureBits();
35
36 // Inline a callee if its target-features are a subset of the caller's
37 // target-features.
38 return (CallerBits & CalleeBits) == CalleeBits;
39}
40
41/// Calculate the cost of materializing a 64-bit value. This helper
42/// method might only calculate a fraction of a larger immediate. Therefore it
43/// is valid to return a cost of ZERO.
44int AArch64TTIImpl::getIntImmCost(int64_t Val) {
45 // Check if the immediate can be encoded within an instruction.
46 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
47 return 0;
48
49 if (Val < 0)
50 Val = ~Val;
51
52 // Calculate how many moves we will need to materialize this constant.
53 unsigned LZ = countLeadingZeros((uint64_t)Val);
54 return (64 - LZ + 15) / 16;
55}
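As a quick check of the arithmetic above (illustrative values, not taken from the report): Val = 0x112345678 has 31 leading zeros, so the function returns (64 - 31 + 15) / 16 = 3, matching the MOVZ plus two MOVKs needed for its three non-zero 16-bit chunks; Val = 0, or any value encodable as a 64-bit logical immediate, costs 0 via the early return.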
56
57/// Calculate the cost of materializing the given constant.
58int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
59 assert(Ty->isIntegerTy());
60
61 unsigned BitSize = Ty->getPrimitiveSizeInBits();
62 if (BitSize == 0)
63 return ~0U;
64
65 // Sign-extend all constants to a multiple of 64-bit.
66 APInt ImmVal = Imm;
67 if (BitSize & 0x3f)
68 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
69
70 // Split the constant into 64-bit chunks and calculate the cost for each
71 // chunk.
72 int Cost = 0;
73 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
74 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
75 int64_t Val = Tmp.getSExtValue();
76 Cost += getIntImmCost(Val);
77 }
78 // We need at least one instruction to materialize the constant.
79 return std::max(1, Cost);
80}
81
82int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
83 const APInt &Imm, Type *Ty) {
84 assert(Ty->isIntegerTy());
85
86 unsigned BitSize = Ty->getPrimitiveSizeInBits();
87 // There is no cost model for constants with a bit size of 0. Return TCC_Free
88 // here, so that constant hoisting will ignore this constant.
89 if (BitSize == 0)
90 return TTI::TCC_Free;
91
92 unsigned ImmIdx = ~0U;
93 switch (Opcode) {
94 default:
95 return TTI::TCC_Free;
96 case Instruction::GetElementPtr:
97 // Always hoist the base address of a GetElementPtr.
98 if (Idx == 0)
99 return 2 * TTI::TCC_Basic;
100 return TTI::TCC_Free;
101 case Instruction::Store:
102 ImmIdx = 0;
103 break;
104 case Instruction::Add:
105 case Instruction::Sub:
106 case Instruction::Mul:
107 case Instruction::UDiv:
108 case Instruction::SDiv:
109 case Instruction::URem:
110 case Instruction::SRem:
111 case Instruction::And:
112 case Instruction::Or:
113 case Instruction::Xor:
114 case Instruction::ICmp:
115 ImmIdx = 1;
116 break;
117 // Always return TCC_Free for the shift value of a shift instruction.
118 case Instruction::Shl:
119 case Instruction::LShr:
120 case Instruction::AShr:
121 if (Idx == 1)
122 return TTI::TCC_Free;
123 break;
124 case Instruction::Trunc:
125 case Instruction::ZExt:
126 case Instruction::SExt:
127 case Instruction::IntToPtr:
128 case Instruction::PtrToInt:
129 case Instruction::BitCast:
130 case Instruction::PHI:
131 case Instruction::Call:
132 case Instruction::Select:
133 case Instruction::Ret:
134 case Instruction::Load:
135 break;
136 }
137
138 if (Idx == ImmIdx) {
139 int NumConstants = (BitSize + 63) / 64;
140 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
141 return (Cost <= NumConstants * TTI::TCC_Basic)
142 ? static_cast<int>(TTI::TCC_Free)
143 : Cost;
144 }
145 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
146}
147
148int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
149 const APInt &Imm, Type *Ty) {
150 assert(Ty->isIntegerTy());
151
152 unsigned BitSize = Ty->getPrimitiveSizeInBits();
153 // There is no cost model for constants with a bit size of 0. Return TCC_Free
154 // here, so that constant hoisting will ignore this constant.
155 if (BitSize == 0)
156 return TTI::TCC_Free;
157
158 switch (IID) {
159 default:
160 return TTI::TCC_Free;
161 case Intrinsic::sadd_with_overflow:
162 case Intrinsic::uadd_with_overflow:
163 case Intrinsic::ssub_with_overflow:
164 case Intrinsic::usub_with_overflow:
165 case Intrinsic::smul_with_overflow:
166 case Intrinsic::umul_with_overflow:
167 if (Idx == 1) {
168 int NumConstants = (BitSize + 63) / 64;
169 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
170 return (Cost <= NumConstants * TTI::TCC_Basic)
171 ? static_cast<int>(TTI::TCC_Free)
172 : Cost;
173 }
174 break;
175 case Intrinsic::experimental_stackmap:
176 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
177 return TTI::TCC_Free;
178 break;
179 case Intrinsic::experimental_patchpoint_void:
180 case Intrinsic::experimental_patchpoint_i64:
181 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
182 return TTI::TCC_Free;
183 break;
184 }
185 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
186}
187
188TargetTransformInfo::PopcntSupportKind
189AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
190 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
191 if (TyWidth == 32 || TyWidth == 64)
192 return TTI::PSK_FastHardware;
193 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
194 return TTI::PSK_Software;
195}
196
197bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
198 ArrayRef<const Value *> Args) {
199
200 // A helper that returns a vector type from the given type. The number of
201 // elements in type Ty determines the vector width.
202 auto toVectorTy = [&](Type *ArgTy) {
203 return VectorType::get(ArgTy->getScalarType(),
204 DstTy->getVectorNumElements());
205 };
206
207 // Exit early if DstTy is not a vector type whose elements are at least
208 // 16-bits wide.
209 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
210 return false;
211
212 // Determine if the operation has a widening variant. We consider both the
213 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
214 // instructions.
215 //
216 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
217 // verify that their extending operands are eliminated during code
218 // generation.
219 switch (Opcode) {
220 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
221 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
222 break;
223 default:
224 return false;
225 }
226
227 // To be a widening instruction (either the "wide" or "long" versions), the
228 // second operand must be a sign- or zero extend having a single user. We
229 // only consider extends having a single user because they may otherwise not
230 // be eliminated.
231 if (Args.size() != 2 ||
232 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
233 !Args[1]->hasOneUse())
234 return false;
235 auto *Extend = cast<CastInst>(Args[1]);
236
237 // Legalize the destination type and ensure it can be used in a widening
238 // operation.
239 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
240 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
241 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
242 return false;
243
244 // Legalize the source type and ensure it can be used in a widening
245 // operation.
246 Type *SrcTy = toVectorTy(Extend->getSrcTy());
247 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
248 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
249 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
250 return false;
251
252 // Get the total number of vector elements in the legalized types.
253 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
254 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
255
256 // Return true if the legalized types have the same number of vector elements
257 // and the destination element type size is twice that of the source type.
258 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
259}
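For example (an illustrative case, not taken from the report): an add of type <8 x i16> whose second operand is a single-use zext from <8 x i8> passes every check above: both types legalize to a single vector register, the legalized element counts match (8 and 8), and the destination element size (16 bits) is twice the source element size (8 bits), so the add can be selected as the widening uaddw form.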
260
261int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
262 const Instruction *I) {
263 int ISD = TLI->InstructionOpcodeToISD(Opcode);
264 assert(ISD && "Invalid opcode");
265
266 // If the cast is observable, and it is used by a widening instruction (e.g.,
267 // uaddl, saddw, etc.), it may be free.
268 if (I && I->hasOneUse()) {
269 auto *SingleUser = cast<Instruction>(*I->user_begin());
270 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
271 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
272 // If the cast is the second operand, it is free. We will generate either
273 // a "wide" or "long" version of the widening instruction.
274 if (I == SingleUser->getOperand(1))
275 return 0;
276 // If the cast is not the second operand, it will be free if it looks the
277 // same as the second operand. In this case, we will generate a "long"
278 // version of the widening instruction.
279 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
280 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
281 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
282 return 0;
283 }
284 }
285
286 EVT SrcTy = TLI->getValueType(DL, Src);
287 EVT DstTy = TLI->getValueType(DL, Dst);
288
289 if (!SrcTy.isSimple() || !DstTy.isSimple())
290 return BaseT::getCastInstrCost(Opcode, Dst, Src);
291
292 static const TypeConversionCostTblEntry
293 ConversionTbl[] = {
294 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
295 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
296 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
297 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
298
299 // The number of shll instructions for the extension.
300 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
301 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
302 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
303 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
304 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
305 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
306 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
307 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
308 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
309 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
310 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
311 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
312 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
313 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
314 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
315 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
316
317 // LowerVectorINT_TO_FP:
318 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
319 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
320 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
321 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
322 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
323 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
324
325 // Complex: to v2f32
326 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
327 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
328 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
329 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
330 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
331 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
332
333 // Complex: to v4f32
334 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
335 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
336 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
337 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
338
339 // Complex: to v8f32
340 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
341 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
342 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
343 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
344
345 // Complex: to v16f32
346 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
347 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
348
349 // Complex: to v2f64
350 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
351 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
352 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
353 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
354 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
355 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
356
357
358 // LowerVectorFP_TO_INT
359 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
360 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
361 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
362 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
363 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
364 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
365
366 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
367 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
368 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
369 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
370 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
371 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
372 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
373
374 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
375 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
376 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
377 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
378 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
379
380 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
381 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
382 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
383 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
384 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
385 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
386 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
387 };
388
389 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
390 DstTy.getSimpleVT(),
391 SrcTy.getSimpleVT()))
392 return Entry->Cost;
393
394 return BaseT::getCastInstrCost(Opcode, Dst, Src);
395}
396
397int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
398 VectorType *VecTy,
399 unsigned Index) {
400
401 // Make sure we were given a valid extend opcode.
402 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
403 "Invalid opcode");
404
405 // We are extending an element we extract from a vector, so the source type
406 // of the extend is the element type of the vector.
407 auto *Src = VecTy->getElementType();
408
409 // Sign- and zero-extends are for integer types only.
410 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
411
412 // Get the cost for the extract. We compute the cost (if any) for the extend
413 // below.
414 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
415
416 // Legalize the types.
417 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
418 auto DstVT = TLI->getValueType(DL, Dst);
419 auto SrcVT = TLI->getValueType(DL, Src);
420
421 // If the resulting type is still a vector and the destination type is legal,
422 // we may get the extension for free. If not, get the default cost for the
423 // extend.
424 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
425 return Cost + getCastInstrCost(Opcode, Dst, Src);
426
427 // The destination type should be larger than the element type. If not, get
428 // the default cost for the extend.
429 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
430 return Cost + getCastInstrCost(Opcode, Dst, Src);
431
432 switch (Opcode) {
433 default:
434 llvm_unreachable("Opcode should be either SExt or ZExt");
435
436 // For sign-extends, we only need a smov, which performs the extension
437 // automatically.
438 case Instruction::SExt:
439 return Cost;
440
441 // For zero-extends, the extend is performed automatically by a umov unless
442 // the destination type is i64 and the element type is i8 or i16.
443 case Instruction::ZExt:
444 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
445 return Cost;
446 }
447
448 // If we are unable to perform the extend for free, get the default cost.
449 return Cost + getCastInstrCost(Opcode, Dst, Src);
450}
451
452int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
453 unsigned Index) {
454 assert(Val->isVectorTy() && "This must be a vector type");
455
456 if (Index != -1U) {
457 // Legalize the type.
458 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
459
460 // This type is legalized to a scalar type.
461 if (!LT.second.isVector())
462 return 0;
463
464 // The type may be split. Normalize the index to the new type.
465 unsigned Width = LT.second.getVectorNumElements();
466 Index = Index % Width;
467
468 // The element at index zero is already inside the vector.
469 if (Index == 0)
470 return 0;
471 }
472
473 // All other insert/extracts cost this much.
474 return ST->getVectorInsertExtractBaseCost();
475}
476
477int AArch64TTIImpl::getArithmeticInstrCost(
478 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
479 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
480 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
481 // Legalize the type.
482 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
483
484 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
485 // add in the widening overhead specified by the sub-target. Since the
486 // extends feeding widening instructions are performed automatically, they
487 // aren't present in the generated code and have a zero cost. By adding a
488 // widening overhead here, we attach the total cost of the combined operation
489 // to the widening instruction.
490 int Cost = 0;
491 if (isWideningInstruction(Ty, Opcode, Args))
492 Cost += ST->getWideningBaseCost();
493
494 int ISD = TLI->InstructionOpcodeToISD(Opcode);
495
496 switch (ISD) {
497 default:
498 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
499 Opd1PropInfo, Opd2PropInfo);
500 case ISD::SDIV:
501 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
502 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
503 // On AArch64, scalar signed division by a power-of-two constant is
504 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
505 // The OperandValue properties may not be the same as those of the previous
506 // operation; conservatively assume OP_None.
507 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
508 TargetTransformInfo::OP_None,
509 TargetTransformInfo::OP_None);
510 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
511 TargetTransformInfo::OP_None,
512 TargetTransformInfo::OP_None);
513 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
514 TargetTransformInfo::OP_None,
515 TargetTransformInfo::OP_None);
516 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
517 TargetTransformInfo::OP_None,
518 TargetTransformInfo::OP_None);
519 return Cost;
520 }
521 LLVM_FALLTHROUGH;
522 case ISD::UDIV:
523 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
524 auto VT = TLI->getValueType(DL, Ty);
525 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
526 // Vector signed division by a constant is expanded to the
527 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
528 // to MULHS + SUB + SRL + ADD + SRL.
529 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
530 Opd2Info,
531 TargetTransformInfo::OP_None,
532 TargetTransformInfo::OP_None);
533 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
534 Opd2Info,
535 TargetTransformInfo::OP_None,
536 TargetTransformInfo::OP_None);
537 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
538 Opd2Info,
539 TargetTransformInfo::OP_None,
540 TargetTransformInfo::OP_None);
541 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
542 }
543 }
544
545 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
546 Opd1PropInfo, Opd2PropInfo);
547 if (Ty->isVectorTy()) {
548 // On AArch64, vector divisions are not supported natively and are
549 // expanded into scalar divisions of each pair of elements.
550 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
551 Opd2Info, Opd1PropInfo, Opd2PropInfo);
552 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
553 Opd2Info, Opd1PropInfo, Opd2PropInfo);
554 // TODO: if one of the arguments is scalar, then it's not necessary to
555 // double the cost of handling the vector elements.
556 Cost += Cost;
557 }
558 return Cost;
559
560 case ISD::ADD:
561 case ISD::MUL:
562 case ISD::XOR:
563 case ISD::OR:
564 case ISD::AND:
565 // These nodes are marked as 'custom' for combining purposes only.
566 // We know that they are legal. See LowerAdd in ISelLowering.
567 return (Cost + 1) * LT.first;
568 }
569}
570
571int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
572 const SCEV *Ptr) {
573 // Address computations in vectorized code with non-consecutive addresses will
574 // likely result in more instructions compared to scalar code where the
575 // computation can more often be merged into the index mode. The resulting
576 // extra micro-ops can significantly decrease throughput.
577 unsigned NumVectorInstToHideOverhead = 10;
578 int MaxMergeDistance = 64;
579
580 if (Ty->isVectorTy() && SE &&
581 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
582 return NumVectorInstToHideOverhead;
583
584 // In many cases the address computation is not merged into the instruction
585 // addressing mode.
586 return 1;
587}
588
589int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
590 Type *CondTy, const Instruction *I) {
591
592 int ISD = TLI->InstructionOpcodeToISD(Opcode);
593 // We don't lower some vector selects well when they are wider than the
594 // register width.
595 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
13. Assuming 'ISD' is equal to SELECT
14. Taking true branch
596 // We would need this many instructions to hide the scalarization happening.
597 const int AmortizationCost = 20;
598 static const TypeConversionCostTblEntry
599 VectorSelectTbl[] = {
600 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
601 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
602 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
603 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
604 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
605 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
606 };
607
608 EVT SelCondTy = TLI->getValueType(DL, CondTy);
15. Passing null pointer value via 2nd parameter 'Ty'
16. Calling 'TargetLoweringBase::getValueType'
609 EVT SelValTy = TLI->getValueType(DL, ValTy);
610 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
611 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
612 SelCondTy.getSimpleVT(),
613 SelValTy.getSimpleVT()))
614 return Entry->Cost;
615 }
616 }
617 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1. Passing value via 3rd parameter 'CondTy'
2. Calling 'BasicTTIImplBase::getCmpSelInstrCost'
618}
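Given events 13-16 above, the vector-select branch assumes CondTy is non-null before calling TLI->getValueType(DL, CondTy). One possible guard is sketched below in the style of the surrounding code; whether upstream added this check or instead guaranteed a non-null CondTy at the base-class call site is not shown in this report, so treat it purely as an illustration.

// Sketch only: skip the AArch64 select table when no condition type is given,
// so a null CondTy never reaches TLI->getValueType().
if (ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
  // ... existing VectorSelectTbl lookup via SelCondTy/SelValTy ...
}
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);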
619
620int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
621 unsigned Alignment, unsigned AddressSpace,
622 const Instruction *I) {
623 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
624
625 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
626 LT.second.is128BitVector() && Alignment < 16) {
627 // Unaligned stores are extremely inefficient. We don't split all
628 // unaligned 128-bit stores because of the negative impact that has been
629 // shown in practice on inlined block copy code.
630 // We make such stores expensive so that we will only vectorize if there
631 // are 6 other instructions getting vectorized.
632 const int AmortizationCost = 6;
633
634 return LT.first * 2 * AmortizationCost;
635 }
636
637 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
638 unsigned ProfitableNumElements;
639 if (Opcode == Instruction::Store)
640 // We use a custom trunc store lowering so v.4b should be profitable.
641 ProfitableNumElements = 4;
642 else
643 // We scalarize the loads because there is no v.4b register and we
644 // have to promote the elements to v.2.
645 ProfitableNumElements = 8;
646
647 if (Ty->getVectorNumElements() < ProfitableNumElements) {
648 unsigned NumVecElts = Ty->getVectorNumElements();
649 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
650 // We generate 2 instructions per vector element.
651 return NumVectorizableInstsToAmortize * NumVecElts * 2;
652 }
653 }
654
655 return LT.first;
656}
657
658int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
659 unsigned Factor,
660 ArrayRef<unsigned> Indices,
661 unsigned Alignment,
662 unsigned AddressSpace,
663 bool UseMaskForCond,
664 bool UseMaskForGaps) {
665 assert(Factor >= 2 && "Invalid interleave factor");
666 assert(isa<VectorType>(VecTy) && "Expect a vector type");
667
668 if (!UseMaskForCond && !UseMaskForGaps &&
669 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
670 unsigned NumElts = VecTy->getVectorNumElements();
671 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
672
673 // ldN/stN only support legal vector types of size 64 or 128 in bits.
674 // Accesses having vector types that are a multiple of 128 bits can be
675 // matched to more than one ldN/stN instruction.
676 if (NumElts % Factor == 0 &&
677 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
678 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
679 }
680
681 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
682 Alignment, AddressSpace,
683 UseMaskForCond, UseMaskForGaps);
684}
685
686int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
687 int Cost = 0;
688 for (auto *I : Tys) {
689 if (!I->isVectorTy())
690 continue;
691 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
692 Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
693 getMemoryOpCost(Instruction::Load, I, 128, 0);
694 }
695 return Cost;
696}
697
698unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
699 return ST->getMaxInterleaveFactor();
700}
701
702// For Falkor, we want to avoid having too many strided loads in a loop since
703// that can exhaust the HW prefetcher resources. We adjust the unroller
704// MaxCount preference below to attempt to ensure unrolling doesn't create too
705// many strided loads.
706static void
707getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
708 TargetTransformInfo::UnrollingPreferences &UP) {
709 enum { MaxStridedLoads = 7 };
710 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
711 int StridedLoads = 0;
712 // FIXME? We could make this more precise by looking at the CFG and
713 // e.g. not counting loads in each side of an if-then-else diamond.
714 for (const auto BB : L->blocks()) {
715 for (auto &I : *BB) {
716 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
717 if (!LMemI)
718 continue;
719
720 Value *PtrValue = LMemI->getPointerOperand();
721 if (L->isLoopInvariant(PtrValue))
722 continue;
723
724 const SCEV *LSCEV = SE.getSCEV(PtrValue);
725 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
726 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
727 continue;
728
729 // FIXME? We could take pairing of unrolled load copies into account
730 // by looking at the AddRec, but we would probably have to limit this
731 // to loops with no stores or other memory optimization barriers.
732 ++StridedLoads;
733 // We've seen enough strided loads that seeing more won't make a
734 // difference.
735 if (StridedLoads > MaxStridedLoads / 2)
736 return StridedLoads;
737 }
738 }
739 return StridedLoads;
740 };
741
742 int StridedLoads = countStridedLoads(L, SE);
743 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
744 << " strided loads\n");
745 // Pick the largest power of 2 unroll count that won't result in too many
746 // strided loads.
747 if (StridedLoads) {
748 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
749 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
750 << UP.MaxCount << '\n');
751 }
752}
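To make the MaxCount computation concrete (illustrative counts): with MaxStridedLoads = 7, a loop containing three strided loads gets UP.MaxCount = 1 << Log2_32(7 / 3) = 2, and a loop with a single strided load gets 1 << Log2_32(7) = 4; a loop with no strided loads leaves MaxCount untouched.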
753
754void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
755 TTI::UnrollingPreferences &UP) {
756 // Enable partial unrolling and runtime unrolling.
757 BaseT::getUnrollingPreferences(L, SE, UP);
758
759 // An inner loop is more likely to be hot, and its runtime check can be
760 // hoisted out by the LICM pass, so the overhead is lower; try a larger
761 // threshold to unroll more loops.
762 if (L->getLoopDepth() > 1)
763 UP.PartialThreshold *= 2;
764
765 // Disable partial & runtime unrolling on -Os.
766 UP.PartialOptSizeThreshold = 0;
767
768 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
769 EnableFalkorHWPFUnrollFix)
770 getFalkorUnrollingPreferences(L, SE, UP);
771}
772
773Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
774 Type *ExpectedType) {
775 switch (Inst->getIntrinsicID()) {
776 default:
777 return nullptr;
778 case Intrinsic::aarch64_neon_st2:
779 case Intrinsic::aarch64_neon_st3:
780 case Intrinsic::aarch64_neon_st4: {
781 // Create a struct type
782 StructType *ST = dyn_cast<StructType>(ExpectedType);
783 if (!ST)
784 return nullptr;
785 unsigned NumElts = Inst->getNumArgOperands() - 1;
786 if (ST->getNumElements() != NumElts)
787 return nullptr;
788 for (unsigned i = 0, e = NumElts; i != e; ++i) {
789 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
790 return nullptr;
791 }
792 Value *Res = UndefValue::get(ExpectedType);
793 IRBuilder<> Builder(Inst);
794 for (unsigned i = 0, e = NumElts; i != e; ++i) {
795 Value *L = Inst->getArgOperand(i);
796 Res = Builder.CreateInsertValue(Res, L, i);
797 }
798 return Res;
799 }
800 case Intrinsic::aarch64_neon_ld2:
801 case Intrinsic::aarch64_neon_ld3:
802 case Intrinsic::aarch64_neon_ld4:
803 if (Inst->getType() == ExpectedType)
804 return Inst;
805 return nullptr;
806 }
807}
808
809bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
810 MemIntrinsicInfo &Info) {
811 switch (Inst->getIntrinsicID()) {
812 default:
813 break;
814 case Intrinsic::aarch64_neon_ld2:
815 case Intrinsic::aarch64_neon_ld3:
816 case Intrinsic::aarch64_neon_ld4:
817 Info.ReadMem = true;
818 Info.WriteMem = false;
819 Info.PtrVal = Inst->getArgOperand(0);
820 break;
821 case Intrinsic::aarch64_neon_st2:
822 case Intrinsic::aarch64_neon_st3:
823 case Intrinsic::aarch64_neon_st4:
824 Info.ReadMem = false;
825 Info.WriteMem = true;
826 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
827 break;
828 }
829
830 switch (Inst->getIntrinsicID()) {
831 default:
832 return false;
833 case Intrinsic::aarch64_neon_ld2:
834 case Intrinsic::aarch64_neon_st2:
835 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
836 break;
837 case Intrinsic::aarch64_neon_ld3:
838 case Intrinsic::aarch64_neon_st3:
839 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
840 break;
841 case Intrinsic::aarch64_neon_ld4:
842 case Intrinsic::aarch64_neon_st4:
843 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
844 break;
845 }
846 return true;
847}
848
849/// See if \p I should be considered for address type promotion. We check if \p
850/// I is a sext with the right type and used in memory accesses. If it is used in a
851/// "complex" getelementptr, we allow it to be promoted without finding other
852/// sext instructions that sign extended the same initial value. A getelementptr
853/// is considered as "complex" if it has more than 2 operands.
854bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
855 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
856 bool Considerable = false;
857 AllowPromotionWithoutCommonHeader = false;
858 if (!isa<SExtInst>(&I))
859 return false;
860 Type *ConsideredSExtType =
861 Type::getInt64Ty(I.getParent()->getParent()->getContext());
862 if (I.getType() != ConsideredSExtType)
863 return false;
864 // See if the sext is the one with the right type and used in at least one
865 // GetElementPtrInst.
866 for (const User *U : I.users()) {
867 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
868 Considerable = true;
869 // A getelementptr is considered as "complex" if it has more than 2
870 // operands. We will promote a SExt used in such complex GEP as we
871 // expect some computation to be merged if they are done on 64 bits.
872 if (GEPInst->getNumOperands() > 2) {
873 AllowPromotionWithoutCommonHeader = true;
874 break;
875 }
876 }
877 }
878 return Considerable;
879}
880
881unsigned AArch64TTIImpl::getCacheLineSize() {
882 return ST->getCacheLineSize();
883}
884
885unsigned AArch64TTIImpl::getPrefetchDistance() {
886 return ST->getPrefetchDistance();
887}
888
889unsigned AArch64TTIImpl::getMinPrefetchStride() {
890 return ST->getMinPrefetchStride();
891}
892
893unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
894 return ST->getMaxPrefetchIterationsAhead();
895}
896
897bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
898 TTI::ReductionFlags Flags) const {
899 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
900 unsigned ScalarBits = Ty->getScalarSizeInBits();
901 switch (Opcode) {
902 case Instruction::FAdd:
903 case Instruction::FMul:
904 case Instruction::And:
905 case Instruction::Or:
906 case Instruction::Xor:
907 case Instruction::Mul:
908 return false;
909 case Instruction::Add:
910 return ScalarBits * Ty->getVectorNumElements() >= 128;
911 case Instruction::ICmp:
912 return (ScalarBits < 64) &&
913 (ScalarBits * Ty->getVectorNumElements() >= 128);
914 case Instruction::FCmp:
915 return Flags.NoNaN;
916 default:
917 llvm_unreachable("Unhandled reduction opcode");
918 }
919 return false;
920}
921
922int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
923 bool IsPairwiseForm) {
924
925 if (IsPairwiseForm)
926 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
927
928 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
929 MVT MTy = LT.second;
930 int ISD = TLI->InstructionOpcodeToISD(Opcode);
931 assert(ISD && "Invalid opcode");
932
933 // Horizontal adds can use the 'addv' instruction. We model the cost of these
934 // instructions as normal vector adds. This is the only arithmetic vector
935 // reduction operation for which we have an instruction.
936 static const CostTblEntry CostTblNoPairwise[]{
937 {ISD::ADD, MVT::v8i8, 1},
938 {ISD::ADD, MVT::v16i8, 1},
939 {ISD::ADD, MVT::v4i16, 1},
940 {ISD::ADD, MVT::v8i16, 1},
941 {ISD::ADD, MVT::v4i32, 1},
942 };
943
944 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
945 return LT.first * Entry->Cost;
946
947 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
948}
949
950int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
951 Type *SubTp) {
952 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
953 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
954 static const CostTblEntry ShuffleTbl[] = {
955 // Broadcast shuffle kinds can be performed with 'dup'.
956 { TTI::SK_Broadcast, MVT::v8i8, 1 },
957 { TTI::SK_Broadcast, MVT::v16i8, 1 },
958 { TTI::SK_Broadcast, MVT::v4i16, 1 },
959 { TTI::SK_Broadcast, MVT::v8i16, 1 },
960 { TTI::SK_Broadcast, MVT::v2i32, 1 },
961 { TTI::SK_Broadcast, MVT::v4i32, 1 },
962 { TTI::SK_Broadcast, MVT::v2i64, 1 },
963 { TTI::SK_Broadcast, MVT::v2f32, 1 },
964 { TTI::SK_Broadcast, MVT::v4f32, 1 },
965 { TTI::SK_Broadcast, MVT::v2f64, 1 },
966 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
967 // 'zip1/zip2' instructions.
968 { TTI::SK_Transpose, MVT::v8i8, 1 },
969 { TTI::SK_Transpose, MVT::v16i8, 1 },
970 { TTI::SK_Transpose, MVT::v4i16, 1 },
971 { TTI::SK_Transpose, MVT::v8i16, 1 },
972 { TTI::SK_Transpose, MVT::v2i32, 1 },
973 { TTI::SK_Transpose, MVT::v4i32, 1 },
974 { TTI::SK_Transpose, MVT::v2i64, 1 },
975 { TTI::SK_Transpose, MVT::v2f32, 1 },
976 { TTI::SK_Transpose, MVT::v4f32, 1 },
977 { TTI::SK_Transpose, MVT::v2f64, 1 },
978 // Select shuffle kinds.
979 // TODO: handle vXi8/vXi16.
980 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
981 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
982 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
983 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
984 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
985 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
986 // PermuteSingleSrc shuffle kinds.
987 // TODO: handle vXi8/vXi16.
988 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
989 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
990 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
991 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
992 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
993 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
994 };
995 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
996 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
997 return LT.first * Entry->Cost;
998 }
999
1000 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
1001}

/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// This file provides a helper that implements much of the TTI interface in
12/// terms of the target-independent code generator and TargetLowering
13/// interfaces.
14//
15//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
18#define LLVM_CODEGEN_BASICTTIIMPL_H
19
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/BitVector.h"
23#include "llvm/ADT/SmallPtrSet.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/Analysis/LoopInfo.h"
26#include "llvm/Analysis/TargetTransformInfo.h"
27#include "llvm/Analysis/TargetTransformInfoImpl.h"
28#include "llvm/CodeGen/ISDOpcodes.h"
29#include "llvm/CodeGen/TargetLowering.h"
30#include "llvm/CodeGen/TargetSubtargetInfo.h"
31#include "llvm/CodeGen/ValueTypes.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/CallSite.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/Intrinsics.h"
42#include "llvm/IR/Operator.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/Value.h"
45#include "llvm/MC/MCSchedule.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/ErrorHandling.h"
49#include "llvm/Support/MachineValueType.h"
50#include "llvm/Support/MathExtras.h"
51#include <algorithm>
52#include <cassert>
53#include <cstdint>
54#include <limits>
55#include <utility>
56
57namespace llvm {
58
59class Function;
60class GlobalValue;
61class LLVMContext;
62class ScalarEvolution;
63class SCEV;
64class TargetMachine;
65
66extern cl::opt<unsigned> PartialUnrollingThreshold;
67
68/// Base class which can be used to help build a TTI implementation.
69///
70/// This class provides as much implementation of the TTI interface as is
71/// possible using the target independent parts of the code generator.
72///
73/// In order to subclass it, your class must implement a getST() method to
74/// return the subtarget, and a getTLI() method to return the target lowering.
75/// We need these methods implemented in the derived class so that this class
76/// doesn't have to duplicate storage for them.
77template <typename T>
78class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
79private:
80 using BaseT = TargetTransformInfoImplCRTPBase<T>;
81 using TTI = TargetTransformInfo;
82
83 /// Estimate a cost of Broadcast as an extract and sequence of insert
84 /// operations.
85 unsigned getBroadcastShuffleOverhead(Type *Ty) {
86 assert(Ty->isVectorTy() && "Can only shuffle vectors");
87 unsigned Cost = 0;
88 // Broadcast cost is equal to the cost of extracting the zero'th element
89 // plus the cost of inserting it into every element of the result vector.
90 Cost += static_cast<T *>(this)->getVectorInstrCost(
91 Instruction::ExtractElement, Ty, 0);
92
93 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
94 Cost += static_cast<T *>(this)->getVectorInstrCost(
95 Instruction::InsertElement, Ty, i);
96 }
97 return Cost;
98 }
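Read concretely (assuming, for illustration, a per-element insert/extract cost of 1): broadcasting a <4 x float> is modeled as one extract of element 0 plus four inserts, i.e. a cost of 5, and the estimate grows linearly with the element count.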
99
100 /// Estimate a cost of shuffle as a sequence of extract and insert
101 /// operations.
102 unsigned getPermuteShuffleOverhead(Type *Ty) {
103 assert(Ty->isVectorTy() && "Can only shuffle vectors");
104 unsigned Cost = 0;
105 // Shuffle cost is equal to the cost of extracting elements from its argument
106 // plus the cost of inserting them into the result vector.
107
108 // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
109 // index 0 of the first vector, index 1 of the second vector, index 2 of the
110 // first vector and finally index 3 of the second vector, and insert them at
111 // indices <0,1,2,3> of the result vector.
112 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
113 Cost += static_cast<T *>(this)
114 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
115 Cost += static_cast<T *>(this)
116 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
117 }
118 return Cost;
119 }
120
121 /// Estimate a cost of subvector extraction as a sequence of extract and
122 /// insert operations.
123 unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
124 assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
125 "Can only extract subvectors from vectors");
126 int NumSubElts = SubTy->getVectorNumElements();
127 assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
128 "SK_ExtractSubvector index out of range");
129
130 unsigned Cost = 0;
131 // Subvector extraction cost is equal to the cost of extracting elements from
132 // the source type plus the cost of inserting them into the result vector
133 // type.
134 for (int i = 0; i != NumSubElts; ++i) {
135 Cost += static_cast<T *>(this)->getVectorInstrCost(
136 Instruction::ExtractElement, Ty, i + Index);
137 Cost += static_cast<T *>(this)->getVectorInstrCost(
138 Instruction::InsertElement, SubTy, i);
139 }
140 return Cost;
141 }
142
143 /// Estimate a cost of subvector insertion as a sequence of extract and
144 /// insert operations.
145 unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
146 assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
147 "Can only insert subvectors into vectors");
148 int NumSubElts = SubTy->getVectorNumElements();
149 assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
150 "SK_InsertSubvector index out of range");
151
152 unsigned Cost = 0;
153 // Subvector insertion cost is equal to the cost of extracting elements from
154 // the source type plus the cost of inserting them into the result vector
155 // type.
156 for (int i = 0; i != NumSubElts; ++i) {
157 Cost += static_cast<T *>(this)->getVectorInstrCost(
158 Instruction::ExtractElement, SubTy, i);
159 Cost += static_cast<T *>(this)->getVectorInstrCost(
160 Instruction::InsertElement, Ty, i + Index);
161 }
162 return Cost;
163 }
164
165 /// Local query method delegates up to T which *must* implement this!
166 const TargetSubtargetInfo *getST() const {
167 return static_cast<const T *>(this)->getST();
168 }
169
170 /// Local query method delegates up to T which *must* implement this!
171 const TargetLoweringBase *getTLI() const {
172 return static_cast<const T *>(this)->getTLI();
173 }
174
175 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
176 switch (M) {
177 case TTI::MIM_Unindexed:
178 return ISD::UNINDEXED;
179 case TTI::MIM_PreInc:
180 return ISD::PRE_INC;
181 case TTI::MIM_PreDec:
182 return ISD::PRE_DEC;
183 case TTI::MIM_PostInc:
184 return ISD::POST_INC;
185 case TTI::MIM_PostDec:
186 return ISD::POST_DEC;
187 }
188 llvm_unreachable("Unexpected MemIndexedMode");
189 }
190
191protected:
192 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
193 : BaseT(DL) {}
194
195 using TargetTransformInfoImplBase::DL;
196
197public:
198 /// \name Scalar TTI Implementations
199 /// @{
200 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
201 unsigned BitWidth, unsigned AddressSpace,
202 unsigned Alignment, bool *Fast) const {
203 EVT E = EVT::getIntegerVT(Context, BitWidth);
204 return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
205 }
206
207 bool hasBranchDivergence() { return false; }
208
209 bool isSourceOfDivergence(const Value *V) { return false; }
210
211 bool isAlwaysUniform(const Value *V) { return false; }
212
213 unsigned getFlatAddressSpace() {
214 // Return an invalid address space.
215 return -1;
216 }
217
218 bool isLegalAddImmediate(int64_t imm) {
219 return getTLI()->isLegalAddImmediate(imm);
220 }
221
222 bool isLegalICmpImmediate(int64_t imm) {
223 return getTLI()->isLegalICmpImmediate(imm);
224 }
225
226 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
227 bool HasBaseReg, int64_t Scale,
228 unsigned AddrSpace, Instruction *I = nullptr) {
229 TargetLoweringBase::AddrMode AM;
230 AM.BaseGV = BaseGV;
231 AM.BaseOffs = BaseOffset;
232 AM.HasBaseReg = HasBaseReg;
233 AM.Scale = Scale;
234 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235 }
236
237 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
238 const DataLayout &DL) const {
239 EVT VT = getTLI()->getValueType(DL, Ty);
240 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241 }
242
243 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
244 const DataLayout &DL) const {
245 EVT VT = getTLI()->getValueType(DL, Ty);
246 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247 }
248
249 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251 }
252
253 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
254 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255 TargetLoweringBase::AddrMode AM;
256 AM.BaseGV = BaseGV;
257 AM.BaseOffs = BaseOffset;
258 AM.HasBaseReg = HasBaseReg;
259 AM.Scale = Scale;
260 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261 }
262
263 bool isTruncateFree(Type *Ty1, Type *Ty2) {
264 return getTLI()->isTruncateFree(Ty1, Ty2);
265 }
266
267 bool isProfitableToHoist(Instruction *I) {
268 return getTLI()->isProfitableToHoist(I);
269 }
270
271 bool useAA() const { return getST()->useAA(); }
272
273 bool isTypeLegal(Type *Ty) {
274 EVT VT = getTLI()->getValueType(DL, Ty);
275 return getTLI()->isTypeLegal(VT);
276 }
277
278 int getGEPCost(Type *PointeeType, const Value *Ptr,
279 ArrayRef<const Value *> Operands) {
280 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281 }
282
283 int getExtCost(const Instruction *I, const Value *Src) {
284 if (getTLI()->isExtFree(I))
285 return TargetTransformInfo::TCC_Free;
286
287 if (isa<ZExtInst>(I) || isa<SExtInst>(I))
288 if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289 if (getTLI()->isExtLoad(LI, I, DL))
290 return TargetTransformInfo::TCC_Free;
291
292 return TargetTransformInfo::TCC_Basic;
293 }
294
295 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
296 ArrayRef<const Value *> Arguments) {
297 return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
298 }
299
300 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
301 ArrayRef<Type *> ParamTys) {
302 if (IID == Intrinsic::cttz) {
303 if (getTLI()->isCheapToSpeculateCttz())
304 return TargetTransformInfo::TCC_Basic;
305 return TargetTransformInfo::TCC_Expensive;
306 }
307
308 if (IID == Intrinsic::ctlz) {
309 if (getTLI()->isCheapToSpeculateCtlz())
310 return TargetTransformInfo::TCC_Basic;
311 return TargetTransformInfo::TCC_Expensive;
312 }
313
314 return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
315 }
316
317 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
318 unsigned &JumpTableSize) {
319 /// Try to find the estimated number of clusters. Note that the number of
320 /// clusters identified in this function could be different from the actual
321 /// numbers found in lowering. This function ignores switches that are
322 /// lowered with a mix of jump table / bit test / BTree. This function was
323 /// initially intended to be used when estimating the cost of a switch in
324 /// the inline cost heuristic, but it's a generic cost model to be used in other
325 /// places (e.g., in loop unrolling).
326 unsigned N = SI.getNumCases();
327 const TargetLoweringBase *TLI = getTLI();
328 const DataLayout &DL = this->getDataLayout();
329
330 JumpTableSize = 0;
331 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
332
333 // Early exit if neither a jump table nor a bit test is feasible.
334 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
335 return N;
336
337 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
338 APInt MinCaseVal = MaxCaseVal;
339 for (auto CI : SI.cases()) {
340 const APInt &CaseVal = CI.getCaseValue()->getValue();
341 if (CaseVal.sgt(MaxCaseVal))
342 MaxCaseVal = CaseVal;
343 if (CaseVal.slt(MinCaseVal))
344 MinCaseVal = CaseVal;
345 }
346
347 // Check if suitable for a bit test
348 if (N <= DL.getIndexSizeInBits(0u)) {
349 SmallPtrSet<const BasicBlock *, 4> Dests;
350 for (auto I : SI.cases())
351 Dests.insert(I.getCaseSuccessor());
352
353 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
354 DL))
355 return 1;
356 }
357
358 // Check if suitable for a jump table.
359 if (IsJTAllowed) {
360 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
361 return N;
362 uint64_t Range =
363 (MaxCaseVal - MinCaseVal)
364 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
365 // Check whether a range of clusters is dense enough for a jump table
366 if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
367 JumpTableSize = Range;
368 return 1;
369 }
370 }
371 return N;
372 }
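// Illustrative walk-through, under assumed target parameters (not from the
// source above): for a switch with cases {10, 11, 12, 13, 100} and
// getMinimumJumpTableEntries() == 4:
//   N = 5, MinCaseVal = 10, MaxCaseVal = 100.
//   Bit-test check: N fits in the index width, but isSuitableForBitTests
//     also requires the case span to fit in a machine word and few enough
//     destinations, so it may well reject this set.
//   Jump-table check: Range = (100 - 10) + 1 = 91; isSuitableForJumpTable
//     decides whether 5 clusters over a range of 91 is dense enough. If so,
//     JumpTableSize becomes 91 and the function returns 1; otherwise it
//     falls through and returns N == 5.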
373
374 unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
375
376 unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
377
378 bool shouldBuildLookupTables() {
379 const TargetLoweringBase *TLI = getTLI();
380 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
382 }
383
384 bool haveFastSqrt(Type *Ty) {
385 const TargetLoweringBase *TLI = getTLI();
386 EVT VT = TLI->getValueType(DL, Ty);
387 return TLI->isTypeLegal(VT) &&
388 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
389 }
390
391 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
392 return true;
393 }
394
395 unsigned getFPOpCost(Type *Ty) {
396 // Check whether FADD is available, as a proxy for floating-point in
397 // general.
398 const TargetLoweringBase *TLI = getTLI();
399 EVT VT = TLI->getValueType(DL, Ty);
400 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401 return TargetTransformInfo::TCC_Basic;
402 return TargetTransformInfo::TCC_Expensive;
403 }
404
405 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406 const TargetLoweringBase *TLI = getTLI();
407 switch (Opcode) {
408 default: break;
409 case Instruction::Trunc:
410 if (TLI->isTruncateFree(OpTy, Ty))
411 return TargetTransformInfo::TCC_Free;
412 return TargetTransformInfo::TCC_Basic;
413 case Instruction::ZExt:
414 if (TLI->isZExtFree(OpTy, Ty))
415 return TargetTransformInfo::TCC_Free;
416 return TargetTransformInfo::TCC_Basic;
417 }
418
419 return BaseT::getOperationCost(Opcode, Ty, OpTy);
420 }
421
422 unsigned getInliningThresholdMultiplier() { return 1; }
423
424 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
425 TTI::UnrollingPreferences &UP) {
426 // This unrolling functionality is target independent, but to provide some
427 // motivation for its intended use, for x86:
428
429 // According to the Intel 64 and IA-32 Architectures Optimization Reference
430 // Manual, Intel Core models and later have a loop stream detector (and
431 // associated uop queue) that can benefit from partial unrolling.
432 // The relevant requirements are:
433 // - The loop must have no more than 4 (8 for Nehalem and later) branches
434 // taken, and none of them may be calls.
435 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
436
437 // According to the Software Optimization Guide for AMD Family 15h
438 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
439 // and loop buffer which can benefit from partial unrolling.
440 // The relevant requirements are:
441 // - The loop must have fewer than 16 branches
442 // - The loop must have less than 40 uops in all executed loop branches
443
444 // The number of taken branches in a loop is hard to estimate here, and
445 // benchmarking has revealed that it is better not to be conservative when
446 // estimating the branch count. As a result, we'll ignore the branch limits
447 // until someone finds a case where it matters in practice.
448
449 unsigned MaxOps;
450 const TargetSubtargetInfo *ST = getST();
451 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
452 MaxOps = PartialUnrollingThreshold;
453 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
454 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
455 else
456 return;
457
458 // Scan the loop: don't unroll loops with calls.
459 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
460 ++I) {
461 BasicBlock *BB = *I;
462
463 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
464 if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
465 ImmutableCallSite CS(&*J);
466 if (const Function *F = CS.getCalledFunction()) {
467 if (!static_cast<T *>(this)->isLoweredToCall(F))
468 continue;
469 }
470
471 return;
472 }
473 }
474
475 // Enable runtime and partial unrolling up to the specified size.
476 // Enable using trip count upper bound to unroll loops.
477 UP.Partial = UP.Runtime = UP.UpperBound = true;
478 UP.PartialThreshold = MaxOps;
479
480 // Avoid unrolling when optimizing for size.
481 UP.OptSizeThreshold = 0;
482 UP.PartialOptSizeThreshold = 0;
483
484 // Set the number of instructions optimized when the "back edge"
485 // becomes a "fall through" to its default value of 2.
486 UP.BEInsns = 2;
487 }
488
489 int getInstructionLatency(const Instruction *I) {
490 if (isa<LoadInst>(I))
491 return getST()->getSchedModel().DefaultLoadLatency;
492
493 return BaseT::getInstructionLatency(I);
494 }
495
496 /// @}
497
498 /// \name Vector TTI Implementations
499 /// @{
500
501 unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
502
503 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
504
505 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
506 /// are set if the result needs to be inserted and/or extracted from vectors.
507 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
508 assert(Ty->isVectorTy() && "Can only scalarize vectors");
509 unsigned Cost = 0;
510
511 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
512 if (Insert)
513 Cost += static_cast<T *>(this)
514 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
515 if (Extract)
516 Cost += static_cast<T *>(this)
517 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
518 }
519
520 return Cost;
521 }
522
523 /// Estimate the overhead of scalarizing an instruction's unique
524 /// non-constant operands. The types of the arguments are ordinarily
525 /// scalar, in which case the costs are multiplied by VF.
526 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
527 unsigned VF) {
528 unsigned Cost = 0;
529 SmallPtrSet<const Value*, 4> UniqueOperands;
530 for (const Value *A : Args) {
531 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
532 Type *VecTy = nullptr;
533 if (A->getType()->isVectorTy()) {
534 VecTy = A->getType();
535 // If A is a vector operand, VF should be 1 or correspond to A.
536 assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
537        "Vector argument does not match VF");
538 }
539 else
540 VecTy = VectorType::get(A->getType(), VF);
541
542 Cost += getScalarizationOverhead(VecTy, false, true);
543 }
544 }
545
546 return Cost;
547 }
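// Illustrative sketch, not part of the original header: with scalar values
// %a and %b and VF = 4, only the unique non-constant operands are charged.
// 'A' and 'B' below are assumed Value* placeholders for %a and %b, and C7 is
// an i32 constant 7.
const Value *Args[] = {A, B, A, C7};
unsigned Overhead = getOperandsScalarizationOverhead(Args, /*VF=*/4);
// Overhead == getScalarizationOverhead(<4 x ty(A)>, false, true)
//           + getScalarizationOverhead(<4 x ty(B)>, false, true)
// The repeated use of A and the constant 7 contribute nothing.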
548
549 unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
550 assert(VecTy->isVectorTy());
551
552 unsigned Cost = 0;
553
554 Cost += getScalarizationOverhead(VecTy, true, false);
555 if (!Args.empty())
556 Cost += getOperandsScalarizationOverhead(Args,
557 VecTy->getVectorNumElements());
558 else
559 // When no information on arguments is provided, we add the cost
560 // associated with one argument as a heuristic.
561 Cost += getScalarizationOverhead(VecTy, false, true);
562
563 return Cost;
564 }
565
566 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
567
568 unsigned getArithmeticInstrCost(
569 unsigned Opcode, Type *Ty,
570 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
571 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
572 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
573 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
574 ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
575 // Check if any of the operands are vector operands.
576 const TargetLoweringBase *TLI = getTLI();
577 int ISD = TLI->InstructionOpcodeToISD(Opcode);
578 assert(ISD && "Invalid opcode");
579
580 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
581
582 bool IsFloat = Ty->isFPOrFPVectorTy();
583 // Assume that floating point arithmetic operations cost twice as much as
584 // integer operations.
585 unsigned OpCost = (IsFloat ? 2 : 1);
586
587 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
588 // The operation is legal. Assume it costs 1.
589 // TODO: Once we have extract/insert subvector cost we need to use them.
590 return LT.first * OpCost;
591 }
592
593 if (!TLI->isOperationExpand(ISD, LT.second)) {
594 // If the operation is custom lowered, then assume that the code is twice
595 // as expensive.
596 return LT.first * 2 * OpCost;
597 }
598
599 // Else, assume that we need to scalarize this op.
600 // TODO: If one of the types get legalized by splitting, handle this
601 // similarly to what getCastInstrCost() does.
602 if (Ty->isVectorTy()) {
603 unsigned Num = Ty->getVectorNumElements();
604 unsigned Cost = static_cast<T *>(this)
605 ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
606 // Return the cost of multiple scalar invocations plus the cost of
607 // inserting and extracting the values.
608 return getScalarizationOverhead(Ty, Args) + Num * Cost;
609 }
610
611 // We don't know anything about this scalar instruction.
612 return OpCost;
613 }
614
615 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
616 Type *SubTp) {
617 switch (Kind) {
618 case TTI::SK_Broadcast:
619 return getBroadcastShuffleOverhead(Tp);
620 case TTI::SK_Select:
621 case TTI::SK_Reverse:
622 case TTI::SK_Transpose:
623 case TTI::SK_PermuteSingleSrc:
624 case TTI::SK_PermuteTwoSrc:
625 return getPermuteShuffleOverhead(Tp);
626 case TTI::SK_ExtractSubvector:
627 return getExtractSubvectorOverhead(Tp, Index, SubTp);
628 case TTI::SK_InsertSubvector:
629 return getInsertSubvectorOverhead(Tp, Index, SubTp);
630 }
631 llvm_unreachable("Unknown TTI::ShuffleKind");
632 }
633
634 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
635 const Instruction *I = nullptr) {
636 const TargetLoweringBase *TLI = getTLI();
637 int ISD = TLI->InstructionOpcodeToISD(Opcode);
638 assert(ISD && "Invalid opcode");
639 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
640 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
641
642 // Check for NOOP conversions.
643 if (SrcLT.first == DstLT.first &&
644 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
645
646 // Bitcasts between types that are legalized to the same type are free.
647 if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
648 return 0;
649 }
650
651 if (Opcode == Instruction::Trunc &&
652 TLI->isTruncateFree(SrcLT.second, DstLT.second))
653 return 0;
654
655 if (Opcode == Instruction::ZExt &&
656 TLI->isZExtFree(SrcLT.second, DstLT.second))
657 return 0;
658
659 if (Opcode == Instruction::AddrSpaceCast &&
660 TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
661 Dst->getPointerAddressSpace()))
662 return 0;
663
664 // If this is a zext/sext of a load, return 0 if the corresponding
665 // extending load exists on target.
666 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
667 I && isa<LoadInst>(I->getOperand(0))) {
668 EVT ExtVT = EVT::getEVT(Dst);
669 EVT LoadVT = EVT::getEVT(Src);
670 unsigned LType =
671 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
672 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
673 return 0;
674 }
675
676 // If the cast is marked as legal (or promote) then assume low cost.
677 if (SrcLT.first == DstLT.first &&
678 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
679 return 1;
680
681 // Handle scalar conversions.
682 if (!Src->isVectorTy() && !Dst->isVectorTy()) {
683 // Scalar bitcasts are usually free.
684 if (Opcode == Instruction::BitCast)
685 return 0;
686
687 // Just check the op cost. If the operation is legal then assume it costs
688 // 1.
689 if (!TLI->isOperationExpand(ISD, DstLT.second))
690 return 1;
691
692 // Assume that illegal scalar instructions are expensive.
693 return 4;
694 }
695
696 // Check vector-to-vector casts.
697 if (Dst->isVectorTy() && Src->isVectorTy()) {
698 // If the cast is between same-sized registers, then the check is simple.
699 if (SrcLT.first == DstLT.first &&
700 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
701
702 // Assume that Zext is done using AND.
703 if (Opcode == Instruction::ZExt)
704 return 1;
705
706 // Assume that sext is done using SHL and SRA.
707 if (Opcode == Instruction::SExt)
708 return 2;
709
710 // Just check the op cost. If the operation is legal then assume it
711 // costs
712 // 1 and multiply by the type-legalization overhead.
713 if (!TLI->isOperationExpand(ISD, DstLT.second))
714 return SrcLT.first * 1;
715 }
716
717 // If we are legalizing by splitting, query the concrete TTI for the cost
718 // of casting the original vector twice. We also need to factor in the
719 // cost of the split itself. Count that as 1, to be consistent with
720 // TLI->getTypeLegalizationCost().
721 if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
722 TargetLowering::TypeSplitVector) ||
723 (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
724 TargetLowering::TypeSplitVector)) {
725 Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
726 Dst->getVectorNumElements() / 2);
727 Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
728 Src->getVectorNumElements() / 2);
729 T *TTI = static_cast<T *>(this);
730 return TTI->getVectorSplitCost() +
731 (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
732 }
733
734 // In other cases where the source or destination are illegal, assume
735 // the operation will get scalarized.
736 unsigned Num = Dst->getVectorNumElements();
737 unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
738 Opcode, Dst->getScalarType(), Src->getScalarType(), I);
739
740 // Return the cost of multiple scalar invocations plus the cost of
741 // inserting and extracting the values.
742 return getScalarizationOverhead(Dst, true, true) + Num * Cost;
743 }
744
745 // We already handled vector-to-vector and scalar-to-scalar conversions.
746 // This is where we handle bitcast between vectors and scalars.
747 // We need to assume that the conversion is scalarized in one way
748 // or another.
749 if (Opcode == Instruction::BitCast)
750 // Illegal bitcasts are done by storing and loading from a stack slot.
751 return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
752 : 0) +
753 (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
754 : 0);
755
756 llvm_unreachable("Unhandled cast");
757 }
758
759 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
760 VectorType *VecTy, unsigned Index) {
761 return static_cast<T *>(this)->getVectorInstrCost(
762 Instruction::ExtractElement, VecTy, Index) +
763 static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
764 VecTy->getElementType());
765 }
766
767 unsigned getCFInstrCost(unsigned Opcode) {
768 // Branches are assumed to be predicted.
769 return 0;
770 }
771
772 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
773 const Instruction *I) {
774 const TargetLoweringBase *TLI = getTLI();
775 int ISD = TLI->InstructionOpcodeToISD(Opcode);
776 assert(ISD && "Invalid opcode");
(3) Assuming 'ISD' is not equal to 0
(4) '?' condition is true
777
778 // Selects on vectors are actually vector selects.
779 if (ISD == ISD::SELECT) {
(5) Assuming 'ISD' is not equal to SELECT
(6) Taking false branch
780 assert(CondTy && "CondTy must exist");
781 if (CondTy->isVectorTy())
782 ISD = ISD::VSELECT;
783 }
784 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
785
786 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
(7) Taking false branch
787 !TLI->isOperationExpand(ISD, LT.second)) {
788 // The operation is legal. Assume it costs 1. Multiply
789 // by the type-legalization overhead.
790 return LT.first * 1;
791 }
792
793 // Otherwise, assume that the cast is scalarized.
794 // TODO: If one of the types get legalized by splitting, handle this
795 // similarly to what getCastInstrCost() does.
796 if (ValTy->isVectorTy()) {
(8) Taking true branch
797 unsigned Num = ValTy->getVectorNumElements();
798 if (CondTy)
(9) Assuming 'CondTy' is null
(10) Taking false branch
799 CondTy = CondTy->getScalarType();
800 unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
(12) Calling 'AArch64TTIImpl::getCmpSelInstrCost'
801 Opcode, ValTy->getScalarType(), CondTy, I);
(11) Passing null pointer value via 3rd parameter 'CondTy'
802
803 // Return the cost of multiple scalar invocations plus the cost of
804 // inserting and extracting the values.
805 return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
806 }
807
808 // Unknown scalar opcode.
809 return 1;
810 }
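// The path notes above show 'CondTy' arriving as null at the recursive call
// and then being forwarded to the AArch64 override, which expects a non-null
// condition type. A minimal defensive sketch, assuming the scalarized
// condition can be modeled as i1 (an illustration only, not the actual
// upstream fix):
if (ValTy->isVectorTy()) {
  unsigned Num = ValTy->getVectorNumElements();
  if (CondTy)
    CondTy = CondTy->getScalarType();
  else
    CondTy = Type::getInt1Ty(ValTy->getContext()); // never pass null down
  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
      Opcode, ValTy->getScalarType(), CondTy, I);
  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
}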
811
812 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
813 std::pair<unsigned, MVT> LT =
814 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
815
816 return LT.first;
817 }
818
819 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
820 unsigned AddressSpace, const Instruction *I = nullptr) {
821 assert(!Src->isVoidTy() && "Invalid type");
822 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
823
824 // Assuming that all loads of legal types cost 1.
825 unsigned Cost = LT.first;
826
827 if (Src->isVectorTy() &&
828 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
829 // This is a vector load that legalizes to a larger type than the vector
830 // itself. Unless the corresponding extending load or truncating store is
831 // legal, this will scalarize.
832 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
833 EVT MemVT = getTLI()->getValueType(DL, Src);
834 if (Opcode == Instruction::Store)
835 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
836 else
837 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
838
839 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
840 // This is a vector load/store for some illegal type that is scalarized.
841 // We must account for the cost of building or decomposing the vector.
842 Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
843 Opcode == Instruction::Store);
844 }
845 }
846
847 return Cost;
848 }
849
850 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
851 unsigned Factor,
852 ArrayRef<unsigned> Indices,
853 unsigned Alignment, unsigned AddressSpace,
854 bool UseMaskForCond = false,
855 bool UseMaskForGaps = false) {
856 VectorType *VT = dyn_cast<VectorType>(VecTy);
857 assert(VT && "Expect a vector type for interleaved memory op");
858
859 unsigned NumElts = VT->getNumElements();
860 assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
861
862 unsigned NumSubElts = NumElts / Factor;
863 VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
864
865 // Firstly, the cost of load/store operation.
866 unsigned Cost;
867 if (UseMaskForCond || UseMaskForGaps)
868 Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
869 Opcode, VecTy, Alignment, AddressSpace);
870 else
871 Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
872 AddressSpace);
873
874 // Legalize the vector type, and get the legalized and unlegalized type
875 // sizes.
876 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
877 unsigned VecTySize =
878 static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
879 unsigned VecTyLTSize = VecTyLT.getStoreSize();
880
881 // Return the ceiling of dividing A by B.
882 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
883
884 // Scale the cost of the memory operation by the fraction of legalized
885 // instructions that will actually be used. We shouldn't account for the
886 // cost of dead instructions since they will be removed.
887 //
888 // E.g., An interleaved load of factor 8:
889 // %vec = load <16 x i64>, <16 x i64>* %ptr
890 // %v0 = shufflevector %vec, undef, <0, 8>
891 //
892 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
893 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
894 // type). The other loads are unused.
895 //
896 // We only scale the cost of loads since interleaved store groups aren't
897 // allowed to have gaps.
898 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
899 // The number of loads of a legal type it will take to represent a load
900 // of the unlegalized vector type.
901 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
902
903 // The number of elements of the unlegalized type that correspond to a
904 // single legal instruction.
905 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
906
907 // Determine which legal instructions will be used.
908 BitVector UsedInsts(NumLegalInsts, false);
909 for (unsigned Index : Indices)
910 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
911 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
912
913 // Scale the cost of the load by the fraction of legal instructions that
914 // will be used.
915 Cost *= UsedInsts.count() / NumLegalInsts;
916 }
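// Worked example for the scaling above, assuming the <16 x i64> load from
// the comment legalizes to eight v2i64 loads (16-byte legal vectors):
//   VecTySize = 128 bytes, VecTyLTSize = 16 -> NumLegalInsts = 8
//   NumElts = 16, so NumEltsPerLegalInst = ceil(16, 8) = 2
//   Factor = 8, Indices = {0}, NumSubElts = 2:
//     Elt 0 -> legal inst (0 + 0*8)/2 = 0, Elt 1 -> (0 + 1*8)/2 = 4
//   UsedInsts = {0, 4}, i.e. only 2 of the 8 legal loads feed live lanes,
//   and the memory cost is scaled by UsedInsts.count() / NumLegalInsts
//   (note this is an unsigned integer division as written).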
917
918 // Then add the cost of the interleave operation.
919 if (Opcode == Instruction::Load) {
920 // The interleave cost is similar to extracting the sub vectors' elements
921 // from the wide vector and inserting them into the sub vectors.
922 //
923 // E.g. An interleaved load of factor 2 (with one member of index 0):
924 // %vec = load <8 x i32>, <8 x i32>* %ptr
925 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
926 // The cost is estimated as extracting elements 0, 2, 4, 6 from the
927 // <8 x i32> vector and inserting them into a <4 x i32> vector.
928
929 assert(Indices.size() <= Factor &&
930        "Interleaved memory op has too many members");
931
932 for (unsigned Index : Indices) {
933 assert(Index < Factor && "Invalid index for interleaved memory op");
934
935 // Extract elements from loaded vector for each sub vector.
936 for (unsigned i = 0; i < NumSubElts; i++)
937 Cost += static_cast<T *>(this)->getVectorInstrCost(
938 Instruction::ExtractElement, VT, Index + i * Factor);
939 }
940
941 unsigned InsSubCost = 0;
942 for (unsigned i = 0; i < NumSubElts; i++)
943 InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
944 Instruction::InsertElement, SubVT, i);
945
946 Cost += Indices.size() * InsSubCost;
947 } else {
948 // The interleave cost is that of extracting all elements from the sub
949 // vectors and inserting them into the wide vector.
950 //
951 // E.g. An interleaved store of factor 2:
952 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
953 // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
954 // The cost is estimated as extracting all elements from both <4 x i32>
955 // vectors and inserting them into the <8 x i32> vector.
956
957 unsigned ExtSubCost = 0;
958 for (unsigned i = 0; i < NumSubElts; i++)
959 ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
960 Instruction::ExtractElement, SubVT, i);
961 Cost += ExtSubCost * Factor;
962
963 for (unsigned i = 0; i < NumElts; i++)
964 Cost += static_cast<T *>(this)
965 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
966 }
967
968 if (!UseMaskForCond)
969 return Cost;
970
971 Type *I8Type = Type::getInt8Ty(VT->getContext());
972 VectorType *MaskVT = VectorType::get(I8Type, NumElts);
973 SubVT = VectorType::get(I8Type, NumSubElts);
974
975 // The mask shuffling cost is that of extracting all the elements of the
976 // mask and inserting each of them Factor times into the wide vector:
977 //
978 // E.g. an interleaved group with factor 3:
979 // %mask = icmp ult <8 x i32> %vec1, %vec2
980 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
981 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
982 // The cost is estimated as extracting all mask elements from the <8xi1>
983 // mask vector and inserting them Factor times into the <24xi1> shuffled
984 // mask vector.
985 for (unsigned i = 0; i < NumSubElts; i++)
986 Cost += static_cast<T *>(this)->getVectorInstrCost(
987 Instruction::ExtractElement, SubVT, i);
988
989 for (unsigned i = 0; i < NumElts; i++)
990 Cost += static_cast<T *>(this)->getVectorInstrCost(
991 Instruction::InsertElement, MaskVT, i);
992
993 // The Gaps mask is invariant and created outside the loop; therefore, the
994 // cost of creating it is not accounted for here. However, if we have both
995 // a MaskForGaps and some other mask that guards the execution of the
996 // memory access, we need to account for the cost of And-ing the two masks
997 // inside the loop.
998 if (UseMaskForGaps)
999 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1000 BinaryOperator::And, MaskVT);
1001
1002 return Cost;
1003 }
1004
1005 /// Get intrinsic cost based on arguments.
1006 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1007 ArrayRef<Value *> Args, FastMathFlags FMF,
1008 unsigned VF = 1) {
1009 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1010 assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1011 auto *ConcreteTTI = static_cast<T *>(this);
1012
1013 switch (IID) {
1014 default: {
1015 // Assume that we need to scalarize this intrinsic.
1016 SmallVector<Type *, 4> Types;
1017 for (Value *Op : Args) {
1018 Type *OpTy = Op->getType();
1019 assert(VF == 1 || !OpTy->isVectorTy());
1020 Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1021 }
1022
1023 if (VF > 1 && !RetTy->isVoidTy())
1024 RetTy = VectorType::get(RetTy, VF);
1025
1026 // Compute the scalarization overhead based on Args for a vector
1027 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1028 // CostModel will pass a vector RetTy and VF is 1.
1029 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1030 if (RetVF > 1 || VF > 1) {
1031 ScalarizationCost = 0;
1032 if (!RetTy->isVoidTy())
1033 ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1034 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1035 }
1036
1037 return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1038 ScalarizationCost);
1039 }
1040 case Intrinsic::masked_scatter: {
1041 assert(VF == 1 && "Can't vectorize types here.");
1042 Value *Mask = Args[3];
1043 bool VarMask = !isa<Constant>(Mask);
1044 unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1045 return ConcreteTTI->getGatherScatterOpCost(
1046 Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1047 }
1048 case Intrinsic::masked_gather: {
1049 assert(VF == 1 && "Can't vectorize types here.");
1050 Value *Mask = Args[2];
1051 bool VarMask = !isa<Constant>(Mask);
1052 unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1053 return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1054 Args[0], VarMask, Alignment);
1055 }
1056 case Intrinsic::experimental_vector_reduce_add:
1057 case Intrinsic::experimental_vector_reduce_mul:
1058 case Intrinsic::experimental_vector_reduce_and:
1059 case Intrinsic::experimental_vector_reduce_or:
1060 case Intrinsic::experimental_vector_reduce_xor:
1061 case Intrinsic::experimental_vector_reduce_fadd:
1062 case Intrinsic::experimental_vector_reduce_fmul:
1063 case Intrinsic::experimental_vector_reduce_smax:
1064 case Intrinsic::experimental_vector_reduce_smin:
1065 case Intrinsic::experimental_vector_reduce_fmax:
1066 case Intrinsic::experimental_vector_reduce_fmin:
1067 case Intrinsic::experimental_vector_reduce_umax:
1068 case Intrinsic::experimental_vector_reduce_umin:
1069 return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1070 case Intrinsic::fshl:
1071 case Intrinsic::fshr: {
1072 Value *X = Args[0];
1073 Value *Y = Args[1];
1074 Value *Z = Args[2];
1075 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1076 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1077 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1078 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1079 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1080 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1081 : TTI::OP_None;
1082 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1083 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1084 unsigned Cost = 0;
1085 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1086 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1087 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1088 OpKindX, OpKindZ, OpPropsX);
1089 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1090 OpKindY, OpKindZ, OpPropsY);
1091 // Non-constant shift amounts require a modulo.
1092 if (OpKindZ != TTI::OK_UniformConstantValue &&
1093 OpKindZ != TTI::OK_NonUniformConstantValue)
1094 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1095 OpKindZ, OpKindBW, OpPropsZ,
1096 OpPropsBW);
1097 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1098 if (X != Y) {
1099 Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1100 if (RetVF > 1)
1101 CondTy = VectorType::get(CondTy, RetVF);
1102 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1103 CondTy, nullptr);
1104 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1105 CondTy, nullptr);
1106 }
1107 return Cost;
1108 }
1109 }
1110 }
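// Illustrative tally for the fshl/fshr case above (a sketch, not any
// target's actual numbers): for `fshl i32 %x, i32 %y, i32 7` the shift
// amount is a uniform constant, so the URem term is skipped and
//   Cost = Or + Sub + Shl + LShr   (each an i32 getArithmeticInstrCost)
// and, because %x != %y (not a rotate), one ICmp and one Select against an
// i1 condition are added to cover the shift-by-zero case.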
1111
1112 /// Get intrinsic cost based on argument types.
1113 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1114 /// cost of scalarizing the arguments and the return value will be computed
1115 /// based on types.
1116 unsigned getIntrinsicInstrCost(
1117 Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1118 unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1119 SmallVector<unsigned, 2> ISDs;
1120 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1121 switch (IID) {
1122 default: {
1123 // Assume that we need to scalarize this intrinsic.
1124 unsigned ScalarizationCost = ScalarizationCostPassed;
1125 unsigned ScalarCalls = 1;
1126 Type *ScalarRetTy = RetTy;
1127 if (RetTy->isVectorTy()) {
1128 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1129 ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1130 ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1131 ScalarRetTy = RetTy->getScalarType();
1132 }
1133 SmallVector<Type *, 4> ScalarTys;
1134 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1135 Type *Ty = Tys[i];
1136 if (Ty->isVectorTy()) {
1137 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1138 ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1139 ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1140 Ty = Ty->getScalarType();
1141 }
1142 ScalarTys.push_back(Ty);
1143 }
1144 if (ScalarCalls == 1)
1145 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1146
1147 unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1148 IID, ScalarRetTy, ScalarTys, FMF);
1149
1150 return ScalarCalls * ScalarCost + ScalarizationCost;
1151 }
1152 // Look for intrinsics that can be lowered directly or turned into a scalar
1153 // intrinsic call.
1154 case Intrinsic::sqrt:
1155 ISDs.push_back(ISD::FSQRT);
1156 break;
1157 case Intrinsic::sin:
1158 ISDs.push_back(ISD::FSIN);
1159 break;
1160 case Intrinsic::cos:
1161 ISDs.push_back(ISD::FCOS);
1162 break;
1163 case Intrinsic::exp:
1164 ISDs.push_back(ISD::FEXP);
1165 break;
1166 case Intrinsic::exp2:
1167 ISDs.push_back(ISD::FEXP2);
1168 break;
1169 case Intrinsic::log:
1170 ISDs.push_back(ISD::FLOG);
1171 break;
1172 case Intrinsic::log10:
1173 ISDs.push_back(ISD::FLOG10);
1174 break;
1175 case Intrinsic::log2:
1176 ISDs.push_back(ISD::FLOG2);
1177 break;
1178 case Intrinsic::fabs:
1179 ISDs.push_back(ISD::FABS);
1180 break;
1181 case Intrinsic::canonicalize:
1182 ISDs.push_back(ISD::FCANONICALIZE);
1183 break;
1184 case Intrinsic::minnum:
1185 ISDs.push_back(ISD::FMINNUM);
1186 if (FMF.noNaNs())
1187 ISDs.push_back(ISD::FMINIMUM);
1188 break;
1189 case Intrinsic::maxnum:
1190 ISDs.push_back(ISD::FMAXNUM);
1191 if (FMF.noNaNs())
1192 ISDs.push_back(ISD::FMAXIMUM);
1193 break;
1194 case Intrinsic::copysign:
1195 ISDs.push_back(ISD::FCOPYSIGN);
1196 break;
1197 case Intrinsic::floor:
1198 ISDs.push_back(ISD::FFLOOR);
1199 break;
1200 case Intrinsic::ceil:
1201 ISDs.push_back(ISD::FCEIL);
1202 break;
1203 case Intrinsic::trunc:
1204 ISDs.push_back(ISD::FTRUNC);
1205 break;
1206 case Intrinsic::nearbyint:
1207 ISDs.push_back(ISD::FNEARBYINT);
1208 break;
1209 case Intrinsic::rint:
1210 ISDs.push_back(ISD::FRINT);
1211 break;
1212 case Intrinsic::round:
1213 ISDs.push_back(ISD::FROUND);
1214 break;
1215 case Intrinsic::pow:
1216 ISDs.push_back(ISD::FPOW);
1217 break;
1218 case Intrinsic::fma:
1219 ISDs.push_back(ISD::FMA);
1220 break;
1221 case Intrinsic::fmuladd:
1222 ISDs.push_back(ISD::FMA);
1223 break;
1224 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1225 case Intrinsic::lifetime_start:
1226 case Intrinsic::lifetime_end:
1227 case Intrinsic::sideeffect:
1228 return 0;
1229 case Intrinsic::masked_store:
1230 return static_cast<T *>(this)
1231 ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
1232 case Intrinsic::masked_load:
1233 return static_cast<T *>(this)
1234 ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1235 case Intrinsic::experimental_vector_reduce_add:
1236 return static_cast<T *>(this)->getArithmeticReductionCost(
1237 Instruction::Add, Tys[0], /*IsPairwiseForm=*/false);
1238 case Intrinsic::experimental_vector_reduce_mul:
1239 return static_cast<T *>(this)->getArithmeticReductionCost(
1240 Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false);
1241 case Intrinsic::experimental_vector_reduce_and:
1242 return static_cast<T *>(this)->getArithmeticReductionCost(
1243 Instruction::And, Tys[0], /*IsPairwiseForm=*/false);
1244 case Intrinsic::experimental_vector_reduce_or:
1245 return static_cast<T *>(this)->getArithmeticReductionCost(
1246 Instruction::Or, Tys[0], /*IsPairwiseForm=*/false);
1247 case Intrinsic::experimental_vector_reduce_xor:
1248 return static_cast<T *>(this)->getArithmeticReductionCost(
1249 Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false);
1250 case Intrinsic::experimental_vector_reduce_fadd:
1251 return static_cast<T *>(this)->getArithmeticReductionCost(
1252 Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false);
1253 case Intrinsic::experimental_vector_reduce_fmul:
1254 return static_cast<T *>(this)->getArithmeticReductionCost(
1255 Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false);
1256 case Intrinsic::experimental_vector_reduce_smax:
1257 case Intrinsic::experimental_vector_reduce_smin:
1258 case Intrinsic::experimental_vector_reduce_fmax:
1259 case Intrinsic::experimental_vector_reduce_fmin:
1260 return static_cast<T *>(this)->getMinMaxReductionCost(
1261 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1262 /*IsSigned=*/true);
1263 case Intrinsic::experimental_vector_reduce_umax:
1264 case Intrinsic::experimental_vector_reduce_umin:
1265 return static_cast<T *>(this)->getMinMaxReductionCost(
1266 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1267 /*IsSigned=*/false);
1268 case Intrinsic::ctpop:
1269 ISDs.push_back(ISD::CTPOP);
1270 // In case of legalization use TCC_Expensive. This is cheaper than a
1271 // library call but still not a cheap instruction.
1272 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1273 break;
1274 // FIXME: ctlz, cttz, ...
1275 }
1276
1277 const TargetLoweringBase *TLI = getTLI();
1278 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1279
1280 SmallVector<unsigned, 2> LegalCost;
1281 SmallVector<unsigned, 2> CustomCost;
1282 for (unsigned ISD : ISDs) {
1283 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1284 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1285 TLI->isFAbsFree(LT.second)) {
1286 return 0;
1287 }
1288
1289 // The operation is legal. Assume it costs 1.
1290 // If the type is split to multiple registers, assume that there is some
1291 // overhead to this.
1292 // TODO: Once we have extract/insert subvector cost we need to use them.
1293 if (LT.first > 1)
1294 LegalCost.push_back(LT.first * 2);
1295 else
1296 LegalCost.push_back(LT.first * 1);
1297 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1298 // If the operation is custom lowered then assume
1299 // that the code is twice as expensive.
1300 CustomCost.push_back(LT.first * 2);
1301 }
1302 }
1303
1304 auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1305 if (MinLegalCostI != LegalCost.end())
1306 return *MinLegalCostI;
1307
1308 auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
1309 if (MinCustomCostI != CustomCost.end())
1310 return *MinCustomCostI;
1311
1312 // If we can't lower fmuladd into an FMA estimate the cost as a floating
1313 // point mul followed by an add.
1314 if (IID == Intrinsic::fmuladd)
1315 return static_cast<T *>(this)
1316 ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1317 static_cast<T *>(this)
1318 ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1319
1320 // Else, assume that we need to scalarize this intrinsic. For math builtins
1321 // this will emit a costly libcall, adding call overhead and spills. Make it
1322 // very expensive.
1323 if (RetTy->isVectorTy()) {
1324 unsigned ScalarizationCost =
1325 ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1326 ? ScalarizationCostPassed
1327 : getScalarizationOverhead(RetTy, true, false));
1328 unsigned ScalarCalls = RetTy->getVectorNumElements();
1329 SmallVector<Type *, 4> ScalarTys;
1330 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1331 Type *Ty = Tys[i];
1332 if (Ty->isVectorTy())
1333 Ty = Ty->getScalarType();
1334 ScalarTys.push_back(Ty);
1335 }
1336 unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1337 IID, RetTy->getScalarType(), ScalarTys, FMF);
1338 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1339 if (Tys[i]->isVectorTy()) {
1340 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1341 ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1342 ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1343 }
1344 }
1345
1346 return ScalarCalls * ScalarCost + ScalarizationCost;
1347 }
1348
1349 // This is going to be turned into a library call, make it expensive.
1350 return SingleCallCost;
1351 }
1352
1353 /// Compute a cost of the given call instruction.
1354 ///
1355 /// Compute the cost of calling function F with return type RetTy and
1356 /// argument types Tys. F might be nullptr, in this case the cost of an
1357 /// arbitrary call with the specified signature will be returned.
1358 /// This is used, for instance, when we estimate call of a vector
1359 /// counterpart of the given function.
1360 /// \param F Called function, might be nullptr.
1361 /// \param RetTy Return value types.
1362 /// \param Tys Argument types.
1363 /// \returns The cost of Call instruction.
1364 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1365 return 10;
1366 }
1367
1368 unsigned getNumberOfParts(Type *Tp) {
1369 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1370 return LT.first;
1371 }
1372
1373 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1374 const SCEV *) {
1375 return 0;
1376 }
1377
1378 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1379 /// We're assuming that reduction operations are performed in the following way:
1380 /// 1. Non-pairwise reduction
1381 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1382 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1383 /// \----------------v-------------/ \----------v------------/
1384 /// n/2 elements n/2 elements
1385 /// %red1 = op <n x t> %val, <n x t> val1
1386 /// After this operation we have a vector %red1 where only the first n/2
1387 /// elements are meaningful, the second n/2 elements are undefined and can be
1388 /// dropped. All other operations are actually working with the vector of
1389 /// length n/2, not n, though the real vector length is still n.
1390 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1391 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1392 /// \----------------v-------------/ \----------v------------/
1393 /// n/4 elements 3*n/4 elements
1394 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1395 /// length n/2, the resulting vector has length n/4 etc.
1396 /// 2. Pairwise reduction:
1397 /// Everything is the same except for an additional shuffle operation which
1398 /// is used to produce operands for pairwise kind of reductions.
1399 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1400 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1401 /// \-------------v----------/ \----------v------------/
1402 /// n/2 elements n/2 elements
1403 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1404 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1405 /// \-------------v----------/ \----------v------------/
1406 /// n/2 elements n/2 elements
1407 /// %red1 = op <n x t> %val1, <n x t> val2
1408 /// Again, the operation is performed on <n x t> vector, but the resulting
1409 /// vector %red1 is <n/2 x t> vector.
1410 ///
1411 /// The cost model should take into account that the actual length of the
1412 /// vector is reduced on each iteration.
1413 unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1414 bool IsPairwise) {
1415 assert(Ty->isVectorTy() && "Expect a vector type");
1416 Type *ScalarTy = Ty->getVectorElementType();
1417 unsigned NumVecElts = Ty->getVectorNumElements();
1418 unsigned NumReduxLevels = Log2_32(NumVecElts);
1419 unsigned ArithCost = 0;
1420 unsigned ShuffleCost = 0;
1421 auto *ConcreteTTI = static_cast<T *>(this);
1422 std::pair<unsigned, MVT> LT =
1423 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1424 unsigned LongVectorCount = 0;
1425 unsigned MVTLen =
1426 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1427 while (NumVecElts > MVTLen) {
1428 NumVecElts /= 2;
1429 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1430 // Assume the pairwise shuffles add a cost.
1431 ShuffleCost += (IsPairwise + 1) *
1432 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1433 NumVecElts, SubTy);
1434 ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1435 Ty = SubTy;
1436 ++LongVectorCount;
1437 }
1438
1439 NumReduxLevels -= LongVectorCount;
1440
1441 // The minimal length of the vector is limited by the real length of vector
1442 // operations performed on the current platform. That's why several final
1443 // reduction operations are performed on the vectors with the same
1444 // architecture-dependent length.
1445
1446 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1447 // reductions need two shuffles on every level but the last one; on that
1448 // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1449 unsigned NumShuffles = NumReduxLevels;
1450 if (IsPairwise && NumReduxLevels >= 1)
1451 NumShuffles += NumReduxLevels - 1;
1452 ShuffleCost += NumShuffles *
1453 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1454 0, Ty);
1455 ArithCost += NumReduxLevels *
1456 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1457 return ShuffleCost + ArithCost +
1458 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1459 }
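// Illustrative walk-through, assuming a target whose widest legal vector is
// <4 x i32> (MVTLen == 4), for a non-pairwise add reduction of <8 x i32>:
//   NumReduxLevels = Log2_32(8) = 3.
//   Splitting loop (one iteration): one SK_ExtractSubvector shuffle plus one
//     <4 x i32> add; LongVectorCount = 1 and Ty becomes <4 x i32>.
//   Remaining levels: 3 - 1 = 2, so two SK_PermuteSingleSrc shuffles and two
//     more <4 x i32> adds.
//   Finally, one extractelement of lane 0 produces the scalar result.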
1460
1461 /// Try to calculate op costs for min/max reduction operations.
1462 /// \param CondTy Conditional type for the Select instruction.
1463 unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1464 bool) {
1465 assert(Ty->isVectorTy() && "Expect a vector type");
1466 Type *ScalarTy = Ty->getVectorElementType();
1467 Type *ScalarCondTy = CondTy->getVectorElementType();
1468 unsigned NumVecElts = Ty->getVectorNumElements();
1469 unsigned NumReduxLevels = Log2_32(NumVecElts);
1470 unsigned CmpOpcode;
1471 if (Ty->isFPOrFPVectorTy()) {
1472 CmpOpcode = Instruction::FCmp;
1473 } else {
1474 assert(Ty->isIntOrIntVectorTy() &&
1475        "expecting floating point or integer type for min/max reduction");
1476 CmpOpcode = Instruction::ICmp;
1477 }
1478 unsigned MinMaxCost = 0;
1479 unsigned ShuffleCost = 0;
1480 auto *ConcreteTTI = static_cast<T *>(this);
1481 std::pair<unsigned, MVT> LT =
1482 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1483 unsigned LongVectorCount = 0;
1484 unsigned MVTLen =
1485 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1486 while (NumVecElts > MVTLen) {
1487 NumVecElts /= 2;
1488 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1489 CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1490
1491 // Assume the pairwise shuffles add a cost.
1492 ShuffleCost += (IsPairwise + 1) *
1493 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1494 NumVecElts, SubTy);
1495 MinMaxCost +=
1496 ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1497 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1498 nullptr);
1499 Ty = SubTy;
1500 ++LongVectorCount;
1501 }
1502
1503 NumReduxLevels -= LongVectorCount;
1504
1505 // The minimal length of the vector is limited by the real length of vector
1506 // operations performed on the current platform. That's why several final
1507 // reduction operations are performed on the vectors with the same
1508 // architecture-dependent length.
1509
1510 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1511 // reductions need two shuffles on every level but the last; on the last
1512 // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1513 unsigned NumShuffles = NumReduxLevels;
1514 if (IsPairwise && NumReduxLevels >= 1)
1515 NumShuffles += NumReduxLevels - 1;
1516 ShuffleCost += NumShuffles *
1517 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1518 0, Ty);
1519 MinMaxCost +=
1520 NumReduxLevels *
1521 (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1522 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1523 nullptr));
1524 // The last min/max should be in vector registers and we counted it above.
1525 // So just need a single extractelement.
1526 return ShuffleCost + MinMaxCost +
1527 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1528 }
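
A minimal usage sketch of this hook through the public TargetTransformInfo wrapper (the names TTI and Ctx and the surrounding includes are assumed; the wrapper signature matches this LLVM 8 snapshot):

  // Query the cost of a non-pairwise signed min/max reduction of v8i32.
  Type *Ty     = VectorType::get(Type::getInt32Ty(Ctx), 8);  // value vector
  Type *CondTy = VectorType::get(Type::getInt1Ty(Ctx), 8);   // per-lane i1 mask
  int Cost = TTI.getMinMaxReductionCost(Ty, CondTy,
                                        /*IsPairwiseForm=*/false,
                                        /*IsUnsigned=*/false);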
1529
1530 unsigned getVectorSplitCost() { return 1; }
1531
1532 /// @}
1533};
1534
1535/// Concrete BasicTTIImpl that can be used if no further customization
1536/// is needed.
1537class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1538 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1539
1540 friend class BasicTTIImplBase<BasicTTIImpl>;
1541
1542 const TargetSubtargetInfo *ST;
1543 const TargetLoweringBase *TLI;
1544
1545 const TargetSubtargetInfo *getST() const { return ST; }
1546 const TargetLoweringBase *getTLI() const { return TLI; }
1547
1548public:
1549 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1550};
1551
1552} // end namespace llvm
1553
1554#endif // LLVM_CODEGEN_BASICTTIIMPL_H

/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h

1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// This file describes how to lower LLVM code to machine code. This has two
12/// main components:
13///
14/// 1. Which ValueTypes are natively supported by the target.
15/// 2. Which operations are supported for supported ValueTypes.
16/// 3. Cost thresholds for alternative implementations of certain operations.
17///
18/// In addition it has a few other components, like information about FP
19/// immediates.
20///
21//===----------------------------------------------------------------------===//
22
23#ifndef LLVM_CODEGEN_TARGETLOWERING_H
24#define LLVM_CODEGEN_TARGETLOWERING_H
25
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/ADT/StringRef.h"
32#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
33#include "llvm/CodeGen/DAGCombine.h"
34#include "llvm/CodeGen/ISDOpcodes.h"
35#include "llvm/CodeGen/RuntimeLibcalls.h"
36#include "llvm/CodeGen/SelectionDAG.h"
37#include "llvm/CodeGen/SelectionDAGNodes.h"
38#include "llvm/CodeGen/TargetCallingConv.h"
39#include "llvm/CodeGen/ValueTypes.h"
40#include "llvm/IR/Attributes.h"
41#include "llvm/IR/CallSite.h"
42#include "llvm/IR/CallingConv.h"
43#include "llvm/IR/DataLayout.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/IRBuilder.h"
47#include "llvm/IR/InlineAsm.h"
48#include "llvm/IR/Instruction.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Type.h"
51#include "llvm/MC/MCRegisterInfo.h"
52#include "llvm/Support/AtomicOrdering.h"
53#include "llvm/Support/Casting.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MachineValueType.h"
56#include "llvm/Target/TargetMachine.h"
57#include <algorithm>
58#include <cassert>
59#include <climits>
60#include <cstdint>
61#include <iterator>
62#include <map>
63#include <string>
64#include <utility>
65#include <vector>
66
67namespace llvm {
68
69class BranchProbability;
70class CCState;
71class CCValAssign;
72class Constant;
73class FastISel;
74class FunctionLoweringInfo;
75class GlobalValue;
76class IntrinsicInst;
77struct KnownBits;
78class LLVMContext;
79class MachineBasicBlock;
80class MachineFunction;
81class MachineInstr;
82class MachineJumpTableInfo;
83class MachineLoop;
84class MachineRegisterInfo;
85class MCContext;
86class MCExpr;
87class Module;
88class TargetRegisterClass;
89class TargetLibraryInfo;
90class TargetRegisterInfo;
91class Value;
92
93namespace Sched {
94
95 enum Preference {
96 None, // No preference
97 Source, // Follow source order.
98 RegPressure, // Scheduling for lowest register pressure.
99 Hybrid, // Scheduling for both latency and register pressure.
100 ILP, // Scheduling for ILP in low register pressure mode.
101 VLIW // Scheduling for VLIW targets.
102 };
103
104} // end namespace Sched
105
106/// This base class for TargetLowering contains the SelectionDAG-independent
107/// parts that can be used from the rest of CodeGen.
108class TargetLoweringBase {
109public:
110 /// This enum indicates whether operations are valid for a target, and if not,
111 /// what action should be used to make them valid.
112 enum LegalizeAction : uint8_t {
113 Legal, // The target natively supports this operation.
114 Promote, // This operation should be executed in a larger type.
115 Expand, // Try to expand this to other ops, otherwise use a libcall.
116 LibCall, // Don't try to expand this to other ops, always use a libcall.
117 Custom // Use the LowerOperation hook to implement custom lowering.
118 };
119
120 /// This enum indicates whether a type is legal for a target, and if not,
121 /// what action should be used to make them valid.
122 enum LegalizeTypeAction : uint8_t {
123 TypeLegal, // The target natively supports this type.
124 TypePromoteInteger, // Replace this integer with a larger one.
125 TypeExpandInteger, // Split this integer into two of half the size.
126 TypeSoftenFloat, // Convert this float to a same size integer type,
127 // if an operation is not supported in target HW.
128 TypeExpandFloat, // Split this float into two of half the size.
129 TypeScalarizeVector, // Replace this one-element vector with its element.
130 TypeSplitVector, // Split this vector into two of half the size.
131 TypeWidenVector, // This vector should be widened into a larger vector.
132 TypePromoteFloat // Replace this float with a larger one.
133 };
134
135 /// LegalizeKind holds the legalization kind that needs to happen to EVT
136 /// in order to type-legalize it.
137 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
138
139 /// Enum that describes how the target represents true/false values.
140 enum BooleanContent {
141 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
142 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
143 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
144 };
145
146 /// Enum that describes what type of support for selects the target has.
147 enum SelectSupportKind {
148 ScalarValSelect, // The target supports scalar selects (ex: cmov).
149 ScalarCondVectorVal, // The target supports selects with a scalar condition
150 // and vector values (ex: cmov).
151 VectorMaskSelect // The target supports vector selects with a vector
152 // mask (ex: x86 blends).
153 };
154
155 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
156 /// to, if at all. Exists because different targets have different levels of
157 /// support for these atomic instructions, and also have different options
158 /// w.r.t. what they should expand to.
159 enum class AtomicExpansionKind {
160 None, // Don't expand the instruction.
161 LLSC, // Expand the instruction into loadlinked/storeconditional; used
162 // by ARM/AArch64.
163 LLOnly, // Expand the (load) instruction into just a load-linked, which has
164 // greater atomic guarantees than a normal load.
165 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
166 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
167 };
168
169 /// Enum that specifies when a multiplication should be expanded.
170 enum class MulExpansionKind {
171 Always, // Always expand the instruction.
172 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
173 // or custom.
174 };
175
176 class ArgListEntry {
177 public:
178 Value *Val = nullptr;
179 SDValue Node = SDValue();
180 Type *Ty = nullptr;
181 bool IsSExt : 1;
182 bool IsZExt : 1;
183 bool IsInReg : 1;
184 bool IsSRet : 1;
185 bool IsNest : 1;
186 bool IsByVal : 1;
187 bool IsInAlloca : 1;
188 bool IsReturned : 1;
189 bool IsSwiftSelf : 1;
190 bool IsSwiftError : 1;
191 uint16_t Alignment = 0;
192
193 ArgListEntry()
194 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
195 IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
196 IsSwiftSelf(false), IsSwiftError(false) {}
197
198 void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx);
199 };
200 using ArgListTy = std::vector<ArgListEntry>;
201
202 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
203 ArgListTy &Args) const {};
204
205 static ISD::NodeType getExtendForContent(BooleanContent Content) {
206 switch (Content) {
207 case UndefinedBooleanContent:
208 // Extend by adding rubbish bits.
209 return ISD::ANY_EXTEND;
210 case ZeroOrOneBooleanContent:
211 // Extend by adding zero bits.
212 return ISD::ZERO_EXTEND;
213 case ZeroOrNegativeOneBooleanContent:
214 // Extend by copying the sign bit.
215 return ISD::SIGN_EXTEND;
216 }
217 llvm_unreachable("Invalid content kind")::llvm::llvm_unreachable_internal("Invalid content kind", "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 217)
;
218 }
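
A minimal sketch of how this helper is typically combined with getBooleanContents later in this class (TLI and VT are assumed names, not taken from this report):

  // Pick the extension opcode for widening a setcc-style boolean of type VT.
  ISD::NodeType ExtOp =
      TargetLoweringBase::getExtendForContent(TLI.getBooleanContents(VT));
  // UndefinedBooleanContent         -> ISD::ANY_EXTEND
  // ZeroOrOneBooleanContent         -> ISD::ZERO_EXTEND
  // ZeroOrNegativeOneBooleanContent -> ISD::SIGN_EXTEND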
219
220 /// NOTE: The TargetMachine owns TLOF.
221 explicit TargetLoweringBase(const TargetMachine &TM);
222 TargetLoweringBase(const TargetLoweringBase &) = delete;
223 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
224 virtual ~TargetLoweringBase() = default;
225
226protected:
227 /// Initialize all of the actions to default values.
228 void initActions();
229
230public:
231 const TargetMachine &getTargetMachine() const { return TM; }
232
233 virtual bool useSoftFloat() const { return false; }
234
235 /// Return the pointer type for the given address space, defaults to
236 /// the pointer type from the data layout.
237 /// FIXME: The default needs to be removed once all the code is updated.
238 MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
239 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
240 }
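
A hedged example of what this returns (the data layout details are assumed, not part of the report):

  // With a data layout whose address-space-0 pointers are 64 bits wide:
  MVT PtrVT = TLI.getPointerTy(DL);   // MVT::i64
  // An address space declared with 32-bit pointers would instead yield
  // MVT::i32 for the corresponding AS argument.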
241
242 /// Return the type for frame index, which is determined by
243 /// the alloca address space specified through the data layout.
244 MVT getFrameIndexTy(const DataLayout &DL) const {
245 return getPointerTy(DL, DL.getAllocaAddrSpace());
246 }
247
248 /// Return the type for operands of fence.
249 /// TODO: Let fence operands be of i32 type and remove this.
250 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
251 return getPointerTy(DL);
252 }
253
254 /// EVT is not used in-tree, but is used by out-of-tree targets.
255 /// Documentation for this function would be nice...
256 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
257
258 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
259 bool LegalTypes = true) const;
260
261 /// Returns the type to be used for the index operand of:
262 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
263 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
264 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
265 return getPointerTy(DL);
266 }
267
268 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
269 return true;
270 }
271
272 /// Return true if it is profitable to convert a select of FP constants into
273 /// a constant pool load whose address depends on the select condition. The
274 /// parameter may be used to differentiate a select with FP compare from
275 /// integer compare.
276 virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
277 return true;
278 }
279
280 /// Return true if multiple condition registers are available.
281 bool hasMultipleConditionRegisters() const {
282 return HasMultipleConditionRegisters;
283 }
284
285 /// Return true if the target has BitExtract instructions.
286 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
287
288 /// Return the preferred vector type legalization action.
289 virtual TargetLoweringBase::LegalizeTypeAction
290 getPreferredVectorAction(MVT VT) const {
291 // The default action for one element vectors is to scalarize
292 if (VT.getVectorNumElements() == 1)
293 return TypeScalarizeVector;
294 // The default action for other vectors is to promote
295 return TypePromoteInteger;
296 }
297
298 // There are two general methods for expanding a BUILD_VECTOR node:
299 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
300 // them together.
301 // 2. Build the vector on the stack and then load it.
302 // If this function returns true, then method (1) will be used, subject to
303 // the constraint that all of the necessary shuffles are legal (as determined
304 // by isShuffleMaskLegal). If this function returns false, then method (2) is
305 // always used. The vector type, and the number of defined values, are
306 // provided.
307 virtual bool
308 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
309 unsigned DefinedValues) const {
310 return DefinedValues < 3;
311 }
312
313 /// Return true if integer divide is usually cheaper than a sequence of
314 /// several shifts, adds, and multiplies for this target.
315 /// The definition of "cheaper" may depend on whether we're optimizing
316 /// for speed or for size.
317 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
318
319 /// Return true if the target can handle a standalone remainder operation.
320 virtual bool hasStandaloneRem(EVT VT) const {
321 return true;
322 }
323
324 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
325 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
326 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
327 return false;
328 }
329
330 /// Reciprocal estimate status values used by the functions below.
331 enum ReciprocalEstimate : int {
332 Unspecified = -1,
333 Disabled = 0,
334 Enabled = 1
335 };
336
337 /// Return a ReciprocalEstimate enum value for a square root of the given type
338 /// based on the function's attributes. If the operation is not overridden by
339 /// the function's attributes, "Unspecified" is returned and target defaults
340 /// are expected to be used for instruction selection.
341 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
342
343 /// Return a ReciprocalEstimate enum value for a division of the given type
344 /// based on the function's attributes. If the operation is not overridden by
345 /// the function's attributes, "Unspecified" is returned and target defaults
346 /// are expected to be used for instruction selection.
347 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
348
349 /// Return the refinement step count for a square root of the given type based
350 /// on the function's attributes. If the operation is not overridden by
351 /// the function's attributes, "Unspecified" is returned and target defaults
352 /// are expected to be used for instruction selection.
353 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
354
355 /// Return the refinement step count for a division of the given type based
356 /// on the function's attributes. If the operation is not overridden by
357 /// the function's attributes, "Unspecified" is returned and target defaults
358 /// are expected to be used for instruction selection.
359 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
360
361 /// Returns true if target has indicated at least one type should be bypassed.
362 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
363
364 /// Returns map of slow types for division or remainder with corresponding
365 /// fast types
366 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
367 return BypassSlowDivWidths;
368 }
369
370 /// Return true if Flow Control is an expensive operation that should be
371 /// avoided.
372 bool isJumpExpensive() const { return JumpIsExpensive; }
373
374 /// Return true if selects are only cheaper than branches if the branch is
375 /// unlikely to be predicted right.
376 bool isPredictableSelectExpensive() const {
377 return PredictableSelectIsExpensive;
378 }
379
380 /// If a branch or a select condition is skewed in one direction by more than
381 /// this factor, it is very likely to be predicted correctly.
382 virtual BranchProbability getPredictableBranchThreshold() const;
383
384 /// Return true if the following transform is beneficial:
385 /// fold (conv (load x)) -> (load (conv*)x)
386 /// On architectures that don't natively support some vector loads
387 /// efficiently, casting the load to a smaller vector of larger types and
388 /// loading is more efficient, however, this can be undone by optimizations in
389 /// dag combiner.
390 virtual bool isLoadBitCastBeneficial(EVT LoadVT,
391 EVT BitcastVT) const {
392 // Don't do this if we could do an indexed load on the original type, but not on
393 // the new one.
394 if (!LoadVT.isSimple() || !BitcastVT.isSimple())
395 return true;
396
397 MVT LoadMVT = LoadVT.getSimpleVT();
398
399 // Don't bother doing this if it's just going to be promoted again later, as
400 // doing so might interfere with other combines.
401 if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
402 getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
403 return false;
404
405 return true;
406 }
407
408 /// Return true if the following transform is beneficial:
409 /// (store (y (conv x)), y*)) -> (store x, (x*))
410 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
411 // Default to the same logic as loads.
412 return isLoadBitCastBeneficial(StoreVT, BitcastVT);
413 }
414
415 /// Return true if it is expected to be cheaper to do a store of a non-zero
416 /// vector constant with the given size and type for the address space than to
417 /// store the individual scalar element constants.
418 virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
419 unsigned NumElem,
420 unsigned AddrSpace) const {
421 return false;
422 }
423
424 /// Allow store merging after legalization in addition to before legalization.
425 /// This may catch stores that do not exist earlier (eg, stores created from
426 /// intrinsics).
427 virtual bool mergeStoresAfterLegalization() const { return true; }
428
429 /// Returns true if it's reasonable to merge stores to MemVT size.
430 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
431 const SelectionDAG &DAG) const {
432 return true;
433 }
434
435 /// Return true if it is cheap to speculate a call to intrinsic cttz.
436 virtual bool isCheapToSpeculateCttz() const {
437 return false;
438 }
439
440 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
441 virtual bool isCheapToSpeculateCtlz() const {
442 return false;
443 }
444
445 /// Return true if ctlz instruction is fast.
446 virtual bool isCtlzFast() const {
447 return false;
448 }
449
450 /// Return true if it is safe to transform an integer-domain bitwise operation
451 /// into the equivalent floating-point operation. This should be set to true
452 /// if the target has IEEE-754-compliant fabs/fneg operations for the input
453 /// type.
454 virtual bool hasBitPreservingFPLogic(EVT VT) const {
455 return false;
456 }
457
458 /// Return true if it is cheaper to split the store of a merged int val
459 /// from a pair of smaller values into multiple stores.
460 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
461 return false;
462 }
463
464 /// Return true if the target supports combining a
465 /// chain like:
466 /// \code
467 /// %andResult = and %val1, #mask
468 /// %icmpResult = icmp %andResult, 0
469 /// \endcode
470 /// into a single machine instruction of a form like:
471 /// \code
472 /// cc = test %register, #mask
473 /// \endcode
474 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
475 return false;
476 }
477
478 /// Use bitwise logic to make pairs of compares more efficient. For example:
479 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
480 /// This should be true when it takes more than one instruction to lower
481 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
482 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
483 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
484 return false;
485 }
486
487 /// Return the preferred operand type if the target has a quick way to compare
488 /// integer values of the given size. Assume that any legal integer type can
489 /// be compared efficiently. Targets may override this to allow illegal wide
490 /// types to return a vector type if there is support to compare that type.
491 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
492 MVT VT = MVT::getIntegerVT(NumBits);
493 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
494 }
495
496 /// Return true if the target should transform:
497 /// (X & Y) == Y ---> (~X & Y) == 0
498 /// (X & Y) != Y ---> (~X & Y) != 0
499 ///
500 /// This may be profitable if the target has a bitwise and-not operation that
501 /// sets comparison flags. A target may want to limit the transformation based
502 /// on the type of Y or if Y is a constant.
503 ///
504 /// Note that the transform will not occur if Y is known to be a power-of-2
505 /// because a mask and compare of a single bit can be handled by inverting the
506 /// predicate, for example:
507 /// (X & 8) == 8 ---> (X & 8) != 0
508 virtual bool hasAndNotCompare(SDValue Y) const {
509 return false;
510 }
511
512 /// Return true if the target has a bitwise and-not operation:
513 /// X = ~A & B
514 /// This can be used to simplify select or other instructions.
515 virtual bool hasAndNot(SDValue X) const {
516 // If the target has the more complex version of this operation, assume that
517 // it has this operation too.
518 return hasAndNotCompare(X);
519 }
520
521 /// There are two ways to clear extreme bits (either low or high):
522 /// Mask: x & (-1 << y) (the instcombine canonical form)
523 /// Shifts: x >> y << y
524 /// Return true if the variant with 2 shifts is preferred.
525 /// Return false if there is no preference.
526 virtual bool preferShiftsToClearExtremeBits(SDValue X) const {
527 // By default, let's assume that no one prefers shifts.
528 return false;
529 }
530
531 /// Should we transform the IR-optimal check for whether the given truncation
532 /// down into KeptBits would be truncating or not:
533 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
534 /// Into its more traditional form:
535 /// ((%x << C) a>> C) dstcond %x
536 /// Return true if we should transform.
537 /// Return false if there is no preference.
538 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
539 unsigned KeptBits) const {
540 // By default, let's assume that no one prefers shifts.
541 return false;
542 }
543
544 /// Return true if the target wants to use the optimization that
545 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
546 /// promotedInst1(...(promotedInstN(ext(load)))).
547 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
548
549 /// Return true if the target can combine store(extractelement VectorTy,
550 /// Idx).
551 /// \p Cost[out] gives the cost of that transformation when this is true.
552 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
553 unsigned &Cost) const {
554 return false;
555 }
556
557 /// Return true if inserting a scalar into a variable element of an undef
558 /// vector is more efficiently handled by splatting the scalar instead.
559 virtual bool shouldSplatInsEltVarIndex(EVT) const {
560 return false;
561 }
562
563 /// Return true if the target supports floating point exceptions.
564 bool hasFloatingPointExceptions() const {
565 return HasFloatingPointExceptions;
566 }
567
568 /// Return true if the target always benefits from combining into FMA for a
569 /// given value type. This must typically return false on targets where FMA
570 /// takes more cycles to execute than FADD.
571 virtual bool enableAggressiveFMAFusion(EVT VT) const {
572 return false;
573 }
574
575 /// Return the ValueType of the result of SETCC operations.
576 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
577 EVT VT) const;
578
579 /// Return the ValueType for comparison libcalls. Comparison libcalls include
580 /// floating point comparison calls, and Ordered/Unordered check calls on
581 /// floating point numbers.
582 virtual
583 MVT::SimpleValueType getCmpLibcallReturnType() const;
584
585 /// For targets without i1 registers, this gives the nature of the high-bits
586 /// of boolean values held in types wider than i1.
587 ///
588 /// "Boolean values" are special true/false values produced by nodes like
589 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
590 /// Not to be confused with general values promoted from i1. Some cpus
591 /// distinguish between vectors of boolean and scalars; the isVec parameter
592 /// selects between the two kinds. For example on X86 a scalar boolean should
593 /// be zero extended from i1, while the elements of a vector of booleans
594 /// should be sign extended from i1.
595 ///
596 /// Some cpus also treat floating point types the same way as they treat
597 /// vectors instead of the way they treat scalars.
598 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
599 if (isVec)
600 return BooleanVectorContents;
601 return isFloat ? BooleanFloatContents : BooleanContents;
602 }
603
604 BooleanContent getBooleanContents(EVT Type) const {
605 return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
606 }
607
608 /// Return target scheduling preference.
609 Sched::Preference getSchedulingPreference() const {
610 return SchedPreferenceInfo;
611 }
612
613 /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics
614 /// for different nodes. This function returns the preference (or none) for
615 /// the given node.
616 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
617 return Sched::None;
618 }
619
620 /// Return the register class that should be used for the specified value
621 /// type.
622 virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
623 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
624 assert(RC && "This value type is not natively supported!");
625 return RC;
626 }
627
628 /// Return the 'representative' register class for the specified value
629 /// type.
630 ///
631 /// The 'representative' register class is the largest legal super-reg
632 /// register class for the register class of the value type. For example, on
633 /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep
634 /// register class is GR64 on x86_64.
635 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
636 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
637 return RC;
638 }
639
640 /// Return the cost of the 'representative' register class for the specified
641 /// value type.
642 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
643 return RepRegClassCostForVT[VT.SimpleTy];
644 }
645
646 /// Return true if the target has native support for the specified value type.
647 /// This means that it has a register that directly holds it without
648 /// promotions or expansions.
649 bool isTypeLegal(EVT VT) const {
650 assert(!VT.isSimple() ||
651        (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
652 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
653 }
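
A minimal usage sketch (TLI and VT are assumed names): legality checks like this one usually gate lowering decisions together with the operation-action queries defined later in this class.

  // Only form an FMA node of type VT if the type has a register class and the
  // operation is either natively supported or custom-lowered.
  if (TLI.isTypeLegal(VT) && TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
    // ... emit ISD::FMA ...
  }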
654
655 class ValueTypeActionImpl {
656 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
657 /// that indicates how instruction selection should deal with the type.
658 LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];
659
660 public:
661 ValueTypeActionImpl() {
662 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
663 TypeLegal);
664 }
665
666 LegalizeTypeAction getTypeAction(MVT VT) const {
667 return ValueTypeActions[VT.SimpleTy];
668 }
669
670 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
671 ValueTypeActions[VT.SimpleTy] = Action;
672 }
673 };
674
675 const ValueTypeActionImpl &getValueTypeActions() const {
676 return ValueTypeActions;
677 }
678
679 /// Return how we should legalize values of this type, either it is already
680 /// legal (return 'Legal') or we need to promote it to a larger type (return
681 /// 'Promote'), or we need to expand it into multiple registers of smaller
682 /// integer type (return 'Expand'). 'Custom' is not an option.
683 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
684 return getTypeConversion(Context, VT).first;
685 }
686 LegalizeTypeAction getTypeAction(MVT VT) const {
687 return ValueTypeActions.getTypeAction(VT);
688 }
689
690 /// For types supported by the target, this is an identity function. For
691 /// types that must be promoted to larger types, this returns the larger type
692 /// to promote to. For integer types that are larger than the largest integer
693 /// register, this contains one step in the expansion to get to the smaller
694 /// register. For illegal floating point types, this returns the integer type
695 /// to transform to.
696 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
697 return getTypeConversion(Context, VT).second;
698 }
699
700 /// For types supported by the target, this is an identity function. For
701 /// types that must be expanded (i.e. integer types that are larger than the
702 /// largest integer register or illegal floating point types), this returns
703 /// the largest legal type it will be expanded to.
704 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
705 assert(!VT.isVector());
706 while (true) {
707 switch (getTypeAction(Context, VT)) {
708 case TypeLegal:
709 return VT;
710 case TypeExpandInteger:
711 VT = getTypeToTransformTo(Context, VT);
712 break;
713 default:
714 llvm_unreachable("Type is not legal nor is it to be expanded!")::llvm::llvm_unreachable_internal("Type is not legal nor is it to be expanded!"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 714)
;
715 }
716 }
717 }
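
A worked sketch of the expansion loop (target-specific; it assumes the widest legal integer type is i64):

  //   getTypeToExpandTo(Ctx, i256):
  //     i256 -> TypeExpandInteger -> i128
  //     i128 -> TypeExpandInteger -> i64
  //     i64  -> TypeLegal         -> returned
  EVT Widest = TLI.getTypeToExpandTo(Ctx, EVT::getIntegerVT(Ctx, 256)); // i64 here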
718
719 /// Vector types are broken down into some number of legal first class types.
720 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
721 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
722 /// turns into 4 EVT::i32 values with both PPC and X86.
723 ///
724 /// This method returns the number of registers needed, and the VT for each
725 /// register. It also returns the VT and quantity of the intermediate values
726 /// before they are promoted/expanded.
727 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
728 EVT &IntermediateVT,
729 unsigned &NumIntermediates,
730 MVT &RegisterVT) const;
731
732 /// Certain targets such as MIPS require that some types such as vectors are
733 /// always broken down into scalars in some contexts. This occurs even if the
734 /// vector type is legal.
735 virtual unsigned getVectorTypeBreakdownForCallingConv(
736 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
737 unsigned &NumIntermediates, MVT &RegisterVT) const {
738 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
739 RegisterVT);
740 }
741
742 struct IntrinsicInfo {
743 unsigned opc = 0; // target opcode
744 EVT memVT; // memory VT
745
746 // value representing memory location
747 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
748
749 int offset = 0; // offset off of ptrVal
750 unsigned size = 0; // the size of the memory location
751 // (taken from memVT if zero)
752 unsigned align = 1; // alignment
753
754 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
755 IntrinsicInfo() = default;
756 };
757
758 /// Given an intrinsic, checks if on the target the intrinsic will need to map
759 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
760 /// true and stores the intrinsic information into the IntrinsicInfo that was
761 /// passed to the function.
762 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
763 MachineFunction &,
764 unsigned /*Intrinsic*/) const {
765 return false;
766 }
767
768 /// Returns true if the target can instruction select the specified FP
769 /// immediate natively. If false, the legalizer will materialize the FP
770 /// immediate as a load from a constant pool.
771 virtual bool isFPImmLegal(const APFloat &/*Imm*/, EVT /*VT*/) const {
772 return false;
773 }
774
775 /// Targets can use this to indicate that they only support *some*
776 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
777 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
778 /// legal.
779 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
780 return true;
781 }
782
783 /// Returns true if the operation can trap for the value type.
784 ///
785 /// VT must be a legal type. By default, we optimistically assume most
786 /// operations don't trap except for integer divide and remainder.
787 virtual bool canOpTrap(unsigned Op, EVT VT) const;
788
789 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
790 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
791 /// constant pool entry.
792 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
793 EVT /*VT*/) const {
794 return false;
795 }
796
797 /// Return how this operation should be treated: either it is legal, needs to
798 /// be promoted to a larger size, needs to be expanded to some other code
799 /// sequence, or the target has a custom expander for it.
800 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
801 if (VT.isExtended()) return Expand;
802 // If a target-specific SDNode requires legalization, require the target
803 // to provide custom legalization for it.
804 if (Op >= array_lengthof(OpActions[0])) return Custom;
805 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
806 }
807
808 /// Custom method defined by each target to indicate if an operation which
809 /// may require a scale is supported natively by the target.
810 /// If not, the operation is illegal.
811 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
812 unsigned Scale) const {
813 return false;
814 }
815
816 /// Some fixed point operations may be natively supported by the target but
817 /// only for specific scales. This method allows for checking
818 /// if the width is supported by the target for a given operation that may
819 /// depend on scale.
820 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
821 unsigned Scale) const {
822 auto Action = getOperationAction(Op, VT);
823 if (Action != Legal)
824 return Action;
825
826 // This operation is supported in this type but may only work on specific
827 // scales.
828 bool Supported;
829 switch (Op) {
830 default:
831 llvm_unreachable("Unexpected fixed point operation.")::llvm::llvm_unreachable_internal("Unexpected fixed point operation."
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 831)
;
832 case ISD::SMULFIX:
833 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
834 break;
835 }
836
837 return Supported ? Action : Expand;
838 }
839
840 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
841 unsigned EqOpc;
842 switch (Op) {
843 default: llvm_unreachable("Unexpected FP pseudo-opcode")::llvm::llvm_unreachable_internal("Unexpected FP pseudo-opcode"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 843)
;
844 case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
845 case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
846 case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
847 case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
848 case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
849 case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
850 case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
851 case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
852 case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
853 case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
854 case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
855 case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
856 case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
857 case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
858 case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
859 case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
860 case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
861 case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
862 case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
863 case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
864 case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
865 case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
866 case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
867 case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
868 }
869
870 auto Action = getOperationAction(EqOpc, VT);
871
872 // We don't currently handle Custom or Promote for strict FP pseudo-ops.
873 // For now, we just expand for those cases.
874 if (Action != Legal)
875 Action = Expand;
876
877 return Action;
878 }
879
880 /// Return true if the specified operation is legal on this target or can be
881 /// made legal with custom lowering. This is used to help guide high-level
882 /// lowering decisions.
883 bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
884 return (VT == MVT::Other || isTypeLegal(VT)) &&
885 (getOperationAction(Op, VT) == Legal ||
886 getOperationAction(Op, VT) == Custom);
887 }
888
889 /// Return true if the specified operation is legal on this target or can be
890 /// made legal using promotion. This is used to help guide high-level lowering
891 /// decisions.
892 bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
893 return (VT == MVT::Other || isTypeLegal(VT)) &&
894 (getOperationAction(Op, VT) == Legal ||
895 getOperationAction(Op, VT) == Promote);
896 }
897
898 /// Return true if the specified operation is legal on this target or can be
899 /// made legal with custom lowering or using promotion. This is used to help
900 /// guide high-level lowering decisions.
901 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
902 return (VT == MVT::Other || isTypeLegal(VT)) &&
903 (getOperationAction(Op, VT) == Legal ||
904 getOperationAction(Op, VT) == Custom ||
905 getOperationAction(Op, VT) == Promote);
906 }
907
908 /// Return true if the operation uses custom lowering, regardless of whether
909 /// the type is legal or not.
910 bool isOperationCustom(unsigned Op, EVT VT) const {
911 return getOperationAction(Op, VT) == Custom;
912 }
913
914 /// Return true if lowering to a jump table is allowed.
915 virtual bool areJTsAllowed(const Function *Fn) const {
916 if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
917 return false;
918
919 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
920 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
921 }
922
923 /// Check whether the range [Low,High] fits in a machine word.
924 bool rangeFitsInWord(const APInt &Low, const APInt &High,
925 const DataLayout &DL) const {
926 // FIXME: Using the pointer type doesn't seem ideal.
927 uint64_t BW = DL.getIndexSizeInBits(0u);
928 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
929 return Range <= BW;
930 }
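
A worked example of the range check (a 64-bit index type is assumed):

  //   Low = 10, High = 73: Range = (73 - 10) + 1 = 64 <= 64  -> fits in a word
  //   Low = 0,  High = 64: Range = 65 > 64                   -> does not fit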
931
932 /// Return true if lowering to a jump table is suitable for a set of case
933 /// clusters which may contain \p NumCases cases, \p Range range of values.
934 /// FIXME: This function checks the maximum table size and density, but the
935 /// minimum size is not checked. It would be nice if the minimum size check
936 /// were also folded into this function. Currently, the minimum size check is
937 /// performed in findJumpTable() in SelectionDAGBuilder and
938 /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
939 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
940 uint64_t Range) const {
941 const bool OptForSize = SI->getParent()->getParent()->optForSize();
942 const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
943 const unsigned MaxJumpTableSize =
944 OptForSize || getMaximumJumpTableSize() == 0
945 ? UINT_MAX
946 : getMaximumJumpTableSize();
947 // Check whether a range of clusters is dense enough for a jump table.
948 if (Range <= MaxJumpTableSize &&
949 (NumCases * 100 >= Range * MinDensity)) {
950 return true;
951 }
952 return false;
953 }
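
A worked example of the density check (MinDensity and MaxJumpTableSize are target- and opt-level-dependent; a 40% minimum density is assumed here purely for illustration):

  //   NumCases = 45, Range = 100: 45 * 100 >= 100 * 40  -> dense enough, use a table
  //   NumCases = 20, Range = 100: 20 * 100 <  100 * 40  -> rejected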
954
955 /// Return true if lowering to a bit test is suitable for a set of case
956 /// clusters which contains \p NumDests unique destinations, \p Low and
957 /// \p High as its lowest and highest case values, and expects \p NumCmps
958 /// case value comparisons. Check if the number of destinations, comparison
959 /// metric, and range are all suitable.
960 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
961 const APInt &Low, const APInt &High,
962 const DataLayout &DL) const {
963 // FIXME: I don't think NumCmps is the correct metric: a single case and a
964 // range of cases both require only one branch to lower. Just looking at the
965 // number of clusters and destinations should be enough to decide whether to
966 // build bit tests.
967
968 // To lower a range with bit tests, the range must fit the bitwidth of a
969 // machine word.
970 if (!rangeFitsInWord(Low, High, DL))
971 return false;
972
973 // Decide whether it's profitable to lower this range with bit tests. Each
974 // destination requires a bit test and branch, and there is an overall range
975 // check branch. For a small number of clusters, separate comparisons might
976 // be cheaper, and for many destinations, splitting the range might be
977 // better.
978 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
979 (NumDests == 3 && NumCmps >= 6);
980 }
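
A worked example of this heuristic (a 64-bit machine word is assumed):

  //   NumDests = 2, NumCmps = 5, Low = 0, High = 31:
  //     range (32) fits in a word and (NumDests == 2 && NumCmps >= 5) -> true
  //   NumDests = 4, NumCmps = 8: no clause covers four destinations   -> false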
981
982 /// Return true if the specified operation is illegal on this target or
983 /// unlikely to be made legal with custom lowering. This is used to help guide
984 /// high-level lowering decisions.
985 bool isOperationExpand(unsigned Op, EVT VT) const {
986 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
987 }
988
989 /// Return true if the specified operation is legal on this target.
990 bool isOperationLegal(unsigned Op, EVT VT) const {
991 return (VT == MVT::Other || isTypeLegal(VT)) &&
992 getOperationAction(Op, VT) == Legal;
993 }
994
995 /// Return how this load with extension should be treated: either it is legal,
996 /// needs to be promoted to a larger size, needs to be expanded to some other
997 /// code sequence, or the target has a custom expander for it.
998 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
999 EVT MemVT) const {
1000 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1001 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1002 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1003 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
1004        MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
1005 unsigned Shift = 4 * ExtType;
1006 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1007 }
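
A short usage sketch of the table decoded above (the particular types are illustrative assumptions):

  // Is a sign-extending load of i8 into i32 free on this target?
  bool SExtI8Legal = TLI.isLoadExtLegal(ISD::SEXTLOAD, MVT::i32, MVT::i8);
  // Each LoadExtActions[ValI][MemI] word packs one 4-bit LegalizeAction per
  // extension kind, which is why the decode above shifts by 4 * ExtType.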
1008
1009 /// Return true if the specified load with extension is legal on this target.
1010 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1011 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1012 }
1013
1014 /// Return true if the specified load with extension is legal or custom
1015 /// on this target.
1016 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1017 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1018 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1019 }
1020
1021 /// Return how this store with truncation should be treated: either it is
1022 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1023 /// other code sequence, or the target has a custom expander for it.
1024 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1025 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1026 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1027 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1028 assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
1029        "Table isn't big enough!");
1030 return TruncStoreActions[ValI][MemI];
1031 }
1032
1033 /// Return true if the specified store with truncation is legal on this
1034 /// target.
1035 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1036 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1037 }
1038
1039 /// Return true if the specified store with truncation is legal or custom on this
1040 /// target.
1041 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1042 return isTypeLegal(ValVT) &&
1043 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1044 getTruncStoreAction(ValVT, MemVT) == Custom);
1045 }
1046
1047 /// Return how the indexed load should be treated: either it is legal, needs
1048 /// to be promoted to a larger size, needs to be expanded to some other code
1049 /// sequence, or the target has a custom expander for it.
1050 LegalizeAction
1051 getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1052 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
1053        "Table isn't big enough!");
1054 unsigned Ty = (unsigned)VT.SimpleTy;
1055 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
1056 }
1057
1058 /// Return true if the specified indexed load is legal on this target.
1059 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1060 return VT.isSimple() &&
1061 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1062 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1063 }
1064
1065 /// Return how the indexed store should be treated: either it is legal, needs
1066 /// to be promoted to a larger size, needs to be expanded to some other code
1067 /// sequence, or the target has a custom expander for it.
1068 LegalizeAction
1069 getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1070 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
1071        "Table isn't big enough!");
1072 unsigned Ty = (unsigned)VT.SimpleTy;
1073 return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
1074 }
1075
1076 /// Return true if the specified indexed store is legal on this target.
1077 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1078 return VT.isSimple() &&
1079 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1080 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1081 }
1082
1083 /// Return how the condition code should be treated: either it is legal, needs
1084 /// to be expanded to some other code sequence, or the target has a custom
1085 /// expander for it.
1086 LegalizeAction
1087 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1088 assert((unsigned)CC < array_lengthof(CondCodeActions) &&
1089        ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
1090        "Table isn't big enough!");
1091 // See setCondCodeAction for how this is encoded.
1092 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1093 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1094 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1095 assert(Action != Promote && "Can't promote condition code!");
1096 return Action;
1097 }
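
A worked decode of the encoding referenced above (the SimpleTy value is an arbitrary illustration):

  //   For a value type with SimpleTy = 11 and condition code CC:
  //     word   = CondCodeActions[CC][11 >> 3] = CondCodeActions[CC][1]
  //     shift  = 4 * (11 & 0x7)               = 12
  //     action = (word >> 12) & 0xF
  //   i.e. eight value types share each 32-bit entry, four bits apiece.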
1098
1099 /// Return true if the specified condition code is legal on this target.
1100 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1101 return getCondCodeAction(CC, VT) == Legal;
1102 }
1103
1104 /// Return true if the specified condition code is legal or custom on this
1105 /// target.
1106 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1107 return getCondCodeAction(CC, VT) == Legal ||
1108 getCondCodeAction(CC, VT) == Custom;
1109 }
1110
1111 /// If the action for this operation is to promote, this method returns the
1112 /// ValueType to promote to.
1113 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1114 assert(getOperationAction(Op, VT) == Promote &&
1115        "This operation isn't promoted!");
1116
1117 // See if this has an explicit type specified.
1118 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1119 MVT::SimpleValueType>::const_iterator PTTI =
1120 PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1121 if (PTTI != PromoteToType.end()) return PTTI->second;
1122
1123 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1124        "Cannot autopromote this type, add it with AddPromotedToType.");
1125
1126 MVT NVT = VT;
1127 do {
1128 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1129 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1130        "Didn't find type to promote to!");
1131 } while (!isTypeLegal(NVT) ||
1132 getOperationAction(Op, NVT) == Promote);
1133 return NVT;
1134 }
1135
1136 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1137 /// operations except for the pointer size. If AllowUnknown is true, this
1138 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1139 /// otherwise it will assert.
1140 EVT getValueType(const DataLayout &DL, Type *Ty,
1141 bool AllowUnknown = false) const {
1142 // Lower scalar pointers to native pointer types.
1143 if (PointerType *PTy = dyn_cast<PointerType>(Ty))
17
Taking false branch
1144 return getPointerTy(DL, PTy->getAddressSpace());
1145
1146 if (Ty->isVectorTy()) {
18
Called C++ object pointer is null
1147 VectorType *VTy = cast<VectorType>(Ty);
1148 Type *Elm = VTy->getElementType();
1149 // Lower vectors of pointers to native pointer types.
1150 if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
1151 EVT PointerTy(getPointerTy(DL, PT->getAddressSpace()));
1152 Elm = PointerTy.getTypeForEVT(Ty->getContext());
1153 }
1154
1155 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
1156 VTy->getNumElements());
1157 }
1158 return EVT::getEVT(Ty, AllowUnknown);
1159 }
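The warning this report is about fires at line 1146, where Ty->isVectorTy() is called on a pointer the analyzer believes can be null along the annotated path. The standalone sketch below reproduces that shape of defect with stand-in types; it is only an illustration of the pattern, not LLVM code and not a proposed fix.

// Minimal stand-alone sketch of the null-dereference pattern flagged at line
// 1146 (hypothetical Shape/Circle classes stand in for Type/VectorType).
#include <cassert>

struct Shape {
  virtual ~Shape() = default;
  virtual bool isCircle() const { return false; }
};
struct Circle : Shape {
  bool isCircle() const override { return true; }
};

int classify(const Shape *S) {
  // Unchecked use: if a caller passes nullptr, S->isCircle() dereferences a
  // null pointer, which is the class of defect reported above. A defensive
  // variant bails out (or asserts) before the first member call:
  assert(S && "caller must pass a non-null object");
  return S->isCircle() ? 1 : 0;
}

int main() {
  Circle C;
  return classify(&C) == 1 ? 0 : 1;
}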
1160
1161 /// Return the MVT corresponding to this LLVM type. See getValueType.
1162 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1163 bool AllowUnknown = false) const {
1164 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1165 }
1166
1167 /// Return the desired alignment for ByVal or InAlloca aggregate function
1168 /// arguments in the caller parameter area. This is the actual alignment, not
1169 /// its logarithm.
1170 virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1171
1172 /// Return the type of registers that this ValueType will eventually require.
1173 MVT getRegisterType(MVT VT) const {
1174 assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
1175 return RegisterTypeForVT[VT.SimpleTy];
1176 }
1177
1178 /// Return the type of registers that this ValueType will eventually require.
1179 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1180 if (VT.isSimple()) {
1181 assert((unsigned)VT.getSimpleVT().SimpleTy <
1182 array_lengthof(RegisterTypeForVT));
1183 return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
1184 }
1185 if (VT.isVector()) {
1186 EVT VT1;
1187 MVT RegisterVT;
1188 unsigned NumIntermediates;
1189 (void)getVectorTypeBreakdown(Context, VT, VT1,
1190 NumIntermediates, RegisterVT);
1191 return RegisterVT;
1192 }
1193 if (VT.isInteger()) {
1194 return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1195 }
1196 llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 1196)
;
1197 }
1198
1199 /// Return the number of registers that this ValueType will eventually
1200 /// require.
1201 ///
1202 /// This is one for any types promoted to live in larger registers, but may be
1203 /// more than one for types (like i64) that are split into pieces. For types
1204 /// like i140, which are first promoted then expanded, it is the number of
1205 /// registers needed to hold all the bits of the original type. For an i140
1206 /// on a 32 bit machine this means 5 registers.
1207 unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
1208 if (VT.isSimple()) {
1209 assert((unsigned)VT.getSimpleVT().SimpleTy <
1210 array_lengthof(NumRegistersForVT));
1211 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1212 }
1213 if (VT.isVector()) {
1214 EVT VT1;
1215 MVT VT2;
1216 unsigned NumIntermediates;
1217 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1218 }
1219 if (VT.isInteger()) {
1220 unsigned BitWidth = VT.getSizeInBits();
1221 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1222 return (BitWidth + RegWidth - 1) / RegWidth;
1223 }
1224 llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 1224)
;
1225 }
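The i140 example in the comment above can be checked with the same formula the integer branch uses; this standalone snippet is only a sanity check of that arithmetic.

// Standalone check of the register-count formula for extended integers:
// ceil(BitWidth / RegWidth) written as (BitWidth + RegWidth - 1) / RegWidth.
#include <cassert>

int main() {
  unsigned BitWidth = 140;  // the i140 example from the comment above
  unsigned RegWidth = 32;   // a 32-bit machine
  unsigned NumRegs = (BitWidth + RegWidth - 1) / RegWidth;
  assert(NumRegs == 5);     // matches the "5 registers" in the documentation
  return 0;
}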
1226
1227 /// Certain combinations of ABIs, Targets and features require that types
1228 /// are legal for some operations and not for other operations.
1229 /// For MIPS all vector types must be passed through the integer register set.
1230 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1231 CallingConv::ID CC, EVT VT) const {
1232 return getRegisterType(Context, VT);
1233 }
1234
1235 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1236 /// this occurs when a vector type is used, as vectors are passed through the
1237 /// integer register set.
1238 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1239 CallingConv::ID CC,
1240 EVT VT) const {
1241 return getNumRegisters(Context, VT);
1242 }
1243
1244 /// Certain targets have context-sensitive alignment requirements, where one
1245 /// type has the alignment requirement of another type.
1246 virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
1247 DataLayout DL) const {
1248 return DL.getABITypeAlignment(ArgTy);
1249 }
1250
1251 /// If true, then instruction selection should seek to shrink the FP constant
1252 /// of the specified type to a smaller type in order to save space and / or
1253 /// reduce runtime.
1254 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1255
1256 /// Return true if it is profitable to reduce a load to a smaller type.
1257 /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
1258 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1259 EVT NewVT) const {
1260 // By default, assume that it is cheaper to extract a subvector from a wide
1261 // vector load rather than creating multiple narrow vector loads.
1262 if (NewVT.isVector() && !Load->hasOneUse())
1263 return false;
1264
1265 return true;
1266 }
1267
1268 /// When splitting a value of the specified type into parts, does the Lo
1269 /// or Hi part come first? This usually follows the endianness, except
1270 /// for ppcf128, where the Hi part always comes first.
1271 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1272 return DL.isBigEndian() || VT == MVT::ppcf128;
1273 }
1274
1275 /// If true, the target has custom DAG combine transformations that it can
1276 /// perform for the specified node.
1277 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1278 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
1279 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1280 }
1281
1282 unsigned getGatherAllAliasesMaxDepth() const {
1283 return GatherAllAliasesMaxDepth;
1284 }
1285
1286 /// Returns the size of the platform's va_list object.
1287 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1288 return getPointerTy(DL).getSizeInBits();
1289 }
1290
1291 /// Get maximum # of store operations permitted for llvm.memset
1292 ///
1293 /// This function returns the maximum number of store operations permitted
1294 /// to replace a call to llvm.memset. The value is set by the target at the
1295 /// performance threshold for such a replacement. If OptSize is true,
1296 /// return the limit for functions that have OptSize attribute.
1297 unsigned getMaxStoresPerMemset(bool OptSize) const {
1298 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1299 }
1300
1301 /// Get maximum # of store operations permitted for llvm.memcpy
1302 ///
1303 /// This function returns the maximum number of store operations permitted
1304 /// to replace a call to llvm.memcpy. The value is set by the target at the
1305 /// performance threshold for such a replacement. If OptSize is true,
1306 /// return the limit for functions that have OptSize attribute.
1307 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1308 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1309 }
1310
1311 /// \brief Get maximum # of store operations to be glued together
1312 ///
1313 /// This function returns the maximum number of store operations permitted
1314 /// to glue together during lowering of llvm.memcpy. The value is set by
1315 /// the target at the performance threshold for such a replacement.
1316 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1317 return MaxGluedStoresPerMemcpy;
1318 }
1319
1320 /// Get maximum # of load operations permitted for memcmp
1321 ///
1322 /// This function returns the maximum number of load operations permitted
1323 /// to replace a call to memcmp. The value is set by the target at the
1324 /// performance threshold for such a replacement. If OptSize is true,
1325 /// return the limit for functions that have OptSize attribute.
1326 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1327 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1328 }
1329
1330 /// For memcmp expansion when the memcmp result is only compared equal or
1331 /// not-equal to 0, allow up to this number of load pairs per block. As an
1332 /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
1333 /// a0 = load2bytes &a[0]
1334 /// b0 = load2bytes &b[0]
1335 /// a2 = load1byte &a[2]
1336 /// b2 = load1byte &b[2]
1337 /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
1338 virtual unsigned getMemcmpEqZeroLoadsPerBlock() const {
1339 return 1;
1340 }
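The pseudo-IR above can be phrased as plain C++; the sketch below assumes memcpy-based loads and only illustrates the shape of the expansion, not what a backend literally emits.

// Standalone illustration of the memcmp(a, b, 3) == 0 expansion sketched in
// the comment above: two loads per buffer, xor'ed, or'ed, compared with zero.
#include <cstdint>
#include <cstring>
#include <cassert>

static bool memcmp3_eq(const unsigned char *a, const unsigned char *b) {
  uint16_t a0, b0;
  std::memcpy(&a0, a, 2);        // a0 = load2bytes &a[0]
  std::memcpy(&b0, b, 2);        // b0 = load2bytes &b[0]
  uint8_t a2 = a[2];             // a2 = load1byte &a[2]
  uint8_t b2 = b[2];             // b2 = load1byte &b[2]
  return ((a0 ^ b0) | (a2 ^ b2)) == 0;   // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
}

int main() {
  unsigned char x[3] = {1, 2, 3}, y[3] = {1, 2, 3}, z[3] = {1, 2, 4};
  assert(memcmp3_eq(x, y) && !memcmp3_eq(x, z));
  return 0;
}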
1341
1342 /// Get maximum # of store operations permitted for llvm.memmove
1343 ///
1344 /// This function returns the maximum number of store operations permitted
1345 /// to replace a call to llvm.memmove. The value is set by the target at the
1346 /// performance threshold for such a replacement. If OptSize is true,
1347 /// return the limit for functions that have OptSize attribute.
1348 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1349 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1350 }
1351
1352 /// Determine if the target supports unaligned memory accesses.
1353 ///
1354 /// This function returns true if the target allows unaligned memory accesses
1355 /// of the specified type in the given address space. If true, it also returns
1356 /// whether the unaligned memory access is "fast" in the last argument by
1357 /// reference. This is used, for example, in situations where an array
1358 /// copy/move/set is converted to a sequence of store operations. Its use
1359 /// helps to ensure that such replacements don't generate code that causes an
1360 /// alignment error (trap) on the target machine.
1361 virtual bool allowsMisalignedMemoryAccesses(EVT,
1362 unsigned AddrSpace = 0,
1363 unsigned Align = 1,
1364 bool * /*Fast*/ = nullptr) const {
1365 return false;
1366 }
1367
1368 /// Return true if the target supports a memory access of this type for the
1369 /// given address space and alignment. If the access is allowed, the optional
1370 /// final parameter returns if the access is also fast (as defined by the
1371 /// target).
1372 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1373 unsigned AddrSpace = 0, unsigned Alignment = 1,
1374 bool *Fast = nullptr) const;
1375
1376 /// Returns the target specific optimal type for load and store operations as
1377 /// a result of memset, memcpy, and memmove lowering.
1378 ///
1379 /// If DstAlign is zero, that means the destination can satisfy any alignment
1380 /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
1381 /// against an alignment requirement, probably because the
1382 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
1383 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
1384 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
1385 /// does not need to be loaded. It returns EVT::Other if the type should be
1386 /// determined using generic target-independent logic.
1387 virtual EVT getOptimalMemOpType(uint64_t /*Size*/,
1388 unsigned /*DstAlign*/, unsigned /*SrcAlign*/,
1389 bool /*IsMemset*/,
1390 bool /*ZeroMemset*/,
1391 bool /*MemcpyStrSrc*/,
1392 MachineFunction &/*MF*/) const {
1393 return MVT::Other;
1394 }
1395
1396 /// Returns true if it's safe to use load / store of the specified type to
1397 /// expand memcpy / memset inline.
1398 ///
1399 /// This is mostly true for all types except for some special cases. For
1400 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
1401 /// fstpl which also does type conversion. Note the specified type doesn't
1402 /// have to be legal as the hook is used before type legalization.
1403 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1404
1405 /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp.
1406 bool usesUnderscoreSetJmp() const {
1407 return UseUnderscoreSetJmp;
1408 }
1409
1410 /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp.
1411 bool usesUnderscoreLongJmp() const {
1412 return UseUnderscoreLongJmp;
1413 }
1414
1415 /// Return lower limit for number of blocks in a jump table.
1416 virtual unsigned getMinimumJumpTableEntries() const;
1417
1418 /// Return lower limit of the density in a jump table.
1419 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1420
1421 /// Return upper limit for number of entries in a jump table.
1422 /// Zero if no limit.
1423 unsigned getMaximumJumpTableSize() const;
1424
1425 virtual bool isJumpTableRelative() const {
1426 return TM.isPositionIndependent();
1427 }
1428
1429 /// If a physical register, this specifies the register that
1430 /// llvm.savestack/llvm.restorestack should save and restore.
1431 unsigned getStackPointerRegisterToSaveRestore() const {
1432 return StackPointerRegisterToSaveRestore;
1433 }
1434
1435 /// If a physical register, this returns the register that receives the
1436 /// exception address on entry to an EH pad.
1437 virtual unsigned
1438 getExceptionPointerRegister(const Constant *PersonalityFn) const {
1439 // 0 is guaranteed to be the NoRegister value on all targets
1440 return 0;
1441 }
1442
1443 /// If a physical register, this returns the register that receives the
1444 /// exception typeid on entry to a landing pad.
1445 virtual unsigned
1446 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
1447 // 0 is guaranteed to be the NoRegister value on all targets
1448 return 0;
1449 }
1450
1451 virtual bool needsFixedCatchObjects() const {
1452 report_fatal_error("Funclet EH is not implemented for this target");
1453 }
1454
1455 /// Returns the target's jmp_buf size in bytes (if never set, the default is
1456 /// 200)
1457 unsigned getJumpBufSize() const {
1458 return JumpBufSize;
1459 }
1460
1461 /// Returns the target's jmp_buf alignment in bytes (if never set, the default
1462 /// is 0)
1463 unsigned getJumpBufAlignment() const {
1464 return JumpBufAlignment;
1465 }
1466
1467 /// Return the minimum stack alignment of an argument.
1468 unsigned getMinStackArgumentAlignment() const {
1469 return MinStackArgumentAlignment;
1470 }
1471
1472 /// Return the minimum function alignment.
1473 unsigned getMinFunctionAlignment() const {
1474 return MinFunctionAlignment;
1475 }
1476
1477 /// Return the preferred function alignment.
1478 unsigned getPrefFunctionAlignment() const {
1479 return PrefFunctionAlignment;
1480 }
1481
1482 /// Return the preferred loop alignment.
1483 virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
1484 return PrefLoopAlignment;
1485 }
1486
1487 /// Should loops be aligned even when the function is marked OptSize (but not
1488 /// MinSize).
1489 virtual bool alignLoopsWithOptSize() const {
1490 return false;
1491 }
1492
1493 /// If the target has a standard location for the stack protector guard,
1494 /// returns the address of that location. Otherwise, returns nullptr.
1495 /// DEPRECATED: please override useLoadStackGuardNode and customize
1496 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
1497 virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
1498
1499 /// Inserts necessary declarations for SSP (stack protection) purpose.
1500 /// Should be used only when getIRStackGuard returns nullptr.
1501 virtual void insertSSPDeclarations(Module &M) const;
1502
1503 /// Return the variable that's previously inserted by insertSSPDeclarations,
1504 /// if any, otherwise return nullptr. Should be used only when
1505 /// getIRStackGuard returns nullptr.
1506 virtual Value *getSDagStackGuard(const Module &M) const;
1507
1508 /// If this function returns true, stack protection checks should XOR the
1509 /// frame pointer (or whichever pointer is used to address locals) into the
1510 /// stack guard value before checking it. getIRStackGuard must return nullptr
1511 /// if this returns true.
1512 virtual bool useStackGuardXorFP() const { return false; }
1513
1514 /// If the target has a standard stack protection check function that
1515 /// performs validation and error handling, returns the function. Otherwise,
1516 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
1517 /// Should be used only when getIRStackGuard returns nullptr.
1518 virtual Value *getSSPStackGuardCheck(const Module &M) const;
1519
1520protected:
1521 Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
1522 bool UseTLS) const;
1523
1524public:
1525 /// Returns the target-specific address of the unsafe stack pointer.
1526 virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
1527
1528 /// Returns the name of the symbol used to emit stack probes or the empty
1529 /// string if not applicable.
1530 virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
1531 return "";
1532 }
1533
1534 /// Returns true if a cast between SrcAS and DestAS is a noop.
1535 virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1536 return false;
1537 }
1538
1539 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
1540 /// are happy to sink it into basic blocks.
1541 virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1542 return isNoopAddrSpaceCast(SrcAS, DestAS);
1543 }
1544
1545 /// Return true if the pointer arguments to CI should be aligned by aligning
1546 /// the object whose address is being passed. If so then MinSize is set to the
1547 /// minimum size the object must be to be aligned and PrefAlign is set to the
1548 /// preferred alignment.
1549 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
1550 unsigned & /*PrefAlign*/) const {
1551 return false;
1552 }
1553
1554 //===--------------------------------------------------------------------===//
1555 /// \name Helpers for TargetTransformInfo implementations
1556 /// @{
1557
1558 /// Get the ISD node that corresponds to the Instruction class opcode.
1559 int InstructionOpcodeToISD(unsigned Opcode) const;
1560
1561 /// Estimate the cost of type-legalization and the legalized type.
1562 std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
1563 Type *Ty) const;
1564
1565 /// @}
1566
1567 //===--------------------------------------------------------------------===//
1568 /// \name Helpers for atomic expansion.
1569 /// @{
1570
1571 /// Returns the maximum atomic operation size (in bits) supported by
1572 /// the backend. Atomic operations greater than this size (as well
1573 /// as ones that are not naturally aligned), will be expanded by
1574 /// AtomicExpandPass into an __atomic_* library call.
1575 unsigned getMaxAtomicSizeInBitsSupported() const {
1576 return MaxAtomicSizeInBitsSupported;
1577 }
1578
1579 /// Returns the size of the smallest cmpxchg or ll/sc instruction
1580 /// the backend supports. Any smaller operations are widened in
1581 /// AtomicExpandPass.
1582 ///
1583 /// Note that *unlike* operations above the maximum size, atomic ops
1584 /// are still natively supported below the minimum; they just
1585 /// require a more complex expansion.
1586 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
1587
1588 /// Whether the target supports unaligned atomic operations.
1589 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
1590
1591 /// Whether AtomicExpandPass should automatically insert fences and reduce
1592 /// ordering for this atomic. This should be true for most architectures with
1593 /// weak memory ordering. Defaults to false.
1594 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
1595 return false;
1596 }
1597
1598 /// Perform a load-linked operation on Addr, returning a "Value *" with the
1599 /// corresponding pointee type. This may entail some non-trivial operations to
1600 /// truncate or reconstruct types that will be illegal in the backend. See
1601 /// ARMISelLowering for an example implementation.
1602 virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
1603 AtomicOrdering Ord) const {
1604 llvm_unreachable("Load linked unimplemented on this target")::llvm::llvm_unreachable_internal("Load linked unimplemented on this target"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 1604)
;
1605 }
1606
1607 /// Perform a store-conditional operation to Addr. Return the status of the
1608 /// store. This should be 0 if the store succeeded, non-zero otherwise.
1609 virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1610 Value *Addr, AtomicOrdering Ord) const {
1611 llvm_unreachable("Store conditional unimplemented on this target")::llvm::llvm_unreachable_internal("Store conditional unimplemented on this target"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 1611)
;
1612 }
1613
1614 /// Perform a masked atomicrmw using a target-specific intrinsic. This
1615 /// represents the core LL/SC loop which will be lowered at a late stage by
1616 /// the backend.
1617 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
1618 AtomicRMWInst *AI,
1619 Value *AlignedAddr, Value *Incr,
1620 Value *Mask, Value *ShiftAmt,
1621 AtomicOrdering Ord) const {
1622 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked atomicrmw expansion unimplemented on this target"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 1622)
;
1623 }
1624
1625 /// Perform a masked cmpxchg using a target-specific intrinsic. This
1626 /// represents the core LL/SC loop which will be lowered at a late stage by
1627 /// the backend.
1628 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
1629 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1630 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1631 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked cmpxchg expansion unimplemented on this target"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 1631)
;
1632 }
1633
1634 /// Inserts in the IR a target-specific intrinsic specifying a fence.
1635 /// It is called by AtomicExpandPass before expanding an
1636 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
1637 /// if shouldInsertFencesForAtomic returns true.
1638 ///
1639 /// Inst is the original atomic instruction, prior to other expansions that
1640 /// may be performed.
1641 ///
1642 /// This function should either return a nullptr, or a pointer to an IR-level
1643 /// Instruction*. Even complex fence sequences can be represented by a
1644 /// single Instruction* through an intrinsic to be lowered later.
1645 /// Backends should override this method to produce target-specific intrinsic
1646 /// for their fences.
1647 /// FIXME: Please note that the default implementation here in terms of
1648 /// IR-level fences exists for historical/compatibility reasons and is
1649 /// *unsound* ! Fences cannot, in general, be used to restore sequential
1650 /// consistency. For example, consider the following example:
1651 /// atomic<int> x = y = 0;
1652 /// int r1, r2, r3, r4;
1653 /// Thread 0:
1654 /// x.store(1);
1655 /// Thread 1:
1656 /// y.store(1);
1657 /// Thread 2:
1658 /// r1 = x.load();
1659 /// r2 = y.load();
1660 /// Thread 3:
1661 /// r3 = y.load();
1662 /// r4 = x.load();
1663 /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
1664 /// seq_cst. But if they are lowered to monotonic accesses, no amount of
1665 /// IR-level fences can prevent it.
1666 /// @{
1667 virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
1668 AtomicOrdering Ord) const {
1669 if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
1670 return Builder.CreateFence(Ord);
1671 else
1672 return nullptr;
1673 }
1674
1675 virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
1676 Instruction *Inst,
1677 AtomicOrdering Ord) const {
1678 if (isAcquireOrStronger(Ord))
1679 return Builder.CreateFence(Ord);
1680 else
1681 return nullptr;
1682 }
1683 /// @}
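The four-thread example in the FIXME above corresponds to the classic IRIW litmus test. The standalone C++11 sketch below writes it with std::atomic defaults (seq_cst); running it merely samples one interleaving and proves nothing, it is only meant to make the forbidden outcome concrete.

// Standalone C++11 version of the four-thread example in the comment above.
// With seq_cst operations the outcome r1==r3==1 && r2==r4==0 is forbidden.
#include <atomic>
#include <thread>
#include <cstdio>

std::atomic<int> x{0}, y{0};
int r1, r2, r3, r4;

int main() {
  std::thread t0([] { x.store(1); });                    // Thread 0
  std::thread t1([] { y.store(1); });                    // Thread 1
  std::thread t2([] { r1 = x.load(); r2 = y.load(); });  // Thread 2
  std::thread t3([] { r3 = y.load(); r4 = x.load(); });  // Thread 3
  t0.join(); t1.join(); t2.join(); t3.join();
  std::printf("r1=%d r2=%d r3=%d r4=%d\n", r1, r2, r3, r4);
  return 0;
}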
1684
1685 // Emits code that executes when the comparison result in the ll/sc
1686 // expansion of a cmpxchg instruction is such that the store-conditional will
1687 // not execute. This makes it possible to balance out the load-linked with
1688 // a dedicated instruction, if desired.
1689 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
1690 // be unnecessarily held, except if clrex, inserted by this hook, is executed.
1691 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
1692
1693 /// Returns true if the given (atomic) store should be expanded by the
1694 /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
1695 virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
1696 return false;
1697 }
1698
1699 /// Returns true if arguments should be sign-extended in lib calls.
1700 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
1701 return IsSigned;
1702 }
1703
1704 /// Returns how the given (atomic) load should be expanded by the
1705 /// IR-level AtomicExpand pass.
1706 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
1707 return AtomicExpansionKind::None;
1708 }
1709
1710 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
1711 /// AtomicExpand pass.
1712 virtual AtomicExpansionKind
1713 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1714 return AtomicExpansionKind::None;
1715 }
1716
1717 /// Returns how the IR-level AtomicExpand pass should expand the given
1718 /// AtomicRMW, if at all. Default is to never expand.
1719 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const {
1720 return AtomicExpansionKind::None;
1721 }
1722
1723 /// On some platforms, an AtomicRMW that never actually modifies the value
1724 /// (such as fetch_add of 0) can be turned into a fence followed by an
1725 /// atomic load. This may sound useless, but it makes it possible for the
1726 /// processor to keep the cacheline shared, dramatically improving
1727 /// performance. And such idempotent RMWs are useful for implementing some
1728 /// kinds of locks, see for example (justification + benchmarks):
1729 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
1730 /// This method tries doing that transformation, returning the atomic load if
1731 /// it succeeds, and nullptr otherwise.
1732 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
1733 /// another round of expansion.
1734 virtual LoadInst *
1735 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
1736 return nullptr;
1737 }
1738
1739 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
1740 /// SIGN_EXTEND, or ANY_EXTEND).
1741 virtual ISD::NodeType getExtendForAtomicOps() const {
1742 return ISD::ZERO_EXTEND;
1743 }
1744
1745 /// @}
1746
1747 /// Returns true if we should normalize
1748 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
1749 /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
1750 /// that it saves us from materializing N0 and N1 in an integer register.
1751 /// Targets that are able to perform and/or on flags should return false here.
1752 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
1753 EVT VT) const {
1754 // If a target has multiple condition registers, then it likely has logical
1755 // operations on those registers.
1756 if (hasMultipleConditionRegisters())
1757 return false;
1758 // Only do the transform if the value won't be split into multiple
1759 // registers.
1760 LegalizeTypeAction Action = getTypeAction(Context, VT);
1761 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
1762 Action != TypeSplitVector;
1763 }
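In source terms, the normalization above turns a select on an and/or of two conditions into nested selects, so the combined boolean never needs to be materialized. The sketch below uses plain C++ conditionals as stand-ins for ISD::SELECT nodes; it is illustrative only.

// Standalone sketch of the select normalization described above.
#include <cassert>

int select_and(bool n0, bool n1, int x, int y) {
  // select(n0 & n1, x, y)  ==>  select(n0, select(n1, x, y), y)
  return n0 ? (n1 ? x : y) : y;
}

int select_or(bool n0, bool n1, int x, int y) {
  // select(n0 | n1, x, y)  ==>  select(n0, x, select(n1, x, y))
  return n0 ? x : (n1 ? x : y);
}

int main() {
  for (int n0 = 0; n0 < 2; ++n0)
    for (int n1 = 0; n1 < 2; ++n1) {
      assert(select_and(n0, n1, 7, 9) == ((n0 && n1) ? 7 : 9));
      assert(select_or(n0, n1, 7, 9) == ((n0 || n1) ? 7 : 9));
    }
  return 0;
}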
1764
1765 /// Return true if a select of constants (select Cond, C1, C2) should be
1766 /// transformed into simple math ops with the condition value. For example:
1767 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
1768 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
1769 return false;
1770 }
1771
1772 /// Return true if it is profitable to transform an integer
1773 /// multiplication-by-constant into simpler operations like shifts and adds.
1774 /// This may be true if the target does not directly support the
1775 /// multiplication operation for the specified type or the sequence of simpler
1776 /// ops is faster than the multiply.
1777 virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
1778 return false;
1779 }
1780
1781 /// Return true if it is more correct/profitable to use strict FP_TO_INT
1782 /// conversion operations - canonicalizing the FP source value instead of
1783 /// converting all cases and then selecting based on value.
1784 /// This may be true if the target throws exceptions for out of bounds
1785 /// conversions or has fast FP CMOV.
1786 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1787 bool IsSigned) const {
1788 return false;
1789 }
1790
1791 //===--------------------------------------------------------------------===//
1792 // TargetLowering Configuration Methods - These methods should be invoked by
1793 // the derived class constructor to configure this object for the target.
1794 //
1795protected:
1796 /// Specify how the target extends the result of integer and floating point
1797 /// boolean values from i1 to a wider type. See getBooleanContents.
1798 void setBooleanContents(BooleanContent Ty) {
1799 BooleanContents = Ty;
1800 BooleanFloatContents = Ty;
1801 }
1802
1803 /// Specify how the target extends the result of integer and floating point
1804 /// boolean values from i1 to a wider type. See getBooleanContents.
1805 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
1806 BooleanContents = IntTy;
1807 BooleanFloatContents = FloatTy;
1808 }
1809
1810 /// Specify how the target extends the result of a vector boolean value from a
1811 /// vector of i1 to a wider type. See getBooleanContents.
1812 void setBooleanVectorContents(BooleanContent Ty) {
1813 BooleanVectorContents = Ty;
1814 }
1815
1816 /// Specify the target scheduling preference.
1817 void setSchedulingPreference(Sched::Preference Pref) {
1818 SchedPreferenceInfo = Pref;
1819 }
1820
1821 /// Indicate whether this target prefers to use _setjmp to implement
1822 /// llvm.setjmp or the version without _. Defaults to false.
1823 void setUseUnderscoreSetJmp(bool Val) {
1824 UseUnderscoreSetJmp = Val;
1825 }
1826
1827 /// Indicate whether this target prefers to use _longjmp to implement
1828 /// llvm.longjmp or the version without _. Defaults to false.
1829 void setUseUnderscoreLongJmp(bool Val) {
1830 UseUnderscoreLongJmp = Val;
1831 }
1832
1833 /// Indicate the minimum number of blocks to generate jump tables.
1834 void setMinimumJumpTableEntries(unsigned Val);
1835
1836 /// Indicate the maximum number of entries in jump tables.
1837 /// Set to zero to generate unlimited jump tables.
1838 void setMaximumJumpTableSize(unsigned);
1839
1840 /// If set to a physical register, this specifies the register that
1841 /// llvm.savestack/llvm.restorestack should save and restore.
1842 void setStackPointerRegisterToSaveRestore(unsigned R) {
1843 StackPointerRegisterToSaveRestore = R;
1844 }
1845
1846 /// Tells the code generator that the target has multiple (allocatable)
1847 /// condition registers that can be used to store the results of comparisons
1848 /// for use by selects and conditional branches. With multiple condition
1849 /// registers, the code generator will not aggressively sink comparisons into
1850 /// the blocks of their users.
1851 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
1852 HasMultipleConditionRegisters = hasManyRegs;
1853 }
1854
1855 /// Tells the code generator that the target has BitExtract instructions.
1856 /// The code generator will aggressively sink "shift"s into the blocks of
1857 /// their users if the users will generate "and" instructions which can be
1858 /// combined with "shift" to BitExtract instructions.
1859 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
1860 HasExtractBitsInsn = hasExtractInsn;
1861 }
1862
1863 /// Tells the code generator not to expand logic operations on comparison
1864 /// predicates into separate sequences that increase the amount of flow
1865 /// control.
1866 void setJumpIsExpensive(bool isExpensive = true);
1867
1868 /// Tells the code generator that this target supports floating point
1869 /// exceptions and cares about preserving floating point exception behavior.
1870 void setHasFloatingPointExceptions(bool FPExceptions = true) {
1871 HasFloatingPointExceptions = FPExceptions;
1872 }
1873
1874 /// Tells the code generator which bitwidths to bypass.
1875 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
1876 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
1877 }
1878
1879 /// Add the specified register class as an available regclass for the
1880 /// specified value type. This indicates the selector can handle values of
1881 /// that class natively.
1882 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
1883 assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
1884 RegClassForVT[VT.SimpleTy] = RC;
1885 }
1886
1887 /// Return the largest legal super-reg register class of the register class
1888 /// for the specified type and its associated "cost".
1889 virtual std::pair<const TargetRegisterClass *, uint8_t>
1890 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
1891
1892 /// Once all of the register classes are added, this allows us to compute
1893 /// derived properties we expose.
1894 void computeRegisterProperties(const TargetRegisterInfo *TRI);
1895
1896 /// Indicate that the specified operation does not work with the specified
1897 /// type and indicate what to do about it. Note that VT may refer to either
1898 /// the type of a result or that of an operand of Op.
1899 void setOperationAction(unsigned Op, MVT VT,
1900 LegalizeAction Action) {
1901 assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
1902 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
1903 }
1904
1905 /// Indicate that the specified load with extension does not work with the
1906 /// specified type and indicate what to do about it.
1907 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
1908 LegalizeAction Action) {
1909 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
1910 MemVT.isValid() && "Table isn't big enough!");
1911 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
1912 unsigned Shift = 4 * ExtType;
1913 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
1914 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
1915 }
1916
1917 /// Indicate that the specified truncating store does not work with the
1918 /// specified type and indicate what to do about it.
1919 void setTruncStoreAction(MVT ValVT, MVT MemVT,
1920 LegalizeAction Action) {
1921 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
1922 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
1923 }
1924
1925 /// Indicate that the specified indexed load does or does not work with the
1926 /// specified type and indicate what to do about it.
1927 ///
1928 /// NOTE: All indexed mode loads are initialized to Expand in
1929 /// TargetLowering.cpp
1930 void setIndexedLoadAction(unsigned IdxMode, MVT VT,
1931 LegalizeAction Action) {
1932 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
1933 (unsigned)Action < 0xf && "Table isn't big enough!");
1934 // Load actions are kept in the upper half.
1935 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
1936 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
1937 }
1938
1939 /// Indicate that the specified indexed store does or does not work with the
1940 /// specified type and indicate what to do about it.
1941 ///
1942 /// NOTE: All indexed mode stores are initialized to Expand in
1943 /// TargetLowering.cpp
1944 void setIndexedStoreAction(unsigned IdxMode, MVT VT,
1945 LegalizeAction Action) {
1946 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
1947 (unsigned)Action < 0xf && "Table isn't big enough!");
1948 // Store actions are kept in the lower half.
1949 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
1950 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
1951 }
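The two indexed-action setters above share one byte per (VT, IdxMode) pair: the load action lives in the high nibble and the store action in the low nibble. The standalone sketch below mirrors that layout with made-up action codes; none of the names are LLVM's.

// Standalone sketch of the per-(VT, IdxMode) byte shared by the indexed load
// and store actions: load action in the high nibble, store in the low nibble.
#include <cstdint>
#include <cassert>

uint8_t Cell = 0;  // stand-in for one IndexedModeActions[VT][IdxMode] entry

void setIndexedLoad(uint8_t Action)  { Cell = (Cell & ~0xF0) | uint8_t(Action << 4); }
void setIndexedStore(uint8_t Action) { Cell = (Cell & ~0x0F) | (Action & 0x0F); }
uint8_t getIndexedLoad()  { return Cell >> 4; }
uint8_t getIndexedStore() { return Cell & 0x0F; }

int main() {
  setIndexedLoad(3);   // hypothetical 4-bit action code
  setIndexedStore(1);  // another hypothetical code
  assert(getIndexedLoad() == 3 && getIndexedStore() == 1);
  return 0;
}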
1952
1953 /// Indicate that the specified condition code is or isn't supported on the
1954 /// target and indicate what to do about it.
1955 void setCondCodeAction(ISD::CondCode CC, MVT VT,
1956 LegalizeAction Action) {
1957 assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
1958 "Table isn't big enough!");
1959 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
1960 /// The lower 3 bits of the SimpleTy select the Nth 4-bit chunk within the
1961 /// 32-bit value, and the upper 29 bits index into the second dimension of the
1962 /// array to select which 32-bit value to use.
1963 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1964 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
1965 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
1966 }
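A standalone round trip of the packing just described: eight 4-bit actions per 32-bit word, the nibble chosen by the low three bits of SimpleTy and the word by the remaining bits. Table dimensions and values below are illustrative, not LLVM's.

// Standalone round trip of the CondCodeActions packing described above.
#include <cstdint>
#include <cassert>

uint32_t Table[8];  // enough words for 64 hypothetical SimpleTy values

void setAction(unsigned SimpleTy, uint32_t Action) {
  uint32_t Shift = 4 * (SimpleTy & 0x7);
  Table[SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);  // clear the old nibble
  Table[SimpleTy >> 3] |= Action << Shift;            // write the new action
}

uint32_t getAction(unsigned SimpleTy) {
  uint32_t Shift = 4 * (SimpleTy & 0x7);
  return (Table[SimpleTy >> 3] >> Shift) & 0xF;
}

int main() {
  setAction(10, 5);  // word 1, bits [8,12)
  setAction(11, 2);  // word 1, bits [12,16)
  assert(getAction(10) == 5 && getAction(11) == 2);
  return 0;
}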
1967
1968 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
1969 /// to trying a larger integer/fp until it can find one that works. If that
1970 /// default is insufficient, this method can be used by the target to override
1971 /// the default.
1972 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
1973 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
1974 }
1975
1976 /// Convenience method to set an operation to Promote and specify the type
1977 /// in a single call.
1978 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
1979 setOperationAction(Opc, OrigVT, Promote);
1980 AddPromotedToType(Opc, OrigVT, DestVT);
1981 }
1982
1983 /// Targets should invoke this method for each target independent node that
1984 /// they want to provide a custom DAG combiner for by implementing the
1985 /// PerformDAGCombine virtual method.
1986 void setTargetDAGCombine(ISD::NodeType NT) {
1987 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
1988 TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
1989 }
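setTargetDAGCombine above and hasTargetDAGCombine (line 1277) share a one-bit-per-opcode byte array: byte NT >> 3, bit NT & 7. The sketch below mirrors that bookkeeping with an arbitrary array size and opcode value.

// Standalone sketch of the one-bit-per-opcode set used by the two hooks above.
#include <cassert>

unsigned char CombineArray[64] = {};  // illustrative size, covers 512 opcodes

void setCombine(unsigned NT) { CombineArray[NT >> 3] |= 1 << (NT & 7); }
bool hasCombine(unsigned NT) { return CombineArray[NT >> 3] & (1 << (NT & 7)); }

int main() {
  setCombine(42);  // hypothetical ISD opcode value
  assert(hasCombine(42) && !hasCombine(43));
  return 0;
}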
1990
1991 /// Set the target's required jmp_buf buffer size (in bytes); default is 200
1992 void setJumpBufSize(unsigned Size) {
1993 JumpBufSize = Size;
1994 }
1995
1996 /// Set the target's required jmp_buf buffer alignment (in bytes); default is
1997 /// 0
1998 void setJumpBufAlignment(unsigned Align) {
1999 JumpBufAlignment = Align;
2000 }
2001
2002 /// Set the target's minimum function alignment (in log2(bytes))
2003 void setMinFunctionAlignment(unsigned Align) {
2004 MinFunctionAlignment = Align;
2005 }
2006
2007 /// Set the target's preferred function alignment. This should be set if
2008 /// there is a performance benefit to higher-than-minimum alignment (in
2009 /// log2(bytes))
2010 void setPrefFunctionAlignment(unsigned Align) {
2011 PrefFunctionAlignment = Align;
2012 }
2013
2014 /// Set the target's preferred loop alignment. Default alignment is zero, it
2015 /// means the target does not care about loop alignment. The alignment is
2016 /// specified in log2(bytes). The target may also override
2017 /// getPrefLoopAlignment to provide per-loop values.
2018 void setPrefLoopAlignment(unsigned Align) {
2019 PrefLoopAlignment = Align;
2020 }
2021
2022 /// Set the minimum stack alignment of an argument (in log2(bytes)).
2023 void setMinStackArgumentAlignment(unsigned Align) {
2024 MinStackArgumentAlignment = Align;
2025 }
2026
2027 /// Set the maximum atomic operation size supported by the
2028 /// backend. Atomic operations greater than this size (as well as
2029 /// ones that are not naturally aligned), will be expanded by
2030 /// AtomicExpandPass into an __atomic_* library call.
2031 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2032 MaxAtomicSizeInBitsSupported = SizeInBits;
2033 }
2034
2035 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2036 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2037 MinCmpXchgSizeInBits = SizeInBits;
2038 }
2039
2040 /// Sets whether unaligned atomic operations are supported.
2041 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2042 SupportsUnalignedAtomics = UnalignedSupported;
2043 }
2044
2045public:
2046 //===--------------------------------------------------------------------===//
2047 // Addressing mode description hooks (used by LSR etc).
2048 //
2049
2050 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2051 /// instructions reading the address. This allows as much computation as
2052 /// possible to be done in the address mode for that operand. This hook lets
2053 /// targets also pass back when this should be done on intrinsics which
2054 /// load/store.
2055 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2056 SmallVectorImpl<Value*> &/*Ops*/,
2057 Type *&/*AccessTy*/) const {
2058 return false;
2059 }
2060
2061 /// This represents an addressing mode of:
2062 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
2063 /// If BaseGV is null, there is no BaseGV.
2064 /// If BaseOffs is zero, there is no base offset.
2065 /// If HasBaseReg is false, there is no base register.
2066 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2067 /// no scale.
2068 struct AddrMode {
2069 GlobalValue *BaseGV = nullptr;
2070 int64_t BaseOffs = 0;
2071 bool HasBaseReg = false;
2072 int64_t Scale = 0;
2073 AddrMode() = default;
2074 };
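As an example of the fields above, the address gv + 16 + base + 4*index would be described roughly as follows; the struct is mirrored locally so the snippet stands alone, and the values are made up.

// Filling an AddrMode-shaped struct for "gv + 16 + base + 4*index".
#include <cstdint>

struct GlobalValue;  // opaque stand-in

struct AddrModeSketch {
  GlobalValue *BaseGV = nullptr;
  int64_t BaseOffs = 0;
  bool HasBaseReg = false;
  int64_t Scale = 0;
};

int main() {
  GlobalValue *SomeGV = nullptr;  // pretend this points at a real global
  AddrModeSketch AM;
  AM.BaseGV = SomeGV;    // gv
  AM.BaseOffs = 16;      // +16
  AM.HasBaseReg = true;  // + base register
  AM.Scale = 4;          // + 4 * index register
  return AM.Scale == 4 ? 0 : 1;
}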
2075
2076 /// Return true if the addressing mode represented by AM is legal for this
2077 /// target, for a load/store of the specified type.
2078 ///
2079 /// The type may be VoidTy, in which case only return true if the addressing
2080 /// mode is legal for a load/store of any legal type. TODO: Handle
2081 /// pre/postinc as well.
2082 ///
2083 /// If the address space cannot be determined, it will be -1.
2084 ///
2085 /// TODO: Remove default argument
2086 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2087 Type *Ty, unsigned AddrSpace,
2088 Instruction *I = nullptr) const;
2089
2090 /// Return the cost of the scaling factor used in the addressing mode
2091 /// represented by AM for this target, for a load/store of the specified type.
2092 ///
2093 /// If the AM is supported, the return value must be >= 0.
2094 /// If the AM is not supported, it returns a negative value.
2095 /// TODO: Handle pre/postinc as well.
2096 /// TODO: Remove default argument
2097 virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
2098 Type *Ty, unsigned AS = 0) const {
2099 // Default: assume that any scaling factor used in a legal AM is free.
2100 if (isLegalAddressingMode(DL, AM, Ty, AS))
2101 return 0;
2102 return -1;
2103 }
2104
2105 /// Return true if the specified immediate is legal icmp immediate, that is
2106 /// the target has icmp instructions which can compare a register against the
2107 /// immediate without having to materialize the immediate into a register.
2108 virtual bool isLegalICmpImmediate(int64_t) const {
2109 return true;
2110 }
2111
2112 /// Return true if the specified immediate is legal add immediate, that is the
2113 /// target has add instructions which can add a register with the immediate
2114 /// without having to materialize the immediate into a register.
2115 virtual bool isLegalAddImmediate(int64_t) const {
2116 return true;
2117 }
2118
2119 /// Return true if the specified immediate is legal for the value input of a
2120 /// store instruction.
2121 virtual bool isLegalStoreImmediate(int64_t Value) const {
2122 // Default implementation assumes that at least 0 works since it is likely
2123 // that a zero register exists or a zero immediate is allowed.
2124 return Value == 0;
2125 }
2126
2127 /// Return true if it's significantly cheaper to shift a vector by a uniform
2128 /// scalar than by an amount which will vary across each lane. On x86, for
2129 /// example, there is a "psllw" instruction for the former case, but no simple
2130 /// instruction for a general "a << b" operation on vectors.
2131 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2132 return false;
2133 }
2134
2135 /// Returns true if the opcode is a commutative binary operation.
2136 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2137 // FIXME: This should get its info from the td file.
2138 switch (Opcode) {
2139 case ISD::ADD:
2140 case ISD::SMIN:
2141 case ISD::SMAX:
2142 case ISD::UMIN:
2143 case ISD::UMAX:
2144 case ISD::MUL:
2145 case ISD::MULHU:
2146 case ISD::MULHS:
2147 case ISD::SMUL_LOHI:
2148 case ISD::UMUL_LOHI:
2149 case ISD::FADD:
2150 case ISD::FMUL:
2151 case ISD::AND:
2152 case ISD::OR:
2153 case ISD::XOR:
2154 case ISD::SADDO:
2155 case ISD::UADDO:
2156 case ISD::ADDC:
2157 case ISD::ADDE:
2158 case ISD::FMINNUM:
2159 case ISD::FMAXNUM:
2160 case ISD::FMINIMUM:
2161 case ISD::FMAXIMUM:
2162 return true;
2163 default: return false;
2164 }
2165 }
2166
2167 /// Return true if it's free to truncate a value of type FromTy to type
2168 /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
2169 /// by referencing its sub-register AX.
2170 /// Targets must return false when FromTy <= ToTy.
2171 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
2172 return false;
2173 }
2174
2175 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
2176 /// whether a call is in tail position. Typically this means that both results
2177 /// would be assigned to the same register or stack slot, but it could mean
2178 /// the target performs adequate checks of its own before proceeding with the
2179 /// tail call. Targets must return false when FromTy <= ToTy.
2180 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
2181 return false;
2182 }
2183
2184 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
2185 return false;
2186 }
2187
2188 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2189
2190 /// Return true if the extension represented by \p I is free.
2191 /// Unlike the is[Z|FP]ExtFree family, which is based on types,
2192 /// this method can use the context provided by \p I to decide
2193 /// whether or not \p I is free.
2194 /// This method extends the behavior of the is[Z|FP]ExtFree family.
2195 /// In other words, if is[Z|FP]ExtFree returns true, then this method
2196 /// returns true as well. The converse is not true.
2197 /// The target can perform the adequate checks by overriding isExtFreeImpl.
2198 /// \pre \p I must be a sign, zero, or fp extension.
2199 bool isExtFree(const Instruction *I) const {
2200 switch (I->getOpcode()) {
2201 case Instruction::FPExt:
2202 if (isFPExtFree(EVT::getEVT(I->getType()),
2203 EVT::getEVT(I->getOperand(0)->getType())))
2204 return true;
2205 break;
2206 case Instruction::ZExt:
2207 if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
2208 return true;
2209 break;
2210 case Instruction::SExt:
2211 break;
2212 default:
2213 llvm_unreachable("Instruction is not an extension")::llvm::llvm_unreachable_internal("Instruction is not an extension"
, "/build/llvm-toolchain-snapshot-8~svn350071/include/llvm/CodeGen/TargetLowering.h"
, 2213)
;
2214 }
2215 return isExtFreeImpl(I);
2216 }
2217
2218 /// Return true if \p Load and \p Ext can form an ExtLoad.
2219 /// For example, in AArch64
2220 /// %L = load i8, i8* %ptr
2221 /// %E = zext i8 %L to i32
2222 /// can be lowered into one load instruction
2223 /// ldrb w0, [x0]
2224 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
2225 const DataLayout &DL) const {
2226 EVT VT = getValueType(DL, Ext->getType());
2227 EVT LoadVT = getValueType(DL, Load->getType());
2228
2229 // If the load has other users and the truncate is not free, the ext
2230 // probably isn't free.
2231 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
2232 !isTruncateFree(Ext->getType(), Load->getType()))
2233 return false;
2234
2235 // Check whether the target supports casts folded into loads.
2236 unsigned LType;
2237 if (isa<ZExtInst>(Ext))
2238 LType = ISD::ZEXTLOAD;
2239 else {
2240 assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
2241 LType = ISD::SEXTLOAD;
2242 }
2243
2244 return isLoadExtLegal(LType, VT, LoadVT);
2245 }
2246
2247 /// Return true if any actual instruction that defines a value of type FromTy
2248 /// implicitly zero-extends the value to ToTy in the result register.
2249 ///
2250 /// The function should return true when it is likely that the truncate can
2251 /// be freely folded with an instruction defining a value of FromTy. If
2252 /// the defining instruction is unknown (because you're looking at a
2253 /// function argument, PHI, etc.) then the target may require an
2254 /// explicit truncate, which is not necessarily free, but this function
2255 /// does not deal with those cases.
2256 /// Targets must return false when FromTy >= ToTy.
2257 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
2258 return false;
2259 }
2260
2261 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
2262 return false;
2263 }
2264
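Continuing the hypothetical MyTargetLowering sketch (class scaffolding omitted), a hedged example of the Type-based overload; the assumption is that 32-bit results are implicitly zero-extended into 64-bit registers, as on several 64-bit targets.

    bool isZExtFree(llvm::Type *FromTy, llvm::Type *ToTy) const override {
      // Assumed register behavior: writing a 32-bit value clears the upper
      // 32 bits, so the explicit zext costs nothing.
      return FromTy->isIntegerTy(32) && ToTy->isIntegerTy(64);
    }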
2265 /// Return true if sign-extension from FromTy to ToTy is cheaper than
2266 /// zero-extension.
2267 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
2268 return false;
2269 }
2270
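A short, hedged sketch for the same hypothetical class, assuming a convention (as on some RISC targets) where i32 values are kept sign-extended in 64-bit registers:

    bool isSExtCheaperThanZExt(llvm::EVT FromTy, llvm::EVT ToTy) const override {
      // Assumption: sext i32 -> i64 is a no-op here, while zext needs an
      // explicit masking instruction.
      return FromTy == llvm::MVT::i32 && ToTy == llvm::MVT::i64;
    }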
2271 /// Return true if the target supplies and combines to a paired load
2272 /// two loaded values of type LoadedType next to each other in memory.
2273 /// RequiredAlignment gives the minimal alignment constraints that must be met
2274 /// to be able to select this paired load.
2275 ///
2276 /// This information is *not* used to generate actual paired loads, but it is
2277 /// used to generate a sequence of loads that is easier to combine into a
2278 /// paired load.
2279 /// For instance, something like this:
2280 /// a = load i64* addr
2281 /// b = trunc i64 a to i32
2282 /// c = lshr i64 a, 32
2283 /// d = trunc i64 c to i32
2284 /// will be optimized into:
2285 /// b = load i32* addr1
2286 /// d = load i32* addr2
2287 /// Where addr1 = addr2 +/- sizeof(i32).
2288 ///
2289 /// In other words, unless the target performs a post-isel load combining,
2290 /// this information should not be provided because it will generate more
2291 /// loads.
2292 virtual bool hasPairedLoad(EVT /*LoadedType*/,
2293 unsigned & /*RequiredAlignment*/) const {
2294 return false;
2295 }
2296
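A hedged sketch of how the hypothetical MyTargetLowering might advertise paired loads for naturally aligned 32- and 64-bit scalars; real targets derive these constraints from their actual load-pair instructions.

    bool hasPairedLoad(llvm::EVT LoadedType,
                       unsigned &RequiredAlignment) const override {
      if (!LoadedType.isSimple())
        return false;
      unsigned Bits = LoadedType.getSizeInBits();
      if (Bits != 32 && Bits != 64)
        return false;
      // Assumed constraint: each element must be naturally aligned (in bytes).
      RequiredAlignment = Bits / 8;
      return true;
    }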
2297 /// Return true if the target has a vector blend instruction.
2298 virtual bool hasVectorBlend() const { return false; }
2299
2300 /// Get the maximum supported factor for interleaved memory accesses.
2301 /// Default to be the minimum interleave factor: 2.
2302 virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
2303
2304 /// Lower an interleaved load to target specific intrinsics. Return
2305 /// true on success.
2306 ///
2307 /// \p LI is the vector load instruction.
2308 /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
2309 /// \p Indices is the corresponding indices for each shufflevector.
2310 /// \p Factor is the interleave factor.
2311 virtual bool lowerInterleavedLoad(LoadInst *LI,
2312 ArrayRef<ShuffleVectorInst *> Shuffles,
2313 ArrayRef<unsigned> Indices,
2314 unsigned Factor) const {
2315 return false;
2316 }
2317
2318 /// Lower an interleaved store to target specific intrinsics. Return
2319 /// true on success.
2320 ///
2321 /// \p SI is the vector store instruction.
2322 /// \p SVI is the shufflevector to RE-interleave the stored vector.
2323 /// \p Factor is the interleave factor.
2324 virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
2325 unsigned Factor) const {
2326 return false;
2327 }
2328
2329 /// Return true if zero-extending the specific node Val to type VT2 is free
2330 /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
2331 /// because it's folded such as X86 zero-extending loads).
2332 virtual bool isZExtFree(SDValue Val, EVT VT2) const {
2333 return isZExtFree(Val.getValueType(), VT2);
2334 }
2335
2336 /// Return true if an fpext operation is free (for instance, because
2337 /// single-precision floating-point numbers are implicitly extended to
2338 /// double-precision).
2339 virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
2340 assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
2341 "invalid fpext types");
2342 return false;
2343 }
2344
2345 /// Return true if an fpext operation input to an \p Opcode operation is free
2346 /// (for instance, because half-precision floating-point numbers are
2347 /// implicitly extended to float-precision) for an FMA instruction.
2348 virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const {
2349 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
2350 "invalid fpext types");
2351 return isFPExtFree(DestVT, SrcVT);
2352 }
2353
2354 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
2355 /// extend node) is profitable.
2356 virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }
2357
2358 /// Return true if an fneg operation is free to the point where it is never
2359 /// worthwhile to replace it with a bitwise operation.
2360 virtual bool isFNegFree(EVT VT) const {
2361 assert(VT.isFloatingPoint());
2362 return false;
2363 }
2364
2365 /// Return true if an fabs operation is free to the point where it is never
2366 /// worthwhile to replace it with a bitwise operation.
2367 virtual bool isFAbsFree(EVT VT) const {
2368 assert(VT.isFloatingPoint());
2369 return false;
2370 }
2371
2372 /// Return true if an FMA operation is faster than a pair of fmul and fadd
2373 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
2374 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
2375 ///
2376 /// NOTE: This may be called before legalization on types for which FMAs are
2377 /// not legal, but should return true if those types will eventually legalize
2378 /// to types that support FMAs. After legalization, it will only be called on
2379 /// types that support FMAs (via Legal or Custom actions)
2380 virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
2381 return false;
2382 }
2383
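A sketch of this hook for the hypothetical MyTargetLowering, assuming fused multiply-add is only fast for the scalar FP types the target supports natively; the type list is illustrative.

    bool isFMAFasterThanFMulAndFAdd(llvm::EVT VT) const override {
      if (!VT.isSimple())
        return false;
      switch (VT.getSimpleVT().SimpleTy) {
      case llvm::MVT::f32:
      case llvm::MVT::f64:
        return true;   // assumed: dedicated FMA unit for f32/f64
      default:
        return false;  // f16, fp128, vectors: expand fmuladd to fmul + fadd
      }
    }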
2384 /// Return true if it's profitable to narrow operations of type VT1 to
2385 /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
2386 /// i32 to i16.
2387 virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const {
2388 return false;
2389 }
2390
2391 /// Return true if it is beneficial to convert a load of a constant to
2392 /// just the constant itself.
2393 /// On some targets it might be more efficient to use a combination of
2394 /// arithmetic instructions to materialize the constant instead of loading it
2395 /// from a constant pool.
2396 virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
2397 Type *Ty) const {
2398 return false;
2399 }
2400
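A hedged sketch for the hypothetical MyTargetLowering: prefer materializing constants that fit in 64 bits with a few integer instructions instead of a constant-pool load. The cutoff is an assumption, not a measured threshold.

    bool shouldConvertConstantLoadToIntImm(const llvm::APInt &Imm,
                                           llvm::Type *Ty) const override {
      // Assumption: anything representable in 64 bits can be built with a
      // couple of move/shift instructions, which beats a load from memory.
      return Ty->isIntegerTy() && Imm.getMinSignedBits() <= 64;
    }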
2401 /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type
2402 /// from this source type with this index. This is needed because
2403 /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of
2404 /// the first element, and only the target knows which lowering is cheap.
2405 virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2406 unsigned Index) const {
2407 return false;
2408 }
2409
2410 // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
2411 // even if the vector itself has multiple uses.
2412 virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
2413 return false;
2414 }
2415
2416 // Return true if CodeGenPrepare should consider splitting large offset of a
2417 // GEP to make the GEP fit into the addressing mode and can be sunk into the
2418 // same blocks of its users.
2419 virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
2420
2421 //===--------------------------------------------------------------------===//
2422 // Runtime Library hooks
2423 //
2424
2425 /// Rename the default libcall routine name for the specified libcall.
2426 void setLibcallName(RTLIB::Libcall Call, const char *Name) {
2427 LibcallRoutineNames[Call] = Name;
2428 }
2429
2430 /// Get the libcall routine name for the specified libcall.
2431 const char *getLibcallName(RTLIB::Libcall Call) const {
2432 return LibcallRoutineNames[Call];
2433 }
2434
2435 /// Override the default CondCode to be used to test the result of the
2436 /// comparison libcall against zero.
2437 void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
2438 CmpLibcallCCs[Call] = CC;
2439 }
2440
2441 /// Get the CondCode that's to be used to test the result of the comparison
2442 /// libcall against zero.
2443 ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
2444 return CmpLibcallCCs[Call];
2445 }
2446
2447 /// Set the CallingConv that should be used for the specified libcall.
2448 void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
2449 LibcallCallingConvs[Call] = CC;
2450 }
2451
2452 /// Get the CallingConv that should be used for the specified libcall.
2453 CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
2454 return LibcallCallingConvs[Call];
2455 }
2456
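To show how these runtime-library hooks fit together, here is a hedged sketch of a helper a target constructor could call; the routine name "__my_memcpy" and the chosen calling convention are placeholders, not real runtime symbols.

    #include "llvm/CodeGen/TargetLowering.h"

    static void configureLibcalls(llvm::TargetLoweringBase &TLI) {
      // Route memcpy to a custom runtime routine using the default C convention.
      TLI.setLibcallName(llvm::RTLIB::MEMCPY, "__my_memcpy");
      TLI.setLibcallCallingConv(llvm::RTLIB::MEMCPY, llvm::CallingConv::C);
      // __eqsf2-style comparison libcalls report equality by returning zero,
      // so the result is tested against zero with SETEQ.
      TLI.setCmpLibcallCC(llvm::RTLIB::OEQ_F32, llvm::ISD::SETEQ);
    }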
2457 /// Execute target specific actions to finalize target lowering.
2458 /// This is used to set extra flags in MachineFrameInformation and to freeze
2459 /// the set of reserved registers.
2460 /// The default implementation just freezes the set of reserved registers.
2461 virtual void finalizeLowering(MachineFunction &MF) const;
2462
2463private:
2464 const TargetMachine &TM;
2465
2466 /// Tells the code generator that the target has multiple (allocatable)
2467 /// condition registers that can be used to store the results of comparisons
2468 /// for use by selects and conditional branches. With multiple condition
2469 /// registers, the code generator will not aggressively sink comparisons into
2470 /// the blocks of their users.
2471 bool HasMultipleConditionRegisters;
2472
2473 /// Tells the code generator that the target has BitExtract instructions.
2474 /// The code generator will aggressively sink "shift"s into the blocks of
2475 /// their users if the users will generate "and" instructions which can be
2476 /// combined with "shift" to BitExtract instructions.
2477 bool HasExtractBitsInsn;
2478
2479 /// Tells the code generator to bypass slow divide or remainder
2480 /// instructions. For example, BypassSlowDivWidths[32,8] tells the code
2481 /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
2482 /// div/rem when the operands are positive and less than 256.
2483 DenseMap <unsigned int, unsigned int> BypassSlowDivWidths;
2484
2485 /// Tells the code generator that it shouldn't generate extra flow control
2486 /// instructions and should attempt to combine flow control instructions via
2487 /// predication.
2488 bool JumpIsExpensive;
2489
2490 /// Whether the target supports or cares about preserving floating point
2491 /// exception behavior.
2492 bool HasFloatingPointExceptions;
2493
2494 /// This target prefers to use _setjmp to implement llvm.setjmp.
2495 ///
2496 /// Defaults to false.
2497 bool UseUnderscoreSetJmp;
2498
2499 /// This target prefers to use _longjmp to implement llvm.longjmp.
2500 ///
2501 /// Defaults to false.
2502 bool UseUnderscoreLongJmp;
2503
2504 /// Information about the contents of the high-bits in boolean values held in
2505 /// a type wider than i1. See getBooleanContents.
2506 BooleanContent BooleanContents;
2507
2508 /// Information about the contents of the high-bits in boolean values produced by
2509 /// floating-point comparisons, held in a type wider than i1. See getBooleanContents.
2510 BooleanContent BooleanFloatContents;
2511
2512 /// Information about the contents of the high-bits in boolean vector values
2513 /// when the element type is wider than i1. See getBooleanContents.
2514 BooleanContent BooleanVectorContents;
2515
2516 /// The target scheduling preference: shortest possible total cycles or lowest
2517 /// register usage.
2518 Sched::Preference SchedPreferenceInfo;
2519
2520 /// The size, in bytes, of the target's jmp_buf buffers
2521 unsigned JumpBufSize;
2522
2523 /// The alignment, in bytes, of the target's jmp_buf buffers
2524 unsigned JumpBufAlignment;
2525
2526 /// The minimum alignment that any argument on the stack needs to have.
2527 unsigned MinStackArgumentAlignment;
2528
2529 /// The minimum function alignment (used when optimizing for size, and to
2530 /// prevent explicitly provided alignment from leading to incorrect code).
2531 unsigned MinFunctionAlignment;
2532
2533 /// The preferred function alignment (used when alignment unspecified and
2534 /// optimizing for speed).
2535 unsigned PrefFunctionAlignment;
2536
2537 /// The preferred loop alignment.
2538 unsigned PrefLoopAlignment;
2539
2540 /// Size in bits of the maximum atomics size the backend supports.
2541 /// Accesses larger than this will be expanded by AtomicExpandPass.
2542 unsigned MaxAtomicSizeInBitsSupported;
2543
2544 /// Size in bits of the minimum cmpxchg or ll/sc operation the
2545 /// backend supports.
2546 unsigned MinCmpXchgSizeInBits;
2547
2548 /// This indicates if the target supports unaligned atomic operations.
2549 bool SupportsUnalignedAtomics;
2550
2551 /// If set to a physical register, this specifies the register that
2552 /// llvm.savestack/llvm.restorestack should save and restore.
2553 unsigned StackPointerRegisterToSaveRestore;
2554
2555 /// This indicates the default register class to use for each ValueType the
2556 /// target supports natively.
2557 const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
2558 unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
2559 MVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
2560
2561 /// This indicates the "representative" register class to use for each
2562 /// ValueType the target supports natively. This information is used by the
2563 /// scheduler to track register pressure. By default, the representative
2564 /// register class is the largest legal super-reg register class of the
2565 /// register class of the specified type. e.g. On x86, i8, i16, and i32's
2566 /// representative class would be GR32.
2567 const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE];
2568
2569 /// This indicates the "cost" of the "representative" register class for each
2570 /// ValueType. The cost is used by the scheduler to approximate register
2571 /// pressure.
2572 uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];
2573
2574 /// For any value types we are promoting or expanding, this contains the value
2575 /// type that we are changing to. For Expanded types, this contains one step
2576 /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
2577 /// (e.g. i64 -> i16). For types natively supported by the system, this holds
2578 /// the same type (e.g. i32 -> i32).
2579 MVT TransformToType[MVT::LAST_VALUETYPE];
2580
2581 /// For each operation and each value type, keep a LegalizeAction that
2582 /// indicates how instruction selection should deal with the operation. Most
2583 /// operations are Legal (aka, supported natively by the target), but
2584 /// operations that are not should be described. Note that operations on
2585 /// non-legal value types are not described here.
2586 LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
2587
2588 /// For each load extension type and each value type, keep a LegalizeAction
2589 /// that indicates how instruction selection should deal with a load of a
2590 /// specific value type and extension type. Uses 4-bits to store the action
2591 /// for each of the 4 load ext types.
2592 uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
2593
2594 /// For each value type pair keep a LegalizeAction that indicates whether a
2595 /// truncating store of a specific value type and truncating type is legal.
2596 LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
2597
2598 /// For each indexed mode and each value type, keep a pair of LegalizeAction
2599 /// that indicates how instruction selection should deal with the load /
2600 /// store.
2601 ///
2602 /// The first dimension is the value_type for the reference. The second
2603 /// dimension represents the various modes for load store.
2604 uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
2605
2606 /// For each condition code (ISD::CondCode) keep a LegalizeAction that
2607 /// indicates how instruction selection should deal with the condition code.
2608 ///
2609 /// Because each CC action takes up 4 bits, we need to have the array size be
2610 /// large enough to fit all of the value types. This can be done by rounding
2611 /// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
2612 uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
2613
2614protected:
2615 ValueTypeActionImpl ValueTypeActions;
2616
2617private:
2618 LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
2619
2620 /// Targets can specify ISD nodes that they would like PerformDAGCombine
2621 /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
2622 /// array.
2623 unsigned char
2624 TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
2625
2626 /// For operations that must be promoted to a specific type, this holds the
2627 /// destination type. This map should be sparse, so don't hold it as an
2628 /// array.
2629 ///
2630 /// Targets add entries to this map with AddPromotedToType(..), clients access
2631 /// this with getTypeToPromoteTo(..).
2632 std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
2633 PromoteToType;
2634
2635 /// Stores the name of each libcall.
2636 const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1];
2637
2638 /// The ISD::CondCode that should be used to test the result of each of the
2639 /// comparison libcall against zero.
2640 ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
2641
2642 /// Stores the CallingConv that should be used for each libcall.
2643 CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
2644
2645 /// Set default libcall names and calling conventions.
2646 void InitLibcalls(const Triple &TT);
2647
2648protected:
2649 /// Return true if the extension represented by \p I is free.
2650 /// \pre \p I is a sign, zero, or fp extension and
2651 /// is[Z|FP]ExtFree of the related types is not true.
2652 virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
2653
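A minimal sketch of how the hypothetical MyTargetLowering could use the instruction context this hook receives ("llvm/IR/Instructions.h" assumed included); treating extensions fed by a load as free stands in for a real pattern check.

    bool isExtFreeImpl(const llvm::Instruction *I) const override {
      // Assumed folding rule: a zext/sext whose operand is a load becomes part
      // of an extending load during selection, so the extension itself is free.
      return llvm::isa<llvm::LoadInst>(I->getOperand(0));
    }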
2654 /// Depth that GatherAllAliases should continue looking for chain
2655 /// dependencies when trying to find a more preferable chain. As an
2656 /// approximation, this should be more than the number of consecutive stores
2657 /// expected to be merged.
2658 unsigned GatherAllAliasesMaxDepth;
2659
2660 /// Specify maximum number of store instructions per memset call.
2661 ///
2662 /// When lowering \@llvm.memset this field specifies the maximum number of
2663 /// store operations that may be substituted for the call to memset. Targets
2664 /// must set this value based on the cost threshold for that target. Targets
2665 /// should assume that the memset will be done using as many of the largest
2666 /// store operations first, followed by smaller ones, if necessary, per
2667 /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
2668 /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
2669 /// store. This only applies to setting a constant array of a constant size.
2670 unsigned MaxStoresPerMemset;
2671
2672 /// Maximum number of store operations that may be substituted for the call
2673 /// to memset, used for functions with OptSize attribute.
2674 unsigned MaxStoresPerMemsetOptSize;
2675
2676 /// Specify maximum number of store instructions per memcpy call.
2677 ///
2678 /// When lowering \@llvm.memcpy this field specifies the maximum number of
2679 /// store operations that may be substituted for a call to memcpy. Targets
2680 /// must set this value based on the cost threshold for that target. Targets
2681 /// should assume that the memcpy will be done using as many of the largest
2682 /// store operations first, followed by smaller ones, if necessary, per
2683 /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
2684 /// with 32-bit alignment would result in one 4-byte store, one 2-byte store,
2685 /// and one 1-byte store. This only applies to copying a constant array of
2686 /// constant size.
2687 unsigned MaxStoresPerMemcpy;
2688
2689
2690 /// \brief Specify max number of store instructions to glue in inlined memcpy.
2691 ///
2692 /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
2693 /// of store instructions to keep together. This helps in pairing and
2694 /// vectorization later on.
2695 unsigned MaxGluedStoresPerMemcpy = 0;
2696
2697 /// Maximum number of store operations that may be substituted for a call to
2698 /// memcpy, used for functions with OptSize attribute.
2699 unsigned MaxStoresPerMemcpyOptSize;
2700 unsigned MaxLoadsPerMemcmp;
2701 unsigned MaxLoadsPerMemcmpOptSize;
2702
2703 /// Specify maximum number of store instructions per memmove call.
2704 ///
2705 /// When lowering \@llvm.memmove this field specifies the maximum number of
2706 /// store instructions that may be substituted for a call to memmove. Targets
2707 /// must set this value based on the cost threshold for that target. Targets
2708 /// should assume that the memmove will be done using as many of the largest
2709 /// store operations first, followed by smaller ones, if necessary, per
2710 /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
2711 /// with 8-bit alignment would result in nine 1-byte stores. This only
2712 /// applies to copying a constant array of constant size.
2713 unsigned MaxStoresPerMemmove;
2714
2715 /// Maximum number of store instructions that may be substituted for a call to
2716 /// memmove, used for functions with OptSize attribute.
2717 unsigned MaxStoresPerMemmoveOptSize;
2718
2719 /// Tells the code generator that select is more expensive than a branch if
2720 /// the branch is usually predicted right.
2721 bool PredictableSelectIsExpensive;
2722
2723 /// \see enableExtLdPromotion.
2724 bool EnableExtLdPromotion;
2725
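These protected limits are normally assigned in the target's own TargetLowering constructor. A hedged sketch of such a constructor body for the hypothetical MyTargetLowering follows, assuming the class declares a matching constructor; the numbers are placeholders, not tuned values.

    MyTargetLowering::MyTargetLowering(const llvm::TargetMachine &TM)
        : llvm::TargetLowering(TM) {
      // Allow up to 8 inline stores when expanding memset/memcpy/memmove,
      // and half that when optimizing for size (placeholder numbers).
      MaxStoresPerMemset = 8;
      MaxStoresPerMemsetOptSize = 4;
      MaxStoresPerMemcpy = 8;
      MaxStoresPerMemcpyOptSize = 4;
      MaxStoresPerMemmove = 8;
      MaxStoresPerMemmoveOptSize = 4;
      // Assume branches are usually well predicted, so prefer them over selects.
      PredictableSelectIsExpensive = true;
    }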
2726 /// Return true if the value types that can be represented by the specified
2727 /// register class are all legal.
2728 bool isLegalRC(const TargetRegisterInfo &TRI,
2729 const TargetRegisterClass &RC) const;
2730
2731 /// Replace/modify any TargetFrameIndex operands with a target-dependent
2732 /// sequence of memory operands that is recognized by PrologEpilogInserter.
2733 MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
2734 MachineBasicBlock *MBB) const;
2735
2736 /// Replace/modify the XRay custom event operands with target-dependent
2737 /// details.
2738 MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
2739 MachineBasicBlock *MBB) const;
2740
2741 /// Replace/modify the XRay typed event operands with target-dependent
2742 /// details.
2743 MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
2744 MachineBasicBlock *MBB) const;
2745};
2746
2747/// This class defines information used to lower LLVM code to legal SelectionDAG
2748/// operators that the target instruction selector can accept natively.
2749///
2750/// This class also defines callbacks that targets must implement to lower
2751/// target-specific constructs to SelectionDAG operators.
2752class TargetLowering : public TargetLoweringBase {
2753public:
2754 struct DAGCombinerInfo;
2755
2756 TargetLowering(const TargetLowering &) = delete;
2757 TargetLowering &operator=(const TargetLowering &) = delete;
2758
2759 /// NOTE: The TargetMachine owns TLOF.
2760 explicit TargetLowering(const TargetMachine &TM);
2761
2762 bool isPositionIndependent() const;
2763
2764 virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
2765 FunctionLoweringInfo *FLI,
2766 LegacyDivergenceAnalysis *DA) const {
2767 return false;
2768 }
2769
2770 virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
2771 return false;
2772 }
2773
2774 /// Returns true, and sets the base pointer, offset pointer, and addressing mode
2775 /// by reference, if the node's address can be legally represented as a
2776 /// pre-indexed load / store address.
2777 virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
2778 SDValue &/*Offset*/,
2779 ISD::MemIndexedMode &/*AM*/,
2780 SelectionDAG &/*DAG*/) const {
2781 return false;
2782 }
2783
2784 /// Returns true, and sets the base pointer, offset pointer, and addressing mode
2785 /// by reference, if this node can be combined with a load / store to form a
2786 /// post-indexed load / store.
2787 virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
2788 SDValue &/*Base*/,
2789 SDValue &/*Offset*/,
2790 ISD::MemIndexedMode &/*AM*/,
2791 SelectionDAG &/*DAG*/) const {
2792 return false;
2793 }
2794
2795 /// Return the entry encoding for a jump table in the current function. The
2796 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
2797 virtual unsigned getJumpTableEncoding() const;
2798
2799 virtual const MCExpr *
2800 LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
2801 const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
2802 MCContext &/*Ctx*/) const {
2803 llvm_unreachable("Need to implement this hook if target has custom JTIs");
2804 }
2805
2806 /// Returns relocation base for the given PIC jumptable.
2807 virtual SDValue getPICJumpTableRelocBase(SDValue Table,
2808 SelectionDAG &DAG) const;
2809
2810 /// This returns the relocation base for the given PIC jumptable, the same as
2811 /// getPICJumpTableRelocBase, but as an MCExpr.
2812 virtual const MCExpr *
2813 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2814 unsigned JTI, MCContext &Ctx) const;
2815
2816 /// Return true if folding a constant offset with the given GlobalAddress is
2817 /// legal. It is frequently not legal in PIC relocation models.
2818 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
2819
2820 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
2821 SDValue &Chain) const;
2822
2823 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
2824 SDValue &NewRHS, ISD::CondCode &CCCode,
2825 const SDLoc &DL) const;
2826
2827 /// Returns a pair of (return value, chain).
2828 /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
2829 std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
2830 EVT RetVT, ArrayRef<SDValue> Ops,
2831 bool isSigned, const SDLoc &dl,
2832 bool doesNotReturn = false,
2833 bool isReturnValueUsed = true) const;
2834
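A hedged usage sketch of makeLibCall: a hypothetical helper that lowers a 64-bit signed division to the SDIV_I64 libcall and returns only the call's result value (the chain is dropped for brevity, which a real lowering would not do).

    #include "llvm/CodeGen/SelectionDAG.h"
    #include "llvm/CodeGen/TargetLowering.h"

    static llvm::SDValue lowerSDiv64ViaLibcall(const llvm::TargetLowering &TLI,
                                               llvm::SelectionDAG &DAG,
                                               llvm::SDValue LHS, llvm::SDValue RHS,
                                               const llvm::SDLoc &dl) {
      llvm::SDValue Ops[] = {LHS, RHS};
      std::pair<llvm::SDValue, llvm::SDValue> CallInfo =
          TLI.makeLibCall(DAG, llvm::RTLIB::SDIV_I64, llvm::MVT::i64, Ops,
                          /*isSigned=*/true, dl);
      return CallInfo.first; // first = return value, second = chain
    }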
2835 /// Check whether parameters to a call that are passed in callee saved
2836 /// registers are the same as from the calling function. This needs to be
2837 /// checked for tail call eligibility.
2838 bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
2839 const uint32_t *CallerPreservedMask,
2840 const SmallVectorImpl<CCValAssign> &ArgLocs,
2841 const SmallVectorImpl<SDValue> &OutVals) const;
2842
2843 //===--------------------------------------------------------------------===//
2844 // TargetLowering Optimization Methods
2845 //
2846
2847 /// A convenience struct that encapsulates a DAG, and two SDValues for
2848 /// returning information from TargetLowering to its clients that want to
2849 /// combine.
2850 struct TargetLoweringOpt {
2851 SelectionDAG &DAG;
2852 bool LegalTys;
2853 bool LegalOps;
2854 SDValue Old;
2855 SDValue New;
2856
2857 explicit TargetLoweringOpt(SelectionDAG &InDAG,
2858 bool LT, bool LO) :
2859 DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
2860
2861 bool LegalTypes() const { return LegalTys; }
2862 bool LegalOperations() const { return LegalOps; }
2863
2864 bool CombineTo(SDValue O, SDValue N) {
2865 Old = O;
2866 New = N;
2867 return true;
2868 }
2869 };
2870
2871 /// Check to see if the specified operand of the specified instruction is a
2872 /// constant integer. If so, check to see if there are any bits set in the
2873 /// constant that are not demanded. If so, shrink the constant and return
2874 /// true.
2875 bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
2876 TargetLoweringOpt &TLO) const;
2877
2878 // Target hook to do target-specific const optimization, which is called by
2879 // ShrinkDemandedConstant. This function should return true if the target
2880 // doesn't want ShrinkDemandedConstant to further optimize the constant.
2881 virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
2882 TargetLoweringOpt &TLO) const {
2883 return false;
2884 }
2885
2886 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This
2887 /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
2888 /// generalized for targets with other types of implicit widening casts.
2889 bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
2890 TargetLoweringOpt &TLO) const;
2891
2892 /// Helper for SimplifyDemandedBits that can simplify an operation with
2893 /// multiple uses. This function simplifies operand \p OpIdx of \p User and
2894 /// then updates \p User with the simplified version. No other uses of
2895 /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this
2896 /// function behaves exactly like function SimplifyDemandedBits declared
2897 /// below except that it also updates the DAG by calling
2898 /// DCI.CommitTargetLoweringOpt.
2899 bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded,
2900 DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const;
2901
2902 /// Look at Op. At this point, we know that only the DemandedBits bits of the
2903 /// result of Op are ever used downstream. If we can use this information to
2904 /// simplify Op, create a new simplified DAG node and return true, returning
2905 /// the original and new nodes in Old and New. Otherwise, analyze the
2906 /// expression and return a mask of KnownOne and KnownZero bits for the
2907 /// expression (used to simplify the caller). The KnownZero/One bits may only
2908 /// be accurate for those bits in the Demanded masks.
2909 /// \p AssumeSingleUse When this parameter is true, this function will
2910 /// attempt to simplify \p Op even if there are multiple uses.
2911 /// Callers are responsible for correctly updating the DAG based on the
2912 /// results of this function, because simply replacing TLO.Old
2913 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
2914 /// has multiple uses.
2915 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
2916 const APInt &DemandedElts, KnownBits &Known,
2917 TargetLoweringOpt &TLO, unsigned Depth = 0,
2918 bool AssumeSingleUse = false) const;
2919
2920 /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
2921 /// Adds Op back to the worklist upon success.
2922 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
2923 KnownBits &Known, TargetLoweringOpt &TLO,
2924 unsigned Depth = 0,
2925 bool AssumeSingleUse = false) const;
2926
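A hedged sketch of how a target combine typically drives these helpers; the 16-bit demanded mask and the helper name are assumptions used for illustration only.

    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/Support/KnownBits.h"

    // Hypothetical: try to simplify Op given that only its low 16 bits are used,
    // committing any replacement through the DAG combiner on success.
    static bool simplifyLow16Bits(const llvm::TargetLowering &TLI, llvm::SDValue Op,
                                  llvm::TargetLowering::DAGCombinerInfo &DCI) {
      llvm::TargetLowering::TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
                                                  !DCI.isBeforeLegalizeOps());
      llvm::KnownBits Known;
      llvm::APInt Demanded =
          llvm::APInt::getLowBitsSet(Op.getValueSizeInBits(), 16);
      if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
        return false;
      DCI.CommitTargetLoweringOpt(TLO);
      return true;
    }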
2927 /// Helper wrapper around SimplifyDemandedBits.
2928 /// Adds Op back to the worklist upon success.
2929 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
2930 DAGCombinerInfo &DCI) const;
2931
2932 /// Look at Vector Op. At this point, we know that only the DemandedElts
2933 /// elements of the result of Op are ever used downstream. If we can use
2934 /// this information to simplify Op, create a new simplified DAG node and
2935 /// return true, storing the original and new nodes in TLO.
2936 /// Otherwise, analyze the expression and return a mask of KnownUndef and
2937 /// KnownZero elements for the expression (used to simplify the caller).
2938 /// The KnownUndef/Zero elements may only be accurate for those bits
2939 /// in the DemandedMask.
2940 /// \p AssumeSingleUse When this parameter is true, this function will
2941 /// attempt to simplify \p Op even if there are multiple uses.
2942 /// Callers are responsible for correctly updating the DAG based on the
2943 /// results of this function, because simply replacing TLO.Old
2944 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
2945 /// has multiple uses.
2946 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
2947 APInt &KnownUndef, APInt &KnownZero,
2948 TargetLoweringOpt &TLO, unsigned Depth = 0,
2949 bool AssumeSingleUse = false) const;
2950
2951 /// Helper wrapper around SimplifyDemandedVectorElts.
2952 /// Adds Op back to the worklist upon success.
2953 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
2954 APInt &KnownUndef, APInt &KnownZero,
2955 DAGCombinerInfo &DCI) const;
2956
2957 /// Determine which of the bits specified in Mask are known to be either zero
2958 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
2959 /// argument allows us to only collect the known bits that are shared by the
2960 /// requested vector elements.
2961 virtual void computeKnownBitsForTargetNode(const SDValue Op,
2962 KnownBits &Known,
2963 const APInt &DemandedElts,
2964 const SelectionDAG &DAG,
2965 unsigned Depth = 0) const;
2966
2967 /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
2968 /// Default implementation computes low bits based on alignment
2969 /// information. This should preserve known bits passed into it.
2970 virtual void computeKnownBitsForFrameIndex(const SDValue FIOp,
2971 KnownBits &Known,
2972 const APInt &DemandedElts,
2973 const SelectionDAG &DAG,
2974 unsigned Depth = 0) const;
2975
2976 /// This method can be implemented by targets that want to expose additional
2977 /// information about sign bits to the DAG Combiner. The DemandedElts
2978 /// argument allows us to only collect the minimum sign bits that are shared
2979 /// by the requested vector elements.
2980 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
2981 const APInt &DemandedElts,
2982 const SelectionDAG &DAG,
2983 unsigned Depth = 0) const;
2984
2985 /// Attempt to simplify any target nodes based on the demanded vector
2986 /// elements, returning true on success. Otherwise, analyze the expression and
2987 /// return a mask of KnownUndef and KnownZero elements for the expression
2988 /// (used to simplify the caller). The KnownUndef/Zero elements may only be
2989 /// accurate for those bits in the DemandedMask.
2990 virtual bool SimplifyDemandedVectorEltsForTargetNode(
2991 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
2992 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
2993
2994 /// Attempt to simplify any target nodes based on the demanded bits/elts,
2995 /// returning true on success. Otherwise, analyze the
2996 /// expression and return a mask of KnownOne and KnownZero bits for the
2997 /// expression (used to simplify the caller). The KnownZero/One bits may only
2998 /// be accurate for those bits in the Demanded masks.
2999 virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
3000 const APInt &DemandedBits,
3001 const APInt &DemandedElts,
3002 KnownBits &Known,
3003 TargetLoweringOpt &TLO,
3004 unsigned Depth = 0) const;
3005
3006 /// If \p SNaN is false, \returns true if \p Op is known to never be any
3007 /// NaN. If \p SNaN is true, \returns true if \p Op is known to never be a
3008 /// signaling NaN.
3009 virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
3010 const SelectionDAG &DAG,
3011 bool SNaN = false,
3012 unsigned Depth = 0) const;
3013 struct DAGCombinerInfo {
3014 void *DC; // The DAG Combiner object.
3015 CombineLevel Level;
3016 bool CalledByLegalizer;
3017
3018 public:
3019 SelectionDAG &DAG;
3020
3021 DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
3022 : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
3023
3024 bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
3025 bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
3026 bool isAfterLegalizeDAG() const {
3027 return Level == AfterLegalizeDAG;
3028 }
3029 CombineLevel getDAGCombineLevel() { return Level; }
3030 bool isCalledByLegalizer() const { return CalledByLegalizer; }
3031
3032 void AddToWorklist(SDNode *N);
3033 SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
3034 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
3035 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
3036
3037 void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
3038 };
3039
3040 /// Return true if N is a constant or constant vector equal to the true value
3041 /// from getBooleanContents().
3042 bool isConstTrueVal(const SDNode *N) const;
3043
3044 /// Return true if N is a constant or constant vector equal to the false value
3045 /// from getBooleanContents().
3046 bool isConstFalseVal(const SDNode *N) const;
3047
3048 /// Return if \p N is a True value when extended to \p VT.
3049 bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
3050
3051 /// Try to simplify a setcc built with the specified operands and cc. If it is
3052 /// unable to simplify it, return a null SDValue.
3053 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
3054 bool foldBooleans, DAGCombinerInfo &DCI,
3055 const SDLoc &dl) const;
3056
3057 // For targets which wrap address, unwrap for analysis.
3058 virtual SDValue unwrapAddress(SDValue N) const { return N; }
3059
3060 /// Returns true (and the GlobalValue and the offset) if the node is a
3061 /// GlobalAddress + offset.
3062 virtual bool
3063 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
3064
3065 /// This method will be invoked for all target nodes and for any
3066 /// target-independent nodes that the target has registered with invoke it
3067 /// for.
3068 ///
3069 /// The semantics are as follows:
3070 /// Return Value:
3071 /// SDValue.Val == 0 - No change was made
3072 /// SDValue.Val == N - N was replaced, is dead, and is already handled.
3073 /// otherwise - N should be replaced by the returned Operand.
3074 ///
3075 /// In addition, methods provided by DAGCombinerInfo may be used to perform
3076 /// more complex transformations.
3077 ///
3078 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
3079
3080 /// Return true if it is profitable to move this shift by a constant amount
3081 /// through its operand, adjusting any immediate operands as necessary to
3082 /// preserve semantics. This transformation may not be desirable if it
3083 /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
3084 /// extraction in AArch64). By default, it returns true.
3085 ///
3086 /// @param N the shift node
3087 /// @param Level the current DAGCombine legalization level.
3088 virtual bool isDesirableToCommuteWithShift(const SDNode *N,
3089 CombineLevel Level) const {
3090 return true;
3091 }
3092
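A hedged sketch of how the hypothetical MyTargetLowering might restrict this combine so a target-specific bitfield-extract pattern can still match before type legalization; the exact condition is an assumption.

    bool isDesirableToCommuteWithShift(const llvm::SDNode *N,
                                       llvm::CombineLevel Level) const override {
      // Keep (and (shl X, C1), C2) intact early on so an assumed
      // bitfield-extract pattern can still be formed; allow the commute later.
      return Level >= llvm::CombineLevel::AfterLegalizeTypes;
    }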
3093 /// Return true if it is profitable to fold a pair of shifts into a mask.
3094 /// This is usually true on most targets. But some targets, like Thumb1,
3095 /// have immediate shift instructions, but no immediate "and" instruction;
3096 /// this makes the fold unprofitable.
3097 virtual bool shouldFoldShiftPairToMask(const SDNode *N,
3098