Bug Summary

File: llvm/include/llvm/CodeGen/TargetLowering.h
Warning: line 1348, column 31
Called C++ object pointer is null
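
The numbered analyzer events in the listing below suggest the following path: AArch64TTIImpl::getCmpSelInstrCost is first entered with a non-vector ValTy, falls through to the BaseT::getCmpSelInstrCost call at source line 626 (events 5-6), is re-entered with a vector ValTy and a null CondTy (events 20-25), and then passes the null CondTy as the second parameter of TLI->getValueType at source line 617 (events 26-27), which dereferences its Ty parameter at TargetLowering.h:1348. The standalone sketch below only mirrors that shape: Ty, Lowering, and getCmpSelCost are hypothetical stand-ins, not the LLVM classes, and the null guard is an illustration of how the dereference could be avoided, not the upstream fix.

// Minimal sketch of the reported shape (hypothetical types, not the LLVM API).
#include <cstdio>

struct Ty {
  bool IsVector;
  bool isVectorTy() const { return IsVector; }
};

struct Lowering {
  // Unconditionally dereferences its argument, like the flagged call inside
  // TargetLoweringBase::getValueType().
  int getValueType(const Ty *T) const { return T->isVectorTy() ? 2 : 1; }
};

// CondTy is optional in this simplified interface; forwarding a null CondTy
// into getValueType() reproduces the kind of defect the analyzer reports.
int getCmpSelCost(const Lowering &TLI, const Ty *ValTy, const Ty *CondTy) {
  if (ValTy->isVectorTy()) {
    if (!CondTy)          // assumed guard: fall back instead of dereferencing
      return 1;
    return TLI.getValueType(CondTy);
  }
  return 1;
}

int main() {
  Lowering TLI;
  Ty Vec{true};
  std::printf("cost = %d\n", getCmpSelCost(TLI, &Vec, /*CondTy=*/nullptr));
  return 0;
}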

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExpandImm.h"
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/IntrinsicsAArch64.h"
19#include "llvm/Support/Debug.h"
20#include <algorithm>
21using namespace llvm;
22
23#define DEBUG_TYPE "aarch64tti"
24
25static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
26 cl::init(true), cl::Hidden);
27
28bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
29 const Function *Callee) const {
30 const TargetMachine &TM = getTLI()->getTargetMachine();
31
32 const FeatureBitset &CallerBits =
33 TM.getSubtargetImpl(*Caller)->getFeatureBits();
34 const FeatureBitset &CalleeBits =
35 TM.getSubtargetImpl(*Callee)->getFeatureBits();
36
37 // Inline a callee if its target-features are a subset of the caller's
38 // target-features.
39 return (CallerBits & CalleeBits) == CalleeBits;
40}
41
42/// Calculate the cost of materializing a 64-bit value. This helper
43/// method might only calculate a fraction of a larger immediate. Therefore it
44/// is valid to return a cost of ZERO.
45int AArch64TTIImpl::getIntImmCost(int64_t Val) {
46 // Check if the immediate can be encoded within an instruction.
47 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
48 return 0;
49
50 if (Val < 0)
51 Val = ~Val;
52
53 // Calculate how many moves we will need to materialize this constant.
54 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
55 AArch64_IMM::expandMOVImm(Val, 64, Insn);
56 return Insn.size();
57}
58
59/// Calculate the cost of materializing the given constant.
60int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
61 assert(Ty->isIntegerTy());
62
63 unsigned BitSize = Ty->getPrimitiveSizeInBits();
64 if (BitSize == 0)
65 return ~0U;
66
67 // Sign-extend all constants to a multiple of 64-bit.
68 APInt ImmVal = Imm;
69 if (BitSize & 0x3f)
70 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
71
72 // Split the constant into 64-bit chunks and calculate the cost for each
73 // chunk.
74 int Cost = 0;
75 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
76 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
77 int64_t Val = Tmp.getSExtValue();
78 Cost += getIntImmCost(Val);
79 }
80 // We need at least one instruction to materialize the constant.
81 return std::max(1, Cost);
82}
83
84int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
85 const APInt &Imm, Type *Ty) {
86 assert(Ty->isIntegerTy());
87
88 unsigned BitSize = Ty->getPrimitiveSizeInBits();
89 // There is no cost model for constants with a bit size of 0. Return TCC_Free
90 // here, so that constant hoisting will ignore this constant.
91 if (BitSize == 0)
92 return TTI::TCC_Free;
93
94 unsigned ImmIdx = ~0U;
95 switch (Opcode) {
96 default:
97 return TTI::TCC_Free;
98 case Instruction::GetElementPtr:
99 // Always hoist the base address of a GetElementPtr.
100 if (Idx == 0)
101 return 2 * TTI::TCC_Basic;
102 return TTI::TCC_Free;
103 case Instruction::Store:
104 ImmIdx = 0;
105 break;
106 case Instruction::Add:
107 case Instruction::Sub:
108 case Instruction::Mul:
109 case Instruction::UDiv:
110 case Instruction::SDiv:
111 case Instruction::URem:
112 case Instruction::SRem:
113 case Instruction::And:
114 case Instruction::Or:
115 case Instruction::Xor:
116 case Instruction::ICmp:
117 ImmIdx = 1;
118 break;
119 // Always return TCC_Free for the shift value of a shift instruction.
120 case Instruction::Shl:
121 case Instruction::LShr:
122 case Instruction::AShr:
123 if (Idx == 1)
124 return TTI::TCC_Free;
125 break;
126 case Instruction::Trunc:
127 case Instruction::ZExt:
128 case Instruction::SExt:
129 case Instruction::IntToPtr:
130 case Instruction::PtrToInt:
131 case Instruction::BitCast:
132 case Instruction::PHI:
133 case Instruction::Call:
134 case Instruction::Select:
135 case Instruction::Ret:
136 case Instruction::Load:
137 break;
138 }
139
140 if (Idx == ImmIdx) {
141 int NumConstants = (BitSize + 63) / 64;
142 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
143 return (Cost <= NumConstants * TTI::TCC_Basic)
144 ? static_cast<int>(TTI::TCC_Free)
145 : Cost;
146 }
147 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
148}
149
150int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
151 const APInt &Imm, Type *Ty) {
152 assert(Ty->isIntegerTy());
153
154 unsigned BitSize = Ty->getPrimitiveSizeInBits();
155 // There is no cost model for constants with a bit size of 0. Return TCC_Free
156 // here, so that constant hoisting will ignore this constant.
157 if (BitSize == 0)
158 return TTI::TCC_Free;
159
160 // Most (all?) AArch64 intrinsics do not support folding immediates into the
161 // selected instruction, so we compute the materialization cost for the
162 // immediate directly.
163 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
164 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
165
166 switch (IID) {
167 default:
168 return TTI::TCC_Free;
169 case Intrinsic::sadd_with_overflow:
170 case Intrinsic::uadd_with_overflow:
171 case Intrinsic::ssub_with_overflow:
172 case Intrinsic::usub_with_overflow:
173 case Intrinsic::smul_with_overflow:
174 case Intrinsic::umul_with_overflow:
175 if (Idx == 1) {
176 int NumConstants = (BitSize + 63) / 64;
177 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
178 return (Cost <= NumConstants * TTI::TCC_Basic)
179 ? static_cast<int>(TTI::TCC_Free)
180 : Cost;
181 }
182 break;
183 case Intrinsic::experimental_stackmap:
184 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
185 return TTI::TCC_Free;
186 break;
187 case Intrinsic::experimental_patchpoint_void:
188 case Intrinsic::experimental_patchpoint_i64:
189 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
190 return TTI::TCC_Free;
191 break;
192 }
193 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
194}
195
196TargetTransformInfo::PopcntSupportKind
197AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
198 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
199 if (TyWidth == 32 || TyWidth == 64)
200 return TTI::PSK_FastHardware;
201 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
202 return TTI::PSK_Software;
203}
204
205bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
206 ArrayRef<const Value *> Args) {
207
208 // A helper that returns a vector type from the given type. The number of
209 // elements in type Ty determines the vector width.
210 auto toVectorTy = [&](Type *ArgTy) {
211 return VectorType::get(ArgTy->getScalarType(),
212 DstTy->getVectorNumElements());
213 };
214
215 // Exit early if DstTy is not a vector type whose elements are at least
216 // 16-bits wide.
217 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
218 return false;
219
220 // Determine if the operation has a widening variant. We consider both the
221 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
222 // instructions.
223 //
224 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
225 // verify that their extending operands are eliminated during code
226 // generation.
227 switch (Opcode) {
228 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
229 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
230 break;
231 default:
232 return false;
233 }
234
235 // To be a widening instruction (either the "wide" or "long" versions), the
236 // second operand must be a sign- or zero extend having a single user. We
237 // only consider extends having a single user because they may otherwise not
238 // be eliminated.
239 if (Args.size() != 2 ||
240 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
241 !Args[1]->hasOneUse())
242 return false;
243 auto *Extend = cast<CastInst>(Args[1]);
244
245 // Legalize the destination type and ensure it can be used in a widening
246 // operation.
247 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
248 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
249 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
250 return false;
251
252 // Legalize the source type and ensure it can be used in a widening
253 // operation.
254 Type *SrcTy = toVectorTy(Extend->getSrcTy());
255 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
256 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
257 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
258 return false;
259
260 // Get the total number of vector elements in the legalized types.
261 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
262 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
263
264 // Return true if the legalized types have the same number of vector elements
265 // and the destination element type size is twice that of the source type.
266 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
267}
268
269int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
270 const Instruction *I) {
271 int ISD = TLI->InstructionOpcodeToISD(Opcode);
272 assert(ISD && "Invalid opcode");
273
274 // If the cast is observable, and it is used by a widening instruction (e.g.,
275 // uaddl, saddw, etc.), it may be free.
276 if (I && I->hasOneUse()) {
277 auto *SingleUser = cast<Instruction>(*I->user_begin());
278 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
279 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
280 // If the cast is the second operand, it is free. We will generate either
281 // a "wide" or "long" version of the widening instruction.
282 if (I == SingleUser->getOperand(1))
283 return 0;
284 // If the cast is not the second operand, it will be free if it looks the
285 // same as the second operand. In this case, we will generate a "long"
286 // version of the widening instruction.
287 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
288 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
289 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
290 return 0;
291 }
292 }
293
294 EVT SrcTy = TLI->getValueType(DL, Src);
295 EVT DstTy = TLI->getValueType(DL, Dst);
296
297 if (!SrcTy.isSimple() || !DstTy.isSimple())
298 return BaseT::getCastInstrCost(Opcode, Dst, Src);
299
300 static const TypeConversionCostTblEntry
301 ConversionTbl[] = {
302 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
303 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
304 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
305 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
306
307 // The number of shll instructions for the extension.
308 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
309 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
310 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
311 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
312 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
313 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
314 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
315 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
316 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
317 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
318 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
319 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
320 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
321 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
322 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
323 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
324
325 // LowerVectorINT_TO_FP:
326 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
327 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
328 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
329 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
330 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
331 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
332
333 // Complex: to v2f32
334 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
335 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
336 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
337 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
338 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
339 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
340
341 // Complex: to v4f32
342 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
343 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
344 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
345 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
346
347 // Complex: to v8f32
348 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
349 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
350 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
351 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
352
353 // Complex: to v16f32
354 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
355 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
356
357 // Complex: to v2f64
358 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
359 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
360 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
361 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
362 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
363 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
364
365
366 // LowerVectorFP_TO_INT
367 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
368 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
369 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
370 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
371 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
372 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
373
374 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
375 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
376 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
377 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
378 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
379 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
380 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
381
382 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
383 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
384 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
385 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
386 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
387
388 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
389 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
390 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
391 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
392 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
393 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
394 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
395 };
396
397 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
398 DstTy.getSimpleVT(),
399 SrcTy.getSimpleVT()))
400 return Entry->Cost;
401
402 return BaseT::getCastInstrCost(Opcode, Dst, Src);
403}
404
405int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
406 VectorType *VecTy,
407 unsigned Index) {
408
409 // Make sure we were given a valid extend opcode.
410 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
411 "Invalid opcode");
412
413 // We are extending an element we extract from a vector, so the source type
414 // of the extend is the element type of the vector.
415 auto *Src = VecTy->getElementType();
416
417 // Sign- and zero-extends are for integer types only.
418 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
419
420 // Get the cost for the extract. We compute the cost (if any) for the extend
421 // below.
422 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
423
424 // Legalize the types.
425 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
426 auto DstVT = TLI->getValueType(DL, Dst);
427 auto SrcVT = TLI->getValueType(DL, Src);
428
429 // If the resulting type is still a vector and the destination type is legal,
430 // we may get the extension for free. If not, get the default cost for the
431 // extend.
432 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
433 return Cost + getCastInstrCost(Opcode, Dst, Src);
434
435 // The destination type should be larger than the element type. If not, get
436 // the default cost for the extend.
437 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
438 return Cost + getCastInstrCost(Opcode, Dst, Src);
439
440 switch (Opcode) {
441 default:
442 llvm_unreachable("Opcode should be either SExt or ZExt");
443
444 // For sign-extends, we only need a smov, which performs the extension
445 // automatically.
446 case Instruction::SExt:
447 return Cost;
448
449 // For zero-extends, the extend is performed automatically by a umov unless
450 // the destination type is i64 and the element type is i8 or i16.
451 case Instruction::ZExt:
452 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
453 return Cost;
454 }
455
456 // If we are unable to perform the extend for free, get the default cost.
457 return Cost + getCastInstrCost(Opcode, Dst, Src);
458}
459
460int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
461 unsigned Index) {
462 assert(Val->isVectorTy() && "This must be a vector type");
463
464 if (Index != -1U) {
465 // Legalize the type.
466 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
467
468 // This type is legalized to a scalar type.
469 if (!LT.second.isVector())
470 return 0;
471
472 // The type may be split. Normalize the index to the new type.
473 unsigned Width = LT.second.getVectorNumElements();
474 Index = Index % Width;
475
476 // The element at index zero is already inside the vector.
477 if (Index == 0)
478 return 0;
479 }
480
481 // All other insert/extracts cost this much.
482 return ST->getVectorInsertExtractBaseCost();
483}
484
485int AArch64TTIImpl::getArithmeticInstrCost(
486 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
487 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
488 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
489 const Instruction *CxtI) {
490 // Legalize the type.
491 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
492
493 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
494 // add in the widening overhead specified by the sub-target. Since the
495 // extends feeding widening instructions are performed automatically, they
496 // aren't present in the generated code and have a zero cost. By adding a
497 // widening overhead here, we attach the total cost of the combined operation
498 // to the widening instruction.
499 int Cost = 0;
500 if (isWideningInstruction(Ty, Opcode, Args))
501 Cost += ST->getWideningBaseCost();
502
503 int ISD = TLI->InstructionOpcodeToISD(Opcode);
504
505 switch (ISD) {
506 default:
507 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
508 Opd1PropInfo, Opd2PropInfo);
509 case ISD::SDIV:
510 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
511 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
512 // On AArch64, scalar signed division by a power-of-two constant is
513 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
514 // The OperandValue properties may not be the same as those of the
515 // previous operation; conservatively assume OP_None.
516 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
517 TargetTransformInfo::OP_None,
518 TargetTransformInfo::OP_None);
519 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
520 TargetTransformInfo::OP_None,
521 TargetTransformInfo::OP_None);
522 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
523 TargetTransformInfo::OP_None,
524 TargetTransformInfo::OP_None);
525 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
526 TargetTransformInfo::OP_None,
527 TargetTransformInfo::OP_None);
528 return Cost;
529 }
530 LLVM_FALLTHROUGH;
531 case ISD::UDIV:
532 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
533 auto VT = TLI->getValueType(DL, Ty);
534 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
535 // Vector signed division by a constant is expanded to the
536 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
537 // to MULHS + SUB + SRL + ADD + SRL.
538 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
539 Opd2Info,
540 TargetTransformInfo::OP_None,
541 TargetTransformInfo::OP_None);
542 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
543 Opd2Info,
544 TargetTransformInfo::OP_None,
545 TargetTransformInfo::OP_None);
546 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
547 Opd2Info,
548 TargetTransformInfo::OP_None,
549 TargetTransformInfo::OP_None);
550 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
551 }
552 }
553
554 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
555 Opd1PropInfo, Opd2PropInfo);
556 if (Ty->isVectorTy()) {
557 // On AArch64, vector divisions are not supported natively and are
558 // expanded into scalar divisions of each pair of elements.
559 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
560 Opd2Info, Opd1PropInfo, Opd2PropInfo);
561 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
562 Opd2Info, Opd1PropInfo, Opd2PropInfo);
563 // TODO: if one of the arguments is scalar, then it's not necessary to
564 // double the cost of handling the vector elements.
565 Cost += Cost;
566 }
567 return Cost;
568
569 case ISD::ADD:
570 case ISD::MUL:
571 case ISD::XOR:
572 case ISD::OR:
573 case ISD::AND:
574 // These nodes are marked as 'custom' for combining purposes only.
575 // We know that they are legal. See LowerAdd in ISelLowering.
576 return (Cost + 1) * LT.first;
577 }
578}
579
580int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
581 const SCEV *Ptr) {
582 // Address computations in vectorized code with non-consecutive addresses will
583 // likely result in more instructions compared to scalar code where the
584 // computation can more often be merged into the index mode. The resulting
585 // extra micro-ops can significantly decrease throughput.
586 unsigned NumVectorInstToHideOverhead = 10;
587 int MaxMergeDistance = 64;
588
589 if (Ty->isVectorTy() && SE &&
590 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
591 return NumVectorInstToHideOverhead;
592
593 // In many cases the address computation is not merged into the instruction
594 // addressing mode.
595 return 1;
596}
597
598int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
599 Type *CondTy, const Instruction *I) {
600
601 int ISD = TLI->InstructionOpcodeToISD(Opcode);
602 // We don't lower some vector selects that are wider than the register
603 // width very well.
604 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
1. Calling 'Type::isVectorTy'
4. Returning from 'Type::isVectorTy'
20. Calling 'Type::isVectorTy'
23. Returning from 'Type::isVectorTy'
24. Assuming 'ISD' is equal to SELECT
25. Taking true branch
605 // We would need this many instructions to hide the scalarization happening.
606 const int AmortizationCost = 20;
607 static const TypeConversionCostTblEntry
608 VectorSelectTbl[] = {
609 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
610 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
611 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
612 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
613 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
614 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
615 };
616
617 EVT SelCondTy = TLI->getValueType(DL, CondTy);
26. Passing null pointer value via 2nd parameter 'Ty'
27. Calling 'TargetLoweringBase::getValueType'
618 EVT SelValTy = TLI->getValueType(DL, ValTy);
619 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
620 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
621 SelCondTy.getSimpleVT(),
622 SelValTy.getSimpleVT()))
623 return Entry->Cost;
624 }
625 }
626 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
5. Passing value via 3rd parameter 'CondTy'
6. Calling 'BasicTTIImplBase::getCmpSelInstrCost'
627}
628
629AArch64TTIImpl::TTI::MemCmpExpansionOptions
630AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
631 TTI::MemCmpExpansionOptions Options;
632 Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
633 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
634 Options.NumLoadsPerBlock = Options.MaxNumLoads;
635 // TODO: Though vector loads usually perform well on AArch64, on some targets
636 // they may wake up the FP unit, which raises the power consumption. Perhaps
637 // they could be used with no holds barred (-O3).
638 Options.LoadSizes = {8, 4, 2, 1};
639 return Options;
640}
641
642int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
643 MaybeAlign Alignment, unsigned AddressSpace,
644 const Instruction *I) {
645 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
646
647 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
648 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
649 // Unaligned stores are extremely inefficient. We don't split all
650 // unaligned 128-bit stores because of the negative impact that has been
651 // shown in practice on inlined block copy code.
652 // We make such stores expensive so that we will only vectorize if there
653 // are 6 other instructions getting vectorized.
654 const int AmortizationCost = 6;
655
656 return LT.first * 2 * AmortizationCost;
657 }
658
659 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
660 unsigned ProfitableNumElements;
661 if (Opcode == Instruction::Store)
662 // We use a custom trunc store lowering so v.4b should be profitable.
663 ProfitableNumElements = 4;
664 else
665 // We scalarize the loads because there is no v.4b register and we
666 // have to promote the elements to v.2.
667 ProfitableNumElements = 8;
668
669 if (Ty->getVectorNumElements() < ProfitableNumElements) {
670 unsigned NumVecElts = Ty->getVectorNumElements();
671 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
672 // We generate 2 instructions per vector element.
673 return NumVectorizableInstsToAmortize * NumVecElts * 2;
674 }
675 }
676
677 return LT.first;
678}
679
680int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
681 unsigned Factor,
682 ArrayRef<unsigned> Indices,
683 unsigned Alignment,
684 unsigned AddressSpace,
685 bool UseMaskForCond,
686 bool UseMaskForGaps) {
687 assert(Factor >= 2 && "Invalid interleave factor");
688 assert(isa<VectorType>(VecTy) && "Expect a vector type");
689
690 if (!UseMaskForCond && !UseMaskForGaps &&
691 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
692 unsigned NumElts = VecTy->getVectorNumElements();
693 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
694
695 // ldN/stN only support legal vector types of size 64 or 128 in bits.
696 // Accesses having vector types that are a multiple of 128 bits can be
697 // matched to more than one ldN/stN instruction.
698 if (NumElts % Factor == 0 &&
699 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
700 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
701 }
702
703 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
704 Alignment, AddressSpace,
705 UseMaskForCond, UseMaskForGaps);
706}
707
708int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
709 int Cost = 0;
710 for (auto *I : Tys) {
711 if (!I->isVectorTy())
712 continue;
713 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
714 Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
715 getMemoryOpCost(Instruction::Load, I, Align(128), 0);
716 }
717 return Cost;
718}
719
720unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
721 return ST->getMaxInterleaveFactor();
722}
723
724// For Falkor, we want to avoid having too many strided loads in a loop since
725// that can exhaust the HW prefetcher resources. We adjust the unroller
726// MaxCount preference below to attempt to ensure unrolling doesn't create too
727// many strided loads.
728static void
729getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
730 TargetTransformInfo::UnrollingPreferences &UP) {
731 enum { MaxStridedLoads = 7 };
732 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
733 int StridedLoads = 0;
734 // FIXME? We could make this more precise by looking at the CFG and
735 // e.g. not counting loads in each side of an if-then-else diamond.
736 for (const auto BB : L->blocks()) {
737 for (auto &I : *BB) {
738 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
739 if (!LMemI)
740 continue;
741
742 Value *PtrValue = LMemI->getPointerOperand();
743 if (L->isLoopInvariant(PtrValue))
744 continue;
745
746 const SCEV *LSCEV = SE.getSCEV(PtrValue);
747 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
748 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
749 continue;
750
751 // FIXME? We could take pairing of unrolled load copies into account
752 // by looking at the AddRec, but we would probably have to limit this
753 // to loops with no stores or other memory optimization barriers.
754 ++StridedLoads;
755 // We've seen enough strided loads that seeing more won't make a
756 // difference.
757 if (StridedLoads > MaxStridedLoads / 2)
758 return StridedLoads;
759 }
760 }
761 return StridedLoads;
762 };
763
764 int StridedLoads = countStridedLoads(L, SE);
765 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
766 << " strided loads\n");
767 // Pick the largest power of 2 unroll count that won't result in too many
768 // strided loads.
769 if (StridedLoads) {
770 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
771 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
772 << UP.MaxCount << '\n');
773 }
774}
775
776void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
777 TTI::UnrollingPreferences &UP) {
778 // Enable partial unrolling and runtime unrolling.
779 BaseT::getUnrollingPreferences(L, SE, UP);
780
781 // An inner loop is more likely to be hot, and its runtime check can be
782 // hoisted out by the LICM pass, so the overhead is lower; try a larger
783 // threshold to unroll more loops.
784 if (L->getLoopDepth() > 1)
785 UP.PartialThreshold *= 2;
786
787 // Disable partial & runtime unrolling on -Os.
788 UP.PartialOptSizeThreshold = 0;
789
790 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
791 EnableFalkorHWPFUnrollFix)
792 getFalkorUnrollingPreferences(L, SE, UP);
793}
794
795Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
796 Type *ExpectedType) {
797 switch (Inst->getIntrinsicID()) {
798 default:
799 return nullptr;
800 case Intrinsic::aarch64_neon_st2:
801 case Intrinsic::aarch64_neon_st3:
802 case Intrinsic::aarch64_neon_st4: {
803 // Create a struct type
804 StructType *ST = dyn_cast<StructType>(ExpectedType);
805 if (!ST)
806 return nullptr;
807 unsigned NumElts = Inst->getNumArgOperands() - 1;
808 if (ST->getNumElements() != NumElts)
809 return nullptr;
810 for (unsigned i = 0, e = NumElts; i != e; ++i) {
811 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
812 return nullptr;
813 }
814 Value *Res = UndefValue::get(ExpectedType);
815 IRBuilder<> Builder(Inst);
816 for (unsigned i = 0, e = NumElts; i != e; ++i) {
817 Value *L = Inst->getArgOperand(i);
818 Res = Builder.CreateInsertValue(Res, L, i);
819 }
820 return Res;
821 }
822 case Intrinsic::aarch64_neon_ld2:
823 case Intrinsic::aarch64_neon_ld3:
824 case Intrinsic::aarch64_neon_ld4:
825 if (Inst->getType() == ExpectedType)
826 return Inst;
827 return nullptr;
828 }
829}
830
831bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
832 MemIntrinsicInfo &Info) {
833 switch (Inst->getIntrinsicID()) {
834 default:
835 break;
836 case Intrinsic::aarch64_neon_ld2:
837 case Intrinsic::aarch64_neon_ld3:
838 case Intrinsic::aarch64_neon_ld4:
839 Info.ReadMem = true;
840 Info.WriteMem = false;
841 Info.PtrVal = Inst->getArgOperand(0);
842 break;
843 case Intrinsic::aarch64_neon_st2:
844 case Intrinsic::aarch64_neon_st3:
845 case Intrinsic::aarch64_neon_st4:
846 Info.ReadMem = false;
847 Info.WriteMem = true;
848 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
849 break;
850 }
851
852 switch (Inst->getIntrinsicID()) {
853 default:
854 return false;
855 case Intrinsic::aarch64_neon_ld2:
856 case Intrinsic::aarch64_neon_st2:
857 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
858 break;
859 case Intrinsic::aarch64_neon_ld3:
860 case Intrinsic::aarch64_neon_st3:
861 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
862 break;
863 case Intrinsic::aarch64_neon_ld4:
864 case Intrinsic::aarch64_neon_st4:
865 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
866 break;
867 }
868 return true;
869}
870
871/// See if \p I should be considered for address type promotion. We check if \p
872/// I is a sext with the right type that is used in memory accesses. If it is
873/// used in a "complex" getelementptr, we allow it to be promoted without
874/// finding other sext instructions that sign-extended the same initial value.
875/// A getelementptr is considered "complex" if it has more than 2 operands.
876bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
877 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
878 bool Considerable = false;
879 AllowPromotionWithoutCommonHeader = false;
880 if (!isa<SExtInst>(&I))
881 return false;
882 Type *ConsideredSExtType =
883 Type::getInt64Ty(I.getParent()->getParent()->getContext());
884 if (I.getType() != ConsideredSExtType)
885 return false;
886 // See if the sext is the one with the right type and used in at least one
887 // GetElementPtrInst.
888 for (const User *U : I.users()) {
889 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
890 Considerable = true;
891 // A getelementptr is considered as "complex" if it has more than 2
892 // operands. We will promote a SExt used in such complex GEP as we
893 // expect some computation to be merged if they are done on 64 bits.
894 if (GEPInst->getNumOperands() > 2) {
895 AllowPromotionWithoutCommonHeader = true;
896 break;
897 }
898 }
899 }
900 return Considerable;
901}
902
903bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
904 TTI::ReductionFlags Flags) const {
905 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
906 unsigned ScalarBits = Ty->getScalarSizeInBits();
907 switch (Opcode) {
908 case Instruction::FAdd:
909 case Instruction::FMul:
910 case Instruction::And:
911 case Instruction::Or:
912 case Instruction::Xor:
913 case Instruction::Mul:
914 return false;
915 case Instruction::Add:
916 return ScalarBits * Ty->getVectorNumElements() >= 128;
917 case Instruction::ICmp:
918 return (ScalarBits < 64) &&
919 (ScalarBits * Ty->getVectorNumElements() >= 128);
920 case Instruction::FCmp:
921 return Flags.NoNaN;
922 default:
923 llvm_unreachable("Unhandled reduction opcode");
924 }
925 return false;
926}
927
928int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
929 bool IsPairwiseForm) {
930
931 if (IsPairwiseForm)
932 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
933
934 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
935 MVT MTy = LT.second;
936 int ISD = TLI->InstructionOpcodeToISD(Opcode);
937 assert(ISD && "Invalid opcode");
938
939 // Horizontal adds can use the 'addv' instruction. We model the cost of these
940 // instructions as normal vector adds. This is the only arithmetic vector
941 // reduction operation for which we have an instruction.
942 static const CostTblEntry CostTblNoPairwise[]{
943 {ISD::ADD, MVT::v8i8, 1},
944 {ISD::ADD, MVT::v16i8, 1},
945 {ISD::ADD, MVT::v4i16, 1},
946 {ISD::ADD, MVT::v8i16, 1},
947 {ISD::ADD, MVT::v4i32, 1},
948 };
949
950 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
951 return LT.first * Entry->Cost;
952
953 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
954}
955
956int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
957 Type *SubTp) {
958 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
959 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
960 static const CostTblEntry ShuffleTbl[] = {
961 // Broadcast shuffle kinds can be performed with 'dup'.
962 { TTI::SK_Broadcast, MVT::v8i8, 1 },
963 { TTI::SK_Broadcast, MVT::v16i8, 1 },
964 { TTI::SK_Broadcast, MVT::v4i16, 1 },
965 { TTI::SK_Broadcast, MVT::v8i16, 1 },
966 { TTI::SK_Broadcast, MVT::v2i32, 1 },
967 { TTI::SK_Broadcast, MVT::v4i32, 1 },
968 { TTI::SK_Broadcast, MVT::v2i64, 1 },
969 { TTI::SK_Broadcast, MVT::v2f32, 1 },
970 { TTI::SK_Broadcast, MVT::v4f32, 1 },
971 { TTI::SK_Broadcast, MVT::v2f64, 1 },
972 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
973 // 'zip1/zip2' instructions.
974 { TTI::SK_Transpose, MVT::v8i8, 1 },
975 { TTI::SK_Transpose, MVT::v16i8, 1 },
976 { TTI::SK_Transpose, MVT::v4i16, 1 },
977 { TTI::SK_Transpose, MVT::v8i16, 1 },
978 { TTI::SK_Transpose, MVT::v2i32, 1 },
979 { TTI::SK_Transpose, MVT::v4i32, 1 },
980 { TTI::SK_Transpose, MVT::v2i64, 1 },
981 { TTI::SK_Transpose, MVT::v2f32, 1 },
982 { TTI::SK_Transpose, MVT::v4f32, 1 },
983 { TTI::SK_Transpose, MVT::v2f64, 1 },
984 // Select shuffle kinds.
985 // TODO: handle vXi8/vXi16.
986 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
987 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
988 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
989 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
990 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
991 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
992 // PermuteSingleSrc shuffle kinds.
993 // TODO: handle vXi8/vXi16.
994 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
995 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
996 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
997 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
998 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
999 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
1000 };
1001 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
1002 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
1003 return LT.first * Entry->Cost;
1004 }
1005
1006 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
1007}
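
A recurring pattern in the file above (getCastInstrCost, getCmpSelInstrCost, getArithmeticReductionCost, getShuffleCost) is a static table of {opcode or kind, MVT, cost} entries queried with ConvertCostTableLookup or CostTableLookup, with a fall-back to the BaseT implementation when no entry matches. The sketch below shows that lookup pattern in isolation; CostEntry and lookupCost are hypothetical names, not the helpers declared in llvm/CodeGen/CostTable.h.

// Simplified sketch of the cost-table lookup pattern (hypothetical names).
#include <cstdio>

struct CostEntry {
  int Opcode;   // e.g. an ISD opcode or a shuffle kind
  int DstType;  // e.g. a simple MVT for the destination
  int SrcType;  // e.g. a simple MVT for the source
  int Cost;
};

// Linear scan over the table; returns -1 when no entry matches so the caller
// can fall back to a default (BaseT-style) cost, mirroring the control flow
// of the cost functions above.
int lookupCost(const CostEntry *Tbl, int N, int Opcode, int Dst, int Src) {
  for (int I = 0; I != N; ++I)
    if (Tbl[I].Opcode == Opcode && Tbl[I].DstType == Dst &&
        Tbl[I].SrcType == Src)
      return Tbl[I].Cost;
  return -1;
}

int main() {
  static const CostEntry Tbl[] = {{1, 10, 11, 3}, {1, 10, 12, 2}};
  int C = lookupCost(Tbl, 2, /*Opcode=*/1, /*Dst=*/10, /*Src=*/12);
  std::printf("cost = %d\n", C < 0 ? 0 : C); // prints "cost = 2"
  return 0;
}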

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/Type.h

1//===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the Type class. For more "Type"
10// stuff, look in DerivedTypes.h.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_IR_TYPE_H
15#define LLVM_IR_TYPE_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/Support/CBindingWrapping.h"
21#include "llvm/Support/Casting.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/TypeSize.h"
25#include <cassert>
26#include <cstdint>
27#include <iterator>
28
29namespace llvm {
30
31template<class GraphType> struct GraphTraits;
32class IntegerType;
33class LLVMContext;
34class PointerType;
35class raw_ostream;
36class StringRef;
37
38/// The instances of the Type class are immutable: once they are created,
39/// they are never changed. Also note that only one instance of a particular
40/// type is ever created. Thus seeing if two types are equal is a matter of
41/// doing a trivial pointer comparison. To enforce that no two equal instances
42/// are created, Type instances can only be created via static factory methods
43/// in class Type and in derived classes. Once allocated, Types are never
44/// free'd.
45///
46class Type {
47public:
48 //===--------------------------------------------------------------------===//
49 /// Definitions of all of the base types for the Type system. Based on this
50 /// value, you can cast to a class defined in DerivedTypes.h.
51 /// Note: If you add an element to this, you need to add an element to the
52 /// Type::getPrimitiveType function, or else things will break!
53 /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding.
54 ///
55 enum TypeID {
56 // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
57 VoidTyID = 0, ///< 0: type with no size
58 HalfTyID, ///< 1: 16-bit floating point type
59 FloatTyID, ///< 2: 32-bit floating point type
60 DoubleTyID, ///< 3: 64-bit floating point type
61 X86_FP80TyID, ///< 4: 80-bit floating point type (X87)
62 FP128TyID, ///< 5: 128-bit floating point type (112-bit mantissa)
63 PPC_FP128TyID, ///< 6: 128-bit floating point type (two 64-bits, PowerPC)
64 LabelTyID, ///< 7: Labels
65 MetadataTyID, ///< 8: Metadata
66 X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific)
67 TokenTyID, ///< 10: Tokens
68
69 // Derived types... see DerivedTypes.h file.
70 // Make sure FirstDerivedTyID stays up to date!
71 IntegerTyID, ///< 11: Arbitrary bit width integers
72 FunctionTyID, ///< 12: Functions
73 StructTyID, ///< 13: Structures
74 ArrayTyID, ///< 14: Arrays
75 PointerTyID, ///< 15: Pointers
76 VectorTyID ///< 16: SIMD 'packed' format, or other vector type
77 };
78
79private:
80 /// This refers to the LLVMContext in which this type was uniqued.
81 LLVMContext &Context;
82
83 TypeID ID : 8; // The current base type of this type.
84 unsigned SubclassData : 24; // Space for subclasses to store data.
85 // Note that this should be synchronized with
86 // MAX_INT_BITS value in IntegerType class.
87
88protected:
89 friend class LLVMContextImpl;
90
91 explicit Type(LLVMContext &C, TypeID tid)
92 : Context(C), ID(tid), SubclassData(0) {}
93 ~Type() = default;
94
95 unsigned getSubclassData() const { return SubclassData; }
96
97 void setSubclassData(unsigned val) {
98 SubclassData = val;
99 // Ensure we don't have any accidental truncation.
100 assert(getSubclassData() == val && "Subclass data too large for field");
101 }
102
103 /// Keeps track of how many Type*'s there are in the ContainedTys list.
104 unsigned NumContainedTys = 0;
105
106 /// A pointer to the array of Types contained by this Type. For example, this
107 /// includes the arguments of a function type, the elements of a structure,
108 /// the pointee of a pointer, the element type of an array, etc. This pointer
109 /// may be 0 for types that don't contain other types (Integer, Double,
110 /// Float).
111 Type * const *ContainedTys = nullptr;
112
113 static bool isSequentialType(TypeID TyID) {
114 return TyID == ArrayTyID || TyID == VectorTyID;
115 }
116
117public:
118 /// Print the current type.
119 /// Omit the type details if \p NoDetails == true.
120 /// E.g., let %st = type { i32, i16 }
121 /// When \p NoDetails is true, we only print %st.
122 /// Put differently, \p NoDetails prints the type as if
123 /// inlined with the operands when printing an instruction.
124 void print(raw_ostream &O, bool IsForDebug = false,
125 bool NoDetails = false) const;
126
127 void dump() const;
128
129 /// Return the LLVMContext in which this type was uniqued.
130 LLVMContext &getContext() const { return Context; }
131
132 //===--------------------------------------------------------------------===//
133 // Accessors for working with types.
134 //
135
136 /// Return the type id for the type. This will return one of the TypeID enum
137 /// elements defined above.
138 TypeID getTypeID() const { return ID; }
139
140 /// Return true if this is 'void'.
141 bool isVoidTy() const { return getTypeID() == VoidTyID; }
142
143 /// Return true if this is 'half', a 16-bit IEEE fp type.
144 bool isHalfTy() const { return getTypeID() == HalfTyID; }
145
146 /// Return true if this is 'float', a 32-bit IEEE fp type.
147 bool isFloatTy() const { return getTypeID() == FloatTyID; }
148
149 /// Return true if this is 'double', a 64-bit IEEE fp type.
150 bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
151
152 /// Return true if this is x86 long double.
153 bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; }
154
155 /// Return true if this is 'fp128'.
156 bool isFP128Ty() const { return getTypeID() == FP128TyID; }
157
158 /// Return true if this is powerpc long double.
159 bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; }
160
161 /// Return true if this is one of the six floating-point types
162 bool isFloatingPointTy() const {
163 return getTypeID() == HalfTyID || getTypeID() == FloatTyID ||
164 getTypeID() == DoubleTyID ||
165 getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID ||
166 getTypeID() == PPC_FP128TyID;
167 }
168
169 const fltSemantics &getFltSemantics() const {
170 switch (getTypeID()) {
171 case HalfTyID: return APFloat::IEEEhalf();
172 case FloatTyID: return APFloat::IEEEsingle();
173 case DoubleTyID: return APFloat::IEEEdouble();
174 case X86_FP80TyID: return APFloat::x87DoubleExtended();
175 case FP128TyID: return APFloat::IEEEquad();
176 case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
177 default: llvm_unreachable("Invalid floating type");
178 }
179 }
180
181 /// Return true if this is X86 MMX.
182 bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
183
184 /// Return true if this is a FP type or a vector of FP.
185 bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
186
187 /// Return true if this is 'label'.
188 bool isLabelTy() const { return getTypeID() == LabelTyID; }
189
190 /// Return true if this is 'metadata'.
191 bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
192
193 /// Return true if this is 'token'.
194 bool isTokenTy() const { return getTypeID() == TokenTyID; }
195
196 /// True if this is an instance of IntegerType.
197 bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
198
199 /// Return true if this is an IntegerType of the given width.
200 bool isIntegerTy(unsigned Bitwidth) const;
201
202 /// Return true if this is an integer type or a vector of integer types.
203 bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); }
204
205 /// Return true if this is an integer type or a vector of integer types of
206 /// the given width.
207 bool isIntOrIntVectorTy(unsigned BitWidth) const {
208 return getScalarType()->isIntegerTy(BitWidth);
209 }
210
211 /// Return true if this is an integer type or a pointer type.
212 bool isIntOrPtrTy() const { return isIntegerTy() || isPointerTy(); }
213
214 /// True if this is an instance of FunctionType.
215 bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
216
217 /// True if this is an instance of StructType.
218 bool isStructTy() const { return getTypeID() == StructTyID; }
219
220 /// True if this is an instance of ArrayType.
221 bool isArrayTy() const { return getTypeID() == ArrayTyID; }
222
223 /// True if this is an instance of PointerType.
224 bool isPointerTy() const { return getTypeID() == PointerTyID; }
225
226 /// Return true if this is a pointer type or a vector of pointer types.
227 bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
228
229 /// True if this is an instance of VectorType.
230 bool isVectorTy() const { return getTypeID() == VectorTyID; }
2: Assuming the condition is false
3: Returning zero, which participates in a condition later
13: Returning the value 1, which participates in a condition later
21: Assuming the condition is true
22: Returning the value 1, which participates in a condition later
231
232 /// Return true if this type could be converted with a lossless BitCast to
233 /// type 'Ty'. For example, i8* to i32*. BitCasts are valid for types of the
234 /// same size only where no re-interpretation of the bits is done.
235 /// Determine if this type could be losslessly bitcast to Ty
236 bool canLosslesslyBitCastTo(Type *Ty) const;
237
238 /// Return true if this type is empty, that is, it has no elements or all of
239 /// its elements are empty.
240 bool isEmptyTy() const;
241
242 /// Return true if the type is "first class", meaning it is a valid type for a
243 /// Value.
244 bool isFirstClassType() const {
245 return getTypeID() != FunctionTyID && getTypeID() != VoidTyID;
246 }
247
248 /// Return true if the type is a valid type for a register in codegen. This
249 /// includes all first-class types except struct and array types.
250 bool isSingleValueType() const {
251 return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() ||
252 isPointerTy() || isVectorTy();
253 }
254
255 /// Return true if the type is an aggregate type. This means it is valid as
256 /// the first operand of an insertvalue or extractvalue instruction. This
257 /// includes struct and array types, but does not include vector types.
258 bool isAggregateType() const {
259 return getTypeID() == StructTyID || getTypeID() == ArrayTyID;
260 }
261
262 /// Return true if it makes sense to take the size of this type. To get the
263 /// actual size for a particular target, it is reasonable to use the
264 /// DataLayout subsystem to do this.
265 bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const {
266 // If it's a primitive, it is always sized.
267 if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
268 getTypeID() == PointerTyID ||
269 getTypeID() == X86_MMXTyID)
270 return true;
271 // If it is not something that can have a size (e.g. a function or label),
272 // it doesn't have a size.
273 if (getTypeID() != StructTyID && getTypeID() != ArrayTyID &&
274 getTypeID() != VectorTyID)
275 return false;
276 // Otherwise we have to try harder to decide.
277 return isSizedDerivedType(Visited);
278 }
279
280 /// Return the basic size of this type if it is a primitive type. These are
281 /// fixed by LLVM and are not target-dependent.
282 /// This will return zero if the type does not have a size or is not a
283 /// primitive type.
284 ///
285 /// If this is a scalable vector type, the scalable property will be set and
286 /// the runtime size will be a positive integer multiple of the base size.
287 ///
288 /// Note that this may not reflect the size of memory allocated for an
289 /// instance of the type or the number of bytes that are written when an
290 /// instance of the type is stored to memory. The DataLayout class provides
291 /// additional query functions to provide this information.
292 ///
293   TypeSize getPrimitiveSizeInBits() const LLVM_READONLY;
294
295 /// If this is a vector type, return the getPrimitiveSizeInBits value for the
296 /// element type. Otherwise return the getPrimitiveSizeInBits value for this
297 /// type.
298   unsigned getScalarSizeInBits() const LLVM_READONLY;
299
300 /// Return the width of the mantissa of this type. This is only valid on
301 /// floating-point types. If the FP type does not have a stable mantissa (e.g.
302 /// ppc long double), this method returns -1.
303 int getFPMantissaWidth() const;
304
305 /// If this is a vector type, return the element type, otherwise return
306 /// 'this'.
307 Type *getScalarType() const {
308 if (isVectorTy())
309 return getVectorElementType();
310 return const_cast<Type*>(this);
311 }
312
313 //===--------------------------------------------------------------------===//
314 // Type Iteration support.
315 //
316 using subtype_iterator = Type * const *;
317
318 subtype_iterator subtype_begin() const { return ContainedTys; }
319 subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
320 ArrayRef<Type*> subtypes() const {
321 return makeArrayRef(subtype_begin(), subtype_end());
322 }
323
324 using subtype_reverse_iterator = std::reverse_iterator<subtype_iterator>;
325
326 subtype_reverse_iterator subtype_rbegin() const {
327 return subtype_reverse_iterator(subtype_end());
328 }
329 subtype_reverse_iterator subtype_rend() const {
330 return subtype_reverse_iterator(subtype_begin());
331 }
332
333 /// This method is used to implement the type iterator (defined at the end of
334 /// the file). For derived types, this returns the types 'contained' in the
335 /// derived type.
336 Type *getContainedType(unsigned i) const {
337     assert(i < NumContainedTys && "Index out of range!");
338 return ContainedTys[i];
339 }
340
341 /// Return the number of types in the derived type.
342 unsigned getNumContainedTypes() const { return NumContainedTys; }
343
344 //===--------------------------------------------------------------------===//
345 // Helper methods corresponding to subclass methods. This forces a cast to
346 // the specified subclass and calls its accessor. "getVectorNumElements" (for
347 // example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
348 // only intended to cover the core methods that are frequently used, helper
349 // methods should not be added here.
350
351 inline unsigned getIntegerBitWidth() const;
352
353 inline Type *getFunctionParamType(unsigned i) const;
354 inline unsigned getFunctionNumParams() const;
355 inline bool isFunctionVarArg() const;
356
357 inline StringRef getStructName() const;
358 inline unsigned getStructNumElements() const;
359 inline Type *getStructElementType(unsigned N) const;
360
361 inline Type *getSequentialElementType() const {
362     assert(isSequentialType(getTypeID()) && "Not a sequential type!");
363 return ContainedTys[0];
364 }
365
366 inline uint64_t getArrayNumElements() const;
367
368 Type *getArrayElementType() const {
369     assert(getTypeID() == ArrayTyID);
370 return ContainedTys[0];
371 }
372
373 inline bool getVectorIsScalable() const;
374 inline unsigned getVectorNumElements() const;
375 inline ElementCount getVectorElementCount() const;
376 Type *getVectorElementType() const {
377     assert(getTypeID() == VectorTyID);
378 return ContainedTys[0];
379 }
380
381 Type *getPointerElementType() const {
382     assert(getTypeID() == PointerTyID);
383 return ContainedTys[0];
384 }
385
386 /// Given an integer or vector type, change the lane bitwidth to NewBitwidth,
387 /// whilst keeping the old number of lanes.
388 inline Type *getWithNewBitWidth(unsigned NewBitWidth) const;
389
390 /// Given scalar/vector integer type, returns a type with elements twice as
391 /// wide as in the original type. For vectors, preserves element count.
392 inline Type *getExtendedType() const;
393
394 /// Get the address space of this pointer or pointer vector type.
395 inline unsigned getPointerAddressSpace() const;
396
397 //===--------------------------------------------------------------------===//
398 // Static members exported by the Type class itself. Useful for getting
399 // instances of Type.
400 //
401
402 /// Return a type based on an identifier.
403 static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
404
405 //===--------------------------------------------------------------------===//
406 // These are the builtin types that are always available.
407 //
408 static Type *getVoidTy(LLVMContext &C);
409 static Type *getLabelTy(LLVMContext &C);
410 static Type *getHalfTy(LLVMContext &C);
411 static Type *getFloatTy(LLVMContext &C);
412 static Type *getDoubleTy(LLVMContext &C);
413 static Type *getMetadataTy(LLVMContext &C);
414 static Type *getX86_FP80Ty(LLVMContext &C);
415 static Type *getFP128Ty(LLVMContext &C);
416 static Type *getPPC_FP128Ty(LLVMContext &C);
417 static Type *getX86_MMXTy(LLVMContext &C);
418 static Type *getTokenTy(LLVMContext &C);
419 static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
420 static IntegerType *getInt1Ty(LLVMContext &C);
421 static IntegerType *getInt8Ty(LLVMContext &C);
422 static IntegerType *getInt16Ty(LLVMContext &C);
423 static IntegerType *getInt32Ty(LLVMContext &C);
424 static IntegerType *getInt64Ty(LLVMContext &C);
425 static IntegerType *getInt128Ty(LLVMContext &C);
426 template <typename ScalarTy> static Type *getScalarTy(LLVMContext &C) {
427     int noOfBits = sizeof(ScalarTy) * CHAR_BIT;
428 if (std::is_integral<ScalarTy>::value) {
429 return (Type*) Type::getIntNTy(C, noOfBits);
430 } else if (std::is_floating_point<ScalarTy>::value) {
431 switch (noOfBits) {
432 case 32:
433 return Type::getFloatTy(C);
434 case 64:
435 return Type::getDoubleTy(C);
436 }
437 }
438 llvm_unreachable("Unsupported type in Type::getScalarTy")::llvm::llvm_unreachable_internal("Unsupported type in Type::getScalarTy"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/IR/Type.h"
, 438)
;
439 }
440
441 //===--------------------------------------------------------------------===//
442 // Convenience methods for getting pointer types with one of the above builtin
443 // types as pointee.
444 //
445 static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0);
446 static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
447 static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
448 static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
449 static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
450 static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
451 static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
452 static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
453 static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
454 static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
455 static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
456 static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
457 static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
458
459 /// Return a pointer to the current type. This is equivalent to
460 /// PointerType::get(Foo, AddrSpace).
461 PointerType *getPointerTo(unsigned AddrSpace = 0) const;
462
463private:
464 /// Derived types like structures and arrays are sized iff all of the members
465 /// of the type are sized as well. Since asking for their size is relatively
466 /// uncommon, move this operation out-of-line.
467 bool isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited = nullptr) const;
468};
469
470// Printing of types.
471inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
472 T.print(OS);
473 return OS;
474}
475
476// allow isa<PointerType>(x) to work without DerivedTypes.h included.
477template <> struct isa_impl<PointerType, Type> {
478 static inline bool doit(const Type &Ty) {
479 return Ty.getTypeID() == Type::PointerTyID;
480 }
481};
482
483// Create wrappers for C Binding types (see CBindingWrapping.h).
484DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
485
486/* Specialized opaque type conversions.
487 */
488inline Type **unwrap(LLVMTypeRef* Tys) {
489 return reinterpret_cast<Type**>(Tys);
490}
491
492inline LLVMTypeRef *wrap(Type **Tys) {
493 return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
494}
495
496} // end namespace llvm
497
498#endif // LLVM_IR_TYPE_H
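
The following is a minimal standalone sketch, assuming only that an LLVMContext is available (the function and variable names are hypothetical and not part of Type.h), of how getScalarType() composes with the isVectorTy()/isFPOrFPVectorTy() queries that the analyzer steps above refer to.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

void scalarTypeSketch() {
  llvm::LLVMContext Ctx;
  llvm::Type *F32 = llvm::Type::getFloatTy(Ctx);
  llvm::Type *V4F32 = llvm::VectorType::get(F32, 4);   // <4 x float>
  // For vectors, getScalarType() returns the element type ...
  bool A = V4F32->getScalarType()->isFloatTy();         // true
  bool B = V4F32->isFPOrFPVectorTy();                   // true, via getScalarType()
  // ... for scalars it returns the type itself.
  bool C = (F32->getScalarType() == F32);               // true
  (void)A; (void)B; (void)C;
}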

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/BitVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/Analysis/TargetTransformInfoImpl.h"
27#include "llvm/CodeGen/ISDOpcodes.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/CodeGen/ValueTypes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallSite.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/Value.h"
44#include "llvm/MC/MCSchedule.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/MachineValueType.h"
49#include "llvm/Support/MathExtras.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <limits>
54#include <utility>
55
56namespace llvm {
57
58class Function;
59class GlobalValue;
60class LLVMContext;
61class ScalarEvolution;
62class SCEV;
63class TargetMachine;
64
65extern cl::opt<unsigned> PartialUnrollingThreshold;
66
67/// Base class which can be used to help build a TTI implementation.
68///
69/// This class provides as much implementation of the TTI interface as is
70/// possible using the target independent parts of the code generator.
71///
72/// In order to subclass it, your class must implement a getST() method to
73/// return the subtarget, and a getTLI() method to return the target lowering.
74/// We need these methods implemented in the derived class so that this class
75/// doesn't have to duplicate storage for them.
76template <typename T>
77class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78private:
79 using BaseT = TargetTransformInfoImplCRTPBase<T>;
80 using TTI = TargetTransformInfo;
81
82 /// Estimate a cost of Broadcast as an extract and sequence of insert
83 /// operations.
84 unsigned getBroadcastShuffleOverhead(Type *Ty) {
85     assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 unsigned Cost = 0;
87 // Broadcast cost is equal to the cost of extracting the zero'th element
88 // plus the cost of inserting it into every element of the result vector.
89 Cost += static_cast<T *>(this)->getVectorInstrCost(
90 Instruction::ExtractElement, Ty, 0);
91
92 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93 Cost += static_cast<T *>(this)->getVectorInstrCost(
94 Instruction::InsertElement, Ty, i);
95 }
96 return Cost;
97 }
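
As a rough illustration of the broadcast model above (one extract of lane 0 plus one insert per result lane), here is a sketch with hypothetical per-lane costs; the numbers are assumptions, not values taken from any target.

unsigned broadcastCostSketch() {
  unsigned ExtractCost = 1, InsertCost = 1;   // hypothetical per-lane costs
  unsigned NumElts = 4;                        // e.g. a <4 x float> broadcast
  return ExtractCost + NumElts * InsertCost;   // 1 + 4 * 1 = 5
}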
98
99 /// Estimate a cost of shuffle as a sequence of extract and insert
100 /// operations.
101 unsigned getPermuteShuffleOverhead(Type *Ty) {
102     assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 unsigned Cost = 0;
104 // Shuffle cost is equal to the cost of extracting element from its argument
105 // plus the cost of inserting them onto the result vector.
106
107 // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
108 // index 0 of first vector, index 1 of second vector,index 2 of first
109 // vector and finally index 3 of second vector and insert them at index
110 // <0,1,2,3> of result vector.
111 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112 Cost += static_cast<T *>(this)
113 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114 Cost += static_cast<T *>(this)
115 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116 }
117 return Cost;
118 }
119
120 /// Estimate a cost of subvector extraction as a sequence of extract and
121 /// insert operations.
122 unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123     assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124            "Can only extract subvectors from vectors");
125 int NumSubElts = SubTy->getVectorNumElements();
126     assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127            "SK_ExtractSubvector index out of range");
128
129 unsigned Cost = 0;
130 // Subvector extraction cost is equal to the cost of extracting element from
131 // the source type plus the cost of inserting them into the result vector
132 // type.
133 for (int i = 0; i != NumSubElts; ++i) {
134 Cost += static_cast<T *>(this)->getVectorInstrCost(
135 Instruction::ExtractElement, Ty, i + Index);
136 Cost += static_cast<T *>(this)->getVectorInstrCost(
137 Instruction::InsertElement, SubTy, i);
138 }
139 return Cost;
140 }
141
142 /// Estimate a cost of subvector insertion as a sequence of extract and
143 /// insert operations.
144 unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145     assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146            "Can only insert subvectors into vectors");
147 int NumSubElts = SubTy->getVectorNumElements();
148     assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149            "SK_InsertSubvector index out of range");
150
151 unsigned Cost = 0;
152 // Subvector insertion cost is equal to the cost of extracting element from
153 // the source type plus the cost of inserting them into the result vector
154 // type.
155 for (int i = 0; i != NumSubElts; ++i) {
156 Cost += static_cast<T *>(this)->getVectorInstrCost(
157 Instruction::ExtractElement, SubTy, i);
158 Cost += static_cast<T *>(this)->getVectorInstrCost(
159 Instruction::InsertElement, Ty, i + Index);
160 }
161 return Cost;
162 }
163
164 /// Local query method delegates up to T which *must* implement this!
165 const TargetSubtargetInfo *getST() const {
166 return static_cast<const T *>(this)->getST();
167 }
168
169 /// Local query method delegates up to T which *must* implement this!
170 const TargetLoweringBase *getTLI() const {
171 return static_cast<const T *>(this)->getTLI();
172 }
173
174 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175 switch (M) {
176 case TTI::MIM_Unindexed:
177 return ISD::UNINDEXED;
178 case TTI::MIM_PreInc:
179 return ISD::PRE_INC;
180 case TTI::MIM_PreDec:
181 return ISD::PRE_DEC;
182 case TTI::MIM_PostInc:
183 return ISD::POST_INC;
184 case TTI::MIM_PostDec:
185 return ISD::POST_DEC;
186 }
187 llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 187)
;
188 }
189
190protected:
191 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192 : BaseT(DL) {}
193 virtual ~BasicTTIImplBase() = default;
194
195 using TargetTransformInfoImplBase::DL;
196
197public:
198 /// \name Scalar TTI Implementations
199 /// @{
200 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
201 unsigned AddressSpace, unsigned Alignment,
202 bool *Fast) const {
203 EVT E = EVT::getIntegerVT(Context, BitWidth);
204 return getTLI()->allowsMisalignedMemoryAccesses(
205 E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
206 }
207
208 bool hasBranchDivergence() { return false; }
209
210 bool useGPUDivergenceAnalysis() { return false; }
211
212 bool isSourceOfDivergence(const Value *V) { return false; }
213
214 bool isAlwaysUniform(const Value *V) { return false; }
215
216 unsigned getFlatAddressSpace() {
217 // Return an invalid address space.
218 return -1;
219 }
220
221 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
222 Intrinsic::ID IID) const {
223 return false;
224 }
225
226 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
227 Value *OldV, Value *NewV) const {
228 return false;
229 }
230
231 bool isLegalAddImmediate(int64_t imm) {
232 return getTLI()->isLegalAddImmediate(imm);
233 }
234
235 bool isLegalICmpImmediate(int64_t imm) {
236 return getTLI()->isLegalICmpImmediate(imm);
237 }
238
239 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
240 bool HasBaseReg, int64_t Scale,
241 unsigned AddrSpace, Instruction *I = nullptr) {
242 TargetLoweringBase::AddrMode AM;
243 AM.BaseGV = BaseGV;
244 AM.BaseOffs = BaseOffset;
245 AM.HasBaseReg = HasBaseReg;
246 AM.Scale = Scale;
247 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
248 }
249
250 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
251 const DataLayout &DL) const {
252 EVT VT = getTLI()->getValueType(DL, Ty);
253 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
254 }
255
256 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
257 const DataLayout &DL) const {
258 EVT VT = getTLI()->getValueType(DL, Ty);
259 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
260 }
261
262 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
263 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
264 }
265
266 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
267 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
268 TargetLoweringBase::AddrMode AM;
269 AM.BaseGV = BaseGV;
270 AM.BaseOffs = BaseOffset;
271 AM.HasBaseReg = HasBaseReg;
272 AM.Scale = Scale;
273 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
274 }
275
276 bool isTruncateFree(Type *Ty1, Type *Ty2) {
277 return getTLI()->isTruncateFree(Ty1, Ty2);
278 }
279
280 bool isProfitableToHoist(Instruction *I) {
281 return getTLI()->isProfitableToHoist(I);
282 }
283
284 bool useAA() const { return getST()->useAA(); }
285
286 bool isTypeLegal(Type *Ty) {
287 EVT VT = getTLI()->getValueType(DL, Ty);
288 return getTLI()->isTypeLegal(VT);
289 }
290
291 int getGEPCost(Type *PointeeType, const Value *Ptr,
292 ArrayRef<const Value *> Operands) {
293 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
294 }
295
296 int getExtCost(const Instruction *I, const Value *Src) {
297 if (getTLI()->isExtFree(I))
298 return TargetTransformInfo::TCC_Free;
299
300 if (isa<ZExtInst>(I) || isa<SExtInst>(I))
301 if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
302 if (getTLI()->isExtLoad(LI, I, DL))
303 return TargetTransformInfo::TCC_Free;
304
305 return TargetTransformInfo::TCC_Basic;
306 }
307
308 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
309 ArrayRef<const Value *> Arguments, const User *U) {
310 return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
311 }
312
313 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
314 ArrayRef<Type *> ParamTys, const User *U) {
315 if (IID == Intrinsic::cttz) {
316 if (getTLI()->isCheapToSpeculateCttz())
317 return TargetTransformInfo::TCC_Basic;
318 return TargetTransformInfo::TCC_Expensive;
319 }
320
321 if (IID == Intrinsic::ctlz) {
322 if (getTLI()->isCheapToSpeculateCtlz())
323 return TargetTransformInfo::TCC_Basic;
324 return TargetTransformInfo::TCC_Expensive;
325 }
326
327 return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
328 }
329
330 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
331 unsigned &JumpTableSize,
332 ProfileSummaryInfo *PSI,
333 BlockFrequencyInfo *BFI) {
334 /// Try to find the estimated number of clusters. Note that the number of
335 /// clusters identified in this function could be different from the actual
336     /// numbers found in lowering. This function ignores switches that are
337 /// lowered with a mix of jump table / bit test / BTree. This function was
338 /// initially intended to be used when estimating the cost of switch in
339 /// inline cost heuristic, but it's a generic cost model to be used in other
340 /// places (e.g., in loop unrolling).
341 unsigned N = SI.getNumCases();
342 const TargetLoweringBase *TLI = getTLI();
343 const DataLayout &DL = this->getDataLayout();
344
345 JumpTableSize = 0;
346 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
347
348 // Early exit if both a jump table and bit test are not allowed.
349 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
350 return N;
351
352 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
353 APInt MinCaseVal = MaxCaseVal;
354 for (auto CI : SI.cases()) {
355 const APInt &CaseVal = CI.getCaseValue()->getValue();
356 if (CaseVal.sgt(MaxCaseVal))
357 MaxCaseVal = CaseVal;
358 if (CaseVal.slt(MinCaseVal))
359 MinCaseVal = CaseVal;
360 }
361
362 // Check if suitable for a bit test
363 if (N <= DL.getIndexSizeInBits(0u)) {
364 SmallPtrSet<const BasicBlock *, 4> Dests;
365 for (auto I : SI.cases())
366 Dests.insert(I.getCaseSuccessor());
367
368 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
369 DL))
370 return 1;
371 }
372
373 // Check if suitable for a jump table.
374 if (IsJTAllowed) {
375 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
376 return N;
377 uint64_t Range =
378 (MaxCaseVal - MinCaseVal)
379 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
380 // Check whether a range of clusters is dense enough for a jump table
381 if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
382 JumpTableSize = Range;
383 return 1;
384 }
385 }
386 return N;
387 }
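
A sketch of the jump-table bookkeeping above, with made-up case values; the actual suitability decision is delegated to TLI->isSuitableForJumpTable() and TLI->isSuitableForBitTests(), which this stand-in does not reproduce.

#include <cstdint>

unsigned clusterEstimateSketch() {
  unsigned N = 12;                          // number of switch cases (assumed)
  uint64_t MinCase = 10, MaxCase = 25;      // smallest / largest case value (assumed)
  uint64_t Range = (MaxCase - MinCase) + 1; // 16 slots the table would span
  // If the target judges N cases over this Range dense enough, the whole
  // switch counts as one cluster (return 1) and JumpTableSize becomes Range.
  bool DenseEnough = true;                  // stand-in for isSuitableForJumpTable()
  return DenseEnough ? 1 : N;
}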
388
389 bool shouldBuildLookupTables() {
390 const TargetLoweringBase *TLI = getTLI();
391 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
392 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
393 }
394
395 bool haveFastSqrt(Type *Ty) {
396 const TargetLoweringBase *TLI = getTLI();
397 EVT VT = TLI->getValueType(DL, Ty);
398 return TLI->isTypeLegal(VT) &&
399 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
400 }
401
402 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
403 return true;
404 }
405
406 unsigned getFPOpCost(Type *Ty) {
407 // Check whether FADD is available, as a proxy for floating-point in
408 // general.
409 const TargetLoweringBase *TLI = getTLI();
410 EVT VT = TLI->getValueType(DL, Ty);
411 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
412 return TargetTransformInfo::TCC_Basic;
413 return TargetTransformInfo::TCC_Expensive;
414 }
415
416 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
417 const TargetLoweringBase *TLI = getTLI();
418 switch (Opcode) {
419 default: break;
420 case Instruction::Trunc:
421 if (TLI->isTruncateFree(OpTy, Ty))
422 return TargetTransformInfo::TCC_Free;
423 return TargetTransformInfo::TCC_Basic;
424 case Instruction::ZExt:
425 if (TLI->isZExtFree(OpTy, Ty))
426 return TargetTransformInfo::TCC_Free;
427 return TargetTransformInfo::TCC_Basic;
428
429 case Instruction::AddrSpaceCast:
430 if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
431 Ty->getPointerAddressSpace()))
432 return TargetTransformInfo::TCC_Free;
433 return TargetTransformInfo::TCC_Basic;
434 }
435
436 return BaseT::getOperationCost(Opcode, Ty, OpTy);
437 }
438
439 unsigned getInliningThresholdMultiplier() { return 1; }
440
441 int getInlinerVectorBonusPercent() { return 150; }
442
443 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
444 TTI::UnrollingPreferences &UP) {
445 // This unrolling functionality is target independent, but to provide some
446 // motivation for its intended use, for x86:
447
448 // According to the Intel 64 and IA-32 Architectures Optimization Reference
449 // Manual, Intel Core models and later have a loop stream detector (and
450 // associated uop queue) that can benefit from partial unrolling.
451 // The relevant requirements are:
452 // - The loop must have no more than 4 (8 for Nehalem and later) branches
453 // taken, and none of them may be calls.
454 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
455
456 // According to the Software Optimization Guide for AMD Family 15h
457 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
458 // and loop buffer which can benefit from partial unrolling.
459 // The relevant requirements are:
460 // - The loop must have fewer than 16 branches
461 // - The loop must have less than 40 uops in all executed loop branches
462
463 // The number of taken branches in a loop is hard to estimate here, and
464 // benchmarking has revealed that it is better not to be conservative when
465 // estimating the branch count. As a result, we'll ignore the branch limits
466 // until someone finds a case where it matters in practice.
467
468 unsigned MaxOps;
469 const TargetSubtargetInfo *ST = getST();
470 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
471 MaxOps = PartialUnrollingThreshold;
472 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
473 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
474 else
475 return;
476
477 // Scan the loop: don't unroll loops with calls.
478 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
479 ++I) {
480 BasicBlock *BB = *I;
481
482 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
483 if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
484 ImmutableCallSite CS(&*J);
485 if (const Function *F = CS.getCalledFunction()) {
486 if (!static_cast<T *>(this)->isLoweredToCall(F))
487 continue;
488 }
489
490 return;
491 }
492 }
493
494 // Enable runtime and partial unrolling up to the specified size.
495 // Enable using trip count upper bound to unroll loops.
496 UP.Partial = UP.Runtime = UP.UpperBound = true;
497 UP.PartialThreshold = MaxOps;
498
499 // Avoid unrolling when optimizing for size.
500 UP.OptSizeThreshold = 0;
501 UP.PartialOptSizeThreshold = 0;
502
503 // Set number of instructions optimized when "back edge"
504 // becomes "fall through" to default value of 2.
505 UP.BEInsns = 2;
506 }
507
508 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
509 AssumptionCache &AC,
510 TargetLibraryInfo *LibInfo,
511 HardwareLoopInfo &HWLoopInfo) {
512 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
513 }
514
515 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
516 AssumptionCache &AC, TargetLibraryInfo *TLI,
517 DominatorTree *DT,
518 const LoopAccessInfo *LAI) {
519 return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
520 }
521
522 int getInstructionLatency(const Instruction *I) {
523 if (isa<LoadInst>(I))
524 return getST()->getSchedModel().DefaultLoadLatency;
525
526 return BaseT::getInstructionLatency(I);
527 }
528
529 virtual Optional<unsigned>
530 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
531 return Optional<unsigned>(
532 getST()->getCacheSize(static_cast<unsigned>(Level)));
533 }
534
535 virtual Optional<unsigned>
536 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
537 Optional<unsigned> TargetResult =
538 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
539
540 if (TargetResult)
541 return TargetResult;
542
543 return BaseT::getCacheAssociativity(Level);
544 }
545
546 virtual unsigned getCacheLineSize() const {
547 return getST()->getCacheLineSize();
548 }
549
550 virtual unsigned getPrefetchDistance() const {
551 return getST()->getPrefetchDistance();
552 }
553
554 virtual unsigned getMinPrefetchStride() const {
555 return getST()->getMinPrefetchStride();
556 }
557
558 virtual unsigned getMaxPrefetchIterationsAhead() const {
559 return getST()->getMaxPrefetchIterationsAhead();
560 }
561
562 /// @}
563
564 /// \name Vector TTI Implementations
565 /// @{
566
567 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
568
569 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
570 /// are set if the result needs to be inserted and/or extracted from vectors.
571 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
572     assert(Ty->isVectorTy() && "Can only scalarize vectors");
573 unsigned Cost = 0;
574
575 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
576 if (Insert)
577 Cost += static_cast<T *>(this)
578 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
579 if (Extract)
580 Cost += static_cast<T *>(this)
581 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
582 }
583
584 return Cost;
585 }
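
The scalarization overhead above reduces to (Insert ? N : 0) insert costs plus (Extract ? N : 0) extract costs. A minimal sketch with hypothetical unit costs:

unsigned scalarizationOverheadSketch(bool Insert, bool Extract) {
  unsigned NumElts = 8;                      // assumed vector width, e.g. <8 x i32>
  unsigned InsertCost = 1, ExtractCost = 1;  // hypothetical per-lane costs
  unsigned Cost = 0;
  if (Insert)
    Cost += NumElts * InsertCost;
  if (Extract)
    Cost += NumElts * ExtractCost;
  return Cost;                               // Insert && Extract -> 16
}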
586
587   /// Estimate the overhead of scalarizing an instruction's unique
588   /// non-constant operands. The types of the arguments are ordinarily
589 /// scalar, in which case the costs are multiplied with VF.
590 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
591 unsigned VF) {
592 unsigned Cost = 0;
593 SmallPtrSet<const Value*, 4> UniqueOperands;
594 for (const Value *A : Args) {
595 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
596 Type *VecTy = nullptr;
597 if (A->getType()->isVectorTy()) {
598 VecTy = A->getType();
599 // If A is a vector operand, VF should be 1 or correspond to A.
600         assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
601                "Vector argument does not match VF");
602 }
603 else
604 VecTy = VectorType::get(A->getType(), VF);
605
606 Cost += getScalarizationOverhead(VecTy, false, true);
607 }
608 }
609
610 return Cost;
611 }
612
613 unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
614     assert(VecTy->isVectorTy());
615
616 unsigned Cost = 0;
617
618 Cost += getScalarizationOverhead(VecTy, true, false);
619 if (!Args.empty())
620 Cost += getOperandsScalarizationOverhead(Args,
621 VecTy->getVectorNumElements());
622 else
623 // When no information on arguments is provided, we add the cost
624 // associated with one argument as a heuristic.
625 Cost += getScalarizationOverhead(VecTy, false, true);
626
627 return Cost;
628 }
629
630 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
631
632 unsigned getArithmeticInstrCost(
633 unsigned Opcode, Type *Ty,
634 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
635 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
636 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
637 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
638 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
639 const Instruction *CxtI = nullptr) {
640 // Check if any of the operands are vector operands.
641 const TargetLoweringBase *TLI = getTLI();
642 int ISD = TLI->InstructionOpcodeToISD(Opcode);
643 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 643, __PRETTY_FUNCTION__))
;
644
645 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
646
647 bool IsFloat = Ty->isFPOrFPVectorTy();
648 // Assume that floating point arithmetic operations cost twice as much as
649 // integer operations.
650 unsigned OpCost = (IsFloat ? 2 : 1);
651
652 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
653 // The operation is legal. Assume it costs 1.
654 // TODO: Once we have extract/insert subvector cost we need to use them.
655 return LT.first * OpCost;
656 }
657
658 if (!TLI->isOperationExpand(ISD, LT.second)) {
659 // If the operation is custom lowered, then assume that the code is twice
660 // as expensive.
661 return LT.first * 2 * OpCost;
662 }
663
664 // Else, assume that we need to scalarize this op.
665 // TODO: If one of the types get legalized by splitting, handle this
666 // similarly to what getCastInstrCost() does.
667 if (Ty->isVectorTy()) {
668 unsigned Num = Ty->getVectorNumElements();
669 unsigned Cost = static_cast<T *>(this)
670 ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
671 // Return the cost of multiple scalar invocation plus the cost of
672 // inserting and extracting the values.
673 return getScalarizationOverhead(Ty, Args) + Num * Cost;
674 }
675
676 // We don't know anything about this scalar instruction.
677 return OpCost;
678 }
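
The cascade above boils down to: legal or promoted operations cost LT.first * OpCost, custom-lowered operations twice that, and everything else is scalarized. A worked sketch with assumed numbers (none of them come from a real target):

unsigned arithmeticCostSketch(bool LegalOrPromote, bool CustomLowered) {
  unsigned LTFirst = 2;   // assumed type-legalization factor (e.g. one split)
  unsigned OpCost = 2;    // assumed floating point: twice an integer op
  if (LegalOrPromote)
    return LTFirst * OpCost;          // 4
  if (CustomLowered)
    return LTFirst * 2 * OpCost;      // 8
  // Scalarized fallback: overhead of building/decomposing the vector plus
  // one scalar operation per element (all numbers assumed).
  unsigned NumElts = 4, ScalarCost = 2, ScalarizationOverhead = 8;
  return ScalarizationOverhead + NumElts * ScalarCost;  // 16
}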
679
680 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
681 Type *SubTp) {
682 switch (Kind) {
683 case TTI::SK_Broadcast:
684 return getBroadcastShuffleOverhead(Tp);
685 case TTI::SK_Select:
686 case TTI::SK_Reverse:
687 case TTI::SK_Transpose:
688 case TTI::SK_PermuteSingleSrc:
689 case TTI::SK_PermuteTwoSrc:
690 return getPermuteShuffleOverhead(Tp);
691 case TTI::SK_ExtractSubvector:
692 return getExtractSubvectorOverhead(Tp, Index, SubTp);
693 case TTI::SK_InsertSubvector:
694 return getInsertSubvectorOverhead(Tp, Index, SubTp);
695 }
696 llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind",
"/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 696)
;
697 }
698
699 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
700 const Instruction *I = nullptr) {
701 const TargetLoweringBase *TLI = getTLI();
702 int ISD = TLI->InstructionOpcodeToISD(Opcode);
703 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 703, __PRETTY_FUNCTION__))
;
704 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
705 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
706
707 // Check for NOOP conversions.
708 if (SrcLT.first == DstLT.first &&
709 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
710
711 // Bitcast between types that are legalized to the same type are free.
712 if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
713 return 0;
714 }
715
716 if (Opcode == Instruction::Trunc &&
717 TLI->isTruncateFree(SrcLT.second, DstLT.second))
718 return 0;
719
720 if (Opcode == Instruction::ZExt &&
721 TLI->isZExtFree(SrcLT.second, DstLT.second))
722 return 0;
723
724 if (Opcode == Instruction::AddrSpaceCast &&
725 TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
726 Dst->getPointerAddressSpace()))
727 return 0;
728
729 // If this is a zext/sext of a load, return 0 if the corresponding
730 // extending load exists on target.
731 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
732 I && isa<LoadInst>(I->getOperand(0))) {
733 EVT ExtVT = EVT::getEVT(Dst);
734 EVT LoadVT = EVT::getEVT(Src);
735 unsigned LType =
736 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
737 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
738 return 0;
739 }
740
741 // If the cast is marked as legal (or promote) then assume low cost.
742 if (SrcLT.first == DstLT.first &&
743 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
744 return 1;
745
746 // Handle scalar conversions.
747 if (!Src->isVectorTy() && !Dst->isVectorTy()) {
748 // Scalar bitcasts are usually free.
749 if (Opcode == Instruction::BitCast)
750 return 0;
751
752 // Just check the op cost. If the operation is legal then assume it costs
753 // 1.
754 if (!TLI->isOperationExpand(ISD, DstLT.second))
755 return 1;
756
757 // Assume that illegal scalar instruction are expensive.
758 return 4;
759 }
760
761 // Check vector-to-vector casts.
762 if (Dst->isVectorTy() && Src->isVectorTy()) {
763 // If the cast is between same-sized registers, then the check is simple.
764 if (SrcLT.first == DstLT.first &&
765 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
766
767 // Assume that Zext is done using AND.
768 if (Opcode == Instruction::ZExt)
769 return 1;
770
771 // Assume that sext is done using SHL and SRA.
772 if (Opcode == Instruction::SExt)
773 return 2;
774
775 // Just check the op cost. If the operation is legal then assume it
776 // costs
777 // 1 and multiply by the type-legalization overhead.
778 if (!TLI->isOperationExpand(ISD, DstLT.second))
779 return SrcLT.first * 1;
780 }
781
782 // If we are legalizing by splitting, query the concrete TTI for the cost
783 // of casting the original vector twice. We also need to factor in the
784 // cost of the split itself. Count that as 1, to be consistent with
785 // TLI->getTypeLegalizationCost().
786 if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
787 TargetLowering::TypeSplitVector ||
788 TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
789 TargetLowering::TypeSplitVector) &&
790 Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) {
791 Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
792 Dst->getVectorNumElements() / 2);
793 Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
794 Src->getVectorNumElements() / 2);
795 T *TTI = static_cast<T *>(this);
796 return TTI->getVectorSplitCost() +
797 (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
798 }
799
800 // In other cases where the source or destination are illegal, assume
801 // the operation will get scalarized.
802 unsigned Num = Dst->getVectorNumElements();
803 unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
804 Opcode, Dst->getScalarType(), Src->getScalarType(), I);
805
806 // Return the cost of multiple scalar invocation plus the cost of
807 // inserting and extracting the values.
808 return getScalarizationOverhead(Dst, true, true) + Num * Cost;
809 }
810
811 // We already handled vector-to-vector and scalar-to-scalar conversions.
812 // This
813 // is where we handle bitcast between vectors and scalars. We need to assume
814 // that the conversion is scalarized in one way or another.
815 if (Opcode == Instruction::BitCast)
816 // Illegal bitcasts are done by storing and loading from a stack slot.
817 return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
818 : 0) +
819 (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
820 : 0);
821
822 llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 822)
;
823 }
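
The early-out rules above treat bitcast/trunc between identically legalized types, free truncates and zexts, free address-space casts, and matching extending loads as cost 0; remaining casts fall through to the legal (cost 1), split-vector, or scalarized paths. A sketch of that decision shape, with assumed inputs rather than real TLI queries:

unsigned castCostSketch(bool SameLegalizedSize, bool IsBitCastOrTrunc,
                        bool LegalOrPromote) {
  unsigned SrcLTFirst = 1, DstLTFirst = 1;  // assumed legalization factors
  if (SrcLTFirst == DstLTFirst && SameLegalizedSize && IsBitCastOrTrunc)
    return 0;                               // no-op conversion
  if (SrcLTFirst == DstLTFirst && LegalOrPromote)
    return 1;                               // legal (or promoted) cast
  return 4;                                 // assumed "expensive" scalar fallback
}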
824
825 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
826 VectorType *VecTy, unsigned Index) {
827 return static_cast<T *>(this)->getVectorInstrCost(
828 Instruction::ExtractElement, VecTy, Index) +
829 static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
830 VecTy->getElementType());
831 }
832
833 unsigned getCFInstrCost(unsigned Opcode) {
834 // Branches are assumed to be predicted.
835 return 0;
836 }
837
838 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
839 const Instruction *I) {
840 const TargetLoweringBase *TLI = getTLI();
841 int ISD = TLI->InstructionOpcodeToISD(Opcode);
842 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 842, __PRETTY_FUNCTION__))
;
7: Assuming 'ISD' is not equal to 0
8: '?' condition is true
843
844 // Selects on vectors are actually vector selects.
845 if (ISD == ISD::SELECT) {
9: Assuming 'ISD' is not equal to SELECT
10: Taking false branch
846 assert(CondTy && "CondTy must exist")((CondTy && "CondTy must exist") ? static_cast<void
> (0) : __assert_fail ("CondTy && \"CondTy must exist\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 846, __PRETTY_FUNCTION__))
;
847 if (CondTy->isVectorTy())
848 ISD = ISD::VSELECT;
849 }
850 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
851
852 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
11: Taking false branch
853 !TLI->isOperationExpand(ISD, LT.second)) {
854 // The operation is legal. Assume it costs 1. Multiply
855 // by the type-legalization overhead.
856 return LT.first * 1;
857 }
858
859 // Otherwise, assume that the cast is scalarized.
860 // TODO: If one of the types get legalized by splitting, handle this
861 // similarly to what getCastInstrCost() does.
862 if (ValTy->isVectorTy()) {
12: Calling 'Type::isVectorTy'
14: Returning from 'Type::isVectorTy'
15: Taking true branch
863 unsigned Num = ValTy->getVectorNumElements();
864 if (CondTy)
16: Assuming 'CondTy' is null
17: Taking false branch
865 CondTy = CondTy->getScalarType();
866 unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
19: Calling 'AArch64TTIImpl::getCmpSelInstrCost'
867 Opcode, ValTy->getScalarType(), CondTy, I);
18: Passing null pointer value via 3rd parameter 'CondTy'
868
869 // Return the cost of multiple scalar invocation plus the cost of
870 // inserting and extracting the values.
871 return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
872 }
873
874 // Unknown scalar opcode.
875 return 1;
876 }
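
This is the function the reported bug path runs through: with ValTy a vector and CondTy null (step 16), the scalarized branch forwards the null CondTy into the target's getCmpSelInstrCost (steps 18 and 19), where it can later be dereferenced. The following is a minimal sketch of a caller-side guard, under the assumption that an i1 condition type is an acceptable stand-in; it is illustrative only and not necessarily how the issue was addressed upstream.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

// Hypothetical helper mirroring the scalarized branch above.
llvm::Type *scalarCondTypeFor(llvm::Type *ValTy, llvm::Type *CondTy) {
  if (CondTy)
    return CondTy->getScalarType();
  // Guard (assumption): synthesize an i1 condition type instead of
  // forwarding a null pointer into the target implementation.
  return llvm::Type::getInt1Ty(ValTy->getContext());
}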
877
878 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
879 std::pair<unsigned, MVT> LT =
880 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
881
882 return LT.first;
883 }
884
885 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
886 unsigned AddressSpace,
887 const Instruction *I = nullptr) {
888     assert(!Src->isVoidTy() && "Invalid type");
889 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
890
891 // Assuming that all loads of legal types cost 1.
892 unsigned Cost = LT.first;
893
894 if (Src->isVectorTy() &&
895 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
896 // This is a vector load that legalizes to a larger type than the vector
897 // itself. Unless the corresponding extending load or truncating store is
898 // legal, then this will scalarize.
899 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
900 EVT MemVT = getTLI()->getValueType(DL, Src);
901 if (Opcode == Instruction::Store)
902 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
903 else
904 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
905
906 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
907 // This is a vector load/store for some illegal type that is scalarized.
908 // We must account for the cost of building or decomposing the vector.
909 Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
910 Opcode == Instruction::Store);
911 }
912 }
913
914 return Cost;
915 }
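
For loads and stores the model above starts from the legalization factor and adds a scalarization penalty only when the vector legalizes to a wider type without a matching extending load or truncating store. A worked sketch with assumed numbers:

unsigned memoryOpCostSketch(bool ExtOrTruncLegal) {
  unsigned LTFirst = 1;            // assumed: one legal load/store after legalization
  unsigned Cost = LTFirst;
  bool VectorNeedsWidening = true; // assumed: e.g. a small vector widened to a larger MVT
  if (VectorNeedsWidening && !ExtOrTruncLegal)
    Cost += 8;                     // assumed overhead of building/decomposing the vector
  return Cost;
}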
916
917 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
918 unsigned Factor,
919 ArrayRef<unsigned> Indices,
920 unsigned Alignment, unsigned AddressSpace,
921 bool UseMaskForCond = false,
922 bool UseMaskForGaps = false) {
923 VectorType *VT = dyn_cast<VectorType>(VecTy);
924 assert(VT && "Expect a vector type for interleaved memory op")((VT && "Expect a vector type for interleaved memory op"
) ? static_cast<void> (0) : __assert_fail ("VT && \"Expect a vector type for interleaved memory op\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 924, __PRETTY_FUNCTION__))
;
925
926 unsigned NumElts = VT->getNumElements();
927     assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
928
929 unsigned NumSubElts = NumElts / Factor;
930 VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
931
932 // Firstly, the cost of load/store operation.
933 unsigned Cost;
934 if (UseMaskForCond || UseMaskForGaps)
935 Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
936 Opcode, VecTy, Alignment, AddressSpace);
937 else
938 Cost = static_cast<T *>(this)->getMemoryOpCost(
939 Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
940
941 // Legalize the vector type, and get the legalized and unlegalized type
942 // sizes.
943 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
944 unsigned VecTySize =
945 static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
946 unsigned VecTyLTSize = VecTyLT.getStoreSize();
947
948 // Return the ceiling of dividing A by B.
949 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
950
951 // Scale the cost of the memory operation by the fraction of legalized
952 // instructions that will actually be used. We shouldn't account for the
953 // cost of dead instructions since they will be removed.
954 //
955 // E.g., An interleaved load of factor 8:
956 // %vec = load <16 x i64>, <16 x i64>* %ptr
957 // %v0 = shufflevector %vec, undef, <0, 8>
958 //
959 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
960 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
961 // type). The other loads are unused.
962 //
963 // We only scale the cost of loads since interleaved store groups aren't
964 // allowed to have gaps.
965 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
966 // The number of loads of a legal type it will take to represent a load
967 // of the unlegalized vector type.
968 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
969
970 // The number of elements of the unlegalized type that correspond to a
971 // single legal instruction.
972 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
973
974 // Determine which legal instructions will be used.
975 BitVector UsedInsts(NumLegalInsts, false);
976 for (unsigned Index : Indices)
977 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
978 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
979
980 // Scale the cost of the load by the fraction of legal instructions that
981 // will be used.
982 Cost *= UsedInsts.count() / NumLegalInsts;
983 }
984
985 // Then plus the cost of interleave operation.
986 if (Opcode == Instruction::Load) {
987 // The interleave cost is similar to extract sub vectors' elements
988 // from the wide vector, and insert them into sub vectors.
989 //
990 // E.g. An interleaved load of factor 2 (with one member of index 0):
991 // %vec = load <8 x i32>, <8 x i32>* %ptr
992 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
993 // The cost is estimated as extracting elements at 0, 2, 4, 6 from the
994 // <8 x i32> vector and inserting them into a <4 x i32> vector.
995
996 assert(Indices.size() <= Factor &&
997        "Interleaved memory op has too many members");
998
999 for (unsigned Index : Indices) {
1000 assert(Index < Factor && "Invalid index for interleaved memory op");
1001
1002 // Extract elements from loaded vector for each sub vector.
1003 for (unsigned i = 0; i < NumSubElts; i++)
1004 Cost += static_cast<T *>(this)->getVectorInstrCost(
1005 Instruction::ExtractElement, VT, Index + i * Factor);
1006 }
1007
1008 unsigned InsSubCost = 0;
1009 for (unsigned i = 0; i < NumSubElts; i++)
1010 InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
1011 Instruction::InsertElement, SubVT, i);
1012
1013 Cost += Indices.size() * InsSubCost;
1014 } else {
1015 // The interleave cost is that of extracting all elements from the
1016 // sub-vectors and inserting them into the wide vector.
1017 //
1018 // E.g. An interleaved store of factor 2:
1019 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1020 // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
1021 // The cost is estimated as extracting all elements from both <4 x i32>
1022 // vectors and inserting them into the <8 x i32> vector.
1023
1024 unsigned ExtSubCost = 0;
1025 for (unsigned i = 0; i < NumSubElts; i++)
1026 ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
1027 Instruction::ExtractElement, SubVT, i);
1028 Cost += ExtSubCost * Factor;
1029
1030 for (unsigned i = 0; i < NumElts; i++)
1031 Cost += static_cast<T *>(this)
1032 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1033 }
1034
1035 if (!UseMaskForCond)
1036 return Cost;
1037
1038 Type *I8Type = Type::getInt8Ty(VT->getContext());
1039 VectorType *MaskVT = VectorType::get(I8Type, NumElts);
1040 SubVT = VectorType::get(I8Type, NumSubElts);
1041
1042 // The mask shuffling cost is that of extracting all the elements of the
1043 // mask and inserting each of them Factor times into the wide vector:
1044 //
1045 // E.g. an interleaved group with factor 3:
1046 // %mask = icmp ult <8 x i32> %vec1, %vec2
1047 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1048 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1049 // The cost is estimated as extracting all mask elements from the <8xi1>
1050 // mask vector and inserting them Factor times into the <24xi1> shuffled
1051 // mask vector.
1052 for (unsigned i = 0; i < NumSubElts; i++)
1053 Cost += static_cast<T *>(this)->getVectorInstrCost(
1054 Instruction::ExtractElement, SubVT, i);
1055
1056 for (unsigned i = 0; i < NumElts; i++)
1057 Cost += static_cast<T *>(this)->getVectorInstrCost(
1058 Instruction::InsertElement, MaskVT, i);
1059
1060 // The Gaps mask is invariant and created outside the loop, therefore the
1061 // cost of creating it is not accounted for here. However if we have both
1062 // a MaskForGaps and some other mask that guards the execution of the
1063 // memory access, we need to account for the cost of And-ing the two masks
1064 // inside the loop.
1065 if (UseMaskForGaps)
1066 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1067 BinaryOperator::And, MaskVT);
1068
1069 return Cost;
1070 }
1071
1072 /// Get intrinsic cost based on arguments.
1073 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1074 ArrayRef<Value *> Args, FastMathFlags FMF,
1075 unsigned VF = 1) {
1076 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1077 assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1078 auto *ConcreteTTI = static_cast<T *>(this);
1079
1080 switch (IID) {
1081 default: {
1082 // Assume that we need to scalarize this intrinsic.
1083 SmallVector<Type *, 4> Types;
1084 for (Value *Op : Args) {
1085 Type *OpTy = Op->getType();
1086 assert(VF == 1 || !OpTy->isVectorTy());
1087 Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1088 }
1089
1090 if (VF > 1 && !RetTy->isVoidTy())
1091 RetTy = VectorType::get(RetTy, VF);
1092
1093 // Compute the scalarization overhead based on Args for a vector
1094 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1095 // CostModel will pass a vector RetTy and VF is 1.
1096 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1097 if (RetVF > 1 || VF > 1) {
1098 ScalarizationCost = 0;
1099 if (!RetTy->isVoidTy())
1100 ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1101 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1102 }
1103
1104 return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1105 ScalarizationCost);
1106 }
1107 case Intrinsic::masked_scatter: {
1108 assert(VF == 1 && "Can't vectorize types here.")((VF == 1 && "Can't vectorize types here.") ? static_cast
<void> (0) : __assert_fail ("VF == 1 && \"Can't vectorize types here.\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1108, __PRETTY_FUNCTION__))
;
1109 Value *Mask = Args[3];
1110 bool VarMask = !isa<Constant>(Mask);
1111 unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1112 return ConcreteTTI->getGatherScatterOpCost(
1113 Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1114 }
1115 case Intrinsic::masked_gather: {
1116 assert(VF == 1 && "Can't vectorize types here.")((VF == 1 && "Can't vectorize types here.") ? static_cast
<void> (0) : __assert_fail ("VF == 1 && \"Can't vectorize types here.\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1116, __PRETTY_FUNCTION__))
;
1117 Value *Mask = Args[2];
1118 bool VarMask = !isa<Constant>(Mask);
1119 unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1120 return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1121 Args[0], VarMask, Alignment);
1122 }
1123 case Intrinsic::experimental_vector_reduce_add:
1124 case Intrinsic::experimental_vector_reduce_mul:
1125 case Intrinsic::experimental_vector_reduce_and:
1126 case Intrinsic::experimental_vector_reduce_or:
1127 case Intrinsic::experimental_vector_reduce_xor:
1128 case Intrinsic::experimental_vector_reduce_v2_fadd:
1129 case Intrinsic::experimental_vector_reduce_v2_fmul:
1130 case Intrinsic::experimental_vector_reduce_smax:
1131 case Intrinsic::experimental_vector_reduce_smin:
1132 case Intrinsic::experimental_vector_reduce_fmax:
1133 case Intrinsic::experimental_vector_reduce_fmin:
1134 case Intrinsic::experimental_vector_reduce_umax:
1135 case Intrinsic::experimental_vector_reduce_umin:
1136 return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1137 case Intrinsic::fshl:
1138 case Intrinsic::fshr: {
1139 Value *X = Args[0];
1140 Value *Y = Args[1];
1141 Value *Z = Args[2];
1142 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1143 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1144 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1145 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1146 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1147 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1148 : TTI::OP_None;
1149 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1150 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1151 unsigned Cost = 0;
1152 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1153 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1154 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1155 OpKindX, OpKindZ, OpPropsX);
1156 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1157 OpKindY, OpKindZ, OpPropsY);
1158 // Non-constant shift amounts require a modulo.
1159 if (OpKindZ != TTI::OK_UniformConstantValue &&
1160 OpKindZ != TTI::OK_NonUniformConstantValue)
1161 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1162 OpKindZ, OpKindBW, OpPropsZ,
1163 OpPropsBW);
1164 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1165 if (X != Y) {
1166 Type *CondTy = RetTy->getWithNewBitWidth(1);
1167 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1168 CondTy, nullptr);
1169 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1170 CondTy, nullptr);
1171 }
1172 return Cost;
1173 }
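// Illustrative cost summary (editorial): for an i32 fshl with a variable
// shift amount and X != Y, the modeled cost is Or + Sub + Shl + LShr + URem
// (for Z % 32) + ICmp + Select; a rotate (X == Y) by a constant amount
// reduces to Or + Sub + Shl + LShr.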
1174 }
1175 }
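// Usage sketch (illustrative; the names below are placeholders, not from
// this file). It shows the two calling conventions the default case above
// distinguishes: a vectorizer passes a scalar return type with VF > 1,
// while a cost-model query passes an already-vectorized return type with
// the default VF == 1.
//
//   // Vectorizer-style query: scalar return type, VF = 4.
//   unsigned C1 = TTI.getIntrinsicInstrCost(Intrinsic::fabs, ScalarFloatTy,
//                                           {ScalarArg}, FMF, /*VF=*/4);
//   // Cost-model-style query: <4 x float> return type, VF left at 1.
//   unsigned C2 = TTI.getIntrinsicInstrCost(Intrinsic::fabs, Vec4FloatTy,
//                                           {VecArg}, FMF);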
1176
1177 /// Get intrinsic cost based on argument types.
1178 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1179 /// cost of scalarizing the arguments and the return value will be computed
1180 /// based on types.
1181 unsigned getIntrinsicInstrCost(
1182 Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1183 unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1184 auto *ConcreteTTI = static_cast<T *>(this);
1185
1186 SmallVector<unsigned, 2> ISDs;
1187 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1188 switch (IID) {
1189 default: {
1190 // Assume that we need to scalarize this intrinsic.
1191 unsigned ScalarizationCost = ScalarizationCostPassed;
1192 unsigned ScalarCalls = 1;
1193 Type *ScalarRetTy = RetTy;
1194 if (RetTy->isVectorTy()) {
1195 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1196 ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1197 ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1198 ScalarRetTy = RetTy->getScalarType();
1199 }
1200 SmallVector<Type *, 4> ScalarTys;
1201 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1202 Type *Ty = Tys[i];
1203 if (Ty->isVectorTy()) {
1204 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1205 ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1206 ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1207 Ty = Ty->getScalarType();
1208 }
1209 ScalarTys.push_back(Ty);
1210 }
1211 if (ScalarCalls == 1)
1212 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1213
1214 unsigned ScalarCost =
1215 ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1216
1217 return ScalarCalls * ScalarCost + ScalarizationCost;
1218 }
1219 // Look for intrinsics that can be lowered directly or turned into a scalar
1220 // intrinsic call.
1221 case Intrinsic::sqrt:
1222 ISDs.push_back(ISD::FSQRT);
1223 break;
1224 case Intrinsic::sin:
1225 ISDs.push_back(ISD::FSIN);
1226 break;
1227 case Intrinsic::cos:
1228 ISDs.push_back(ISD::FCOS);
1229 break;
1230 case Intrinsic::exp:
1231 ISDs.push_back(ISD::FEXP);
1232 break;
1233 case Intrinsic::exp2:
1234 ISDs.push_back(ISD::FEXP2);
1235 break;
1236 case Intrinsic::log:
1237 ISDs.push_back(ISD::FLOG);
1238 break;
1239 case Intrinsic::log10:
1240 ISDs.push_back(ISD::FLOG10);
1241 break;
1242 case Intrinsic::log2:
1243 ISDs.push_back(ISD::FLOG2);
1244 break;
1245 case Intrinsic::fabs:
1246 ISDs.push_back(ISD::FABS);
1247 break;
1248 case Intrinsic::canonicalize:
1249 ISDs.push_back(ISD::FCANONICALIZE);
1250 break;
1251 case Intrinsic::minnum:
1252 ISDs.push_back(ISD::FMINNUM);
1253 if (FMF.noNaNs())
1254 ISDs.push_back(ISD::FMINIMUM);
1255 break;
1256 case Intrinsic::maxnum:
1257 ISDs.push_back(ISD::FMAXNUM);
1258 if (FMF.noNaNs())
1259 ISDs.push_back(ISD::FMAXIMUM);
1260 break;
1261 case Intrinsic::copysign:
1262 ISDs.push_back(ISD::FCOPYSIGN);
1263 break;
1264 case Intrinsic::floor:
1265 ISDs.push_back(ISD::FFLOOR);
1266 break;
1267 case Intrinsic::ceil:
1268 ISDs.push_back(ISD::FCEIL);
1269 break;
1270 case Intrinsic::trunc:
1271 ISDs.push_back(ISD::FTRUNC);
1272 break;
1273 case Intrinsic::nearbyint:
1274 ISDs.push_back(ISD::FNEARBYINT);
1275 break;
1276 case Intrinsic::rint:
1277 ISDs.push_back(ISD::FRINT);
1278 break;
1279 case Intrinsic::round:
1280 ISDs.push_back(ISD::FROUND);
1281 break;
1282 case Intrinsic::pow:
1283 ISDs.push_back(ISD::FPOW);
1284 break;
1285 case Intrinsic::fma:
1286 ISDs.push_back(ISD::FMA);
1287 break;
1288 case Intrinsic::fmuladd:
1289 ISDs.push_back(ISD::FMA);
1290 break;
1291 case Intrinsic::experimental_constrained_fmuladd:
1292 ISDs.push_back(ISD::STRICT_FMA);
1293 break;
1294 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1295 case Intrinsic::lifetime_start:
1296 case Intrinsic::lifetime_end:
1297 case Intrinsic::sideeffect:
1298 return 0;
1299 case Intrinsic::masked_store:
1300 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1301 0);
1302 case Intrinsic::masked_load:
1303 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1304 case Intrinsic::experimental_vector_reduce_add:
1305 return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1306 /*IsPairwiseForm=*/false);
1307 case Intrinsic::experimental_vector_reduce_mul:
1308 return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1309 /*IsPairwiseForm=*/false);
1310 case Intrinsic::experimental_vector_reduce_and:
1311 return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1312 /*IsPairwiseForm=*/false);
1313 case Intrinsic::experimental_vector_reduce_or:
1314 return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1315 /*IsPairwiseForm=*/false);
1316 case Intrinsic::experimental_vector_reduce_xor:
1317 return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1318 /*IsPairwiseForm=*/false);
1319 case Intrinsic::experimental_vector_reduce_v2_fadd:
1320 return ConcreteTTI->getArithmeticReductionCost(
1321 Instruction::FAdd, Tys[0],
1322 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1323 // reductions.
1324 case Intrinsic::experimental_vector_reduce_v2_fmul:
1325 return ConcreteTTI->getArithmeticReductionCost(
1326 Instruction::FMul, Tys[0],
1327 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1328 // reductions.
1329 case Intrinsic::experimental_vector_reduce_smax:
1330 case Intrinsic::experimental_vector_reduce_smin:
1331 case Intrinsic::experimental_vector_reduce_fmax:
1332 case Intrinsic::experimental_vector_reduce_fmin:
1333 return ConcreteTTI->getMinMaxReductionCost(
1334 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1335 /*IsUnsigned=*/true);
1336 case Intrinsic::experimental_vector_reduce_umax:
1337 case Intrinsic::experimental_vector_reduce_umin:
1338 return ConcreteTTI->getMinMaxReductionCost(
1339 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1340 /*IsUnsigned=*/false);
1341 case Intrinsic::sadd_sat:
1342 case Intrinsic::ssub_sat: {
1343 Type *CondTy = RetTy->getWithNewBitWidth(1);
1344
1345 Type *OpTy = StructType::create({RetTy, CondTy});
1346 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1347 ? Intrinsic::sadd_with_overflow
1348 : Intrinsic::ssub_with_overflow;
1349
1350 // SatMax -> Overflow && SumDiff < 0
1351 // SatMin -> Overflow && SumDiff >= 0
1352 unsigned Cost = 0;
1353 Cost += ConcreteTTI->getIntrinsicInstrCost(
1354 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1355 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1356 CondTy, nullptr);
1357 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1358 CondTy, nullptr);
1359 return Cost;
1360 }
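// Sketch of the expansion being costed (illustrative):
//   {Sum, Ov} = sadd.with.overflow(X, Y)
//   Sat       = (Sum < 0) ? SignedMax : SignedMin   // select on SumDiff sign
//   Res       = Ov ? Sat : Sum
// i.e. one overflow intrinsic, one compare, and two selects, matching the
// three cost terms above.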
1361 case Intrinsic::uadd_sat:
1362 case Intrinsic::usub_sat: {
1363 Type *CondTy = RetTy->getWithNewBitWidth(1);
1364
1365 Type *OpTy = StructType::create({RetTy, CondTy});
1366 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1367 ? Intrinsic::uadd_with_overflow
1368 : Intrinsic::usub_with_overflow;
1369
1370 unsigned Cost = 0;
1371 Cost += ConcreteTTI->getIntrinsicInstrCost(
1372 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1373 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1374 CondTy, nullptr);
1375 return Cost;
1376 }
1377 case Intrinsic::smul_fix:
1378 case Intrinsic::umul_fix: {
1379 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1380 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1381
1382 unsigned ExtOp =
1383 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1384
1385 unsigned Cost = 0;
1386 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1387 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1388 Cost +=
1389 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1390 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1391 TTI::OK_AnyValue,
1392 TTI::OK_UniformConstantValue);
1393 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1394 TTI::OK_AnyValue,
1395 TTI::OK_UniformConstantValue);
1396 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1397 return Cost;
1398 }
1399 case Intrinsic::sadd_with_overflow:
1400 case Intrinsic::ssub_with_overflow: {
1401 Type *SumTy = RetTy->getContainedType(0);
1402 Type *OverflowTy = RetTy->getContainedType(1);
1403 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1404 ? BinaryOperator::Add
1405 : BinaryOperator::Sub;
1406
1407 // LHSSign -> LHS >= 0
1408 // RHSSign -> RHS >= 0
1409 // SumSign -> Sum >= 0
1410 //
1411 // Add:
1412 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1413 // Sub:
1414 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1415 unsigned Cost = 0;
1416 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1417 Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1418 OverflowTy, nullptr);
1419 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1420 BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1421 Cost +=
1422 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1423 return Cost;
1424 }
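// Illustrative example: for an i8 add of 100 + 50, LHSSign and RHSSign are
// both "non-negative" while the wrapped sum (-106) is negative, so
// (LHSSign == RHSSign) && (LHSSign != SumSign) flags the overflow. The cost
// above charges the add, three sign compares, two equality compares, and
// the final and.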
1425 case Intrinsic::uadd_with_overflow:
1426 case Intrinsic::usub_with_overflow: {
1427 Type *SumTy = RetTy->getContainedType(0);
1428 Type *OverflowTy = RetTy->getContainedType(1);
1429 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1430 ? BinaryOperator::Add
1431 : BinaryOperator::Sub;
1432
1433 unsigned Cost = 0;
1434 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1435 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1436 OverflowTy, nullptr);
1437 return Cost;
1438 }
1439 case Intrinsic::smul_with_overflow:
1440 case Intrinsic::umul_with_overflow: {
1441 Type *MulTy = RetTy->getContainedType(0);
1442 Type *OverflowTy = RetTy->getContainedType(1);
1443 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1444 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1445
1446 unsigned ExtOp =
1447 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
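      // Note: this condition tests Intrinsic::smul_fix even though the
      // enclosing cases are smul_with_overflow/umul_with_overflow, so ExtOp
      // is always ZExt here; it appears to be copied from the smul_fix/
      // umul_fix case above and presumably should test
      // Intrinsic::smul_with_overflow.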
1448
1449 unsigned Cost = 0;
1450 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1451 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1452 Cost +=
1453 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1454 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1455 TTI::OK_AnyValue,
1456 TTI::OK_UniformConstantValue);
1457
1458 if (IID == Intrinsic::smul_with_overflow)
1459 Cost += ConcreteTTI->getArithmeticInstrCost(
1460 Instruction::AShr, MulTy, TTI::OK_AnyValue,
1461 TTI::OK_UniformConstantValue);
1462
1463 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1464 OverflowTy, nullptr);
1465 return Cost;
1466 }
1467 case Intrinsic::ctpop:
1468 ISDs.push_back(ISD::CTPOP);
1469 // In case of legalization use TCC_Expensive. This is cheaper than a
1470 // library call but still not a cheap instruction.
1471 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1472 break;
1473 // FIXME: ctlz, cttz, ...
1474 case Intrinsic::bswap:
1475 ISDs.push_back(ISD::BSWAP);
1476 break;
1477 case Intrinsic::bitreverse:
1478 ISDs.push_back(ISD::BITREVERSE);
1479 break;
1480 }
1481
1482 const TargetLoweringBase *TLI = getTLI();
1483 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1484
1485 SmallVector<unsigned, 2> LegalCost;
1486 SmallVector<unsigned, 2> CustomCost;
1487 for (unsigned ISD : ISDs) {
1488 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1489 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1490 TLI->isFAbsFree(LT.second)) {
1491 return 0;
1492 }
1493
1494 // The operation is legal. Assume it costs 1.
1495 // If the type is split to multiple registers, assume that there is some
1496 // overhead to this.
1497 // TODO: Once we have extract/insert subvector cost we need to use them.
1498 if (LT.first > 1)
1499 LegalCost.push_back(LT.first * 2);
1500 else
1501 LegalCost.push_back(LT.first * 1);
1502 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1503 // If the operation is custom lowered then assume
1504 // that the code is twice as expensive.
1505 CustomCost.push_back(LT.first * 2);
1506 }
1507 }
1508
1509 auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1510 if (MinLegalCostI != LegalCost.end())
1511 return *MinLegalCostI;
1512
1513 auto MinCustomCostI =
1514 std::min_element(CustomCost.begin(), CustomCost.end());
1515 if (MinCustomCostI != CustomCost.end())
1516 return *MinCustomCostI;
1517
1518 // If we can't lower fmuladd into an FMA, estimate the cost as a floating
1519 // point mul followed by an add.
1520 if (IID == Intrinsic::fmuladd)
1521 return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1522 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1523 if (IID == Intrinsic::experimental_constrained_fmuladd)
1524 return ConcreteTTI->getIntrinsicCost(
1525 Intrinsic::experimental_constrained_fmul, RetTy, Tys,
1526 nullptr) +
1527 ConcreteTTI->getIntrinsicCost(
1528 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr);
1529
1530 // Else, assume that we need to scalarize this intrinsic. For math builtins
1531 // this will emit a costly libcall, adding call overhead and spills. Make it
1532 // very expensive.
1533 if (RetTy->isVectorTy()) {
1534 unsigned ScalarizationCost =
1535 ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1536 ? ScalarizationCostPassed
1537 : getScalarizationOverhead(RetTy, true, false));
1538 unsigned ScalarCalls = RetTy->getVectorNumElements();
1539 SmallVector<Type *, 4> ScalarTys;
1540 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1541 Type *Ty = Tys[i];
1542 if (Ty->isVectorTy())
1543 Ty = Ty->getScalarType();
1544 ScalarTys.push_back(Ty);
1545 }
1546 unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1547 IID, RetTy->getScalarType(), ScalarTys, FMF);
1548 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1549 if (Tys[i]->isVectorTy()) {
1550 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1551 ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1552 ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1553 }
1554 }
1555
1556 return ScalarCalls * ScalarCost + ScalarizationCost;
1557 }
1558
1559 // This is going to be turned into a library call, make it expensive.
1560 return SingleCallCost;
1561 }
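// Usage note (illustrative; the type name is a placeholder): leaving
// ScalarizationCostPassed at its default of
// std::numeric_limits<unsigned>::max(), e.g.
//   getIntrinsicInstrCost(Intrinsic::sqrt, Vec4FloatTy, {Vec4FloatTy}, FMF)
// asks this overload to derive the scalarization overhead from the types
// themselves, as documented for this overload.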
1562
1563 /// Compute a cost of the given call instruction.
1564 ///
1565 /// Compute the cost of calling function F with return type RetTy and
1566 /// argument types Tys. F might be nullptr, in this case the cost of an
1567 /// arbitrary call with the specified signature will be returned.
1568 /// This is used, for instance, when we estimate call of a vector
1569 /// counterpart of the given function.
1570 /// \param F Called function, might be nullptr.
1571 /// \param RetTy Return value types.
1572 /// \param Tys Argument types.
1573 /// \returns The cost of Call instruction.
1574 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1575 return 10;
1576 }
1577
1578 unsigned getNumberOfParts(Type *Tp) {
1579 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1580 return LT.first;
1581 }
1582
1583 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1584 const SCEV *) {
1585 return 0;
1586 }
1587
1588 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1589 /// We're assuming that reduction operations are performed in the following way:
1590 /// 1. Non-pairwise reduction
1591 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1592 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1593 /// \----------------v-------------/ \----------v------------/
1594 /// n/2 elements n/2 elements
1595 /// %red1 = op <n x t> %val, <n x t> val1
1596 /// After this operation we have a vector %red1 where only the first n/2
1597 /// elements are meaningful, the second n/2 elements are undefined and can be
1598 /// dropped. All other operations are actually working with the vector of
1599 /// length n/2, not n, though the real vector length is still n.
1600 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1601 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1602 /// \----------------v-------------/ \----------v------------/
1603 /// n/4 elements 3*n/4 elements
1604 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1605 /// length n/2, the resulting vector has length n/4 etc.
1606 /// 2. Pairwise reduction:
1607 /// Everything is the same except for an additional shuffle operation which
1608 /// is used to produce operands for pairwise kind of reductions.
1609 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1610 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1611 /// \-------------v----------/ \----------v------------/
1612 /// n/2 elements n/2 elements
1613 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1614 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1615 /// \-------------v----------/ \----------v------------/
1616 /// n/2 elements n/2 elements
1617 /// %red1 = op <n x t> %val1, <n x t> val2
1618 /// Again, the operation is performed on <n x t> vector, but the resulting
1619 /// vector %red1 is <n/2 x t> vector.
1620 ///
1621 /// The cost model should take into account that the actual length of the
1622 /// vector is reduced on each iteration.
1623 unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1624 bool IsPairwise) {
1625 assert(Ty->isVectorTy() && "Expect a vector type");
1626 Type *ScalarTy = Ty->getVectorElementType();
1627 unsigned NumVecElts = Ty->getVectorNumElements();
1628 unsigned NumReduxLevels = Log2_32(NumVecElts);
1629 unsigned ArithCost = 0;
1630 unsigned ShuffleCost = 0;
1631 auto *ConcreteTTI = static_cast<T *>(this);
1632 std::pair<unsigned, MVT> LT =
1633 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1634 unsigned LongVectorCount = 0;
1635 unsigned MVTLen =
1636 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1637 while (NumVecElts > MVTLen) {
1638 NumVecElts /= 2;
1639 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1640 // Assume the pairwise shuffles add a cost.
1641 ShuffleCost += (IsPairwise + 1) *
1642 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1643 NumVecElts, SubTy);
1644 ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1645 Ty = SubTy;
1646 ++LongVectorCount;
1647 }
1648
1649 NumReduxLevels -= LongVectorCount;
1650
1651 // The minimal length of the vector is limited by the real length of vector
1652 // operations performed on the current platform. That's why several final
1653 // reduction operations are performed on the vectors with the same
1654 // architecture-dependent length.
1655
1656 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1657 // reductions need two shuffles on every level but the last one. On that
1658 // level one of the shuffles is <0, u, u, ...> which is identity.
1659 unsigned NumShuffles = NumReduxLevels;
1660 if (IsPairwise && NumReduxLevels >= 1)
1661 NumShuffles += NumReduxLevels - 1;
1662 ShuffleCost += NumShuffles *
1663 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1664 0, Ty);
1665 ArithCost += NumReduxLevels *
1666 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1667 return ShuffleCost + ArithCost +
1668 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1669 }
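// Worked example (assumes a hypothetical target whose widest legal vector
// is v4i32): an add reduction over <16 x i32> starts with NumReduxLevels = 4.
// The loop splits 16 -> 8 -> 4, charging one extract-subvector shuffle and
// one add per split (LongVectorCount = 2). The remaining two levels run on
// v4i32 with one permute shuffle and one add each, and a final
// extractelement produces the scalar result.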
1670
1671 /// Try to calculate op costs for min/max reduction operations.
1672 /// \param CondTy Conditional type for the Select instruction.
1673 unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1674 bool) {
1675 assert(Ty->isVectorTy() && "Expect a vector type");
1676 Type *ScalarTy = Ty->getVectorElementType();
1677 Type *ScalarCondTy = CondTy->getVectorElementType();
1678 unsigned NumVecElts = Ty->getVectorNumElements();
1679 unsigned NumReduxLevels = Log2_32(NumVecElts);
1680 unsigned CmpOpcode;
1681 if (Ty->isFPOrFPVectorTy()) {
1682 CmpOpcode = Instruction::FCmp;
1683 } else {
1684 assert(Ty->isIntOrIntVectorTy() &&
1685        "expecting floating point or integer type for min/max reduction");
1686 CmpOpcode = Instruction::ICmp;
1687 }
1688 unsigned MinMaxCost = 0;
1689 unsigned ShuffleCost = 0;
1690 auto *ConcreteTTI = static_cast<T *>(this);
1691 std::pair<unsigned, MVT> LT =
1692 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1693 unsigned LongVectorCount = 0;
1694 unsigned MVTLen =
1695 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1696 while (NumVecElts > MVTLen) {
1697 NumVecElts /= 2;
1698 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1699 CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1700
1701 // Assume the pairwise shuffles add a cost.
1702 ShuffleCost += (IsPairwise + 1) *
1703 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1704 NumVecElts, SubTy);
1705 MinMaxCost +=
1706 ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1707 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1708 nullptr);
1709 Ty = SubTy;
1710 ++LongVectorCount;
1711 }
1712
1713 NumReduxLevels -= LongVectorCount;
1714
1715 // The minimal length of the vector is limited by the real length of vector
1716 // operations performed on the current platform. That's why several final
1717 // reduction operations are performed on the vectors with the same
1718 // architecture-dependent length.
1719
1720 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1721 // reductions need two shuffles on every level but the last one. On that
1722 // level one of the shuffles is <0, u, u, ...> which is identity.
1723 unsigned NumShuffles = NumReduxLevels;
1724 if (IsPairwise && NumReduxLevels >= 1)
1725 NumShuffles += NumReduxLevels - 1;
1726 ShuffleCost += NumShuffles *
1727 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1728 0, Ty);
1729 MinMaxCost +=
1730 NumReduxLevels *
1731 (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1732 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1733 nullptr));
1734 // The last min/max should be in vector registers and we counted it above.
1735 // So just need a single extractelement.
1736 return ShuffleCost + MinMaxCost +
1737 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1738 }
1739
1740 unsigned getVectorSplitCost() { return 1; }
1741
1742 /// @}
1743};
1744
1745/// Concrete BasicTTIImpl that can be used if no further customization
1746/// is needed.
1747class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1748 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1749
1750 friend class BasicTTIImplBase<BasicTTIImpl>;
1751
1752 const TargetSubtargetInfo *ST;
1753 const TargetLoweringBase *TLI;
1754
1755 const TargetSubtargetInfo *getST() const { return ST; }
1756 const TargetLoweringBase *getTLI() const { return TLI; }
1757
1758public:
1759 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1760};
1761
1762} // end namespace llvm
1763
1764#endif // LLVM_CODEGEN_BASICTTIIMPL_H

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/TargetLowering.h

1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file describes how to lower LLVM code to machine code. This has three
11/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/CodeGen/DAGCombine.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/RuntimeLibcalls.h"
35#include "llvm/CodeGen/SelectionDAG.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetCallingConv.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/IR/Attributes.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/IRBuilder.h"
46#include "llvm/IR/InlineAsm.h"
47#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Type.h"
50#include "llvm/MC/MCRegisterInfo.h"
51#include "llvm/Support/Alignment.h"
52#include "llvm/Support/AtomicOrdering.h"
53#include "llvm/Support/Casting.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MachineValueType.h"
56#include "llvm/Target/TargetMachine.h"
57#include "llvm/Transforms/Utils/SizeOpts.h"
58#include <algorithm>
59#include <cassert>
60#include <climits>
61#include <cstdint>
62#include <iterator>
63#include <map>
64#include <string>
65#include <utility>
66#include <vector>
67
68namespace llvm {
69
70class BranchProbability;
71class CCState;
72class CCValAssign;
73class Constant;
74class FastISel;
75class FunctionLoweringInfo;
76class GlobalValue;
77class GISelKnownBits;
78class IntrinsicInst;
79struct KnownBits;
80class LegacyDivergenceAnalysis;
81class LLVMContext;
82class MachineBasicBlock;
83class MachineFunction;
84class MachineInstr;
85class MachineJumpTableInfo;
86class MachineLoop;
87class MachineRegisterInfo;
88class MCContext;
89class MCExpr;
90class Module;
91class TargetRegisterClass;
92class TargetLibraryInfo;
93class TargetRegisterInfo;
94class Value;
95
96namespace Sched {
97
98 enum Preference {
99 None, // No preference
100 Source, // Follow source order.
101 RegPressure, // Scheduling for lowest register pressure.
102 Hybrid, // Scheduling for both latency and register pressure.
103 ILP, // Scheduling for ILP in low register pressure mode.
104 VLIW // Scheduling for VLIW targets.
105 };
106
107} // end namespace Sched
108
109// MemOp models a memory operation, either memset or memcpy/memmove.
110struct MemOp {
111private:
112 // Shared
113 uint64_t Size;
114 bool DstAlignCanChange; // true if destination alignment can satisfy any
115 // constraint.
116 Align DstAlign; // Specified alignment of the memory operation.
117
118 bool AllowOverlap;
119 // memset only
120 bool IsMemset; // If set, this memory operation is a memset.
121 bool ZeroMemset; // If set, clears out memory with zeros.
122 // memcpy only
123 bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
124 // constant so it does not need to be loaded.
125 Align SrcAlign; // Inferred alignment of the source or default value if the
126 // memory operation does not need to load the value.
127public:
128 static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
129 Align SrcAlign, bool IsVolatile,
130 bool MemcpyStrSrc = false) {
131 MemOp Op;
132 Op.Size = Size;
133 Op.DstAlignCanChange = DstAlignCanChange;
134 Op.DstAlign = DstAlign;
135 Op.AllowOverlap = !IsVolatile;
136 Op.IsMemset = false;
137 Op.ZeroMemset = false;
138 Op.MemcpyStrSrc = MemcpyStrSrc;
139 Op.SrcAlign = SrcAlign;
140 return Op;
141 }
142
143 static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
144 bool IsZeroMemset, bool IsVolatile) {
145 MemOp Op;
146 Op.Size = Size;
147 Op.DstAlignCanChange = DstAlignCanChange;
148 Op.DstAlign = DstAlign;
149 Op.AllowOverlap = !IsVolatile;
150 Op.IsMemset = true;
151 Op.ZeroMemset = IsZeroMemset;
152 Op.MemcpyStrSrc = false;
153 return Op;
154 }
155
156 uint64_t size() const { return Size; }
157 Align getDstAlign() const {
158 assert(!DstAlignCanChange);
159 return DstAlign;
160 }
161 bool isFixedDstAlign() const { return !DstAlignCanChange; }
162 bool allowOverlap() const { return AllowOverlap; }
163 bool isMemset() const { return IsMemset; }
164 bool isMemcpy() const { return !IsMemset; }
165 bool isMemcpyWithFixedDstAlign() const {
166 return isMemcpy() && !DstAlignCanChange;
167 }
168 bool isZeroMemset() const { return isMemset() && ZeroMemset; }
169 bool isMemcpyStrSrc() const {
170 assert(isMemcpy() && "Must be a memcpy")((isMemcpy() && "Must be a memcpy") ? static_cast<
void> (0) : __assert_fail ("isMemcpy() && \"Must be a memcpy\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/TargetLowering.h"
, 170, __PRETTY_FUNCTION__))
;
171 return MemcpyStrSrc;
172 }
173 Align getSrcAlign() const {
174 assert(isMemcpy() && "Must be a memcpy")((isMemcpy() && "Must be a memcpy") ? static_cast<
void> (0) : __assert_fail ("isMemcpy() && \"Must be a memcpy\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/TargetLowering.h"
, 174, __PRETTY_FUNCTION__))
;
175 return SrcAlign;
176 }
177 bool isSrcAligned(Align AlignCheck) const {
178 return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
179 }
180 bool isDstAligned(Align AlignCheck) const {
181 return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
182 }
183 bool isAligned(Align AlignCheck) const {
184 return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
185 }
186};
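// Usage sketch (illustrative; the values are made up): describe a 32-byte
// memcpy whose destination alignment is fixed at 8 bytes and whose source
// is 4-byte aligned, then ask whether both sides permit 4-byte accesses.
//
//   MemOp Op = MemOp::Copy(/*Size=*/32, /*DstAlignCanChange=*/false,
//                          /*DstAlign=*/Align(8), /*SrcAlign=*/Align(4),
//                          /*IsVolatile=*/false);
//   bool Ok = Op.isAligned(Align(4)); // true: 8 and 4 are multiples of 4.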
187
188/// This base class for TargetLowering contains the SelectionDAG-independent
189/// parts that can be used from the rest of CodeGen.
190class TargetLoweringBase {
191public:
192 /// This enum indicates whether operations are valid for a target, and if not,
193 /// what action should be used to make them valid.
194 enum LegalizeAction : uint8_t {
195 Legal, // The target natively supports this operation.
196 Promote, // This operation should be executed in a larger type.
197 Expand, // Try to expand this to other ops, otherwise use a libcall.
198 LibCall, // Don't try to expand this to other ops, always use a libcall.
199 Custom // Use the LowerOperation hook to implement custom lowering.
200 };
201
202 /// This enum indicates whether types are legal for a target, and if not,
203 /// what action should be used to make them valid.
204 enum LegalizeTypeAction : uint8_t {
205 TypeLegal, // The target natively supports this type.
206 TypePromoteInteger, // Replace this integer with a larger one.
207 TypeExpandInteger, // Split this integer into two of half the size.
208 TypeSoftenFloat, // Convert this float to a same size integer type.
209 TypeExpandFloat, // Split this float into two of half the size.
210 TypeScalarizeVector, // Replace this one-element vector with its element.
211 TypeSplitVector, // Split this vector into two of half the size.
212 TypeWidenVector, // This vector should be widened into a larger vector.
213 TypePromoteFloat, // Replace this float with a larger one.
214 TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
215 };
216
217 /// LegalizeKind holds the legalization kind that needs to happen to EVT
218 /// in order to type-legalize it.
219 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
220
221 /// Enum that describes how the target represents true/false values.
222 enum BooleanContent {
223 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
224 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
225 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
226 };
227
228 /// Enum that describes what type of support for selects the target has.
229 enum SelectSupportKind {
230 ScalarValSelect, // The target supports scalar selects (ex: cmov).
231 ScalarCondVectorVal, // The target supports selects with a scalar condition
232 // and vector values (ex: cmov).
233 VectorMaskSelect // The target supports vector selects with a vector
234 // mask (ex: x86 blends).
235 };
236
237 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
238 /// to, if at all. Exists because different targets have different levels of
239 /// support for these atomic instructions, and also have different options
240 /// w.r.t. what they should expand to.
241 enum class AtomicExpansionKind {
242 None, // Don't expand the instruction.
243 LLSC, // Expand the instruction into loadlinked/storeconditional; used
244 // by ARM/AArch64.
245 LLOnly, // Expand the (load) instruction into just a load-linked, which has
246 // greater atomic guarantees than a normal load.
247 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
248 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
249 };
250
251 /// Enum that specifies when a multiplication should be expanded.
252 enum class MulExpansionKind {
253 Always, // Always expand the instruction.
254 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
255 // or custom.
256 };
257
258 /// Enum that specifies when a float negation is beneficial.
259 enum class NegatibleCost {
260 Expensive = 0, // Negated expression is more expensive.
261 Neutral = 1, // Negated expression has the same cost.
262 Cheaper = 2 // Negated expression is cheaper.
263 };
264
265 class ArgListEntry {
266 public:
267 Value *Val = nullptr;
268 SDValue Node = SDValue();
269 Type *Ty = nullptr;
270 bool IsSExt : 1;
271 bool IsZExt : 1;
272 bool IsInReg : 1;
273 bool IsSRet : 1;
274 bool IsNest : 1;
275 bool IsByVal : 1;
276 bool IsInAlloca : 1;
277 bool IsReturned : 1;
278 bool IsSwiftSelf : 1;
279 bool IsSwiftError : 1;
280 bool IsCFGuardTarget : 1;
281 uint16_t Alignment = 0;
282 Type *ByValType = nullptr;
283
284 ArgListEntry()
285 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
286 IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
287 IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
288
289 void setAttributes(const CallBase *Call, unsigned ArgIdx);
290
291 void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
292 return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
293 }
294 };
295 using ArgListTy = std::vector<ArgListEntry>;
296
297 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
298 ArgListTy &Args) const {};
299
300 static ISD::NodeType getExtendForContent(BooleanContent Content) {
301 switch (Content) {
302 case UndefinedBooleanContent:
303 // Extend by adding rubbish bits.
304 return ISD::ANY_EXTEND;
305 case ZeroOrOneBooleanContent:
306 // Extend by adding zero bits.
307 return ISD::ZERO_EXTEND;
308 case ZeroOrNegativeOneBooleanContent:
309 // Extend by copying the sign bit.
310 return ISD::SIGN_EXTEND;
311 }
312 llvm_unreachable("Invalid content kind")::llvm::llvm_unreachable_internal("Invalid content kind", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/TargetLowering.h"
, 312)
;
313 }
314
315 explicit TargetLoweringBase(const TargetMachine &TM);
316 TargetLoweringBase(const TargetLoweringBase &) = delete;
317 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
318 virtual ~TargetLoweringBase() = default;
319
320 /// Return true if the target support strict float operation
321 bool isStrictFPEnabled() const {
322 return IsStrictFPEnabled;
323 }
324
325protected:
326 /// Initialize all of the actions to default values.
327 void initActions();
328
329public:
330 const TargetMachine &getTargetMachine() const { return TM; }
331
332 virtual bool useSoftFloat() const { return false; }
333
334 /// Return the pointer type for the given address space, defaults to
335 /// the pointer type from the data layout.
336 /// FIXME: The default needs to be removed once all the code is updated.
337 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
338 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
339 }
340
341 /// Return the in-memory pointer type for the given address space, defaults to
342 /// the pointer type from the data layout. FIXME: The default needs to be
343 /// removed once all the code is updated.
344 MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
345 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
346 }
347
348 /// Return the type for frame index, which is determined by
349 /// the alloca address space specified through the data layout.
350 MVT getFrameIndexTy(const DataLayout &DL) const {
351 return getPointerTy(DL, DL.getAllocaAddrSpace());
352 }
353
354 /// Return the type for operands of fence.
355 /// TODO: Let fence operands be of i32 type and remove this.
356 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
357 return getPointerTy(DL);
358 }
359
360 /// EVT is not used in-tree, but is used by out-of-tree targets.
361 /// Documentation for this function would be nice...
362 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
363
364 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
365 bool LegalTypes = true) const;
366
367 /// Returns the type to be used for the index operand of:
368 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
369 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
370 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
371 return getPointerTy(DL);
372 }
373
374 /// This callback is used to inspect load/store instructions and add
375 /// target-specific MachineMemOperand flags to them. The default
376 /// implementation does nothing.
377 virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const {
378 return MachineMemOperand::MONone;
379 }
380
381 MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI,
382 const DataLayout &DL) const;
383 MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
384 const DataLayout &DL) const;
385 MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
386 const DataLayout &DL) const;
387
388 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
389 return true;
390 }
391
392 /// Return true if it is profitable to convert a select of FP constants into
393 /// a constant pool load whose address depends on the select condition. The
394 /// parameter may be used to differentiate a select with FP compare from
395 /// integer compare.
396 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
397 return true;
398 }
399
400 /// Return true if multiple condition registers are available.
401 bool hasMultipleConditionRegisters() const {
402 return HasMultipleConditionRegisters;
403 }
404
405 /// Return true if the target has BitExtract instructions.
406 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
407
408 /// Return the preferred vector type legalization action.
409 virtual TargetLoweringBase::LegalizeTypeAction
410 getPreferredVectorAction(MVT VT) const {
411 // The default action for one element vectors is to scalarize
412 if (VT.getVectorNumElements() == 1)
413 return TypeScalarizeVector;
414 // The default action for an odd-width vector is to widen.
415 if (!VT.isPow2VectorType())
416 return TypeWidenVector;
417 // The default action for other vectors is to promote
418 return TypePromoteInteger;
419 }
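// Illustrative defaults: with this hook unmodified, a v1i64 is scalarized,
// a v3i32 (not a power of two) is widened, and a v4i16 falls through to
// integer promotion unless a target overrides the choice.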
420
421 // Return true if the half type should be passed around as i16, but promoted
422 // to float around arithmetic. The default behavior is to pass around as
423 // float and convert around loads/stores/bitcasts and other places where
424 // the size matters.
425 virtual bool softPromoteHalfType() const { return false; }
426
427 // There are two general methods for expanding a BUILD_VECTOR node:
428 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
429 // them together.
430 // 2. Build the vector on the stack and then load it.
431 // If this function returns true, then method (1) will be used, subject to
432 // the constraint that all of the necessary shuffles are legal (as determined
433 // by isShuffleMaskLegal). If this function returns false, then method (2) is
434 // always used. The vector type, and the number of defined values, are
435 // provided.
436 virtual bool
437 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
438 unsigned DefinedValues) const {
439 return DefinedValues < 3;
440 }
441
442 /// Return true if integer divide is usually cheaper than a sequence of
443 /// several shifts, adds, and multiplies for this target.
444 /// The definition of "cheaper" may depend on whether we're optimizing
445 /// for speed or for size.
446 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
447
448 /// Return true if the target can handle a standalone remainder operation.
449 virtual bool hasStandaloneRem(EVT VT) const {
450 return true;
451 }
452
453 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
454 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
455 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
456 return false;
457 }
458
459 /// Reciprocal estimate status values used by the functions below.
460 enum ReciprocalEstimate : int {
461 Unspecified = -1,
462 Disabled = 0,
463 Enabled = 1
464 };
465
466 /// Return a ReciprocalEstimate enum value for a square root of the given type
467 /// based on the function's attributes. If the operation is not overridden by
468 /// the function's attributes, "Unspecified" is returned and target defaults
469 /// are expected to be used for instruction selection.
470 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
471
472 /// Return a ReciprocalEstimate enum value for a division of the given type
473 /// based on the function's attributes. If the operation is not overridden by
474 /// the function's attributes, "Unspecified" is returned and target defaults
475 /// are expected to be used for instruction selection.
476 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
477
478 /// Return the refinement step count for a square root of the given type based
479 /// on the function's attributes. If the operation is not overridden by
480 /// the function's attributes, "Unspecified" is returned and target defaults
481 /// are expected to be used for instruction selection.
482 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
483
484 /// Return the refinement step count for a division of the given type based
485 /// on the function's attributes. If the operation is not overridden by
486 /// the function's attributes, "Unspecified" is returned and target defaults
487 /// are expected to be used for instruction selection.
488 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
489
490 /// Returns true if target has indicated at least one type should be bypassed.
491 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
492
493 /// Returns map of slow types for division or remainder with corresponding
494 /// fast types
495 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
496 return BypassSlowDivWidths;
497 }
498
499 /// Return true if Flow Control is an expensive operation that should be
500 /// avoided.
501 bool isJumpExpensive() const { return JumpIsExpensive; }
502
503 /// Return true if selects are only cheaper than branches if the branch is
504 /// unlikely to be predicted right.
505 bool isPredictableSelectExpensive() const {
506 return PredictableSelectIsExpensive;
507 }
508
509 /// If a branch or a select condition is skewed in one direction by more than
510 /// this factor, it is very likely to be predicted correctly.
511 virtual BranchProbability getPredictableBranchThreshold() const;
512
513 /// Return true if the following transform is beneficial:
514 /// fold (conv (load x)) -> (load (conv*)x)
515 /// On architectures that don't natively support some vector loads
516 /// efficiently, casting the load to a smaller vector of larger types and
517 /// loading is more efficient, however, this can be undone by optimizations in
518 /// dag combiner.
519 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
520 const SelectionDAG &DAG,
521 const MachineMemOperand &MMO) const {
522 // Don't do this if we could do an indexed load on the original type, but not on
523 // the new one.
524 if (!LoadVT.isSimple() || !BitcastVT.isSimple())
525 return true;
526
527 MVT LoadMVT = LoadVT.getSimpleVT();
528
529 // Don't bother doing this if it's just going to be promoted again later, as
530 // doing so might interfere with other combines.
531 if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
532 getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
533 return false;
534
535 bool Fast = false;
536 return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
537 MMO, &Fast) && Fast;
538 }
539
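A minimal override sketch, assuming a hypothetical MyTargetLowering subclass: a target could keep the default logic above and additionally refuse the fold whenever the bitcast type is not legal for it.

  bool MyTargetLowering::isLoadBitCastBeneficial(
      EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG,
      const MachineMemOperand &MMO) const {
    // Reject casts to types the target cannot hold in a register; otherwise
    // defer to the generic indexed-load / promotion checks above.
    if (!isTypeLegal(BitcastVT))
      return false;
    return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
  }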
540 /// Return true if the following transform is beneficial:
541 /// (store (y (conv x)), y*)) -> (store x, (x*))
542 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
543 const SelectionDAG &DAG,
544 const MachineMemOperand &MMO) const {
545 // Default to the same logic as loads.
546 return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
547 }
548
549 /// Return true if it is expected to be cheaper to do a store of a non-zero
550 /// vector constant with the given size and type for the address space than to
551 /// store the individual scalar element constants.
552 virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
553 unsigned NumElem,
554 unsigned AddrSpace) const {
555 return false;
556 }
557
558 /// Allow store merging for the specified type after legalization in addition
559 /// to before legalization. This may transform stores that do not exist
560 /// earlier (for example, stores created from intrinsics).
561 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
562 return true;
563 }
564
565 /// Returns true if it's reasonable to merge stores to MemVT size.
566 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
567 const SelectionDAG &DAG) const {
568 return true;
569 }
570
571 /// Return true if it is cheap to speculate a call to intrinsic cttz.
572 virtual bool isCheapToSpeculateCttz() const {
573 return false;
574 }
575
576 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
577 virtual bool isCheapToSpeculateCtlz() const {
578 return false;
579 }
580
581 /// Return true if ctlz instruction is fast.
582 virtual bool isCtlzFast() const {
583 return false;
584 }
585
586 /// Return true if instruction generated for equality comparison is folded
587 /// with instruction generated for signed comparison.
588 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
589
590 /// Return true if it is safe to transform an integer-domain bitwise operation
591 /// into the equivalent floating-point operation. This should be set to true
592 /// if the target has IEEE-754-compliant fabs/fneg operations for the input
593 /// type.
594 virtual bool hasBitPreservingFPLogic(EVT VT) const {
595 return false;
596 }
597
598 /// Return true if it is cheaper to split the store of a merged int val
599 /// from a pair of smaller values into multiple stores.
600 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
601 return false;
602 }
603
604 /// Return true if the target supports combining a
605 /// chain like:
606 /// \code
607 /// %andResult = and %val1, #mask
608 /// %icmpResult = icmp %andResult, 0
609 /// \endcode
610 /// into a single machine instruction of a form like:
611 /// \code
612 /// cc = test %register, #mask
613 /// \endcode
614 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
615 return false;
616 }
617
618 /// Use bitwise logic to make pairs of compares more efficient. For example:
619 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
620 /// This should be true when it takes more than one instruction to lower
621 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
622 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
623 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
624 return false;
625 }
626
627 /// Return the preferred operand type if the target has a quick way to compare
628 /// integer values of the given size. Assume that any legal integer type can
629 /// be compared efficiently. Targets may override this to allow illegal wide
630 /// types to return a vector type if there is support to compare that type.
631 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
632 MVT VT = MVT::getIntegerVT(NumBits);
633 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
634 }
635
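As a hedged sketch (MyTargetLowering and the 128-bit/v16i8 choice are assumptions, not from this report), a target with fast vector equality could report a vector type for an otherwise illegal wide integer compare:

  MVT MyTargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
    // Let a 128-bit memcmp-style equality be done as one v16i8 compare if
    // that vector type is legal; otherwise fall back to the default above.
    if (NumBits == 128 && isTypeLegal(MVT::v16i8))
      return MVT::v16i8;
    return TargetLowering::hasFastEqualityCompare(NumBits);
  }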
636 /// Return true if the target should transform:
637 /// (X & Y) == Y ---> (~X & Y) == 0
638 /// (X & Y) != Y ---> (~X & Y) != 0
639 ///
640 /// This may be profitable if the target has a bitwise and-not operation that
641 /// sets comparison flags. A target may want to limit the transformation based
642 /// on the type of Y or if Y is a constant.
643 ///
644 /// Note that the transform will not occur if Y is known to be a power-of-2
645 /// because a mask and compare of a single bit can be handled by inverting the
646 /// predicate, for example:
647 /// (X & 8) == 8 ---> (X & 8) != 0
648 virtual bool hasAndNotCompare(SDValue Y) const {
649 return false;
650 }
651
652 /// Return true if the target has a bitwise and-not operation:
653 /// X = ~A & B
654 /// This can be used to simplify select or other instructions.
655 virtual bool hasAndNot(SDValue X) const {
656 // If the target has the more complex version of this operation, assume that
657 // it has this operation too.
658 return hasAndNotCompare(X);
659 }
660
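A minimal sketch, assuming a hypothetical target whose flag-setting and-not instruction only handles scalar integers; the override enables the (X & Y) ==/!= Y rewrite described above for legal scalar types only:

  bool MyTargetLowering::hasAndNotCompare(SDValue Y) const {
    // Scalar, legal integer types can use the and-not + flags instruction;
    // vectors keep the original (X & Y) ==/!= Y form.
    EVT VT = Y.getValueType();
    return !VT.isVector() && VT.isInteger() && isTypeLegal(VT);
  }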
661 /// Return true if the target has a bit-test instruction:
662 /// (X & (1 << Y)) ==/!= 0
663 /// This knowledge can be used to prevent breaking the pattern,
664 /// or creating it if it could be recognized.
665 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
666
667 /// There are two ways to clear extreme bits (either low or high):
668 /// Mask: x & (-1 << y) (the instcombine canonical form)
669 /// Shifts: x >> y << y
670 /// Return true if the variant with 2 variable shifts is preferred.
671 /// Return false if there is no preference.
672 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
673 // By default, let's assume that no one prefers shifts.
674 return false;
675 }
676
677 /// Return true if it is profitable to fold a pair of shifts into a mask.
678 /// This is usually true on most targets. But some targets, like Thumb1,
679 /// have immediate shift instructions, but no immediate "and" instruction;
680 /// this makes the fold unprofitable.
681 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
682 CombineLevel Level) const {
683 return true;
684 }
685
686 /// Should we transform the IR-optimal check for whether the given truncation
687 /// down into KeptBits would be truncating or not:
688 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
689 /// Into its more traditional form:
690 /// ((%x << C) a>> C) dstcond %x
691 /// Return true if we should transform.
692 /// Return false if there is no preference.
693 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
694 unsigned KeptBits) const {
695 // By default, let's assume that no one prefers shifts.
696 return false;
697 }
698
699 /// Given the pattern
700 /// (X & (C l>>/<< Y)) ==/!= 0
701 /// return true if it should be transformed into:
702 /// ((X <</l>> Y) & C) ==/!= 0
703 /// WARNING: if 'X' is a constant, the fold may deadlock!
704 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
705 /// here because it can end up being not linked in.
706 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
707 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
708 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
709 SelectionDAG &DAG) const {
710 if (hasBitTest(X, Y)) {
711 // One interesting pattern that we'd want to form is 'bit test':
712 // ((1 << Y) & C) ==/!= 0
713 // But we also need to be careful not to try to reverse that fold.
714
715 // Is this '1 << Y' ?
716 if (OldShiftOpcode == ISD::SHL && CC->isOne())
717 return false; // Keep the 'bit test' pattern.
718
719 // Will it be '1 << Y' after the transform ?
720 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
721 return true; // Do form the 'bit test' pattern.
722 }
723
724 // If 'X' is a constant, and we transform, then we will immediately
725 // try to undo the fold, thus causing endless combine loop.
726 // So by default, let's assume everyone prefers the fold
727 // iff 'X' is not a constant.
728 return !XC;
729 }
730
731 /// These two forms are equivalent:
732 /// sub %y, (xor %x, -1)
733 /// add (add %x, 1), %y
734 /// The variant with two add's is IR-canonical.
735 /// Some targets may prefer one to the other.
736 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
737 // By default, let's assume that everyone prefers the form with two add's.
738 return true;
739 }
740
741 /// Return true if the target wants to use the optimization that
742 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
743 /// promotedInst1(...(promotedInstN(ext(load)))).
744 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
745
746 /// Return true if the target can combine store(extractelement VectorTy,
747 /// Idx).
748 /// \p Cost[out] gives the cost of that transformation when this is true.
749 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
750 unsigned &Cost) const {
751 return false;
752 }
753
754 /// Return true if inserting a scalar into a variable element of an undef
755 /// vector is more efficiently handled by splatting the scalar instead.
756 virtual bool shouldSplatInsEltVarIndex(EVT) const {
757 return false;
758 }
759
760 /// Return true if the target always benefits from combining into FMA for a
761 /// given value type. This must typically return false on targets where FMA
762 /// takes more cycles to execute than FADD.
763 virtual bool enableAggressiveFMAFusion(EVT VT) const {
764 return false;
765 }
766
767 /// Return the ValueType of the result of SETCC operations.
768 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
769 EVT VT) const;
770
771 /// Return the ValueType for comparison libcalls. Comparison libcalls include
772 /// floating point comparison calls, and Ordered/Unordered check calls on
773 /// floating point numbers.
774 virtual
775 MVT::SimpleValueType getCmpLibcallReturnType() const;
776
777 /// For targets without i1 registers, this gives the nature of the high-bits
778 /// of boolean values held in types wider than i1.
779 ///
780 /// "Boolean values" are special true/false values produced by nodes like
781 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
782 /// Not to be confused with general values promoted from i1. Some cpus
783 /// distinguish between vectors of boolean and scalars; the isVec parameter
784 /// selects between the two kinds. For example on X86 a scalar boolean should
785 /// be zero extended from i1, while the elements of a vector of booleans
786 /// should be sign extended from i1.
787 ///
788 /// Some cpus also treat floating point types the same way as they treat
789 /// vectors instead of the way they treat scalars.
790 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
791 if (isVec)
792 return BooleanVectorContents;
793 return isFloat ? BooleanFloatContents : BooleanContents;
794 }
795
796 BooleanContent getBooleanContents(EVT Type) const {
797 return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
798 }
799
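A short usage sketch (TLI is our name for the TargetLowering instance): callers typically switch on the returned BooleanContent to decide how a SETCC result may be extended.

  switch (TLI.getBooleanContents(MVT::i32)) {
  case TargetLowering::ZeroOrOneBooleanContent:
    // The value is 0 or 1; zero-extension of bit 0 is sufficient.
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    // The value is 0 or all-ones; sign-extension preserves it.
    break;
  case TargetLowering::UndefinedBooleanContent:
    // Only bit 0 is meaningful; mask before treating it as 0/1.
    break;
  }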
800 /// Return target scheduling preference.
801 Sched::Preference getSchedulingPreference() const {
802 return SchedPreferenceInfo;
803 }
804
805 /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics
806 /// for different nodes. This function returns the preference (or none) for
807 /// the given node.
808 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
809 return Sched::None;
810 }
811
812 /// Return the register class that should be used for the specified value
813 /// type.
814 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
815 (void)isDivergent;
816 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
817 assert(RC && "This value type is not natively supported!");
818 return RC;
819 }
820
821 /// Allows target to decide about the register class of the
822 /// specific value that is live outside the defining block.
823 /// Returns true if the value needs uniform register class.
824 virtual bool requiresUniformRegister(MachineFunction &MF,
825 const Value *) const {
826 return false;
827 }
828
829 /// Return the 'representative' register class for the specified value
830 /// type.
831 ///
832 /// The 'representative' register class is the largest legal super-reg
833 /// register class for the register class of the value type. For example, on
834 /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep
835 /// register class is GR64 on x86_64.
836 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
837 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
838 return RC;
839 }
840
841 /// Return the cost of the 'representative' register class for the specified
842 /// value type.
843 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
844 return RepRegClassCostForVT[VT.SimpleTy];
845 }
846
847 /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
848 /// instructions, and false if a library call is preferred (e.g for code-size
849 /// reasons).
850 virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
851 return true;
852 }
853
854 /// Return true if the target has native support for the specified value type.
855 /// This means that it has a register that directly holds it without
856 /// promotions or expansions.
857 bool isTypeLegal(EVT VT) const {
858 assert(!VT.isSimple() ||
859        (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
860 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
861 }
862
863 class ValueTypeActionImpl {
864 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
865 /// that indicates how instruction selection should deal with the type.
866 LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];
867
868 public:
869 ValueTypeActionImpl() {
870 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
871 TypeLegal);
872 }
873
874 LegalizeTypeAction getTypeAction(MVT VT) const {
875 return ValueTypeActions[VT.SimpleTy];
876 }
877
878 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
879 ValueTypeActions[VT.SimpleTy] = Action;
880 }
881 };
882
883 const ValueTypeActionImpl &getValueTypeActions() const {
884 return ValueTypeActions;
885 }
886
887 /// Return how we should legalize values of this type: either it is already
888 /// legal (return 'Legal') or we need to promote it to a larger type (return
889 /// 'Promote'), or we need to expand it into multiple registers of smaller
890 /// integer type (return 'Expand'). 'Custom' is not an option.
891 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
892 return getTypeConversion(Context, VT).first;
893 }
894 LegalizeTypeAction getTypeAction(MVT VT) const {
895 return ValueTypeActions.getTypeAction(VT);
896 }
897
898 /// For types supported by the target, this is an identity function. For
899 /// types that must be promoted to larger types, this returns the larger type
900 /// to promote to. For integer types that are larger than the largest integer
901 /// register, this contains one step in the expansion to get to the smaller
902 /// register. For illegal floating point types, this returns the integer type
903 /// to transform to.
904 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
905 return getTypeConversion(Context, VT).second;
906 }
907
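A brief usage sketch (TLI and Ctx are our names): one legalization step for a type that must be expanded, e.g. i64 on a hypothetical 32-bit target.

  EVT VT = MVT::i64;
  if (TLI.getTypeAction(Ctx, VT) == TargetLowering::TypeExpandInteger)
    VT = TLI.getTypeToTransformTo(Ctx, VT); // one expansion step, typically i32 here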
908 /// For types supported by the target, this is an identity function. For
909 /// types that must be expanded (i.e. integer types that are larger than the
910 /// largest integer register or illegal floating point types), this returns
911 /// the largest legal type it will be expanded to.
912 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
913 assert(!VT.isVector());
914 while (true) {
915 switch (getTypeAction(Context, VT)) {
916 case TypeLegal:
917 return VT;
918 case TypeExpandInteger:
919 VT = getTypeToTransformTo(Context, VT);
920 break;
921 default:
922       llvm_unreachable("Type is not legal nor is it to be expanded!");
923 }
924 }
925 }
926
927 /// Vector types are broken down into some number of legal first class types.
928 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
929 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
930 /// turns into 4 EVT::i32 values with both PPC and X86.
931 ///
932 /// This method returns the number of registers needed, and the VT for each
933 /// register. It also returns the VT and quantity of the intermediate values
934 /// before they are promoted/expanded.
935 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
936 EVT &IntermediateVT,
937 unsigned &NumIntermediates,
938 MVT &RegisterVT) const;
939
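A usage sketch for the v8f32 example in the comment above (TLI and Ctx are our names; the 128-bit-vector assumption is ours):

  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs = TLI.getVectorTypeBreakdown(
      Ctx, EVT::getVectorVT(Ctx, MVT::f32, 8), IntermediateVT,
      NumIntermediates, RegisterVT);
  // On a target whose widest legal vector is 128 bits this typically yields
  // NumRegs == 2 with IntermediateVT == RegisterVT == v4f32.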
940 /// Certain targets such as MIPS require that some types such as vectors are
941 /// always broken down into scalars in some contexts. This occurs even if the
942 /// vector type is legal.
943 virtual unsigned getVectorTypeBreakdownForCallingConv(
944 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
945 unsigned &NumIntermediates, MVT &RegisterVT) const {
946 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
947 RegisterVT);
948 }
949
950 struct IntrinsicInfo {
951 unsigned opc = 0; // target opcode
952 EVT memVT; // memory VT
953
954 // value representing memory location
955 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
956
957 int offset = 0; // offset off of ptrVal
958 uint64_t size = 0; // the size of the memory location
959 // (taken from memVT if zero)
960 MaybeAlign align = Align(1); // alignment
961
962 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
963 IntrinsicInfo() = default;
964 };
965
966 /// Given an intrinsic, checks if on the target the intrinsic will need to map
967 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
968 /// true and store the intrinsic information into the IntrinsicInfo that was
969 /// passed to the function.
970 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
971 MachineFunction &,
972 unsigned /*Intrinsic*/) const {
973 return false;
974 }
975
976 /// Returns true if the target can instruction select the specified FP
977 /// immediate natively. If false, the legalizer will materialize the FP
978 /// immediate as a load from a constant pool.
979 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
980 bool ForCodeSize = false) const {
981 return false;
982 }
983
984 /// Targets can use this to indicate that they only support *some*
985 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
986 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
987 /// legal.
988 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
989 return true;
990 }
991
992 /// Returns true if the operation can trap for the value type.
993 ///
994 /// VT must be a legal type. By default, we optimistically assume most
995 /// operations don't trap except for integer divide and remainder.
996 virtual bool canOpTrap(unsigned Op, EVT VT) const;
997
998 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
999 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1000 /// constant pool entry.
1001 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
1002 EVT /*VT*/) const {
1003 return false;
1004 }
1005
1006 /// Return how this operation should be treated: either it is legal, needs to
1007 /// be promoted to a larger size, needs to be expanded to some other code
1008 /// sequence, or the target has a custom expander for it.
1009 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
1010 if (VT.isExtended()) return Expand;
1011 // If a target-specific SDNode requires legalization, require the target
1012 // to provide custom legalization for it.
1013 if (Op >= array_lengthof(OpActions[0])) return Custom;
1014 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
1015 }
1016
1017 /// Custom method defined by each target to indicate if an operation which
1018 /// may require a scale is supported natively by the target.
1019 /// If not, the operation is illegal.
1020 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
1021 unsigned Scale) const {
1022 return false;
1023 }
1024
1025 /// Some fixed point operations may be natively supported by the target but
1026 /// only for specific scales. This method allows for checking
1027 /// if the width is supported by the target for a given operation that may
1028 /// depend on scale.
1029 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
1030 unsigned Scale) const {
1031 auto Action = getOperationAction(Op, VT);
1032 if (Action != Legal)
1033 return Action;
1034
1035 // This operation is supported in this type but may only work on specific
1036 // scales.
1037 bool Supported;
1038 switch (Op) {
1039 default:
1040      llvm_unreachable("Unexpected fixed point operation.");
1041 case ISD::SMULFIX:
1042 case ISD::SMULFIXSAT:
1043 case ISD::UMULFIX:
1044 case ISD::UMULFIXSAT:
1045 case ISD::SDIVFIX:
1046 case ISD::SDIVFIXSAT:
1047 case ISD::UDIVFIX:
1048 case ISD::UDIVFIXSAT:
1049 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
1050 break;
1051 }
1052
1053 return Supported ? Action : Expand;
1054 }
1055
1056 // If Op is a strict floating-point operation, return the result
1057 // of getOperationAction for the equivalent non-strict operation.
1058 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
1059 unsigned EqOpc;
1060 switch (Op) {
1061    default: llvm_unreachable("Unexpected FP pseudo-opcode");
1062#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1063 case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
1064#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1065 case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
1066#include "llvm/IR/ConstrainedOps.def"
1067 }
1068
1069 return getOperationAction(EqOpc, VT);
1070 }
1071
1072 /// Return true if the specified operation is legal on this target or can be
1073 /// made legal with custom lowering. This is used to help guide high-level
1074 /// lowering decisions.
1075 bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
1076 return (VT == MVT::Other || isTypeLegal(VT)) &&
1077 (getOperationAction(Op, VT) == Legal ||
1078 getOperationAction(Op, VT) == Custom);
1079 }
1080
1081 /// Return true if the specified operation is legal on this target or can be
1082 /// made legal using promotion. This is used to help guide high-level lowering
1083 /// decisions.
1084 bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
1085 return (VT == MVT::Other || isTypeLegal(VT)) &&
1086 (getOperationAction(Op, VT) == Legal ||
1087 getOperationAction(Op, VT) == Promote);
1088 }
1089
1090 /// Return true if the specified operation is legal on this target or can be
1091 /// made legal with custom lowering or using promotion. This is used to help
1092 /// guide high-level lowering decisions.
1093 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
1094 return (VT == MVT::Other || isTypeLegal(VT)) &&
1095 (getOperationAction(Op, VT) == Legal ||
1096 getOperationAction(Op, VT) == Custom ||
1097 getOperationAction(Op, VT) == Promote);
1098 }
1099
1100 /// Return true if the operation uses custom lowering, regardless of whether
1101 /// the type is legal or not.
1102 bool isOperationCustom(unsigned Op, EVT VT) const {
1103 return getOperationAction(Op, VT) == Custom;
1104 }
1105
1106 /// Return true if lowering to a jump table is allowed.
1107 virtual bool areJTsAllowed(const Function *Fn) const {
1108 if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
1109 return false;
1110
1111 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1112 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1113 }
1114
1115 /// Check whether the range [Low,High] fits in a machine word.
1116 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1117 const DataLayout &DL) const {
1118 // FIXME: Using the pointer type doesn't seem ideal.
1119 uint64_t BW = DL.getIndexSizeInBits(0u);
1120 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1121 return Range <= BW;
1122 }
1123
1124 /// Return true if lowering to a jump table is suitable for a set of case
1125 /// clusters which may contain \p NumCases cases covering a range of \p Range values.
1126 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1127 uint64_t Range, ProfileSummaryInfo *PSI,
1128 BlockFrequencyInfo *BFI) const;
1129
1130 /// Return true if lowering to a bit test is suitable for a set of case
1131 /// clusters which contains \p NumDests unique destinations, \p Low and
1132 /// \p High as its lowest and highest case values, and expects \p NumCmps
1133 /// case value comparisons. Check if the number of destinations, comparison
1134 /// metric, and range are all suitable.
1135 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1136 const APInt &Low, const APInt &High,
1137 const DataLayout &DL) const {
1138 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1139 // range of cases both require only one branch to lower. Just looking at the
1140 // number of clusters and destinations should be enough to decide whether to
1141 // build bit tests.
1142
1143 // To lower a range with bit tests, the range must fit the bitwidth of a
1144 // machine word.
1145 if (!rangeFitsInWord(Low, High, DL))
1146 return false;
1147
1148 // Decide whether it's profitable to lower this range with bit tests. Each
1149 // destination requires a bit test and branch, and there is an overall range
1150 // check branch. For a small number of clusters, separate comparisons might
1151 // be cheaper, and for many destinations, splitting the range might be
1152 // better.
1153 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1154 (NumDests == 3 && NumCmps >= 6);
1155 }
1156
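A worked instance of the heuristic above (TLI, DL, and the case values are our assumptions): four cases {0, 3, 5, 9} that all jump to one block.

  // NumDests == 1, NumCmps == 4, and the range 9 - 0 + 1 = 10 fits in a
  // 64-bit word, so (NumDests == 1 && NumCmps >= 3) holds and the cluster
  // can be lowered as a single test against the mask 0b1000101001.
  bool UseBitTests = TLI.isSuitableForBitTests(/*NumDests=*/1, /*NumCmps=*/4,
                                               APInt(64, 0), APInt(64, 9), DL);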
1157 /// Return true if the specified operation is illegal on this target or
1158 /// unlikely to be made legal with custom lowering. This is used to help guide
1159 /// high-level lowering decisions.
1160 bool isOperationExpand(unsigned Op, EVT VT) const {
1161 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1162 }
1163
1164 /// Return true if the specified operation is legal on this target.
1165 bool isOperationLegal(unsigned Op, EVT VT) const {
1166 return (VT == MVT::Other || isTypeLegal(VT)) &&
1167 getOperationAction(Op, VT) == Legal;
1168 }
1169
1170 /// Return how this load with extension should be treated: either it is legal,
1171 /// needs to be promoted to a larger size, needs to be expanded to some other
1172 /// code sequence, or the target has a custom expander for it.
1173 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1174 EVT MemVT) const {
1175 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1176 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1177 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1178 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
1179        MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
1180 unsigned Shift = 4 * ExtType;
1181 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1182 }
1183
1184 /// Return true if the specified load with extension is legal on this target.
1185 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1186 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1187 }
1188
1189 /// Return true if the specified load with extension is legal or custom
1190 /// on this target.
1191 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1192 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1193 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1194 }
1195
1196 /// Return how this store with truncation should be treated: either it is
1197 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1198 /// other code sequence, or the target has a custom expander for it.
1199 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1200 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1201 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1202 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1203 assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
1204        "Table isn't big enough!");
1205 return TruncStoreActions[ValI][MemI];
1206 }
1207
1208 /// Return true if the specified store with truncation is legal on this
1209 /// target.
1210 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1211 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1212 }
1213
1214 /// Return true if the specified store with truncation has a solution on this
1215 /// target.
1216 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1217 return isTypeLegal(ValVT) &&
1218 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1219 getTruncStoreAction(ValVT, MemVT) == Custom);
1220 }
1221
1222 /// Return how the indexed load should be treated: either it is legal, needs
1223 /// to be promoted to a larger size, needs to be expanded to some other code
1224 /// sequence, or the target has a custom expander for it.
1225 LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1226 return getIndexedModeAction(IdxMode, VT, IMAB_Load);
1227 }
1228
1229 /// Return true if the specified indexed load is legal on this target.
1230 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1231 return VT.isSimple() &&
1232 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1233 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1234 }
1235
1236 /// Return how the indexed store should be treated: either it is legal, needs
1237 /// to be promoted to a larger size, needs to be expanded to some other code
1238 /// sequence, or the target has a custom expander for it.
1239 LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1240 return getIndexedModeAction(IdxMode, VT, IMAB_Store);
1241 }
1242
1243 /// Return true if the specified indexed store is legal on this target.
1244 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1245 return VT.isSimple() &&
1246 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1247 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1248 }
1249
1250 /// Return how the indexed masked load should be treated: either it is legal, needs
1251 /// to be promoted to a larger size, needs to be expanded to some other code
1252 /// sequence, or the target has a custom expander for it.
1253 LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
1254 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
1255 }
1256
1257 /// Return true if the specified indexed masked load is legal on this target.
1258 bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
1259 return VT.isSimple() &&
1260 (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1261 getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1262 }
1263
1264 /// Return how the indexed masked store should be treated: either it is legal, needs
1265 /// to be promoted to a larger size, needs to be expanded to some other code
1266 /// sequence, or the target has a custom expander for it.
1267 LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
1268 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
1269 }
1270
1271 /// Return true if the specified indexed masked store is legal on this target.
1272 bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
1273 return VT.isSimple() &&
1274 (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1275 getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1276 }
1277
1278 /// Return how the condition code should be treated: either it is legal, needs
1279 /// to be expanded to some other code sequence, or the target has a custom
1280 /// expander for it.
1281 LegalizeAction
1282 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1283 assert((unsigned)CC < array_lengthof(CondCodeActions) &&
1284        ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
1285        "Table isn't big enough!");
1286 // See setCondCodeAction for how this is encoded.
1287 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1288 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1289 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1290 assert(Action != Promote && "Can't promote condition code!");
1291 return Action;
1292 }
1293
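For reference, a sketch of the producer side assumed by the decoder above: each MVT occupies a 4-bit nibble in a 32-bit entry of CondCodeActions, and a backend constructor records the action with setCondCodeAction, for example:

  // Mark SETUGE on f32 as needing expansion; the call packs Expand into the
  // nibble that getCondCodeAction unpacks above.
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);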
1294 /// Return true if the specified condition code is legal on this target.
1295 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1296 return getCondCodeAction(CC, VT) == Legal;
1297 }
1298
1299 /// Return true if the specified condition code is legal or custom on this
1300 /// target.
1301 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1302 return getCondCodeAction(CC, VT) == Legal ||
1303 getCondCodeAction(CC, VT) == Custom;
1304 }
1305
1306 /// If the action for this operation is to promote, this method returns the
1307 /// ValueType to promote to.
1308 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1309 assert(getOperationAction(Op, VT) == Promote &&
1310        "This operation isn't promoted!");
1311
1312 // See if this has an explicit type specified.
1313 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1314 MVT::SimpleValueType>::const_iterator PTTI =
1315 PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1316 if (PTTI != PromoteToType.end()) return PTTI->second;
1317
1318 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1319        "Cannot autopromote this type, add it with AddPromotedToType.");
1320
1321 MVT NVT = VT;
1322 do {
1323 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1324   assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1325          "Didn't find type to promote to!");
1326 } while (!isTypeLegal(NVT) ||
1327 getOperationAction(Op, NVT) == Promote);
1328 return NVT;
1329 }
1330
1331 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1332 /// operations except for the pointer size. If AllowUnknown is true, this
1333 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1334 /// otherwise it will assert.
1335 EVT getValueType(const DataLayout &DL, Type *Ty,
1336 bool AllowUnknown = false) const {
1337 // Lower scalar pointers to native pointer types.
1338 if (auto *PTy = dyn_cast<PointerType>(Ty))
28: Assuming 'PTy' is null
29: Taking false branch
1339 return getPointerTy(DL, PTy->getAddressSpace());
1340
1341 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
30: Assuming 'VTy' is non-null
31: Taking true branch
1342 Type *EltTy = VTy->getElementType();
1343 // Lower vectors of pointers to native pointer types.
1344 if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
32: Assuming 'EltTy' is not a 'PointerType'
32.1: 'PTy' is null
33: Taking false branch
1345 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
1346 EltTy = PointerTy.getTypeForEVT(Ty->getContext());
1347 }
1348 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
34: Called C++ object pointer is null
1349 VTy->getElementCount());
1350 }
1351
1352 return EVT::getEVT(Ty, AllowUnknown);
1353 }
1354
1355 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1356 bool AllowUnknown = false) const {
1357 // Lower scalar pointers to native pointer types.
1358 if (PointerType *PTy = dyn_cast<PointerType>(Ty))
1359 return getPointerMemTy(DL, PTy->getAddressSpace());
1360 else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1361 Type *Elm = VTy->getElementType();
1362 if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
1363 EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
1364 Elm = PointerTy.getTypeForEVT(Ty->getContext());
1365 }
1366 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
1367 VTy->getElementCount());
1368 }
1369
1370 return getValueType(DL, Ty, AllowUnknown);
1371 }
1372
1373
1374 /// Return the MVT corresponding to this LLVM type. See getValueType.
1375 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1376 bool AllowUnknown = false) const {
1377 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1378 }
1379
1380 /// Return the desired alignment for ByVal or InAlloca aggregate function
1381 /// arguments in the caller parameter area. This is the actual alignment, not
1382 /// its logarithm.
1383 virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1384
1385 /// Return the type of registers that this ValueType will eventually require.
1386 MVT getRegisterType(MVT VT) const {
1387 assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
1388 return RegisterTypeForVT[VT.SimpleTy];
1389 }
1390
1391 /// Return the type of registers that this ValueType will eventually require.
1392 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1393 if (VT.isSimple()) {
1394   assert((unsigned)VT.getSimpleVT().SimpleTy <
1395          array_lengthof(RegisterTypeForVT));
1396 return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
1397 }
1398 if (VT.isVector()) {
1399 EVT VT1;
1400 MVT RegisterVT;
1401 unsigned NumIntermediates;
1402 (void)getVectorTypeBreakdown(Context, VT, VT1,
1403 NumIntermediates, RegisterVT);
1404 return RegisterVT;
1405 }
1406 if (VT.isInteger()) {
1407 return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1408 }
1409 llvm_unreachable("Unsupported extended type!");
1410 }
1411
1412 /// Return the number of registers that this ValueType will eventually
1413 /// require.
1414 ///
1415 /// This is one for any types promoted to live in larger registers, but may be
1416 /// more than one for types (like i64) that are split into pieces. For types
1417 /// like i140, which are first promoted then expanded, it is the number of
1418 /// registers needed to hold all the bits of the original type. For an i140
1419 /// on a 32 bit machine this means 5 registers.
1420 unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
1421 if (VT.isSimple()) {
1422   assert((unsigned)VT.getSimpleVT().SimpleTy <
1423          array_lengthof(NumRegistersForVT));
1424 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1425 }
1426 if (VT.isVector()) {
1427 EVT VT1;
1428 MVT VT2;
1429 unsigned NumIntermediates;
1430 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1431 }
1432 if (VT.isInteger()) {
1433 unsigned BitWidth = VT.getSizeInBits();
1434 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1435 return (BitWidth + RegWidth - 1) / RegWidth;
1436 }
1437 llvm_unreachable("Unsupported extended type!");
1438 }
1439
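Checking the i140 example from the comment above (TLI and Ctx are our names):

  // On a 32-bit target the register type is i32, so the integer branch of
  // getNumRegisters computes (140 + 32 - 1) / 32 == 5.
  unsigned NumRegs = TLI.getNumRegisters(Ctx, EVT::getIntegerVT(Ctx, 140));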
1440 /// Certain combinations of ABIs, Targets and features require that types
1441 /// are legal for some operations and not for other operations.
1442 /// For MIPS all vector types must be passed through the integer register set.
1443 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1444 CallingConv::ID CC, EVT VT) const {
1445 return getRegisterType(Context, VT);
1446 }
1447
1448 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1449 /// this occurs when a vector type is used, as vectors are passed through the
1450 /// integer register set.
1451 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1452 CallingConv::ID CC,
1453 EVT VT) const {
1454 return getNumRegisters(Context, VT);
1455 }
1456
1457 /// Certain targets have context-sensitive alignment requirements, where one
1458 /// type has the alignment requirement of another type.
1459 virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
1460 DataLayout DL) const {
1461 return Align(DL.getABITypeAlignment(ArgTy));
1462 }
1463
1464 /// If true, then instruction selection should seek to shrink the FP constant
1465 /// of the specified type to a smaller type in order to save space and / or
1466 /// reduce runtime.
1467 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1468
1469 /// Return true if it is profitable to reduce a load to a smaller type.
1470 /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
1471 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1472 EVT NewVT) const {
1473 // By default, assume that it is cheaper to extract a subvector from a wide
1474 // vector load rather than creating multiple narrow vector loads.
1475 if (NewVT.isVector() && !Load->hasOneUse())
1476 return false;
1477
1478 return true;
1479 }
1480
1481 /// When splitting a value of the specified type into parts, does the Lo
1482 /// or Hi part come first? This usually follows the endianness, except
1483 /// for ppcf128, where the Hi part always comes first.
1484 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1485 return DL.isBigEndian() || VT == MVT::ppcf128;
1486 }
1487
1488 /// If true, the target has custom DAG combine transformations that it can
1489 /// perform for the specified node.
1490 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1491 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
1492 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1493 }
1494
1495 unsigned getGatherAllAliasesMaxDepth() const {
1496 return GatherAllAliasesMaxDepth;
1497 }
1498
1499 /// Returns the size of the platform's va_list object.
1500 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1501 return getPointerTy(DL).getSizeInBits();
1502 }
1503
1504 /// Get maximum # of store operations permitted for llvm.memset
1505 ///
1506 /// This function returns the maximum number of store operations permitted
1507 /// to replace a call to llvm.memset. The value is set by the target at the
1508 /// performance threshold for such a replacement. If OptSize is true,
1509 /// return the limit for functions that have OptSize attribute.
1510 unsigned getMaxStoresPerMemset(bool OptSize) const {
1511 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1512 }
1513
1514 /// Get maximum # of store operations permitted for llvm.memcpy
1515 ///
1516 /// This function returns the maximum number of store operations permitted
1517 /// to replace a call to llvm.memcpy. The value is set by the target at the
1518 /// performance threshold for such a replacement. If OptSize is true,
1519 /// return the limit for functions that have OptSize attribute.
1520 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1521 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1522 }
1523
1524 /// \brief Get maximum # of store operations to be glued together
1525 ///
1526 /// This function returns the maximum number of store operations permitted
1527 /// to glue together during lowering of llvm.memcpy. The value is set by
1528 /// the target at the performance threshold for such a replacement.
1529 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1530 return MaxGluedStoresPerMemcpy;
1531 }
1532
1533 /// Get maximum # of load operations permitted for memcmp
1534 ///
1535 /// This function returns the maximum number of load operations permitted
1536 /// to replace a call to memcmp. The value is set by the target at the
1537 /// performance threshold for such a replacement. If OptSize is true,
1538 /// return the limit for functions that have OptSize attribute.
1539 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1540 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1541 }
1542
1543 /// Get maximum # of store operations permitted for llvm.memmove
1544 ///
1545 /// This function returns the maximum number of store operations permitted
1546 /// to replace a call to llvm.memmove. The value is set by the target at the
1547 /// performance threshold for such a replacement. If OptSize is true,
1548 /// return the limit for functions that have OptSize attribute.
1549 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1550 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1551 }
1552
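As a hedged sketch (the particular values are assumptions, not taken from this report), backends tune these limits by assigning the corresponding protected members in their TargetLowering constructor:

  // Inside a hypothetical MyTargetLowering constructor:
  MaxStoresPerMemset = 16;        // expand llvm.memset into up to 16 stores
  MaxStoresPerMemsetOptSize = 4;  // tighter limit when optimizing for size
  MaxStoresPerMemcpy = 8;
  MaxStoresPerMemcpyOptSize = 4;
  MaxStoresPerMemmove = 8;
  MaxStoresPerMemmoveOptSize = 4;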
1553 /// Determine if the target supports unaligned memory accesses.
1554 ///
1555 /// This function returns true if the target allows unaligned memory accesses
1556 /// of the specified type in the given address space. If true, it also returns
1557 /// whether the unaligned memory access is "fast" in the last argument by
1558 /// reference. This is used, for example, in situations where an array
1559 /// copy/move/set is converted to a sequence of store operations. Its use
1560 /// helps to ensure that such replacements don't generate code that causes an
1561 /// alignment error (trap) on the target machine.
1562 virtual bool allowsMisalignedMemoryAccesses(
1563 EVT, unsigned AddrSpace = 0, unsigned Align = 1,
1564 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1565 bool * /*Fast*/ = nullptr) const {
1566 return false;
1567 }
1568
1569 /// LLT handling variant.
1570 virtual bool allowsMisalignedMemoryAccesses(
1571 LLT, unsigned AddrSpace = 0, unsigned Align = 1,
1572 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1573 bool * /*Fast*/ = nullptr) const {
1574 return false;
1575 }
1576
1577 /// This function returns true if the memory access is aligned or if the
1578 /// target allows this specific unaligned memory access. If the access is
1579 /// allowed, the optional final parameter returns if the access is also fast
1580 /// (as defined by the target).
1581 bool allowsMemoryAccessForAlignment(
1582 LLVMContext &Context, const DataLayout &DL, EVT VT,
1583 unsigned AddrSpace = 0, unsigned Alignment = 1,
1584 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1585 bool *Fast = nullptr) const;
1586
1587 /// Return true if the memory access of this type is aligned or if the target
1588 /// allows this specific unaligned access for the given MachineMemOperand.
1589 /// If the access is allowed, the optional final parameter returns if the
1590 /// access is also fast (as defined by the target).
1591 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1592 const DataLayout &DL, EVT VT,
1593 const MachineMemOperand &MMO,
1594 bool *Fast = nullptr) const;
1595
1596 /// Return true if the target supports a memory access of this type for the
1597 /// given address space and alignment. If the access is allowed, the optional
1598 /// final parameter returns if the access is also fast (as defined by the
1599 /// target).
1600 virtual bool
1601 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1602 unsigned AddrSpace = 0, unsigned Alignment = 1,
1603 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1604 bool *Fast = nullptr) const;
1605
1606 /// Return true if the target supports a memory access of this type for the
1607 /// given MachineMemOperand. If the access is allowed, the optional
1608 /// final parameter returns if the access is also fast (as defined by the
1609 /// target).
1610 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1611 const MachineMemOperand &MMO,
1612 bool *Fast = nullptr) const;
1613
1614 /// Returns the target specific optimal type for load and store operations as
1615 /// a result of memset, memcpy, and memmove lowering.
1615 /// It returns MVT::Other if the type should be determined using generic
1617 /// target-independent logic.
1618 virtual EVT
1619 getOptimalMemOpType(const MemOp &Op,
1620 const AttributeList & /*FuncAttributes*/) const {
1621 return MVT::Other;
1622 }
1623
1624 /// LLT returning variant.
1625 virtual LLT
1626 getOptimalMemOpLLT(const MemOp &Op,
1627 const AttributeList & /*FuncAttributes*/) const {
1628 return LLT();
1629 }
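A sketch of overriding getOptimalMemOpType for a hypothetical target that prefers 64-bit chunks for larger memcpy/memset expansions; the 8-byte threshold and the class name are illustrative assumptions.

EVT HypotheticalTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  // Use i64 stores when the operation is large enough to amortize them;
  // otherwise fall back to the generic target-independent choice.
  if (Op.size() >= 8)
    return MVT::i64;
  return MVT::Other;
}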
1630
1631 /// Returns true if it's safe to use load / store of the specified type to
1632 /// expand memcpy / memset inline.
1633 ///
1634 /// This is mostly true for all types except for some special cases. For
1635 /// example, on X86 targets without SSE2, f64 loads / stores are done with fldl /
1636 /// fstpl, which also perform type conversion. Note the specified type doesn't
1637 /// have to be legal as the hook is used before type legalization.
1638 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1639
1640 /// Return lower limit for number of blocks in a jump table.
1641 virtual unsigned getMinimumJumpTableEntries() const;
1642
1643 /// Return lower limit of the density in a jump table.
1644 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1645
1646 /// Return upper limit for number of entries in a jump table.
1647 /// Zero if no limit.
1648 unsigned getMaximumJumpTableSize() const;
1649
1650 virtual bool isJumpTableRelative() const {
1651 return TM.isPositionIndependent();
1652 }
1653
1654 /// If a physical register, this specifies the register that
1655 /// llvm.savestack/llvm.restorestack should save and restore.
1656 unsigned getStackPointerRegisterToSaveRestore() const {
1657 return StackPointerRegisterToSaveRestore;
1658 }
1659
1660 /// If a physical register, this returns the register that receives the
1661 /// exception address on entry to an EH pad.
1662 virtual unsigned
1663 getExceptionPointerRegister(const Constant *PersonalityFn) const {
1664 // 0 is guaranteed to be the NoRegister value on all targets
1665 return 0;
1666 }
1667
1668 /// If a physical register, this returns the register that receives the
1669 /// exception typeid on entry to a landing pad.
1670 virtual unsigned
1671 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
1672 // 0 is guaranteed to be the NoRegister value on all targets
1673 return 0;
1674 }
1675
1676 virtual bool needsFixedCatchObjects() const {
1677 report_fatal_error("Funclet EH is not implemented for this target");
1678 }
1679
1680 /// Return the minimum stack alignment of an argument.
1681 Align getMinStackArgumentAlignment() const {
1682 return MinStackArgumentAlignment;
1683 }
1684
1685 /// Return the minimum function alignment.
1686 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
1687
1688 /// Return the preferred function alignment.
1689 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
1690
1691 /// Return the preferred loop alignment.
1692 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
1693 return PrefLoopAlignment;
1694 }
1695
1696 /// Should loops be aligned even when the function is marked OptSize (but not
1697 /// MinSize).
1698 virtual bool alignLoopsWithOptSize() const {
1699 return false;
1700 }
1701
1702 /// If the target has a standard location for the stack protector guard,
1703 /// returns the address of that location. Otherwise, returns nullptr.
1704 /// DEPRECATED: please override useLoadStackGuardNode and customize
1705 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
1706 virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
1707
1708 /// Inserts necessary declarations for SSP (stack protection) purpose.
1709 /// Should be used only when getIRStackGuard returns nullptr.
1710 virtual void insertSSPDeclarations(Module &M) const;
1711
1712 /// Return the variable that's previously inserted by insertSSPDeclarations,
1713 /// if any, otherwise return nullptr. Should be used only when
1714 /// getIRStackGuard returns nullptr.
1715 virtual Value *getSDagStackGuard(const Module &M) const;
1716
1717 /// If this function returns true, stack protection checks should XOR the
1718 /// frame pointer (or whichever pointer is used to address locals) into the
1719 /// stack guard value before checking it. getIRStackGuard must return nullptr
1720 /// if this returns true.
1721 virtual bool useStackGuardXorFP() const { return false; }
1722
1723 /// If the target has a standard stack protection check function that
1724 /// performs validation and error handling, returns the function. Otherwise,
1725 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
1726 /// Should be used only when getIRStackGuard returns nullptr.
1727 virtual Function *getSSPStackGuardCheck(const Module &M) const;
1728
1729protected:
1730 Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
1731 bool UseTLS) const;
1732
1733public:
1734 /// Returns the target-specific address of the unsafe stack pointer.
1735 virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
1736
1737 /// Returns true if the target emits stack probes through a named symbol for
1738 /// this function; see getStackProbeSymbolName below for the symbol itself.
1739 virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; }
1740
1741 virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; }
1742
1743 virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
1744 return "";
1745 }
1746
1747 /// Returns true if a cast between SrcAS and DestAS is a noop.
1748 virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1749 return false;
1750 }
1751
1752 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
1753 /// are happy to sink it into basic blocks. A cast may be free, but not
1754 /// necessarily a no-op, e.g. a free truncate from a 64-bit to a 32-bit pointer.
1755 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1756 return isNoopAddrSpaceCast(SrcAS, DestAS);
1757 }
1758
1759 /// Return true if the pointer arguments to CI should be aligned by aligning
1760 /// the object whose address is being passed. If so then MinSize is set to the
1761 /// minimum size the object must be to be aligned and PrefAlign is set to the
1762 /// preferred alignment.
1763 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
1764 unsigned & /*PrefAlign*/) const {
1765 return false;
1766 }
1767
1768 //===--------------------------------------------------------------------===//
1769 /// \name Helpers for TargetTransformInfo implementations
1770 /// @{
1771
1772 /// Get the ISD node that corresponds to the Instruction class opcode.
1773 int InstructionOpcodeToISD(unsigned Opcode) const;
1774
1775 /// Estimate the cost of type-legalization and the legalized type.
1776 std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
1777 Type *Ty) const;
1778
1779 /// @}
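The two helpers above are typically combined in TargetTransformInfo cost queries: the Instruction opcode is mapped to an ISD node, and the legalization cost scales the per-operation cost. A sketch under that assumption (estimateOpCost and PerOpCost are made-up names for illustration; the declarations in this header are assumed to be available):

static int estimateOpCost(const TargetLoweringBase &TLI, const DataLayout &DL,
                          unsigned Opcode, Type *Ty, int PerOpCost) {
  int ISDOpcode = TLI.InstructionOpcodeToISD(Opcode);
  (void)ISDOpcode; // would normally be used to look up per-ISD costs
  // getTypeLegalizationCost returns {#legalization steps, legalized type}.
  std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
  return LT.first * PerOpCost;
}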
1780
1781 //===--------------------------------------------------------------------===//
1782 /// \name Helpers for atomic expansion.
1783 /// @{
1784
1785 /// Returns the maximum atomic operation size (in bits) supported by
1786 /// the backend. Atomic operations greater than this size (as well
1787 /// as ones that are not naturally aligned), will be expanded by
1788 /// AtomicExpandPass into an __atomic_* library call.
1789 unsigned getMaxAtomicSizeInBitsSupported() const {
1790 return MaxAtomicSizeInBitsSupported;
1791 }
1792
1793 /// Returns the size of the smallest cmpxchg or ll/sc instruction
1794 /// the backend supports. Any smaller operations are widened in
1795 /// AtomicExpandPass.
1796 ///
1797 /// Note that *unlike* operations above the maximum size, atomic ops
1798 /// are still natively supported below the minimum; they just
1799 /// require a more complex expansion.
1800 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
1801
1802 /// Whether the target supports unaligned atomic operations.
1803 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
1804
1805 /// Whether AtomicExpandPass should automatically insert fences and reduce
1806 /// ordering for this atomic. This should be true for most architectures with
1807 /// weak memory ordering. Defaults to false.
1808 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
1809 return false;
1810 }
1811
1812 /// Perform a load-linked operation on Addr, returning a "Value *" with the
1813 /// corresponding pointee type. This may entail some non-trivial operations to
1814 /// truncate or reconstruct types that will be illegal in the backend. See
1815 /// ARMISelLowering for an example implementation.
1816 virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
1817 AtomicOrdering Ord) const {
1818 llvm_unreachable("Load linked unimplemented on this target")::llvm::llvm_unreachable_internal("Load linked unimplemented on this target"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1818)
;
1819 }
1820
1821 /// Perform a store-conditional operation to Addr. Return the status of the
1822 /// store. This should be 0 if the store succeeded, non-zero otherwise.
1823 virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1824 Value *Addr, AtomicOrdering Ord) const {
1825 llvm_unreachable("Store conditional unimplemented on this target")::llvm::llvm_unreachable_internal("Store conditional unimplemented on this target"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1825)
;
1826 }
1827
1828 /// Perform a masked atomicrmw using a target-specific intrinsic. This
1829 /// represents the core LL/SC loop which will be lowered at a late stage by
1830 /// the backend.
1831 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
1832 AtomicRMWInst *AI,
1833 Value *AlignedAddr, Value *Incr,
1834 Value *Mask, Value *ShiftAmt,
1835 AtomicOrdering Ord) const {
1836 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked atomicrmw expansion unimplemented on this target"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1836)
;
1837 }
1838
1839 /// Perform a masked cmpxchg using a target-specific intrinsic. This
1840 /// represents the core LL/SC loop which will be lowered at a late stage by
1841 /// the backend.
1842 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
1843 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1844 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1845 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked cmpxchg expansion unimplemented on this target"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1845)
;
1846 }
1847
1848 /// Inserts in the IR a target-specific intrinsic specifying a fence.
1849 /// It is called by AtomicExpandPass before expanding an
1850 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
1851 /// if shouldInsertFencesForAtomic returns true.
1852 ///
1853 /// Inst is the original atomic instruction, prior to other expansions that
1854 /// may be performed.
1855 ///
1856 /// This function should either return a nullptr, or a pointer to an IR-level
1857 /// Instruction*. Even complex fence sequences can be represented by a
1858 /// single Instruction* through an intrinsic to be lowered later.
1859 /// Backends should override this method to produce target-specific intrinsic
1860 /// for their fences.
1861 /// FIXME: Please note that the default implementation here in terms of
1862 /// IR-level fences exists for historical/compatibility reasons and is
1863 /// *unsound* ! Fences cannot, in general, be used to restore sequential
1864 /// consistency. For example, consider the following example:
1865 /// atomic<int> x = y = 0;
1866 /// int r1, r2, r3, r4;
1867 /// Thread 0:
1868 /// x.store(1);
1869 /// Thread 1:
1870 /// y.store(1);
1871 /// Thread 2:
1872 /// r1 = x.load();
1873 /// r2 = y.load();
1874 /// Thread 3:
1875 /// r3 = y.load();
1876 /// r4 = x.load();
1877 /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
1878 /// seq_cst. But if they are lowered to monotonic accesses, no amount of
1879 /// IR-level fences can prevent it.
1880 /// @{
1881 virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
1882 AtomicOrdering Ord) const {
1883 if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
1884 return Builder.CreateFence(Ord);
1885 else
1886 return nullptr;
1887 }
1888
1889 virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
1890 Instruction *Inst,
1891 AtomicOrdering Ord) const {
1892 if (isAcquireOrStronger(Ord))
1893 return Builder.CreateFence(Ord);
1894 else
1895 return nullptr;
1896 }
1897 /// @}
1898
1899 // Emits code that executes when the comparison result in the ll/sc
1900 // expansion of a cmpxchg instruction is such that the store-conditional will
1901 // not execute. This makes it possible to balance out the load-linked with
1902 // a dedicated instruction, if desired.
1903 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
1904 // be unnecessarily held, except if clrex, inserted by this hook, is executed.
1905 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
1906
1907 /// Returns true if the given (atomic) store should be expanded by the
1908 /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
1909 virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
1910 return false;
1911 }
1912
1913 /// Returns true if arguments should be sign-extended in lib calls.
1914 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
1915 return IsSigned;
1916 }
1917
1918 /// Returns true if arguments should be extended in lib calls.
1919 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
1920 return true;
1921 }
1922
1923 /// Returns how the given (atomic) load should be expanded by the
1924 /// IR-level AtomicExpand pass.
1925 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
1926 return AtomicExpansionKind::None;
1927 }
1928
1929 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
1930 /// AtomicExpand pass.
1931 virtual AtomicExpansionKind
1932 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1933 return AtomicExpansionKind::None;
1934 }
1935
1936 /// Returns how the IR-level AtomicExpand pass should expand the given
1937 /// AtomicRMW, if at all. Default is to never expand.
1938 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1939 return RMW->isFloatingPointOperation() ?
1940 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
1941 }
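As an example of how a backend with LL/SC instructions might steer the AtomicExpand pass, here is a hedged sketch; the 64-bit native width and the class name are assumptions, and the class is assumed to derive from TargetLowering.

TargetLowering::AtomicExpansionKind
HypotheticalTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
  unsigned Size = RMW->getType()->getPrimitiveSizeInBits();
  // Expand natively-sized integer RMWs into an LL/SC loop; leave anything
  // else (wider or floating-point) to the inherited default handling.
  if (RMW->getType()->isIntegerTy() && Size <= 64)
    return AtomicExpansionKind::LLSC;
  return TargetLowering::shouldExpandAtomicRMWInIR(RMW);
}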
1942
1943 /// On some platforms, an AtomicRMW that never actually modifies the value
1944 /// (such as fetch_add of 0) can be turned into a fence followed by an
1945 /// atomic load. This may sound useless, but it makes it possible for the
1946 /// processor to keep the cacheline shared, dramatically improving
1947 /// performance. And such idempotent RMWs are useful for implementing some
1948 /// kinds of locks, see for example (justification + benchmarks):
1949 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
1950 /// This method tries doing that transformation, returning the atomic load if
1951 /// it succeeds, and nullptr otherwise.
1952 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
1953 /// another round of expansion.
1954 virtual LoadInst *
1955 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
1956 return nullptr;
1957 }
1958
1959 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
1960 /// SIGN_EXTEND, or ANY_EXTEND).
1961 virtual ISD::NodeType getExtendForAtomicOps() const {
1962 return ISD::ZERO_EXTEND;
1963 }
1964
1965 /// @}
1966
1967 /// Returns true if we should normalize
1968 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
1969 /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
1970 /// that it saves us from materializing N0 and N1 in an integer register.
1971 /// Targets that are able to perform and/or on flags should return false here.
1972 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
1973 EVT VT) const {
1974 // If a target has multiple condition registers, then it likely has logical
1975 // operations on those registers.
1976 if (hasMultipleConditionRegisters())
1977 return false;
1978 // Only do the transform if the value won't be split into multiple
1979 // registers.
1980 LegalizeTypeAction Action = getTypeAction(Context, VT);
1981 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
1982 Action != TypeSplitVector;
1983 }
1984
1985 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
1986
1987 /// Return true if a select of constants (select Cond, C1, C2) should be
1988 /// transformed into simple math ops with the condition value. For example:
1989 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
1990 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
1991 return false;
1992 }
1993
1994 /// Return true if it is profitable to transform an integer
1995 /// multiplication-by-constant into simpler operations like shifts and adds.
1996 /// This may be true if the target does not directly support the
1997 /// multiplication operation for the specified type or the sequence of simpler
1998 /// ops is faster than the multiply.
1999 virtual bool decomposeMulByConstant(LLVMContext &Context,
2000 EVT VT, SDValue C) const {
2001 return false;
2002 }
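For intuition, the rewrite this hook gates looks like the following on plain integers (a sketch of the algebra only, not of the DAG combine itself):

// x * 9 can be decomposed into a shift and an add: (x << 3) + x.
static uint64_t mulByNine(uint64_t X) { return (X << 3) + X; }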
2003
2004 /// Return true if it is more correct/profitable to use strict FP_TO_INT
2005 /// conversion operations - canonicalizing the FP source value instead of
2006 /// converting all cases and then selecting based on value.
2007 /// This may be true if the target throws exceptions for out of bounds
2008 /// conversions or has fast FP CMOV.
2009 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
2010 bool IsSigned) const {
2011 return false;
2012 }
2013
2014 //===--------------------------------------------------------------------===//
2015 // TargetLowering Configuration Methods - These methods should be invoked by
2016 // the derived class constructor to configure this object for the target.
2017 //
2018protected:
2019 /// Specify how the target extends the result of integer and floating point
2020 /// boolean values from i1 to a wider type. See getBooleanContents.
2021 void setBooleanContents(BooleanContent Ty) {
2022 BooleanContents = Ty;
2023 BooleanFloatContents = Ty;
2024 }
2025
2026 /// Specify how the target extends the result of integer and floating point
2027 /// boolean values from i1 to a wider type. See getBooleanContents.
2028 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
2029 BooleanContents = IntTy;
2030 BooleanFloatContents = FloatTy;
2031 }
2032
2033 /// Specify how the target extends the result of a vector boolean value from a
2034 /// vector of i1 to a wider type. See getBooleanContents.
2035 void setBooleanVectorContents(BooleanContent Ty) {
2036 BooleanVectorContents = Ty;
2037 }
2038
2039 /// Specify the target scheduling preference.
2040 void setSchedulingPreference(Sched::Preference Pref) {
2041 SchedPreferenceInfo = Pref;
2042 }
2043
2044 /// Indicate the minimum number of blocks to generate jump tables.
2045 void setMinimumJumpTableEntries(unsigned Val);
2046
2047 /// Indicate the maximum number of entries in jump tables.
2048 /// Set to zero to generate unlimited jump tables.
2049 void setMaximumJumpTableSize(unsigned);
2050
2051 /// If set to a physical register, this specifies the register that
2052 /// llvm.savestack/llvm.restorestack should save and restore.
2053 void setStackPointerRegisterToSaveRestore(unsigned R) {
2054 StackPointerRegisterToSaveRestore = R;
2055 }
2056
2057 /// Tells the code generator that the target has multiple (allocatable)
2058 /// condition registers that can be used to store the results of comparisons
2059 /// for use by selects and conditional branches. With multiple condition
2060 /// registers, the code generator will not aggressively sink comparisons into
2061 /// the blocks of their users.
2062 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
2063 HasMultipleConditionRegisters = hasManyRegs;
2064 }
2065
2066 /// Tells the code generator that the target has BitExtract instructions.
2067 /// The code generator will aggressively sink "shift"s into the blocks of
2068 /// their users if the users will generate "and" instructions which can be
2069 /// combined with "shift" to BitExtract instructions.
2070 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
2071 HasExtractBitsInsn = hasExtractInsn;
2072 }
2073
2074 /// Tells the code generator not to expand logic operations on comparison
2075 /// predicates into separate sequences that increase the amount of flow
2076 /// control.
2077 void setJumpIsExpensive(bool isExpensive = true);
2078
2079 /// Tells the code generator which bitwidths to bypass.
2080 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
2081 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
2082 }
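For example, a target whose 64-bit hardware divide is slow but whose 32-bit divide is fast could request runtime bypassing from its TargetLowering constructor (a sketch; whether this is profitable is target-specific):

  // Emit a runtime check and use a 32-bit divide when both operands fit.
  addBypassSlowDiv(64, 32);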
2083
2084 /// Add the specified register class as an available regclass for the
2085 /// specified value type. This indicates the selector can handle values of
2086 /// that class natively.
2087 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
2088 assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
2089 RegClassForVT[VT.SimpleTy] = RC;
2090 }
2091
2092 /// Return the largest legal super-reg register class of the register class
2093 /// for the specified type and its associated "cost".
2094 virtual std::pair<const TargetRegisterClass *, uint8_t>
2095 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
2096
2097 /// Once all of the register classes are added, this allows us to compute
2098 /// derived properties we expose.
2099 void computeRegisterProperties(const TargetRegisterInfo *TRI);
2100
2101 /// Indicate that the specified operation does not work with the specified
2102 /// type and indicate what to do about it. Note that VT may refer to either
2103 /// the type of a result or that of an operand of Op.
2104 void setOperationAction(unsigned Op, MVT VT,
2105 LegalizeAction Action) {
2106 assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
2107 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2108 }
2109
2110 /// Indicate that the specified load with extension does not work with the
2111 /// specified type and indicate what to do about it.
2112 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2113 LegalizeAction Action) {
2114 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2115 MemVT.isValid() && "Table isn't big enough!");
2116 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2117 unsigned Shift = 4 * ExtType;
2118 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2119 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2120 }
2121
2122 /// Indicate that the specified truncating store does not work with the
2123 /// specified type and indicate what to do about it.
2124 void setTruncStoreAction(MVT ValVT, MVT MemVT,
2125 LegalizeAction Action) {
2126 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2127 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2128 }
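Taken together, the three setters above are normally invoked from a target's TargetLowering constructor. A hedged sketch for a hypothetical target (the class name and the specific choices below are illustrative, not a recommendation):

HypotheticalTargetLowering::HypotheticalTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  // No native 32-bit divide: expand it into a sequence or libcall.
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  // Extending i1 loads are performed through a wider memory type.
  setLoadExtAction(ISD::EXTLOAD, MVT::i32, MVT::i1, Promote);
  // f64 -> f32 truncating stores are not supported directly.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}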
2129
2130 /// Indicate that the specified indexed load does or does not work with the
2131 /// specified type and indicate what to do about it.
2132 ///
2133 /// NOTE: All indexed mode loads are initialized to Expand in
2134 /// TargetLowering.cpp
2135 void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
2136 setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
2137 }
2138
2139 /// Indicate that the specified indexed store does or does not work with the
2140 /// specified type and indicate what to do about it.
2141 ///
2142 /// NOTE: All indexed mode stores are initialized to Expand in
2143 /// TargetLowering.cpp
2144 void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
2145 setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
2146 }
2147
2148 /// Indicate that the specified indexed masked load does or does not work with
2149 /// the specified type and indicate what to do about it.
2150 ///
2151 /// NOTE: All indexed mode masked loads are initialized to Expand in
2152 /// TargetLowering.cpp
2153 void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
2154 LegalizeAction Action) {
2155 setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
2156 }
2157
2158 /// Indicate that the specified indexed masked store does or does not work
2159 /// with the specified type and indicate what to do about it.
2160 ///
2161 /// NOTE: All indexed mode masked stores are initialized to Expand in
2162 /// TargetLowering.cpp
2163 void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
2164 LegalizeAction Action) {
2165 setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
2166 }
2167
2168 /// Indicate that the specified condition code is or isn't supported on the
2169 /// target and indicate what to do about it.
2170 void setCondCodeAction(ISD::CondCode CC, MVT VT,
2171 LegalizeAction Action) {
2172 assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
2173 "Table isn't big enough!");
2174 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2175 /// The lower 3 bits of the SimpleTy select the Nth 4-bit group within the
2176 /// 32-bit value, and the upper 29 bits index into the second dimension of
2177 /// the array to select which 32-bit value to use.
2178 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2179 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2180 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2181 }
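A common use is to mark floating-point condition codes that the hardware cannot test directly, again from the constructor (illustrative values for a hypothetical target):

  // No native unordered-or-equal compare on f32: expand it.
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);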
2182
2183 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2184 /// to trying a larger integer/fp until it can find one that works. If that
2185 /// default is insufficient, this method can be used by the target to override
2186 /// the default.
2187 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2188 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2189 }
2190
2191 /// Convenience method to set an operation to Promote and specify the type
2192 /// in a single call.
2193 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2194 setOperationAction(Opc, OrigVT, Promote);
2195 AddPromotedToType(Opc, OrigVT, DestVT);
2196 }
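As an example, a target with only a 32-bit count-leading-zeros instruction could promote the narrower forms in its constructor (a hypothetical configuration, shown only to illustrate the call):

  // i8/i16 CTLZ are performed in i32 and the result adjusted afterwards.
  setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
  setOperationPromotedToType(ISD::CTLZ, MVT::i16, MVT::i32);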
2197
2198 /// Targets should invoke this method for each target independent node that
2199 /// they want to provide a custom DAG combiner for by implementing the
2200 /// PerformDAGCombine virtual method.
2201 void setTargetDAGCombine(ISD::NodeType NT) {
2202 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
2203 TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
2204 }
2205
2206 /// Set the target's minimum function alignment.
2207 void setMinFunctionAlignment(Align Alignment) {
2208 MinFunctionAlignment = Alignment;
2209 }
2210
2211 /// Set the target's preferred function alignment. This should be set if
2212 /// there is a performance benefit to higher-than-minimum alignment.
2213 void setPrefFunctionAlignment(Align Alignment) {
2214 PrefFunctionAlignment = Alignment;
2215 }
2216
2217 /// Set the target's preferred loop alignment. The default is one, meaning
2218 /// the target does not care about loop alignment. The target may also
2219 /// override getPrefLoopAlignment to provide per-loop values.
2220 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2221
2222 /// Set the minimum stack alignment of an argument.
2223 void setMinStackArgumentAlignment(Align Alignment) {
2224 MinStackArgumentAlignment = Alignment;
2225 }
2226
2227 /// Set the maximum atomic operation size supported by the
2228 /// backend. Atomic operations greater than this size (as well as
2229 /// ones that are not naturally aligned), will be expanded by
2230 /// AtomicExpandPass into an __atomic_* library call.
2231 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2232 MaxAtomicSizeInBitsSupported = SizeInBits;
2233 }
2234
2235 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2236 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2237 MinCmpXchgSizeInBits = SizeInBits;
2238 }
2239
2240 /// Sets whether unaligned atomic operations are supported.
2241 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2242 SupportsUnalignedAtomics = UnalignedSupported;
2243 }
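These atomic-related knobs are likewise set once in the constructor. A sketch for a hypothetical 64-bit target whose smallest atomic primitive is a 32-bit cmpxchg (the concrete numbers are assumptions):

  setMaxAtomicSizeInBitsSupported(64);  // wider atomics become __atomic_* calls
  setMinCmpXchgSizeInBits(32);          // narrower cmpxchg is widened
  setSupportsUnalignedAtomics(false);   // misaligned atomics are not supported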
2244
2245public:
2246 //===--------------------------------------------------------------------===//
2247 // Addressing mode description hooks (used by LSR etc).
2248 //
2249
2250 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2251 /// instructions reading the address. This allows as much computation as
2252 /// possible to be done in the address mode for that operand. This hook lets
2253 /// targets also pass back when this should be done on intrinsics which
2254 /// load/store.
2255 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2256 SmallVectorImpl<Value*> &/*Ops*/,
2257 Type *&/*AccessTy*/) const {
2258 return false;
2259 }
2260
2261 /// This represents an addressing mode of:
2262 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
2263 /// If BaseGV is null, there is no BaseGV.
2264 /// If BaseOffs is zero, there is no base offset.
2265 /// If HasBaseReg is false, there is no base register.
2266 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2267 /// no scale.
2268 struct AddrMode {
2269 GlobalValue *BaseGV = nullptr;
2270 int64_t BaseOffs = 0;
2271 bool HasBaseReg = false;
2272 int64_t Scale = 0;
2273 AddrMode() = default;
2274 };
2275
2276 /// Return true if the addressing mode represented by AM is legal for this
2277 /// target, for a load/store of the specified type.
2278 ///
2279 /// The type may be VoidTy, in which case only return true if the addressing
2280 /// mode is legal for a load/store of any legal type. TODO: Handle
2281 /// pre/postinc as well.
2282 ///
2283 /// If the address space cannot be determined, it will be -1.
2284 ///
2285 /// TODO: Remove default argument
2286 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2287 Type *Ty, unsigned AddrSpace,
2288 Instruction *I = nullptr) const;
2289
2290 /// Return the cost of the scaling factor used in the addressing mode
2291 /// represented by AM for this target, for a load/store of the specified type.
2292 ///
2293 /// If the AM is supported, the return value must be >= 0.
2294 /// If the AM is not supported, it returns a negative value.
2295 /// TODO: Handle pre/postinc as well.
2296 /// TODO: Remove default argument
2297 virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
2298 Type *Ty, unsigned AS = 0) const {
2299 // Default: assume that any scaling factor used in a legal AM is free.
2300 if (isLegalAddressingMode(DL, AM, Ty, AS))
2301 return 0;
2302 return -1;
2303 }
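A sketch of how a client such as LoopStrengthReduce-style code might query these hooks for the mode [BaseReg + 4*IndexReg + 16]; the helper name is made up for illustration, and the declarations in this header are assumed to be available:

static bool isReg4IndexPlus16Legal(const TargetLoweringBase &TLI,
                                   const DataLayout &DL, Type *AccessTy,
                                   unsigned AddrSpace) {
  TargetLoweringBase::AddrMode AM;
  AM.HasBaseReg = true; // BaseReg present
  AM.Scale = 4;         // 4 * IndexReg
  AM.BaseOffs = 16;     // constant displacement
  return TLI.isLegalAddressingMode(DL, AM, AccessTy, AddrSpace);
}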
2304
2305 /// Return true if the specified immediate is legal icmp immediate, that is
2306 /// the target has icmp instructions which can compare a register against the
2307 /// immediate without having to materialize the immediate into a register.
2308 virtual bool isLegalICmpImmediate(int64_t) const {
2309 return true;
2310 }
2311
2312 /// Return true if the specified immediate is legal add immediate, that is the
2313 /// target has add instructions which can add a register with the immediate
2314 /// without having to materialize the immediate into a register.
2315 virtual bool isLegalAddImmediate(int64_t) const {
2316 return true;
2317 }
2318
2319 /// Return true if the specified immediate is legal for the value input of a
2320 /// store instruction.
2321 virtual bool isLegalStoreImmediate(int64_t Value) const {
2322 // Default implementation assumes that at least 0 works since it is likely
2323 // that a zero register exists or a zero immediate is allowed.
2324 return Value == 0;
2325 }
2326
2327 /// Return true if it's significantly cheaper to shift a vector by a uniform
2328 /// scalar than by an amount which will vary across each lane. On x86, for
2329 /// example, there is a "psllw" instruction for the former case, but no simple
2330 /// instruction for a general "a << b" operation on vectors.
2331 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2332 return false;
2333 }
2334
2335 /// Returns true if the opcode is a commutative binary operation.
2336 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2337 // FIXME: This should get its info from the td file.
2338 switch (Opcode) {
2339 case ISD::ADD:
2340 case ISD::SMIN:
2341 case ISD::SMAX:
2342 case ISD::UMIN:
2343 case ISD::UMAX:
2344 case ISD::MUL:
2345 case ISD::MULHU:
2346 case ISD::MULHS:
2347 case ISD::SMUL_LOHI:
2348 case ISD::UMUL_LOHI:
2349 case ISD::FADD:
2350 case ISD::FMUL:
2351 case ISD::AND:
2352 case ISD::OR:
2353 case ISD::XOR:
2354 case ISD::SADDO:
2355 case ISD::UADDO:
2356 case ISD::ADDC:
2357 case ISD::ADDE:
2358 case ISD::SADDSAT:
2359 case ISD::UADDSAT:
2360 case ISD::FMINNUM:
2361 case ISD::FMAXNUM:
2362 case ISD::FMINNUM_IEEE:
2363 case ISD::FMAXNUM_IEEE:
2364 case ISD::FMINIMUM:
2365 case ISD::FMAXIMUM:
2366 return true;
2367 default: return false;
2368 }
2369 }
2370
2371 /// Return true if the node is a math/logic binary operator.
2372 virtual bool isBinOp(unsigned Opcode) const {
2373 // A commutative binop must be a binop.
2374 if (isCommutativeBinOp(Opcode))
2375 return true;
2376 // These are non-commutative binops.
2377 switch (Opcode) {
2378 case ISD::SUB:
2379 case ISD::SHL:
2380 case ISD::SRL:
2381 case ISD::SRA